RFR: 8310190: C2 SuperWord: AlignVector is broken, generates misaligned packs [v58]

Emanuel Peter epeter at openjdk.org
Thu Jan 4 08:28:36 UTC 2024


On Thu, 4 Jan 2024 06:55:46 GMT, Emanuel Peter <epeter at openjdk.org> wrote:

>> May be hack only `loadV` and `storeV` instructions in .ad file to use `lea` and do the check.
>
> I don't understand this comment.
> The `LoadVector` and `StoreVector` have both a `MemNode::Address` input, which I think it the memory address.
> The address itself usually consists of `AddP` nodes, which do the (base, index, offset) computation. These nodes later can be folded into the load/store itself, or be computed with a `lea`.
> I simply take the address value, check it for alignment and pass it on to the load/store.
> 
> Take this example:
> 
> public class Test {
>     static int RANGE = 1024*64;
> 
>     public static void main(String[] strArr) {
>         int a[] = new int[RANGE];
>         test0(a);
>     }
> 
>     static void test0(int[] a) {
>         for (int i = 0; i < RANGE; i++) {
>             a[i]++;
>         }
>     }
> }
> 
> 
> `./java -XX:CompileCommand=compileonly,Test::test* -XX:+TraceSuperWord -Xcomp -XX:+PrintIdeal -XX:+AlignVector  -XX:+VerifyAlignVector -XX:CompileCommand=print,Test::test* Test.java`
> 
> This looks like the main loop:
> 
>  ;; B22: #	out( B22 B23 ) <- in( B21 B22 ) Loop( B22-B22 inner post of N743) Freq: 4.49988
>   0x00007f83c8bb2f68:   lea    0x10(%rbp,%rbx,4),%r10       ;*iaload {reexecute=0 rethrow=0 return_oop=0}
>                                                             ; - Test::test0 at 12 (line 11)
>   0x00007f83c8bb2f6d:   mov    %r10,%r8
>   0x00007f83c8bb2f70:   test   $0x7,%r8b
>   0x00007f83c8bb2f74:   je     0x00007f83c8bb2f8a
>   0x00007f83c8bb2f76:   movabs $0x7f83d77e2fc8,%rdi         ;   {external_word}
>   0x00007f83c8bb2f80:   and    $0xfffffffffffffff0,%rsp
>   0x00007f83c8bb2f84:   callq  0x00007f83d71a0162           ;   {runtime_call MacroAssembler::debug64(char*, long, long*)}
>   0x00007f83c8bb2f89:   hlt    
>   0x00007f83c8bb2f8a:   test   $0x7,%r10b
>   0x00007f83c8bb2f8e:   je     0x00007f83c8bb2fa4
>   0x00007f83c8bb2f90:   movabs $0x7f83d77e2fc8,%rdi         ;   {external_word}
>   0x00007f83c8bb2f9a:   and    $0xfffffffffffffff0,%rsp
>   0x00007f83c8bb2f9e:   callq  0x00007f83d71a0162           ;   {runtime_call MacroAssembler::debug64(char*, long, long*)}
>   0x00007f83c8bb2fa3:   hlt    
>   0x00007f83c8bb2fa4:   vpaddd (%r10),%zmm5,%zmm0
>   0x00007f83c8bb2faa:   vmovdqu32 %zmm0,(%r8)               ;*iastore {reexecute=0 rethrow=0 return_oop=0}
>                                                             ; - Test::test0 at 15 (line 11)
>   0x00007f83c8bb2fb0:   add    $0x10,%ebx                   ;*iinc {reexecute=0 rethrow=0 return_oop=0}
>                                                             ; - Test::test0 at 16 (line 10)
>   0x00007f83c8bb2fb3:   cmp    %r11d...

And without `-XX:-VerifyAlignVector`


 ;; B20: #	out( B20 B21 ) <- in( B19 B20 ) Loop( B20-B20 inner post of N743) Freq: 4.49988
  0x00007ff22cbb2924:   vpaddd 0x10(%rbx,%r13,4),%zmm0,%zmm1
  0x00007ff22cbb292f:   vmovdqu32 %zmm1,0x10(%rbx,%r13,4)   ;*iastore {reexecute=0 rethrow=0 return_oop=0}
                                                            ; - Test::test0 at 15 (line 11)
  0x00007ff22cbb293a:   add    $0x10,%r13d                  ;*iinc {reexecute=0 rethrow=0 return_oop=0}
                                                            ; - Test::test0 at 16 (line 10)
  0x00007ff22cbb293e:   cmp    %r10d,%r13d
  0x00007ff22cbb2941:   jl     0x00007ff22cbb2924           ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
                                                            ; - Test::test0 at 6 (line 10)

-------------

PR Review Comment: https://git.openjdk.org/jdk/pull/14785#discussion_r1441463566


More information about the hotspot-compiler-dev mailing list