RFR: 8310190: C2 SuperWord: AlignVector is broken, generates misaligned packs [v58]

Emanuel Peter epeter at openjdk.org
Fri Jan 5 08:38:36 UTC 2024


On Thu, 4 Jan 2024 16:51:19 GMT, Vladimir Kozlov <kvn at openjdk.org> wrote:

>> Can you show assembler code for simple load and store instructions (move data from one array to another)?
>> My concern is that LoadV and StoreV are defined only with `memory` input:
>> 
>> instruct loadV(vec dst, memory mem) %{
>>   match(Set dst (LoadVector mem));
>> 
>> I would assume it will be embedded memory only. But C2 may be smart enough to generate `lea` if it sees not AddP node.
>
> Also, why does your assembler example test alignment twice for the same address? Maybe because the same array element is used for both the load and the store?
> 
>   0x00007f83c8bb2f6d:   mov    %r10,%r8
>   0x00007f83c8bb2f70:   test   $0x7,%r8b
>   0x00007f83c8bb2f74:   je     0x00007f83c8bb2f8a
> ...
>   0x00007f83c8bb2f8a:   test   $0x7,%r10b
>   0x00007f83c8bb2f8e:   je     0x00007f83c8bb2fa4
> 
> No need to optimize I think since it is only for debugging.

@vnkozlov 
> Can you show assembler code for simple load and store instructions (move data from one array to another)?

Here is the example with a simple load -> store between two different arrays:

public class Test { // Minimal reproducer: a single int-array copy loop, invoked once from main
    static int RANGE = 1024*64; // array length and loop bound (65536 elements)

    public static void main(String[] strArr) {
        int a[] = new int[RANGE]; // destination array
        int b[] = new int[RANGE]; // source array, a separate object from a
        test0(a, b);
    }

    static void test0(int[] a, int[] b) { // copies b[i] into a[i] for every i in [0, RANGE)
        for (int i = 0; i < RANGE; i++) {
            a[i] = b[i]; // one int load from b and one int store to a per iteration
        }
    }
}


With `-XX:+VerifyAlignVector`:
`./java -XX:CompileCommand=compileonly,Test::test* -XX:+TraceSuperWord -Xcomp -XX:+PrintIdeal -XX:+AlignVector  -XX:+VerifyAlignVector -XX:CompileCommand=print,Test::test* Test.java`


 ;; B32: #	out( B32 B33 ) <- in( B31 B32 ) Loop( B32-B32 inner post of N1028) Freq: 4.49976
  0x00007fbef8bb31ec:   movslq %ebx,%r10
  0x00007fbef8bb31ef:   shl    $0x2,%r10                    ;*iaload {reexecute=0 rethrow=0 return_oop=0}
                                                            ; - Test::test0 at 13 (line 12)
  0x00007fbef8bb31f3:   lea    0x10(%r13,%r10,1),%r8
  0x00007fbef8bb31f8:   lea    0x10(%r11,%r10,1),%r10
  0x00007fbef8bb31fd:   test   $0x7,%r8b
  0x00007fbef8bb3201:   je     0x00007fbef8bb3217
  0x00007fbef8bb3203:   movabs $0x7fbf08c15fc8,%rdi         ;   {external_word}
  0x00007fbef8bb320d:   and    $0xfffffffffffffff0,%rsp
  0x00007fbef8bb3211:   callq  0x00007fbf085d3162           ;   {runtime_call MacroAssembler::debug64(char*, long, long*)}
  0x00007fbef8bb3216:   hlt    
  0x00007fbef8bb3217:   vmovdqu32 (%r8),%zmm0
  0x00007fbef8bb321d:   test   $0x7,%r10b
  0x00007fbef8bb3221:   je     0x00007fbef8bb3237
  0x00007fbef8bb3223:   movabs $0x7fbf08c15fc8,%rdi         ;   {external_word}
  0x00007fbef8bb322d:   and    $0xfffffffffffffff0,%rsp
  0x00007fbef8bb3231:   callq  0x00007fbf085d3162           ;   {runtime_call MacroAssembler::debug64(char*, long, long*)}
  0x00007fbef8bb3236:   hlt    
  0x00007fbef8bb3237:   vmovdqu32 %zmm0,(%r10)              ;*iastore {reexecute=0 rethrow=0 return_oop=0}
                                                            ; - Test::test0 at 14 (line 12)
  0x00007fbef8bb323d:   add    $0x10,%ebx                   ;*iinc {reexecute=0 rethrow=0 return_oop=0}
                                                            ; - Test::test0 at 15 (line 11)
  0x00007fbef8bb3240:   cmp    %r9d,%ebx
  0x00007fbef8bb3243:   jl     0x00007fbef8bb31ec           ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
                                                            ; - Test::test0 at 6 (line 11)


With `-XX:-VerifyAlignVector`:
`./java -XX:CompileCommand=compileonly,Test::test* -XX:+TraceSuperWord -Xcomp -XX:+PrintIdeal -XX:+AlignVector  -XX:-VerifyAlignVector -XX:CompileCommand=print,Test::test* Test.java`


 ;; B30: #	out( B30 B31 ) <- in( B29 B30 ) Loop( B30-B30 inner post of N1028) Freq: 4.49976
  0x00007f90e4bb2ab8:   vmovdqu32 0x10(%rbx,%r13,4),%zmm0
  0x00007f90e4bb2ac3:   vmovdqu32 %zmm0,0x10(%rcx,%r13,4)   ;*iastore {reexecute=0 rethrow=0 return_oop=0}
                                                            ; - Test::test0 at 14 (line 12)
  0x00007f90e4bb2ace:   add    $0x10,%r13d                  ;*iinc {reexecute=0 rethrow=0 return_oop=0}
                                                            ; - Test::test0 at 15 (line 11)
  0x00007f90e4bb2ad2:   cmp    %r11d,%r13d
  0x00007f90e4bb2ad5:   jl     0x00007f90e4bb2ab8           ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
                                                            ; - Test::test0 at 6 (line 11)

-------------

PR Review Comment: https://git.openjdk.org/jdk/pull/14785#discussion_r1442616543


More information about the hotspot-compiler-dev mailing list