RFR: 8310190: C2 SuperWord: AlignVector is broken, generates misaligned packs [v58]

Emanuel Peter epeter at openjdk.org
Fri Jan 5 08:51:37 UTC 2024


On Fri, 5 Jan 2024 08:35:46 GMT, Emanuel Peter <epeter at openjdk.org> wrote:

>> Also why your assembler example have tested alignment twice for the same address? May be because the same array's element for load and store?:
>> 
>>   0x00007f83c8bb2f6d:   mov    %r10,%r8
>>   0x00007f83c8bb2f70:   test   $0x7,%r8b
>>   0x00007f83c8bb2f74:   je     0x00007f83c8bb2f8a
>> ...
>>   0x00007f83c8bb2f8a:   test   $0x7,%r10b
>>   0x00007f83c8bb2f8e:   je     0x00007f83c8bb2fa4
>> 
>> No need to optimize I think since it is only for debugging.
>
> @vnkozlov 
>> Can you show assembler code for simple load and store instructions (move data from one array to another)?
> 
> Here the example with simple load -> store with two different arrays:
> 
> public class Test {
>     static int RANGE = 1024*64;
> 
>     public static void main(String[] strArr) {
>         int a[] = new int[RANGE];
>         int b[] = new int[RANGE];
>         test0(a, b);
>     }
> 
>     static void test0(int[] a, int[] b) {
>         for (int i = 0; i < RANGE; i++) {
>             a[i] = b[i];
>         }
>     }
> }
> 
> 
> With `-XX:+VerifyAlignVector`:
> `./java -XX:CompileCommand=compileonly,Test::test* -XX:+TraceSuperWord -Xcomp -XX:+PrintIdeal -XX:+AlignVector  -XX:+VerifyAlignVector -XX:CompileCommand=print,Test::test* Test.java`
> 
> 
>  ;; B32: #	out( B32 B33 ) <- in( B31 B32 ) Loop( B32-B32 inner post of N1028) Freq: 4.49976
>   0x00007fbef8bb31ec:   movslq %ebx,%r10
>   0x00007fbef8bb31ef:   shl    $0x2,%r10                    ;*iaload {reexecute=0 rethrow=0 return_oop=0}
>                                                             ; - Test::test0 at 13 (line 12)
>   0x00007fbef8bb31f3:   lea    0x10(%r13,%r10,1),%r8
>   0x00007fbef8bb31f8:   lea    0x10(%r11,%r10,1),%r10
>   0x00007fbef8bb31fd:   test   $0x7,%r8b
>   0x00007fbef8bb3201:   je     0x00007fbef8bb3217
>   0x00007fbef8bb3203:   movabs $0x7fbf08c15fc8,%rdi         ;   {external_word}
>   0x00007fbef8bb320d:   and    $0xfffffffffffffff0,%rsp
>   0x00007fbef8bb3211:   callq  0x00007fbf085d3162           ;   {runtime_call MacroAssembler::debug64(char*, long, long*)}
>   0x00007fbef8bb3216:   hlt    
>   0x00007fbef8bb3217:   vmovdqu32 (%r8),%zmm0
>   0x00007fbef8bb321d:   test   $0x7,%r10b
>   0x00007fbef8bb3221:   je     0x00007fbef8bb3237
>   0x00007fbef8bb3223:   movabs $0x7fbf08c15fc8,%rdi         ;   {external_word}
>   0x00007fbef8bb322d:   and    $0xfffffffffffffff0,%rsp
>   0x00007fbef8bb3231:   callq  0x00007fbf085d3162           ;   {runtime_call MacroAssembler::debug64(char*, long, long*)}
>   0x00007fbef8bb3236:   hlt    
>   0x00007fbef8bb3237:   vmovdqu32 %zmm0,(%r10)              ;*iastore {reexecute=0 rethrow=0 return_oop=0}
>                                                             ; - Test::test0 at 14 (line 12)
>   0x00007fbef8bb323d:   add    $0x10,%ebx                   ;*iinc {reexecute=0 rethrow=0 return_oop=0}
>                                                             ; - Test::test0 at 15 (line 11)
>   0x00007fbef8bb3240:   cmp    %r9d,%ebx
>   0x00007fbef8bb3243:   jl     0x00007fbef8bb31ec           ;*if_icmpge {reexecute=0 rethrow...

> My concern is that LoadV and StoreV are defined only with memory input


// Indirect Memory Operand
operand indirect(any_RegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}



opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);

It seems that `memory` summarizes many different patterns. One of them is the `indirect` one, which simply loads the address from a register. In our case this address was computed by a `lea`, then used in the alignment verification, and then passed on as `memory` to load / store.

-------------

PR Review Comment: https://git.openjdk.org/jdk/pull/14785#discussion_r1442625027


More information about the hotspot-compiler-dev mailing list