RFR: 8155795: Optimize Integer/Long.reverse by using reverseBytes
Claes Redestad
claes.redestad at oracle.com
Tue May 10 11:18:59 UTC 2016
Hi,
I think this shows the compiler isn't doing anything really strange;
it'd be fun to know if it's
the different instruction order or the use of one less constant that
shows an effect, but it's
not really significant either way.
I think the latest patch is fine as it is and will push it soon.
Thanks!
/Claes
On 2016-05-09 21:50, Jaroslav Kameník wrote:
> Hi guys,
>
> I have looked at the generated asm; there does not seem to be a problem,
> only a slightly different order
> of shifts and ands. Also, on Haswell, the times do not differ that much.
> Here are the parts that differ:
>
> Slower version:
>
> 057 sall R10, #24
> 05b movl R11, RAX # spill
> 05e sall R11, #8
> 062 movl R8, RAX # spill
> 065 shrl R8, #8
> 069 andl R11, #16711680 # int
> 070 andl R8, #65280 # int
> 077 shrl RAX, #24
>
> Faster version:
>
> 057 shrl R10, #24
> 05b movl R11, RAX # spill
> 05e shrl R11, #8
> 062 movl R8, RAX # spill
> 065 andl R8, #65280 # int
> 06c andl R11, #65280 # int
> 073 sall R8, #8
> 077 sall RAX, #24
>
> Full code:
>
> ORIGINAL - 5,169 ± 0,377 ns/op
> ===============================
>
> 000 B1: # N1 <- BLOCK HEAD IS JUNK Freq: 1
> 000 # stack bang (96 bytes)
> pushq rbp # Save rbp
> subq rsp, #16 # Create frame
>
> 00c movl RAX, RSI # spill
> 00e andl RAX, #1431655765 # int
> 014 shrl RSI, #1
> 016 sall RAX, #1
> 018 andl RSI, #1431655765 # int
> 01e orl RAX, RSI # int
> 020 movl R10, RAX # spill
> 023 shrl R10, #2
> 027 andl RAX, #858993459 # int
> 02d andl R10, #858993459 # int
> 034 sall RAX, #2
> 037 orl RAX, R10 # int
> 03a movl R10, RAX # spill
> 03d shrl R10, #4
> 041 andl RAX, #252645135 # int
> 047 andl R10, #252645135 # int
> 04e sall RAX, #4
> 051 orl RAX, R10 # int
> 054 movl R10, RAX # spill
> 057 shrl R10, #24
> 05b movl R11, RAX # spill
> 05e shrl R11, #8
> 062 movl R8, RAX # spill
> 065 andl R8, #65280 # int
> 06c andl R11, #65280 # int
> 073 sall R8, #8
> 077 sall RAX, #24
> 07a orl RAX, R8 # int
> 07d orl RAX, R11 # int
> 080 orl RAX, R10 # int
> 083 addq rsp, 16 # Destroy frame
>
>
> USED INTRINSIC - 4,093 ± 0,100 ns/op
> =====================================
>
> 000 B1: # N1 <- BLOCK HEAD IS JUNK Freq: 1
> 000 # stack bang (96 bytes)
> pushq rbp # Save rbp
> subq rsp, #16 # Create frame
>
> 00c movl RAX, RSI # spill
> 00e andl RAX, #1431655765 # int
> 014 shrl RSI, #1
> 016 sall RAX, #1
> 018 andl RSI, #1431655765 # int
> 01e orl RAX, RSI # int
> 020 movl R10, RAX # spill
> 023 shrl R10, #2
> 027 andl RAX, #858993459 # int
> 02d andl R10, #858993459 # int
> 034 sall RAX, #2
> 037 orl RAX, R10 # int
> 03a movl R10, RAX # spill
> 03d shrl R10, #4
> 041 andl RAX, #252645135 # int
> 047 andl R10, #252645135 # int
> 04e sall RAX, #4
> 051 orl RAX, R10 # int
> 054 bswapl RAX
> 056 addq rsp, 16 # Destroy frame
>
>
> INTRINSICS DISABLED - 5,173 ± 0,096 ns/op
> ==========================================
>
>
> 000 B1: # N1 <- BLOCK HEAD IS JUNK Freq: 1
> 000 # stack bang (96 bytes)
> pushq rbp # Save rbp
> subq rsp, #16 # Create frame
>
> 00c movl RAX, RSI # spill
> 00e andl RAX, #1431655765 # int
> 014 shrl RSI, #1
> 016 sall RAX, #1
> 018 andl RSI, #1431655765 # int
> 01e orl RAX, RSI # int
> 020 movl R10, RAX # spill
> 023 shrl R10, #2
> 027 andl RAX, #858993459 # int
> 02d andl R10, #858993459 # int
> 034 sall RAX, #2
> 037 orl RAX, R10 # int
> 03a movl R10, RAX # spill
> 03d shrl R10, #4
> 041 andl RAX, #252645135 # int
> 047 andl R10, #252645135 # int
> 04e sall RAX, #4
> 051 orl RAX, R10 # int
> 054 movl R10, RAX # spill
> 057 sall R10, #24
> 05b movl R11, RAX # spill
> 05e sall R11, #8
> 062 movl R8, RAX # spill
> 065 shrl R8, #8
> 069 andl R11, #16711680 # int
> 070 andl R8, #65280 # int
> 077 shrl RAX, #24
> 07a orl RAX, R8 # int
> 07d orl RAX, R11 # int
> 080 orl RAX, R10 # int
> 083 addq rsp, 16 # Destroy frame
>
>
> INTRINSICS DISABLED 2 - 5,081 ± 0,092 ns/op
> ===========================================
>
> 000 B1: # N1 <- BLOCK HEAD IS JUNK Freq: 1
> 000 # stack bang (96 bytes)
> pushq rbp # Save rbp
> subq rsp, #16 # Create frame
>
> 00c movl RAX, RSI # spill
> 00e andl RAX, #1431655765 # int
> 014 shrl RSI, #1
> 016 sall RAX, #1
> 018 andl RSI, #1431655765 # int
> 01e orl RAX, RSI # int
> 020 movl R10, RAX # spill
> 023 shrl R10, #2
> 027 andl RAX, #858993459 # int
> 02d andl R10, #858993459 # int
> 034 sall RAX, #2
> 037 orl RAX, R10 # int
> 03a movl R10, RAX # spill
> 03d shrl R10, #4
> 041 andl RAX, #252645135 # int
> 047 andl R10, #252645135 # int
> 04e sall RAX, #4
> 051 orl RAX, R10 # int
> 054 movl R10, RAX # spill
> 057 shrl R10, #24
> 05b movl R11, RAX # spill
> 05e shrl R11, #8
> 062 movl R8, RAX # spill
> 065 andl R8, #65280 # int
> 06c andl R11, #65280 # int
> 073 sall R8, #8
> 077 sall RAX, #24
> 07a orl RAX, R8 # int
> 07d orl RAX, R11 # int
> 080 orl RAX, R10 # int
> 083 addq rsp, 16 # Destroy frame
>
>
> Jaroslav
More information about the core-libs-dev
mailing list