RFR: 8155795: Optimize Integer/Long.reverse by using reverseBytes
Jaroslav Kameník
jaroslav at kamenik.cz
Mon May 9 19:50:51 UTC 2016
Hi guys,
I have looked at the generated asm and there does not seem to be a problem; the
only difference is a slightly different order
of shifts and ands. Also, on Haswell the times do not differ that much. Here
are the parts that differ:
Slower version:
057 sall R10, #24
05b movl R11, RAX # spill
05e sall R11, #8
062 movl R8, RAX # spill
065 shrl R8, #8
069 andl R11, #16711680 # int
070 andl R8, #65280 # int
077 shrl RAX, #24
Faster version:
057 shrl R10, #24
05b movl R11, RAX # spill
05e shrl R11, #8
062 movl R8, RAX # spill
065 andl R8, #65280 # int
06c andl R11, #65280 # int
073 sall R8, #8
077 sall RAX, #24
Full code:
ORIGINAL - 5,169 ± 0,377 ns/op
===============================
000 B1: # N1 <- BLOCK HEAD IS JUNK Freq: 1
000 # stack bang (96 bytes)
pushq rbp # Save rbp
subq rsp, #16 # Create frame
00c movl RAX, RSI # spill
00e andl RAX, #1431655765 # int
014 shrl RSI, #1
016 sall RAX, #1
018 andl RSI, #1431655765 # int
01e orl RAX, RSI # int
020 movl R10, RAX # spill
023 shrl R10, #2
027 andl RAX, #858993459 # int
02d andl R10, #858993459 # int
034 sall RAX, #2
037 orl RAX, R10 # int
03a movl R10, RAX # spill
03d shrl R10, #4
041 andl RAX, #252645135 # int
047 andl R10, #252645135 # int
04e sall RAX, #4
051 orl RAX, R10 # int
054 movl R10, RAX # spill
057 shrl R10, #24
05b movl R11, RAX # spill
05e shrl R11, #8
062 movl R8, RAX # spill
065 andl R8, #65280 # int
06c andl R11, #65280 # int
073 sall R8, #8
077 sall RAX, #24
07a orl RAX, R8 # int
07d orl RAX, R11 # int
080 orl RAX, R10 # int
083 addq rsp, 16 # Destroy frame
USED INTRINSIC - 4,093 ± 0,100 ns/op
=====================================
000 B1: # N1 <- BLOCK HEAD IS JUNK Freq: 1
000 # stack bang (96 bytes)
pushq rbp # Save rbp
subq rsp, #16 # Create frame
00c movl RAX, RSI # spill
00e andl RAX, #1431655765 # int
014 shrl RSI, #1
016 sall RAX, #1
018 andl RSI, #1431655765 # int
01e orl RAX, RSI # int
020 movl R10, RAX # spill
023 shrl R10, #2
027 andl RAX, #858993459 # int
02d andl R10, #858993459 # int
034 sall RAX, #2
037 orl RAX, R10 # int
03a movl R10, RAX # spill
03d shrl R10, #4
041 andl RAX, #252645135 # int
047 andl R10, #252645135 # int
04e sall RAX, #4
051 orl RAX, R10 # int
054 bswapl RAX
056 addq rsp, 16 # Destroy frame
INTRINSICS DISABLED - 5,173 ± 0,096 ns/op
==========================================
000 B1: # N1 <- BLOCK HEAD IS JUNK Freq: 1
000 # stack bang (96 bytes)
pushq rbp # Save rbp
subq rsp, #16 # Create frame
00c movl RAX, RSI # spill
00e andl RAX, #1431655765 # int
014 shrl RSI, #1
016 sall RAX, #1
018 andl RSI, #1431655765 # int
01e orl RAX, RSI # int
020 movl R10, RAX # spill
023 shrl R10, #2
027 andl RAX, #858993459 # int
02d andl R10, #858993459 # int
034 sall RAX, #2
037 orl RAX, R10 # int
03a movl R10, RAX # spill
03d shrl R10, #4
041 andl RAX, #252645135 # int
047 andl R10, #252645135 # int
04e sall RAX, #4
051 orl RAX, R10 # int
054 movl R10, RAX # spill
057 sall R10, #24
05b movl R11, RAX # spill
05e sall R11, #8
062 movl R8, RAX # spill
065 shrl R8, #8
069 andl R11, #16711680 # int
070 andl R8, #65280 # int
077 shrl RAX, #24
07a orl RAX, R8 # int
07d orl RAX, R11 # int
080 orl RAX, R10 # int
083 addq rsp, 16 # Destroy frame
INTRINSICS DISABLED 2 - 5,081 ± 0,092 ns/op
===========================================
000 B1: # N1 <- BLOCK HEAD IS JUNK Freq: 1
000 # stack bang (96 bytes)
pushq rbp # Save rbp
subq rsp, #16 # Create frame
00c movl RAX, RSI # spill
00e andl RAX, #1431655765 # int
014 shrl RSI, #1
016 sall RAX, #1
018 andl RSI, #1431655765 # int
01e orl RAX, RSI # int
020 movl R10, RAX # spill
023 shrl R10, #2
027 andl RAX, #858993459 # int
02d andl R10, #858993459 # int
034 sall RAX, #2
037 orl RAX, R10 # int
03a movl R10, RAX # spill
03d shrl R10, #4
041 andl RAX, #252645135 # int
047 andl R10, #252645135 # int
04e sall RAX, #4
051 orl RAX, R10 # int
054 movl R10, RAX # spill
057 shrl R10, #24
05b movl R11, RAX # spill
05e shrl R11, #8
062 movl R8, RAX # spill
065 andl R8, #65280 # int
06c andl R11, #65280 # int
073 sall R8, #8
077 sall RAX, #24
07a orl RAX, R8 # int
07d orl RAX, R11 # int
080 orl RAX, R10 # int
083 addq rsp, 16 # Destroy frame
Jaroslav
More information about the core-libs-dev
mailing list