RFR: 8155795: Optimize Integer/Long.reverse by using reverseBytes

Jaroslav Kameník jaroslav at kamenik.cz
Mon May 9 19:50:51 UTC 2016


Hi guys,

I have looked at the generated asm, and there does not seem to be a problem;
the only difference is a slightly different order of the shifts and ands.
Also, on Haswell, the times do not differ very much. Here are the parts that
differ:

Slower version (the sequence from the "INTRINSICS DISABLED" listing below):

057     sall    R10, #24
05b     movl    R11, RAX        # spill
05e     sall    R11, #8
062     movl    R8, RAX # spill
065     shrl    R8, #8
069     andl    R11, #16711680  # int
070     andl    R8, #65280      # int
077     shrl    RAX, #24

Faster version (the sequence from the "ORIGINAL" and "INTRINSICS DISABLED 2" listings below):

057     shrl    R10, #24
05b     movl    R11, RAX        # spill
05e     shrl    R11, #8
062     movl    R8, RAX # spill
065     andl    R8, #65280      # int
06c     andl    R11, #65280     # int
073     sall    R8, #8
077     sall    RAX, #24

Full code:

  ORIGINAL - 5,169 ± 0,377  ns/op
  ===============================

000   B1: #     N1 <- BLOCK HEAD IS JUNK   Freq: 1
000     # stack bang (96 bytes)
        pushq   rbp     # Save rbp
        subq    rsp, #16        # Create frame

00c     movl    RAX, RSI        # spill
00e     andl    RAX, #1431655765        # int
014     shrl    RSI, #1
016     sall    RAX, #1
018     andl    RSI, #1431655765        # int
01e     orl     RAX, RSI        # int
020     movl    R10, RAX        # spill
023     shrl    R10, #2
027     andl    RAX, #858993459 # int
02d     andl    R10, #858993459 # int
034     sall    RAX, #2
037     orl     RAX, R10        # int
03a     movl    R10, RAX        # spill
03d     shrl    R10, #4
041     andl    RAX, #252645135 # int
047     andl    R10, #252645135 # int
04e     sall    RAX, #4
051     orl     RAX, R10        # int
054     movl    R10, RAX        # spill
057     shrl    R10, #24
05b     movl    R11, RAX        # spill
05e     shrl    R11, #8
062     movl    R8, RAX # spill
065     andl    R8, #65280      # int
06c     andl    R11, #65280     # int
073     sall    R8, #8
077     sall    RAX, #24
07a     orl     RAX, R8 # int
07d     orl     RAX, R11        # int
080     orl     RAX, R10        # int
083     addq    rsp, 16 # Destroy frame


  USED INTRINSIC - 4,093 ± 0,100  ns/op
  =====================================

000   B1: #     N1 <- BLOCK HEAD IS JUNK   Freq: 1
000     # stack bang (96 bytes)
        pushq   rbp     # Save rbp
        subq    rsp, #16        # Create frame

00c     movl    RAX, RSI        # spill
00e     andl    RAX, #1431655765        # int
014     shrl    RSI, #1
016     sall    RAX, #1
018     andl    RSI, #1431655765        # int
01e     orl     RAX, RSI        # int
020     movl    R10, RAX        # spill
023     shrl    R10, #2
027     andl    RAX, #858993459 # int
02d     andl    R10, #858993459 # int
034     sall    RAX, #2
037     orl     RAX, R10        # int
03a     movl    R10, RAX        # spill
03d     shrl    R10, #4
041     andl    RAX, #252645135 # int
047     andl    R10, #252645135 # int
04e     sall    RAX, #4
051     orl     RAX, R10        # int
054     bswapl  RAX
056     addq    rsp, 16 # Destroy frame


  INTRINSICS DISABLED - 5,173 ± 0,096  ns/op
  ==========================================


000   B1: #     N1 <- BLOCK HEAD IS JUNK   Freq: 1
000     # stack bang (96 bytes)
        pushq   rbp     # Save rbp
        subq    rsp, #16        # Create frame

00c     movl    RAX, RSI        # spill
00e     andl    RAX, #1431655765        # int
014     shrl    RSI, #1
016     sall    RAX, #1
018     andl    RSI, #1431655765        # int
01e     orl     RAX, RSI        # int
020     movl    R10, RAX        # spill
023     shrl    R10, #2
027     andl    RAX, #858993459 # int
02d     andl    R10, #858993459 # int
034     sall    RAX, #2
037     orl     RAX, R10        # int
03a     movl    R10, RAX        # spill
03d     shrl    R10, #4
041     andl    RAX, #252645135 # int
047     andl    R10, #252645135 # int
04e     sall    RAX, #4
051     orl     RAX, R10        # int
054     movl    R10, RAX        # spill
057     sall    R10, #24
05b     movl    R11, RAX        # spill
05e     sall    R11, #8
062     movl    R8, RAX # spill
065     shrl    R8, #8
069     andl    R11, #16711680  # int
070     andl    R8, #65280      # int
077     shrl    RAX, #24
07a     orl     RAX, R8 # int
07d     orl     RAX, R11        # int
080     orl     RAX, R10        # int
083     addq    rsp, 16 # Destroy frame


  INTRINSICS DISABLED 2 - 5,081 ± 0,092  ns/op
  ===========================================

000   B1: #     N1 <- BLOCK HEAD IS JUNK   Freq: 1
000     # stack bang (96 bytes)
        pushq   rbp     # Save rbp
        subq    rsp, #16        # Create frame

00c     movl    RAX, RSI        # spill
00e     andl    RAX, #1431655765        # int
014     shrl    RSI, #1
016     sall    RAX, #1
018     andl    RSI, #1431655765        # int
01e     orl     RAX, RSI        # int
020     movl    R10, RAX        # spill
023     shrl    R10, #2
027     andl    RAX, #858993459 # int
02d     andl    R10, #858993459 # int
034     sall    RAX, #2
037     orl     RAX, R10        # int
03a     movl    R10, RAX        # spill
03d     shrl    R10, #4
041     andl    RAX, #252645135 # int
047     andl    R10, #252645135 # int
04e     sall    RAX, #4
051     orl     RAX, R10        # int
054     movl    R10, RAX        # spill
057     shrl    R10, #24
05b     movl    R11, RAX        # spill
05e     shrl    R11, #8
062     movl    R8, RAX # spill
065     andl    R8, #65280      # int
06c     andl    R11, #65280     # int
073     sall    R8, #8
077     sall    RAX, #24
07a     orl     RAX, R8 # int
07d     orl     RAX, R11        # int
080     orl     RAX, R10        # int
083     addq    rsp, 16 # Destroy frame



Jaroslav



More information about the core-libs-dev mailing list