RFR: 8362193: Re-work MacOS/AArch64 SpinPause to handle SB [v2]

Aleksey Shipilev shade at openjdk.org
Mon Jul 21 16:13:33 UTC 2025


On Mon, 21 Jul 2025 13:48:14 GMT, Evgeny Astigeevich <eastigeevich at openjdk.org> wrote:

>> I agree with you. I proposed to use the switch when JDK-8321371 was being reviewed: https://github.com/openjdk/jdk/pull/16994#issuecomment-1865147655
>> 
>> Frederick (@fbredber) wanted to avoid branches: https://github.com/openjdk/jdk/pull/16994#issuecomment-1865955740
>
> The switch-based version is committed: https://github.com/openjdk/jdk/pull/26387/commits/e984fdeb10cc91a04860cbdbe45aacb05c4ffd3e

OK, so this inline assembly was there to optimize `SpinPause`, since it sits in a hot loop. Have you looked at the disassembly for `SpinPause` before/after? On my M1, I see this:


% lldb -o "disassemble -n SpinPause" -o quit -- build/macosx-aarch64-server-release/images/jdk/lib/server/libjvm.dylib

# Before
libjvm.dylib`::SpinPause():
libjvm.dylib[0x89f0d4] <+0>:  stp    x29, x30, [sp, #-0x10]!
libjvm.dylib[0x89f0d8] <+4>:  mov    x29, sp
libjvm.dylib[0x89f0dc] <+8>:  adrp   x8, 1409
libjvm.dylib[0x89f0e0] <+12>: add    x8, x8, #0x80 ; VM_Version::_spin_wait
libjvm.dylib[0x89f0e4] <+16>: ldrsw  x8, [x8]
libjvm.dylib[0x89f0e8] <+20>: lsl    x8, x8, #3
libjvm.dylib[0x89f0ec] <+24>: adr    x9, 0x89f100 ; <+44> at os_bsd_aarch64.cpp:545:5
libjvm.dylib[0x89f0f0] <+28>: add    x9, x9, x8
libjvm.dylib[0x89f0f4] <+32>: br     x9
libjvm.dylib[0x89f0f8] <+36>: b      0x89f114       ; <+64> at os_bsd_aarch64.cpp:561:5
libjvm.dylib[0x89f0fc] <+40>: nop    
libjvm.dylib[0x89f100] <+44>: nop    
libjvm.dylib[0x89f104] <+48>: b      0x89f114       ; <+64> at os_bsd_aarch64.cpp:561:5
libjvm.dylib[0x89f108] <+52>: isb    
libjvm.dylib[0x89f10c] <+56>: b      0x89f114       ; <+64> at os_bsd_aarch64.cpp:561:5
libjvm.dylib[0x89f110] <+60>: yield  
libjvm.dylib[0x89f114] <+64>: mov    w0, #0x1 ; =1 
libjvm.dylib[0x89f118] <+68>: ldp    x29, x30, [sp], #0x10
libjvm.dylib[0x89f11c] <+72>: ret  

# After
libjvm.dylib`::SpinPause():
libjvm.dylib[0x89f074] <+0>:   stp    x29, x30, [sp, #-0x10]!
libjvm.dylib[0x89f078] <+4>:   mov    x29, sp
libjvm.dylib[0x89f07c] <+8>:   adrp   x8, 1409
libjvm.dylib[0x89f080] <+12>:  add    x8, x8, #0x80 ; VM_Version::_spin_wait
libjvm.dylib[0x89f084] <+16>:  ldr    w8, [x8]
libjvm.dylib[0x89f088] <+20>:  add    w8, w8, #0x1
libjvm.dylib[0x89f08c] <+24>:  cmp    w8, #0x4
libjvm.dylib[0x89f090] <+28>:  b.hi   0x89f0ec       ; <+120> at os_bsd_aarch64.cpp:551:7
libjvm.dylib[0x89f094] <+32>:  adrp   x9, 0
libjvm.dylib[0x89f098] <+36>:  add    x9, x9, #0xfc ; ___lldb_unnamed_symbol66913
libjvm.dylib[0x89f09c] <+40>:  adr    x10, 0x89f09c ; <+40> at os_bsd_aarch64.cpp
libjvm.dylib[0x89f0a0] <+44>:  ldrsw  x11, [x9, x8, lsl #2]
libjvm.dylib[0x89f0a4] <+48>:  add    x10, x10, x11
libjvm.dylib[0x89f0a8] <+52>:  br     x10
libjvm.dylib[0x89f0ac] <+56>:  nop    
libjvm.dylib[0x89f0b0] <+60>:  mov    w0, #0x1 ; =1 
libjvm.dylib[0x89f0b4] <+64>:  ldp    x29, x30, [sp], #0x10
libjvm.dylib[0x89f0b8] <+68>:  ret    
libjvm.dylib[0x89f0bc] <+72>:  isb    
libjvm.dylib[0x89f0c0] <+76>:  mov    w0, #0x1 ; =1 
libjvm.dylib[0x89f0c4] <+80>:  ldp    x29, x30, [sp], #0x10
libjvm.dylib[0x89f0c8] <+84>:  ret    
libjvm.dylib[0x89f0cc] <+88>:  yield  
libjvm.dylib[0x89f0d0] <+92>:  mov    w0, #0x1 ; =1 
libjvm.dylib[0x89f0d4] <+96>:  ldp    x29, x30, [sp], #0x10
libjvm.dylib[0x89f0d8] <+100>: ret    
libjvm.dylib[0x89f0dc] <+104>: sb     
libjvm.dylib[0x89f0e0] <+108>: mov    w0, #0x1 ; =1 
libjvm.dylib[0x89f0e4] <+112>: ldp    x29, x30, [sp], #0x10
libjvm.dylib[0x89f0e8] <+116>: ret    
libjvm.dylib[0x89f0ec] <+120>: adrp   x0, 1063
libjvm.dylib[0x89f0f0] <+124>: add    x0, x0, #0xe2a ; "src/hotspot/os_cpu/bsd_aarch64/os_bsd_aarch64.cpp"
libjvm.dylib[0x89f0f4] <+128>: mov    w1, #0x227 ; =551 
libjvm.dylib[0x89f0f8] <+132>: bl     0x311f84       ; report_should_not_reach_here at debug.cpp:247


So I think the switch is fairly well compiled. At first glance, it generates more code by duplicating the epilog for every case, but I think that is a bit cleaner than trying to do branch-overs. It generates marginally better code if you place the `case` labels in enum order, and emit the `ShouldNotReachHere()` branch only for debug builds:


#ifdef ASSERT
    default:
      ShouldNotReachHere();
#endif

-------------

PR Review Comment: https://git.openjdk.org/jdk/pull/26387#discussion_r2219649388


More information about the hotspot-dev mailing list