One other thing: this exercise has shown that in many cases we trash scratch registers in places where it really doesn't matter, and we'd be much better off rewriting those places not to do so. This makes push_call_clobbered_registers() something that can safely be used everywhere. But I'm holding off on any of this because I want the first patch to be, if at all possible, neutral with regard to the code generated.

diff -r 33f9271b3167 src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp Mon Nov 04 13:13:34 2019 -0500
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp Wed Nov 06 08:36:08 2019 -0500
@@ -2624,15 +2624,17 @@
   int step = 4 * wordSize;
   push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
   sub(sp, sp, step);
-  mov(rscratch1, -step);
+  mov(r0, -step);
   // Push v0-v7, v16-v31.
   for (int i = 31; i>= 4; i -= 4) {
     if (i <= v7->encoding() || i >= v16->encoding())
       st1(as_FloatRegister(i-3), as_FloatRegister(i-2), as_FloatRegister(i-1),
-          as_FloatRegister(i), T1D, Address(post(sp, rscratch1)));
+          as_FloatRegister(i), T1D, Address(post(sp, r0)));
   }
   st1(as_FloatRegister(0), as_FloatRegister(1), as_FloatRegister(2),
       as_FloatRegister(3), T1D, Address(sp));
+  // Reload r0 from where it was saved before pushing v0-v7, v16-v31.
+  ldr(r0, Address(sp, (8 + 16) * wordSize));
 }

-- 
Andrew Haley (he/him)
Java Platform Lead Engineer
Red Hat UK Ltd. <https://www.redhat.com>
https://keybase.io/andrewhaley
EAC8 43EB D3EF DB98 CC77 2FAD A5CD 6035 332F A671