RFR: 8363620: AArch64: reimplement emit_static_call_stub()
Andrew Haley
aph at openjdk.org
Sat Aug 9 19:03:10 UTC 2025
On Tue, 5 Aug 2025 10:30:13 GMT, Fei Gao <fgao at openjdk.org> wrote:
> In the existing implementation, the static call stub typically emits a sequence like:
> `isb; movk; movz; movz; movk; movz; movz; br`.
>
> This patch reimplements it using a more compact and patch-friendly sequence:
>
> ldr x12, Label_data
> ldr x8, Label_entry
> br x8
> Label_data:
> 0x00000000
> 0x00000000
> Label_entry:
> 0x00000000
> 0x00000000
>
> The new approach places the target addresses adjacent to the code and loads them dynamically. This allows us to update the call target by modifying only the data in memory, without changing any instructions. This avoids the need for I-cache flushes or issuing an `isb`[1], which are both relatively expensive operations.
>
> While emitting direct branches in static stubs for small code caches can save 2 instructions compared to the new implementation, modifying those branches still requires I-cache flushes or an `isb`. This patch unifies the code generation by emitting the same static stubs for both small and large code caches.
>
> A microbenchmark (StaticCallStub.java) shows the patch reducing the average time per call by approximately 43%.
>
>
> Benchmark                        (length)  Mode  Cnt    Master     Patch  Units
> StaticCallStubFar.callCompiled       1000  avgt    5    39.346    22.474  us/op
> StaticCallStubFar.callCompiled      10000  avgt    5   390.05    218.478  us/op
> StaticCallStubFar.callCompiled     100000  avgt    5  3869.264  2174.001  us/op
> StaticCallStubNear.callCompiled      1000  avgt    5    39.093    22.582  us/op
> StaticCallStubNear.callCompiled     10000  avgt    5   387.319   217.398  us/op
> StaticCallStubNear.callCompiled    100000  avgt    5  3855.825  2206.923  us/op
>
>
> All tests in Tier1 to Tier3, under both release and debug builds, have passed.
>
> [1] https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/caches-self-modifying-code-working-with-threads
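To make the quoted idea concrete: because the stub loads its target from an adjacent data word, retargeting is an ordinary data store rather than an instruction rewrite. Below is a minimal standalone C++ analogue, not HotSpot code; the Stub type and old_callee/new_callee names are made up for illustration, and std::atomic stands in for the stub's literal load and the patcher's store.

// Standalone analogue (not HotSpot code): the "stub" fetches its target
// from a data slot placed next to it, so retargeting is an ordinary data
// store and never rewrites an instruction.
#include <atomic>
#include <cstdio>

struct Stub {
  // Stands in for Label_entry in the stub's code stream.
  std::atomic<void (*)()> target;

  void invoke() const {
    // Equivalent in spirit to: ldr x8, Label_entry; br x8
    target.load(std::memory_order_acquire)();
  }
};

static void old_callee() { std::puts("old target"); }
static void new_callee() { std::puts("new target"); }

int main() {
  Stub stub{old_callee};
  stub.invoke();
  // Retargeting touches only data: no I-cache flush, no isb.
  stub.target.store(new_callee, std::memory_order_release);
  stub.invoke();
  return 0;
}

A second sketch after the diff below illustrates why the release/acquire pairing matters when the data words start out as zero.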
Try this. It might be enough to rescue this PR.
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp
index 12b941fc4f7..29853ed4a10 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp
@@ -68,8 +68,8 @@ friend class ArrayCopyStub;
 
   enum {
     // call stub: CompiledDirectCall::to_interp_stub_size() +
-    //            CompiledDirectCall::to_trampoline_stub_size()
-    _call_stub_size = 13 * NativeInstruction::instruction_size,
+    //            CompiledDirectCall::to_trampoline_stub_size() + alignment nop
+    _call_stub_size = 15 * NativeInstruction::instruction_size,
     _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
     _deopt_handler_size = 7 * NativeInstruction::instruction_size
   };
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index 4285524514b..63577a55809 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -983,22 +983,29 @@ void MacroAssembler::emit_static_call_stub() {
   // Jump to the entry point of the c2i stub.
   const int stub_start_offset = offset();
   Label far_jump_metadata, far_jump_entry;
-  ldr(rmethod, far_jump_metadata);
+  {
+    Label retry; bind(retry);
+    adr(rmethod, far_jump_metadata);
+    ldar(rmethod, (rmethod));
+    cbz(rmethod, retry);
+  }
   ldr(rscratch1, far_jump_entry);
   br(rscratch1);
+  nop();
   bind(far_jump_metadata);
-  assert(offset() - stub_start_offset == NativeStaticCallStub::far_jump_metadata_offset,
+  assert(offset() - stub_start_offset == NativeStaticCallStub::metadata_offset,
          "should be");
+  assert(is_aligned(offset(), BytesPerWord), "offset is misaligned");
   emit_int64(0);
   bind(far_jump_entry);
-  assert(offset() - stub_start_offset == NativeStaticCallStub::far_jump_entrypoint_offset,
+  assert(offset() - stub_start_offset == NativeStaticCallStub::entrypoint_offset,
          "should be");
   emit_int64(0);
 }
 
 int MacroAssembler::static_call_stub_size() {
-  // ldr; ldr; br; zero; zero; zero; zero;
-  return 7 * NativeInstruction::instruction_size;
+  // adr; ldar; cbz; ldr; br; nop; zero; zero; zero; zero;
+  return 10 * NativeInstruction::instruction_size;
 }
 
 void MacroAssembler::c2bool(Register x) {
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
index 12a6eb6f7f0..5d33d94f1ae 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp
@@ -393,22 +393,20 @@ void NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest, JVMCI_TRAPS) {
 #ifndef PRODUCT
 // Mirror the logic in CompiledDirectCall::verify_mt_safe().
 void NativeStaticCallStub::verify_static_stub(const methodHandle& callee, address entry) {
-  intptr_t metadata = intptr_at(far_jump_metadata_offset);
-  address entrypoint = ptr_at(far_jump_entrypoint_offset);
+  intptr_t metadata = intptr_at(metadata_offset);
+  address entrypoint = ptr_at(entrypoint_offset);
   CompiledDirectCall::verify_mt_safe_helper(callee, entry, metadata, entrypoint);
 }
 #endif
 
 void NativeStaticCallStub::set_metadata_and_destination(intptr_t callee, address entry) {
-  set_intptr_at(far_jump_metadata_offset, callee);
-  set_ptr_at(far_jump_entrypoint_offset, entry);
-  OrderAccess::release();
+  set_ptr_at(entrypoint_offset, entry);
+  Atomic::release_store((intptr_t*)addr_at(metadata_offset), callee);
 }
 
 void NativeStaticCallStub::verify_instruction_sequence() {
-  if (! (nativeInstruction_at(addr_at(0))->is_ldr_literal() &&
-         nativeInstruction_at(addr_at(NativeInstruction::instruction_size))->is_ldr_literal() &&
-         nativeInstruction_at(addr_at(NativeInstruction::instruction_size * 2))->is_blr())) {
+  if (! (nativeInstruction_at(addr_at(NativeInstruction::instruction_size * 3))->is_ldr_literal() &&
+         nativeInstruction_at(addr_at(NativeInstruction::instruction_size * 4))->is_blr())) {
     fatal("Not expected instructions in static call stub");
   }
 }
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
index 17c87bf7c2b..897152f26f1 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
@@ -461,8 +461,8 @@ class NativeStaticCallStub : public NativeInstruction {
 
  public:
   enum AArch64_specific_constants {
-    far_jump_metadata_offset = 3 * 4,
-    far_jump_entrypoint_offset = 5 * 4
+    metadata_offset = 6 * NativeInstruction::instruction_size,
+    entrypoint_offset = 6 * NativeInstruction::instruction_size + wordSize,
   };
 
   void set_metadata_and_destination(intptr_t callee, address entry);
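For reference, a standalone C++ sketch (again not HotSpot code; the StubData struct, read_entry name, and the constants are made up for illustration) of the memory-ordering idea this diff relies on: the patcher stores the entry point with a plain store and then publishes the metadata word with a release store, while the reader, like the stub's adr/ldar/cbz retry loop, spins on an acquire load of the metadata until it is non-zero, after which the entry point is guaranteed to be visible.

#include <atomic>
#include <cassert>
#include <cstdint>
#include <thread>

using std::intptr_t;

struct StubData {
  std::atomic<intptr_t> metadata{0};  // published last, with release semantics
  intptr_t entrypoint = 0;            // plain data, ordered by the release store
};

// Mirrors the ordering in the suggested set_metadata_and_destination():
// plain store of the entry point first, then a release store of the metadata.
void set_metadata_and_destination(StubData& s, intptr_t callee, intptr_t entry) {
  s.entrypoint = entry;
  s.metadata.store(callee, std::memory_order_release);
}

// Mirrors the stub's retry loop: adr; ldar; cbz -> retry.
intptr_t read_entry(const StubData& s) {
  while (s.metadata.load(std::memory_order_acquire) == 0) { /* spin */ }
  // The acquire load synchronizes with the release store, so the plain
  // store to entrypoint is guaranteed to be visible here.
  return s.entrypoint;
}

int main() {
  StubData s;
  std::thread patcher([&s] { set_metadata_and_destination(s, /*callee=*/0x42, /*entry=*/0x1000); });
  intptr_t entry = read_entry(s);
  patcher.join();
  assert(entry == 0x1000);
  return 0;
}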
-------------
PR Comment: https://git.openjdk.org/jdk/pull/26638#issuecomment-3172040556