RFR: 8369296: Add fast class init checks in interpreter for resolving ConstantPool entries for static field [v4]

Martin Doerr mdoerr at openjdk.org
Wed Oct 8 14:55:01 UTC 2025


On Wed, 8 Oct 2025 13:44:25 GMT, Ashutosh Mehra <asmehra at openjdk.org> wrote:

>> This patch adds fast clinit barrier in the interpreter when resolving cp entry for a static field.
>> 
>> Testing: tested x86-64 by running `hotspot_runtime` group
>> Specifically, `runtime/clinit/ClassInitBarrier.java` fails if the block for adding `clinit_barrier` is commented out in `TemplateTable::resolve_cache_and_index_for_field`
>
> Ashutosh Mehra has updated the pull request incrementally with one additional commit since the last revision:
> 
>   Add support for riscv
>   
>   Signed-off-by: Ashutosh Mehra <asmehra at redhat.com>

Thanks for the ping! PPC64 implementation:

diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
index 7431f77aeff..373dc24a62f 100644
--- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
@@ -2179,17 +2179,11 @@ void TemplateTable::_return(TosState state) {
 //   - Rscratch
 void TemplateTable::resolve_cache_and_index_for_method(int byte_no, Register Rcache, Register Rscratch) {
   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+
   Label Lresolved, Ldone, L_clinit_barrier_slow;
   Register Rindex = Rscratch;
 
   Bytecodes::Code code = bytecode();
-  switch (code) {
-    case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
-    case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
-    default:
-      break;
-  }
-
   const int bytecode_offset = (byte_no == f1_byte) ? in_bytes(ResolvedMethodEntry::bytecode1_offset())
                                                    : in_bytes(ResolvedMethodEntry::bytecode2_offset());
   __ load_method_entry(Rcache, Rindex);
@@ -2201,10 +2195,9 @@ void TemplateTable::resolve_cache_and_index_for_method(int byte_no, Register Rca
 
   // Class initialization barrier slow path lands here as well.
   __ bind(L_clinit_barrier_slow);
-
   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
   __ li(R4_ARG2, code);
-  __ call_VM(noreg, entry, R4_ARG2, true);
+  __ call_VM(noreg, entry, R4_ARG2);
 
   // Update registers with resolved info.
   __ load_method_entry(Rcache, Rindex);
@@ -2226,12 +2219,10 @@ void TemplateTable::resolve_cache_and_index_for_method(int byte_no, Register Rca
   __ bind(Ldone);
 }
 
-void TemplateTable::resolve_cache_and_index_for_field(int byte_no,
-                                            Register Rcache,
-                                            Register index) {
+void TemplateTable::resolve_cache_and_index_for_field(int byte_no, Register Rcache, Register index) {
   assert_different_registers(Rcache, index);
 
-  Label resolved;
+  Label Lresolved, Ldone, L_clinit_barrier_slow;
 
   Bytecodes::Code code = bytecode();
   switch (code) {
@@ -2246,19 +2237,34 @@ void TemplateTable::resolve_cache_and_index_for_field(int byte_no,
                                          : in_bytes(ResolvedFieldEntry::put_code_offset());
   __ lbz(R0, code_offset, Rcache);
   __ cmpwi(CR0, R0, (int)code); // have we resolved this bytecode?
-  __ beq(CR0, resolved);
+  __ beq(CR0, Lresolved);
 
   // resolve first time through
+  // Class initialization barrier slow path lands here as well.
+  __ bind(L_clinit_barrier_slow);
   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
-  __ li(R4_ARG2, (int)code);
+  __ li(R4_ARG2, code);
   __ call_VM(noreg, entry, R4_ARG2);
 
   // Update registers with resolved info
   __ load_field_entry(Rcache, index);
-  __ bind(resolved);
+  __ b(Ldone);
 
-  // Use acquire semantics for the bytecode (see ResolvedFieldEntry::fill_in()).
+  __ bind(Lresolved);
   __ isync(); // Order load wrt. succeeding loads.
+
+  if (VM_Version::supports_fast_class_init_checks() &&
+      (bytecode() == Bytecodes::_getstatic || bytecode() == Bytecodes::_putstatic)) {
+    const Register field_holder = R4_ARG2;
+
+    // InterpreterRuntime::resolve_get_put sets field_holder and finally release-stores put_code.
+    // We have seen the released put_code above and will read the corresponding field_holder and init_state
+    // (ordered by compare-branch-isync).
+    __ ld(field_holder, ResolvedFieldEntry::field_holder_offset(), Rcache);
+    __ clinit_barrier(field_holder, R16_thread, /* L_fast_path=*/nullptr , &L_clinit_barrier_slow);
+  }
+
+  __ bind(Ldone);
 }
 
 void TemplateTable::load_resolved_field_entry(Register obj,


I've made `resolve_cache_and_index_for_method` and `resolve_cache_and_index_for_field` more uniform and added a comment.

I've seen the discussion about the re-execution of the clinit barrier. My proposal avoids re-executing the barrier by jumping over it. Is that better, or at least less confusing? If nobody likes it and we want the platforms to be more similar, I can remove it.

-------------

PR Comment: https://git.openjdk.org/jdk/pull/27676#issuecomment-3381944631


More information about the hotspot-dev mailing list