RFR: 8365147: AArch64: Replace DMB + LD + DMB with LDAR for C1 volatile field loads

Andrew Haley aph at openjdk.org
Tue Aug 12 16:36:10 UTC 2025


On Tue, 12 Aug 2025 14:59:40 GMT, Samuel Chee <duke at openjdk.org> wrote:

> Replaces the DMB ISH + LD + DMB ISHLD sequence with LDAR for volatile field loads - for example, AtomicLong::get.
> 
> This is valid, as originally the DMBs were necessary due to the case described here - https://bugs.openjdk.org/browse/JDK-8179954. As in the rare case where the LD can be reordered with an LDAR or STLR from the C2 implementation for stores and loads, these DMBs are required.
> However, acquire/release operations use a sequentially consistent model which does not allow reordering between them. Hence, the LD can be replaced with an LDAR to disallow reordering with a STLR/LDAR and the first DMB can be removed.
> 
> The LDAR has acquire semantics, so it's impossible for memory accesses after to be reordered before; the DMB ISHLD is not required. Therefore, a singular LDAR is sufficient.
> 
> This excludes floats and doubles, as they do not have equivalent load-acquire instructions.

That looks good, nice work.

A little refactoring could make it even better. Try this: untested, but you get the idea.


diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index 2e35763aa43..3e7a285d5aa 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -1274,6 +1274,13 @@ class Assembler : public AbstractAssembler {
                          sz, 0b000, ordered);
   }
 
+  void load_store_ordered(Register data, BasicType type, Register addr,
+                          bool is_load) {
+    load_store_exclusive(dummy_reg, data, dummy_reg, addr,
+                         (Assembler::operand_size)exact_log2(type2aelembytes(type)),
+                         is_load ? 0b110: 0b100, 1);
+  }
+
 #define INSN4(NAME, sz, op, o0) /* Four registers */                    \
   void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) {     \
     guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
index 6ac4f5df166..0246ab61351 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -938,45 +938,21 @@ void LIR_Assembler::mem2reg_volatile(LIR_Opr src, LIR_Opr dest, BasicType type,
   int null_check_here = code_offset();
 
   __ lea(rscratch1, as_Address(from_addr));
-  switch (type) {
-    case T_BOOLEAN:
-      __ ldarb(dest->as_register(), rscratch1);
-      break;
-    case T_BYTE: // LDAR is unsigned so need to sign-extend for byte
-      __ ldarb(dest->as_register(), rscratch1);
-      __ sxtb(dest->as_register(), dest->as_register());
-      break;
-    case T_CHAR:
-      __ ldarh(dest->as_register(), rscratch1);
-      break;
-    case T_SHORT: // LDAR is unsigned so need to sign-extend for short
-      __ ldarh(dest->as_register(), rscratch1);
-      __ sxth(dest->as_register(), dest->as_register());
-      break;
-    case T_INT:
-      __ ldarw(dest->as_register(), rscratch1);
-      break;
-    case T_ADDRESS:
-      __ ldar(dest->as_register(), rscratch1);
-      break;
-    case T_LONG:
-      __ ldar(dest->as_register_lo(), rscratch1);
-      break;
-    case T_ARRAY:
-    case T_OBJECT:
-      if (UseCompressedOops) {
-        __ ldarw(dest->as_register(), rscratch1);
-      } else {
-        __ ldar(dest->as_register(), rscratch1);
-      }
-      break;
-    case T_METADATA:
-      // We get here to store a method pointer to the stack to pass to
-      // a dtrace runtime call. This can't work on 64 bit with
-      // compressed klass ptrs: T_METADATA can be a compressed klass
-      // ptr or a 64 bit method pointer.
-    default:
-      ShouldNotReachHere();
+  Register dest_reg = (dest->is_single_cpu()
+                       ? dest->as_register() : dest->as_register_lo());
+  __ load_store_ordered(dest_reg, type, rscratch1, /*is_load*/true);
+
+  if (is_signed_subword_type(type)) {
+    switch (type) {
+      case T_BYTE: // LDAR is unsigned so need to sign-extend for byte
+        __ sxtb(dest_reg, dest_reg);
+        break;
+      case T_SHORT: // LDAR is unsigned so need to sign-extend for short
+        __ sxth(dest_reg, dest_reg);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
   }
 
   if (is_reference_type(type)) {

-------------

PR Comment: https://git.openjdk.org/jdk/pull/26748#issuecomment-3180149720


More information about the hotspot-dev mailing list