[aarch64-port-dev ] Allow cmpxchg to fall through when it fails

Andrew Haley aph at redhat.com
Wed Oct 9 05:53:15 PDT 2013


This is a small optimization.

Rather than unconditionally branching when a cmpxchg fails, callers can
now pass a NULL failure label and simply fall through on failure.  This
saves a branch in the failure case.

Andrew.



# HG changeset patch
# User aph
# Date 1381322616 -3600
# Node ID 10b833f09e6a617fbc95fb3ca648bf0bc2c9b39e
# Parent  557a6ed9e5d03270183926ca313ea72cce2896cf
Allow cmpxchg to fall through when it fails

diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad	Wed Oct 09 13:43:36 2013 +0100
@@ -3192,7 +3192,7 @@
                   /*where=*/oop,
                   /*result=*/tmp,
                   cont,
-                  cas_failed);
+                  /*fail*/NULL);
     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

     // If the compare-and-exchange succeeded, then we found an unlocked
@@ -3227,8 +3227,8 @@
                     /*exchange_value=*/rthread,
                     /*where=*/tmp,
                     /*result=*/rscratch1,
-                    next,
-                    next);
+                    /*succeed*/next,
+                    /*fail*/NULL);
       __ bind(next);

       // store a non-null value into the box.
@@ -3310,7 +3310,7 @@
                   /*where=*/oop,
                   /*result=*/tmp,
                   cont,
-                  cas_failed);
+                  /*cas_failed*/NULL);
     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

     __ bind(cas_failed);
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp	Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp	Wed Oct 09 13:43:36 2013 +0100
@@ -96,9 +96,8 @@
   // displaced header address in the object header - if it is not the same, get the
   // object header instead
   lea(rscratch2, Address(obj, hdr_offset));
-  cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, fail);
+  cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL);
   // if the object header was the same, we're done
-  bind(fail);
   // if the object header was not the same, it is now in the hdr register
   // => test if it is a stack pointer into the same stack (recursive locking), i.e.:
   //
@@ -159,9 +158,9 @@
   // we do unlocking via runtime call
   if (hdr_offset) {
     lea(rscratch1, Address(obj, hdr_offset));
-    cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, slow_case);
+    cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case);
   } else {
-    cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, slow_case);
+    cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case);
   }
   // done
   bind(done);
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/interp_masm_aarch64.cpp
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Wed Oct 09 13:43:36 2013 +0100
@@ -635,16 +635,16 @@
     Label fail;
     if (PrintBiasedLockingStatistics) {
       Label fast;
-      cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, fail);
+      cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
       bind(fast);
       // cond_inc32(Assembler::zero,
       //            ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr()));
       call_Unimplemented();
       b(done);
+      bind(fail);
     } else {
-      cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, done, fail);
+      cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
     }
-    bind(fail);

     // Test if the oopMark is an obvious stack pointer, i.e.,
     //  1) (mark & 7) == 0, and
@@ -734,9 +734,7 @@
     cbz(header_reg, done);

     // Atomic swap back the old header
-    Label fail;
-    cmpxchgptr(swap_reg, header_reg, obj_reg, rscratch1, done, fail);
-    bind(fail);
+    cmpxchgptr(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);

     // Call the runtime routine for slow case.
     str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Oct 09 13:43:36 2013 +0100
@@ -1737,7 +1737,7 @@
 // register+offset Address.

 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
-				Label &succeed, Label &fail) {
+				Label &succeed, Label *fail) {
   // oldv holds comparison value
   // newv holds value to write in exchange
   // addr identifies memory word to compare against/update
@@ -1759,13 +1759,12 @@
   // if the memory word differs we return it in oldv and signal a fail
   bind(nope);
   mov(oldv, tmp);
-  // if (fail)
-  //   b(*fail);
-  b(fail);
+  if (fail)
+    b(*fail);
 }

 void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
-				Label &succeed, Label &fail) {
+				Label &succeed, Label *fail) {
   // oldv holds comparison value
   // newv holds value to write in exchange
   // addr identifies memory word to compare against/update
@@ -1787,9 +1786,8 @@
   // if the memory word differs we return it in oldv and signal a fail
   bind(nope);
   mov(oldv, tmp);
-  // if (fail)
-  //   b(*fail);
-  b(fail);
+  if (fail)
+    b(*fail);
 }

 void MacroAssembler::incr_allocated_bytes(Register thread,
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Oct 09 13:43:36 2013 +0100
@@ -1061,10 +1061,10 @@
   void cmpptr(Address src1, int32_t src2) { Unimplemented(); }

   void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
-		  Label &suceed, Label &fail);
+		  Label &suceed, Label *fail);

   void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
-		  Label &suceed, Label &fail);
+		  Label &suceed, Label *fail);

   void imulptr(Register dst, Register src) { Unimplemented(); }

diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Oct 09 13:43:36 2013 +0100
@@ -1827,8 +1827,7 @@

     // src -> dest iff dest == r0 else r0 <- dest
     { Label here;
-      __ cmpxchgptr(r0, lock_reg, obj_reg, rscratch1, lock_done, here);
-      __ bind(here);
+      __ cmpxchgptr(r0, lock_reg, obj_reg, rscratch1, lock_done, /*fallthrough*/NULL);
     }

     // Hmm should this move to the slow path code area???
@@ -2033,7 +2032,7 @@

     // Atomic swap old header if oop still contains the stack lock
     Label succeed;
-    __ cmpxchgptr(r0, old_hdr, obj_reg, rscratch1, succeed, slow_path_unlock);
+    __ cmpxchgptr(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock);
     __ bind(succeed);

     // slow path re-enters here
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/templateTable_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Wed Oct 09 13:43:36 2013 +0100
@@ -3240,7 +3240,7 @@

     Label succeed;
     // if someone beat us on the allocation, try again, otherwise continue
-    __ cmpxchgptr(r0, r1, RtopAddr, rscratch1, succeed, retry);
+    __ cmpxchgptr(r0, r1, RtopAddr, rscratch1, succeed, &retry);
     __ bind(succeed);
     __ incr_allocated_bytes(rthread, r3, 0, rscratch1);
   }



More information about the aarch64-port-dev mailing list