[aarch64-port-dev ] DMB elimination in C2 synchronization implementation

Andrew Haley aph at redhat.com
Wed Sep 2 08:52:43 UTC 2015


On 09/02/2015 06:08 AM, Wei Tang wrote:
> No idea why the attachment was blocked. Some supporting jpg files are excluded, and only the patch file is attached this time. The difference between this patch and Andrew's is that our focus is on redundant dmb in all locking paths, not on general CAS handling. Please take a look!
> 
> http://cr.openjdk.java.net/~adinn/8080293/webrev.01/hotspot.changeset


diff -r 9df4555d2d7d src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Mon Aug 31 13:49:18 2015 +0200
+++ b/src/cpu/aarch64/vm/aarch64.ad	Tue Sep 01 16:21:19 2015 +0800
@@ -3801,6 +3801,90 @@
     __ cmpw(rscratch1, zr);
   %}

+  // The only difference between aarch64_enc_cmpxchg and aarch64_enc_cmpxchg_acq
+  // is that we use load-acquire in the CompareAndSwap sequence to serve as a barrier
+  // on acquiring lock.
+  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
+    MacroAssembler _masm(&cbuf);
+    Register old_reg = as_Register($oldval$$reg);
+    Register new_reg = as_Register($newval$$reg);
+    Register base = as_Register($mem$$base);
+    Register addr_reg;
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+       if (disp != 0) {
+        __ lea(rscratch2, Address(base, disp));
+        addr_reg = rscratch2;
+      } else {
+        // TODO
+        // should we ever get anything other than this case?
+        addr_reg = base;
+      }
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      } else {
+        __ lea(rscratch2, Address(base, disp));
+        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      }
+    }
+    Label retry_load, done;
+    __ bind(retry_load);
+    __ ldaxr(rscratch1, addr_reg);
+    __ cmp(rscratch1, old_reg);
+    __ br(Assembler::NE, done);
+    __ stlxr(rscratch1, new_reg, addr_reg);
+    __ cbnzw(rscratch1, retry_load);
+    __ bind(done);
+  %}
+
+  // The only difference between aarch64_enc_cmpxchgw and aarch64_enc_cmpxchgw_acq
+  // is that we use load-acquire in the CompareAndSwap sequence to serve as a barrier
+  // on acquiring lock.
+  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
+    MacroAssembler _masm(&cbuf);
+    Register old_reg = as_Register($oldval$$reg);
+    Register new_reg = as_Register($newval$$reg);
+    Register base = as_Register($mem$$base);
+    Register addr_reg;
+    int index = $mem$$index;
+    int scale = $mem$$scale;
+    int disp = $mem$$disp;
+    if (index == -1) {
+       if (disp != 0) {
+        __ lea(rscratch2, Address(base, disp));
+        addr_reg = rscratch2;
+      } else {
+        // TODO
+        // should we ever get anything other than this case?
+        addr_reg = base;
+      }
+    } else {
+      Register index_reg = as_Register(index);
+      if (disp == 0) {
+        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      } else {
+        __ lea(rscratch2, Address(base, disp));
+        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
+        addr_reg = rscratch2;
+      }
+    }
+    Label retry_load, done;
+    __ bind(retry_load);
+    __ ldaxrw(rscratch1, addr_reg);
+    __ cmpw(rscratch1, old_reg);
+    __ br(Assembler::NE, done);
+    __ stlxrw(rscratch1, new_reg, addr_reg);
+    __ cbnzw(rscratch1, retry_load);
+    __ bind(done);
+  %}
+
   enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
     MacroAssembler _masm(&cbuf);
     Register old_reg = as_Register($oldval$$reg);
@@ -4398,13 +4482,10 @@

     // Compare object markOop with mark and if equal exchange scratch1
     // with object markOop.
-    // Note that this is simply a CAS: it does not generate any
-    // barriers.  These are separately generated by
-    // membar_acquire_lock().
     {
       Label retry_load;
       __ bind(retry_load);
-      __ ldxr(tmp, oop);
+      __ ldaxr(tmp, oop);
       __ cmp(tmp, disp_hdr);
       __ br(Assembler::NE, cas_failed);
       // use stlxr to ensure update is immediately visible
@@ -4454,7 +4535,7 @@
       {
         Label retry_load, fail;
         __ bind(retry_load);
-        __ ldxr(rscratch1, tmp);
+        __ ldaxr(rscratch1, tmp);
         __ cmp(disp_hdr, rscratch1);
         __ br(Assembler::NE, fail);
         // use stlxr to ensure update is immediately visible
@@ -8017,11 +8098,9 @@
   match(MemBarAcquireLock);
   ins_cost(VOLATILE_REF_COST);

-  format %{ "membar_acquire_lock" %}
-
-  ins_encode %{
-    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
-  %}
+  format %{ " -- \t// redundant MEMBAR-acquire - empty" %}
+
+  ins_encode ();

   ins_pipe(pipe_serial);
 %}
@@ -8080,11 +8159,9 @@
   match(MemBarReleaseLock);
   ins_cost(VOLATILE_REF_COST);

-  format %{ "membar_release_lock" %}
-
-  ins_encode %{
-    __ membar(Assembler::LoadStore|Assembler::StoreStore);
-  %}
+  format %{ " -- \t// redundant MEMBAR-release - empty" %}
+
+  ins_encode ();

   ins_pipe(pipe_serial);
 %}
@@ -8381,7 +8458,7 @@
     "cmpw rscratch1, zr\t# EQ on successful write"
   %}

-  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval));
+  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

   ins_pipe(pipe_slow);
 %}
@@ -8398,7 +8475,7 @@
     "cmpw rscratch1, zr\t# EQ on successful write"
   %}

-  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval));
+  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

   ins_pipe(pipe_slow);
 %}



More information about the aarch64-port-dev mailing list