[aarch64-port-dev ] C2: use store release instructions for all volatile stores

Andrew Haley aph at redhat.com
Wed Nov 5 14:13:27 UTC 2014


The AArch64 stlr instructions are all we need for volatile stores:
they provide every ordering guarantee a volatile store requires.
Hitherto we have been emitting normal stores with leading and
trailing barriers, but that is inefficient, and it's not clear to me
that it is strictly correct.
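
For a rough illustration in plain C++ (not HotSpot code, and the
exact barriers C2 emitted varied by node): a release store, and even
a sequentially consistent one, compiles to a single stlr on AArch64,
whereas the old scheme amounted to barrier + plain str + barrier:

  #include <atomic>

  std::atomic<long> field;

  void store_release(long v) {
    // GCC/Clang compile this to a single "stlr": no separate dmb
    // barriers are needed around the store itself.
    field.store(v, std::memory_order_release);
  }

  void store_seq_cst(long v) {
    // Even a sequentially consistent store maps to "stlr" on
    // AArch64; pairing stlr stores with ldar loads carries the
    // rest of the volatile semantics.
    field.store(v, std::memory_order_seq_cst);
  }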

This patch enables stlr and uses a few #ifdefs in the shared code to
disable the leading and trailing barriers.  I had hoped to be able to
do this without touching shared code, but it turned out to be
impossible.  The #ifdefs are ugly; doing this cleanly really requires
a reorganization of the way barriers are handled, so that back ends
get the control they need.
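
For reference, the NOT_AARCH64/AARCH64_ONLY macros used in the
shared-code hunks below follow HotSpot's usual conditional pattern;
as a sketch (the real definitions live in utilities/macros.hpp):

  #ifdef AARCH64
  #define AARCH64_ONLY(code) code
  #define NOT_AARCH64(code)
  #else
  #define AARCH64_ONLY(code)
  #define NOT_AARCH64(code) code
  #endif

so that, for example, "MemNode:: NOT_AARCH64(release)
AARCH64_ONLY(unordered)" expands to MemNode::release on every port
except AArch64, where it becomes MemNode::unordered.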

Andrew.


# HG changeset patch
# User aph
# Date 1415196041 18000
#      Wed Nov 05 09:00:41 2014 -0500
# Node ID 4ff9e02880b6d361ee4a58141165be8b12189415
# Parent  cb0a994c0747f989cd023cb56bd3acfa0604d370
C2: use store release instructions for all volatile stores.  Remove
leading and trailing barriers around volatile stores.

diff -r cb0a994c0747 -r 4ff9e02880b6 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Wed Nov 05 08:58:11 2014 -0500
+++ b/src/cpu/aarch64/vm/aarch64.ad	Wed Nov 05 09:00:41 2014 -0500
@@ -5379,7 +5379,7 @@
 instruct storeB(iRegI src, memory mem)
 %{
   match(Set mem (StoreB mem src));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "strb  $src, $mem\t# byte" %}
@@ -5393,7 +5393,7 @@
 instruct storeimmB0(immI0 zero, memory mem)
 %{
   match(Set mem (StoreB mem zero));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "strb zr, $mem\t# byte" %}
@@ -5407,7 +5407,7 @@
 instruct storeC(iRegI src, memory mem)
 %{
   match(Set mem (StoreC mem src));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "strh  $src, $mem\t# short" %}
@@ -5420,7 +5420,7 @@
 instruct storeimmC0(immI0 zero, memory mem)
 %{
   match(Set mem (StoreC mem zero));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "strh  zr, $mem\t# short" %}
@@ -5435,7 +5435,7 @@
 instruct storeI(iRegIorL2I src, memory mem)
 %{
   match(Set mem(StoreI mem src));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "strw  $src, $mem\t# int" %}
@@ -5448,7 +5448,7 @@
 instruct storeimmI0(immI0 zero, memory mem)
 %{
   match(Set mem(StoreI mem zero));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "strw  zr, $mem\t# int" %}
@@ -5462,7 +5462,7 @@
 instruct storeL(iRegL src, memory mem)
 %{
   match(Set mem (StoreL mem src));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "str  $src, $mem\t# int" %}
@@ -5476,7 +5476,7 @@
 instruct storeimmL0(immL0 zero, memory mem)
 %{
   match(Set mem (StoreL mem zero));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "str  zr, $mem\t# int" %}
@@ -5490,7 +5490,7 @@
 instruct storeP(iRegP src, memory mem)
 %{
   match(Set mem (StoreP mem src));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "str  $src, $mem\t# ptr" %}
@@ -5504,7 +5504,7 @@
 instruct storeimmP0(immP0 zero, memory mem)
 %{
   match(Set mem (StoreP mem zero));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "str zr, $mem\t# ptr" %}
@@ -5563,7 +5563,7 @@
 instruct storeN(iRegN src, memory mem)
 %{
   match(Set mem (StoreN mem src));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "strw  $src, $mem\t# compressed ptr" %}
@@ -5577,9 +5577,8 @@
 %{
   match(Set mem (StoreN mem zero));
   predicate(Universe::narrow_oop_base() == NULL &&
-            Universe::narrow_klass_base() == NULL//  &&
-	    // n->as_Store()->is_unordered()
-	    );
+            Universe::narrow_klass_base() == NULL &&
+            n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}
@@ -5593,7 +5592,7 @@
 instruct storeF(vRegF src, memory mem)
 %{
   match(Set mem (StoreF mem src));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "strs  $src, $mem\t# float" %}
@@ -5610,7 +5609,7 @@
 instruct storeD(vRegD src, memory mem)
 %{
   match(Set mem (StoreD mem src));
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());

   ins_cost(INSN_COST);
   format %{ "strd  $src, $mem\t# double" %}
@@ -5623,7 +5622,7 @@
 // Store Compressed Klass Pointer
 instruct storeNKlass(iRegN src, memory mem)
 %{
-//   predicate(n->as_Store()->is_unordered());
+  predicate(n->as_Store()->is_unordered());
   match(Set mem (StoreNKlass mem src));

   ins_cost(INSN_COST);
@@ -5869,6 +5868,114 @@
   ins_pipe(pipe_class_memory);
 %}

+// Store Byte
+instruct storeB_volatile(iRegI src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreB mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrb  $src, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_stlrb(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Char/Short
+instruct storeC_volatile(iRegI src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreC mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrh  $src, $mem\t# short" %}
+
+  ins_encode(aarch64_enc_stlrh(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Integer
+
+instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem(StoreI mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrw  $src, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_stlrw(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Long (64 bit signed)
+instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreL mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlr  $src, $mem\t# int" %}
+
+  ins_encode(aarch64_enc_stlr(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Pointer
+instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreP mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlr  $src, $mem\t# ptr" %}
+
+  ins_encode(aarch64_enc_stlr(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Compressed Pointer
+instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreN mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrw  $src, $mem\t# compressed ptr" %}
+
+  ins_encode(aarch64_enc_stlrw(src, mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// Store Float
+instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreF mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrs  $src, $mem\t# float" %}
+
+  ins_encode( aarch64_enc_fstlrs(src, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
+// TODO
+// implement storeImmF0 and storeFImmPacked
+
+// Store Double
+instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
+%{
+  match(Set mem (StoreD mem src));
+
+  ins_cost(VOLATILE_REF_COST);
+  format %{ "stlrd  $src, $mem\t# double" %}
+
+  ins_encode( aarch64_enc_fstlrd(src, mem) );
+
+  ins_pipe(pipe_class_memory);
+%}
+
 //  ---------------- end of volatile loads and stores ----------------

 // ============================================================================
@@ -6096,7 +6203,7 @@

   ins_encode %{
     __ membar(Assembler::StoreLoad);
-    %}
+  %}

   ins_pipe(pipe_class_memory);
 %}
diff -r cb0a994c0747 -r 4ff9e02880b6 src/share/vm/opto/graphKit.cpp
--- a/src/share/vm/opto/graphKit.cpp	Wed Nov 05 08:58:11 2014 -0500
+++ b/src/share/vm/opto/graphKit.cpp	Wed Nov 05 09:00:41 2014 -0500
@@ -3792,7 +3792,8 @@

   // Smash zero into card
   if( !UseConcMarkSweepGC ) {
-    __ store(__ ctrl(), card_adr, zero, bt, adr_type, MemNode::release);
+    __ store(__ ctrl(), card_adr, zero, bt, adr_type,
+	     MemNode:: NOT_AARCH64(release) AARCH64_ONLY(unordered));
   } else {
     // Specialized path for CM store barrier
     __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type);
diff -r cb0a994c0747 -r 4ff9e02880b6 src/share/vm/opto/memnode.hpp
--- a/src/share/vm/opto/memnode.hpp	Wed Nov 05 08:58:11 2014 -0500
+++ b/src/share/vm/opto/memnode.hpp	Wed Nov 05 09:00:41 2014 -0500
@@ -488,10 +488,16 @@
   // Conservatively release stores of object references in order to
   // ensure visibility of object initialization.
   static inline MemOrd release_if_reference(const BasicType t) {
+#ifndef AARCH64
     const MemOrd mo = (t == T_ARRAY ||
                        t == T_ADDRESS || // Might be the address of an object reference (`boxing').
                        t == T_OBJECT) ? release : unordered;
     return mo;
+#else
+    // AArch64 doesn't need this because it emits barriers when an
+    // object is initialized.
+    return unordered;
+#endif
   }

   // Polymorphic factory method
diff -r cb0a994c0747 -r 4ff9e02880b6 src/share/vm/opto/parse2.cpp
--- a/src/share/vm/opto/parse2.cpp	Wed Nov 05 08:58:11 2014 -0500
+++ b/src/share/vm/opto/parse2.cpp	Wed Nov 05 09:00:41 2014 -0500
@@ -1744,7 +1744,8 @@
     a = pop();                  // the array itself
     const TypeOopPtr* elemtype  = _gvn.type(a)->is_aryptr()->elem()->make_oopptr();
     const TypeAryPtr* adr_type = TypeAryPtr::OOPS;
-    Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT, MemNode::release);
+    Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT,
+				     MemNode:: NOT_AARCH64(release) AARCH64_ONLY(unordered));
     break;
   }
   case Bytecodes::_lastore: {
diff -r cb0a994c0747 -r 4ff9e02880b6 src/share/vm/opto/parse3.cpp
--- a/src/share/vm/opto/parse3.cpp	Wed Nov 05 08:58:11 2014 -0500
+++ b/src/share/vm/opto/parse3.cpp	Wed Nov 05 09:00:41 2014 -0500
@@ -281,7 +281,10 @@
   // If reference is volatile, prevent following memory ops from
   // floating down past the volatile write.  Also prevents commoning
   // another volatile read.
-  if (is_vol)  insert_mem_bar(Op_MemBarRelease);
+  // AArch64 uses store release (which does everything we need to keep
+  // the machine in order) but we still need a compiler barrier here.
+  if (is_vol)
+    insert_mem_bar(NOT_AARCH64(Op_MemBarRelease) AARCH64_ONLY(Op_MemBarCPUOrder));

   // Compute address and memory type.
   int offset = field->offset_in_bytes();
@@ -322,7 +325,7 @@
   if (is_vol) {
     // If not multiple copy atomic, we do the MemBarVolatile before the load.
     if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
-      insert_mem_bar(Op_MemBarVolatile); // Use fat membar
+      insert_mem_bar(NOT_AARCH64(Op_MemBarVolatile) AARCH64_ONLY(Op_MemBarCPUOrder)); // Use fat membar
     }
     // Remember we wrote a volatile field.
     // For not multiple copy atomic cpu (ppc64) a barrier should be issued

