[aarch64-port-dev ] Missing concurrency barriers and other bugs.

Andrew Haley aph at redhat.com
Wed Oct 1 18:23:12 UTC 2014


This is a bunch of fixes.

First, we were using str and strw almost randomly when writing
JavaThread::thread_state.  And, we weren't using any barriers, which
we seem to need.

MacroAssembler::serialize_memory was wrong; it now does what other
ports do.

The use_XOR_for_compressed_class_base flag was recomputed every time
an instance of MacroAssembler was constructed, but it only needs to be
computed once (whenever the narrow klass base changes).

Andrew.


# HG changeset patch
# User aph
# Date 1412186700 14400
#      Wed Oct 01 14:05:00 2014 -0400
# Node ID f77a6f417a54aeb12cedc962a753fb81b6dab958
# Parent  b43b612fe37291952d164b7e3dcee6ae760ade42
Missing concurrency barriers and other bugs.
JavaThread::thread_state accesses need barriers.
Correct implementation of serialize_memory().
Reduce the overhead of checking for use_XOR_for_compressed_class_base.

diff -r b43b612fe372 -r f77a6f417a54 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Oct 01 12:51:45 2014 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Oct 01 14:05:00 2014 -0400
@@ -64,6 +64,9 @@
 #define STOP(error) block_comment(error); stop(error)
 #endif

+bool MacroAssembler::_use_XOR_for_compressed_class_base;
+address MacroAssembler::_narrow_klass_base;
+
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

 void MacroAssembler::pd_patch_instruction(address branch, address target) {
@@ -107,8 +110,8 @@
       // up treating a type 3 relocation as a type 1 or 2 just because it happened
       // to be followed by a random unrelated ldr/str or add instruction.
       //
-      // In the case of a type 3 relocation, we know that these are only generated
-      // for the safepoint polling page, or for the card type byte map base so we
+      // In the case of a type 3 relocation, we know that these are
+      // only generated for a few well-known page addresses so we
       // assert as much and of course that the offset is 0.
       //
       unsigned insn2 = ((unsigned*)branch)[1];
@@ -130,7 +133,8 @@
 	assert((jbyte *)target ==
 		((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
                target == StubRoutines::crc_table_addr() ||
-               (address)target == os::get_polling_page(),
+               target == os::get_polling_page() ||
+	       target == os::get_memory_serialize_page(),
 	       "adrp must be polling page or byte map base");
 	assert(offset_lo == 0, "offset must be 0 for polling page or byte map base");
       }
@@ -250,8 +254,16 @@
   return address(((uint64_t)insn_addr + (offset << 2)));
 }

-void MacroAssembler::serialize_memory(Register thread, Register tmp) {
-  dsb(Assembler::SY);
+void MacroAssembler::serialize_memory(Register tmp1, Register tmp2) {
+  andr(tmp1, rthread, (os::vm_page_size() - sizeof(int)) << os::get_serialize_page_shift_count());
+
+  unsigned long offset;
+  adrp(tmp2, os::get_memory_serialize_page(), offset);
+  guarantee(offset == 0, "should be a page address");
+
+  // Size of store must match masking code above
+  add(tmp1, tmp2, tmp1, LSR, os::get_serialize_page_shift_count());
+  stlrw(tmp2, tmp1);
 }


@@ -2678,7 +2690,7 @@
     return;
   }

-  if (use_XOR_for_compressed_class_base) {
+  if (_use_XOR_for_compressed_class_base) {
     if (Universe::narrow_klass_shift() != 0) {
       eor(dst, src, (uint64_t)Universe::narrow_klass_base());
       lsr(dst, dst, LogKlassAlignmentInBytes);
@@ -2727,7 +2739,7 @@
     return;
   }

-  if (use_XOR_for_compressed_class_base) {
+  if (_use_XOR_for_compressed_class_base) {
     if (Universe::narrow_klass_shift() != 0) {
       lsl(dst, src, LogKlassAlignmentInBytes);
       eor(dst, dst, (uint64_t)Universe::narrow_klass_base());
diff -r b43b612fe372 -r f77a6f417a54 src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Oct 01 12:51:45 2014 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Oct 01 14:05:00 2014 -0400
@@ -91,16 +91,19 @@

   void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);

-  // Maximum size of class area in Metaspace when compressed
-  uint64_t use_XOR_for_compressed_class_base;
+  static bool _use_XOR_for_compressed_class_base;
+  static address _narrow_klass_base;

  public:
   MacroAssembler(CodeBuffer* code) : Assembler(code) {
-    use_XOR_for_compressed_class_base
-      = (operand_valid_for_logical_immediate(false /*is32*/,
-					     (uint64_t)Universe::narrow_klass_base())
-	 && ((uint64_t)Universe::narrow_klass_base()
-	     > (1u << log2_intptr(CompressedClassSpaceSize))));
+    if (Universe::narrow_klass_base() != _narrow_klass_base) {
+      _use_XOR_for_compressed_class_base
+	= (operand_valid_for_logical_immediate(false /*is32*/,
+					       (uint64_t)Universe::narrow_klass_base())
+	   && ((uint64_t)Universe::narrow_klass_base()
+	       > (1u << log2_intptr(CompressedClassSpaceSize))));
+      _narrow_klass_base = Universe::narrow_klass_base();
+    }
   }

   // Biased locking support
@@ -877,7 +880,10 @@
                                                 int offset);

   // Support for serializing memory accesses between threads
-  void serialize_memory(Register thread, Register tmp);
+  void serialize_memory(Register tmp1, Register tmp2);
+
+  void release() { membar(LoadStore|StoreStore); }
+  void acquire() { membar(LoadLoad|LoadStore); }

   // Arithmetics

diff -r b43b612fe372 -r f77a6f417a54 src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Oct 01 12:51:45 2014 -0400
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Oct 01 14:05:00 2014 -0400
@@ -1872,7 +1872,8 @@

   // Now set thread in native
   __ mov(rscratch1, _thread_in_native);
-  __ str(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
+  __ release();
+  __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));

   {
     int return_type = 0;
@@ -1929,18 +1930,20 @@
   //     Thread A is resumed to finish this native method, but doesn't block here since it
   //     didn't see any synchronization is progress, and escapes.
   __ mov(rscratch1, _thread_in_native_trans);
-  __ str(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
+  __ release();
+  __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));

   if(os::is_MP()) {
     if (UseMembar) {
       // Force this write out before the read below
-      __ dmb(Assembler::SY);
+      __ membar(Assembler::LoadLoad | Assembler::LoadStore |
+           Assembler::StoreLoad | Assembler::StoreStore);
     } else {
       // Write serialization page so VM thread can do a pseudo remote membar.
       // We use the current thread pointer to calculate a thread specific
       // offset to write to within the page. This minimizes bus traffic
       // due to cache line collision.
-      __ serialize_memory(rthread, r2);
+      __ serialize_memory(rscratch1, r2);
     }
   }

@@ -1993,7 +1996,8 @@

   // change thread state
   __ mov(rscratch1, _thread_in_Java);
-  __ str(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
+  __ release();
+  __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
   __ bind(after_transition);

   Label reguard;
diff -r b43b612fe372 -r f77a6f417a54 src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Wed Oct 01 12:51:45 2014 -0400
+++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Wed Oct 01 14:05:00 2014 -0400
@@ -1031,6 +1031,7 @@
 #endif

   // Change state to native
+  __ release();
   __ mov(rscratch1, _thread_in_native);
   __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));

@@ -1052,19 +1053,21 @@
   __ push(ltos);

   // change thread state
+  __ release();
   __ mov(rscratch1, _thread_in_native_trans);
   __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));

   if (os::is_MP()) {
     if (UseMembar) {
       // Force this write out before the read below
-      __ dsb(Assembler::SY);
+      __ membar(Assembler::LoadLoad | Assembler::LoadStore |
+           Assembler::StoreLoad | Assembler::StoreStore);
     } else {
       // Write serialization page so VM thread can do a pseudo remote membar.
       // We use the current thread pointer to calculate a thread specific
       // offset to write to within the page. This minimizes bus traffic
       // due to cache line collision.
-      __ serialize_memory(rthread, rscratch2);
+      __ serialize_memory(rscratch1, rscratch2);
     }
   }

@@ -1101,6 +1104,7 @@
   }

   // change thread state
+  __ release();
   __ mov(rscratch1, _thread_in_Java);
   __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));



More information about the aarch64-port-dev mailing list