[aarch64-port-dev ] Various concurrency fixes

Andrew Haley aph at redhat.com
Wed Sep 3 14:53:24 UTC 2014


I found three bugs in AArch64's handling of caches and barriers.

We weren't flushing (invalidating the instruction cache for) all of a
compiledIC stub.
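
The stub is a movz/movk sequence that loads the Method*, followed by a
branch.  In outline the fix is the following sketch (the real hunk
against compiledIC_aarch64.cpp is in the patch below):

    // Patch the Method* constant and the branch, then invalidate the
    // icache for the *whole* stub rather than just the constant load.
    method_holder->set_data((intptr_t)callee());
    NativeGeneralJump::insert_unconditional(
        method_holder->next_instruction_address(), entry);
    ICache::invalidate_range(stub, to_interp_stub_size());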

When we move to a safepoint we overwrite the interpreter's dispatch
table with a changed version, but there was no memory barrier in the
interpreter, so the changed dispatch table might never be noticed.
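
The fix is a read barrier at the head of the interpreter's branch and
ret templates, so that the next load from the dispatch table is ordered
after whatever the safepoint code published.  A sketch (the real hunks
against templateTable_aarch64.cpp are in the patch below):

    // We might be moving to a safepoint.  The thread that calls
    // Interpreter::notice_safepoints() effectively flushes its cache
    // when it makes a system call, but this thread needs a barrier to
    // make sure it sees the changed dispatch table.
    __ membar(MacroAssembler::LoadLoad);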

Atomic::xchg() was only a half (acquire) barrier.  It is used in
unpark(), which really expects it to be a full barrier.
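
The fix is to follow the GCC builtin, which is documented as an acquire
barrier only, with an explicit full fence.  In outline (the full hunk
against atomic_linux_aarch64.inline.hpp is below):

    inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
    {
      // __sync_lock_test_and_set is only an acquire barrier, so add a
      // full fence to give callers such as unpark() the ordering they
      // expect from xchg.
      jint res = __sync_lock_test_and_set (dest, exchange_value);
      FULL_MEM_BARRIER;   // __sync_synchronize()
      return res;
    }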

Andrew.
-------------- next part --------------
# HG changeset patch
# User aph
# Date 1409569367 14400
#      Mon Sep 01 07:02:47 2014 -0400
# Node ID 24958c74f6112b0625a9c659e5af58bd02e5cda0
# Parent  3d7fbe0c8b7efcb2c29520cd1d0cbf7403920507
Various concurrency fixes.
Invalidate the whole of a compiledIC stub.
Add membars to interpreter in branches and ret instructions.
Atomic::xchg must be a full barrier.

diff -r 3d7fbe0c8b7e -r 24958c74f611 src/cpu/aarch64/vm/compiledIC_aarch64.cpp
--- a/src/cpu/aarch64/vm/compiledIC_aarch64.cpp	Sat Sep 18 08:53:21 2032 -0400
+++ b/src/cpu/aarch64/vm/compiledIC_aarch64.cpp	Mon Sep 01 07:02:47 2014 -0400
@@ -78,15 +78,13 @@
 #undef __
 
 int CompiledStaticCall::to_interp_stub_size() {
-  // count a mov mem --> to 4 movz/k and a branch
-  return 6 * NativeInstruction::instruction_size;
+  // count a mov mem --> to 3 movz/k and a branch
+  return 4 * NativeInstruction::instruction_size;
 }
 
 // Relocation entries for call stub, compiled java to interpreter.
 int CompiledStaticCall::reloc_to_interp_stub() {
-  // TODO fixme
-  // return a large number
-  return 5;
+  return 4; // 3 in emit_to_interp_stub + 1 in emit_call
 }
 
 void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) {
@@ -102,18 +100,18 @@
 
   // Creation also verifies the object.
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
-  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+#ifndef PRODUCT
+  NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address());
 
   assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(),
          "a) MT-unsafe modification of inline cache");
-  assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry,
+  assert(method_holder->data() == 0 || jump->jump_destination() == entry,
          "b) MT-unsafe modification of inline cache");
-
+#endif
   // Update stub.
   method_holder->set_data((intptr_t)callee());
-  method_holder->flush();
-  jump->set_jump_destination(entry);
-
+  NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry);
+  ICache::invalidate_range(stub, to_interp_stub_size());
   // Update jump to call.
   set_destination_mt_safe(stub);
 }
@@ -125,9 +123,7 @@
   assert(stub != NULL, "stub not found");
   // Creation also verifies the object.
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
-  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
   method_holder->set_data(0);
-  jump->set_jump_destination((address)-1);
 }
 
 //-----------------------------------------------------------------------------
diff -r 3d7fbe0c8b7e -r 24958c74f611 src/cpu/aarch64/vm/nativeInst_aarch64.hpp
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Sat Sep 18 08:53:21 2032 -0400
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Mon Sep 01 07:02:47 2014 -0400
@@ -142,6 +142,7 @@
     offset &= (1 << 26) - 1; // mask off insn part
     insn |= offset;
     set_int_at(displacement_offset, insn);
+    ICache::invalidate_range(instruction_address(), instruction_size);
   }
 
   // Similar to replace_mt_safe, but just changes the destination.  The
diff -r 3d7fbe0c8b7e -r 24958c74f611 src/cpu/aarch64/vm/templateTable_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Sat Sep 18 08:53:21 2032 -0400
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Mon Sep 01 07:02:47 2014 -0400
@@ -1569,6 +1569,12 @@
 
 void TemplateTable::branch(bool is_jsr, bool is_wide)
 {
+  // We might be moving to a safepoint.  The thread which calls
+  // Interpreter::notice_safepoints() will effectively flush its cache
+  // when it makes a system call, but we need to do something to
+  // ensure that we see the changed dispatch table.
+  __ membar(MacroAssembler::LoadLoad);
+
   __ profile_taken_branch(r0, r1);
   const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
                              InvocationCounter::counter_offset();
@@ -1850,6 +1856,12 @@
 
 void TemplateTable::ret() {
   transition(vtos, vtos);
+  // We might be moving to a safepoint.  The thread which calls
+  // Interpreter::notice_safepoints() will effectively flush its cache
+  // when it makes a system call, but we need to do something to
+  // ensure that we see the changed dispatch table.
+  __ membar(MacroAssembler::LoadLoad);
+
   locals_index(r1);
   __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp
   __ profile_ret(r1, r2);
diff -r 3d7fbe0c8b7e -r 24958c74f611 src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
--- a/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp	Sat Sep 18 08:53:21 2032 -0400
+++ b/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp	Mon Sep 01 07:02:47 2014 -0400
@@ -31,6 +31,10 @@
 
 // Implementation of class atomic
 
+#define FULL_MEM_BARRIER  __sync_synchronize()
+#define READ_MEM_BARRIER  __atomic_thread_fence(__ATOMIC_ACQUIRE);
+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
+
 inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
 inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
 inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
@@ -71,7 +75,9 @@
 
 inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
 {
- return __sync_lock_test_and_set (dest, exchange_value);
+  jint res = __sync_lock_test_and_set (dest, exchange_value);
+  FULL_MEM_BARRIER;
+  return res;
 }
 
 inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest)
@@ -111,7 +117,9 @@
 
 inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
 {
- return __sync_lock_test_and_set (dest, exchange_value);
+  intptr_t res = __sync_lock_test_and_set (dest, exchange_value);
+  FULL_MEM_BARRIER;
+  return res;
 }
 
 inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value)
diff -r 3d7fbe0c8b7e -r 24958c74f611 src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp
--- a/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp	Sat Sep 18 08:53:21 2032 -0400
+++ b/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp	Mon Sep 01 07:02:47 2014 -0400
@@ -31,10 +31,6 @@
 #include "runtime/os.hpp"
 #include "vm_version_aarch64.hpp"
 
-#define FULL_MEM_BARRIER  __sync_synchronize()
-#define READ_MEM_BARRIER  __atomic_thread_fence(__ATOMIC_ACQUIRE);
-#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
-
 // Implementation of class OrderAccess.
 
 inline void OrderAccess::loadload()   { acquire(); }

