[aarch64-port-dev ] mem barriers take 2

Edward Nevill edward.nevill at linaro.org
Wed Nov 13 08:42:09 PST 2013


Hi,

Ok, here is my 2nd attempt at this.

Now works for BUILTIN_SIM and also implements load_acquire and store_release.

The build also works for any other arch (x86, sparc) because they do not include this file.

OK?

All the best,
Ed.

--- CUT HERE ---
exporting patch:
# HG changeset patch
# User Edward Nevill edward.nevill at linaro.org
# Date 1384360576 0
#      Wed Nov 13 16:36:16 2013 +0000
# Node ID 89c47088ba5f69988edceb3cf217e92fdcb220ba
# Parent  feef9250f2feffe79db7939b5be389e8ecc8c5e0
Implement READ/WRITE mem barriers and load_acquire and store_release

diff -r feef9250f2fe -r 89c47088ba5f src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp
--- a/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp	Tue Nov 12 14:13:18 2013 -0500
+++ b/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp	Wed Nov 13 16:36:16 2013 +0000
@@ -31,42 +31,19 @@
 #include "runtime/os.hpp"
 #include "vm_version_aarch64.hpp"
 
-#ifdef ARM
-
-/*
- * ARM Kernel helper for memory barrier.
- * Using __asm __volatile ("":::"memory") does not work reliable on ARM
- * and gcc __sync_synchronize(); implementation does not use the kernel
- * helper for all gcc versions so it is unreliable to use as well.
- */
-typedef void (__kernel_dmb_t) (void);
-#define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0)
-
-#define FULL_MEM_BARRIER __kernel_dmb()
-#define READ_MEM_BARRIER __kernel_dmb()
-#define WRITE_MEM_BARRIER __kernel_dmb()
-
-#else // ARM
+#ifdef BUILTIN_SIM
 
 #define FULL_MEM_BARRIER __sync_synchronize()
-
-#ifdef PPC
-
-#define READ_MEM_BARRIER __asm __volatile ("isync":::"memory")
-#ifdef __NO_LWSYNC__
-#define WRITE_MEM_BARRIER __asm __volatile ("sync":::"memory")
-#else
-#define WRITE_MEM_BARRIER __asm __volatile ("lwsync":::"memory")
-#endif
-
-#else // PPC
-
 #define READ_MEM_BARRIER __asm __volatile ("":::"memory")
 #define WRITE_MEM_BARRIER __asm __volatile ("":::"memory")
 
-#endif // PPC
+#else
 
-#endif // ARM
+#define FULL_MEM_BARRIER  __sync_synchronize() // dmb ish
+#define READ_MEM_BARRIER  __asm __volatile ("dmb ishld":::"memory")
+#define WRITE_MEM_BARRIER __asm __volatile ("dmb ishst":::"memory")
+
+#endif
 
 // Implementation of class OrderAccess.
 
@@ -87,6 +64,8 @@
   FULL_MEM_BARRIER;
 }
 
+#ifdef BUILTIN_SIM
+
 inline jbyte    OrderAccess::load_acquire(volatile jbyte*   p) { jbyte data = *p; acquire(); return data; }
 inline jshort   OrderAccess::load_acquire(volatile jshort*  p) { jshort data = *p; acquire(); return data; }
 inline jint     OrderAccess::load_acquire(volatile jint*    p) { jint data = *p; acquire(); return data; }
@@ -129,6 +108,123 @@
   return data;
 }
 
+#else
+
+inline jbyte    OrderAccess::load_acquire(volatile jbyte*   p) {
+    jbyte data;
+    __asm __volatile("ldarb %w[result], [%[address]]; sxtb %w[result], %w[result]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+
+inline jshort   OrderAccess::load_acquire(volatile jshort*  p) {
+    jshort data;
+    __asm __volatile("ldarh %w[result], [%[address]]; sxth %w[result], %w[result]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+
+inline jint     OrderAccess::load_acquire(volatile jint*    p) {
+    jint data;
+    __asm __volatile("ldar %w[result], [%[address]]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+
+inline jlong    OrderAccess::load_acquire(volatile jlong*   p) {
+    jlong data;
+    __asm __volatile("ldar %[result], [%[address]]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+
+inline jubyte    OrderAccess::load_acquire(volatile jubyte*   p) {
+    jubyte data;
+    __asm __volatile("ldarb %w[result], [%[address]]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+
+inline jushort   OrderAccess::load_acquire(volatile jushort*  p) {
+    jushort data;
+    __asm __volatile("ldarh %w[result], [%[address]]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+
+inline juint     OrderAccess::load_acquire(volatile juint*    p) {
+    juint data;
+    __asm __volatile("ldar %w[result], [%[address]]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+
+inline julong   OrderAccess::load_acquire(volatile julong*  p) {
+    julong data;
+    __asm __volatile("ldar %[result], [%[address]]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+
+// For float/double it is probably better to do a load and dmb
+inline jfloat   OrderAccess::load_acquire(volatile jfloat*  p) {
+    jfloat data = *p;
+    acquire();
+    return data;
+}
+
+inline jdouble  OrderAccess::load_acquire(volatile jdouble* p) {
+    jdouble data = *p;
+    acquire();
+    return data;
+}
+
+inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t*   p) {
+    intptr_t data;
+    __asm __volatile("ldar %[result], [%[address]]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+
+inline void*    OrderAccess::load_ptr_acquire(volatile void*       p) {
+    void* data;
+    __asm __volatile("ldar %[result], [%[address]]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+inline void*    OrderAccess::load_ptr_acquire(const volatile void* p) {
+    void* data;
+    __asm __volatile("ldar %[result], [%[address]]"
+		: [result]"=r" (data)
+		: [address]"r" (p)
+		: "memory");
+    return data;
+}
+
+#endif
+
+#ifdef BUILTIN_SIM
+
 inline void     OrderAccess::release_store(volatile jbyte*   p, jbyte   v) { release(); *p = v; }
 inline void     OrderAccess::release_store(volatile jshort*  p, jshort  v) { release(); *p = v; }
 inline void     OrderAccess::release_store(volatile jint*    p, jint    v) { release(); *p = v; }
@@ -147,16 +243,101 @@
 inline void     OrderAccess::release_store_ptr(volatile void*     p, void*    v)
 { release(); *(void* volatile *)p = v; }
 
+#else
+
+inline void     OrderAccess::release_store(volatile jbyte*   p, jbyte   v) {
+    __asm __volatile("stlrb %w[source], [%[address]]"
+		:
+		: [source]"r" (v), [address]"r" (p)
+		: "memory");
+}
+
+inline void     OrderAccess::release_store(volatile jshort*  p, jshort  v) {
+    __asm __volatile("stlrh %w[source], [%[address]]"
+		:
+		: [source]"r" (v), [address]"r" (p)
+		: "memory");
+}
+
+inline void     OrderAccess::release_store(volatile jint*    p, jint    v) {
+    __asm __volatile("stlr %w[source], [%[address]]"
+		:
+		: [source]"r" (v), [address]"r" (p)
+		: "memory");
+}
+
+inline void     OrderAccess::release_store(volatile jlong*   p, jlong   v) {
+    __asm __volatile("stlr %[source], [%[address]]"
+		:
+		: [source]"r" (v), [address]"r" (p)
+		: "memory");
+}
+
+inline void     OrderAccess::release_store(volatile jubyte*  p, jubyte  v) {
+    __asm __volatile("stlrb %w[source], [%[address]]"
+		:
+		: [source]"r" (v), [address]"r" (p)
+		: "memory");
+}
+
+inline void     OrderAccess::release_store(volatile jushort* p, jushort v) {
+    __asm __volatile("stlrh %w[source], [%[address]]"
+		:
+		: [source]"r" (v), [address]"r" (p)
+		: "memory");
+}
+
+inline void     OrderAccess::release_store(volatile juint*   p, juint   v) {
+    __asm __volatile("stlr %w[source], [%[address]]"
+		:
+		: [source]"r" (v), [address]"r" (p)
+		: "memory");
+}
+
+inline void     OrderAccess::release_store(volatile julong*  p, julong  v) {
+    __asm __volatile("stlr %[source], [%[address]]"
+		:
+		: [source]"r" (v), [address]"r" (p)
+		: "memory");
+}
+
+// For float/double it is probably better to do a release() and a plain store
+inline void     OrderAccess::release_store(volatile jfloat*  p, jfloat  v) {
+    release();
+    *p = v;
+}
+
+inline void     OrderAccess::release_store(volatile jdouble* p, jdouble v) {
+    release();
+    *p = v;
+}
+
+inline void     OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) {
+    __asm __volatile("stlr %[source], [%[address]]"
+		:
+		: [source]"r" (v), [address]"r" (p)
+		: "memory");
+}
+
+inline void     OrderAccess::release_store_ptr(volatile void*     p, void*    v) {
+    __asm __volatile("stlr %[source], [%[address]]"
+		:
+		: [source]"r" (v), [address]"r" (p)
+		: "memory");
+}
+
+#endif
+
 inline void     OrderAccess::store_fence(jbyte*   p, jbyte   v) { *p = v; fence(); }
 inline void     OrderAccess::store_fence(jshort*  p, jshort  v) { *p = v; fence(); }
 inline void     OrderAccess::store_fence(jint*    p, jint    v) { *p = v; fence(); }
-inline void     OrderAccess::store_fence(jlong*   p, jlong   v) { os::atomic_copy64(&v, p); fence(); }
+inline void     OrderAccess::store_fence(jlong*   p, jlong   v) { *p = v; fence(); }
 inline void     OrderAccess::store_fence(jubyte*  p, jubyte  v) { *p = v; fence(); }
 inline void     OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); }
 inline void     OrderAccess::store_fence(juint*   p, juint   v) { *p = v; fence(); }
-inline void     OrderAccess::store_fence(julong*  p, julong  v) { os::atomic_copy64(&v, p); fence(); }
+inline void     OrderAccess::store_fence(julong*  p, julong  v) { *p = v; fence(); }
 inline void     OrderAccess::store_fence(jfloat*  p, jfloat  v) { *p = v; fence(); }
-inline void     OrderAccess::store_fence(jdouble* p, jdouble v) { os::atomic_copy64(&v, p); fence(); }
+inline void     OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); }
 
 inline void     OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); }
 inline void     OrderAccess::store_ptr_fence(void**    p, void*    v) { *p = v; fence(); }
--- CUT HERE ---





More information about the aarch64-port-dev mailing list