[aarch64-port-dev ] mem barriers take 2
Edward Nevill
edward.nevill at linaro.org
Wed Nov 13 08:42:09 PST 2013
Hi,
Ok, here is my 2nd attempt at this.
Now works for BUILTIN_SIM and also implements load_acquire and store_release.
The build also works for any other arch (x86, sparc) because they do not include this file.
OK?
All the best,
Ed.
--- CUT HERE ---
exporting patch:
# HG changeset patch
# User Edward Nevill edward.nevill at linaro.org
# Date 1384360576 0
# Wed Nov 13 16:36:16 2013 +0000
# Node ID 89c47088ba5f69988edceb3cf217e92fdcb220ba
# Parent feef9250f2feffe79db7939b5be389e8ecc8c5e0
Implement READ/WRITE mem barriers and load_acquire and store_release
diff -r feef9250f2fe -r 89c47088ba5f src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp
--- a/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp Tue Nov 12 14:13:18 2013 -0500
+++ b/src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp Wed Nov 13 16:36:16 2013 +0000
@@ -31,42 +31,19 @@
#include "runtime/os.hpp"
#include "vm_version_aarch64.hpp"
-#ifdef ARM
-
-/*
- * ARM Kernel helper for memory barrier.
- * Using __asm __volatile ("":::"memory") does not work reliable on ARM
- * and gcc __sync_synchronize(); implementation does not use the kernel
- * helper for all gcc versions so it is unreliable to use as well.
- */
-typedef void (__kernel_dmb_t) (void);
-#define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0)
-
-#define FULL_MEM_BARRIER __kernel_dmb()
-#define READ_MEM_BARRIER __kernel_dmb()
-#define WRITE_MEM_BARRIER __kernel_dmb()
-
-#else // ARM
+#ifdef BUILTIN_SIM
#define FULL_MEM_BARRIER __sync_synchronize()
-
-#ifdef PPC
-
-#define READ_MEM_BARRIER __asm __volatile ("isync":::"memory")
-#ifdef __NO_LWSYNC__
-#define WRITE_MEM_BARRIER __asm __volatile ("sync":::"memory")
-#else
-#define WRITE_MEM_BARRIER __asm __volatile ("lwsync":::"memory")
-#endif
-
-#else // PPC
-
#define READ_MEM_BARRIER __asm __volatile ("":::"memory")
#define WRITE_MEM_BARRIER __asm __volatile ("":::"memory")
-#endif // PPC
+#else
-#endif // ARM
+#define FULL_MEM_BARRIER __sync_synchronize() // dmb ish
+#define READ_MEM_BARRIER __asm __volatile ("dmb ishld":::"memory")
+#define WRITE_MEM_BARRIER __asm __volatile ("dmb ishst":::"memory")
+
+#endif
// Implementation of class OrderAccess.
@@ -87,6 +64,8 @@
FULL_MEM_BARRIER;
}
+#ifdef BUILTIN_SIM
+
inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; }
inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; }
inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; }
@@ -129,6 +108,123 @@
return data;
}
+#else
+
+inline jbyte OrderAccess::load_acquire(volatile jbyte* p) {
+ jbyte data;
+ __asm __volatile("ldarb %w[result], [%[address]]; sxtb %w[result], %w[result]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+
+inline jshort OrderAccess::load_acquire(volatile jshort* p) {
+ jshort data;
+ __asm __volatile("ldarh %w[result], [%[address]]; sxth %w[result], %w[result]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+
+inline jint OrderAccess::load_acquire(volatile jint* p) {
+ jint data;
+ __asm __volatile("ldar %w[result], [%[address]]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+
+inline jlong OrderAccess::load_acquire(volatile jlong* p) {
+ jlong data;
+ __asm __volatile("ldar %[result], [%[address]]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+
+inline jubyte OrderAccess::load_acquire(volatile jubyte* p) {
+ jubyte data;
+ __asm __volatile("ldarb %w[result], [%[address]]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+
+inline jushort OrderAccess::load_acquire(volatile jushort* p) {
+ jushort data;
+ __asm __volatile("ldarh %w[result], [%[address]]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+
+inline juint OrderAccess::load_acquire(volatile juint* p) {
+ juint data;
+ __asm __volatile("ldar %w[result], [%[address]]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+
+inline julong OrderAccess::load_acquire(volatile julong* p) {
+ julong data;
+ __asm __volatile("ldar %[result], [%[address]]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+
+// For float/double it is probably better to do a plain load followed by a dmb
+inline jfloat OrderAccess::load_acquire(volatile jfloat* p) {
+ jfloat data = *p;
+ acquire();
+ return data;
+}
+
+inline jdouble OrderAccess::load_acquire(volatile jdouble* p) {
+ jdouble data = *p;
+ acquire();
+ return data;
+}
+
+inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) {
+ intptr_t data;
+ __asm __volatile("ldar %[result], [%[address]]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+
+inline void* OrderAccess::load_ptr_acquire(volatile void* p) {
+ void* data;
+ __asm __volatile("ldar %[result], [%[address]]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+inline void* OrderAccess::load_ptr_acquire(const volatile void* p) {
+ void* data;
+ __asm __volatile("ldar %[result], [%[address]]"
+ : [result]"=r" (data)
+ : [address]"r" (p)
+ : "memory");
+ return data;
+}
+
+#endif
+
+#ifdef BUILTIN_SIM
+
inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; }
inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; }
inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; }
@@ -147,16 +243,101 @@
inline void OrderAccess::release_store_ptr(volatile void* p, void* v)
{ release(); *(void* volatile *)p = v; }
+#else
+
+inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) {
+ __asm __volatile("stlrb %w[source], [%[address]]"
+ :
+ : [source]"r" (v), [address]"r" (p)
+ : "memory");
+}
+
+inline void OrderAccess::release_store(volatile jshort* p, jshort v) {
+ __asm __volatile("stlrh %w[source], [%[address]]"
+ :
+ : [source]"r" (v), [address]"r" (p)
+ : "memory");
+}
+
+inline void OrderAccess::release_store(volatile jint* p, jint v) {
+ __asm __volatile("stlr %w[source], [%[address]]"
+ :
+ : [source]"r" (v), [address]"r" (p)
+ : "memory");
+}
+
+inline void OrderAccess::release_store(volatile jlong* p, jlong v) {
+ __asm __volatile("stlr %[source], [%[address]]"
+ :
+ : [source]"r" (v), [address]"r" (p)
+ : "memory");
+}
+
+inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) {
+ __asm __volatile("stlrb %w[source], [%[address]]"
+ :
+ : [source]"r" (v), [address]"r" (p)
+ : "memory");
+}
+
+inline void OrderAccess::release_store(volatile jushort* p, jushort v) {
+ __asm __volatile("stlrh %w[source], [%[address]]"
+ :
+ : [source]"r" (v), [address]"r" (p)
+ : "memory");
+}
+
+inline void OrderAccess::release_store(volatile juint* p, juint v) {
+ __asm __volatile("stlr %w[source], [%[address]]"
+ :
+ : [source]"r" (v), [address]"r" (p)
+ : "memory");
+}
+
+inline void OrderAccess::release_store(volatile julong* p, julong v) {
+ __asm __volatile("stlr %[source], [%[address]]"
+ :
+ : [source]"r" (v), [address]"r" (p)
+ : "memory");
+}
+
+// Best to leave float/double as a plain store preceded by a release barrier
+inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) {
+ release();
+ *p = v;
+}
+
+inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) {
+ release();
+ *p = v;
+}
+
+inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) {
+ __asm __volatile("stlr %[source], [%[address]]"
+ :
+ : [source]"r" (v), [address]"r" (p)
+ : "memory");
+}
+
+inline void OrderAccess::release_store_ptr(volatile void* p, void* v) {
+ __asm __volatile("stlr %[source], [%[address]]"
+ :
+ : [source]"r" (v), [address]"r" (p)
+ : "memory");
+}
+
+#endif
+
inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); }
inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); }
inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); }
-inline void OrderAccess::store_fence(jlong* p, jlong v) { os::atomic_copy64(&v, p); fence(); }
+inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); }
inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); }
inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); }
inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); }
-inline void OrderAccess::store_fence(julong* p, julong v) { os::atomic_copy64(&v, p); fence(); }
+inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); }
inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); }
-inline void OrderAccess::store_fence(jdouble* p, jdouble v) { os::atomic_copy64(&v, p); fence(); }
+inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); }
inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); }
inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); }
--- CUT HERE ---
More information about the aarch64-port-dev
mailing list