ARM: More intrinsics

Andrew Haley aph at redhat.com
Fri Mar 2 10:07:23 PST 2012


I've been adding _compareAndSwapInt and _compareAndSwapLong to the set
of intrinsics.  It's never that easy, of course, and while I was doing
that I found a bug in the code for abs that would cause locals to be
corrupted.

I also realized that if the JIT does long atomic swaps, so must the
interpreter, so this patch also provides a long atomic swap for Zero
on ARM.  It'll be used even if you don't have the JIT or the asm
interpreter.  We can't rely on a kernel builtin to do 64-bit swaps,
because that operation is only provided by the most recent Linux
kernels, so in practice it isn't available to us.

I also reorganized handle_special_method() a bit so that it switches
on the VM's intrinsic_id() instead of Interpreter::method_kind().
This provides the opportunity for many more intrinsics than
Interpreter::method_kind() allows.

Andrew.


2012-03-02  Andrew Haley  <aph at redhat.com>

	* arm_port/hotspot/src/cpu/zero/vm/arm_cas.S: New file.
	* patches/arm.patch (void get_processor_features): New function
	that enables compareAndSwap on jlongs.
	(atomic_linux_zero.inline.hpp: arm_val_compare_and_swap): New
	function.
	(atomic_linux_zero.inline.hpp: Atomic::store): Use
	arm_val_compare_and_swap.

	* openjdk/hotspot/src/cpu/zero/vm/thumb2.cpp (IT_MASK_TT)
	(IT_MASK_TE, IT_MASK_TTT, IT_MASK_TEE): Add a few new IT
	encodings.
	(Thumb2_dUnaryOp): Generalize Thumb2_dNeg.
	(Thumb2_dNeg, Thumb2_dAbs): Specializations of Thumb2_dUnaryOp.
	(handle_special_method): Use intrinsic_id instead of method_kind.
	Add handlers for _compareAndSwapInt and _compareAndSwapLong.
	(Thumb2_codegen): Call handle_special_method() for invokevirtual.
	Pass stackdepth to handle_special_method().

diff -r 01123e3102cc arm_port/hotspot/src/cpu/zero/vm/arm_cas.S
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/arm_port/hotspot/src/cpu/zero/vm/arm_cas.S	Fri Mar 02 17:52:08 2012 +0000
@@ -0,0 +1,30 @@
+#ifdef __ARM_ARCH_7A__
+@	jlong
+@	arm_val_compare_and_swap_long(volatile void *ptr,
+@				 jlong oldval,
+@				 jlong newval) {
+	.pushsection .text
+	.global arm_val_compare_and_swap_long
+#ifdef __thumb__
+	.thumb_func
+#endif
+	.type arm_val_compare_and_swap_long, %function
+arm_val_compare_and_swap_long:
+	stmfd	sp!, {r4, r5, r6, r7}
+	ldrd	r4, [sp, #16]
+	dmb	sy
+0:	ldrexd	r6, [r0]
+	cmp	r6, r2
+	it	eq
+	cmpeq	r7, r3
+	bne	1f
+	strexd	r1, r4, [r0]
+	cmp	r1, #0
+	bne	0b
+	dmb	sy
+1:	mov	r0, r6
+	mov	r1, r7
+	ldmfd	sp!, {r4, r5, r6, r7}
+	bx	lr
+	.popsection
+#endif // __ARM_ARCH_7A__
diff -r 01123e3102cc arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp
--- a/arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp	Wed Feb 22 15:36:29 2012 +0000
+++ b/arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp	Fri Mar 02 17:52:08 2012 +0000
@@ -3042,6 +3042,10 @@
 #define T_IT(cond, mask) (0xbf00 | (conds[cond] << 4) | (mask))

 #define IT_MASK_T	8
+#define IT_MASK_TE	0x14
+#define IT_MASK_TT	0x1e
+#define IT_MASK_TTT	0x1e
+#define IT_MASK_TEE	0x12

 #define PATCH(loc)	do {						\
 	  unsigned oldidx = codebuf->idx;				\
@@ -4233,7 +4237,8 @@
   eor_imm(jinfo->codebuf, r_result, r, 0x80000000);
 }

-void Thumb2_dNeg(Thumb2_Info *jinfo, u32 opc)
+// arm_op is either DP_EOR (for dnegate) or DP_BIC (for dabs)
+static void Thumb2_dUnaryOp(Thumb2_Info *jinfo, u32 arm_op)
 {
   Thumb2_Stack *jstack = jinfo->jstack;
   unsigned r_lo, r_hi, r_res_lo, r_res_hi;
@@ -4248,7 +4253,17 @@
   JASSERT(r_res_lo != r_res_hi, "oops");
   JASSERT(r_res_lo != r_hi, "r_res_lo != r_hi");
   mov_reg(jinfo->codebuf, r_res_lo, r_lo);
-  eor_imm(jinfo->codebuf, r_res_hi, r_hi, 0x80000000);
+  dop_imm(jinfo->codebuf, arm_op, r_res_hi, r_hi, 0x80000000);
+}
+
+void Thumb2_dNeg(Thumb2_Info *jinfo)
+{
+  Thumb2_dUnaryOp(jinfo, DP_EOR);
+}
+
+void Thumb2_dAbs(Thumb2_Info *jinfo)
+{
+  Thumb2_dUnaryOp(jinfo, DP_BIC);
 }

 void Thumb2_lOp(Thumb2_Info *jinfo, u32 opc)
@@ -5008,42 +5023,34 @@

 // Expand a call to a "special" method.  These are usually inlines of
 // java.lang.Math methods.  Return true if the inlining succeeded.
-static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo) {
+static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo,
+				  unsigned stackdepth) {
+  Thumb2_Stack *jstack = jinfo->jstack;
+  CodeBuf *codebuf = jinfo->codebuf;
+
+  const char *entry_name;
+
+  switch (callee->intrinsic_id()) {
+  case vmIntrinsics::_dabs:
+   {
+     Thumb2_dAbs(jinfo);
+     return true;
+    }
+
 #ifdef __ARM_PCS_VFP
-  Thumb2_Stack *jstack = jinfo->jstack;
-
-  const char *entry_name;
-
-  unsigned loc1 = 0;
-
-  switch (Interpreter::method_kind(callee)) {
-  case Interpreter::java_lang_math_abs:
-   {
-      unsigned r_lo, r_hi;
-
-      Thumb2_Fill(jinfo, 2);
-      r_lo = POP(jstack);
-      r_hi = POP(jstack);
-      dop_imm_s(jinfo->codebuf, DP_BIC, r_hi, r_hi, 0x80000000, 0);
-      PUSH(jstack, r_hi);
-      PUSH(jstack, r_lo);
-
-      return true;
-    }
-
-  case Interpreter::java_lang_math_sin:
+  case vmIntrinsics::_dsin:
     entry_name = "Java_java_lang_StrictMath_sin";
     break;

-  case Interpreter::java_lang_math_cos:
+  case vmIntrinsics::_dcos:
     entry_name = "Java_java_lang_StrictMath_cos";
     break;

-  case Interpreter::java_lang_math_tan:
+  case vmIntrinsics::_dtan:
     entry_name = "Java_java_lang_StrictMath_tan";
     break;

-  case Interpreter::java_lang_math_sqrt:
+  case vmIntrinsics::_dsqrt:
     {
       void *entry_point = dlsym(NULL, "Java_java_lang_StrictMath_sqrt");
       if (! entry_point)
@@ -5077,13 +5084,104 @@
       return true;
     }

-  case Interpreter::java_lang_math_log:
+  case vmIntrinsics::_dlog:
     entry_name = "Java_java_lang_StrictMath_log";
     break;

-  case Interpreter::java_lang_math_log10:
+  case vmIntrinsics::_dlog10:
     entry_name = "Java_java_lang_StrictMath_log10";
     break;
+#endif // __ARM_PCS_VFP
+
+  case vmIntrinsics::_compareAndSwapInt:
+   {
+      Thumb2_Fill(jinfo, 4);
+
+      unsigned update = POP(jstack);
+      unsigned expect = POP(jstack);
+      unsigned offset = POP(jstack);
+      POP(jstack);  // Actually the high part of the offset
+
+      // unsigned object = POP(jstack);
+      // unsigned unsafe = POP(jstack);  // Initially an instance of java.lang.Unsafe
+
+      Thumb2_Flush(jinfo);
+      // Get ourself a result reg that's not one of the inputs
+      unsigned exclude = (1<<update)|(1<<expect)|(1<<offset);
+      unsigned result = JSTACK_PREFER(jstack, ~exclude);
+
+      ldm(codebuf, (1<<ARM_IP)|(1<<ARM_LR), Rstack, POP_FD, 1); // Object addr
+      add_reg(codebuf, result, offset, ARM_IP); // result now points to word
+      ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0);  // Security check
+
+      fullBarrier(codebuf);
+
+      int retry = out_loc(codebuf);
+      ldrex_imm(codebuf, ARM_LR, result, 0);
+      cmp_reg(codebuf, ARM_LR, expect);
+      int loc_failed = forward_16(codebuf);
+      strex_imm(codebuf, ARM_IP, update, result, 0);
+      cmp_imm(codebuf, ARM_IP, 0);
+      branch(codebuf, COND_NE, retry);
+      bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+
+      it(codebuf, COND_NE, IT_MASK_TEE);
+      mov_imm(codebuf, result, 0);
+      mov_imm(codebuf, result, 1);
+      fullBarrier(codebuf);
+
+      PUSH(jstack, result);
+    }
+    return true;
+
+  case vmIntrinsics::_compareAndSwapLong:
+    {
+      Thumb2_Fill(jinfo, 4);
+
+      unsigned update_lo = POP(jstack);
+      unsigned update_hi = POP(jstack);
+      unsigned expect_lo = POP(jstack);
+      unsigned expect_hi = POP(jstack);
+
+      Thumb2_Flush(jinfo);
+      Thumb2_save_locals(jinfo, stackdepth - 4); // 4 args popped above
+
+      // instance of java.lang.Unsafe:
+      ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 3 * wordSize, 1, 0);
+      ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0);  // Security check
+
+      // Object:
+      ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 2 * wordSize, 1, 0);
+      // Offset:
+      ldr_imm(jinfo->codebuf, ARM_IP, Rstack, 0 * wordSize, 1, 0);
+      add_reg(codebuf, ARM_LR, ARM_LR, ARM_IP); // ARM_LR now points to word
+
+      fullBarrier(codebuf);
+
+      int retry = out_loc(codebuf);
+      ldrexd(codebuf, JAZ_V2, JAZ_V3, ARM_LR);
+      cmp_reg(codebuf, JAZ_V2, expect_lo);
+      it(jinfo->codebuf, COND_EQ, IT_MASK_T);
+      cmp_reg(codebuf, JAZ_V3, expect_hi);
+
+      int loc_failed = forward_16(codebuf);
+      strexd(codebuf, JAZ_V1, update_lo, update_hi, ARM_LR);
+      cmp_imm(codebuf, JAZ_V1, 0);
+      branch(codebuf, COND_NE, retry);
+      bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+
+      unsigned result = JSTACK_REG(jinfo->jstack);
+
+      it(codebuf, COND_NE, IT_MASK_TEE);
+      mov_imm(codebuf, result, 0);
+      mov_imm(codebuf, result, 1);
+      fullBarrier(codebuf);
+
+      Thumb2_restore_locals(jinfo, stackdepth - 4); // 4 args popped above
+      add_imm(codebuf, Rstack, Rstack, 4 * wordSize);
+      PUSH(jstack, result);
+    }
+    return true;

   default:
     return false;
@@ -5093,8 +5191,6 @@
   if (! entry_point)
     return false;

-  unsigned r_lo, r_hi, r_res_lo, r_res_hi;
-
   jstack_to_vfp(jinfo, VFP_D0);
   // FIXME: The JNI StrictMath routines don't use the JNIEnv *env
   // parameter, so it's arguably pointless to pass it here.
@@ -5103,9 +5199,6 @@
   vfp_to_jstack(jinfo, VFP_D0);

   return true;
-#else
-  return false;
-#endif // __ARM_PCS_VFP
 }

 void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start)
@@ -5766,7 +5859,7 @@
 	break;

       case opc_dneg:
-	Thumb2_dNeg(jinfo, opcode);
+	Thumb2_dNeg(jinfo);
 	break;

       case opc_i2l: {
@@ -6177,7 +6270,7 @@

 	callee = (methodOop)cache->f1();

-	if (handle_special_method(callee, jinfo))
+	if (handle_special_method(callee, jinfo, stackdepth))
 	  break;

 	if (callee->is_accessor()) {
@@ -6302,6 +6395,10 @@

 	if (cache->is_vfinal()) {
 	  methodOop callee = (methodOop)cache->f2();
+
+	  if (handle_special_method(callee, jinfo, stackdepth))
+	    break;
+
 	  if (callee->is_accessor()) {
 	    u1 *code = callee->code_base();
 	    int index = GET_NATIVE_U2(&code[2]);
@@ -7857,7 +7954,7 @@

 #define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))

-// DEBUG_METHODENTRY
+// DEBUG_METHODENTRY
   handlers[H_DEBUG_METHODENTRY] = out_pos(&codebuf);
   stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
   mov_reg(&codebuf, ARM_R2, ARM_R0);
diff -r 01123e3102cc patches/arm.patch
--- a/patches/arm.patch	Wed Feb 22 15:36:29 2012 +0000
+++ b/patches/arm.patch	Fri Mar 02 17:52:08 2012 +0000
@@ -230,3 +230,57 @@
  		-a ! \( -name DUMMY $(addprefix -o -name ,$(Src_Files_EXCLUDE)) \)))
  endef

+diff -r -uw icedtea6.pristine/openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp icedtea6/openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp
+--- openjdk.orig/hotspot/src/cpu/zero/vm/vm_version_zero.hpp	2011-11-14 22:07:31.000000000 +0000
++++ openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp	2012-02-29 17:27:11.472996427 +0000
+@@ -30,7 +30,18 @@
+ #include "runtime/vm_version.hpp"
+
+ class VM_Version : public Abstract_VM_Version {
++
+  public:
++  static void get_processor_features() {
++#ifdef __ARM_ARCH_7A__
++    Abstract_VM_Version::_supports_cx8 = true;
++#endif
++  }
++
++  static void initialize() {
++    get_processor_features();
++  }
++
+   static const char* cpu_features() {
+     return "";
+   }
+diff -r -uw openjdk.orig/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp openjdk/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp
+--- openjdk.orig/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp	2011-11-14 22:07:32.000000000 +0000
++++ openjdk/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp	2012-02-29 16:57:28.412360724 +0000
+@@ -160,6 +160,16 @@
+         return prev;
+     }
+ }
++
++#ifdef __ARM_ARCH_7A__
++/* Perform an atomic compare and swap: if the current value of `*PTR'
++   is OLDVAL, then write NEWVAL into `*PTR'.  Return the contents of
++   `*PTR' before the operation.*/
++extern "C" jlong arm_val_compare_and_swap_long(volatile void *ptr,
++					       jlong oldval,
++					       jlong newval);
++
++#endif	// __ARM_ARCH_7A__
+ #endif // ARM
+
+ inline void Atomic::store(jint store_value, volatile jint* dest) {
+@@ -274,7 +322,11 @@
+                              volatile jlong* dest,
+                              jlong compare_value) {
+
++#ifndef	__ARM_ARCH_7A__
+   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
++#else
++  return arm_val_compare_and_swap_long(dest, compare_value, exchange_value);
++#endif
+ }
+
+ inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value,



More information about the distro-pkg-dev mailing list