ARM: More intrinsics

Chris Phillips ChrisPhi at redhat.com
Wed Mar 14 03:27:32 PDT 2012


Hi

Looks OK to me, but I'm still at the stage of having
to go and read the instruction definitions for a lot of the ARM stuff,
so my review is proceeding very slowly.

Cheers,
Chris

On 02/03/12 01:07 PM, Andrew Haley wrote:
> I've been adding _compareAndSwapInt and _compareAndSwapLong to the set
> of intrinsics.  It's never that easy, of course, and while I was doing
> that I found a bug in the code for abs that would cause locals to be
> corrupted.
>
> I also realized that if the JIT does long atomic swaps, so must the
> interpreter, so this patch also provides a long atomic swap for Zero
> on ARM.  It'll be used even if you don't have the JIT or the asm
> interpreter.  We can't rely on a kernel builtin to do 64-bit swaps,
> because that operation is only provided by the most recent Linux
> kernels.
>
> I also reorganized handle_special_method() a bit so that it uses VM
> intrinsic_id.  This provides the opportunity for many more intrinsics
> than Interpreter::method_kind().
>
> Andrew.
>
>
> 2012-03-02  Andrew Haley<aph at redhat.com>
>
> 	* arm_port/hotspot/src/cpu/zero/vm/arm_cas.S: New file.
> 	* patches/arm.patch (void get_processor_features): New function
> 	that enables compareAndSwap on jlongs.
> 	(atomic_linux_zero.inline.hpp: arm_val_compare_and_swap_long): New
> 	function.
> 	(atomic_linux_zero.inline.hpp: Atomic::cmpxchg): Use
> 	arm_val_compare_and_swap_long.
>
> 	* openjdk/hotspot/src/cpu/zero/vm/thumb2.cpp (IT_MASK_TT)
> 	(IT_MASK_TE, IT_MASK_TTT, IT_MASK_TEE): Add a few new IT
> 	encodings.
> 	(Thumb2_dUnaryOp): Generalize Thumb2_dNeg.
> 	(Thumb2_dNeg, Thumb2_dAbs): Specializations of Thumb2_dUnaryOp.
> 	(handle_special_method): Use intrinsic_id instead of method_kind.
> 	Add handlers for _compareAndSwapInt and _compareAndSwapLong.
> 	(Thumb2_codegen): Call handle_special_method() for invokevirtual.
> 	Pass stackdepth to handle_special_method().
>
> diff -r 01123e3102cc arm_port/hotspot/src/cpu/zero/vm/arm_cas.S
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ b/arm_port/hotspot/src/cpu/zero/vm/arm_cas.S	Fri Mar 02 17:52:08 2012 +0000
> @@ -0,0 +1,30 @@
> +#ifdef __ARM_ARCH_7A__
> +@	jlong
> +@	arm_val_compare_and_swap_long(volatile void *ptr,
> +@				 jlong oldval,
> +@				 jlong newval) {
> +	.pushsection .text
> +	.global arm_val_compare_and_swap_long
> +#ifdef __thumb__
> +	.thumb_func
> +#endif
> +	.type arm_val_compare_and_swap_long, %function
> +arm_val_compare_and_swap_long:
> +	stmfd	sp!, {r4, r5, r6, r7}
> +	ldrd	r4, [sp, #16]
> +	dmb	sy
> +0:	ldrexd	r6, [r0]
> +	cmp	r6, r2
> +	it	eq
> +	cmpeq	r7, r3
> +	bne	1f
> +	strexd	r1, r4, [r0]
> +	cmp	r1, #0
> +	bne	0b
> +	dmb	sy
> +1:	mov	r0, r6
> +	mov	r1, r7
> +	ldmfd	sp!, {r4, r5, r6, r7}
> +	bx	lr
> +	.popsection
> +#endif // __ARM_ARCH_7A__
> diff -r 01123e3102cc arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp
> --- a/arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp	Wed Feb 22 15:36:29 2012 +0000
> +++ b/arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp	Fri Mar 02 17:52:08 2012 +0000
> @@ -3042,6 +3042,10 @@
>   #define T_IT(cond, mask) (0xbf00 | (conds[cond]<<  4) | (mask))
>
>   #define IT_MASK_T	8
> +#define IT_MASK_TE	0x14
> +#define IT_MASK_TT	0x1e
> +#define IT_MASK_TTT	0x1e
> +#define IT_MASK_TEE	0x12
>
>   #define PATCH(loc)	do {						\
>   	  unsigned oldidx = codebuf->idx;				\
> @@ -4233,7 +4237,8 @@
>     eor_imm(jinfo->codebuf, r_result, r, 0x80000000);
>   }
>
> -void Thumb2_dNeg(Thumb2_Info *jinfo, u32 opc)
> +// arm_op is either DP_EOR (for dnegate) or DP_BIC (for dabs)
> +static void Thumb2_dUnaryOp(Thumb2_Info *jinfo, u32 arm_op)
>   {
>     Thumb2_Stack *jstack = jinfo->jstack;
>     unsigned r_lo, r_hi, r_res_lo, r_res_hi;
> @@ -4248,7 +4253,17 @@
>     JASSERT(r_res_lo != r_res_hi, "oops");
>     JASSERT(r_res_lo != r_hi, "r_res_lo != r_hi");
>     mov_reg(jinfo->codebuf, r_res_lo, r_lo);
> -  eor_imm(jinfo->codebuf, r_res_hi, r_hi, 0x80000000);
> +  dop_imm(jinfo->codebuf, arm_op, r_res_hi, r_hi, 0x80000000);
> +}
> +
> +void Thumb2_dNeg(Thumb2_Info *jinfo)
> +{
> +  Thumb2_dUnaryOp(jinfo, DP_EOR);
> +}
> +
> +void Thumb2_dAbs(Thumb2_Info *jinfo)
> +{
> +  Thumb2_dUnaryOp(jinfo, DP_BIC);
>   }
>
>   void Thumb2_lOp(Thumb2_Info *jinfo, u32 opc)
> @@ -5008,42 +5023,34 @@
>
>   // Expand a call to a "special" method.  These are usually inlines of
>   // java.lang.Math methods.  Return true if the inlining succeeded.
> -static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo) {
> +static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo,
> +				  unsigned stackdepth) {
> +  Thumb2_Stack *jstack = jinfo->jstack;
> +  CodeBuf *codebuf = jinfo->codebuf;
> +
> +  const char *entry_name;
> +
> +  switch (callee->intrinsic_id()) {
> +  case vmIntrinsics::_dabs:
> +   {
> +     Thumb2_dAbs(jinfo);
> +     return true;
> +    }
> +
>   #ifdef __ARM_PCS_VFP
> -  Thumb2_Stack *jstack = jinfo->jstack;
> -
> -  const char *entry_name;
> -
> -  unsigned loc1 = 0;
> -
> -  switch (Interpreter::method_kind(callee)) {
> -  case Interpreter::java_lang_math_abs:
> -   {
> -      unsigned r_lo, r_hi;
> -
> -      Thumb2_Fill(jinfo, 2);
> -      r_lo = POP(jstack);
> -      r_hi = POP(jstack);
> -      dop_imm_s(jinfo->codebuf, DP_BIC, r_hi, r_hi, 0x80000000, 0);
> -      PUSH(jstack, r_hi);
> -      PUSH(jstack, r_lo);
> -
> -      return true;
> -    }
> -
> -  case Interpreter::java_lang_math_sin:
> +  case vmIntrinsics::_dsin:
>       entry_name = "Java_java_lang_StrictMath_sin";
>       break;
>
> -  case Interpreter::java_lang_math_cos:
> +  case vmIntrinsics::_dcos:
>       entry_name = "Java_java_lang_StrictMath_cos";
>       break;
>
> -  case Interpreter::java_lang_math_tan:
> +  case vmIntrinsics::_dtan:
>       entry_name = "Java_java_lang_StrictMath_tan";
>       break;
>
> -  case Interpreter::java_lang_math_sqrt:
> +  case vmIntrinsics::_dsqrt:
>       {
>         void *entry_point = dlsym(NULL, "Java_java_lang_StrictMath_sqrt");
>         if (! entry_point)
> @@ -5077,13 +5084,104 @@
>         return true;
>       }
>
> -  case Interpreter::java_lang_math_log:
> +  case vmIntrinsics::_dlog:
>       entry_name = "Java_java_lang_StrictMath_log";
>       break;
>
> -  case Interpreter::java_lang_math_log10:
> +  case vmIntrinsics::_dlog10:
>       entry_name = "Java_java_lang_StrictMath_log10";
>       break;
> +#endif // __ARM_PCS_VFP
> +
> +  case vmIntrinsics::_compareAndSwapInt:
> +   {
> +      Thumb2_Fill(jinfo, 4);
> +
> +      unsigned update = POP(jstack);
> +      unsigned expect = POP(jstack);
> +      unsigned offset = POP(jstack);
> +      POP(jstack);  // Actually the high part of the offset
> +
> +      // unsigned object = POP(jstack);
> +      // unsigned unsafe = POP(jstack);  // Initially an instance of java.lang.Unsafe
> +
> +      Thumb2_Flush(jinfo);
> +      // Get ourself a result reg that's not one of the inputs
> +      unsigned exclude = (1<<update)|(1<<expect)|(1<<offset);
> +      unsigned result = JSTACK_PREFER(jstack, ~exclude);
> +
> +      ldm(codebuf, (1<<ARM_IP)|(1<<ARM_LR), Rstack, POP_FD, 1); // Object addr
> +      add_reg(codebuf, result, offset, ARM_IP); // result now points to word
> +      ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0);  // Security check
> +
> +      fullBarrier(codebuf);
> +
> +      int retry = out_loc(codebuf);
> +      ldrex_imm(codebuf, ARM_LR, result, 0);
> +      cmp_reg(codebuf, ARM_LR, expect);
> +      int loc_failed = forward_16(codebuf);
> +      strex_imm(codebuf, ARM_IP, update, result, 0);
> +      cmp_imm(codebuf, ARM_IP, 0);
> +      branch(codebuf, COND_NE, retry);
> +      bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
> +
> +      it(codebuf, COND_NE, IT_MASK_TEE);
> +      mov_imm(codebuf, result, 0);
> +      mov_imm(codebuf, result, 1);
> +      fullBarrier(codebuf);
> +
> +      PUSH(jstack, result);
> +    }
> +    return true;
> +
> +  case vmIntrinsics::_compareAndSwapLong:
> +    {
> +      Thumb2_Fill(jinfo, 4);
> +
> +      unsigned update_lo = POP(jstack);
> +      unsigned update_hi = POP(jstack);
> +      unsigned expect_lo = POP(jstack);
> +      unsigned expect_hi = POP(jstack);
> +
> +      Thumb2_Flush(jinfo);
> +      Thumb2_save_locals(jinfo, stackdepth - 4); // 4 args popped above
> +
> +      // instance of java.lang.Unsafe:
> +      ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 3 * wordSize, 1, 0);
> +      ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0);  // Security check
> +
> +      // Object:
> +      ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 2 * wordSize, 1, 0);
> +      // Offset:
> +      ldr_imm(jinfo->codebuf, ARM_IP, Rstack, 0 * wordSize, 1, 0);
> +      add_reg(codebuf, ARM_LR, ARM_LR, ARM_IP); // ARM_LR now points to word
> +
> +      fullBarrier(codebuf);
> +
> +      int retry = out_loc(codebuf);
> +      ldrexd(codebuf, JAZ_V2, JAZ_V3, ARM_LR);
> +      cmp_reg(codebuf, JAZ_V2, expect_lo);
> +      it(jinfo->codebuf, COND_EQ, IT_MASK_T);
> +      cmp_reg(codebuf, JAZ_V3, expect_hi);
> +
> +      int loc_failed = forward_16(codebuf);
> +      strexd(codebuf, JAZ_V1, update_lo, update_hi, ARM_LR);
> +      cmp_imm(codebuf, JAZ_V1, 0);
> +      branch(codebuf, COND_NE, retry);
> +      bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
> +
> +      unsigned result = JSTACK_REG(jinfo->jstack);
> +
> +      it(codebuf, COND_NE, IT_MASK_TEE);
> +      mov_imm(codebuf, result, 0);
> +      mov_imm(codebuf, result, 1);
> +      fullBarrier(codebuf);
> +
> +      Thumb2_restore_locals(jinfo, stackdepth - 4); // 4 args popped above
> +      add_imm(codebuf, Rstack, Rstack, 4 * wordSize);
> +      PUSH(jstack, result);
> +    }
> +    return true;
>
>     default:
>       return false;
> @@ -5093,8 +5191,6 @@
>     if (! entry_point)
>       return false;
>
> -  unsigned r_lo, r_hi, r_res_lo, r_res_hi;
> -
>     jstack_to_vfp(jinfo, VFP_D0);
>     // FIXME: The JNI StrictMath routines don't use the JNIEnv *env
>     // parameter, so it's arguably pointless to pass it here.
> @@ -5103,9 +5199,6 @@
>     vfp_to_jstack(jinfo, VFP_D0);
>
>     return true;
> -#else
> -  return false;
> -#endif // __ARM_PCS_VFP
>   }
>
>   void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start)
> @@ -5766,7 +5859,7 @@
>   	break;
>
>         case opc_dneg:
> -	Thumb2_dNeg(jinfo, opcode);
> +	Thumb2_dNeg(jinfo);
>   	break;
>
>         case opc_i2l: {
> @@ -6177,7 +6270,7 @@
>
>   	callee = (methodOop)cache->f1();
>
> -	if (handle_special_method(callee, jinfo))
> +	if (handle_special_method(callee, jinfo, stackdepth))
>   	  break;
>
>   	if (callee->is_accessor()) {
> @@ -6302,6 +6395,10 @@
>
>   	if (cache->is_vfinal()) {
>   	  methodOop callee = (methodOop)cache->f2();
> +
> +	  if (handle_special_method(callee, jinfo, stackdepth))
> +	    break;
> +
>   	  if (callee->is_accessor()) {
>   	    u1 *code = callee->code_base();
>   	    int index = GET_NATIVE_U2(&code[2]);
> @@ -7857,7 +7954,7 @@
>
>   #define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
>
> -// DEBUG_METHODENTRY
> +// DEBUG_METHODENTRY
>     handlers[H_DEBUG_METHODENTRY] = out_pos(&codebuf);
>     stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
>     mov_reg(&codebuf, ARM_R2, ARM_R0);
> diff -r 01123e3102cc patches/arm.patch
> --- a/patches/arm.patch	Wed Feb 22 15:36:29 2012 +0000
> +++ b/patches/arm.patch	Fri Mar 02 17:52:08 2012 +0000
> @@ -230,3 +230,57 @@
>    		-a ! \( -name DUMMY $(addprefix -o -name ,$(Src_Files_EXCLUDE)) \)))
>    endef
>
> +diff -r -uw icedtea6.pristine/openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp icedtea6/openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp
> +--- openjdk.orig/hotspot/src/cpu/zero/vm/vm_version_zero.hpp	2011-11-14 22:07:31.000000000 +0000
> ++++ openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp	2012-02-29 17:27:11.472996427 +0000
> +@@ -30,7 +30,18 @@
> + #include "runtime/vm_version.hpp"
> +
> + class VM_Version : public Abstract_VM_Version {
> ++
> +  public:
> ++  static void get_processor_features() {
> ++#ifdef __ARM_ARCH_7A__
> ++    Abstract_VM_Version::_supports_cx8 = true;
> ++#endif
> ++  }
> ++
> ++  static void initialize() {
> ++    get_processor_features();
> ++  }
> ++
> +   static const char* cpu_features() {
> +     return "";
> +   }
> +diff -r -uw openjdk.orig/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp openjdk/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp
> +--- openjdk.orig/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp	2011-11-14 22:07:32.000000000 +0000
> ++++ openjdk/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp	2012-02-29 16:57:28.412360724 +0000
> +@@ -160,6 +160,16 @@
> +         return prev;
> +     }
> + }
> ++
> ++#ifdef __ARM_ARCH_7A__
> ++/* Perform an atomic compare and swap: if the current value of `*PTR'
> ++   is OLDVAL, then write NEWVAL into `*PTR'.  Return the contents of
> ++   `*PTR' before the operation.*/
> ++extern "C" jlong arm_val_compare_and_swap_long(volatile void *ptr,
> ++					       jlong oldval,
> ++					       jlong newval);
> ++
> ++#endif	// __ARM_ARCH_7A__
> + #endif // ARM
> +
> + inline void Atomic::store(jint store_value, volatile jint* dest) {
> +@@ -274,7 +322,11 @@
> +                              volatile jlong* dest,
> +                              jlong compare_value) {
> +
> ++#ifndef	__ARM_ARCH_7A__
> +   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
> ++#else
> ++  return arm_val_compare_and_swap_long(dest, compare_value, exchange_value);
> ++#endif
> + }
> +
> + inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value,




More information about the distro-pkg-dev mailing list