ARM: More intrinsics
Chris Phillips
ChrisPhi at redhat.com
Wed Mar 14 03:27:32 PDT 2012
Hi
Looks OK to me, but I'm still at the stage of having
to go and read the instruction def'n for a lot of arm stuff,
so my review is proceeding very slowly.
Cheers,
Chris
On 02/03/12 01:07 PM, Andrew Haley wrote:
> I've been adding _compareAndSwapInt and _compareAndSwapLong to the set
> of intrinsics. It's never that easy, of course, and while I was doing
> that I found a bug in the code for abs that would cause locals to be
> corrupted.
>
> I also realized that if the JIT does long atomic swaps, so must the
> interpreter, so this patch also provides a long atomic swap for Zero
> on ARM. It'll be used even if you don't have the JIT or the asm
> interpreter. There isn't a kernel builtin we can use to do 64-bit
> swaps because the operation is only provided by the very most recent
> Linux kernels. This means we can't use it, really.
>
> I also reorganized handle_special_method() a bit so that it uses VM
> intrinsic_id. This provides the opportunity for many more intrinsics
> than Interpreter::method_kind().
>
> Andrew.
>
>
> 2012-03-02 Andrew Haley<aph at redhat.com>
>
> * arm_port/hotspot/src/cpu/zero/vm/arm_cas.S: New file.
> * patches/arm.patch (void get_processor_features): New function
> that enables compareAndSwap on jlongs.
> (atomic_linux_zero.inline.hpp: arm_val_compare_and_swap): New
> function.
> (atomic_linux_zero.inline.hpp: Atomic::store): Use
> arm_val_compare_and_swap.
>
> * openjdk/hotspot/src/cpu/zero/vm/thumb2.cpp (IT_MASK_TT)
> (IT_MASK_TE, IT_MASK_TTT, IT_MASK_TEE): Add a few new IT
> encodings.
> (Thumb2_dUnaryOp): Generalize Thumb2_dNeg.
> (Thumb2_dNeg, Thumb2_dAbs): Specializations of Thumb2_dUnaryOp.
> (handle_special_method): Use intrinsic_id instead of method_kind.
> Add handlers for _compareAndSwapInt and _compareAndSwapLong.
> (Thumb2_codegen): Call handle_special_method() for invokevirtual.
> Pass stackdepth to handle_special_method().
>
> diff -r 01123e3102cc arm_port/hotspot/src/cpu/zero/vm/arm_cas.S
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/arm_port/hotspot/src/cpu/zero/vm/arm_cas.S Fri Mar 02 17:52:08 2012 +0000
> @@ -0,0 +1,30 @@
> +#ifdef __ARM_ARCH_7A__
> +@ jlong
> +@ arm_val_compare_and_swap_long(volatile void *ptr,
> +@ jlong oldval,
> +@ jlong newval) {
> + .pushsection .text
> + .global arm_val_compare_and_swap_long
> +#ifdef __thumb__
> + .thumb_func
> +#endif
> + .type arm_val_compare_and_swap_long, %function
> +arm_val_compare_and_swap_long:
> + stmfd sp!, {r4, r5, r6, r7}
> + ldrd r4, [sp, #16]
> + dmb sy
> +0: ldrexd r6, [r0]
> + cmp r6, r2
> + it eq
> + cmpeq r7, r3
> + bne 1f
> + strexd r1, r4, [r0]
> + cmp r1, #0
> + bne 0b
> + dmb sy
> +1: mov r0, r6
> + mov r1, r7
> + ldmfd sp!, {r4, r5, r6, r7}
> + bx lr
> + .popsection
> +#endif // __ARM_ARCH_7A__
> diff -r 01123e3102cc arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp
> --- a/arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp Wed Feb 22 15:36:29 2012 +0000
> +++ b/arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp Fri Mar 02 17:52:08 2012 +0000
> @@ -3042,6 +3042,10 @@
> #define T_IT(cond, mask) (0xbf00 | (conds[cond]<< 4) | (mask))
>
> #define IT_MASK_T 8
> +#define IT_MASK_TE 0x14
> +#define IT_MASK_TT 0x1c
> +#define IT_MASK_TTT 0x1e
> +#define IT_MASK_TEE 0x12
>
> #define PATCH(loc) do { \
> unsigned oldidx = codebuf->idx; \
> @@ -4233,7 +4237,8 @@
> eor_imm(jinfo->codebuf, r_result, r, 0x80000000);
> }
>
> -void Thumb2_dNeg(Thumb2_Info *jinfo, u32 opc)
> +// arm_op is either DP_EOR (for dnegate) or DP_BIC (for dabs)
> +static void Thumb2_dUnaryOp(Thumb2_Info *jinfo, u32 arm_op)
> {
> Thumb2_Stack *jstack = jinfo->jstack;
> unsigned r_lo, r_hi, r_res_lo, r_res_hi;
> @@ -4248,7 +4253,17 @@
> JASSERT(r_res_lo != r_res_hi, "oops");
> JASSERT(r_res_lo != r_hi, "r_res_lo != r_hi");
> mov_reg(jinfo->codebuf, r_res_lo, r_lo);
> - eor_imm(jinfo->codebuf, r_res_hi, r_hi, 0x80000000);
> + dop_imm(jinfo->codebuf, arm_op, r_res_hi, r_hi, 0x80000000);
> +}
> +
> +void Thumb2_dNeg(Thumb2_Info *jinfo)
> +{
> + Thumb2_dUnaryOp(jinfo, DP_EOR);
> +}
> +
> +void Thumb2_dAbs(Thumb2_Info *jinfo)
> +{
> + Thumb2_dUnaryOp(jinfo, DP_BIC);
> }
>
> void Thumb2_lOp(Thumb2_Info *jinfo, u32 opc)
> @@ -5008,42 +5023,34 @@
>
> // Expand a call to a "special" method. These are usually inlines of
> // java.lang.Math methods. Return true if the inlining succeeded.
> -static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo) {
> +static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo,
> + unsigned stackdepth) {
> + Thumb2_Stack *jstack = jinfo->jstack;
> + CodeBuf *codebuf = jinfo->codebuf;
> +
> + const char *entry_name;
> +
> + switch (callee->intrinsic_id()) {
> + case vmIntrinsics::_dabs:
> + {
> + Thumb2_dAbs(jinfo);
> + return true;
> + }
> +
> #ifdef __ARM_PCS_VFP
> - Thumb2_Stack *jstack = jinfo->jstack;
> -
> - const char *entry_name;
> -
> - unsigned loc1 = 0;
> -
> - switch (Interpreter::method_kind(callee)) {
> - case Interpreter::java_lang_math_abs:
> - {
> - unsigned r_lo, r_hi;
> -
> - Thumb2_Fill(jinfo, 2);
> - r_lo = POP(jstack);
> - r_hi = POP(jstack);
> - dop_imm_s(jinfo->codebuf, DP_BIC, r_hi, r_hi, 0x80000000, 0);
> - PUSH(jstack, r_hi);
> - PUSH(jstack, r_lo);
> -
> - return true;
> - }
> -
> - case Interpreter::java_lang_math_sin:
> + case vmIntrinsics::_dsin:
> entry_name = "Java_java_lang_StrictMath_sin";
> break;
>
> - case Interpreter::java_lang_math_cos:
> + case vmIntrinsics::_dcos:
> entry_name = "Java_java_lang_StrictMath_cos";
> break;
>
> - case Interpreter::java_lang_math_tan:
> + case vmIntrinsics::_dtan:
> entry_name = "Java_java_lang_StrictMath_tan";
> break;
>
> - case Interpreter::java_lang_math_sqrt:
> + case vmIntrinsics::_dsqrt:
> {
> void *entry_point = dlsym(NULL, "Java_java_lang_StrictMath_sqrt");
> if (! entry_point)
> @@ -5077,13 +5084,104 @@
> return true;
> }
>
> - case Interpreter::java_lang_math_log:
> + case vmIntrinsics::_dlog:
> entry_name = "Java_java_lang_StrictMath_log";
> break;
>
> - case Interpreter::java_lang_math_log10:
> + case vmIntrinsics::_dlog10:
> entry_name = "Java_java_lang_StrictMath_log10";
> break;
> +#endif // __ARM_PCS_VFP
> +
> + case vmIntrinsics::_compareAndSwapInt:
> + {
> + Thumb2_Fill(jinfo, 4);
> +
> + unsigned update = POP(jstack);
> + unsigned expect = POP(jstack);
> + unsigned offset = POP(jstack);
> + POP(jstack); // Actually the high part of the offset
> +
> + // unsigned object = POP(jstack);
> + // unsigned unsafe = POP(jstack); // Initially an instance of java.lang.Unsafe
> +
> + Thumb2_Flush(jinfo);
> + // Get ourself a result reg that's not one of the inputs
> + unsigned exclude = (1<<update)|(1<<expect)|(1<<offset);
> + unsigned result = JSTACK_PREFER(jstack, ~exclude);
> +
> + ldm(codebuf, (1<<ARM_IP)|(1<<ARM_LR), Rstack, POP_FD, 1); // Object addr
> + add_reg(codebuf, result, offset, ARM_IP); // result now points to word
> + ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0); // Security check
> +
> + fullBarrier(codebuf);
> +
> + int retry = out_loc(codebuf);
> + ldrex_imm(codebuf, ARM_LR, result, 0);
> + cmp_reg(codebuf, ARM_LR, expect);
> + int loc_failed = forward_16(codebuf);
> + strex_imm(codebuf, ARM_IP, update, result, 0);
> + cmp_imm(codebuf, ARM_IP, 0);
> + branch(codebuf, COND_NE, retry);
> + bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
> +
> + it(codebuf, COND_NE, IT_MASK_TEE);
> + mov_imm(codebuf, result, 0);
> + mov_imm(codebuf, result, 1);
> + fullBarrier(codebuf);
> +
> + PUSH(jstack, result);
> + }
> + return true;
> +
> + case vmIntrinsics::_compareAndSwapLong:
> + {
> + Thumb2_Fill(jinfo, 4);
> +
> + unsigned update_lo = POP(jstack);
> + unsigned update_hi = POP(jstack);
> + unsigned expect_lo = POP(jstack);
> + unsigned expect_hi = POP(jstack);
> +
> + Thumb2_Flush(jinfo);
> + Thumb2_save_locals(jinfo, stackdepth - 4); // 4 args popped above
> +
> + // instance of java.lang.Unsafe:
> + ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 3 * wordSize, 1, 0);
> + ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0); // Security check
> +
> + // Object:
> + ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 2 * wordSize, 1, 0);
> + // Offset:
> + ldr_imm(jinfo->codebuf, ARM_IP, Rstack, 0 * wordSize, 1, 0);
> + add_reg(codebuf, ARM_LR, ARM_LR, ARM_IP); // ARM_LR now points to word
> +
> + fullBarrier(codebuf);
> +
> + int retry = out_loc(codebuf);
> + ldrexd(codebuf, JAZ_V2, JAZ_V3, ARM_LR);
> + cmp_reg(codebuf, JAZ_V2, expect_lo);
> + it(jinfo->codebuf, COND_EQ, IT_MASK_T);
> + cmp_reg(codebuf, JAZ_V3, expect_hi);
> +
> + int loc_failed = forward_16(codebuf);
> + strexd(codebuf, JAZ_V1, update_lo, update_hi, ARM_LR);
> + cmp_imm(codebuf, JAZ_V1, 0);
> + branch(codebuf, COND_NE, retry);
> + bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
> +
> + unsigned result = JSTACK_REG(jinfo->jstack);
> +
> + it(codebuf, COND_NE, IT_MASK_TEE);
> + mov_imm(codebuf, result, 0);
> + mov_imm(codebuf, result, 1);
> + fullBarrier(codebuf);
> +
> + Thumb2_restore_locals(jinfo, stackdepth - 4); // 4 args popped above
> + add_imm(codebuf, Rstack, Rstack, 4 * wordSize);
> + PUSH(jstack, result);
> + }
> + return true;
>
> default:
> return false;
> @@ -5093,8 +5191,6 @@
> if (! entry_point)
> return false;
>
> - unsigned r_lo, r_hi, r_res_lo, r_res_hi;
> -
> jstack_to_vfp(jinfo, VFP_D0);
> // FIXME: The JNI StrictMath routines don't use the JNIEnv *env
> // parameter, so it's arguably pointless to pass it here.
> @@ -5103,9 +5199,6 @@
> vfp_to_jstack(jinfo, VFP_D0);
>
> return true;
> -#else
> - return false;
> -#endif // __ARM_PCS_VFP
> }
>
> void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start)
> @@ -5766,7 +5859,7 @@
> break;
>
> case opc_dneg:
> - Thumb2_dNeg(jinfo, opcode);
> + Thumb2_dNeg(jinfo);
> break;
>
> case opc_i2l: {
> @@ -6177,7 +6270,7 @@
>
> callee = (methodOop)cache->f1();
>
> - if (handle_special_method(callee, jinfo))
> + if (handle_special_method(callee, jinfo, stackdepth))
> break;
>
> if (callee->is_accessor()) {
> @@ -6302,6 +6395,10 @@
>
> if (cache->is_vfinal()) {
> methodOop callee = (methodOop)cache->f2();
> +
> + if (handle_special_method(callee, jinfo, stackdepth))
> + break;
> +
> if (callee->is_accessor()) {
> u1 *code = callee->code_base();
> int index = GET_NATIVE_U2(&code[2]);
> @@ -7857,7 +7954,7 @@
>
> #define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
>
> -// DEBUG_METHODENTRY
> +// DEBUG_METHDENTRY
> handlers[H_DEBUG_METHODENTRY] = out_pos(&codebuf);
> stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
> mov_reg(&codebuf, ARM_R2, ARM_R0);
> diff -r 01123e3102cc patches/arm.patch
> --- a/patches/arm.patch Wed Feb 22 15:36:29 2012 +0000
> +++ b/patches/arm.patch Fri Mar 02 17:52:08 2012 +0000
> @@ -230,3 +230,57 @@
> -a ! \( -name DUMMY $(addprefix -o -name ,$(Src_Files_EXCLUDE)) \)))
> endef
>
> +diff -r -uw icedtea6.pristine/openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp icedtea6/openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp
> +--- openjdk.orig/hotspot/src/cpu/zero/vm/vm_version_zero.hpp 2011-11-14 22:07:31.000000000 +0000
> ++++ openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp 2012-02-29 17:27:11.472996427 +0000
> +@@ -30,7 +30,18 @@
> + #include "runtime/vm_version.hpp"
> +
> + class VM_Version : public Abstract_VM_Version {
> ++
> + public:
> ++ static void get_processor_features() {
> ++#ifdef __ARM_ARCH_7A__
> ++ Abstract_VM_Version::_supports_cx8 = true;
> ++#endif
> ++ }
> ++
> ++ static void initialize() {
> ++ get_processor_features();
> ++ }
> ++
> + static const char* cpu_features() {
> + return "";
> + }
> +diff -r -uw openjdk.orig/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp openjdk/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp
> +--- openjdk.orig/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp 2011-11-14 22:07:32.000000000 +0000
> ++++ openjdk/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp 2012-02-29 16:57:28.412360724 +0000
> +@@ -160,6 +160,16 @@
> + return prev;
> + }
> + }
> ++
> ++#ifdef __ARM_ARCH_7A__
> ++/* Perform an atomic compare and swap: if the current value of `*PTR'
> ++ is OLDVAL, then write NEWVAL into `*PTR'. Return the contents of
> ++ `*PTR' before the operation.*/
> ++extern "C" jlong arm_val_compare_and_swap_long(volatile void *ptr,
> ++ jlong oldval,
> ++ jlong newval);
> ++
> ++#endif // __ARM_ARCH_7A__
> + #endif // ARM
> +
> + inline void Atomic::store(jint store_value, volatile jint* dest) {
> +@@ -274,7 +322,11 @@
> + volatile jlong* dest,
> + jlong compare_value) {
> +
> ++#ifndef __ARM_ARCH_7A__
> + return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
> ++#else
> ++ return arm_val_compare_and_swap_long(dest, compare_value, exchange_value);
> ++#endif
> + }
> +
> + inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value,
More information about the distro-pkg-dev
mailing list