ARM: More intrinsics part 1/2
Xerxes Rånby
xerxes at zafena.se
Fri Mar 16 05:16:12 PDT 2012
2012-03-14 11:27, Chris Phillips skrev:
> Hi
>
> Looks OK to me, but I'm still at the stage of having
> to go and read the instruction def'n for a lot of arm stuff,
> so my review is proceeding very slowly.
>
> Cheers?
> Chris
>
> On 02/03/12 01:07 PM, Andrew Haley wrote:
>> I've been adding _compareAndSwapInt and _compareAndSwapLong to the set
>> of intrinsics. It's never that easy, of course, and while I was doing
>> that I found a bug in the code for abs that would cause locals to be
>> corrupted.
>>
>> I also realized that if the JIT does long atomic swaps, so must the
>> interpreter, so this patch also provides a long atomic swap for Zero
>> on ARM. It'll be used even if you don't have the JIT or the asm
>> interpreter. There isn't a kernel builtin we can use to do 64-bit
>> swaps because the operation is only provided by the very most recent
>> Linux kernels. This means we can't use it, really.
>>
>> I also reorganized handle_special_method() a bit so that it uses VM
>> intrinsic_id. This provides the opportunity for many more intrinsics
>> than Interpreter::method_kind().
>>
>> Andrew.
>>
>>
>> 2012-03-02 Andrew Haley<aph at redhat.com>
>>
>> * arm_port/hotspot/src/cpu/zero/vm/arm_cas.S: New file.
>> * patches/arm.patch (void get_processor_features): New function
>> that enables compareAndSwap on jlongs.
>> (atomic_linux_zero.inline.hpp: arm_val_compare_and_swap): New
>> function.
>> (atomic_linux_zero.inline.hpp: Atomic::store): Use
>> arm_val_compare_and_swap.
>>
>> * openjdk/hotspot/src/cpu/zero/vm/thumb2.cpp (IT_MASK_TT)
>> (IT_MASK_TE, IT_MASK_TTT, IT_MASK_TEE): Add a few new IT
>> encodings.
>> (Thumb2_dUnaryOp): Generalize Thumb2_dNeg.
>> (Thumb2_dNeg, Thumb2_dAbs): Specializations of Thumb2_dUnaryOp.
>> (handle_special_method): Use intrinsic_id instead of method_kind.
>> Add handlers for _compareAndSwapInt and _compareAndSwapLong.
>> (Thumb2_codegen): Call handle_special_method() for invokevirtual.
>> Pass stackdepth to handle_special_method().
>>
>> diff -r 01123e3102cc arm_port/hotspot/src/cpu/zero/vm/arm_cas.S
>> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
>> +++ b/arm_port/hotspot/src/cpu/zero/vm/arm_cas.S Fri Mar 02 17:52:08 2012 +0000
>> @@ -0,0 +1,30 @@
>> +#ifdef __ARM_ARCH_7A__
>> +@ jlong
>> +@ arm_val_compare_and_swap_long(volatile void *ptr,
>> +@ jlong oldval,
>> +@ jlong newval) {
>> + .pushsection .text
>> + .global arm_val_compare_and_swap_long
>> +#ifdef __thumb__
>> + .thumb_func
>> +#endif
Can we skip the #ifdef __thumb__ here and have .thumb_func set permanent.. i find the code below to be thumb in all situations.
>> + .type arm_val_compare_and_swap_long, %function
>> +arm_val_compare_and_swap_long:
>> + stmfd sp!, {r4, r5, r6, r7}
>> + ldrd r4, [sp, #16]
>> + dmb sy
>> +0: ldrexd r6, [r0]
>> + cmp r6, r2
>> + it eq
>> + cmpeq r7, r3
>> + bne 1f
>> + strexd r1, r4, [r0]
>> + cmp r1, #0
>> + bne 0b
>> + dmb sy
>> +1: mov r0, r6
>> + mov r1, r7
>> + ldmfd sp!, {r4, r5, r6, r7}
>> + bx lr
>> + .popsection
>> +#endif // __ARM_ARCH_7A__
OK I think, i have no good way to verify its functionality except running the Volatile tests from: http://www.cs.umd.edu/~pugh/java/memoryModel/ *a-lot*.
Testcase: cd into icedtea build dir and execute
wget http://www.cs.umd.edu/~pugh/java/memoryModel/Volatiles.tar
tar xvf Volatiles.tar
./openjdk.build/j2sdk-image/bin/javac volatile/AtomicLong.java
./openjdk.build/j2sdk-image/bin/javac volatile/CoherenceVolatile.java
./openjdk.build/j2sdk-image/bin/javac volatile/ReadAfterWrite.java
./openjdk.build/j2sdk-image/bin/java -cp volatile AtomicLong
./openjdk.build/j2sdk-image/bin/java -cp volatile CoherenceVolatile
./openjdk.build/j2sdk-image/bin/java -cp volatile ReadAfterWrite
>> diff -r 01123e3102cc arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp
>> --- a/arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp Wed Feb 22 15:36:29 2012 +0000
>> +++ b/arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp Fri Mar 02 17:52:08 2012 +0000
>> @@ -3042,6 +3042,10 @@
>> #define T_IT(cond, mask) (0xbf00 | (conds[cond]<< 4) | (mask))
>>
>> #define IT_MASK_T 8
>> +#define IT_MASK_TE 0x14
>> +#define IT_MASK_TT 0x1e
>> +#define IT_MASK_TTT 0x1e
This are inconsistent IT_MASK_TT and IT_MASK_TTT are both 0x1e,
This block gets fixed in the follow up patch part 2/2 so its ok!
http://mail.openjdk.java.net/pipermail/distro-pkg-dev/2012-March/017718.html
>> @@ -5008,42 +5023,34 @@
>>
>> // Expand a call to a "special" method. These are usually inlines of
>> // java.lang.Math methods. Return true if the inlining succeeded.
>> -static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo) {
>> +static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo,
>> + unsigned stackdepth) {
>> + Thumb2_Stack *jstack = jinfo->jstack;
>> + CodeBuf *codebuf = jinfo->codebuf;
>> +
>> + const char *entry_name;
>> +
>> + switch (callee->intrinsic_id()) {
>> + case vmIntrinsics::_dabs:
>> + {
>> + Thumb2_dAbs(jinfo);
>> + return true;
>> + }
>> +
>> #ifdef __ARM_PCS_VFP
>> - Thumb2_Stack *jstack = jinfo->jstack;
>> -
>> - const char *entry_name;
>> -
>> - unsigned loc1 = 0;
>> -
>> - switch (Interpreter::method_kind(callee)) {
>> - case Interpreter::java_lang_math_abs:
>> - {
>> - unsigned r_lo, r_hi;
>> -
>> - Thumb2_Fill(jinfo, 2);
>> - r_lo = POP(jstack);
>> - r_hi = POP(jstack);
>> - dop_imm_s(jinfo->codebuf, DP_BIC, r_hi, r_hi, 0x80000000, 0);
>> - PUSH(jstack, r_hi);
>> - PUSH(jstack, r_lo);
>> -
>> - return true;
>> - }
>> -
>> - case Interpreter::java_lang_math_sin:
>> + case vmIntrinsics::_dsin:
>> entry_name = "Java_java_lang_StrictMath_sin";
>> break;
>>
>> - case Interpreter::java_lang_math_cos:
>> + case vmIntrinsics::_dcos:
>> entry_name = "Java_java_lang_StrictMath_cos";
>> break;
>>
>> - case Interpreter::java_lang_math_tan:
>> + case vmIntrinsics::_dtan:
>> entry_name = "Java_java_lang_StrictMath_tan";
>> break;
>>
>> - case Interpreter::java_lang_math_sqrt:
>> + case vmIntrinsics::_dsqrt:
>> {
>> void *entry_point = dlsym(NULL, "Java_java_lang_StrictMath_sqrt");
>> if (! entry_point)
>> @@ -5077,13 +5084,104 @@
>> return true;
>> }
>>
>> - case Interpreter::java_lang_math_log:
>> + case vmIntrinsics::_dlog:
>> entry_name = "Java_java_lang_StrictMath_log";
>> break;
>>
>> - case Interpreter::java_lang_math_log10:
>> + case vmIntrinsics::_dlog10:
>> entry_name = "Java_java_lang_StrictMath_log10";
>> break;
>> +#endif // __ARM_PCS_VFP
>> +
>> + case vmIntrinsics::_compareAndSwapInt:
>> + {
>> + Thumb2_Fill(jinfo, 4);
>> +
>> + unsigned update = POP(jstack);
>> + unsigned expect = POP(jstack);
>> + unsigned offset = POP(jstack);
>> + POP(jstack); // Actually the high part of the offset
>> +
>> + // unsigned object = POP(jstack);
>> + // unsigned unsafe = POP(jstack); // Initially an instance of java.lang.Unsafe
>> +
>> + Thumb2_Flush(jinfo);
>> + // Get ourself a result reg that's not one of the inputs
>> + unsigned exclude = (1<<update)|(1<<expect)|(1<<offset);
>> + unsigned result = JSTACK_PREFER(jstack, ~exclude);
>> +
>> + ldm(codebuf, (1<<ARM_IP)|(1<<ARM_LR), Rstack, POP_FD, 1); // Object addr
>> + add_reg(codebuf, result, offset, ARM_IP); // result now points to word
>> + ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0); // Security check
>> +
>> + fullBarrier(codebuf);
>> +
>> + int retry = out_loc(codebuf);
>> + ldrex_imm(codebuf, ARM_LR, result, 0);
>> + cmp_reg(codebuf, ARM_LR, expect);
>> + int loc_failed = forward_16(codebuf);
>> + strex_imm(codebuf, ARM_IP, update, result, 0);
>> + cmp_imm(codebuf, ARM_IP, 0);
>> + branch(codebuf, COND_NE, retry);
>> + bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
>> +
>> + it(codebuf, COND_NE, IT_MASK_TEE);
>> + mov_imm(codebuf, result, 0);
>> + mov_imm(codebuf, result, 1);
>> + fullBarrier(codebuf);
>> +
>> + PUSH(jstack, result);
>> + }
>> + return true;
>> +
>> + case vmIntrinsics::_compareAndSwapLong:
>> + {
>> + Thumb2_Fill(jinfo, 4);
>> +
>> + unsigned update_lo = POP(jstack);
>> + unsigned update_hi = POP(jstack);
>> + unsigned expect_lo = POP(jstack);
>> + unsigned expect_hi = POP(jstack);
>> +
>> + Thumb2_Flush(jinfo);
>> + Thumb2_save_locals(jinfo, stackdepth - 4); // 4 args popped above
>> +
>> + // instance of java.lang.Unsafe:
>> + ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 3 * wordSize, 1, 0);
>> + ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0); // Security check
>> +
>> + // Object:
>> + ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 2 * wordSize, 1, 0);
>> + // Offset:
>> + ldr_imm(jinfo->codebuf, ARM_IP, Rstack, 0 * wordSize, 1, 0);
>> + add_reg(codebuf, ARM_LR, ARM_LR, ARM_IP); // ARM_LR now points to word
>> +
>> + fullBarrier(codebuf);
>> +
>> + int retry = out_loc(codebuf);
>> + ldrexd(codebuf, JAZ_V2, JAZ_V3, ARM_LR);
>> + cmp_reg(codebuf, JAZ_V2, expect_lo);
>> + it(jinfo->codebuf, COND_EQ, IT_MASK_T);
>> + cmp_reg(codebuf, JAZ_V3, expect_hi);
>> +
>> + int loc_failed = forward_16(codebuf);
>> + strexd(codebuf, JAZ_V1, update_lo, update_hi, ARM_LR);
>> + cmp_imm(codebuf, JAZ_V1, 0);
>> + branch(codebuf, COND_NE, retry);
>> + bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
>> +
>> + unsigned result = JSTACK_REG(jinfo->jstack);
>> +
>> + it(codebuf, COND_NE, IT_MASK_TEE);
>> + mov_imm(codebuf, result, 0);
>> + mov_imm(codebuf, result, 1);
>> + fullBarrier(codebuf);
>> +
>> + Thumb2_restore_locals(jinfo, stackdepth - 4); // 4 args popped above
>> + add_imm(codebuf, Rstack, Rstack, 4 * wordSize);
>> + PUSH(jstack, result);
>> + }
>> + return true;
>>
>> default:
>> return false;
>> @@ -5093,8 +5191,6 @@
>> if (! entry_point)
>> return false;
>>
>> - unsigned r_lo, r_hi, r_res_lo, r_res_hi;
>> -
>> jstack_to_vfp(jinfo, VFP_D0);
>> // FIXME: The JNI StrictMath routines don't use the JNIEnv *env
>> // parameter, so it's arguably pointless to pass it here.
>> @@ -5103,9 +5199,6 @@
>> vfp_to_jstack(jinfo, VFP_D0);
>>
>> return true;
>> -#else
>> - return false;
>> -#endif // __ARM_PCS_VFP
>> }
>>
>> void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start)
>> @@ -5766,7 +5859,7 @@
>> break;
>>
>> case opc_dneg:
>> - Thumb2_dNeg(jinfo, opcode);
>> + Thumb2_dNeg(jinfo);
>> break;
>>
>> case opc_i2l: {
>> @@ -6177,7 +6270,7 @@
>>
>> callee = (methodOop)cache->f1();
>>
>> - if (handle_special_method(callee, jinfo))
>> + if (handle_special_method(callee, jinfo, stackdepth))
>> break;
>>
>> if (callee->is_accessor()) {
>> @@ -6302,6 +6395,10 @@
>>
>> if (cache->is_vfinal()) {
>> methodOop callee = (methodOop)cache->f2();
>> +
>> + if (handle_special_method(callee, jinfo, stackdepth))
>> + break;
>> +
>> if (callee->is_accessor()) {
>> u1 *code = callee->code_base();
>> int index = GET_NATIVE_U2(&code[2]);
Ok nice to see this work gets supported on softfp armv7 as well as hard armv7 builds.
>> @@ -7857,7 +7954,7 @@
>>
>> #define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
>>
>> -// DEBUG_METHODENTRY
>> +// DEBUG_METHDENTRY
>> handlers[H_DEBUG_METHODENTRY] = out_pos(&codebuf);
>> stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
>> mov_reg(&codebuf, ARM_R2, ARM_R0);
Please skip merging this comment typo.
>> diff -r 01123e3102cc patches/arm.patch
>> --- a/patches/arm.patch Wed Feb 22 15:36:29 2012 +0000
>> +++ b/patches/arm.patch Fri Mar 02 17:52:08 2012 +0000
>> @@ -230,3 +230,57 @@
>> -a ! \( -name DUMMY $(addprefix -o -name ,$(Src_Files_EXCLUDE)) \)))
>> endef
>>
>> +diff -r -uw icedtea6.pristine/openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp icedtea6/openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp
>> +--- openjdk.orig/hotspot/src/cpu/zero/vm/vm_version_zero.hpp 2011-11-14 22:07:31.000000000 +0000
>> ++++ openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp 2012-02-29 17:27:11.472996427 +0000
>> +@@ -30,7 +30,18 @@
>> + #include "runtime/vm_version.hpp"
>> +
>> + class VM_Version : public Abstract_VM_Version {
>> ++
>> + public:
>> ++ static void get_processor_features() {
>> ++#ifdef __ARM_ARCH_7A__
>> ++ Abstract_VM_Version::_supports_cx8 = true;
>> ++#endif
>> ++ }
>> ++
>> ++ static void initialize() {
>> ++ get_processor_features();
>> ++ }
>> ++
>> + static const char* cpu_features() {
>> + return "";
>> + }
>> +diff -r -uw openjdk.orig/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp openjdk/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp
>> +--- openjdk.orig/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp 2011-11-14 22:07:32.000000000 +0000
>> ++++ openjdk/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp 2012-02-29 16:57:28.412360724 +0000
>> +@@ -160,6 +160,16 @@
>> + return prev;
>> + }
>> + }
>> ++
>> ++#ifdef __ARM_ARCH_7A__
>> ++/* Perform an atomic compare and swap: if the current value of `*PTR'
>> ++ is OLDVAL, then write NEWVAL into `*PTR'. Return the contents of
>> ++ `*PTR' before the operation.*/
>> ++extern "C" jlong arm_val_compare_and_swap_long(volatile void *ptr,
>> ++ jlong oldval,
>> ++ jlong newval);
>> ++
>> ++#endif // __ARM_ARCH_7A__
>> + #endif // ARM
>> +
>> + inline void Atomic::store(jint store_value, volatile jint* dest) {
>> +@@ -274,7 +322,11 @@
>> + volatile jlong* dest,
>> + jlong compare_value) {
>> +
>> ++#ifndef __ARM_ARCH_7A__
>> + return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
>> ++#else
>> ++ return arm_val_compare_and_swap_long(dest, compare_value, exchange_value);
>> ++#endif
>> + }
>> +
>> + inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value,
>
Ok, the minor fixes mentioned above like can get fixed in a follow up patch.
Please push this part 1/2 into icedtea6 head.
Thank you for noticing and pointing out that a lot of low hanging fruit are ready for harvest in the arm port!
Cheers
Xerxes
More information about the distro-pkg-dev
mailing list