ARM: More intrinsics
Andrew Haley
aph at redhat.com
Fri Mar 2 10:07:23 PST 2012
I've been adding _compareAndSwapInt and _compareAndSwapLong to the set
of intrinsics. It's never that easy, of course, and while I was doing
that I found a bug in the code for abs that would cause locals to be
corrupted.
I also realized that if the JIT does long atomic swaps, so must the
interpreter, so this patch also provides a long atomic swap for Zero
on ARM. It'll be used even if you don't have the JIT or the asm
interpreter. There isn't a kernel builtin we can use to do 64-bit
swaps because the operation is only provided by the most recent
Linux kernels, which means that in practice we can't rely on it.
I also reorganized handle_special_method() a bit so that it uses VM
intrinsic_id. This provides the opportunity for many more intrinsics
than Interpreter::method_kind().
Andrew.
2012-03-02 Andrew Haley <aph at redhat.com>
* arm_port/hotspot/src/cpu/zero/vm/arm_cas.S: New file.
* patches/arm.patch (void get_processor_features): New function
that enables compareAndSwap on jlongs.
(atomic_linux_zero.inline.hpp: arm_val_compare_and_swap): New
function.
(atomic_linux_zero.inline.hpp: Atomic::store): Use
arm_val_compare_and_swap.
* arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp (IT_MASK_TT)
(IT_MASK_TE, IT_MASK_TTT, IT_MASK_TEE): Add a few new IT
encodings.
(Thumb2_dUnaryOp): Generalize Thumb2_dNeg.
(Thumb2_dNeg, Thumb2_dAbs): Specializations of Thumb2_dUnaryOp.
(handle_special_method): Use intrinsic_id instead of method_kind.
Add handlers for _compareAndSwapInt and _compareAndSwapLong.
(Thumb2_codegen): Call handle_special_method() for invokevirtual.
Pass stackdepth to handle_special_method().
diff -r 01123e3102cc arm_port/hotspot/src/cpu/zero/vm/arm_cas.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/arm_port/hotspot/src/cpu/zero/vm/arm_cas.S Fri Mar 02 17:52:08 2012 +0000
@@ -0,0 +1,30 @@
+#ifdef __ARM_ARCH_7A__
+@ jlong
+@ arm_val_compare_and_swap_long(volatile void *ptr,
+@ jlong oldval,
+@ jlong newval) {
+ .pushsection .text
+ .global arm_val_compare_and_swap_long
+#ifdef __thumb__
+ .thumb_func
+#endif
+ .type arm_val_compare_and_swap_long, %function
+arm_val_compare_and_swap_long:
+ stmfd sp!, {r4, r5, r6, r7}
+ ldrd r4, [sp, #16]
+ dmb sy
+0: ldrexd r6, [r0]
+ cmp r6, r2
+ it eq
+ cmpeq r7, r3
+ bne 1f
+ strexd r1, r4, [r0]
+ cmp r1, #0
+ bne 0b
+ dmb sy
+1: mov r0, r6
+ mov r1, r7
+ ldmfd sp!, {r4, r5, r6, r7}
+ bx lr
+ .popsection
+#endif // __ARM_ARCH_7A__
diff -r 01123e3102cc arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp
--- a/arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp Wed Feb 22 15:36:29 2012 +0000
+++ b/arm_port/hotspot/src/cpu/zero/vm/thumb2.cpp Fri Mar 02 17:52:08 2012 +0000
@@ -3042,6 +3042,10 @@
#define T_IT(cond, mask) (0xbf00 | (conds[cond] << 4) | (mask))
#define IT_MASK_T 8
+#define IT_MASK_TE 0x14
+#define IT_MASK_TT 0x1c
+#define IT_MASK_TTT 0x1e
+#define IT_MASK_TEE 0x12
#define PATCH(loc) do { \
unsigned oldidx = codebuf->idx; \
@@ -4233,7 +4237,8 @@
eor_imm(jinfo->codebuf, r_result, r, 0x80000000);
}
-void Thumb2_dNeg(Thumb2_Info *jinfo, u32 opc)
+// arm_op is either DP_EOR (for dnegate) or DP_BIC (for dabs)
+static void Thumb2_dUnaryOp(Thumb2_Info *jinfo, u32 arm_op)
{
Thumb2_Stack *jstack = jinfo->jstack;
unsigned r_lo, r_hi, r_res_lo, r_res_hi;
@@ -4248,7 +4253,17 @@
JASSERT(r_res_lo != r_res_hi, "oops");
JASSERT(r_res_lo != r_hi, "r_res_lo != r_hi");
mov_reg(jinfo->codebuf, r_res_lo, r_lo);
- eor_imm(jinfo->codebuf, r_res_hi, r_hi, 0x80000000);
+ dop_imm(jinfo->codebuf, arm_op, r_res_hi, r_hi, 0x80000000);
+}
+
+void Thumb2_dNeg(Thumb2_Info *jinfo)
+{
+ Thumb2_dUnaryOp(jinfo, DP_EOR);
+}
+
+void Thumb2_dAbs(Thumb2_Info *jinfo)
+{
+ Thumb2_dUnaryOp(jinfo, DP_BIC);
}
void Thumb2_lOp(Thumb2_Info *jinfo, u32 opc)
@@ -5008,42 +5023,34 @@
// Expand a call to a "special" method. These are usually inlines of
// java.lang.Math methods. Return true if the inlining succeeded.
-static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo) {
+static bool handle_special_method(methodOop callee, Thumb2_Info *jinfo,
+ unsigned stackdepth) {
+ Thumb2_Stack *jstack = jinfo->jstack;
+ CodeBuf *codebuf = jinfo->codebuf;
+
+ const char *entry_name;
+
+ switch (callee->intrinsic_id()) {
+ case vmIntrinsics::_dabs:
+ {
+ Thumb2_dAbs(jinfo);
+ return true;
+ }
+
#ifdef __ARM_PCS_VFP
- Thumb2_Stack *jstack = jinfo->jstack;
-
- const char *entry_name;
-
- unsigned loc1 = 0;
-
- switch (Interpreter::method_kind(callee)) {
- case Interpreter::java_lang_math_abs:
- {
- unsigned r_lo, r_hi;
-
- Thumb2_Fill(jinfo, 2);
- r_lo = POP(jstack);
- r_hi = POP(jstack);
- dop_imm_s(jinfo->codebuf, DP_BIC, r_hi, r_hi, 0x80000000, 0);
- PUSH(jstack, r_hi);
- PUSH(jstack, r_lo);
-
- return true;
- }
-
- case Interpreter::java_lang_math_sin:
+ case vmIntrinsics::_dsin:
entry_name = "Java_java_lang_StrictMath_sin";
break;
- case Interpreter::java_lang_math_cos:
+ case vmIntrinsics::_dcos:
entry_name = "Java_java_lang_StrictMath_cos";
break;
- case Interpreter::java_lang_math_tan:
+ case vmIntrinsics::_dtan:
entry_name = "Java_java_lang_StrictMath_tan";
break;
- case Interpreter::java_lang_math_sqrt:
+ case vmIntrinsics::_dsqrt:
{
void *entry_point = dlsym(NULL, "Java_java_lang_StrictMath_sqrt");
if (! entry_point)
@@ -5077,13 +5084,104 @@
return true;
}
- case Interpreter::java_lang_math_log:
+ case vmIntrinsics::_dlog:
entry_name = "Java_java_lang_StrictMath_log";
break;
- case Interpreter::java_lang_math_log10:
+ case vmIntrinsics::_dlog10:
entry_name = "Java_java_lang_StrictMath_log10";
break;
+#endif // __ARM_PCS_VFP
+
+ case vmIntrinsics::_compareAndSwapInt:
+ {
+ Thumb2_Fill(jinfo, 4);
+
+ unsigned update = POP(jstack);
+ unsigned expect = POP(jstack);
+ unsigned offset = POP(jstack);
+ POP(jstack); // Actually the high part of the offset
+
+ // unsigned object = POP(jstack);
+ // unsigned unsafe = POP(jstack); // Initially an instance of java.lang.Unsafe
+
+ Thumb2_Flush(jinfo);
+ // Get ourself a result reg that's not one of the inputs
+ unsigned exclude = (1<<update)|(1<<expect)|(1<<offset);
+ unsigned result = JSTACK_PREFER(jstack, ~exclude);
+
+ ldm(codebuf, (1<<ARM_IP)|(1<<ARM_LR), Rstack, POP_FD, 1); // Object addr
+ add_reg(codebuf, result, offset, ARM_IP); // result now points to word
+ ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0); // Security check
+
+ fullBarrier(codebuf);
+
+ int retry = out_loc(codebuf);
+ ldrex_imm(codebuf, ARM_LR, result, 0);
+ cmp_reg(codebuf, ARM_LR, expect);
+ int loc_failed = forward_16(codebuf);
+ strex_imm(codebuf, ARM_IP, update, result, 0);
+ cmp_imm(codebuf, ARM_IP, 0);
+ branch(codebuf, COND_NE, retry);
+ bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+
+ it(codebuf, COND_NE, IT_MASK_TEE);
+ mov_imm(codebuf, result, 0);
+ mov_imm(codebuf, result, 1);
+ fullBarrier(codebuf);
+
+ PUSH(jstack, result);
+ }
+ return true;
+
+ case vmIntrinsics::_compareAndSwapLong:
+ {
+ Thumb2_Fill(jinfo, 4);
+
+ unsigned update_lo = POP(jstack);
+ unsigned update_hi = POP(jstack);
+ unsigned expect_lo = POP(jstack);
+ unsigned expect_hi = POP(jstack);
+
+ Thumb2_Flush(jinfo);
+ Thumb2_save_locals(jinfo, stackdepth - 4); // 4 args popped above
+
+ // instance of java.lang.Unsafe:
+ ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 3 * wordSize, 1, 0);
+ ldr_imm(codebuf, ARM_LR, ARM_LR, 0, 0, 0); // Security check
+
+ // Object:
+ ldr_imm(jinfo->codebuf, ARM_LR, Rstack, 2 * wordSize, 1, 0);
+ // Offset:
+ ldr_imm(jinfo->codebuf, ARM_IP, Rstack, 0 * wordSize, 1, 0);
+ add_reg(codebuf, ARM_LR, ARM_LR, ARM_IP); // ARM_LR now points to word
+
+ fullBarrier(codebuf);
+
+ int retry = out_loc(codebuf);
+ ldrexd(codebuf, JAZ_V2, JAZ_V3, ARM_LR);
+ cmp_reg(codebuf, JAZ_V2, expect_lo);
+ it(jinfo->codebuf, COND_EQ, IT_MASK_T);
+ cmp_reg(codebuf, JAZ_V3, expect_hi);
+
+ int loc_failed = forward_16(codebuf);
+ strexd(codebuf, JAZ_V1, update_lo, update_hi, ARM_LR);
+ cmp_imm(codebuf, JAZ_V1, 0);
+ branch(codebuf, COND_NE, retry);
+ bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
+
+ unsigned result = JSTACK_REG(jinfo->jstack);
+
+ it(codebuf, COND_NE, IT_MASK_TEE);
+ mov_imm(codebuf, result, 0);
+ mov_imm(codebuf, result, 1);
+ fullBarrier(codebuf);
+
+ Thumb2_restore_locals(jinfo, stackdepth - 4); // 4 args popped above
+ add_imm(codebuf, Rstack, Rstack, 4 * wordSize);
+ PUSH(jstack, result);
+ }
+ return true;
default:
return false;
@@ -5093,8 +5191,6 @@
if (! entry_point)
return false;
- unsigned r_lo, r_hi, r_res_lo, r_res_hi;
-
jstack_to_vfp(jinfo, VFP_D0);
// FIXME: The JNI StrictMath routines don't use the JNIEnv *env
// parameter, so it's arguably pointless to pass it here.
@@ -5103,9 +5199,6 @@
vfp_to_jstack(jinfo, VFP_D0);
return true;
-#else
- return false;
-#endif // __ARM_PCS_VFP
}
void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start)
@@ -5766,7 +5859,7 @@
break;
case opc_dneg:
- Thumb2_dNeg(jinfo, opcode);
+ Thumb2_dNeg(jinfo);
break;
case opc_i2l: {
@@ -6177,7 +6270,7 @@
callee = (methodOop)cache->f1();
- if (handle_special_method(callee, jinfo))
+ if (handle_special_method(callee, jinfo, stackdepth))
break;
if (callee->is_accessor()) {
@@ -6302,6 +6395,10 @@
if (cache->is_vfinal()) {
methodOop callee = (methodOop)cache->f2();
+
+ if (handle_special_method(callee, jinfo, stackdepth))
+ break;
+
if (callee->is_accessor()) {
u1 *code = callee->code_base();
int index = GET_NATIVE_U2(&code[2]);
@@ -7857,7 +7954,7 @@
#define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
-// DEBUG_METHODENTRY
+// DEBUG_METHODENTRY
handlers[H_DEBUG_METHODENTRY] = out_pos(&codebuf);
stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
mov_reg(&codebuf, ARM_R2, ARM_R0);
diff -r 01123e3102cc patches/arm.patch
--- a/patches/arm.patch Wed Feb 22 15:36:29 2012 +0000
+++ b/patches/arm.patch Fri Mar 02 17:52:08 2012 +0000
@@ -230,3 +230,57 @@
-a ! \( -name DUMMY $(addprefix -o -name ,$(Src_Files_EXCLUDE)) \)))
endef
+diff -r -uw icedtea6.pristine/openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp icedtea6/openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp
+--- openjdk.orig/hotspot/src/cpu/zero/vm/vm_version_zero.hpp 2011-11-14 22:07:31.000000000 +0000
++++ openjdk/hotspot/src/cpu/zero/vm/vm_version_zero.hpp 2012-02-29 17:27:11.472996427 +0000
+@@ -30,7 +30,18 @@
+ #include "runtime/vm_version.hpp"
+
+ class VM_Version : public Abstract_VM_Version {
++
+ public:
++ static void get_processor_features() {
++#ifdef __ARM_ARCH_7A__
++ Abstract_VM_Version::_supports_cx8 = true;
++#endif
++ }
++
++ static void initialize() {
++ get_processor_features();
++ }
++
+ static const char* cpu_features() {
+ return "";
+ }
+diff -r -uw openjdk.orig/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp openjdk/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp
+--- openjdk.orig/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp 2011-11-14 22:07:32.000000000 +0000
++++ openjdk/hotspot/src/os_cpu/linux_zero/vm/atomic_linux_zero.inline.hpp 2012-02-29 16:57:28.412360724 +0000
+@@ -160,6 +160,16 @@
+ return prev;
+ }
+ }
++
++#ifdef __ARM_ARCH_7A__
++/* Perform an atomic compare and swap: if the current value of `*PTR'
++ is OLDVAL, then write NEWVAL into `*PTR'. Return the contents of
++ `*PTR' before the operation.*/
++extern "C" jlong arm_val_compare_and_swap_long(volatile void *ptr,
++ jlong oldval,
++ jlong newval);
++
++#endif // __ARM_ARCH_7A__
+ #endif // ARM
+
+ inline void Atomic::store(jint store_value, volatile jint* dest) {
+@@ -274,7 +322,11 @@
+ volatile jlong* dest,
+ jlong compare_value) {
+
++#ifndef __ARM_ARCH_7A__
+ return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
++#else
++ return arm_val_compare_and_swap_long(dest, compare_value, exchange_value);
++#endif
+ }
+
+ inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value,
More information about the distro-pkg-dev
mailing list