[aarch64-port-dev ] Use 2- and 3-instruction immediate form of movoop and mov_metadata in C2-generated code
Andrew Haley
aph at redhat.com
Thu Jun 19 14:45:28 UTC 2014
Now that we have a three-instruction version of load address
constant, use it in C2.
Also fix a couple of unrelated bugs that were uncovered during
testing.
Also use lea(reg, Address) rather than mov(reg, Address); it was
unnecessarily confusing.
Andrew.
# HG changeset patch
# User aph
# Date 1403014483 14400
# Tue Jun 17 10:14:43 2014 -0400
# Node ID 745e0357529b3a046af1bcb56f1493a94657b924
# Parent f6b18d9a37d99880e982db1ec70f68d1e17fa8c3
Use 2- and 3-instruction immediate form of movoop and mov_metadata in C2-generated code.
Fix patching code to handle 2- and 3-word forms.
Fix offset out of range bug in frame generation.
Use lea (rather than mov) for the mov(reg, Address) form; mov was unnecessarily confusing.
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/aarch64.ad Tue Jun 17 10:14:43 2014 -0400
@@ -962,7 +962,7 @@
if (framesize == 0) {
// Is this even possible?
st->print("stp lr, rfp, [sp, #%d]!", -(2 * wordSize));
- } else if (framesize < (1 << 12)) {
+ } else if (framesize < (1 << 7)) {
st->print("sub sp, sp, #%d\n\t", framesize);
st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
} else {
@@ -991,7 +991,7 @@
if (framesize == 0) {
// Is this even possible?
__ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
- } else if (framesize < (1 << 12)) {
+ } else if (framesize < (1 << 7)) {
__ sub(sp, sp, framesize);
__ stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
} else {
@@ -1040,7 +1040,7 @@
if (framesize == 0) {
st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
- } else if (framesize < (1 << 12)) {
+ } else if (framesize < (1 << 7)) {
st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
st->print("add sp, sp, #%d\n\t", framesize);
} else {
@@ -1064,7 +1064,7 @@
if (framesize == 0) {
__ ldp(rfp, lr, Address(__ post(sp, 2 * wordSize)));
- } else if (framesize < (1 << 12)) {
+ } else if (framesize < (1 << 7)) {
__ ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
__ add(sp, sp, framesize);
} else {
@@ -2143,7 +2143,7 @@
int call_offset = (nargs + 11) * 4;
int field_offset = in_bytes(JavaThread::frame_anchor_offset()) +
in_bytes(JavaFrameAnchor::last_Java_pc_offset());
- __ mov(rscratch1, InternalAddress(pc + call_offset));
+ __ lea(rscratch1, InternalAddress(pc + call_offset));
__ str(rscratch1, Address(rthread, field_offset));
%}
@@ -2570,9 +2570,9 @@
} else {
relocInfo::relocType rtype = $src->constant_reloc();
if (rtype == relocInfo::oop_type) {
- __ movoop(dst_reg, (jobject)con);
+ __ movoop(dst_reg, (jobject)con, /*mt_safe*/false);
} else if (rtype == relocInfo::metadata_type) {
- __ mov_metadata(dst_reg, (Metadata*)con);
+ __ mov_metadata(dst_reg, (Metadata*)con, /*mt_safe*/false);
} else {
assert(rtype == relocInfo::none, "unexpected reloc type");
if (con < (address)(uintptr_t)os::vm_page_size()) {
@@ -2625,7 +2625,7 @@
} else {
relocInfo::relocType rtype = $src->constant_reloc();
assert(rtype == relocInfo::oop_type, "unexpected reloc type");
- __ set_narrow_oop(dst_reg, (jobject)con);
+ __ set_narrow_oop(dst_reg, (jobject)con, /*mt_safe*/false);
}
%}
@@ -2644,7 +2644,7 @@
} else {
relocInfo::relocType rtype = $src->constant_reloc();
assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
- __ set_narrow_klass(dst_reg, (Klass *)con);
+ __ set_narrow_klass(dst_reg, (Klass *)con, /*mt_safe*/false);
}
%}
@@ -2912,7 +2912,7 @@
int fpcnt;
int rtype;
getCallInfo(tf(), gpcnt, fpcnt, rtype);
- __ mov(rscratch1, RuntimeAddress(entry));
+ __ lea(rscratch1, RuntimeAddress(entry));
__ blrt(rscratch1, gpcnt, fpcnt, rtype);
}
%}
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Tue Jun 17 10:14:43 2014 -0400
@@ -63,7 +63,7 @@
set_last_Java_frame(sp, rfp, retaddr, rscratch1);
// do the call
- mov(rscratch1, RuntimeAddress(entry));
+ lea(rscratch1, RuntimeAddress(entry));
blrt(rscratch1, args_size + 1, 8, 1);
bind(retaddr);
int call_offset = offset();
@@ -553,7 +553,7 @@
Label retaddr;
__ set_last_Java_frame(sp, rfp, retaddr, rscratch1);
// do the call
- __ mov(rscratch1, RuntimeAddress(target));
+ __ lea(rscratch1, RuntimeAddress(target));
__ blrt(rscratch1, 1, 0, 1);
__ bind(retaddr);
OopMapSet* oop_maps = new OopMapSet();
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Jun 17 10:14:43 2014 -0400
@@ -137,14 +137,15 @@
offset >>= 2;
Instruction_aarch64::spatch(branch, 23, 5, offset);
Instruction_aarch64::patch(branch, 30, 29, offset_lo);
- } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
+ } else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
+ u_int64_t dest = (u_int64_t)target;
// Move wide constant
- u_int64_t dest = (u_int64_t)target;
assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
- Instruction_aarch64::patch(branch += 4, 20, 5, (dest >>= 16) & 0xffff);
- Instruction_aarch64::patch(branch += 4, 20, 5, (dest >>= 16) & 0xffff);
+ Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
+ Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
+ assert(pd_call_destination(branch) == target, "should be");
} else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
// nothing to do
@@ -154,6 +155,19 @@
}
}
+void MacroAssembler::patch_oop(address insn_addr, address o) {
+ unsigned insn = *(unsigned*)insn_addr;
+ if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) {
+ // Move narrow constant
+ assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
+ narrowOop n = oopDesc::encode_heap_oop((oop)o);
+ Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
+ Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
+ } else {
+ pd_patch_instruction(insn_addr, o);
+ }
+}
+
address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
long offset = 0;
if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
@@ -218,8 +232,8 @@
ShouldNotReachHere();
}
} else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
- // Move address constant: movz, movk, movk. See movptr().
u_int32_t *insns = (u_int32_t *)insn_addr;
+ // Move wide constant: movz, movk, movk. See movptr().
assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
return address(u_int64_t(Instruction_aarch64::extract(insns[0], 20, 5))
@@ -2520,29 +2534,33 @@
decode_klass_not_null(r, r);
}
-// TODO
-//
-// these next two methods load a narrow oop or klass constant into a
-// register. they currently do the dumb thing of installing 64 bits of
-// unencoded constant into the register and then encoding it.
-// installing the encoded 32 bit constant directly requires updating
-// the relocation code so it can recognize that this is a 32 bit load
-// rather than a 64 bit load.
-
-void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
- assert (UseCompressedOops, "should only be used for compressed headers");
+void MacroAssembler::set_narrow_oop(Register dst, jobject obj, bool mt_safe) {
+ assert (UseCompressedOops, "should only be used for compressed oops");
assert (Universe::heap() != NULL, "java heap should be initialized");
assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
- movoop(dst, obj);
- encode_heap_oop_not_null(dst);
+
+ int oop_index = oop_recorder()->find_index(obj);
+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
+
+ InstructionMark im(this);
+ RelocationHolder rspec = oop_Relocation::spec(oop_index);
+ code_section()->relocate(inst_mark(), rspec);
+ movz(dst, 0xDEAD, 16);
+ movk(dst, 0xBEEF);
}
-
-void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
+void MacroAssembler::set_narrow_klass(Register dst, Klass* k, bool mt_safe) {
assert (UseCompressedClassPointers, "should only be used for compressed headers");
assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
- mov_metadata(dst, k);
- encode_klass_not_null(dst);
+ int index = oop_recorder()->find_index(k);
+ assert(! Universe::heap()->is_in_reserved(k), "should not be an oop");
+
+ InstructionMark im(this);
+ RelocationHolder rspec = metadata_Relocation::spec(index);
+ code_section()->relocate(inst_mark(), rspec);
+ narrowKlass nk = Klass::encode_klass(k);
+ movz(dst, (nk >> 16), 16);
+ movk(dst, nk & 0xffff);
}
void MacroAssembler::load_heap_oop(Register dst, Address src)
@@ -2552,7 +2570,7 @@
decode_heap_oop(dst);
} else {
ldr(dst, src);
- }
+ }
}
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src)
@@ -2764,7 +2782,11 @@
return Address((address)obj, rspec);
}
-void MacroAssembler::movoop(Register dst, jobject obj) {
+// Move an oop into a register.  mt_safe is true iff this instruction
+// may be patched while the code is being executed by another thread,
+// in which case we must load via the constant pool; otherwise we can
+// use 2- or 3-instruction move immediates instead.
+void MacroAssembler::movoop(Register dst, jobject obj, bool mt_safe) {
int oop_index;
if (obj == NULL) {
oop_index = oop_recorder()->allocate_oop_index(obj);
@@ -2773,7 +2795,7 @@
assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
}
RelocationHolder rspec = oop_Relocation::spec(oop_index);
- address const_ptr = long_constant((jlong)obj);
+ address const_ptr = mt_safe ? long_constant((jlong)obj) : NULL;
if (! const_ptr) {
mov(dst, Address((address)obj, rspec));
} else {
@@ -2782,7 +2804,8 @@
}
}
-void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
+// Move a metadata address into a register.
+void MacroAssembler::mov_metadata(Register dst, Metadata* obj, bool mt_safe) {
int oop_index;
if (obj == NULL) {
oop_index = oop_recorder()->allocate_metadata_index(obj);
@@ -2790,7 +2813,7 @@
oop_index = oop_recorder()->find_index(obj);
}
RelocationHolder rspec = metadata_Relocation::spec(oop_index);
- address const_ptr = long_constant((jlong)obj);
+ address const_ptr = mt_safe ? long_constant((jlong)obj) : NULL;
if (! const_ptr) {
mov(dst, Address((address)obj, rspec));
} else {
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Jun 17 10:14:43 2014 -0400
@@ -406,6 +406,8 @@
int push(unsigned int bitset, Register stack);
int pop(unsigned int bitset, Register stack);
+ void mov(Register dst, Address a);
+
public:
int push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
int pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
@@ -438,7 +440,6 @@
mov(dst, (long)i);
}
- void mov(Register dst, Address a);
void movptr(Register r, uintptr_t imm64);
// macro instructions for accessing and updating floating point
@@ -493,6 +494,8 @@
static void pd_print_patched_instruction(address branch);
#endif
+ static void patch_oop(address insn_addr, address o);
+
// The following 4 methods return the offset of the appropriate move instruction
// Support for fast byte/short loading with zero extension (depending on particular CPU)
@@ -734,7 +737,7 @@
void encode_heap_oop_not_null(Register dst, Register src);
void decode_heap_oop_not_null(Register dst, Register src);
- void set_narrow_oop(Register dst, jobject obj);
+ void set_narrow_oop(Register dst, jobject obj, bool mt_safe = true);
// currently unimplemented
#if 0
void set_narrow_oop(Address dst, jobject obj);
@@ -747,7 +750,7 @@
void encode_klass_not_null(Register dst, Register src);
void decode_klass_not_null(Register dst, Register src);
- void set_narrow_klass(Register dst, Klass* k);
+ void set_narrow_klass(Register dst, Klass* k, bool mt_safe = true);
// currently unimplemented
#if 0
void set_narrow_klass(Address dst, Klass* k);
@@ -1103,7 +1106,7 @@
// Data
- void mov_metadata(Register dst, Metadata* obj);
+ void mov_metadata(Register dst, Metadata* obj, bool mt_safe = true);
Address allocate_metadata_address(Metadata* obj);
Address constant_oop_address(jobject obj);
// unimplemented
@@ -1111,7 +1114,7 @@
void pushoop(jobject obj);
#endif
- void movoop(Register dst, jobject obj);
+ void movoop(Register dst, jobject obj, bool mt_safe = true);
// sign extend as need a l to ptr sized element
void movl2ptr(Register dst, Address src) { Unimplemented(); }
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/relocInfo_aarch64.cpp
--- a/src/cpu/aarch64/vm/relocInfo_aarch64.cpp Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp Tue Jun 17 10:14:43 2014 -0400
@@ -33,10 +33,15 @@
void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
- MacroAssembler::pd_patch_instruction(addr(), x);
+ switch(type()) {
+ case relocInfo::oop_type:
+ MacroAssembler::patch_oop(addr(), x);
+ break;
+ default:
+ MacroAssembler::pd_patch_instruction(addr(), x);
+ }
}
-
address Relocation::pd_call_destination(address orig_addr) {
if (orig_addr != NULL) {
return MacroAssembler::pd_call_destination(orig_addr);
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Tue Jun 17 10:14:43 2014 -0400
@@ -316,7 +316,7 @@
__ mov(c_rarg0, rmethod);
__ mov(c_rarg1, lr);
- __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
+ __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
__ blrt(rscratch1, 2, 0, 0);
__ pop_CPU_state();
@@ -1168,7 +1168,7 @@
} else {
assert((unsigned)gpargs < 256, "eek!");
assert((unsigned)fpargs < 32, "eek!");
- __ mov(rscratch1, RuntimeAddress(dest));
+ __ lea(rscratch1, RuntimeAddress(dest));
__ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type);
__ blrt(rscratch1, rscratch2);
// __ blrt(rscratch1, gpargs, fpargs, type);
@@ -1965,9 +1965,9 @@
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
if (!is_critical_native) {
- __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+ __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
} else {
- __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
+ __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
}
__ blrt(rscratch1, 1, 0, 1);
// Restore any method result value
@@ -2388,7 +2388,7 @@
}
#endif // ASSERT
__ mov(c_rarg0, rthread);
- __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
+ __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
__ blrt(rscratch1, 1, 0, 1);
__ bind(retaddr);
@@ -2518,7 +2518,7 @@
__ mov(c_rarg0, rthread);
__ movw(c_rarg1, rcpool); // second arg: exec_mode
- __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
+ __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
__ blrt(rscratch1, 2, 0, 0);
// Set an oopmap for the call site
@@ -2871,7 +2871,7 @@
__ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
__ mov(c_rarg0, rthread);
- __ mov(rscratch1, RuntimeAddress(destination));
+ __ lea(rscratch1, RuntimeAddress(destination));
__ blrt(rscratch1, 1, 0, 1);
__ bind(retaddr);
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/templateTable_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp Tue Jun 17 10:14:43 2014 -0400
@@ -2431,7 +2431,7 @@
// we take the time to call into the VM.
Label L1;
assert_different_registers(cache, index, r0);
- __ mov(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
+ __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
__ ldrw(r0, Address(rscratch1));
__ cbz(r0, L1);
More information about the aarch64-port-dev
mailing list