[aarch64-port-dev ] RFR: Large code cache
Andrew Haley
aph at redhat.com
Mon Dec 8 15:51:25 UTC 2014
The current AArch64 port has a code cache size limit of 128MB because
that is the range of a direct branch (B/BL) instruction. This patch
removes that restriction by using trampolines, very much like the
PPC64 port.
For branches which don't have to be patched (except at safepoints) it
uses
ADRP rscratch1, dest; ADD rscratch1, rscratch1, #offset; BR rscratch1
which is shorter than generating a trampoline. The exception to this
is runtime calls in C2-compiled code where it uses trampoline calls
for speed.
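
For reference, the far-branch idiom corresponds to the
MacroAssembler::far_jump/far_call code added below; a minimal sketch of
what gets emitted, assuming rscratch1 as the scratch register the branch
is allowed to trash:

    unsigned long offset;
    adrp(rscratch1, dest, offset);      // rscratch1 = page containing dest
    add(rscratch1, rscratch1, offset);  // add the offset within the page
    br(rscratch1);                      // indirect branch, reaches the
                                        // whole code cache

A trampoline call, by contrast, keeps a single patchable BL at the call
site and indirects through a small stub in the stub section that loads
the full 64-bit target from a constant next to it.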
The default code cache size remains at 128M. As long as that size is
not increased, no trampolines are generated and direct branches are
used.
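
Whether far branches and trampolines are needed at all is decided
statically from the reserved code cache size; a minimal sketch of the
check the patch adds to MacroAssembler (branch_range is the 128MB reach
of B/BL, shrunk to 2MB in debug builds so the trampoline path gets
exercised):

    static bool far_branches() {
      // Direct branches suffice as long as the whole reserved code
      // cache is within reach of a single B/BL instruction.
      return ReservedCodeCacheSize >= branch_range;
    }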
I considered overriding MacroAssembler::b() and br() to generate far
branches, but decided against it because we don't always need them and
the code is easier to understand if far branches are explicit.
Zombie methods are trapped with a SIGILL. I use DCPS1 #0xdead to do
this.
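
Concretely, when a method is made not-entrant or zombie and the
handle_wrong_method stub is out of direct-branch range,
patch_verified_entry() writes the fixed DCPS1 encoding at the verified
entry and the signal handler recognizes it; a sketch taken from the
hunks below:

    void NativeIllegalInstruction::insert(address code_pos) {
      *(juint*)code_pos = 0xd4bbd5a1;   // dcps1 #0xdead
    }

    bool NativeInstruction::is_sigill_zombie_not_entrant() {
      return uint_at(0) == 0xd4bbd5a1;  // dcps1 #0xdead
    }

The handler then dispatches to
SharedRuntime::get_handle_wrong_method_stub().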
I tidied up a few things while I was at it. In particular, we're
using set_insts_mark() in many places for no good reason.
This is a big change, and it will take some time to test and
stabilize. I propose only to commit it to JDK 9.
Comments welcome.
Andrew.
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/aarch64.ad Mon Dec 08 10:46:11 2014 -0500
@@ -785,13 +785,12 @@
static int emit_deopt_handler(CodeBuffer& cbuf);
static uint size_exception_handler() {
- // count up to 4 movz/n/k instructions and one branch instruction
- return 5 * NativeInstruction::instruction_size;
+ return MacroAssembler::far_branch_size();
}
static uint size_deopt_handler() {
- // count one adr and one branch instruction
- return 2 * NativeInstruction::instruction_size;
+ // count one adr and one far branch instruction
+ return 4 * NativeInstruction::instruction_size;
}
};
@@ -859,7 +858,7 @@
int MachCallRuntimeNode::ret_addr_offset() {
// for generated stubs the call will be
- // bl(addr)
+ // far_call(addr)
// for real runtime callouts it will be six instructions
// see aarch64_enc_java_to_runtime
// adr(rscratch2, retaddr)
@@ -868,7 +867,7 @@
// blrt rscratch1
CodeBlob *cb = CodeCache::find_blob(_entry_point);
if (cb) {
- return NativeInstruction::instruction_size;
+ return MacroAssembler::far_branch_size();
} else {
return 6 * NativeInstruction::instruction_size;
}
@@ -1468,13 +1467,12 @@
// This is the unverified entry point.
MacroAssembler _masm(&cbuf);
- // no need to worry about 4-byte of br alignment on AArch64
__ cmp_klass(j_rarg0, rscratch2, rscratch1);
Label skip;
// TODO
// can we avoid this skip and still use a reloc?
__ br(Assembler::EQ, skip);
- __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
__ bind(skip);
}
@@ -1499,7 +1497,7 @@
__ start_a_stub(size_exception_handler());
if (base == NULL) return 0; // CodeBuffer::expand failed
int offset = __ offset();
- __ b(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
+ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
__ end_a_stub();
return offset;
@@ -1517,8 +1515,7 @@
int offset = __ offset();
__ adr(lr, __ pc());
- // should we load this into rscratch1 and use a br?
- __ b(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
__ end_a_stub();
@@ -2753,15 +2750,14 @@
enc_class aarch64_enc_java_static_call(method meth) %{
MacroAssembler _masm(&cbuf);
- cbuf.set_insts_mark();
address addr = (address)$meth$$method;
if (!_method) {
// A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
- __ bl(Address(addr, relocInfo::runtime_call_type));
+ __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
} else if (_optimized_virtual) {
- __ bl(Address(addr, relocInfo::opt_virtual_call_type));
+ __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
} else {
- __ bl(Address(addr, relocInfo::static_call_type));
+ __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
}
if (_method) {
@@ -2778,15 +2774,15 @@
// Use it to preserve SP.
__ mov(rfp, sp);
- cbuf.set_insts_mark();
+ const int start_offset = __ offset();
address addr = (address)$meth$$method;
if (!_method) {
// A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
- __ bl(Address(addr, relocInfo::runtime_call_type));
+ __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
} else if (_optimized_virtual) {
- __ bl(Address(addr, relocInfo::opt_virtual_call_type));
+ __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
} else {
- __ bl(Address(addr, relocInfo::static_call_type));
+ __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
}
if (_method) {
@@ -2821,7 +2817,7 @@
address entry = (address)$meth$$method;
CodeBlob *cb = CodeCache::find_blob(entry);
if (cb) {
- __ bl(Address(entry));
+ __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
} else {
int gpcnt;
int fpcnt;
@@ -2840,7 +2836,7 @@
enc_class aarch64_enc_rethrow() %{
MacroAssembler _masm(&cbuf);
- __ b(RuntimeAddress(OptoRuntime::rethrow_stub()));
+ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
%}
enc_class aarch64_enc_ret() %{
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/assembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -63,7 +63,6 @@
// #include "gc_implementation/g1/heapRegion.hpp"
// #endif
-
extern "C" void entry(CodeBuffer *cb);
#define __ _masm.
@@ -1362,7 +1361,6 @@
if (L.is_bound()) {
br(cc, target(L));
} else {
- InstructionMark im(this);
L.add_patch_at(code(), locator());
br(cc, pc());
}
@@ -1373,7 +1371,6 @@
if (L.is_bound()) {
(this->*insn)(target(L));
} else {
- InstructionMark im(this);
L.add_patch_at(code(), locator());
(this->*insn)(pc());
}
@@ -1384,7 +1381,6 @@
if (L.is_bound()) {
(this->*insn)(r, target(L));
} else {
- InstructionMark im(this);
L.add_patch_at(code(), locator());
(this->*insn)(r, pc());
}
@@ -1395,7 +1391,6 @@
if (L.is_bound()) {
(this->*insn)(r, bitpos, target(L));
} else {
- InstructionMark im(this);
L.add_patch_at(code(), locator());
(this->*insn)(r, bitpos, pc());
}
@@ -1405,7 +1400,6 @@
if (L.is_bound()) {
(this->*insn)(target(L), op);
} else {
- InstructionMark im(this);
L.add_patch_at(code(), locator());
(this->*insn)(pc(), op);
}
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Mon Dec 08 10:46:11 2014 -0500
@@ -839,16 +839,27 @@
#undef INSN
+ // The maximum range of a branch is fixed for the AArch64
+ // architecture. In debug mode we shrink it in order to test
+ // trampolines, but not so small that branches in the interpreter
+ // are out of range.
+ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);
+
+ static bool reachable_from_branch_at(address branch, address target) {
+ return uabs(target - branch) < branch_range;
+ }
+
// Unconditional branch (immediate)
-#define INSN(NAME, opcode) \
- void NAME(address dest) { \
- starti; \
- long offset = (dest - pc()) >> 2; \
- f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0); \
- } \
- void NAME(Label &L) { \
- wrap_label(L, &Assembler::NAME); \
- } \
+#define INSN(NAME, opcode) \
+ void NAME(address dest) { \
+ starti; \
+ long offset = (dest - pc()) >> 2; \
+ DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \
+ f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0); \
+ } \
+ void NAME(Label &L) { \
+ wrap_label(L, &Assembler::NAME); \
+ } \
void NAME(const Address &dest);
INSN(b, 0);
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -44,7 +44,7 @@
__ bind(_entry);
ce->store_parameter(_method->as_register(), 1);
ce->store_parameter(_bci, 0);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ b(_continuation);
@@ -63,7 +63,7 @@
__ bind(_entry);
if (_info->deoptimize_on_exception()) {
address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
- __ call(RuntimeAddress(a));
+ __ far_call(RuntimeAddress(a));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ should_not_reach_here());
@@ -81,7 +81,7 @@
} else {
stub_id = Runtime1::throw_range_check_failed_id;
}
- __ call(RuntimeAddress(Runtime1::entry_for(stub_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(stub_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ should_not_reach_here());
@@ -94,7 +94,7 @@
void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
- __ call(RuntimeAddress(a));
+ __ far_call(RuntimeAddress(a));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ should_not_reach_here());
@@ -105,7 +105,7 @@
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
}
__ bind(_entry);
- __ bl(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type));
+ __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
#ifdef ASSERT
@@ -135,7 +135,7 @@
assert(__ rsp_offset() == 0, "frame size should be fixed");
__ bind(_entry);
__ mov(r3, _klass_reg->as_register());
- __ bl(RuntimeAddress(Runtime1::entry_for(_stub_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
assert(_result->as_register() == r0, "result must in r0,");
@@ -160,7 +160,7 @@
__ bind(_entry);
assert(_length->as_register() == r19, "length must in r19,");
assert(_klass_reg->as_register() == r3, "klass_reg must in r3");
- __ bl(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
assert(_result->as_register() == r0, "result must in r0");
@@ -183,7 +183,7 @@
__ bind(_entry);
assert(_length->as_register() == r19, "length must in r19,");
assert(_klass_reg->as_register() == r3, "klass_reg must in r3");
- __ bl(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
assert(_result->as_register() == r0, "result must in r0");
@@ -209,7 +209,7 @@
} else {
enter_id = Runtime1::monitorenter_nofpu_id;
}
- __ bl(RuntimeAddress(Runtime1::entry_for(enter_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id)));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ b(_continuation);
@@ -231,7 +231,7 @@
exit_id = Runtime1::monitorexit_nofpu_id;
}
__ adr(lr, _continuation);
- __ b(RuntimeAddress(Runtime1::entry_for(exit_id)));
+ __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));
}
@@ -255,7 +255,7 @@
void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
ce->add_call_info_here(_info);
DEBUG_ONLY(__ should_not_reach_here());
}
@@ -272,7 +272,7 @@
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
__ bind(_entry);
- __ call(RuntimeAddress(a));
+ __ far_call(RuntimeAddress(a));
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ should_not_reach_here());
@@ -288,7 +288,7 @@
if (_obj->is_cpu_register()) {
__ mov(rscratch1, _obj->as_register());
}
- __ call(RuntimeAddress(Runtime1::entry_for(_stub)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)));
ce->add_call_info_here(_info);
debug_only(__ should_not_reach_here());
}
@@ -330,7 +330,7 @@
ce->emit_static_call_stub();
Address resolve(SharedRuntime::get_resolve_static_call_stub(),
relocInfo::static_call_type);
- __ bl(resolve);
+ __ trampoline_call(resolve);
ce->add_call_info_here(info());
#ifndef PRODUCT
@@ -361,7 +361,7 @@
}
__ cbz(pre_val_reg, _continuation);
ce->store_parameter(pre_val()->as_register(), 0);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
__ b(_continuation);
}
@@ -382,7 +382,7 @@
Register new_val_reg = new_val()->as_register();
__ cbz(new_val_reg, _continuation);
ce->store_parameter(addr()->as_pointer_register(), 0);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
__ b(_continuation);
}
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -297,7 +297,7 @@
// Note: RECEIVER must still contain the receiver!
Label dont;
__ br(Assembler::EQ, dont);
- __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
// We align the verified entry point unless the method body
// (including its inline cache check) will fit in a single 64-byte
@@ -344,7 +344,7 @@
default: ShouldNotReachHere();
}
- __ bl(RuntimeAddress(target));
+ __ far_call(RuntimeAddress(target));
add_call_info_here(info);
}
@@ -390,8 +390,7 @@
__ verify_not_null_oop(r0);
// search an exception handler (r0: exception oop, r3: throwing pc)
- __ bl(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
- __ should_not_reach_here();
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id))); __ should_not_reach_here();
guarantee(code_offset() - offset <= exception_handler_size, "overflow");
__ end_a_stub();
@@ -446,7 +445,7 @@
// remove the activation and dispatch to the unwind handler
__ block_comment("remove_frame and dispatch to the unwind handler");
__ remove_frame(initial_frame_size_in_bytes());
- __ b(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
// Emit the slow path assembly
if (stub != NULL) {
@@ -476,7 +475,7 @@
int offset = code_offset();
__ adr(lr, pc());
- __ b(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
__ end_a_stub();
@@ -954,7 +953,7 @@
default: ShouldNotReachHere();
}
- __ bl(RuntimeAddress(target));
+ __ far_call(RuntimeAddress(target));
add_call_info_here(info);
}
@@ -1425,7 +1424,7 @@
__ br(Assembler::EQ, *success_target);
__ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
__ ldr(klass_RInfo, Address(__ post(sp, 2 * wordSize)));
// result is a boolean
__ cbzw(klass_RInfo, *failure_target);
@@ -1436,7 +1435,7 @@
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
__ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize)));
// result is a boolean
__ cbz(k_RInfo, *failure_target);
@@ -1526,7 +1525,7 @@
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize)));
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
__ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize)));
// result is a boolean
__ cbzw(k_RInfo, *failure_target);
@@ -2017,7 +2016,7 @@
void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
- __ bl(Address(op->addr(), rtype));
+ __ trampoline_call(Address(op->addr(), rtype));
add_call_info(code_offset(), op->info());
}
@@ -2046,7 +2045,8 @@
__ relocate(static_stub_Relocation::spec(call_pc));
__ mov_metadata(rmethod, (Metadata*)NULL);
- __ b(__ pc());
+ __ movptr(rscratch1, 0);
+ __ br(rscratch1);
assert(__ offset() - start <= call_stub_size, "stub too big");
__ end_a_stub();
@@ -2076,7 +2076,7 @@
} else {
unwind_id = Runtime1::handle_exception_nofpu_id;
}
- __ bl(RuntimeAddress(Runtime1::entry_for(unwind_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
// FIXME: enough room for two byte trap ????
__ nop();
@@ -2239,7 +2239,7 @@
__ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
}
#endif
- __ bl(RuntimeAddress(copyfunc_addr));
+ __ far_call(RuntimeAddress(copyfunc_addr));
}
__ cbz(r0, *stub->continuation());
@@ -2352,7 +2352,7 @@
__ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL);
__ PUSH(src, dst);
- __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
__ POP(src, dst);
__ cbnz(src, cont);
@@ -2402,7 +2402,7 @@
__ load_klass(c_rarg4, dst);
__ ldr(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset()));
__ ldrw(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));
- __ call(RuntimeAddress(copyfunc_addr));
+ __ far_call(RuntimeAddress(copyfunc_addr));
#ifndef PRODUCT
if (PrintC1Statistics) {
@@ -2517,7 +2517,7 @@
CodeBlob *cb = CodeCache::find_blob(entry);
if (cb) {
- __ bl(RuntimeAddress(entry));
+ __ far_call(RuntimeAddress(entry));
} else {
__ call_VM_leaf(entry, 3);
}
@@ -2855,7 +2855,7 @@
CodeBlob *cb = CodeCache::find_blob(dest);
if (cb) {
- __ bl(RuntimeAddress(dest));
+ __ far_call(RuntimeAddress(dest));
} else {
__ mov(rscratch1, RuntimeAddress(dest));
int len = args->length();
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp Mon Dec 08 10:46:11 2014 -0500
@@ -72,9 +72,8 @@
void store_parameter(jint c, int offset_from_esp_in_words);
void store_parameter(jobject c, int offset_from_esp_in_words);
- enum { call_stub_size = NOT_LP64(15) LP64_ONLY(28),
- exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
- deopt_handler_size = NOT_LP64(10) LP64_ONLY(17)
- };
+enum { call_stub_size = 12 * NativeInstruction::instruction_size,
+ exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
+ deopt_handler_size = 7 * NativeInstruction::instruction_size };
#endif // CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -351,7 +351,7 @@
if (CURRENT_ENV->dtrace_alloc_probes()) {
assert(obj == r0, "must be");
- call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
}
verify_oop(obj);
@@ -385,7 +385,7 @@
if (CURRENT_ENV->dtrace_alloc_probes()) {
assert(obj == r0, "must be");
- bl(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
}
verify_oop(obj);
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -97,11 +97,11 @@
}
if (frame_size() == no_frame_size) {
leave();
- b(RuntimeAddress(StubRoutines::forward_exception_entry()));
+ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
} else if (_stub_id == Runtime1::forward_exception_id) {
should_not_reach_here();
} else {
- b(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
}
bind(L);
}
@@ -580,7 +580,7 @@
{ Label L1;
__ cbnz(r0, L1); // have we deoptimized?
- __ b(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
__ bind(L1);
}
@@ -624,7 +624,7 @@
// registers and must leave throwing pc on the stack. A patch may
// have values live in registers so the entry point with the
// exception in tls.
- __ b(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls()));
+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls()));
__ bind(L);
}
@@ -641,7 +641,7 @@
// registers, pop all of our frame but the return address and jump to the deopt blob
restore_live_registers(sasm);
__ leave();
- __ b(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
__ bind(cont);
restore_live_registers(sasm);
@@ -1095,7 +1095,7 @@
DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
assert(deopt_blob != NULL, "deoptimization blob must have been created");
__ leave();
- __ b(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
}
break;
@@ -1304,7 +1304,7 @@
DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
assert(deopt_blob != NULL, "deoptimization blob must have been created");
- __ b(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
}
break;
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/compiledIC_aarch64.cpp
--- a/src/cpu/aarch64/vm/compiledIC_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/compiledIC_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -70,7 +70,8 @@
__ relocate(static_stub_Relocation::spec(mark));
// static stub relocation also tags the Method* in the code-stream.
__ mov_metadata(rmethod, (Metadata*)NULL);
- __ b(__ pc());
+ __ movptr(rscratch1, 0);
+ __ br(rscratch1);
assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big");
__ end_a_stub();
@@ -78,8 +79,7 @@
#undef __
int CompiledStaticCall::to_interp_stub_size() {
- // count a mov mem --> to 3 movz/k and a branch
- return 4 * NativeInstruction::instruction_size;
+ return 7 * NativeInstruction::instruction_size;
}
// Relocation entries for call stub, compiled java to interpreter.
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp
--- a/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp Mon Dec 08 10:46:11 2014 -0500
@@ -37,9 +37,9 @@
#define SUPPORTS_NATIVE_CX8
-// The maximum B/BL offset range on AArch64 is 128MB
-#undef CODE_CACHE_SIZE_LIMIT
-#define CODE_CACHE_SIZE_LIMIT (128*M)
+// The maximum B/BL offset range on AArch64 is 128MB.
+#undef CODE_CACHE_DEFAULT_LIMIT
+#define CODE_CACHE_DEFAULT_LIMIT (128*M)
// According to the ARMv8 ARM, "Concurrent modification and execution
// of instructions can lead to the resulting instruction performing
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/globals_aarch64.hpp
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp Mon Dec 08 10:46:11 2014 -0500
@@ -119,8 +119,10 @@
product(bool, UseNeon, false, \
"Use Neon for CRC32 computation") \
product(bool, UseCRC32, false, \
- "Use CRC32 instructions for CRC32 computation")
+ "Use CRC32 instructions for CRC32 computation") \
+ product(bool, TraceTraps, false, "Trace all traps the signal handler")
#endif
+
#endif // CPU_AARCH64_VM_GLOBALS_AARCH64_HPP
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/icBuffer_aarch64.cpp
--- a/src/cpu/aarch64/vm/icBuffer_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/icBuffer_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -36,9 +36,10 @@
#include "oops/oop.inline2.hpp"
int InlineCacheBuffer::ic_stub_code_size() {
- return NativeInstruction::instruction_size * 5;
+ return (MacroAssembler::far_branches() ? 6 : 4) * NativeInstruction::instruction_size;
}
+#define __ masm->
void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
ResourceMark rm;
@@ -50,13 +51,16 @@
// (2) these ICStubs are removed *before* a GC happens, so the roots disappear
// assert(cached_value == NULL || cached_oop->is_perm(), "must be perm oop");
+ address start = __ pc();
Label l;
- masm->ldr(rscratch2, l);
- masm->b(ExternalAddress(entry_point));
- masm->bind(l);
- masm->emit_int64((int64_t)cached_value);
+ __ ldr(rscratch2, l);
+ __ far_jump(ExternalAddress(entry_point));
+ __ align(wordSize);
+ __ bind(l);
+ __ emit_int64((int64_t)cached_value);
// Only need to invalidate the 1st two instructions - not the whole ic stub
- ICache::invalidate_range(code_begin, NativeInstruction::instruction_size * 2);
+ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size());
+ assert(__ pc() - start == ic_stub_code_size(), "must be");
}
address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
@@ -67,8 +71,8 @@
void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
- // creation also verifies the object
- uintptr_t *p = (uintptr_t *)(code_begin + 8);
+ // The word containing the cached value is at the end of this IC buffer
+ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize);
void* o = (void*)*p;
return o;
}
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -152,7 +152,7 @@
Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
- assert(pd_call_destination(branch) == target, "should be");
+ assert(target_addr_for_insn(branch) == target, "should be");
instructions = 3;
} else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
@@ -220,21 +220,17 @@
// Return the target address for the following sequences
// 1 - adrp Rx, target_page
// ldr/str Ry, [Rx, #offset_in_page]
- // [ 2 - adrp Rx, target_page ] Not handled
- // [ add Ry, Rx, #offset_in_page ]
+ // 2 - adrp Rx, target_page ]
+ // add Ry, Rx, #offset_in_page
// 3 - adrp Rx, target_page (page aligned reloc, offset == 0)
//
- // In the case of type 1 we check that the register is the same and
+ // In the first two cases we check that the register is the same and
// return the target_page + the offset within the page.
- //
// Otherwise we assume it is a page aligned relocation and return
// the target page only. The only cases this is generated is for
// the safepoint polling page or for the card table byte map base so
// we assert as much.
//
- // Note: Strangely, we do not handle 'type 2' relocation (adrp followed
- // by add) which is handled in pd_patch_instruction above.
- //
unsigned insn2 = ((unsigned*)insn_addr)[1];
if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
Instruction_aarch64::extract(insn, 4, 0) ==
@@ -243,6 +239,12 @@
unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
unsigned int size = Instruction_aarch64::extract(insn2, 31, 30);
return address(target_page + (byte_offset << size));
+ } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
+ Instruction_aarch64::extract(insn, 4, 0) ==
+ Instruction_aarch64::extract(insn2, 4, 0)) {
+ // add (immediate)
+ unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
+ return address(target_page + byte_offset);
} else {
assert((jbyte *)target_page ==
((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
@@ -355,6 +357,42 @@
}
}
+void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
+ assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+ assert(CodeCache::find_blob(entry.target()) != NULL,
+ "destination of far call not found in code cache");
+ if (far_branches()) {
+ unsigned long offset;
+ // We can use ADRP here because we know that the total size of
+ // the code cache cannot exceed 2Gb.
+ adrp(tmp, entry, offset);
+ add(tmp, tmp, offset);
+ if (cbuf) cbuf->set_insts_mark();
+ blr(tmp);
+ } else {
+ if (cbuf) cbuf->set_insts_mark();
+ bl(entry);
+ }
+}
+
+void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
+ assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+ assert(CodeCache::find_blob(entry.target()) != NULL,
+ "destination of far call not found in code cache");
+ if (far_branches()) {
+ unsigned long offset;
+ // We can use ADRP here because we know that the total size of
+ // the code cache cannot exceed 2Gb.
+ adrp(tmp, entry, offset);
+ add(tmp, tmp, offset);
+ if (cbuf) cbuf->set_insts_mark();
+ br(tmp);
+ } else {
+ if (cbuf) cbuf->set_insts_mark();
+ b(entry);
+ }
+}
+
int MacroAssembler::biased_locking_enter(Register lock_reg,
Register obj_reg,
Register swap_reg,
@@ -632,23 +670,87 @@
call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
}
-void MacroAssembler::call(Address entry) {
- if (true // reachable(entry)
- ) {
- bl(entry);
+// Maybe emit a call via a trampoline. If the code cache is small
+// trampolines won't be emitted.
+
+void MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
+ assert(entry.rspec().type() == relocInfo::runtime_call_type
+ || entry.rspec().type() == relocInfo::opt_virtual_call_type
+ || entry.rspec().type() == relocInfo::static_call_type
+ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
+
+ unsigned int start_offset = offset();
+ if (far_branches() && !Compile::current()->in_scratch_emit_size()) {
+ emit_trampoline_stub(offset(), entry.target());
+ }
+
+ if (cbuf) cbuf->set_insts_mark();
+ relocate(entry.rspec());
+ if (Assembler::reachable_from_branch_at(pc(), entry.target())) {
+ bl(entry.target());
} else {
- lea(rscratch1, entry);
- blr(rscratch1);
+ bl(pc());
}
}
+
+// Emit a trampoline stub for a call to a target which is too far away.
+//
+// code sequences:
+//
+// call-site:
+// branch-and-link to <destination> or <trampoline stub>
+//
+// Related trampoline stub for this call site in the stub section:
+// load the call target from the constant pool
+// branch (LR still points to the call site above)
+
+void MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
+ address dest) {
+ address stub = start_a_stub(Compile::MAX_stubs_size/2);
+ if (stub == NULL) {
+ start_a_stub(Compile::MAX_stubs_size/2);
+ Compile::current()->env()->record_out_of_memory_failure();
+ return;
+ }
+
+ // For java_to_interp stubs we use rscratch1 as scratch register and
+ // in call trampoline stubs we use rmethod. This way we can
+ // distinguish them (see is_NativeCallTrampolineStub_at()).
+
+ // Create a trampoline stub relocation which relates this trampoline stub
+ // with the call instruction at insts_call_instruction_offset in the
+ // instructions code-section.
+ align(wordSize);
+ relocate(trampoline_stub_Relocation::spec(code()->insts()->start()
+ + insts_call_instruction_offset));
+ const int stub_start_offset = offset();
+
+ // Now, create the trampoline stub's code:
+ // - load the call
+ // - call
+ Label target;
+ ldr(rscratch1, target);
+ br(rscratch1);
+ bind(target);
+ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
+ "should be");
+ emit_int64((int64_t)dest);
+
+ const address stub_start_addr = addr_at(stub_start_offset);
+
+ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
+
+ end_a_stub();
+}
+
void MacroAssembler::ic_call(address entry) {
RelocationHolder rh = virtual_call_Relocation::spec(pc());
// address const_ptr = long_constant((jlong)Universe::non_oop_word());
// unsigned long offset;
// ldr_constant(rscratch2, const_ptr);
movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
- call(Address(entry, rh));
+ trampoline_call(Address(entry, rh));
}
// Implementation of call_VM versions
@@ -1296,8 +1398,7 @@
// public methods
void MacroAssembler::mov(Register r, Address dest) {
- InstructionMark im(this);
- code_section()->relocate(inst_mark(), dest.rspec());
+ code_section()->relocate(pc(), dest.rspec());
u_int64_t imm64 = (u_int64_t)dest.target();
movptr(r, imm64);
}
@@ -3413,6 +3514,7 @@
}
}
+
// Search for str1 in str2 and return index or -1
void MacroAssembler::string_indexof(Register str2, Register str1,
Register cnt2, Register cnt1,
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Mon Dec 08 10:46:11 2014 -0500
@@ -494,6 +494,10 @@
static bool needs_explicit_null_check(intptr_t offset);
static address target_addr_for_insn(address insn_addr, unsigned insn);
+ static address target_addr_for_insn(address insn_addr) {
+ unsigned insn = *(unsigned*)insn_addr;
+ return target_addr_for_insn(insn_addr, insn);
+ }
// Required platform-specific helpers for Label::patch_instructions.
// They _shadow_ the declarations in AbstractAssembler, which are undefined.
@@ -502,8 +506,7 @@
pd_patch_instruction_size(branch, target);
}
static address pd_call_destination(address branch) {
- unsigned insn = *(unsigned*)branch;
- return target_addr_for_insn(branch, insn);
+ return target_addr_for_insn(branch);
}
#ifndef PRODUCT
static void pd_print_patched_instruction(address branch);
@@ -511,6 +514,8 @@
static int patch_oop(address insn_addr, address o);
+ void emit_trampoline_stub(int insts_call_instruction_offset, address target);
+
// The following 4 methods return the offset of the appropriate move instruction
// Support for fast byte/short loading with zero extension (depending on particular CPU)
@@ -916,12 +921,24 @@
// Calls
- // void call(Label& L, relocInfo::relocType rtype);
+ void trampoline_call(Address entry, CodeBuffer *cbuf = NULL);
- // NOTE: this call tranfers to the effective address of entry NOT
- // the address contained by entry. This is because this is more natural
- // for jumps/calls.
- void call(Address entry);
+ static bool far_branches() {
+ return ReservedCodeCacheSize >= branch_range;
+ }
+
+ // Jumps that can reach anywhere in the code cache.
+ // Trashes tmp.
+ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
+ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
+
+ static int far_branch_size() {
+ if (far_branches()) {
+ return 3 * 4; // adrp, add, br
+ } else {
+ return 4;
+ }
+ }
// Emit the CompiledIC call idiom
void ic_call(address entry);
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/methodHandles_aarch64.cpp
--- a/src/cpu/aarch64/vm/methodHandles_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/methodHandles_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -115,7 +115,7 @@
__ ldr(rscratch1,Address(method, entry_offset));
__ br(rscratch1);
__ bind(L_no_such_method);
- __ b(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry()));
+ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry()));
}
void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
@@ -418,7 +418,7 @@
jump_from_method_handle(_masm, rmethod, temp1, for_compiler_entry);
if (iid == vmIntrinsics::_linkToInterface) {
__ bind(L_incompatible_class_change_error);
- __ b(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
+ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
}
}
}
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/nativeInst_aarch64.cpp
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -40,9 +40,87 @@
void NativeCall::verify() { ; }
address NativeCall::destination() const {
- return instruction_address() + displacement();
+ address addr = (address)this;
+ address destination = instruction_address() + displacement();
+
+ // Do we use a trampoline stub for this call?
+ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie.
+ assert(cb && cb->is_nmethod(), "sanity");
+ nmethod *nm = (nmethod *)cb;
+ if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
+ // Yes we do, so get the destination from the trampoline stub.
+ const address trampoline_stub_addr = destination;
+ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination();
+ }
+
+ return destination;
}
+// Similar to replace_mt_safe, but just changes the destination. The
+// important thing is that free-running threads are able to execute this
+// call instruction at all times.
+//
+// Used in the runtime linkage of calls; see class CompiledIC.
+//
+// Add parameter assert_lock to switch off assertion
+// during code generation, where no patching lock is needed.
+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
+ assert(!assert_lock ||
+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()),
+ "concurrent code patching");
+
+ ResourceMark rm;
+ int code_size = NativeInstruction::instruction_size;
+ address addr_call = addr_at(0);
+ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site");
+
+ // Patch the call.
+ if (Assembler::reachable_from_branch_at(addr_call, dest)) {
+ set_destination(dest);
+ } else {
+ address trampoline_stub_addr = get_trampoline();
+
+ // We did not find a trampoline stub because the current codeblob
+ // does not provide this information. The branch will be patched
+ // later during a final fixup, when all necessary information is
+ // available.
+ if (trampoline_stub_addr == 0)
+ return;
+
+ assert (! is_NativeCallTrampolineStub_at(dest), "chained trampolines");
+
+ // Patch the constant in the call's trampoline stub.
+ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest);
+
+ // And patch the call to point to the trampoline
+ set_destination(trampoline_stub_addr);
+ }
+ ICache::invalidate_range(addr_call, instruction_size);
+}
+
+address NativeCall::get_trampoline() {
+ address call_addr = addr_at(0);
+
+ CodeBlob *code = CodeCache::find_blob(call_addr);
+ assert(code != NULL, "Could not find the containing code blob");
+
+ address bl_destination
+ = MacroAssembler::pd_call_destination(call_addr);
+ if (code->content_contains(bl_destination) &&
+ is_NativeCallTrampolineStub_at(bl_destination))
+ return bl_destination;
+
+ // There are no relocations available when the code gets relocated
+ // during CodeBuffer expansion.
+ if (code->relocation_size() == 0)
+ return NULL;
+
+ // If the codeBlob is not a nmethod, this is because we get here from the
+ // CodeBlob constructor, which is called within the nmethod constructor.
+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code);
+}
+
+
// Inserts a native call instruction at a given pc
void NativeCall::insert(address code_pos, address entry) { Unimplemented(); }
@@ -55,7 +133,7 @@
intptr_t NativeMovConstReg::data() const {
// das(uint64_t(instruction_address()),2);
- address addr = MacroAssembler::pd_call_destination(instruction_address());
+ address addr = MacroAssembler::target_addr_for_insn(instruction_address());
if (maybe_cpool_ref(instruction_address())) {
return *(intptr_t*)addr;
} else {
@@ -65,7 +143,7 @@
void NativeMovConstReg::set_data(intptr_t x) {
if (maybe_cpool_ref(instruction_address())) {
- address addr = MacroAssembler::pd_call_destination(instruction_address());
+ address addr = MacroAssembler::target_addr_for_insn(instruction_address());
*(intptr_t*)addr = x;
} else {
MacroAssembler::pd_patch_instruction(instruction_address(), (address)x);
@@ -86,10 +164,10 @@
address pc = instruction_address();
unsigned insn = *(unsigned*)pc;
if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) {
- address addr = MacroAssembler::pd_call_destination(pc);
+ address addr = MacroAssembler::target_addr_for_insn(pc);
return *addr;
} else {
- return (int)(intptr_t)MacroAssembler::pd_call_destination(instruction_address());
+ return (int)(intptr_t)MacroAssembler::target_addr_for_insn(instruction_address());
}
}
@@ -97,7 +175,7 @@
address pc = instruction_address();
unsigned insn = *(unsigned*)pc;
if (maybe_cpool_ref(pc)) {
- address addr = MacroAssembler::pd_call_destination(pc);
+ address addr = MacroAssembler::target_addr_for_insn(pc);
*(long*)addr = x;
} else {
MacroAssembler::pd_patch_instruction(pc, (address)intptr_t(x));
@@ -107,7 +185,7 @@
void NativeMovRegMem::verify() {
#ifdef ASSERT
- address dest = MacroAssembler::pd_call_destination(instruction_address());
+ address dest = MacroAssembler::target_addr_for_insn(instruction_address());
#endif
}
@@ -121,7 +199,7 @@
address NativeJump::jump_destination() const {
- address dest = MacroAssembler::pd_call_destination(instruction_address());
+ address dest = MacroAssembler::target_addr_for_insn(instruction_address());
// We use jump to self as the unresolved address which the inline
// cache code (and relocs) know about
@@ -192,19 +270,39 @@
return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b11100101;
}
+bool NativeInstruction::is_sigill_zombie_not_entrant() {
+ return uint_at(0) == 0xd4bbd5a1; // dcps1 #0xdead
+}
+
+void NativeIllegalInstruction::insert(address code_pos) {
+ *(juint*)code_pos = 0xd4bbd5a1; // dcps1 #0xdead
+}
+
//-------------------------------------------------------------------
-// MT safe inserting of a jump over a jump or a nop (used by nmethod::makeZombie)
+// MT-safe inserting of a jump over a jump or a nop (used by
+// nmethod::make_not_entrant_or_zombie)
void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
- ptrdiff_t disp = dest - verified_entry;
- guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow");
- unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff);
+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch");
+ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop()
+ || nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(),
+ "Aarch64 cannot replace non-jump with jump");
- assert(nativeInstruction_at(verified_entry)->is_jump_or_nop(),
- "Aarch64 cannot replace non-jump with jump");
- *(unsigned int*)verified_entry = insn;
+ // Patch this nmethod atomically.
+ if (Assembler::reachable_from_branch_at(verified_entry, dest)) {
+ ptrdiff_t disp = dest - verified_entry;
+ guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow");
+
+ unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff);
+ *(unsigned int*)verified_entry = insn;
+ } else {
+ // We use an illegal instruction for marking a method as
+ // not_entrant or zombie.
+ NativeIllegalInstruction::insert(verified_entry);
+ }
+
ICache::invalidate_range(verified_entry, instruction_size);
}
@@ -212,23 +310,28 @@
void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
NativeGeneralJump* n_jump = (NativeGeneralJump*)code_pos;
- ptrdiff_t disp = entry - code_pos;
- guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow");
- unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff);
- *(unsigned int*)code_pos = insn;
+ CodeBuffer cb(code_pos, instruction_size);
+ MacroAssembler* a = new MacroAssembler(&cb);
+
+ a->mov(rscratch1, entry);
+ a->br(rscratch1);
+
ICache::invalidate_range(code_pos, instruction_size);
}
// MT-safe patching of a long jump instruction.
void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
- NativeGeneralJump* n_jump = (NativeGeneralJump*)instr_addr;
- assert(n_jump->is_jump_or_nop(),
- "Aarch64 cannot replace non-jump with jump");
- uint32_t instr = *(uint32_t*)code_buffer;
- *(uint32_t*)instr_addr = instr;
- ICache::invalidate_range(instr_addr, instruction_size);
+ ShouldNotCallThis();
}
bool NativeInstruction::is_dtrace_trap() { return false; }
+address NativeCallTrampolineStub::destination(nmethod *nm) const {
+ return ptr_at(data_offset);
+}
+
+void NativeCallTrampolineStub::set_destination(address new_destination) {
+ set_ptr_at(data_offset, new_destination);
+ OrderAccess::release();
+}
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/nativeInst_aarch64.hpp
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp Mon Dec 08 10:46:11 2014 -0500
@@ -53,6 +53,7 @@
class NativeInstruction VALUE_OBJ_CLASS_SPEC {
friend class Relocation;
+ friend bool is_NativeCallTrampolineStub_at(address);
public:
enum { instruction_size = 4 };
inline bool is_nop();
@@ -66,6 +67,7 @@
inline bool is_mov_literal64();
bool is_movz();
bool is_movk();
+ bool is_sigill_zombie_not_entrant();
protected:
address addr_at(int offset) const { return address(this) + offset; }
@@ -73,16 +75,18 @@
s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); }
u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); }
- jint int_at(int offset) const { return *(jint*) addr_at(offset); }
+ jint int_at(int offset) const { return *(jint*) addr_at(offset); }
+ juint uint_at(int offset) const { return *(juint*) addr_at(offset); }
- intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); }
+ address ptr_at(int offset) const { return *(address*) addr_at(offset); }
oop oop_at (int offset) const { return *(oop*) addr_at(offset); }
void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; }
void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; }
- void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; }
+ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; }
+ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; }
void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; }
public:
@@ -130,6 +134,7 @@
address displacement_address() const { return addr_at(displacement_offset); }
address return_address() const { return addr_at(return_address_offset); }
address destination() const;
+
void set_destination(address dest) {
int offset = dest - instruction_address();
unsigned int insn = 0b100101 << 26;
@@ -138,22 +143,8 @@
offset &= (1 << 26) - 1; // mask off insn part
insn |= offset;
set_int_at(displacement_offset, insn);
- ICache::invalidate_range(instruction_address(), instruction_size);
}
- // Similar to replace_mt_safe, but just changes the destination. The
- // important thing is that free-running threads are able to execute
- // this call instruction at all times. If the call is an immediate BL
- // instruction we can simply rely on atomicity of 32-bit writes to
- // make sure other threads will see no intermediate states.
-
- // We cannot rely on locks here, since the free-running threads must run at
- // full speed.
- //
- // Used in the runtime linkage of calls; see class CompiledIC.
- // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
- void set_destination_mt_safe(address dest) { set_destination(dest); }
-
void verify_alignment() { ; }
void verify();
void print();
@@ -175,6 +166,23 @@
static void insert(address code_pos, address entry);
static void replace_mt_safe(address instr_addr, address code_buffer);
+
+ // Similar to replace_mt_safe, but just changes the destination. The
+ // important thing is that free-running threads are able to execute
+ // this call instruction at all times. If the call is an immediate BL
+ // instruction we can simply rely on atomicity of 32-bit writes to
+ // make sure other threads will see no intermediate states.
+
+ // We cannot rely on locks here, since the free-running threads must run at
+ // full speed.
+ //
+ // Used in the runtime linkage of calls; see class CompiledIC.
+ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
+
+ // The parameter assert_lock disables the assertion during code generation.
+ void set_destination_mt_safe(address dest, bool assert_lock = true);
+
+ address get_trampoline();
};
inline NativeCall* nativeCall_at(address address) {
@@ -378,10 +386,10 @@
class NativeGeneralJump: public NativeJump {
public:
enum AArch64_specific_constants {
- instruction_size = 4,
+ instruction_size = 4 * 4,
instruction_offset = 0,
data_offset = 0,
- next_instruction_offset = 4
+ next_instruction_offset = 4 * 4
};
static void insert_unconditional(address code_pos, address entry);
static void replace_mt_safe(address instr_addr, address code_buffer);
@@ -450,4 +458,32 @@
return is_nop() || is_jump();
}
+// Call trampoline stubs.
+class NativeCallTrampolineStub : public NativeInstruction {
+ public:
+
+ enum AArch64_specific_constants {
+ instruction_size = 4 * 4,
+ instruction_offset = 0,
+ data_offset = 2 * 4,
+ next_instruction_offset = 4 * 4
+ };
+
+ address destination(nmethod *nm = NULL) const;
+ void set_destination(address new_destination);
+ ptrdiff_t destination_offset() const;
+};
+
+inline bool is_NativeCallTrampolineStub_at(address address) {
+ NativeInstruction *first_instr = nativeInstruction_at(address);
+ return NativeInstruction::is_ldr_literal_at(address) &&
+ as_Register(Instruction_aarch64::extract(first_instr->int_at(0), 4, 0))
+ == rscratch1;
+}
+
+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address address) {
+ assert(is_NativeCallTrampolineStub_at(address), "no call trampoline found");
+ return (NativeCallTrampolineStub*)address;
+}
+
#endif // CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/relocInfo_aarch64.cpp
--- a/src/cpu/aarch64/vm/relocInfo_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -59,6 +59,13 @@
}
address Relocation::pd_call_destination(address orig_addr) {
+ assert(is_call(), "should be a call here");
+ if (is_call()) {
+ address trampoline = nativeCall_at(addr())->get_trampoline();
+ if (trampoline) {
+ return nativeCallTrampolineStub_at(trampoline)->destination();
+ }
+ }
if (orig_addr != NULL) {
return MacroAssembler::pd_call_destination(orig_addr);
}
@@ -67,6 +74,15 @@
void Relocation::pd_set_call_destination(address x) {
+ assert(is_call(), "should be a call here");
+ if (is_call()) {
+ address trampoline = nativeCall_at(addr())->get_trampoline();
+ if (trampoline) {
+ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false);
+ return;
+ }
+ }
+ assert(addr() != x, "call instruction in an infinite loop");
MacroAssembler::pd_patch_instruction(addr(), x);
}
@@ -80,17 +96,16 @@
}
void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
- // fprintf(stderr, "Try to fix poll reloc at %p to %p\n", addr(), dest);
if (NativeInstruction::maybe_cpool_ref(addr())) {
address old_addr = old_addr_for(addr(), src, dest);
- MacroAssembler::pd_patch_instruction(addr(), pd_call_destination(old_addr));
+ MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr));
}
}
void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
if (NativeInstruction::maybe_cpool_ref(addr())) {
address old_addr = old_addr_for(addr(), src, dest);
- MacroAssembler::pd_patch_instruction(addr(), pd_call_destination(old_addr));
+ MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr));
}
}
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -741,7 +741,7 @@
__ cmp(rscratch1, tmp);
__ ldr(rmethod, Address(holder, CompiledICHolder::holder_method_offset()));
__ br(Assembler::EQ, ok);
- __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
__ bind(ok);
// Method might have been compiled since the call site was patched to
@@ -749,7 +749,7 @@
// the call site corrected.
__ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
__ cbz(rscratch1, skip_fixup);
- __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
__ block_comment("} c2i_unverified_entry");
}
@@ -1168,7 +1168,7 @@
static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs, int type) {
CodeBlob *cb = CodeCache::find_blob(dest);
if (cb) {
- __ bl(RuntimeAddress(dest));
+ __ far_call(RuntimeAddress(dest));
} else {
assert((unsigned)gpargs < 256, "eek!");
assert((unsigned)fpargs < 32, "eek!");
@@ -1539,7 +1539,7 @@
__ cmp_klass(receiver, ic_reg, rscratch1);
__ br(Assembler::EQ, hit);
- __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
// Verified entry point must be aligned
__ align(8);
@@ -2099,7 +2099,7 @@
__ bind(exception_pending);
// and forward the exception
- __ b(RuntimeAddress(StubRoutines::forward_exception_entry()));
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
}
// Slow path locking & unlocking
@@ -2835,7 +2835,7 @@
RegisterSaver::restore_live_registers(masm);
- __ b(RuntimeAddress(StubRoutines::forward_exception_entry()));
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
// No exception case
__ bind(noException);
@@ -2931,7 +2931,7 @@
__ str(zr, Address(rthread, JavaThread::vm_result_offset()));
__ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
- __ b(RuntimeAddress(StubRoutines::forward_exception_entry()));
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
// -------------
// make sure all code is generated
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -2244,7 +2244,7 @@
__ should_not_reach_here();
__ bind(L);
#endif // ASSERT
- __ b(RuntimeAddress(StubRoutines::forward_exception_entry()));
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
// codeBlob framesize is in words (not VMRegImpl::slot_size)
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -524,7 +524,7 @@
// Note: the restored frame is not necessarily interpreted.
// Use the shared runtime version of the StackOverflowError.
assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
- __ b(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry()));
+ __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry()));
// all done with frame size check
__ bind(after_frame_check);
diff -r a45df3cb0eb5 src/cpu/aarch64/vm/vtableStubs_aarch64.cpp
--- a/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -180,7 +180,7 @@
__ br(rscratch1);
__ bind(throw_icce);
- __ b(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
+ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
__ flush();
diff -r a45df3cb0eb5 src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp
--- a/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -376,7 +376,14 @@
// Java thread running in Java code => find exception handler if any
// a fault inside compiled code, the interpreter, or a stub
- if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
+ // Handle signal from NativeJump::patch_verified_entry().
+ if ((sig == SIGILL || sig == SIGTRAP)
+ && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) {
+ if (TraceTraps) {
+ tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
+ }
+ stub = SharedRuntime::get_handle_wrong_method_stub();
+ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
stub = SharedRuntime::get_poll_stub(pc);
} else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) {
// BugId 4454115: A read from a MappedByteBuffer can fault
diff -r a45df3cb0eb5 src/share/vm/runtime/arguments.cpp
--- a/src/share/vm/runtime/arguments.cpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/share/vm/runtime/arguments.cpp Mon Dec 08 10:46:11 2014 -0500
@@ -1162,7 +1162,7 @@
// Increase the code cache size - tiered compiles a lot more.
if (FLAG_IS_DEFAULT(ReservedCodeCacheSize)) {
FLAG_SET_ERGO(uintx, ReservedCodeCacheSize,
- MIN2(CODE_CACHE_SIZE_LIMIT, ReservedCodeCacheSize * 5));
+ MIN2(CODE_CACHE_DEFAULT_LIMIT, ReservedCodeCacheSize * 5));
}
// Enable SegmentedCodeCache if TieredCompilation is enabled and ReservedCodeCacheSize >= 240M
if (FLAG_IS_DEFAULT(SegmentedCodeCache) && ReservedCodeCacheSize >= 240*M) {
diff -r a45df3cb0eb5 src/share/vm/utilities/globalDefinitions.hpp
--- a/src/share/vm/utilities/globalDefinitions.hpp Fri Nov 21 10:28:35 2014 -0500
+++ b/src/share/vm/utilities/globalDefinitions.hpp Mon Dec 08 10:46:11 2014 -0500
@@ -419,6 +419,8 @@
// The maximum size of the code cache. Can be overridden by targets.
#define CODE_CACHE_SIZE_LIMIT (2*G)
+// Allow targets to reduce the default size of the code cache.
+#define CODE_CACHE_DEFAULT_LIMIT CODE_CACHE_SIZE_LIMIT
#ifdef TARGET_ARCH_x86
# include "globalDefinitions_x86.hpp"