[aarch64-port-dev ] Simple accessors and C1 compilation
Andrew Haley
aph at redhat.com
Fri Jun 20 13:17:54 UTC 2014
Simple accessors are C1-compiled by the tiered compilation policy on
the reasonable assumption that C1- and C2-compiled code will be the
same. At the moment, though, it's not, and it really should be.
C1 was generating an instruction sequence inferior to C2's, so I
copied the C2 sequence over to C1. I also moved the frame creation
code into MacroAssembler.
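Roughly, the two call sites end up like this (condensed from the hunks
below; as far as I can tell from the + 2 * wordSize adjustment, C1's
frame size does not include the saved fp/lr pair, while C2's does):
  // C2 prologue, in aarch64.ad, after the stack bang:
  __ build_frame(framesize);               // framesize already covers fp/lr
  // C1 prologue, in c1_MacroAssembler_aarch64.cpp:
  generate_stack_overflow_check(framesize);
  MacroAssembler::build_frame(framesize + 2 * wordSize);  // add room for fp/lr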
Also, we were padding the unverified entry point with NOPs, in order
to make sure that the verified entry point was aligned so that the
entire accessor fits into a single icache line. As it happens, we
don't need to do any padding because an accessor and its inline cache
check fit nicely into a single 64-byte cache line, like this:
0x0000007fa11b0540: ldr wscratch1, [x1,#8]
0x0000007fa11b0544: cmp xscratch2, xscratch1, lsl #3
0x0000007fa11b0548: b.eq 0x0000007fa11b0550
0x0000007fa11b054c: b 0x0000007fa10cdd80 ; {runtime_call}
[Verified Entry Point]
0x0000007fa11b0550: nop
;; 0xFFFFFFFFFFFF7000
0x0000007fa11b0554: movn xscratch2, #0x8fff
0x0000007fa11b0558: ldr xzr, [sp,x9]
0x0000007fa11b055c: sub sp, sp, #0x40
0x0000007fa11b0560: stp xfp, xlr, [sp,#48]
;; block B0 [0, 0]
0x0000007fa11b0564: ldr w0, [x1,#12]
0x0000007fa11b0568: lsl x0, x0, #3 ;*aload_0
; - java.lang.ref.Reference::get at 0 (line 202)
0x0000007fa11b056c: ldp xfp, xlr, [sp,#48]
0x0000007fa11b0570: add sp, sp, #0x40
0x0000007fa11b0574: adrp xscratch1, 0x0000007fb7ff7000
; {poll_return}
0x0000007fa11b0578: ldr wzr, [xscratch1] ; {poll_return}
0x0000007fa11b057c: ret
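Counting it off: the code runs from ...0540 to ...057c inclusive,
i.e. 16 four-byte instructions, exactly 64 bytes, and the unverified
entry point here is itself 64-byte aligned, so the whole method,
inline cache check included, sits in one icache line.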
This patch treats accessors as a special case and omits the padding.
It also avoids executing a long string of alignment NOPs on the hit
path when the inline cache check succeeds.
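In outline, the reworked check_icache is (condensed from the
c1_LIRAssembler hunk below; 4 * 4 is the size in bytes of the
four-instruction check):
  int start = __ offset();
  __ inline_cache_check(receiver, ic_klass);   // now just cmp_klass()
  Label dont;
  __ br(Assembler::EQ, dont);                  // hit: jump over the miss stub and any padding
  __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  if (! method()->is_accessor() || __ offset() - start > 4 * 4)
    __ align(CodeEntryAlignment);              // only pad when we can't fit in one line
  __ bind(dont);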
I converted native methods to use the fast version of class
comparison.
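The fast version is cmp_klass(), which compares against the
(compressed) klass word in place instead of decoding it first; with a
zero klass base it boils down to the ldr/cmp...lsl #3 pair at the top
of the listing above. A sketch, not the exact helper, which also has
to handle a non-zero klass base:
  void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
    if (UseCompressedClassPointers) {
      ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
      // compare against the compressed word by shifting it, rather
      // than materializing the full klass pointer
      cmp(trial_klass, tmp, Assembler::LSL, Universe::narrow_klass_shift());
    } else {
      ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
      cmp(trial_klass, tmp);
    }
  }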
I removed some unused cruft.
Andrew.
diff -r 8d1201669a7a src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad Thu Jun 19 16:02:36 2014 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad Fri Jun 20 09:15:38 2014 -0400
@@ -950,7 +950,7 @@
if (framesize == 0) {
// Is this even possible?
st->print("stp lr, rfp, [sp, #%d]!", -(2 * wordSize));
- } else if (framesize < (1 << 7)) {
+ } else if (framesize < ((1 << 9) + 2 * wordSize)) {
st->print("sub sp, sp, #%d\n\t", framesize);
st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
} else {
@@ -976,21 +976,7 @@
if (C->need_stack_bang(framesize))
__ generate_stack_overflow_check(framesize);
- if (framesize == 0) {
- // Is this even possible?
- __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
- } else if (framesize < ((1 << 9) + 2 * wordSize)) {
- __ sub(sp, sp, framesize);
- __ stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
- } else {
- __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
- if (framesize < ((1 << 12) + 2 * wordSize))
- __ sub(sp, sp, framesize - 2 * wordSize);
- else {
- __ mov(rscratch1, framesize - 2 * wordSize);
- __ sub(sp, sp, rscratch1);
- }
- }
+ __ build_frame(framesize);
if (NotifySimulator) {
__ notify(Assembler::method_entry);
@@ -1032,7 +1018,7 @@
if (framesize == 0) {
st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
- } else if (framesize < (1 << 7)) {
+ } else if (framesize < ((1 << 9) + 2 * wordSize)) {
st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
st->print("add sp, sp, #%d\n\t", framesize);
} else {
@@ -1054,20 +1040,7 @@
MacroAssembler _masm(&cbuf);
int framesize = C->frame_slots() << LogBytesPerInt;
- if (framesize == 0) {
- __ ldp(rfp, lr, Address(__ post(sp, 2 * wordSize)));
- } else if (framesize < ((1 << 9) + 2 * wordSize)) {
- __ ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
- __ add(sp, sp, framesize);
- } else {
- if (framesize < ((1 << 12) + 2 * wordSize))
- __ add(sp, sp, framesize - 2 * wordSize);
- else {
- __ mov(rscratch1, framesize - 2 * wordSize);
- __ add(sp, sp, rscratch1);
- }
- __ ldp(rfp, lr, Address(__ post(sp, 2 * wordSize)));
- }
+ __ remove_frame(framesize);
if (NotifySimulator) {
__ notify(Assembler::method_reentry);
diff -r 8d1201669a7a src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Thu Jun 19 16:02:36 2014 +0100
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Fri Jun 20 09:15:38 2014 -0400
@@ -290,23 +290,25 @@
int LIR_Assembler::check_icache() {
Register receiver = FrameMap::receiver_opr->as_register();
Register ic_klass = IC_Klass;
- const int ic_cmp_size = 4 * 4;
- const bool do_post_padding = VerifyOops || UseCompressedClassPointers;
- if (!do_post_padding) {
- // insert some nops so that the verified entry point is aligned on CodeEntryAlignment
- while ((__ offset() + ic_cmp_size) % CodeEntryAlignment != 0) {
- __ nop();
- }
- }
- int offset = __ offset();
- __ inline_cache_check(receiver, IC_Klass);
- assert(__ offset() % CodeEntryAlignment == 0 || do_post_padding, "alignment must be correct");
- if (do_post_padding) {
+ int start_offset = __ offset();
+ __ inline_cache_check(receiver, ic_klass);
+
+ // if icache check fails, then jump to runtime routine
+ // Note: RECEIVER must still contain the receiver!
+ Label dont;
+ __ br(Assembler::EQ, dont);
+ __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+ // We align the verified entry point unless the method body
+ // (including its inline cache check) will fit in a single 64-byte
+ // icache line.
+ if (! method()->is_accessor() || __ offset() - start_offset > 4 * 4) {
// force alignment after the cache check.
- // It's been verified to be aligned if !VerifyOops
__ align(CodeEntryAlignment);
}
- return offset;
+
+ __ bind(dont);
+ return start_offset;
}
diff -r 8d1201669a7a src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Thu Jun 19 16:02:36 2014 +0100
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Fri Jun 20 09:15:38 2014 -0400
@@ -404,23 +404,12 @@
// explicit NULL check not needed since load from [klass_offset] causes a trap
// check against inline cache
assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check");
- int start_offset = offset();
- load_klass(rscratch1, receiver);
- cmp(rscratch1, iCache);
-
- // if icache check fails, then jump to runtime routine
- // Note: RECEIVER must still contain the receiver!
- Label dont;
- br(Assembler::EQ, dont);
- b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
- bind(dont);
- const int ic_cmp_size = 4 * 4;
- assert(UseCompressedClassPointers || offset() - start_offset == ic_cmp_size, "check alignment in emit_method_entry");
+ cmp_klass(receiver, iCache, rscratch1);
}
-void C1_MacroAssembler::build_frame(int frame_size_in_bytes) {
+void C1_MacroAssembler::build_frame(int framesize) {
// If we have to make this method not-entrant we'll overwrite its
// first instruction with a jump. For this action to be legal we
// must ensure that this first instruction is a B, BL, NOP, BKPT,
@@ -428,18 +417,15 @@
nop();
// Make sure there is enough stack space for this method's activation.
// Note that we do this before doing an enter().
- generate_stack_overflow_check(frame_size_in_bytes);
- enter();
- sub(sp, sp, frame_size_in_bytes); // does not emit code for frame_size == 0
+ generate_stack_overflow_check(framesize);
+ MacroAssembler::build_frame(framesize + 2 * wordSize);
if (NotifySimulator) {
notify(Assembler::method_entry);
}
}
-
-void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) {
- add(sp, sp, frame_size_in_bytes); // Does not emit code for frame_size == 0
- ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+void C1_MacroAssembler::remove_frame(int framesize) {
+ MacroAssembler::remove_frame(framesize + 2 * wordSize);
if (NotifySimulator) {
notify(Assembler::method_reentry);
}
diff -r 8d1201669a7a src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp Thu Jun 19 16:02:36 2014 +0100
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp Fri Jun 20 09:15:38 2014 -0400
@@ -102,20 +102,6 @@
int rsp_offset() const { return _rsp_offset; }
void set_rsp_offset(int n) { _rsp_offset = n; }
- // Note: NEVER push values directly, but only through following push_xxx functions;
- // This helps us to track the rsp changes compared to the entry rsp (->_rsp_offset)
-
- void push_jint (jint i) { Unimplemented(); }
- void push_oop (jobject o) { Unimplemented(); }
- // Seems to always be in wordSize
- void push_addr (Address a) { Unimplemented(); }
- void push_reg (Register r) { Unimplemented(); }
- void pop_reg (Register r) { Unimplemented(); }
-
- void dec_stack (int nof_words) { Unimplemented(); }
-
- void dec_stack_after_call (int nof_words) { Unimplemented(); }
-
void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN;
#endif // CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP
diff -r 8d1201669a7a src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Thu Jun 19 16:02:36 2014 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Fri Jun 20 09:15:38 2014 -0400
@@ -1842,8 +1842,12 @@
void MacroAssembler::reinit_heapbase()
{
if (UseCompressedOops) {
- lea(rheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
- ldr(rheapbase, Address(rheapbase));
+ if (Universe::is_fully_initialized()) {
+ mov(rheapbase, Universe::narrow_ptrs_base());
+ } else {
+ lea(rheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
+ ldr(rheapbase, Address(rheapbase));
+ }
}
}
@@ -3313,3 +3317,39 @@
return UseAcqRelForVolatileFields;
#endif
}
+
+void MacroAssembler::build_frame(int framesize) {
+ if (framesize == 0) {
+ // Is this even possible?
+ stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
+ } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+ sub(sp, sp, framesize);
+ stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
+ } else {
+ stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
+ if (framesize < ((1 << 12) + 2 * wordSize))
+ sub(sp, sp, framesize - 2 * wordSize);
+ else {
+ mov(rscratch1, framesize - 2 * wordSize);
+ sub(sp, sp, rscratch1);
+ }
+ }
+}
+
+void MacroAssembler::remove_frame(int framesize) {
+ if (framesize == 0) {
+ ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+ } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+ ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
+ add(sp, sp, framesize);
+ } else {
+ if (framesize < ((1 << 12) + 2 * wordSize))
+ add(sp, sp, framesize - 2 * wordSize);
+ else {
+ mov(rscratch1, framesize - 2 * wordSize);
+ add(sp, sp, rscratch1);
+ }
+ ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+ }
+}
+
diff -r 8d1201669a7a src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Jun 19 16:02:36 2014 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Fri Jun 20 09:15:38 2014 -0400
@@ -151,6 +151,10 @@
strw(scratch, a);
}
+ // Frame creation and destruction shared between JITs.
+ void build_frame(int framesize);
+ void remove_frame(int framesize);
+
virtual void _call_Unimplemented(address call_site) {
mov(rscratch2, call_site);
haltsim();
diff -r 8d1201669a7a src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Thu Jun 19 16:02:36 2014 +0100
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Fri Jun 20 09:15:38 2014 -0400
@@ -1529,8 +1529,7 @@
assert_different_registers(ic_reg, receiver, rscratch1);
__ verify_oop(receiver);
- __ load_klass(rscratch1, receiver);
- __ cmp(ic_reg, rscratch1);
+ __ cmp_klass(receiver, ic_reg, rscratch1);
__ br(Assembler::EQ, hit);
__ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));