/hg/icedtea7-forest/hotspot: Summary: Add support for large code...

enevill at icedtea.classpath.org
Fri Jan 8 20:04:14 UTC 2016


changeset f9b06d6bb411 in /hg/icedtea7-forest/hotspot
details: http://icedtea.classpath.org/hg/icedtea7-forest/hotspot?cmd=changeset;node=f9b06d6bb411
author: enevill
date: Thu Dec 17 15:07:46 2015 +0000

	Summary: Add support for large code cache
	Contributed-by: aph at redhat.com, edward.nevill at gmail.com
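
The 128M threshold that drives these changes comes from the b/bl encoding: the immediate is a signed 26-bit word offset (sf(offset, 25, 0) in the assembler changes below), so a direct branch reaches at most +/- 2^25 instructions * 4 bytes = 128M. A code cache reserved beyond that needs the adrp/add/br far-branch idiom and the trampoline stubs this patch adds. Below is a minimal standalone sketch of the range arithmetic, with hypothetical names rather than HotSpot's (the patch's own versions are reachable_from_branch_at and far_branches in assembler_aarch64.hpp):

    // Sketch only, not HotSpot code: b/bl carry a signed 26-bit word
    // offset, so the reach is +/- (1 << 25) instructions * 4 bytes.
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    static const int64_t branch_range = (1L << 25) * 4;  // 128M

    static bool reachable_from_branch_at(int64_t branch, int64_t target) {
      return std::llabs(target - branch) < branch_range;
    }

    int main() {
      int64_t pc = 0x40000000;
      // 100M away: a plain b/bl still reaches (prints 1)
      std::printf("%d\n", reachable_from_branch_at(pc, pc + (100L << 20)));
      // 240M away: needs adrp/add/br or a trampoline (prints 0)
      std::printf("%d\n", reachable_from_branch_at(pc, pc + (240L << 20)));
      return 0;
    }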


diffstat:

 src/cpu/aarch64/vm/aarch64.ad                      |   56 +++----
 src/cpu/aarch64/vm/assembler_aarch64.cpp           |  139 ++++++++++++++++++--
 src/cpu/aarch64/vm/assembler_aarch64.hpp           |   66 +++++++--
 src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp        |   28 ++--
 src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp     |   48 ++----
 src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp     |    8 +-
 src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp   |    4 +-
 src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp         |   14 +-
 src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp   |    4 +
 src/cpu/aarch64/vm/globals_aarch64.hpp             |    7 +-
 src/cpu/aarch64/vm/icBuffer_aarch64.cpp            |   21 +-
 src/cpu/aarch64/vm/methodHandles_aarch64.cpp       |    2 +-
 src/cpu/aarch64/vm/nativeInst_aarch64.cpp          |  141 ++++++++++++++++----
 src/cpu/aarch64/vm/nativeInst_aarch64.hpp          |   74 ++++++++--
 src/cpu/aarch64/vm/relocInfo_aarch64.cpp           |   29 +++-
 src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp       |   14 +-
 src/cpu/aarch64/vm/stubGenerator_aarch64.cpp       |    2 +-
 src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp |    2 +-
 src/cpu/aarch64/vm/vm_version_aarch64.cpp          |    4 +
 src/cpu/aarch64/vm/vtableStubs_aarch64.cpp         |    2 +-
 src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp   |    9 +-
 src/share/vm/code/compiledIC.cpp                   |    2 +-
 src/share/vm/runtime/arguments.cpp                 |    6 +-
 src/share/vm/utilities/globalDefinitions.hpp       |    5 +
 24 files changed, 490 insertions(+), 197 deletions(-)

diffs (truncated from 1576 to 500 lines):

diff -r eeb4a3ec4563 -r f9b06d6bb411 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Thu Dec 17 15:07:46 2015 +0000
@@ -775,16 +775,18 @@
 
 int MachCallRuntimeNode::ret_addr_offset() {
   // for generated stubs the call will be
-  //   bl(addr)
+  //   far_call(addr)
  // for real runtime callouts it will be
  //   mov(rscratch1, RuntimeAddress(addr))
   //   blrt rscratch1
   CodeBlob *cb = CodeCache::find_blob(_entry_point);
   if (cb) {
-    return 4;
+    return MacroAssembler::far_branch_size();
   } else {
     // A 48-bit address.  See movptr().
-    return 16;
+    // then a blrt
+    // return 16;
+    return 4 * NativeInstruction::instruction_size;
   }
 }
 
@@ -1361,7 +1363,7 @@
 uint size_java_to_interp()
 {
  // on jdk7 we only need a movoop and a branch
-  return 2 * NativeInstruction::instruction_size;
+  return 7 * NativeInstruction::instruction_size;
 }
 
 // Offset from start of compiled java to interpreter stub to the load
@@ -1395,7 +1397,8 @@
   // pool oop and GC overwrites the patch with movk/z 0x0000 again
   __ movoop(rmethod, (jobject) NULL);
   // This is recognized as unresolved by relocs/nativeinst/ic code
-  __ b(__ pc());
+  __ movptr(rscratch1, 0);
+  __ br(rscratch1);
 
   assert((__ offset() - offset) <= (int)size_java_to_interp(), "stub too big");
   // Update current stubs pointer and restore insts_end.
@@ -1433,13 +1436,12 @@
   // This is the unverified entry point.
   MacroAssembler _masm(&cbuf);
 
-  // no need to worry about 4-byte of br alignment on AArch64
   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
   Label skip;
   // TODO
   // can we avoid this skip and still use a reloc?
   __ br(Assembler::EQ, skip);
-  __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
   __ bind(skip);
 }
 
@@ -1454,8 +1456,7 @@
 
 uint size_exception_handler()
 {
-  // count up to 4 movz/n/k instructions and one branch instruction
-  return 5 * NativeInstruction::instruction_size;
+  return MacroAssembler::far_branch_size();
 }
 
 // Emit exception handler code.
@@ -1470,7 +1471,7 @@
   __ start_a_stub(size_exception_handler());
   if (base == NULL)  return 0;  // CodeBuffer::expand failed
   int offset = __ offset();
-  __ b(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
+  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
   __ end_a_stub();
   return offset;
@@ -1478,8 +1479,8 @@
 
 uint size_deopt_handler()
 {
-  // count one adr and one branch instruction
-  return 2 * NativeInstruction::instruction_size;
+  // count one adr and one far branch instruction
+  return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
 }
 
 // Emit deopt handler code.
@@ -1494,8 +1495,7 @@
   int offset = __ offset();
 
   __ adr(lr, __ pc());
-  // should we load this into rscratch1 and use a br?
-  __ b(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 
   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
   __ end_a_stub();
@@ -2802,11 +2802,11 @@
     address addr = (address)$meth$$method;
     if (!_method) {
       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
-      __ bl(Address(addr, relocInfo::runtime_call_type));
+      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
     } else if (_optimized_virtual) {
-      __ bl(Address(addr, relocInfo::opt_virtual_call_type));
+      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
     } else {
-      __ bl(Address(addr, relocInfo::static_call_type));
+      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
     }
 
     if (_method) {
@@ -2818,22 +2818,19 @@
   enc_class aarch64_enc_java_handle_call(method meth) %{
     MacroAssembler _masm(&cbuf);
 
-    // TODO fixme
-    // this is supposed to preserve and restore SP around the call
-    // need to check it works
+    // RFP is preserved across all calls, even compiled calls.
+    // Use it to preserve SP.
     __ mov(rfp, sp);
 
     address mark = __ pc();
     address addr = (address)$meth$$method;
     if (!_method) {
-      // TODO check this
-      // think we are calling generated Java here not x86
       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
-      __ bl(Address(addr, relocInfo::runtime_call_type));
+      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
     } else if (_optimized_virtual) {
-      __ bl(Address(addr, relocInfo::opt_virtual_call_type));
+      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
     } else {
-      __ bl(Address(addr, relocInfo::static_call_type));
+      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
     }
 
     if (_method) {
@@ -2848,10 +2845,7 @@
   enc_class aarch64_enc_java_dynamic_call(method meth) %{
     MacroAssembler _masm(&cbuf);
     address entry = (address)$meth$$method;
-    RelocationHolder rh = virtual_call_Relocation::spec(__ pc());
-    // we use movoop here as per emit_java_to_interp and c1's ic_call
-    __ movoop(rscratch2, (jobject)Universe::non_oop_word(), /*immediate*/true);
-    __ bl(Address(entry, rh));
+    __ ic_call(entry);
   %}
 
   enc_class aarch64_enc_call_epilog() %{
@@ -2872,7 +2866,7 @@
     address entry = (address)$meth$$method;
     CodeBlob *cb = CodeCache::find_blob(entry);
     if (cb) {
-      __ bl(Address(entry));
+      __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
     } else {
       int gpcnt;
       int fpcnt;
@@ -2885,7 +2879,7 @@
 
   enc_class aarch64_enc_rethrow() %{
     MacroAssembler _masm(&cbuf);
-    __ b(RuntimeAddress(OptoRuntime::rethrow_stub()));
+    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
   %}
 
   enc_class aarch64_enc_ret() %{
diff -r eeb4a3ec4563 -r f9b06d6bb411 src/cpu/aarch64/vm/assembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp	Thu Dec 17 15:07:46 2015 +0000
@@ -1369,7 +1369,6 @@
   if (L.is_bound()) {
     br(cc, target(L));
   } else {
-    InstructionMark im(this);
     L.add_patch_at(code(), locator());
     br(cc, pc());
   }
@@ -1380,7 +1379,6 @@
   if (L.is_bound()) {
     (this->*insn)(target(L));
   } else {
-    InstructionMark im(this);
     L.add_patch_at(code(), locator());
     (this->*insn)(pc());
   }
@@ -1391,7 +1389,6 @@
   if (L.is_bound()) {
     (this->*insn)(r, target(L));
   } else {
-    InstructionMark im(this);
     L.add_patch_at(code(), locator());
     (this->*insn)(r, pc());
   }
@@ -1402,7 +1399,6 @@
   if (L.is_bound()) {
     (this->*insn)(r, bitpos, target(L));
   } else {
-    InstructionMark im(this);
     L.add_patch_at(code(), locator());
     (this->*insn)(r, bitpos, pc());
   }
@@ -1412,7 +1408,6 @@
   if (L.is_bound()) {
     (this->*insn)(target(L), op);
   } else {
-    InstructionMark im(this);
     L.add_patch_at(code(), locator());
     (this->*insn)(pc(), op);
   }
@@ -1653,8 +1648,8 @@
     Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
     Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
     Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
-    assert(pd_call_destination(branch) == target, "should be");
-    instructions = 2;
+    assert(target_addr_for_insn(branch) == target, "should be");
+    instructions = 3;
   } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
              Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
     // nothing to do
@@ -1861,6 +1856,42 @@
   }
 }
 
+void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
+  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+  assert(CodeCache::find_blob(entry.target()) != NULL,
+         "destination of far call not found in code cache");
+  if (far_branches()) {
+    unsigned long offset;
+    // We can use ADRP here because we know that the total size of
+    // the code cache cannot exceed 2Gb.
+    adrp(tmp, entry, offset);
+    add(tmp, tmp, offset);
+    if (cbuf) cbuf->set_insts_mark();
+    blr(tmp);
+  } else {
+    if (cbuf) cbuf->set_insts_mark();
+    bl(entry);
+  }
+}
+
+void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
+  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+  assert(CodeCache::find_blob(entry.target()) != NULL,
+         "destination of far call not found in code cache");
+  if (far_branches()) {
+    unsigned long offset;
+    // We can use ADRP here because we know that the total size of
+    // the code cache cannot exceed 2Gb.
+    adrp(tmp, entry, offset);
+    add(tmp, tmp, offset);
+    if (cbuf) cbuf->set_insts_mark();
+    br(tmp);
+  } else {
+    if (cbuf) cbuf->set_insts_mark();
+    b(entry);
+  }
+}
+
 int MacroAssembler::biased_locking_enter(Register lock_reg,
                                          Register obj_reg,
                                          Register swap_reg,
@@ -2135,14 +2166,93 @@
   call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
 }
 
-void MacroAssembler::call(Address entry) {
-  if (true // reachable(entry)
-      ) {
-    bl(entry);
+// Maybe emit a call via a trampoline.  If the code cache is small
+// trampolines won't be emitted.
+
+void MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
+  assert(entry.rspec().type() == relocInfo::runtime_call_type
+         || entry.rspec().type() == relocInfo::opt_virtual_call_type
+         || entry.rspec().type() == relocInfo::static_call_type
+         || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
+
+  unsigned int start_offset = offset();
+#ifdef COMPILER2
+  if (far_branches() && !Compile::current()->in_scratch_emit_size()) {
+    emit_trampoline_stub(offset(), entry.target());
+  }
+#endif
+
+  if (cbuf) cbuf->set_insts_mark();
+  relocate(entry.rspec());
+#ifdef COMPILER2
+  if (!far_branches()) {
+    bl(entry.target());
   } else {
-    lea(rscratch1, entry);
-    blr(rscratch1);
+    bl(pc());
   }
+#else
+  bl(entry.target());
+#endif
+}
+
+
+// Emit a trampoline stub for a call to a target which is too far away.
+//
+// code sequences:
+//
+// call-site:
+//   branch-and-link to <destination> or <trampoline stub>
+//
+// Related trampoline stub for this call site in the stub section:
+//   load the call target from the constant pool
+//   branch (LR still points to the call site above)
+
+void MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
+                                             address dest) {
+#ifdef COMPILER2
+  address stub = start_a_stub(Compile::MAX_stubs_size/2);
+  if (stub == NULL) {
+    start_a_stub(Compile::MAX_stubs_size/2);
+    Compile::current()->env()->record_out_of_memory_failure();
+    return;
+  }
+
+  // Create a trampoline stub relocation which relates this trampoline stub
+  // with the call instruction at insts_call_instruction_offset in the
+  // instructions code-section.
+  align(wordSize);
+  relocate(trampoline_stub_Relocation::spec(code()->insts()->start()
+                                            + insts_call_instruction_offset));
+  const int stub_start_offset = offset();
+
+  // Now, create the trampoline stub's code:
+  // - load the call
+  // - call
+  Label target;
+  ldr(rscratch1, target);
+  br(rscratch1);
+  bind(target);
+  assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
+         "should be");
+  emit_long64((int64_t)dest);
+
+  const address stub_start_addr = addr_at(stub_start_offset);
+
+  assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
+
+  end_a_stub();
+#else
+  ShouldNotReachHere();
+#endif
+}
+
+void MacroAssembler::ic_call(address entry) {
+  RelocationHolder rh = virtual_call_Relocation::spec(pc());
+  // address const_ptr = long_constant((jlong)Universe::non_oop_word());
+  // unsigned long offset;
+  // ldr_constant(rscratch2, const_ptr);
+  movoop(rscratch2, (jobject)Universe::non_oop_word(), /*immediate*/true);
+  trampoline_call(Address(entry, rh));
 }
 
 // Implementation of call_VM versions
@@ -2806,8 +2916,7 @@
 // public methods
 
 void MacroAssembler::mov(Register r, Address dest) {
-  InstructionMark im(this);
-  code_section()->relocate(inst_mark(), dest.rspec());
+  code_section()->relocate(pc(), dest.rspec());
   u_int64_t imm64 = (u_int64_t)dest.target();
   movptr(r, imm64);
 }
diff -r eeb4a3ec4563 -r f9b06d6bb411 src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Dec 17 15:07:46 2015 +0000
@@ -845,16 +845,28 @@
 
 #undef INSN
 
+  // The maximum range of a branch is fixed for the AArch64
+  // architecture.  In debug mode we shrink it in order to test
+  // trampolines, but not so small that branches in the interpreter
+  // are out of range.
+  static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);
+
+  static bool reachable_from_branch_at(address branch, address target) {
+    return uabs(target - branch) < branch_range;
+  }
+
   // Unconditional branch (immediate)
-#define INSN(NAME, opcode)					\
-  void NAME(address dest) {					\
-    starti;							\
-    long offset = (dest - pc()) >> 2;				\
-    f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0);	\
-  }								\
-  void NAME(Label &L) {						\
-    wrap_label(L, &Assembler::NAME);				\
-  }								\
+
+#define INSN(NAME, opcode)                                              \
+  void NAME(address dest) {                                             \
+    starti;                                                             \
+    long offset = (dest - pc()) >> 2;                                   \
+    DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \
+    f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0);               \
+  }                                                                     \
+  void NAME(Label &L) {                                                 \
+    wrap_label(L, &Assembler::NAME);                                    \
+  }                                                                     \
   void NAME(const Address &dest);
 
   INSN(b, 0);
@@ -2741,6 +2753,10 @@
   static bool needs_explicit_null_check(intptr_t offset);
 
   static address target_addr_for_insn(address insn_addr, unsigned insn);
+  static address target_addr_for_insn(address insn_addr) {
+    unsigned insn = *(unsigned*)insn_addr;
+    return target_addr_for_insn(insn_addr, insn);
+  }
 
   // Required platform-specific helpers for Label::patch_instructions.
   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
@@ -2749,8 +2765,7 @@
     pd_patch_instruction_size (branch, target);
   }
   static address pd_call_destination(address branch) {
-    unsigned insn = *(unsigned*)branch;
-    return target_addr_for_insn(branch, insn);
+    return target_addr_for_insn(branch);
   }
 #ifndef PRODUCT
   static void pd_print_patched_instruction(address branch);
@@ -2758,6 +2773,8 @@
 
   static int patch_oop(address insn_addr, address o);
 
+  void emit_trampoline_stub(int insts_call_instruction_offset, address target);
+
   // The following 4 methods return the offset of the appropriate move instruction
 
   // Support for fast byte/short loading with zero extension (depending on particular CPU)
@@ -3265,12 +3282,27 @@
 
   // Calls
 
-  // void call(Label& L, relocInfo::relocType rtype);
-
-  // NOTE: this call transfers to the effective address of entry NOT
-  // the address contained by entry. This is because this is more natural
-  // for jumps/calls.
-  void call(Address entry);
+  void trampoline_call(Address entry, CodeBuffer *cbuf = NULL);
+
+  static bool far_branches() {
+    return ReservedCodeCacheSize > branch_range;
+  }
+
+  // Jumps that can reach anywhere in the code cache.
+  // Trashes tmp.
+  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
+  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
+
+  static int far_branch_size() {
+    if (far_branches()) {
+      return 3 * 4;  // adrp, add, br
+    } else {
+      return 4;
+    }
+  }
+
+  // Emit the CompiledIC call idiom
+  void ic_call(address entry);
 
   // Jumps
 
diff -r eeb4a3ec4563 -r f9b06d6bb411 src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Tue Nov 24 09:02:26 2015 +0000
+++ b/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Thu Dec 17 15:07:46 2015 +0000
@@ -115,7 +115,7 @@
   __ bind(_entry);
   ce->store_parameter(_method->as_register(), 1);
   ce->store_parameter(_bci, 0);
-  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
   __ b(_continuation);
@@ -143,7 +143,7 @@
   } else {
     stub_id = Runtime1::throw_range_check_failed_id;
   }
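
The trampoline stub generated by emit_trampoline_stub above is deliberately tiny: an ldr of the 64-bit destination from the literal that immediately follows, then an indirect br. Since the bl at the call site already set LR, the br needs no link, and retargeting the call only means rewriting the 8-byte literal (the assert in emit_trampoline_stub pins it at NativeCallTrampolineStub::data_offset, i.e. 8 bytes into the stub). Here is a hypothetical C++ picture of the 16-byte layout, with illustrative field names rather than HotSpot's accessors:

    // Sketch only: layout of the stub emitted into the nmethod's stub
    // section; real code reads/patches it via NativeCallTrampolineStub.
    #include <cstdint>

    struct TrampolineStub {
      uint32_t ldr_literal;  // ldr rscratch1, <dest below> (PC-relative)
      uint32_t br_insn;      // br rscratch1; LR still holds the return
                             // address written by the call site's bl
      uint64_t dest;         // at data_offset == 8; patched when the
                             // call is re-resolved
    };

    static_assert(sizeof(TrampolineStub) == 16,
                  "two instructions plus an 8-byte literal");

    int main() { return 0; }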

