[aarch64-port-dev ] Use 2- and 3-instruction immediate form of movoop and mov_metadata in C2-generated code

Andrew Haley aph at redhat.com
Thu Jun 19 14:45:28 UTC 2014


Now that we have a three-instruction version of load address
constant, use it in C2.

Also fix a couple of unrelated bugs that were uncovered during
testing.

Also use lea(reg, Address) rather than mov(reg, Address); the mov
form was unnecessarily confusing.

Andrew.



# HG changeset patch
# User aph
# Date 1403014483 14400
#      Tue Jun 17 10:14:43 2014 -0400
# Node ID 745e0357529b3a046af1bcb56f1493a94657b924
# Parent  f6b18d9a37d99880e982db1ec70f68d1e17fa8c3
Use 2- and 3-instruction immediate form of movoop and mov_metadata in C2-generated code.
Fix patching code to handle 2- and 3-word forms.
Fix offset out of range bug in frame generation.
Use lea (rather than mov) in mov(reg, Address) form.

diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/aarch64.ad	Tue Jun 17 10:14:43 2014 -0400
@@ -962,7 +962,7 @@
   if (framesize == 0) {
     // Is this even possible?
     st->print("stp  lr, rfp, [sp, #%d]!", -(2 * wordSize));
-  } else if (framesize < (1 << 12)) {
+  } else if (framesize < (1 << 7)) {
     st->print("sub  sp, sp, #%d\n\t", framesize);
     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
   } else {
@@ -991,7 +991,7 @@
   if (framesize == 0) {
     // Is this even possible?
     __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
-  } else if (framesize < (1 << 12)) {
+  } else if (framesize < (1 << 7)) {
     __ sub(sp, sp, framesize);
     __ stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
   } else {
@@ -1040,7 +1040,7 @@

   if (framesize == 0) {
     st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
-  } else if (framesize < (1 << 12)) {
+  } else if (framesize < (1 << 7)) {
     st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
     st->print("add  sp, sp, #%d\n\t", framesize);
   } else {
@@ -1064,7 +1064,7 @@

   if (framesize == 0) {
     __ ldp(rfp, lr, Address(__ post(sp, 2 * wordSize)));
-  } else if (framesize < (1 << 12)) {
+  } else if (framesize < (1 << 7)) {
     __ ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
     __ add(sp, sp, framesize);
   } else {
@@ -2143,7 +2143,7 @@
     int call_offset = (nargs + 11) * 4;
     int field_offset = in_bytes(JavaThread::frame_anchor_offset()) +
                        in_bytes(JavaFrameAnchor::last_Java_pc_offset());
-    __ mov(rscratch1, InternalAddress(pc + call_offset));
+    __ lea(rscratch1, InternalAddress(pc + call_offset));
     __ str(rscratch1, Address(rthread, field_offset));
   %}

@@ -2570,9 +2570,9 @@
     } else {
       relocInfo::relocType rtype = $src->constant_reloc();
       if (rtype == relocInfo::oop_type) {
-        __ movoop(dst_reg, (jobject)con);
+        __ movoop(dst_reg, (jobject)con, /*mt_safe*/false);
       } else if (rtype == relocInfo::metadata_type) {
-        __ mov_metadata(dst_reg, (Metadata*)con);
+        __ mov_metadata(dst_reg, (Metadata*)con, /*mt_safe*/false);
       } else {
         assert(rtype == relocInfo::none, "unexpected reloc type");
 	if (con < (address)(uintptr_t)os::vm_page_size()) {
@@ -2625,7 +2625,7 @@
     } else {
       relocInfo::relocType rtype = $src->constant_reloc();
       assert(rtype == relocInfo::oop_type, "unexpected reloc type");
-      __ set_narrow_oop(dst_reg, (jobject)con);
+      __ set_narrow_oop(dst_reg, (jobject)con, /*mt_safe*/false);
     }
   %}

@@ -2644,7 +2644,7 @@
     } else {
       relocInfo::relocType rtype = $src->constant_reloc();
       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
-      __ set_narrow_klass(dst_reg, (Klass *)con);
+      __ set_narrow_klass(dst_reg, (Klass *)con, /*mt_safe*/false);
     }
   %}

@@ -2912,7 +2912,7 @@
       int fpcnt;
       int rtype;
       getCallInfo(tf(), gpcnt, fpcnt, rtype);
-      __ mov(rscratch1, RuntimeAddress(entry));
+      __ lea(rscratch1, RuntimeAddress(entry));
       __ blrt(rscratch1, gpcnt, fpcnt, rtype);
     }
   %}
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Tue Jun 17 10:14:43 2014 -0400
@@ -63,7 +63,7 @@
   set_last_Java_frame(sp, rfp, retaddr, rscratch1);

   // do the call
-  mov(rscratch1, RuntimeAddress(entry));
+  lea(rscratch1, RuntimeAddress(entry));
   blrt(rscratch1, args_size + 1, 8, 1);
   bind(retaddr);
   int call_offset = offset();
@@ -553,7 +553,7 @@
   Label retaddr;
   __ set_last_Java_frame(sp, rfp, retaddr, rscratch1);
   // do the call
-  __ mov(rscratch1, RuntimeAddress(target));
+  __ lea(rscratch1, RuntimeAddress(target));
   __ blrt(rscratch1, 1, 0, 1);
   __ bind(retaddr);
   OopMapSet* oop_maps = new OopMapSet();
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Jun 17 10:14:43 2014 -0400
@@ -137,14 +137,15 @@
     offset >>= 2;
     Instruction_aarch64::spatch(branch, 23, 5, offset);
     Instruction_aarch64::patch(branch, 30, 29, offset_lo);
-  } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
+  } else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
+    u_int64_t dest = (u_int64_t)target;
     // Move wide constant
-    u_int64_t dest = (u_int64_t)target;
     assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
     assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
     Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
-    Instruction_aarch64::patch(branch += 4, 20, 5, (dest >>= 16) & 0xffff);
-    Instruction_aarch64::patch(branch += 4, 20, 5, (dest >>= 16) & 0xffff);
+    Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
+    Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
+    assert(pd_call_destination(branch) == target, "should be");
   } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
              Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
     // nothing to do
@@ -154,6 +155,19 @@
   }
 }

+void MacroAssembler::patch_oop(address insn_addr, address o) {
+  unsigned insn = *(unsigned*)insn_addr;
+  if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) {
+      // Move narrow constant
+      assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
+      narrowOop n = oopDesc::encode_heap_oop((oop)o);
+      Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
+      Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
+  } else {
+    pd_patch_instruction(insn_addr, o);
+  }
+}
+
 address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
   long offset = 0;
   if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
@@ -218,8 +232,8 @@
       ShouldNotReachHere();
     }
   } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
-    // Move address constant: movz, movk, movk.  See movptr().
     u_int32_t *insns = (u_int32_t *)insn_addr;
+    // Move wide constant: movz, movk, movk.  See movptr().
     assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
     assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
     return address(u_int64_t(Instruction_aarch64::extract(insns[0], 20, 5))
@@ -2520,29 +2534,33 @@
   decode_klass_not_null(r, r);
 }

-// TODO
-//
-// these next two methods load a narrow oop or klass constant into a
-// register. they currently do the dumb thing of installing 64 bits of
-// unencoded constant into the register and then encoding it.
-// installing the encoded 32 bit constant directly requires updating
-// the relocation code so it can recognize that this is a 32 bit load
-// rather than a 64 bit load.
-
-void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
-  assert (UseCompressedOops, "should only be used for compressed headers");
+void  MacroAssembler::set_narrow_oop(Register dst, jobject obj, bool mt_safe) {
+  assert (UseCompressedOops, "should only be used for compressed oops");
   assert (Universe::heap() != NULL, "java heap should be initialized");
   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  movoop(dst, obj);
-  encode_heap_oop_not_null(dst);
+
+  int oop_index = oop_recorder()->find_index(obj);
+  assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
+
+  InstructionMark im(this);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  code_section()->relocate(inst_mark(), rspec);
+  movz(dst, 0xDEAD, 16);
+  movk(dst, 0xBEEF);
 }

-
-void  MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
+void  MacroAssembler::set_narrow_klass(Register dst, Klass* k, bool mt_safe) {
   assert (UseCompressedClassPointers, "should only be used for compressed headers");
   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  mov_metadata(dst, k);
-  encode_klass_not_null(dst);
+  int index = oop_recorder()->find_index(k);
+  assert(! Universe::heap()->is_in_reserved(k), "should not be an oop");
+
+  InstructionMark im(this);
+  RelocationHolder rspec = metadata_Relocation::spec(index);
+  code_section()->relocate(inst_mark(), rspec);
+  narrowKlass nk = Klass::encode_klass(k);
+  movz(dst, (nk >> 16), 16);
+  movk(dst, nk & 0xffff);
 }

 void MacroAssembler::load_heap_oop(Register dst, Address src)
@@ -2552,7 +2570,7 @@
     decode_heap_oop(dst);
   } else {
     ldr(dst, src);
-  }
+  }
 }

 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src)
@@ -2764,7 +2782,11 @@
   return Address((address)obj, rspec);
 }

-void MacroAssembler::movoop(Register dst, jobject obj) {
+// Move an oop into a register.  mt_safe is false iff we are not going
+// to patch this instruction while the code is being executed by
+// another thread.  In that case we can use move immediates rather
+// than the constant pool.
+void MacroAssembler::movoop(Register dst, jobject obj, bool mt_safe) {
   int oop_index;
   if (obj == NULL) {
     oop_index = oop_recorder()->allocate_oop_index(obj);
@@ -2773,7 +2795,7 @@
     assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
   }
   RelocationHolder rspec = oop_Relocation::spec(oop_index);
-  address const_ptr = long_constant((jlong)obj);
+  address const_ptr = mt_safe ? long_constant((jlong)obj) : NULL;
   if (! const_ptr) {
     mov(dst, Address((address)obj, rspec));
   } else {
@@ -2782,7 +2804,8 @@
   }
 }

-void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
+// Move a metadata address into a register.
+void MacroAssembler::mov_metadata(Register dst, Metadata* obj, bool mt_safe) {
   int oop_index;
   if (obj == NULL) {
     oop_index = oop_recorder()->allocate_metadata_index(obj);
@@ -2790,7 +2813,7 @@
     oop_index = oop_recorder()->find_index(obj);
   }
   RelocationHolder rspec = metadata_Relocation::spec(oop_index);
-  address const_ptr = long_constant((jlong)obj);
+  address const_ptr = mt_safe ? long_constant((jlong)obj) : NULL;
   if (! const_ptr) {
     mov(dst, Address((address)obj, rspec));
   } else {
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Jun 17 10:14:43 2014 -0400
@@ -406,6 +406,8 @@
   int push(unsigned int bitset, Register stack);
   int pop(unsigned int bitset, Register stack);

+  void mov(Register dst, Address a);
+
 public:
   int push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
   int pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
@@ -438,7 +440,6 @@
     mov(dst, (long)i);
   }

-  void mov(Register dst, Address a);
   void movptr(Register r, uintptr_t imm64);

   // macro instructions for accessing and updating floating point
@@ -493,6 +494,8 @@
   static void pd_print_patched_instruction(address branch);
 #endif

+  static void patch_oop(address insn_addr, address o);
+
   // The following 4 methods return the offset of the appropriate move instruction

   // Support for fast byte/short loading with zero extension (depending on particular CPU)
@@ -734,7 +737,7 @@
   void encode_heap_oop_not_null(Register dst, Register src);
   void decode_heap_oop_not_null(Register dst, Register src);

-  void set_narrow_oop(Register dst, jobject obj);
+  void set_narrow_oop(Register dst, jobject obj, bool mt_safe = true);
   // currently unimplemented
 #if 0
   void set_narrow_oop(Address dst, jobject obj);
@@ -747,7 +750,7 @@
   void encode_klass_not_null(Register dst, Register src);
   void decode_klass_not_null(Register dst, Register src);

-  void set_narrow_klass(Register dst, Klass* k);
+  void set_narrow_klass(Register dst, Klass* k, bool mt_safe = true);
   // currently unimplemented
 #if 0
   void set_narrow_klass(Address dst, Klass* k);
@@ -1103,7 +1106,7 @@

   // Data

-  void mov_metadata(Register dst, Metadata* obj);
+  void mov_metadata(Register dst, Metadata* obj, bool mt_safe = true);
   Address allocate_metadata_address(Metadata* obj);
   Address constant_oop_address(jobject obj);
   // unimplemented
@@ -1111,7 +1114,7 @@
   void pushoop(jobject obj);
 #endif

-  void movoop(Register dst, jobject obj);
+  void movoop(Register dst, jobject obj, bool mt_safe = true);

   // sign extend as need a l to ptr sized element
   void movl2ptr(Register dst, Address src) { Unimplemented(); }
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/relocInfo_aarch64.cpp
--- a/src/cpu/aarch64/vm/relocInfo_aarch64.cpp	Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp	Tue Jun 17 10:14:43 2014 -0400
@@ -33,10 +33,15 @@


 void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
-  MacroAssembler::pd_patch_instruction(addr(), x);
+  switch(type()) {
+  case relocInfo::oop_type:
+    MacroAssembler::patch_oop(addr(), x);
+    break;
+  default:
+    MacroAssembler::pd_patch_instruction(addr(), x);
+  }
 }

-
 address Relocation::pd_call_destination(address orig_addr) {
   if (orig_addr != NULL) {
     return MacroAssembler::pd_call_destination(orig_addr);
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Tue Jun 17 10:14:43 2014 -0400
@@ -316,7 +316,7 @@

   __ mov(c_rarg0, rmethod);
   __ mov(c_rarg1, lr);
-  __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
   __ blrt(rscratch1, 2, 0, 0);

   __ pop_CPU_state();
@@ -1168,7 +1168,7 @@
   } else {
     assert((unsigned)gpargs < 256, "eek!");
     assert((unsigned)fpargs < 32, "eek!");
-    __ mov(rscratch1, RuntimeAddress(dest));
+    __ lea(rscratch1, RuntimeAddress(dest));
     __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type);
     __ blrt(rscratch1, rscratch2);
     // __ blrt(rscratch1, gpargs, fpargs, type);
@@ -1965,9 +1965,9 @@
   assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
 #endif
     if (!is_critical_native) {
-      __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+      __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
     } else {
-      __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
+      __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
     }
     __ blrt(rscratch1, 1, 0, 1);
     // Restore any method result value
@@ -2388,7 +2388,7 @@
   }
 #endif // ASSERT
   __ mov(c_rarg0, rthread);
-  __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
   __ blrt(rscratch1, 1, 0, 1);
   __ bind(retaddr);

@@ -2518,7 +2518,7 @@

   __ mov(c_rarg0, rthread);
   __ movw(c_rarg1, rcpool); // second arg: exec_mode
-  __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
   __ blrt(rscratch1, 2, 0, 0);

   // Set an oopmap for the call site
@@ -2871,7 +2871,7 @@
     __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);

     __ mov(c_rarg0, rthread);
-    __ mov(rscratch1, RuntimeAddress(destination));
+    __ lea(rscratch1, RuntimeAddress(destination));

     __ blrt(rscratch1, 1, 0, 1);
     __ bind(retaddr);
diff -r f6b18d9a37d9 -r 745e0357529b src/cpu/aarch64/vm/templateTable_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Tue Jun 17 09:30:17 2014 -0400
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Tue Jun 17 10:14:43 2014 -0400
@@ -2431,7 +2431,7 @@
     // we take the time to call into the VM.
     Label L1;
     assert_different_registers(cache, index, r0);
-    __ mov(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
+    __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
     __ ldrw(r0, Address(rscratch1));
     __ cbz(r0, L1);



More information about the aarch64-port-dev mailing list