/hg/icedtea7-forest/hotspot: 6 new changesets

enevill at icedtea.classpath.org
Wed Jun 8 08:29:58 UTC 2016


changeset f904e6fab2a4 in /hg/icedtea7-forest/hotspot
details: http://icedtea.classpath.org/hg/icedtea7-forest/hotspot?cmd=changeset;node=f904e6fab2a4
author: aph
date: Wed Apr 20 11:14:10 2016 +0000

	8154739: AArch64: TemplateTable::fast_xaccess loads in wrong mode
	Reviewed-by: roland
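
	Note: the itos case of fast_xaccess used the 64-bit ldr to load a
	Java int, and the volatile path issued only a LoadLoad barrier. A
	minimal sketch of the two corrected calls, as they appear in the
	templateTable_aarch64.cpp hunks below:

	    // ldr into an X register is a 64-bit load; a Java int (itos)
	    // needs the 32-bit form so only the field itself is read:
	    __ ldrw(r0, Address(r0, r1, Address::lsl(0)));
	    // a volatile read must order later loads *and* stores:
	    __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);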


changeset 42f564f222fb in /hg/icedtea7-forest/hotspot
details: http://icedtea.classpath.org/hg/icedtea7-forest/hotspot?cmd=changeset;node=42f564f222fb
author: aph
date: Mon May 23 15:39:13 2016 +0000

	8150045: arraycopy causes segfaults in SATB during garbage collection
	Reviewed-by: roland
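
	Note: the stubGenerator hunks below wire the missing G1 pre-barrier
	into the checked oop copy and stop the barrier helpers from
	clobbering registers the stubs rely on. A hedged sketch of the SATB
	invariant being restored (helper names hypothetical, not HotSpot API):

	    // Before an oop array region is overwritten, G1's concurrent
	    // marker must see the old values, or it can miss live objects
	    // and later crash during collection.
	    void pre_barrier_for_copy(oop* dst, size_t count,
	                              bool dest_uninitialized) {
	      if (dest_uninitialized) return;  // fresh memory holds no old oops
	      for (size_t i = 0; i < count; i++)
	        satb_enqueue(dst[i]);          // hypothetical enqueue of old value
	    }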


changeset 8a31bb24de0f in /hg/icedtea7-forest/hotspot
details: http://icedtea.classpath.org/hg/icedtea7-forest/hotspot?cmd=changeset;node=8a31bb24de0f
author: enevill
date: Thu Feb 04 16:24:28 2016 +0000

	8148783: aarch64: SEGV running SpecJBB2013
	Summary: Fix calculation of offset for adrp
	Reviewed-by: aph
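
	Note: adrp only reaches +/- 4 GB of the pc, and the old calculation
	masked the page offset to 20 bits, which could select the wrong page
	for distant targets. The patch instead builds an adrp target from
	the low 32 bits of the destination plus the high bits of the current
	pc, then patches bits 32..47 with movk. In outline, following the
	second assembler_aarch64.cpp hunk below:

	    // Build an address adrp can reach, then fix up the high half:
	    unsigned long target      = (unsigned long)dest.target();
	    unsigned long adrp_target =
	        (target & 0xffffffffUL) | ((unsigned long)pc() & 0xffff00000000UL);
	    _adrp(reg1, (address)adrp_target);  // low 32 bits, pc-relative page
	    movk(reg1, target >> 32, 32);       // overwrite bits 32..47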


changeset fbd4d25cd06f in /hg/icedtea7-forest/hotspot
details: http://icedtea.classpath.org/hg/icedtea7-forest/hotspot?cmd=changeset;node=fbd4d25cd06f
author: enevill
date: Wed Feb 03 11:34:12 2016 +0000

	8148948: aarch64: generate_copy_longs calls align() incorrectly
	Summary: Fix alignments
	Reviewed-by: aph
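
	Note: MacroAssembler::align() takes a byte modulus, not a log2
	exponent, so align(6) requested a meaningless 6-byte boundary where
	the copy stubs presumably wanted 2^6 = 64. With CodeEntryAlignment
	now unconditionally 64 on aarch64 (see the globals_aarch64.hpp hunk
	below), the corrected call is self-describing:

	    // align() pads the code buffer with nops to the given byte modulus:
	    __ align(CodeEntryAlignment);  // 64 bytes, per globals_aarch64.hpp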


changeset ff7c30cebd52 in /hg/icedtea7-forest/hotspot
details: http://icedtea.classpath.org/hg/icedtea7-forest/hotspot?cmd=changeset;node=ff7c30cebd52
author: fyang
date: Wed Jan 27 12:20:53 2016 +0800

	8148328: aarch64: redundant lsr instructions in stub code.
	Summary: avoid redundant lsr instructions in jbyte_arraycopy and jbyte_disjoint_arraycopy.
	Reviewed-by: aph
	Contributed-by: felix.yang at linaro.org
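
	Note: for jbyte copies the element granularity is 1, so
	exact_log2(granularity) is 0 and the stub emitted a do-nothing
	lsr #0 on every byte-copy entry. The fix, from the stubGenerator
	hunk below, simply skips the zero shift:

	    // Convert the byte adjustment to an element count; for byte
	    // arrays the shift is 0, so emitting lsr would be wasted work:
	    int shift = exact_log2(granularity);
	    if (shift)  __ lsr(rscratch2, rscratch2, shift);
	    __ sub(count, count, rscratch2);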


changeset 9a209fde3800 in /hg/icedtea7-forest/hotspot
details: http://icedtea.classpath.org/hg/icedtea7-forest/hotspot?cmd=changeset;node=9a209fde3800
author: fyang
date: Fri May 27 20:38:38 2016 +0800

	8157906: aarch64: some more integer rotate instructions are never emitted
	Summary: fix wrong definition of source operand of left rotate instructions
	Reviewed-by: aph
	Contributed-by: teng.lu at linaro.org
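
	Note: the 32-bit rotate rules in aarch64.ad were declared with
	64-bit iRegL operands and expanded to rolL_rReg, so C2's matcher
	could never apply them to an int-sized OrI tree and the rotate
	instructions were never emitted. The tree those rules target is the
	usual rotate-left idiom; a hedged C++ rendering:

	    // C2 sees rotate-left as OrI(LShiftI x s, URShiftI x (SubI 32 s)):
	    static inline uint32_t rol32(uint32_t x, unsigned s) {
	      return (x << (s & 31)) | (x >> ((32 - s) & 31));
	    }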


diffstat:

 src/cpu/aarch64/vm/aarch64.ad                |  14 +++++-----
 src/cpu/aarch64/vm/assembler_aarch64.cpp     |  16 +++++++----
 src/cpu/aarch64/vm/globals_aarch64.hpp       |   4 --
 src/cpu/aarch64/vm/stubGenerator_aarch64.cpp |  39 ++++++++++++++++-----------
 src/cpu/aarch64/vm/templateTable_aarch64.cpp |   4 +-
 5 files changed, 42 insertions(+), 35 deletions(-)

diffs (235 lines):

diff -r 2d8e12787f80 -r 9a209fde3800 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Tue Apr 19 19:52:39 2016 -0700
+++ b/src/cpu/aarch64/vm/aarch64.ad	Fri May 27 20:38:38 2016 +0800
@@ -9090,21 +9090,21 @@
   %}
 %}
 
-instruct rolI_rReg_Var_C_32(iRegLNoSp dst, iRegL src, iRegI shift, immI_32 c_32, rFlagsReg cr)
+instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
 %{
   match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));
 
   expand %{
-    rolL_rReg(dst, src, shift, cr);
-  %}
-%}
-
-instruct rolI_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
+    rolI_rReg(dst, src, shift, cr);
+  %}
+%}
+
+instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
 %{
   match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));
 
   expand %{
-    rolL_rReg(dst, src, shift, cr);
+    rolI_rReg(dst, src, shift, cr);
   %}
 %}
 
diff -r 2d8e12787f80 -r 9a209fde3800 src/cpu/aarch64/vm/assembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp	Tue Apr 19 19:52:39 2016 -0700
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp	Fri May 27 20:38:38 2016 +0800
@@ -1632,7 +1632,10 @@
                      Instruction_aarch64::extract(insn2, 4, 0)) {
         // movk #imm16<<32
         Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32);
-        offset &= (1<<20)-1;
+        long dest = ((long)target & 0xffffffffL) | ((long)branch & 0xffff00000000L);
+        long pc_page = (long)branch >> 12;
+        long adr_page = (long)dest >> 12;
+        offset = adr_page - pc_page;
         instructions = 2;
       }
     }
@@ -4920,11 +4923,12 @@
   if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
     _adrp(reg1, dest.target());
   } else {
-    unsigned long pc_page = (unsigned long)pc() >> 12;
-    long offset = dest_page - pc_page;
-    offset = (offset & ((1<<20)-1)) << 12;
-    _adrp(reg1, pc()+offset);
-    movk(reg1, ((unsigned long)dest.target() >> 32), 32);
+    unsigned long target = (unsigned long)dest.target();
+    unsigned long adrp_target
+      = (target & 0xffffffffUL) | ((unsigned long)pc() & 0xffff00000000UL);
+           
+    _adrp(reg1, (address)adrp_target);
+    movk(reg1, target >> 32, 32);
   }
   byte_offset = (unsigned long)dest.target() & 0xfff;
 }
diff -r 2d8e12787f80 -r 9a209fde3800 src/cpu/aarch64/vm/globals_aarch64.hpp
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp	Tue Apr 19 19:52:39 2016 -0700
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Fri May 27 20:38:38 2016 +0800
@@ -48,11 +48,7 @@
 // the the vep is aligned at CodeEntryAlignment whereas c2 only aligns
 // the uep and the vep doesn't get real alignment but just slops on by
 // only assured that the entry instruction meets the 5 byte size requirement.
-#ifdef COMPILER2
 define_pd_global(intx, CodeEntryAlignment,       64);
-#else
-define_pd_global(intx, CodeEntryAlignment,       16);
-#endif // COMPILER2
 define_pd_global(intx, OptoLoopAlignment,        16);
 define_pd_global(intx, InlineFrequencyCount,     100);
 
diff -r 2d8e12787f80 -r 9a209fde3800 src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Tue Apr 19 19:52:39 2016 -0700
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Fri May 27 20:38:38 2016 +0800
@@ -687,7 +687,7 @@
   //     count   -  element count
   //     tmp     - scratch register
   //
-  //     Destroy no registers!
+  //     Destroy no registers except rscratch1 and rscratch2
   //
   void  gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
     BarrierSet* bs = Universe::heap()->barrier_set();
@@ -696,12 +696,13 @@
     case BarrierSet::G1SATBCTLogging:
       // With G1, don't generate the call if we statically know that the target in uninitialized
       if (!dest_uninitialized) {
-	__ push(RegSet::range(r0, r29), sp);         // integer registers except lr & sp
+	__ push_call_clobbered_registers();
 	if (count == c_rarg0) {
 	  if (addr == c_rarg1) {
 	    // exactly backwards!!
-	    __ stp(c_rarg0, c_rarg1, __ pre(sp, -2 * wordSize));
-	    __ ldp(c_rarg1, c_rarg0, __ post(sp, -2 * wordSize));
+            __ mov(rscratch1, c_rarg0);
+            __ mov(c_rarg0, c_rarg1);
+            __ mov(c_rarg1, rscratch1);
 	  } else {
 	    __ mov(c_rarg1, count);
 	    __ mov(c_rarg0, addr);
@@ -711,7 +712,7 @@
 	  __ mov(c_rarg1, count);
 	}
 	__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
-	__ pop(RegSet::range(r0, r29), sp);         // integer registers except lr & sp        }
+	__ pop_call_clobbered_registers();
 	break;
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
@@ -742,7 +743,7 @@
       case BarrierSet::G1SATBCTLogging:
 
         {
-	  __ push(RegSet::range(r0, r29), sp);         // integer registers except lr & sp
+	  __ push_call_clobbered_registers();
           // must compute element count unless barrier set interface is changed (other platforms supply count)
           assert_different_registers(start, end, scratch);
           __ lea(scratch, Address(end, BytesPerHeapOop));
@@ -751,7 +752,7 @@
           __ mov(c_rarg0, start);
           __ mov(c_rarg1, scratch);
           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
-	  __ pop(RegSet::range(r0, r29), sp);         // integer registers except lr & sp        }
+	  __ pop_call_clobbered_registers();
         }
         break;
       case BarrierSet::CardTableModRef:
@@ -811,7 +812,7 @@
     assert_different_registers(s, d, count, rscratch1);
 
     Label again, large, small;
-    __ align(6);
+    __ align(CodeEntryAlignment);
     __ bind(start);
     __ cmp(count, 8);
     __ br(Assembler::LO, small);
@@ -856,7 +857,7 @@
 
     __ ret(lr);
 
-    __ align(6);
+    __ align(CodeEntryAlignment);
     __ bind(large);
 
     // Fill 8 registers
@@ -1007,7 +1008,8 @@
       }
       // rscratch2 is the byte adjustment needed to align s.
       __ cbz(rscratch2, aligned);
-      __ lsr(rscratch2, rscratch2, exact_log2(granularity));
+      int shift = exact_log2(granularity);
+      if (shift)  __ lsr(rscratch2, rscratch2, shift);
       __ sub(count, count, rscratch2);
 
 #if 0
@@ -1386,10 +1388,10 @@
   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
   //
   address generate_disjoint_oop_copy(bool aligned, address *entry,
-				     const char *name, bool dest_uninitialized = false) {
+				     const char *name, bool dest_uninitialized) {
     const bool is_oop = true;
     const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
-    return generate_disjoint_copy(size, aligned, is_oop, entry, name);
+    return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized);
   }
 
   // Arguments:
@@ -1404,10 +1406,11 @@
   //
   address generate_conjoint_oop_copy(bool aligned,
 				     address nooverlap_target, address *entry,
-				     const char *name, bool dest_uninitialized = false) {
+				     const char *name, bool dest_uninitialized) {
     const bool is_oop = true;
     const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
-    return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, name);
+    return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry,
+                                  name, dest_uninitialized);
   }
 
 
@@ -1514,6 +1517,8 @@
     }
 #endif //ASSERT
 
+    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
+
     // save the original count
     __ mov(count_save, count);
 
@@ -1655,9 +1660,11 @@
       bool aligned = !UseCompressedOops;
 
       StubRoutines::_arrayof_oop_disjoint_arraycopy
-	= generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy");
+	= generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy",
+                                     /*dest_uninitialized*/false);
       StubRoutines::_arrayof_oop_arraycopy
-	= generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy");
+	= generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy",
+                                     /*dest_uninitialized*/false);
       // Aligned versions without pre-barriers
       StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
 	= generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit",
diff -r 2d8e12787f80 -r 9a209fde3800 src/cpu/aarch64/vm/templateTable_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Tue Apr 19 19:52:39 2016 -0700
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Fri May 27 20:38:38 2016 +0800
@@ -3032,7 +3032,7 @@
   __ null_check(r0);
   switch (state) {
   case itos:
-    __ ldr(r0, Address(r0, r1, Address::lsl(0)));
+    __ ldrw(r0, Address(r0, r1, Address::lsl(0)));
     break;
   case atos:
     __ load_heap_oop(r0, Address(r0, r1, Address::lsl(0)));
@@ -3052,7 +3052,7 @@
     __ ldrw(r3, Address(r2, in_bytes(constantPoolCacheOopDesc::base_offset() +
 				     ConstantPoolCacheEntry::flags_offset())));
     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
-    __ membar(MacroAssembler::LoadLoad);
+    __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
     __ bind(notVolatile);
   }
 

