[aarch64-port-dev ] hg/icedtea7-forest-aarch64/hotspot: 4 new changesets

Andrew Dinn adinn at redhat.com
Thu Nov 20 14:51:33 UTC 2014


[Forwarding bounced check-in message from adinn at icedtea.classpath.org]

------ This is a copy of the message, including all the headers. ------

Return-path: <adinn at icedtea.classpath.org>
Received: from localhost ([127.0.0.1] helo=icedtea.classpath.org)
	by icedtea.classpath.org with esmtp (Exim 4.69)
	(envelope-from <adinn at icedtea.classpath.org>)
	id 1XrSdA-0003aF-6n
	for aarch64-port-dev at openjdk.java.net; Thu, 20 Nov 2014 14:22:44 +0000
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Date: Thu, 20 Nov 2014 14:22:44 +0000
Subject: /hg/icedtea7-forest-aarch64/hotspot: 4 new changesets
From: adinn at icedtea.classpath.org
X-Hg-Notification: changeset fa42b2cc6671
Message-Id:
<hg.fa42b2cc6671.1416493364.-5017525213744097322 at icedtea.classpath.org>
To: aarch64-port-dev at openjdk.java.net

changeset fa42b2cc6671 in /hg/icedtea7-forest-aarch64/hotspot
details:
http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=fa42b2cc6671
author: adinn
date: Fri Nov 14 13:59:10 2014 +0000

	Define uabs().  Use it everywhere an absolute value is wanted.


changeset 175b337bf651 in /hg/icedtea7-forest-aarch64/hotspot
details:
http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=175b337bf651
author: adinn
date: Fri Nov 14 15:13:21 2014 +0000

	Miscellaneous bug fixes.
	Fix CountCompiledCalls.
	Implement MacroAssembler::delayed_value_impl.
	Fix MacroAssembler::incrementw and MacroAssembler::increment.
	Fix DebugVtables.
	Fix VtableStub::pd_code_size_limit.


changeset ff8f304c9be4 in /hg/icedtea7-forest-aarch64/hotspot
details:
http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=ff8f304c9be4
author: adinn
date: Fri Nov 14 16:44:37 2014 +0000

	Add CNEG and CNEGW to macro assembler.

	actually the above is just an enabling step for generating better code
	for C1_MacroAssembler::float_cmp. the important part of this fix is to
	fix a bug in the aarch64.ad negI rule. it needs to use negw rather
	than negsw.


changeset f83ab0b76d43 in /hg/icedtea7-forest-aarch64/hotspot
details:
http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=f83ab0b76d43
author: adinn
date: Thu Nov 20 11:08:51 2014 +0000

	Add frame anchor fences.


diffstat:

 src/cpu/aarch64/vm/aarch64.ad                  |   2 +-
 src/cpu/aarch64/vm/assembler_aarch64.cpp       |  34 +++++++++----
 src/cpu/aarch64/vm/assembler_aarch64.hpp       |  32 +++++++++++++-
 src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp |  13 +----
 src/cpu/aarch64/vm/stubGenerator_aarch64.cpp   |   4 +-
 src/cpu/aarch64/vm/vtableStubs_aarch64.cpp     |  60
+++++++++++++++++++++----
 6 files changed, 108 insertions(+), 37 deletions(-)

diffs (312 lines):

diff -r 0d51d09ef718 -r f83ab0b76d43 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Fri Nov 14 11:38:48 2014 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Thu Nov 20 11:08:51 2014 +0000
@@ -6947,7 +6947,7 @@
   format %{ "negw $dst, $src\t# int" %}

   ins_encode %{
-    __ negsw(as_Register($dst$$reg),
+    __ negw(as_Register($dst$$reg),
              as_Register($src$$reg));
   %}

diff -r 0d51d09ef718 -r f83ab0b76d43
src/cpu/aarch64/vm/assembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp	Fri Nov 14 11:38:48 2014
+0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp	Thu Nov 20 11:08:51 2014
+0000
@@ -1464,7 +1464,7 @@

 bool Assembler::operand_valid_for_add_sub_immediate(long imm) {
   bool shift = false;
-  unsigned long uimm = labs(imm);
+  unsigned long uimm = uabs(imm);
   if (uimm < (1 << 12))
     return true;
   if (uimm < (1 << 24)
@@ -2246,15 +2246,27 @@
   while (offset() % modulus != 0) nop();
 }

-// these are meant to be no-ops overridden by InterpreterMacroAssembler
-
-void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
Unimplemented(); }
-
-void MacroAssembler::check_and_handle_popframe(Register java_thread) {
Unimplemented(); }
+// these are no-ops overridden by InterpreterMacroAssembler
+
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }
+
+void MacroAssembler::check_and_handle_popframe(Register java_thread) { }

 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t*
delayed_value_addr,
                                                       Register tmp,
-                                                      int offset) {
Unimplemented(); return RegisterOrConstant(r0); }
+                                                      int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0)
+    return RegisterOrConstant(value + offset);
+
+  // load indirectly to solve generation ordering problem
+  ldr(tmp, ExternalAddress((address) delayed_value_addr));
+
+  if (offset != 0)
+    add(tmp, tmp, offset);
+
+  return RegisterOrConstant(tmp);
+}

 void MacroAssembler:: notify(int type) {
   if (type == bytecode_start) {
@@ -3200,7 +3212,7 @@
   }
 }

-void MacroAssembler::increment(Address dst, int value)
+void MacroAssembler::incrementw(Address dst, int value)
 {
   assert(!dst.uses(rscratch1), "invalid dst for address increment");
   ldrw(rscratch1, dst);
@@ -3208,7 +3220,7 @@
   strw(rscratch1, dst);
 }

-void MacroAssembler::incrementw(Address dst, int value)
+void MacroAssembler::increment(Address dst, int value)
 {
   assert(!dst.uses(rscratch1), "invalid dst for address increment");
   ldr(rscratch1, dst);
@@ -3325,7 +3337,7 @@
   if (operand_valid_for_add_sub_immediate((int)imm)) {
     (this->*insn1)(Rd, Rn, imm);
   } else {
-    if (labs(imm) < (1 << 24)) {
+    if (uabs(imm) < (1 << 24)) {
        (this->*insn1)(Rd, Rn, imm & -(1 << 12));
        (this->*insn1)(Rd, Rd, imm & ((1 << 12)-1));
     } else {
@@ -4720,7 +4732,7 @@

 void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned
long &byte_offset) {
   relocInfo::relocType rtype = dest.rspec().reloc()->type();
-  if (labs(pc() - dest.target()) >= (1LL << 32)) {
+  if (uabs(pc() - dest.target()) >= (1LL << 32)) {
     guarantee(rtype == relocInfo::none
 	      || rtype == relocInfo::external_word_type
 	      || rtype == relocInfo::poll_type
diff -r 0d51d09ef718 -r f83ab0b76d43
src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Fri Nov 14 11:38:48 2014
+0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Nov 20 11:08:51 2014
+0000
@@ -322,6 +322,29 @@
   enum operation { uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, sxtx };
 };

+// abs methods which cannot overflow and so are well-defined across
+// the entire domain of integer types.
+static inline unsigned int uabs(unsigned int n) {
+  union {
+    unsigned int result;
+    int value;
+  };
+  result = n;
+  if (value < 0) result = -result;
+  return result;
+}
+static inline unsigned long uabs(unsigned long n) {
+  union {
+    unsigned long result;
+    long value;
+  };
+  result = n;
+  if (value < 0) result = -result;
+  return result;
+}
+static inline unsigned long uabs(long n) { return uabs((unsigned long)n); }
+static inline unsigned long uabs(int n) { return uabs((unsigned int)n); }
+
 // Addressing modes
 class Address VALUE_OBJ_CLASS_SPEC {
  public:
@@ -548,7 +571,7 @@
   static bool offset_ok_for_immed(long offset, int shift = 0) {
     unsigned mask = (1 << shift) - 1;
     if (offset < 0 || offset & mask) {
-      return (abs(offset) < (1 << (20 - 12))); // Unscaled offset
+      return (uabs(offset) < (1 << (20 - 12))); // Unscaled offset
     } else {
       return ((offset >> shift) < (1 << (21 - 10 + 1))); // Scaled,
unsigned offset
     }
@@ -2390,6 +2413,13 @@
     csincw(Rd, zr, zr, ~cond);
   }

+  void cneg(Register Rd, Register Rn, Assembler::Condition cond) {
+    csneg(Rd, Rn, Rn, ~cond);
+  }
+  void cnegw(Register Rd, Register Rn, Assembler::Condition cond) {
+    csnegw(Rd, Rn, Rn, ~cond);
+  }
+
   inline void movw(Register Rd, Register Rn) {
     if (Rd == sp || Rn == sp) {
       addw(Rd, Rn, 0U);
diff -r 0d51d09ef718 -r f83ab0b76d43
src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp
--- a/src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp	Fri Nov 14 11:38:48
2014 +0000
+++ b/src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp	Thu Nov 20 11:08:51
2014 +0000
@@ -42,25 +42,16 @@
   void clear(void) {
     // clearing _last_Java_sp must be first
     _last_Java_sp = NULL;
-    // fence?
+    OrderAccess::release();
     _last_Java_fp = NULL;
     _last_Java_pc = NULL;
   }

   void copy(JavaFrameAnchor* src) {
-    // In order to make sure the transition state is valid for "this"
-    // We must clear _last_Java_sp before copying the rest of the new data
-    //
-    // Hack Alert: Temporary bugfix for 4717480/4721647
-    // To act like previous version (pd_cache_state) don't NULL
_last_Java_sp
-    // unless the value is changing
-    //
-    if (_last_Java_sp != src->_last_Java_sp)
-      _last_Java_sp = NULL;
-
     _last_Java_fp = src->_last_Java_fp;
     _last_Java_pc = src->_last_Java_pc;
     // Must be last so profiler will always see valid frame if
has_last_frame() is true
+    OrderAccess::release();
     _last_Java_sp = src->_last_Java_sp;
   }

diff -r 0d51d09ef718 -r f83ab0b76d43
src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Fri Nov 14 11:38:48
2014 +0000
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Thu Nov 20 11:08:51
2014 +0000
@@ -917,7 +917,7 @@

   void copy_memory_small(Register s, Register d, Register count,
Register tmp, int step) {
     bool is_backwards = step < 0;
-    size_t granularity = abs(step);
+    size_t granularity = uabs(step);
     int direction = is_backwards ? -1 : 1;
     int unit = wordSize * direction;

@@ -973,7 +973,7 @@
 		   Register count, Register tmp, int step) {
     copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
     bool is_backwards = step < 0;
-    int granularity = abs(step);
+    int granularity = uabs(step);
     const Register t0 = r3, t1 = r4;

     if (is_backwards) {
diff -r 0d51d09ef718 -r f83ab0b76d43
src/cpu/aarch64/vm/vtableStubs_aarch64.cpp
--- a/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp	Fri Nov 14 11:38:48
2014 +0000
+++ b/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp	Thu Nov 20 11:08:51
2014 +0000
@@ -58,7 +58,8 @@

 #ifndef PRODUCT
   if (CountCompiledCalls) {
-    __ increment(ExternalAddress((address)
SharedRuntime::nof_megamorphic_calls_addr()));
+    __ lea(r19, ExternalAddress((address)
SharedRuntime::nof_megamorphic_calls_addr()));
+    __ incrementw(Address(r19));
   }
 #endif

@@ -73,12 +74,14 @@
   if (DebugVtables) {
     Label L;
     // check offset vs vtable length
-    __ ldrw(rscratch1, Address(r0,
instanceKlass::vtable_length_offset() * wordSize));
+    __ ldrw(rscratch1, Address(r19,
instanceKlass::vtable_length_offset() * wordSize));
     __ cmpw(rscratch1, vtable_index * vtableEntry::size());
     __ br(Assembler::GT, L);
+    __ enter();
     __ mov(r2, vtable_index);
     __ call_VM(noreg,
                CAST_FROM_FN_PTR(address, bad_compiled_vtable_index),
j_rarg0, r2);
+    __ leave();
     __ bind(L);
   }
 #endif // PRODUCT
@@ -109,9 +112,6 @@
                   (int)(s->code_end() - __ pc()));
   }
   guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-  // shut the door on sizing bugs
-  int slop = 3;  // 32-bit offset is this much larger than an 8-bit one
-  assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room
for 32-bit offset");

   s->set_exception_points(npe_addr, ame_addr);
   return s;
@@ -130,7 +130,8 @@

 #ifndef PRODUCT
   if (CountCompiledCalls) {
-    __ increment(ExternalAddress((address)
SharedRuntime::nof_megamorphic_calls_addr()));
+    __ lea(r10, ExternalAddress((address)
SharedRuntime::nof_megamorphic_calls_addr()));
+    __ incrementw(Address(r10));
   }
 #endif

@@ -190,9 +191,6 @@
                   (int)(s->code_end() - __ pc()));
   }
   guarantee(__ pc() <= s->code_end(), "overflowed buffer");
-  // shut the door on sizing bugs
-  int slop = 3;  // 32-bit offset is this much larger than an 8-bit one
-  assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room
for 32-bit offset");

   s->set_exception_points(npe_addr, ame_addr);
   return s;
@@ -200,8 +198,48 @@


 int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
-  // FIXME
-  return 200;
+  int size = DebugVtables ? 216 : 0;
+  if (CountCompiledCalls)
+    size += 6 * 4;
+   // FIXME
+  if (is_vtable_stub)
+    size += 52;
+  else
+    size += 104;
+  return size;
+
+  // In order to tune these parameters, run the JVM with VM options
+  // +PrintMiscellaneous and +WizardMode to see information about
+  // actual itable stubs.  Run it with -Xmx31G -XX:+UseCompressedOops.
+  //
+  // If Universe::narrow_klass_base is nonzero, decoding a compressed
+// class can take several instructions.  Run it with -Xmx31G
+  // -XX:+UseCompressedOops.
+  //
+  // The JVM98 app. _202_jess has a megamorphic interface call.
+  // The itable code looks like this:
+  // Decoding VtableStub itbl[1]@12
+  //     ldr     w10, [x1,#8]
+  //     lsl     x10, x10, #3
+  //     ldr     w11, [x10,#280]
+  //     add     x11, x10, x11, uxtx #3
+  //     add     x11, x11, #0x1b8
+  //     ldr     x12, [x11]
+  //     cmp     x9, x12
+  //     b.eq    success
+  // loop:
+  //     cbz     x12, throw_icce
+  //     add     x11, x11, #0x10
+  //     ldr     x12, [x11]
+  //     cmp     x9, x12
+  //     b.ne    loop
+  // success:
+  //     ldr     x11, [x11,#8]
+  //     ldr     x12, [x10,x11]
+  //     ldr     x8, [x12,#72]
+  //     br      x8
+  // throw_icce:
+  //     b	throw_ICCE_entry
 }

 int VtableStub::pd_code_alignment() { return 4; }




More information about the aarch64-port-dev mailing list