[aarch64-port-dev ] /hg/icedtea7-forest-aarch64/hotspot: 2 new changesets

Fri Dec 5 14:17:07 UTC 2014

[forwarding bounced check-in message from icedtea7-forest-aarch64 repo]
------ This is a copy of the message, including all the headers. ------

Return-path: <adinn at icedtea.classpath.org>
Received: from localhost ([127.0.0.1] helo=icedtea.classpath.org)
	by icedtea.classpath.org with esmtp (Exim 4.69)
	(envelope-from <adinn at icedtea.classpath.org>)
	id 1Xwqzi-0006rK-Cb
	for aarch64-port-dev at openjdk.java.net; Fri, 05 Dec 2014 11:24:18 +0000
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Date: Fri, 05 Dec 2014 11:24:18 +0000
Subject: /hg/icedtea7-forest-aarch64/hotspot: 2 new changesets
From: adinn at icedtea.classpath.org
X-Hg-Notification: changeset 6e5799a89b56
Message-Id: <hg.6e5799a89b56.1417778658.-5017525213744097322 at icedtea.classpath.org>
To: aarch64-port-dev at openjdk.java.net

changeset 6e5799a89b56 in /hg/icedtea7-forest-aarch64/hotspot
details: http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=6e5799a89b56
author: andrew
date: Fri Dec 05 09:52:07 2014 +0000

	Added tag icedtea-2.6pre13 for changeset c6fa18ed8a01


changeset 1d3d9e81c8e1 in /hg/icedtea7-forest-aarch64/hotspot
details: http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=1d3d9e81c8e1
author: adinn
date: Fri Dec 05 11:22:50 2014 +0000

	merge


diffstat:

 .hgtags                                                                  |     1 +
 make/linux/makefiles/vm.make                                             |     4 +
 src/cpu/aarch64/vm/aarch64.ad                                            |  1420 ++++++---
 src/cpu/aarch64/vm/aarch64Test.cpp                                       |    39 -
 src/cpu/aarch64/vm/aarch64_ad.m4                                         |    26 +-
 src/cpu/aarch64/vm/assembler_aarch64.cpp                                 |   691 ++++-
 src/cpu/aarch64/vm/assembler_aarch64.hpp                                 |   383 +-
 src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp                           |     6 +-
 src/cpu/aarch64/vm/c1_LinearScan_aarch64.cpp                             |  1208 +--------
 src/cpu/aarch64/vm/c1_LinearScan_aarch64.hpp                             |    79 +-
 src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp                         |    20 +-
 src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp                               |     8 +
 src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp                   |    22 -
 src/cpu/aarch64/vm/frame_aarch64.cpp                                     |     2 +-
 src/cpu/aarch64/vm/icache_aarch64.cpp                                    |     5 +-
 src/cpu/aarch64/vm/icache_aarch64.hpp                                    |     2 +-
 src/cpu/aarch64/vm/interp_masm_aarch64.cpp                               |     7 +-
 src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp                           |    13 +-
 src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp                           |    11 +-
 src/cpu/aarch64/vm/methodHandles_aarch64.cpp                             |    10 +-
 src/cpu/aarch64/vm/methodHandles_aarch64.hpp                             |     2 +-
 src/cpu/aarch64/vm/nativeInst_aarch64.cpp                                |    40 +-
 src/cpu/aarch64/vm/nativeInst_aarch64.hpp                                |    26 +-
 src/cpu/aarch64/vm/register_aarch64.hpp                                  |    38 -
 src/cpu/aarch64/vm/relocInfo_aarch64.cpp                                 |    13 +-
 src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp                             |    16 +-
 src/cpu/aarch64/vm/stubGenerator_aarch64.cpp                             |   686 ++-
 src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp                       |     3 +
 src/cpu/aarch64/vm/templateTable_aarch64.cpp                             |    16 +
 src/cpu/aarch64/vm/vm_version_aarch64.cpp                                |    31 +-
 src/cpu/aarch64/vm/vtableStubs_aarch64.cpp                               |    60 +-
 src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp              |    12 +-
 src/os_cpu/linux_aarch64/vm/globals_linux_aarch64.hpp                    |     2 +
 src/os_cpu/linux_aarch64/vm/orderAccess_linux_aarch64.inline.hpp         |     5 +-
 src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.cpp                   |    29 +-
 src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.hpp                   |     4 +-
 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp |     1 -
 src/share/vm/memory/collectorPolicy.cpp                                  |     2 +-
 38 files changed, 2335 insertions(+), 2608 deletions(-)

diffs (truncated from 8389 to 500 lines):

diff -r c6fa18ed8a01 -r 1d3d9e81c8e1 .hgtags

--- a/.hgtags	Thu Dec 04 20:38:11 2014 +0000
+++ b/.hgtags	Fri Dec 05 11:22:50 2014 +0000
@@ -777,3 +777,4 @@
 e13857ecc7870c28dbebca79ff36612693dac157 icedtea-2.6pre11
 9d2b485d2a58ea57ab2b3c06b2128f456ab39a38 jdk7u80-b03
 0c2099cd04cd24778c5baccc7c8a72c311ef6f84 icedtea-2.6pre12
+c6fa18ed8a01a15e1210bf44dc7075463e0a514b icedtea-2.6pre13
diff -r c6fa18ed8a01 -r 1d3d9e81c8e1 make/linux/makefiles/vm.make
--- a/make/linux/makefiles/vm.make	Thu Dec 04 20:38:11 2014 +0000
+++ b/make/linux/makefiles/vm.make	Fri Dec 05 11:22:50 2014 +0000
@@ -92,6 +92,10 @@
 BUILD_USER    = -DHOTSPOT_BUILD_USER="\"$(HOTSPOT_BUILD_USER)\""
 VM_DISTRO     = -DHOTSPOT_VM_DISTRO="\"$(HOTSPOT_VM_DISTRO)\""

+ifeq ($(BUILTIN_SIM), true)
+  HS_LIB_ARCH=-DHOTSPOT_LIB_ARCH="\"aarch64\""
+endif
+
 CXXFLAGS =           \
   ${SYSDEFS}         \
   ${INCLUDES}        \
diff -r c6fa18ed8a01 -r 1d3d9e81c8e1 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Thu Dec 04 20:38:11 2014 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Fri Dec 05 11:22:50 2014 +0000
@@ -804,11 +804,6 @@

 //=============================================================================

-// Emit an interrupt that is caught by the debugger (for debugging compiler).
-void emit_break(CodeBuffer &cbuf) {
-  Unimplemented();
-}
-
 #ifndef PRODUCT
 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
   st->print("BREAKPOINT");
@@ -1363,12 +1358,10 @@
   return 4;
 }

-// !!! FIXME AARCH64 -- this needs to be reworked for jdk7
-
 uint size_java_to_interp()
 {
-  // count a mov mem --> to 3 movz/k and a branch
-  return 4 * NativeInstruction::instruction_size;
+  // on jdk7 we only need a mov oop and a branch
+  return 2 * NativeInstruction::instruction_size;
 }

 // Offset from start of compiled java to interpreter stub to the load
@@ -1395,11 +1388,11 @@
   // static stub relocation stores the instruction address of the call
   const RelocationHolder &rspec = static_stub_Relocation::spec(mark);
   __ relocate(rspec);
-  // !!! FIXME AARCH64
   // static stub relocation also tags the methodOop in the code-stream.
-  // for jdk7 we have to use movoop and locate the oop in the cpool
-  // if we use an immediate then patching fails to update the pool
-  // oop and GC overwrites the patch with movk/z 0x0000 again
+  //
+  // n.b. for jdk7 we have to use movoop and locate the oop in the
+  // cpool; if we use an immediate then patching fails to update the
+  // pool oop and GC overwrites the patch with movk/z 0x0000 again
   __ movoop(rmethod, (jobject) NULL);
   // This is recognized as unresolved by relocs/nativeinst/ic code
   __ b(__ pc());
@@ -1412,9 +1405,8 @@
 // relocation entries for call stub, compiled java to interpretor
 uint reloc_java_to_interp()
 {
-  // TODO fixme
-  // return a large number
-  return 5;
+  // n.b. on jdk7 we use a movoop and a branch
+  return 2;
 }

 //=============================================================================
@@ -2414,16 +2406,13 @@
     int disp = $mem$$disp;
     if (index == -1) {
       __ prfm(Address(base, disp), PLDL1KEEP);
-      __ nop();
     } else {
       Register index_reg = as_Register(index);
       if (disp == 0) {
-        // __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP);
-        __ nop();
+        __ prfm(Address(base, index_reg, Address::lsl(scale)), PLDL1KEEP);
       } else {
         __ lea(rscratch1, Address(base, disp));
 	__ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PLDL1KEEP);
-        __ nop();
       }
     }
   %}
@@ -2441,11 +2430,9 @@
       Register index_reg = as_Register(index);
       if (disp == 0) {
         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
-        __ nop();
       } else {
         __ lea(rscratch1, Address(base, disp));
 	__ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
-        __ nop();
       }
     }
   %}
@@ -2458,16 +2445,13 @@
     int disp = $mem$$disp;
     if (index == -1) {
       __ prfm(Address(base, disp), PSTL1STRM);
-      __ nop();
     } else {
       Register index_reg = as_Register(index);
       if (disp == 0) {
         __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1STRM);
-        __ nop();
       } else {
         __ lea(rscratch1, Address(base, disp));
 	__ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1STRM);
-        __ nop();
       }
     }
   %}
@@ -2589,7 +2573,12 @@
     Register dst_reg = as_Register($dst$$reg);
     unsigned long off;
     __ adrp(dst_reg, ExternalAddress(page), off);
-    assert(off == 0, "assumed offset == 0");
+    assert((off & 0x3ffL) == 0, "assumed offset aligned to 0x400");
+    // n.b. intra-page offset will never change even if this gets
+    // relocated so it is safe to omit the lea when off == 0
+    if (off != 0) {
+      __ lea(dst_reg, Address(dst_reg, off));
+    }
   %}

   enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
@@ -3374,6 +3363,16 @@
   interface(CONST_INTER);
 %}

+operand immI_le_4()
+%{
+  predicate(n->get_int() <= 4);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 operand immI_31()
 %{
   predicate(n->get_int() == 31);
@@ -4698,17 +4697,14 @@
 attributes %{
   // ARM instructions are of fixed length
   fixed_size_instructions;        // Fixed size instructions TODO does
-  // TODO does this relate to how many instructions can be scheduled
-  // at once? just guess 8 for now
-  max_instructions_per_bundle = 8;   // Up to 8 instructions per bundle
+  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
   // ARM instructions come in 32-bit word units
   instruction_unit_size = 4;         // An instruction is 4 bytes long
-  // TODO identify correct cache line size  just guess 64 for now
   instruction_fetch_unit_size = 64;  // The processor fetches one line
   instruction_fetch_units = 1;       // of 64 bytes

   // List of nop instructions
-  //nops( MachNop );
+  nops( MachNop );
 %}

 // We don't use an actual pipeline model so don't care about resources
@@ -4718,21 +4714,387 @@
 //----------RESOURCES----------------------------------------------------------
 // Resources are the functional units available to the machine

-resources( D0, D1, D2, DECODE = D0 | D1 | D2,
-           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
-           BR, FPU,
-           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
+resources( INS0, INS1, INS01 = INS0 | INS1,
+           ALU0, ALU1, ALU = ALU0 | ALU1,
+           MAC,
+           DIV,
+           BRANCH,
+           LDST,
+           NEON_FP);

 //----------PIPELINE DESCRIPTION-----------------------------------------------
 // Pipeline Description specifies the stages in the machine's pipeline

 // Generic P2/P3 pipeline
-pipe_desc(S0, S1, S2, S3, S4, S5);
+pipe_desc(ISS, EX1, EX2, WR);

 //----------PIPELINE CLASSES---------------------------------------------------
 // Pipeline Classes describe the stages in which input and output are
 // referenced by the hardware pipeline.

+//------- Integer ALU operations --------------------------
+
+// Integer ALU reg-reg operation
+// Operands needed in EX1, result generated in EX2
+// Eg.	ADD	x0, x1, x2
+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : EX1(read);
+  src2   : EX1(read);
+  INS01  : ISS; // Dual issue as instruction 0 or 1
+  ALU    : EX2;
+%}
+
+// Integer ALU reg-reg operation with constant shift
+// Shifted register must be available in LATE_ISS instead of EX1
+// Eg.	ADD	x0, x1, x2, LSL #2
+pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : EX1(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU reg operation with constant shift
+// Eg.	LSL	x0, x1, #shift
+pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : ISS(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU reg-reg operation with variable shift
+// Both operands must be available in LATE_ISS instead of EX1
+// Result is available in EX1 instead of EX2
+// Eg.	LSLV	x0, x1, x2
+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX1(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  ALU    : EX1;
+%}
+
+// Integer ALU reg-reg operation with extract
+// As for _vshift above, but result generated in EX2
+// Eg.	EXTR	x0, x1, x2, #N
+pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS1   : ISS; // Can only dual issue as Instruction 1
+  ALU    : EX1;
+%}
+
+// Integer ALU reg operation
+// Eg.	NEG	x0, x1
+pipe_class ialu_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src    : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU reg-immediate operation
+// Eg.	ADD	x0, x1, #N
+pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
+%{
+  single_instruction;
+  dst    : EX2(write);
+  src1   : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Integer ALU immediate operation (no source operands)
+// Eg.	MOV	x0, #N
+pipe_class ialu_imm(iRegI dst)
+%{
+  single_instruction;
+  dst    : EX1(write);
+  INS01  : ISS;
+  ALU    : EX1;
+%}
+
+//------- Compare operation -------------------------------
+
+// Compare reg-reg
+// Eg.	CMP	x0, x1
+pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+  single_instruction;
+//  fixed_latency(16);
+  cr     : EX2(write);
+  op1    : EX1(read);
+  op2    : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Compare reg-imm
+// Eg.	CMP	x0, #N
+pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
+%{
+  single_instruction;
+//  fixed_latency(16);
+  cr     : EX2(write);
+  op1    : EX1(read);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+//------- Conditional instructions ------------------------
+
+// Conditional no operands
+// Eg.	CSINC	x0, zr, zr, <cond>
+pipe_class icond_none(iRegI dst, rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  dst    : EX2(write);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Conditional 2 operand
+// EG.	CSEL	X0, X1, X2, <cond>
+pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  src1   : EX1(read);
+  src2   : EX1(read);
+  dst    : EX2(write);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+// Conditional 1 operand
+// EG.	CSEL	X0, X1, zr, <cond>
+pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
+%{
+  single_instruction;
+  cr     : EX1(read);
+  src    : EX1(read);
+  dst    : EX2(write);
+  INS01  : ISS;
+  ALU    : EX2;
+%}
+
+//------- Multiply pipeline operations --------------------
+
+// Multiply reg-reg
+// Eg.	MUL	w0, w1, w2
+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+// Multiply accumulate
+// Eg.	MADD	w0, w1, w2, w3
+pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  src3   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+// Eg.	MUL	x0, x1, x2
+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(3); // Maximum latency for 64 bit mul
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+// Multiply accumulate
+// Eg.	MADD	x0, x1, x2, x3
+pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
+%{
+  single_instruction;
+  fixed_latency(3); // Maximum latency for 64 bit mul
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  src3   : ISS(read);
+  INS01  : ISS;
+  MAC    : WR;
+%}
+
+//------- Divide pipeline operations --------------------
+
+// Eg.	SDIV	w0, w1, w2
+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(8); // Maximum latency for 32 bit divide
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS0   : ISS; // Can only dual issue as instruction 0
+  DIV    : WR;
+%}
+
+// Eg.	SDIV	x0, x1, x2
+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(16); // Maximum latency for 64 bit divide
+  dst    : WR(write);
+  src1   : ISS(read);
+  src2   : ISS(read);
+  INS0   : ISS; // Can only dual issue as instruction 0
+  DIV    : WR;
+%}
+
+//------- Load pipeline operations ------------------------
+
+// Load - prefetch
+// Eg.	PRFM	<mem>
+pipe_class iload_prefetch(memory mem)
+%{
+  single_instruction;
+  mem    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Load - reg, mem
+// Eg.	LDR	x0, <mem>
+pipe_class iload_reg_mem(iRegI dst, memory mem)
+%{
+  single_instruction;
+  dst    : WR(write);
+  mem    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Load - reg, reg
+// Eg.	LDR	x0, [sp, x1]
+pipe_class iload_reg_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+//------- Store pipeline operations -----------------------
+
+// Store - zr, mem
+// Eg.	STR	zr, <mem>
+pipe_class istore_mem(memory mem)
+%{
+  single_instruction;
+  mem    : ISS(read);
+  INS01  : ISS;
+  LDST   : WR;
+%}
+
+// Store - reg, mem
+// Eg.	STR	x0, <mem>
+pipe_class istore_reg_mem(iRegI src, memory mem)
+%{
+  single_instruction;
+  mem    : ISS(read);
+  src    : EX2(read);