[aarch64-port-dev ] /hg/icedtea7-forest-aarch64/hotspot: 6 new changesets
Andrew Dinn
adinn at redhat.com
Wed Nov 26 10:36:03 UTC 2014
[forwarding bounced check-in message from icedtea7-forest-aarch64 repo]
------ This is a copy of the message, including all the headers. ------
Return-path: <adinn at icedtea.classpath.org>
Received: from localhost ([127.0.0.1] helo=icedtea.classpath.org)
by icedtea.classpath.org with esmtp (Exim 4.69)
(envelope-from <adinn at icedtea.classpath.org>)
id 1XtZvA-0002N2-6p
for aarch64-port-dev at openjdk.java.net; Wed, 26 Nov 2014 10:34:04 +0000
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Date: Wed, 26 Nov 2014 10:34:04 +0000
Subject: /hg/icedtea7-forest-aarch64/hotspot: 6 new changesets
From: adinn at icedtea.classpath.org
X-Hg-Notification: changeset 8e3cc52cbcef
Message-Id:
<hg.8e3cc52cbcef.1416998044.-5017525213744097322 at icedtea.classpath.org>
To: aarch64-port-dev at openjdk.java.net
changeset 8e3cc52cbcef in /hg/icedtea7-forest-aarch64/hotspot
details:
http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=8e3cc52cbcef
author: adinn
date: Mon Nov 24 13:37:15 2014 +0000
Add support for AES Intrinsics
backport from jdk8 also incorporates 3 subsequent updates
changeset f7326d2a6cda in /hg/icedtea7-forest-aarch64/hotspot
details:
http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=f7326d2a6cda
author: adinn
date: Mon Nov 24 15:59:18 2014 +0000
Use TLS for ThreadLocalStorage::thread()
changeset 405f393ec93d in /hg/icedtea7-forest-aarch64/hotspot
details:
http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=405f393ec93d
author: adinn
date: Mon Nov 24 16:56:09 2014 +0000
Add char_array_equals intrinsic
changeset 023d218976e3 in /hg/icedtea7-forest-aarch64/hotspot
details:
http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=023d218976e3
author: adinn
date: Mon Nov 24 18:11:42 2014 +0000
Add support for String.indexOf intrinsic
changeset 56958c314918 in /hg/icedtea7-forest-aarch64/hotspot
details:
http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=56958c314918
author: adinn
date: Tue Nov 25 11:10:14 2014 +0000
A more efficient sequence for C1_MacroAssembler::float_cmp.
changeset 80e04c4cd4b2 in /hg/icedtea7-forest-aarch64/hotspot
details:
http://icedtea.classpath.org/hg/icedtea7-forest-aarch64/hotspot?cmd=changeset;node=80e04c4cd4b2
author: adinn
date: Tue Nov 25 17:36:55 2014 +0000
Add support for pipeline scheduling
diffstat:
src/cpu/aarch64/vm/aarch64.ad | 1377 ++++++++++-----
src/cpu/aarch64/vm/aarch64_ad.m4 | 26 +-
src/cpu/aarch64/vm/assembler_aarch64.cpp | 590 +++++-
src/cpu/aarch64/vm/assembler_aarch64.hpp | 311 +-
src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp | 16 +-
src/cpu/aarch64/vm/icache_aarch64.cpp | 5 +-
src/cpu/aarch64/vm/icache_aarch64.hpp | 2 +-
src/cpu/aarch64/vm/stubGenerator_aarch64.cpp | 415 ++++
src/cpu/aarch64/vm/vm_version_aarch64.cpp | 26 +
src/os_cpu/linux_aarch64/vm/globals_linux_aarch64.hpp | 2 +
src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.cpp | 29 +-
src/os_cpu/linux_aarch64/vm/threadLS_linux_aarch64.hpp | 4 +-
12 files changed, 2045 insertions(+), 758 deletions(-)
diffs (truncated from 5305 to 500 lines):
diff -r 4868ef1912f1 -r 80e04c4cd4b2 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad Fri Nov 21 20:35:24 2014 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad Tue Nov 25 17:36:55 2014 +0000
@@ -3363,6 +3363,16 @@
interface(CONST_INTER);
%}
+operand immI_le_4()
+%{
+ predicate(n->get_int() <= 4);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
operand immI_31()
%{
predicate(n->get_int() == 31);
@@ -4687,17 +4697,14 @@
attributes %{
// ARM instructions are of fixed length
fixed_size_instructions; // Fixed size instructions TODO does
- // TODO does this relate to how many instructions can be scheduled
- // at once? just guess 8 for now
- max_instructions_per_bundle = 8; // Up to 8 instructions per bundle
+ max_instructions_per_bundle = 2; // A53 = 2, A57 = 4
// ARM instructions come in 32-bit word units
instruction_unit_size = 4; // An instruction is 4 bytes long
- // TODO identify correct cache line size just guess 64 for now
instruction_fetch_unit_size = 64; // The processor fetches one line
instruction_fetch_units = 1; // of 64 bytes
// List of nop instructions
- //nops( MachNop );
+ nops( MachNop );
%}
// We don't use an actual pipeline model so don't care about resources
@@ -4707,21 +4714,387 @@
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine
-resources( D0, D1, D2, DECODE = D0 | D1 | D2,
- MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
- BR, FPU,
- ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
+resources( INS0, INS1, INS01 = INS0 | INS1,
+ ALU0, ALU1, ALU = ALU0 | ALU1,
+ MAC,
+ DIV,
+ BRANCH,
+ LDST,
+ NEON_FP);
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline
// Generic P2/P3 pipeline
-pipe_desc(S0, S1, S2, S3, S4, S5);
+pipe_desc(ISS, EX1, EX2, WR);
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.
+//------- Integer ALU operations --------------------------
+
+// Integer ALU reg-reg operation
+// Operands needed in EX1, result generated in EX2
+// Eg. ADD x0, x1, x2
+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ dst : EX2(write);
+ src1 : EX1(read);
+ src2 : EX1(read);
+ INS01 : ISS; // Dual issue as instruction 0 or 1
+ ALU : EX2;
+%}
+
+// Integer ALU reg-reg operation with constant shift
+// Shifted register must be available in LATE_ISS instead of EX1
+// Eg. ADD x0, x1, x2, LSL #2
+pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
+%{
+ single_instruction;
+ dst : EX2(write);
+ src1 : EX1(read);
+ src2 : ISS(read);
+ INS01 : ISS;
+ ALU : EX2;
+%}
+
+// Integer ALU reg operation with constant shift
+// Eg. LSL x0, x1, #shift
+pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
+%{
+ single_instruction;
+ dst : EX2(write);
+ src1 : ISS(read);
+ INS01 : ISS;
+ ALU : EX2;
+%}
+
+// Integer ALU reg-reg operation with variable shift
+// Both operands must be available in LATE_ISS instead of EX1
+// Result is available in EX1 instead of EX2
+// Eg. LSLV x0, x1, x2
+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ dst : EX1(write);
+ src1 : ISS(read);
+ src2 : ISS(read);
+ INS01 : ISS;
+ ALU : EX1;
+%}
+
+// Integer ALU reg-reg operation with extract
+// As for _vshift above, but result generated in EX2
+// Eg. EXTR x0, x1, x2, #N
+pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ dst : EX2(write);
+ src1 : ISS(read);
+ src2 : ISS(read);
+ INS1 : ISS; // Can only dual issue as Instruction 1
+ ALU : EX1;
+%}
+
+// Integer ALU reg operation
+// Eg. NEG x0, x1
+pipe_class ialu_reg(iRegI dst, iRegI src)
+%{
+ single_instruction;
+ dst : EX2(write);
+ src : EX1(read);
+ INS01 : ISS;
+ ALU : EX2;
+%}
+
+// Integer ALU reg immediate operation
+// Eg. ADD x0, x1, #N
+pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
+%{
+ single_instruction;
+ dst : EX2(write);
+ src1 : EX1(read);
+ INS01 : ISS;
+ ALU : EX2;
+%}
+
+// Integer ALU immediate operation (no source operands)
+// Eg. MOV x0, #N
+pipe_class ialu_imm(iRegI dst)
+%{
+ single_instruction;
+ dst : EX1(write);
+ INS01 : ISS;
+ ALU : EX1;
+%}
+
+//------- Compare operation -------------------------------
+
+// Compare reg-reg
+// Eg. CMP x0, x1
+pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+ single_instruction;
+// fixed_latency(16);
+ cr : EX2(write);
+ op1 : EX1(read);
+ op2 : EX1(read);
+ INS01 : ISS;
+ ALU : EX2;
+%}
+
+// Compare reg-imm
+// Eg. CMP x0, #N
+pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
+%{
+ single_instruction;
+// fixed_latency(16);
+ cr : EX2(write);
+ op1 : EX1(read);
+ INS01 : ISS;
+ ALU : EX2;
+%}
+
+//------- Conditional instructions ------------------------
+
+// Conditional no operands
+// Eg. CSINC x0, zr, zr, <cond>
+pipe_class icond_none(iRegI dst, rFlagsReg cr)
+%{
+ single_instruction;
+ cr : EX1(read);
+ dst : EX2(write);
+ INS01 : ISS;
+ ALU : EX2;
+%}
+
+// Conditional 2 operand
+// EG. CSEL X0, X1, X2, <cond>
+pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
+%{
+ single_instruction;
+ cr : EX1(read);
+ src1 : EX1(read);
+ src2 : EX1(read);
+ dst : EX2(write);
+ INS01 : ISS;
+ ALU : EX2;
+%}
+
+// Conditional 1 operand
+// EG. CSEL X0, X1, zr, <cond>
+pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
+%{
+ single_instruction;
+ cr : EX1(read);
+ src : EX1(read);
+ dst : EX2(write);
+ INS01 : ISS;
+ ALU : EX2;
+%}
+
+//------- Multiply pipeline operations --------------------
+
+// Multiply reg-reg
+// Eg. MUL w0, w1, w2
+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ dst : WR(write);
+ src1 : ISS(read);
+ src2 : ISS(read);
+ INS01 : ISS;
+ MAC : WR;
+%}
+
+// Multiply accumulate
+// Eg. MADD w0, w1, w2, w3
+pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
+%{
+ single_instruction;
+ dst : WR(write);
+ src1 : ISS(read);
+ src2 : ISS(read);
+ src3 : ISS(read);
+ INS01 : ISS;
+ MAC : WR;
+%}
+
+// Eg. MUL x0, x1, x2
+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ fixed_latency(3); // Maximum latency for 64 bit mul
+ dst : WR(write);
+ src1 : ISS(read);
+ src2 : ISS(read);
+ INS01 : ISS;
+ MAC : WR;
+%}
+
+// Multiply accumulate
+// Eg. MADD x0, x1, x2, x3
+pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
+%{
+ single_instruction;
+ fixed_latency(3); // Maximum latency for 64 bit mul
+ dst : WR(write);
+ src1 : ISS(read);
+ src2 : ISS(read);
+ src3 : ISS(read);
+ INS01 : ISS;
+ MAC : WR;
+%}
+
+//------- Divide pipeline operations --------------------
+
+// Eg. SDIV w0, w1, w2
+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ fixed_latency(8); // Maximum latency for 32 bit divide
+ dst : WR(write);
+ src1 : ISS(read);
+ src2 : ISS(read);
+ INS0 : ISS; // Can only dual issue as instruction 0
+ DIV : WR;
+%}
+
+// Eg. SDIV x0, x1, x2
+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+ single_instruction;
+ fixed_latency(16); // Maximum latency for 64 bit divide
+ dst : WR(write);
+ src1 : ISS(read);
+ src2 : ISS(read);
+ INS0 : ISS; // Can only dual issue as instruction 0
+ DIV : WR;
+%}
+
+//------- Load pipeline operations ------------------------
+
+// Load - prefetch
+// Eg. PRFM <mem>
+pipe_class iload_prefetch(memory mem)
+%{
+ single_instruction;
+ mem : ISS(read);
+ INS01 : ISS;
+ LDST : WR;
+%}
+
+// Load - reg, mem
+// Eg. LDR x0, <mem>
+pipe_class iload_reg_mem(iRegI dst, memory mem)
+%{
+ single_instruction;
+ dst : WR(write);
+ mem : ISS(read);
+ INS01 : ISS;
+ LDST : WR;
+%}
+
+// Load - reg, reg
+// Eg. LDR x0, [sp, x1]
+pipe_class iload_reg_reg(iRegI dst, iRegI src)
+%{
+ single_instruction;
+ dst : WR(write);
+ src : ISS(read);
+ INS01 : ISS;
+ LDST : WR;
+%}
+
+//------- Store pipeline operations -----------------------
+
+// Store - zr, mem
+// Eg. STR zr, <mem>
+pipe_class istore_mem(memory mem)
+%{
+ single_instruction;
+ mem : ISS(read);
+ INS01 : ISS;
+ LDST : WR;
+%}
+
+// Store - reg, mem
+// Eg. STR x0, <mem>
+pipe_class istore_reg_mem(iRegI src, memory mem)
+%{
+ single_instruction;
+ mem : ISS(read);
+ src : EX2(read);
+ INS01 : ISS;
+ LDST : WR;
+%}
+
+// Store - reg, reg
+// Eg. STR x0, [sp, x1]
+pipe_class istore_reg_reg(iRegI dst, iRegI src)
+%{
+ single_instruction;
+ dst : ISS(read);
+ src : EX2(read);
+ INS01 : ISS;
+ LDST : WR;
+%}
+
+//------- Branch pipeline operations ----------------------
+
+// Branch
+pipe_class pipe_branch()
+%{
+ single_instruction;
+ INS01 : ISS;
+ BRANCH : EX1;
+%}
+
+// Conditional branch
+pipe_class pipe_branch_cond(rFlagsReg cr)
+%{
+ single_instruction;
+ cr : EX1(read);
+ INS01 : ISS;
+ BRANCH : EX1;
+%}
+
+// Compare & Branch
+// EG. CBZ/CBNZ
+pipe_class pipe_cmp_branch(iRegI op1)
+%{
+ single_instruction;
+ op1 : EX1(read);
+ INS01 : ISS;
+ BRANCH : EX1;
+%}
+
+//------- Synchronisation operations ----------------------
+
+// Any operation requiring serialization.
+// EG. DMB/Atomic Ops/Load Acquire/Str Release
+pipe_class pipe_serial()
+%{
+ single_instruction;
+ force_serialization;
+ fixed_latency(16);
+ INS01 : ISS(2); // Cannot dual issue with any other instruction
+ LDST : WR;
+%}
+
+// Generic big/slow expanded idiom - also serialized
+pipe_class pipe_slow()
+%{
+ instruction_count(10);
+ multiple_bundles;
+ force_serialization;
+ fixed_latency(16);
+ INS01 : ISS(2); // Cannot dual issue with any other instruction
+ LDST : WR;
+%}
+
// Empty pipeline class
pipe_class pipe_class_empty()
%{
@@ -4743,30 +5116,23 @@
fixed_latency(16);
%}
-// Pipeline class for traps.
-pipe_class pipe_class_trap()
+// Pipeline class for memory operations.
+pipe_class pipe_class_memory()
+%{
+ single_instruction;
+ fixed_latency(16);
+%}
+
+// Pipeline class for call.
+pipe_class pipe_class_call()
%{
single_instruction;
fixed_latency(100);
%}
-// Pipeline class for memory operations.
-pipe_class pipe_class_memory()
-%{
- single_instruction;
- fixed_latency(16);
-%}
-
-// Pipeline class for call.
-pipe_class pipe_class_call()
-%{
- single_instruction;
- fixed_latency(100);
-%}
-
// Define the class for the Nop node.
define %{
- MachNop = pipe_class_default;
+ MachNop = pipe_class_empty;
%}
%}
@@ -4806,7 +5172,7 @@
ins_encode(aarch64_enc_ldrsbw(dst, mem));
- ins_pipe(pipe_class_memory);
+ ins_pipe(iload_reg_mem);
%}
// Load Byte (8 bit signed) into long
@@ -4819,7 +5185,7 @@
ins_encode(aarch64_enc_ldrsb(dst, mem));
- ins_pipe(pipe_class_memory);
+ ins_pipe(iload_reg_mem);
%}
// Load Byte (8 bit unsigned)
@@ -4832,7 +5198,7 @@
ins_encode(aarch64_enc_ldrb(dst, mem));
- ins_pipe(pipe_class_memory);
+ ins_pipe(iload_reg_mem);
%}
More information about the aarch64-port-dev
mailing list