[aarch64-port-dev ] Refactor & rename vector instructions
Andrew Haley
aph at redhat.com
Tue Jul 22 15:48:41 UTC 2014
This one is in two parts. The first part merely strips the "v_"
prefix from vector instructions, and the second changes the ld/st
instructions to use the Address form, saving a bunch of Assembler
methods along the way.
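For illustration, here is roughly how a call site changes (the before/after
lines are taken from the CRC32 code touched by these patches):

    // Before: separate Register / immediate overloads, "v_"-prefixed names
    v_ld1(v0, v1, T2D, buf, 32);
    v_ld1r(v4, T2D, tmp, 8);

    // After: unprefixed names taking a single Address-form operand
    ld1(v0, v1, T2D, post(buf, 32));
    ld1r(v4, T2D, post(tmp, 8));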
Ed, please check that I haven't broken anything.
Andrew.
-------------- next part --------------
# HG changeset patch
# User aph
# Date 1406033124 14400
# Tue Jul 22 08:45:24 2014 -0400
# Node ID e2941a6acc555c7736128d678c59033188b9bafe
# Parent c4af536cc45c9e0928e51408b07de2d3f9193d55
Remove "v_" prefixes from all SIMD instructions.
diff -r c4af536cc45c -r e2941a6acc55 src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Mon Jul 21 17:23:40 2014 +0100
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Jul 22 08:45:24 2014 -0400
@@ -1857,14 +1857,14 @@
S32, D64, Q128
};
-void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2)
+void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2)
{
starti;
f(0,31), f((int)T & 1, 30);
f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
}
-void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
+void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
int imm, int op1, int op2)
{
starti;
@@ -1872,7 +1872,7 @@
f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
}
-void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
+void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
Register Xm, int op1, int op2)
{
starti;
@@ -1883,90 +1883,90 @@
#define INSN1(NAME, op1, op2) \
void NAME(FloatRegister Vt, SIMD_Arrangement T, Register Xn) { \
- v_ld_st(Vt, T, Xn, op1, op2); \
+ ld_st(Vt, T, Xn, op1, op2); \
} \
void NAME(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int imm) { \
- v_ld_st(Vt, T, Xn, imm, op1, op2); \
+ ld_st(Vt, T, Xn, imm, op1, op2); \
} \
void NAME(FloatRegister Vt, SIMD_Arrangement T, Register Xn, Register Xm) { \
- v_ld_st(Vt, T, Xn, Xm, op1, op2); \
+ ld_st(Vt, T, Xn, Xm, op1, op2); \
}
#define INSN2(NAME, op1, op2) \
void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn) { \
assert(Vt->successor() == Vt2, "Registers must be ordered"); \
- v_ld_st(Vt, T, Xn, op1, op2); \
+ ld_st(Vt, T, Xn, op1, op2); \
} \
void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn, \
int imm) { \
assert(Vt->successor() == Vt2, "Registers must be ordered"); \
- v_ld_st(Vt, T, Xn, imm, op1, op2); \
+ ld_st(Vt, T, Xn, imm, op1, op2); \
} \
void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn, \
Register Xm) { \
assert(Vt->successor() == Vt2, "Registers must be ordered"); \
- v_ld_st(Vt, T, Xn, Xm, op1, op2); \
+ ld_st(Vt, T, Xn, Xm, op1, op2); \
}
#define INSN3(NAME, op1, op2) \
void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
SIMD_Arrangement T, Register Xn) { \
assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \
"Registers must be ordered"); \
- v_ld_st(Vt, T, Xn, op1, op2); \
+ ld_st(Vt, T, Xn, op1, op2); \
} \
void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
SIMD_Arrangement T, Register Xn, int imm) { \
assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \
"Registers must be ordered"); \
- v_ld_st(Vt, T, Xn, imm, op1, op2); \
+ ld_st(Vt, T, Xn, imm, op1, op2); \
} \
void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
SIMD_Arrangement T, Register Xn, Register Xm) { \
assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \
"Registers must be ordered"); \
- v_ld_st(Vt, T, Xn, Xm, op1, op2); \
+ ld_st(Vt, T, Xn, Xm, op1, op2); \
}
#define INSN4(NAME, op1, op2) \
void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
FloatRegister Vt4, SIMD_Arrangement T, Register Xn) { \
assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \
Vt3->successor() == Vt4, "Registers must be ordered"); \
- v_ld_st(Vt, T, Xn, op1, op2); \
+ ld_st(Vt, T, Xn, op1, op2); \
} \
void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
FloatRegister Vt4, SIMD_Arrangement T, Register Xn, int imm) { \
assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \
Vt3->successor() == Vt4, "Registers must be ordered"); \
- v_ld_st(Vt, T, Xn, imm, op1, op2); \
+ ld_st(Vt, T, Xn, imm, op1, op2); \
} \
void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
FloatRegister Vt4, SIMD_Arrangement T, Register Xn, Register Xm) { \
assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \
Vt3->successor() == Vt4, "Registers must be ordered"); \
- v_ld_st(Vt, T, Xn, Xm, op1, op2); \
+ ld_st(Vt, T, Xn, Xm, op1, op2); \
}
- INSN1(v_ld1, 0b001100010, 0b0111);
- INSN2(v_ld1, 0b001100010, 0b1010);
- INSN3(v_ld1, 0b001100010, 0b0110);
- INSN4(v_ld1, 0b001100010, 0b0010);
+ INSN1(ld1, 0b001100010, 0b0111);
+ INSN2(ld1, 0b001100010, 0b1010);
+ INSN3(ld1, 0b001100010, 0b0110);
+ INSN4(ld1, 0b001100010, 0b0010);
- INSN2(v_ld2, 0b001100010, 0b1000);
- INSN3(v_ld3, 0b001100010, 0b0100);
- INSN4(v_ld4, 0b001100010, 0b0000);
+ INSN2(ld2, 0b001100010, 0b1000);
+ INSN3(ld3, 0b001100010, 0b0100);
+ INSN4(ld4, 0b001100010, 0b0000);
- INSN1(v_st1, 0b001100000, 0b0111);
- INSN2(v_st1, 0b001100000, 0b1010);
- INSN3(v_st1, 0b001100000, 0b0110);
- INSN4(v_st1, 0b001100000, 0b0010);
+ INSN1(st1, 0b001100000, 0b0111);
+ INSN2(st1, 0b001100000, 0b1010);
+ INSN3(st1, 0b001100000, 0b0110);
+ INSN4(st1, 0b001100000, 0b0010);
- INSN2(v_st2, 0b001100000, 0b1000);
- INSN3(v_st3, 0b001100000, 0b0100);
- INSN4(v_st4, 0b001100000, 0b0000);
+ INSN2(st2, 0b001100000, 0b1000);
+ INSN3(st3, 0b001100000, 0b0100);
+ INSN4(st4, 0b001100000, 0b0000);
- INSN1(v_ld1r, 0b001101010, 0b1100);
- INSN2(v_ld2r, 0b001101011, 0b1100);
- INSN3(v_ld3r, 0b001101010, 0b1110);
- INSN4(v_ld4r, 0b001101011, 0b1110);
+ INSN1(ld1r, 0b001101010, 0b1100);
+ INSN2(ld2r, 0b001101011, 0b1100);
+ INSN3(ld3r, 0b001101010, 0b1110);
+ INSN4(ld4r, 0b001101011, 0b1110);
#undef INSN1
#undef INSN2
@@ -1981,14 +1981,14 @@
rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0); \
}
- INSN(v_eor, 0b101110001);
- INSN(v_orr, 0b001110101);
- INSN(v_and, 0b001110001);
- INSN(v_bic, 0b001110011);
- INSN(v_bif, 0b101110111);
- INSN(v_bit, 0b101110101);
- INSN(v_bsl, 0b101110011);
- INSN(v_orn, 0b001110111);
+ INSN(eor, 0b101110001);
+ INSN(orr, 0b001110101);
+ INSN(andr, 0b001110001);
+ INSN(bic, 0b001110011);
+ INSN(bif, 0b101110111);
+ INSN(bit, 0b101110101);
+ INSN(bsl, 0b101110011);
+ INSN(orn, 0b001110111);
#undef INSN
@@ -1998,14 +1998,14 @@
f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0); \
}
- INSN(v_aese, 0b0100111000101000010010);
- INSN(v_aesd, 0b0100111000101000010110);
- INSN(v_aesmc, 0b0100111000101000011010);
- INSN(v_aesimc, 0b0100111000101000011110);
+ INSN(aese, 0b0100111000101000010010);
+ INSN(aesd, 0b0100111000101000010110);
+ INSN(aesmc, 0b0100111000101000011010);
+ INSN(aesimc, 0b0100111000101000011110);
#undef INSN
- void v_shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){
+ void shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){
starti;
/* The encodings for the immh:immb fields (bits 22:16) are
* 0001 xxx 8B/16B, shift = xxx
@@ -2018,7 +2018,7 @@
f(0b010101, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
- void v_ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+ void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
starti;
/* The encodings for the immh:immb fields (bits 22:16) are
* 0001 xxx 8H, 8B/16b shift = xxx
@@ -2031,22 +2031,22 @@
f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16);
f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
- void v_ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
- v_ushll(Vd, Ta, Vn, Tb, shift);
+ void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+ ushll(Vd, Ta, Vn, Tb, shift);
}
- void v_uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T, int op = 0){
+ void uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T, int op = 0){
starti;
f(0, 31), f((T & 0x1), 30), f(0b001110, 29, 24), f((T >> 1), 23, 22), f(0, 21);
rf(Vm, 16), f(0, 15), f(op, 14), f(0b0110, 13, 10), rf(Vn, 5), rf(Vd, 0);
}
- void v_uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T){
- v_uzp1(Vd, Vn, Vm, T, 1);
+ void uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T){
+ uzp1(Vd, Vn, Vm, T, 1);
}
// Move from general purpose register
// mov Vd.T[index], Rn
- void v_mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) {
+ void mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) {
starti;
f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
f(0b000111, 15, 10), rf(Xn, 5), rf(Vd, 0);
@@ -2054,7 +2054,7 @@
// Move to general purpose register
// mov Rd, Vn.T[index]
- void v_mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) {
+ void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) {
starti;
f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21);
f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
@@ -2062,17 +2062,17 @@
}
// We do not handle the 1Q arrangement.
- void v_pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
+ void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
starti;
assert(Ta == T8H && (Tb == T8B || Tb == T16B), "Invalid Size specifier");
f(0, 31), f(Tb & 1, 30), f(0b001110001, 29, 21), rf(Vm, 16), f(0b111000, 15, 10);
rf(Vn, 5), rf(Vd, 0);
}
- void v_pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
- v_pmull(Vd, Ta, Vn, Vm, Tb);
+ void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
+ pmull(Vd, Ta, Vn, Vm, Tb);
}
- void v_rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn)
+ void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn)
{
starti;
assert(T <= T8H, "must be one of T8B, T16B, T4H, T8H");
diff -r c4af536cc45c -r e2941a6acc55 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon Jul 21 17:23:40 2014 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Jul 22 08:45:24 2014 -0400
@@ -2239,131 +2239,131 @@
if (UseNeon) {
cmp(len, 64);
br(Assembler::LT, L_by16);
- v_eor(v16, T16B, v16, v16);
+ eor(v16, T16B, v16, v16);
Label L_fold;
add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants
- v_ld1(v0, v1, T2D, buf, 32);
- v_ld1r(v4, T2D, tmp, 8);
- v_ld1r(v5, T2D, tmp, 8);
- v_ld1r(v6, T2D, tmp, 8);
- v_ld1r(v7, T2D, tmp, 8);
- v_mov(v16, T4S, 0, crc);
-
- v_eor(v0, T16B, v0, v16);
+ ld1(v0, v1, T2D, buf, 32);
+ ld1r(v4, T2D, tmp, 8);
+ ld1r(v5, T2D, tmp, 8);
+ ld1r(v6, T2D, tmp, 8);
+ ld1r(v7, T2D, tmp, 8);
+ mov(v16, T4S, 0, crc);
+
+ eor(v0, T16B, v0, v16);
sub(len, len, 64);
BIND(L_fold);
- v_pmull(v22, T8H, v0, v5, T8B);
- v_pmull(v20, T8H, v0, v7, T8B);
- v_pmull(v23, T8H, v0, v4, T8B);
- v_pmull(v21, T8H, v0, v6, T8B);
-
- v_pmull2(v18, T8H, v0, v5, T16B);
- v_pmull2(v16, T8H, v0, v7, T16B);
- v_pmull2(v19, T8H, v0, v4, T16B);
- v_pmull2(v17, T8H, v0, v6, T16B);
-
- v_uzp1(v24, v20, v22, T8H);
- v_uzp2(v25, v20, v22, T8H);
- v_eor(v20, T16B, v24, v25);
-
- v_uzp1(v26, v16, v18, T8H);
- v_uzp2(v27, v16, v18, T8H);
- v_eor(v16, T16B, v26, v27);
-
- v_ushll2(v22, T4S, v20, T8H, 8);
- v_ushll(v20, T4S, v20, T4H, 8);
-
- v_ushll2(v18, T4S, v16, T8H, 8);
- v_ushll(v16, T4S, v16, T4H, 8);
-
- v_eor(v22, T16B, v23, v22);
- v_eor(v18, T16B, v19, v18);
- v_eor(v20, T16B, v21, v20);
- v_eor(v16, T16B, v17, v16);
-
- v_uzp1(v17, v16, v20, T2D);
- v_uzp2(v21, v16, v20, T2D);
- v_eor(v17, T16B, v17, v21);
-
- v_ushll2(v20, T2D, v17, T4S, 16);
- v_ushll(v16, T2D, v17, T2S, 16);
-
- v_eor(v20, T16B, v20, v22);
- v_eor(v16, T16B, v16, v18);
-
- v_uzp1(v17, v20, v16, T2D);
- v_uzp2(v21, v20, v16, T2D);
- v_eor(v28, T16B, v17, v21);
-
- v_pmull(v22, T8H, v1, v5, T8B);
- v_pmull(v20, T8H, v1, v7, T8B);
- v_pmull(v23, T8H, v1, v4, T8B);
- v_pmull(v21, T8H, v1, v6, T8B);
-
- v_pmull2(v18, T8H, v1, v5, T16B);
- v_pmull2(v16, T8H, v1, v7, T16B);
- v_pmull2(v19, T8H, v1, v4, T16B);
- v_pmull2(v17, T8H, v1, v6, T16B);
-
- v_ld1(v0, v1, T2D, buf, 32);
-
- v_uzp1(v24, v20, v22, T8H);
- v_uzp2(v25, v20, v22, T8H);
- v_eor(v20, T16B, v24, v25);
-
- v_uzp1(v26, v16, v18, T8H);
- v_uzp2(v27, v16, v18, T8H);
- v_eor(v16, T16B, v26, v27);
-
- v_ushll2(v22, T4S, v20, T8H, 8);
- v_ushll(v20, T4S, v20, T4H, 8);
-
- v_ushll2(v18, T4S, v16, T8H, 8);
- v_ushll(v16, T4S, v16, T4H, 8);
-
- v_eor(v22, T16B, v23, v22);
- v_eor(v18, T16B, v19, v18);
- v_eor(v20, T16B, v21, v20);
- v_eor(v16, T16B, v17, v16);
-
- v_uzp1(v17, v16, v20, T2D);
- v_uzp2(v21, v16, v20, T2D);
- v_eor(v16, T16B, v17, v21);
-
- v_ushll2(v20, T2D, v16, T4S, 16);
- v_ushll(v16, T2D, v16, T2S, 16);
-
- v_eor(v20, T16B, v22, v20);
- v_eor(v16, T16B, v16, v18);
-
- v_uzp1(v17, v20, v16, T2D);
- v_uzp2(v21, v20, v16, T2D);
- v_eor(v20, T16B, v17, v21);
-
- v_shl(v16, v28, T2D, 1);
- v_shl(v17, v20, T2D, 1);
-
- v_eor(v0, T16B, v0, v16);
- v_eor(v1, T16B, v1, v17);
+ pmull(v22, T8H, v0, v5, T8B);
+ pmull(v20, T8H, v0, v7, T8B);
+ pmull(v23, T8H, v0, v4, T8B);
+ pmull(v21, T8H, v0, v6, T8B);
+
+ pmull2(v18, T8H, v0, v5, T16B);
+ pmull2(v16, T8H, v0, v7, T16B);
+ pmull2(v19, T8H, v0, v4, T16B);
+ pmull2(v17, T8H, v0, v6, T16B);
+
+ uzp1(v24, v20, v22, T8H);
+ uzp2(v25, v20, v22, T8H);
+ eor(v20, T16B, v24, v25);
+
+ uzp1(v26, v16, v18, T8H);
+ uzp2(v27, v16, v18, T8H);
+ eor(v16, T16B, v26, v27);
+
+ ushll2(v22, T4S, v20, T8H, 8);
+ ushll(v20, T4S, v20, T4H, 8);
+
+ ushll2(v18, T4S, v16, T8H, 8);
+ ushll(v16, T4S, v16, T4H, 8);
+
+ eor(v22, T16B, v23, v22);
+ eor(v18, T16B, v19, v18);
+ eor(v20, T16B, v21, v20);
+ eor(v16, T16B, v17, v16);
+
+ uzp1(v17, v16, v20, T2D);
+ uzp2(v21, v16, v20, T2D);
+ eor(v17, T16B, v17, v21);
+
+ ushll2(v20, T2D, v17, T4S, 16);
+ ushll(v16, T2D, v17, T2S, 16);
+
+ eor(v20, T16B, v20, v22);
+ eor(v16, T16B, v16, v18);
+
+ uzp1(v17, v20, v16, T2D);
+ uzp2(v21, v20, v16, T2D);
+ eor(v28, T16B, v17, v21);
+
+ pmull(v22, T8H, v1, v5, T8B);
+ pmull(v20, T8H, v1, v7, T8B);
+ pmull(v23, T8H, v1, v4, T8B);
+ pmull(v21, T8H, v1, v6, T8B);
+
+ pmull2(v18, T8H, v1, v5, T16B);
+ pmull2(v16, T8H, v1, v7, T16B);
+ pmull2(v19, T8H, v1, v4, T16B);
+ pmull2(v17, T8H, v1, v6, T16B);
+
+ ld1(v0, v1, T2D, buf, 32);
+
+ uzp1(v24, v20, v22, T8H);
+ uzp2(v25, v20, v22, T8H);
+ eor(v20, T16B, v24, v25);
+
+ uzp1(v26, v16, v18, T8H);
+ uzp2(v27, v16, v18, T8H);
+ eor(v16, T16B, v26, v27);
+
+ ushll2(v22, T4S, v20, T8H, 8);
+ ushll(v20, T4S, v20, T4H, 8);
+
+ ushll2(v18, T4S, v16, T8H, 8);
+ ushll(v16, T4S, v16, T4H, 8);
+
+ eor(v22, T16B, v23, v22);
+ eor(v18, T16B, v19, v18);
+ eor(v20, T16B, v21, v20);
+ eor(v16, T16B, v17, v16);
+
+ uzp1(v17, v16, v20, T2D);
+ uzp2(v21, v16, v20, T2D);
+ eor(v16, T16B, v17, v21);
+
+ ushll2(v20, T2D, v16, T4S, 16);
+ ushll(v16, T2D, v16, T2S, 16);
+
+ eor(v20, T16B, v22, v20);
+ eor(v16, T16B, v16, v18);
+
+ uzp1(v17, v20, v16, T2D);
+ uzp2(v21, v20, v16, T2D);
+ eor(v20, T16B, v17, v21);
+
+ shl(v16, v28, T2D, 1);
+ shl(v17, v20, T2D, 1);
+
+ eor(v0, T16B, v0, v16);
+ eor(v1, T16B, v1, v17);
subs(len, len, 32);
br(Assembler::GE, L_fold);
mov(crc, 0);
- v_mov(tmp, v0, T1D, 0);
+ mov(tmp, v0, T1D, 0);
update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
- v_mov(tmp, v0, T1D, 1);
+ mov(tmp, v0, T1D, 1);
update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
- v_mov(tmp, v1, T1D, 0);
+ mov(tmp, v1, T1D, 0);
update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
- v_mov(tmp, v1, T1D, 1);
+ mov(tmp, v1, T1D, 1);
update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false);
update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true);
@@ -2773,7 +2773,7 @@
decode_heap_oop_not_null(dst);
} else {
ldr(dst, src);
- }
+ }
}
void MacroAssembler::store_heap_oop(Address dst, Register src) {
diff -r c4af536cc45c -r e2941a6acc55 src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Mon Jul 21 17:23:40 2014 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Jul 22 08:45:24 2014 -0400
@@ -37,6 +37,8 @@
class MacroAssembler: public Assembler {
friend class LIR_Assembler;
+ using Assembler::mov;
+
protected:
// Support for VM calls
diff -r c4af536cc45c -r e2941a6acc55 src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Mon Jul 21 17:23:40 2014 +0100
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Tue Jul 22 08:45:24 2014 -0400
@@ -1901,75 +1901,75 @@
__ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
- __ v_ld1(v0, __ T16B, from); // get 16 bytes of input
+ __ ld1(v0, __ T16B, from); // get 16 bytes of input
- __ v_ld1(v1, v2, v3, v4, __ T16B, key, 64);
- __ v_rev32(v1, __ T16B, v1);
- __ v_rev32(v2, __ T16B, v2);
- __ v_rev32(v3, __ T16B, v3);
- __ v_rev32(v4, __ T16B, v4);
- __ v_aese(v0, v1);
- __ v_aesmc(v0, v0);
- __ v_aese(v0, v2);
- __ v_aesmc(v0, v0);
- __ v_aese(v0, v3);
- __ v_aesmc(v0, v0);
- __ v_aese(v0, v4);
- __ v_aesmc(v0, v0);
+ __ ld1(v1, v2, v3, v4, __ T16B, key, 64);
+ __ rev32(v1, __ T16B, v1);
+ __ rev32(v2, __ T16B, v2);
+ __ rev32(v3, __ T16B, v3);
+ __ rev32(v4, __ T16B, v4);
+ __ aese(v0, v1);
+ __ aesmc(v0, v0);
+ __ aese(v0, v2);
+ __ aesmc(v0, v0);
+ __ aese(v0, v3);
+ __ aesmc(v0, v0);
+ __ aese(v0, v4);
+ __ aesmc(v0, v0);
- __ v_ld1(v1, v2, v3, v4, __ T16B, key, 64);
- __ v_rev32(v1, __ T16B, v1);
- __ v_rev32(v2, __ T16B, v2);
- __ v_rev32(v3, __ T16B, v3);
- __ v_rev32(v4, __ T16B, v4);
- __ v_aese(v0, v1);
- __ v_aesmc(v0, v0);
- __ v_aese(v0, v2);
- __ v_aesmc(v0, v0);
- __ v_aese(v0, v3);
- __ v_aesmc(v0, v0);
- __ v_aese(v0, v4);
- __ v_aesmc(v0, v0);
+ __ ld1(v1, v2, v3, v4, __ T16B, key, 64);
+ __ rev32(v1, __ T16B, v1);
+ __ rev32(v2, __ T16B, v2);
+ __ rev32(v3, __ T16B, v3);
+ __ rev32(v4, __ T16B, v4);
+ __ aese(v0, v1);
+ __ aesmc(v0, v0);
+ __ aese(v0, v2);
+ __ aesmc(v0, v0);
+ __ aese(v0, v3);
+ __ aesmc(v0, v0);
+ __ aese(v0, v4);
+ __ aesmc(v0, v0);
- __ v_ld1(v1, v2, __ T16B, key, 32);
- __ v_rev32(v1, __ T16B, v1);
- __ v_rev32(v2, __ T16B, v2);
+ __ ld1(v1, v2, __ T16B, key, 32);
+ __ rev32(v1, __ T16B, v1);
+ __ rev32(v2, __ T16B, v2);
__ cmpw(keylen, 44);
__ br(Assembler::EQ, L_doLast);
- __ v_aese(v0, v1);
- __ v_aesmc(v0, v0);
- __ v_aese(v0, v2);
- __ v_aesmc(v0, v0);
+ __ aese(v0, v1);
+ __ aesmc(v0, v0);
+ __ aese(v0, v2);
+ __ aesmc(v0, v0);
- __ v_ld1(v1, v2, __ T16B, key, 32);
- __ v_rev32(v1, __ T16B, v1);
- __ v_rev32(v2, __ T16B, v2);
+ __ ld1(v1, v2, __ T16B, key, 32);
+ __ rev32(v1, __ T16B, v1);
+ __ rev32(v2, __ T16B, v2);
__ cmpw(keylen, 52);
__ br(Assembler::EQ, L_doLast);
- __ v_aese(v0, v1);
- __ v_aesmc(v0, v0);
- __ v_aese(v0, v2);
- __ v_aesmc(v0, v0);
+ __ aese(v0, v1);
+ __ aesmc(v0, v0);
+ __ aese(v0, v2);
+ __ aesmc(v0, v0);
- __ v_ld1(v1, v2, __ T16B, key, 32);
- __ v_rev32(v1, __ T16B, v1);
- __ v_rev32(v2, __ T16B, v2);
+ __ ld1(v1, v2, __ T16B, key, 32);
+ __ rev32(v1, __ T16B, v1);
+ __ rev32(v2, __ T16B, v2);
__ BIND(L_doLast);
- __ v_aese(v0, v1);
- __ v_aesmc(v0, v0);
- __ v_aese(v0, v2);
+ __ aese(v0, v1);
+ __ aesmc(v0, v0);
+ __ aese(v0, v2);
- __ v_ld1(v1, __ T16B, key);
- __ v_rev32(v1, __ T16B, v1);
- __ v_eor(v0, __ T16B, v0, v1);
+ __ ld1(v1, __ T16B, key);
+ __ rev32(v1, __ T16B, v1);
+ __ eor(v0, __ T16B, v0, v1);
- __ v_st1(v0, __ T16B, to);
+ __ st1(v0, __ T16B, to);
__ mov(r0, 0);
@@ -2002,76 +2002,76 @@
__ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
- __ v_ld1(v0, __ T16B, from); // get 16 bytes of input
+ __ ld1(v0, __ T16B, from); // get 16 bytes of input
- __ v_ld1(v5, __ T16B, key, 16);
- __ v_rev32(v5, __ T16B, v5);
+ __ ld1(v5, __ T16B, key, 16);
+ __ rev32(v5, __ T16B, v5);
- __ v_ld1(v1, v2, v3, v4, __ T16B, key, 64);
- __ v_rev32(v1, __ T16B, v1);
- __ v_rev32(v2, __ T16B, v2);
- __ v_rev32(v3, __ T16B, v3);
- __ v_rev32(v4, __ T16B, v4);
- __ v_aesd(v0, v1);
- __ v_aesimc(v0, v0);
- __ v_aesd(v0, v2);
- __ v_aesimc(v0, v0);
- __ v_aesd(v0, v3);
- __ v_aesimc(v0, v0);
- __ v_aesd(v0, v4);
- __ v_aesimc(v0, v0);
+ __ ld1(v1, v2, v3, v4, __ T16B, key, 64);
+ __ rev32(v1, __ T16B, v1);
+ __ rev32(v2, __ T16B, v2);
+ __ rev32(v3, __ T16B, v3);
+ __ rev32(v4, __ T16B, v4);
+ __ aesd(v0, v1);
+ __ aesimc(v0, v0);
+ __ aesd(v0, v2);
+ __ aesimc(v0, v0);
+ __ aesd(v0, v3);
+ __ aesimc(v0, v0);
+ __ aesd(v0, v4);
+ __ aesimc(v0, v0);
- __ v_ld1(v1, v2, v3, v4, __ T16B, key, 64);
- __ v_rev32(v1, __ T16B, v1);
- __ v_rev32(v2, __ T16B, v2);
- __ v_rev32(v3, __ T16B, v3);
- __ v_rev32(v4, __ T16B, v4);
- __ v_aesd(v0, v1);
- __ v_aesimc(v0, v0);
- __ v_aesd(v0, v2);
- __ v_aesimc(v0, v0);
- __ v_aesd(v0, v3);
- __ v_aesimc(v0, v0);
- __ v_aesd(v0, v4);
- __ v_aesimc(v0, v0);
+ __ ld1(v1, v2, v3, v4, __ T16B, key, 64);
+ __ rev32(v1, __ T16B, v1);
+ __ rev32(v2, __ T16B, v2);
+ __ rev32(v3, __ T16B, v3);
+ __ rev32(v4, __ T16B, v4);
+ __ aesd(v0, v1);
+ __ aesimc(v0, v0);
+ __ aesd(v0, v2);
+ __ aesimc(v0, v0);
+ __ aesd(v0, v3);
+ __ aesimc(v0, v0);
+ __ aesd(v0, v4);
+ __ aesimc(v0, v0);
- __ v_ld1(v1, v2, __ T16B, key, 32);
- __ v_rev32(v1, __ T16B, v1);
- __ v_rev32(v2, __ T16B, v2);
+ __ ld1(v1, v2, __ T16B, key, 32);
+ __ rev32(v1, __ T16B, v1);
+ __ rev32(v2, __ T16B, v2);
__ cmpw(keylen, 44);
__ br(Assembler::EQ, L_doLast);
- __ v_aesd(v0, v1);
- __ v_aesimc(v0, v0);
- __ v_aesd(v0, v2);
- __ v_aesimc(v0, v0);
+ __ aesd(v0, v1);
+ __ aesimc(v0, v0);
+ __ aesd(v0, v2);
+ __ aesimc(v0, v0);
- __ v_ld1(v1, v2, __ T16B, key, 32);
- __ v_rev32(v1, __ T16B, v1);
- __ v_rev32(v2, __ T16B, v2);
+ __ ld1(v1, v2, __ T16B, key, 32);
+ __ rev32(v1, __ T16B, v1);
+ __ rev32(v2, __ T16B, v2);
__ cmpw(keylen, 52);
__ br(Assembler::EQ, L_doLast);
- __ v_aesd(v0, v1);
- __ v_aesimc(v0, v0);
- __ v_aesd(v0, v2);
- __ v_aesimc(v0, v0);
+ __ aesd(v0, v1);
+ __ aesimc(v0, v0);
+ __ aesd(v0, v2);
+ __ aesimc(v0, v0);
- __ v_ld1(v1, v2, __ T16B, key, 32);
- __ v_rev32(v1, __ T16B, v1);
- __ v_rev32(v2, __ T16B, v2);
+ __ ld1(v1, v2, __ T16B, key, 32);
+ __ rev32(v1, __ T16B, v1);
+ __ rev32(v2, __ T16B, v2);
__ BIND(L_doLast);
- __ v_aesd(v0, v1);
- __ v_aesimc(v0, v0);
- __ v_aesd(v0, v2);
+ __ aesd(v0, v1);
+ __ aesimc(v0, v0);
+ __ aesd(v0, v2);
- __ v_eor(v0, __ T16B, v0, v5);
+ __ eor(v0, __ T16B, v0, v5);
- __ v_st1(v0, __ T16B, to);
+ __ st1(v0, __ T16B, to);
__ mov(r0, 0);
@@ -2114,65 +2114,65 @@
__ mov(rscratch1, len_reg);
__ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
- __ v_ld1(v0, __ T16B, rvec);
+ __ ld1(v0, __ T16B, rvec);
__ cmpw(keylen, 52);
__ br(Assembler::CC, L_loadkeys_44);
__ br(Assembler::EQ, L_loadkeys_52);
- __ v_ld1(v17, v18, __ T16B, key, 32);
- __ v_rev32(v17, __ T16B, v17);
- __ v_rev32(v18, __ T16B, v18);
+ __ ld1(v17, v18, __ T16B, key, 32);
+ __ rev32(v17, __ T16B, v17);
+ __ rev32(v18, __ T16B, v18);
__ BIND(L_loadkeys_52);
- __ v_ld1(v19, v20, __ T16B, key, 32);
- __ v_rev32(v19, __ T16B, v19);
- __ v_rev32(v20, __ T16B, v20);
+ __ ld1(v19, v20, __ T16B, key, 32);
+ __ rev32(v19, __ T16B, v19);
+ __ rev32(v20, __ T16B, v20);
__ BIND(L_loadkeys_44);
- __ v_ld1(v21, v22, v23, v24, __ T16B, key, 64);
- __ v_rev32(v21, __ T16B, v21);
- __ v_rev32(v22, __ T16B, v22);
- __ v_rev32(v23, __ T16B, v23);
- __ v_rev32(v24, __ T16B, v24);
- __ v_ld1(v25, v26, v27, v28, __ T16B, key, 64);
- __ v_rev32(v25, __ T16B, v25);
- __ v_rev32(v26, __ T16B, v26);
- __ v_rev32(v27, __ T16B, v27);
- __ v_rev32(v28, __ T16B, v28);
- __ v_ld1(v29, v30, v31, __ T16B, key);
- __ v_rev32(v29, __ T16B, v29);
- __ v_rev32(v30, __ T16B, v30);
- __ v_rev32(v31, __ T16B, v31);
+ __ ld1(v21, v22, v23, v24, __ T16B, key, 64);
+ __ rev32(v21, __ T16B, v21);
+ __ rev32(v22, __ T16B, v22);
+ __ rev32(v23, __ T16B, v23);
+ __ rev32(v24, __ T16B, v24);
+ __ ld1(v25, v26, v27, v28, __ T16B, key, 64);
+ __ rev32(v25, __ T16B, v25);
+ __ rev32(v26, __ T16B, v26);
+ __ rev32(v27, __ T16B, v27);
+ __ rev32(v28, __ T16B, v28);
+ __ ld1(v29, v30, v31, __ T16B, key);
+ __ rev32(v29, __ T16B, v29);
+ __ rev32(v30, __ T16B, v30);
+ __ rev32(v31, __ T16B, v31);
__ BIND(L_aes_loop);
- __ v_ld1(v1, __ T16B, from, 16);
- __ v_eor(v0, __ T16B, v0, v1);
+ __ ld1(v1, __ T16B, from, 16);
+ __ eor(v0, __ T16B, v0, v1);
__ br(Assembler::CC, L_rounds_44);
__ br(Assembler::EQ, L_rounds_52);
- __ v_aese(v0, v17); __ v_aesmc(v0, v0);
- __ v_aese(v0, v18); __ v_aesmc(v0, v0);
+ __ aese(v0, v17); __ aesmc(v0, v0);
+ __ aese(v0, v18); __ aesmc(v0, v0);
__ BIND(L_rounds_52);
- __ v_aese(v0, v19); __ v_aesmc(v0, v0);
- __ v_aese(v0, v20); __ v_aesmc(v0, v0);
+ __ aese(v0, v19); __ aesmc(v0, v0);
+ __ aese(v0, v20); __ aesmc(v0, v0);
__ BIND(L_rounds_44);
- __ v_aese(v0, v21); __ v_aesmc(v0, v0);
- __ v_aese(v0, v22); __ v_aesmc(v0, v0);
- __ v_aese(v0, v23); __ v_aesmc(v0, v0);
- __ v_aese(v0, v24); __ v_aesmc(v0, v0);
- __ v_aese(v0, v25); __ v_aesmc(v0, v0);
- __ v_aese(v0, v26); __ v_aesmc(v0, v0);
- __ v_aese(v0, v27); __ v_aesmc(v0, v0);
- __ v_aese(v0, v28); __ v_aesmc(v0, v0);
- __ v_aese(v0, v29); __ v_aesmc(v0, v0);
- __ v_aese(v0, v30);
- __ v_eor(v0, __ T16B, v0, v31);
+ __ aese(v0, v21); __ aesmc(v0, v0);
+ __ aese(v0, v22); __ aesmc(v0, v0);
+ __ aese(v0, v23); __ aesmc(v0, v0);
+ __ aese(v0, v24); __ aesmc(v0, v0);
+ __ aese(v0, v25); __ aesmc(v0, v0);
+ __ aese(v0, v26); __ aesmc(v0, v0);
+ __ aese(v0, v27); __ aesmc(v0, v0);
+ __ aese(v0, v28); __ aesmc(v0, v0);
+ __ aese(v0, v29); __ aesmc(v0, v0);
+ __ aese(v0, v30);
+ __ eor(v0, __ T16B, v0, v31);
- __ v_st1(v0, __ T16B, to, 16);
+ __ st1(v0, __ T16B, to, 16);
__ sub(len_reg, len_reg, 16);
__ cbnz(len_reg, L_aes_loop);
- __ v_st1(v0, __ T16B, rvec);
+ __ st1(v0, __ T16B, rvec);
__ mov(r0, rscratch2);
@@ -2213,70 +2213,70 @@
__ mov(rscratch2, len_reg);
__ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
- __ v_ld1(v2, __ T16B, rvec);
+ __ ld1(v2, __ T16B, rvec);
- __ v_ld1(v31, __ T16B, key, 16);
- __ v_rev32(v31, __ T16B, v31);
+ __ ld1(v31, __ T16B, key, 16);
+ __ rev32(v31, __ T16B, v31);
__ cmpw(keylen, 52);
__ br(Assembler::CC, L_loadkeys_44);
__ br(Assembler::EQ, L_loadkeys_52);
- __ v_ld1(v17, v18, __ T16B, key, 32);
- __ v_rev32(v17, __ T16B, v17);
- __ v_rev32(v18, __ T16B, v18);
+ __ ld1(v17, v18, __ T16B, key, 32);
+ __ rev32(v17, __ T16B, v17);
+ __ rev32(v18, __ T16B, v18);
__ BIND(L_loadkeys_52);
- __ v_ld1(v19, v20, __ T16B, key, 32);
- __ v_rev32(v19, __ T16B, v19);
- __ v_rev32(v20, __ T16B, v20);
+ __ ld1(v19, v20, __ T16B, key, 32);
+ __ rev32(v19, __ T16B, v19);
+ __ rev32(v20, __ T16B, v20);
__ BIND(L_loadkeys_44);
- __ v_ld1(v21, v22, v23, v24, __ T16B, key, 64);
- __ v_rev32(v21, __ T16B, v21);
- __ v_rev32(v22, __ T16B, v22);
- __ v_rev32(v23, __ T16B, v23);
- __ v_rev32(v24, __ T16B, v24);
- __ v_ld1(v25, v26, v27, v28, __ T16B, key, 64);
- __ v_rev32(v25, __ T16B, v25);
- __ v_rev32(v26, __ T16B, v26);
- __ v_rev32(v27, __ T16B, v27);
- __ v_rev32(v28, __ T16B, v28);
- __ v_ld1(v29, v30, __ T16B, key);
- __ v_rev32(v29, __ T16B, v29);
- __ v_rev32(v30, __ T16B, v30);
+ __ ld1(v21, v22, v23, v24, __ T16B, key, 64);
+ __ rev32(v21, __ T16B, v21);
+ __ rev32(v22, __ T16B, v22);
+ __ rev32(v23, __ T16B, v23);
+ __ rev32(v24, __ T16B, v24);
+ __ ld1(v25, v26, v27, v28, __ T16B, key, 64);
+ __ rev32(v25, __ T16B, v25);
+ __ rev32(v26, __ T16B, v26);
+ __ rev32(v27, __ T16B, v27);
+ __ rev32(v28, __ T16B, v28);
+ __ ld1(v29, v30, __ T16B, key);
+ __ rev32(v29, __ T16B, v29);
+ __ rev32(v30, __ T16B, v30);
__ BIND(L_aes_loop);
- __ v_ld1(v0, __ T16B, from, 16);
- __ v_orr(v1, __ T16B, v0, v0);
+ __ ld1(v0, __ T16B, from, 16);
+ __ orr(v1, __ T16B, v0, v0);
__ br(Assembler::CC, L_rounds_44);
__ br(Assembler::EQ, L_rounds_52);
- __ v_aesd(v0, v17); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v17); __ v_aesimc(v0, v0);
+ __ aesd(v0, v17); __ aesimc(v0, v0);
+ __ aesd(v0, v17); __ aesimc(v0, v0);
__ BIND(L_rounds_52);
- __ v_aesd(v0, v19); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v20); __ v_aesimc(v0, v0);
+ __ aesd(v0, v19); __ aesimc(v0, v0);
+ __ aesd(v0, v20); __ aesimc(v0, v0);
__ BIND(L_rounds_44);
- __ v_aesd(v0, v21); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v22); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v23); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v24); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v25); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v26); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v27); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v28); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v29); __ v_aesimc(v0, v0);
- __ v_aesd(v0, v30);
- __ v_eor(v0, __ T16B, v0, v31);
- __ v_eor(v0, __ T16B, v0, v2);
+ __ aesd(v0, v21); __ aesimc(v0, v0);
+ __ aesd(v0, v22); __ aesimc(v0, v0);
+ __ aesd(v0, v23); __ aesimc(v0, v0);
+ __ aesd(v0, v24); __ aesimc(v0, v0);
+ __ aesd(v0, v25); __ aesimc(v0, v0);
+ __ aesd(v0, v26); __ aesimc(v0, v0);
+ __ aesd(v0, v27); __ aesimc(v0, v0);
+ __ aesd(v0, v28); __ aesimc(v0, v0);
+ __ aesd(v0, v29); __ aesimc(v0, v0);
+ __ aesd(v0, v30);
+ __ eor(v0, __ T16B, v0, v31);
+ __ eor(v0, __ T16B, v0, v2);
- __ v_st1(v0, __ T16B, to, 16);
- __ v_orr(v2, __ T16B, v1, v1);
+ __ st1(v0, __ T16B, to, 16);
+ __ orr(v2, __ T16B, v1, v1);
__ sub(len_reg, len_reg, 16);
__ cbnz(len_reg, L_aes_loop);
- __ v_st1(v2, __ T16B, rvec);
+ __ st1(v2, __ T16B, rvec);
__ mov(r0, rscratch2);
-------------- next part --------------
# HG changeset patch
# User aph
# Date 1406043669 14400
# Tue Jul 22 11:41:09 2014 -0400
# Node ID edc520ea0515eec648bed444d3da5965b213f1ff
# Parent e2941a6acc555c7736128d678c59033188b9bafe
Reorganize vector instructions.
diff -r e2941a6acc55 -r edc520ea0515 src/cpu/aarch64/vm/assembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp Tue Jul 22 08:45:24 2014 -0400
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp Tue Jul 22 11:41:09 2014 -0400
@@ -1191,18 +1191,11 @@
}
#ifndef PRODUCT
- {
- address PC = __ pc();
- __ bl(__ pc()+(1<<27)-4);
- NativeCall* call = nativeCall_at(PC);
- ptrdiff_t offset = call->destination()-PC;
- assert(offset == (1<<27)-4, "broken branch coding");
- PC = __ pc();
- __ bl(__ pc()-(1<<27));
- call = nativeCall_at(PC);
- offset = call->destination()-PC;
- assert(offset == -(1<<27), "broken branch coding");
- }
+
+ address PC = __ pc();
+ __ ld1(v0, __ T16B, Address(r16)); // No offset
+ __ ld1(v0, __ T16B, __ post(r16, 0)); // Post-index
+ __ ld1(v0, __ T16B, Address(r16, r17)); //
#endif // PRODUCT
diff -r e2941a6acc55 -r edc520ea0515 src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Jul 22 08:45:24 2014 -0400
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Jul 22 11:41:09 2014 -0400
@@ -417,15 +417,16 @@
}
}
- Register base() {
- guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg),
+ Register base() const {
+ guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg
+ | _mode == post),
"wrong mode");
return _base;
}
- long offset() {
+ long offset() const {
return _offset;
}
- Register index() {
+ Register index() const {
return _index;
}
mode getMode() const {
@@ -1847,7 +1848,7 @@
* We just use FloatRegister in the following. They are exactly the same
* as SIMD registers.
*/
-public:
+ public:
enum SIMD_Arrangement {
T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D
@@ -1857,92 +1858,74 @@
S32, D64, Q128
};
-void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2)
-{
+
+ private:
+
+ void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) {
starti;
f(0,31), f((int)T & 1, 30);
f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-}
-void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
- int imm, int op1, int op2)
-{
+ }
+ void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
+ int imm, int op1, int op2) {
starti;
f(0,31), f((int)T & 1, 30);
f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-}
-void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
- Register Xm, int op1, int op2)
-{
+ }
+ void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
+ Register Xm, int op1, int op2) {
starti;
f(0,31), f((int)T & 1, 30);
f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12);
f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
-}
+ }
-#define INSN1(NAME, op1, op2) \
- void NAME(FloatRegister Vt, SIMD_Arrangement T, Register Xn) { \
- ld_st(Vt, T, Xn, op1, op2); \
- } \
- void NAME(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int imm) { \
- ld_st(Vt, T, Xn, imm, op1, op2); \
- } \
- void NAME(FloatRegister Vt, SIMD_Arrangement T, Register Xn, Register Xm) { \
- ld_st(Vt, T, Xn, Xm, op1, op2); \
+ void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2) {
+ switch (a.getMode()) {
+ case Address::base_plus_offset:
+ guarantee(a.offset() == 0, "no offset allowed here");
+ ld_st(Vt, T, a.base(), op1, op2);
+ break;
+ case Address::post:
+ ld_st(Vt, T, a.base(), a.offset(), op1, op2);
+ break;
+ case Address::base_plus_offset_reg:
+ ld_st(Vt, T, a.base(), a.index(), op1, op2);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ public:
+
+#define INSN1(NAME, op1, op2) \
+ void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) { \
+ ld_st(Vt, T, a, op1, op2); \
+ }
+
+#define INSN2(NAME, op1, op2) \
+ void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \
+ assert(Vt->successor() == Vt2, "Registers must be ordered"); \
+ ld_st(Vt, T, a, op1, op2); \
}
-#define INSN2(NAME, op1, op2) \
- void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn) { \
- assert(Vt->successor() == Vt2, "Registers must be ordered"); \
- ld_st(Vt, T, Xn, op1, op2); \
- } \
- void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn, \
- int imm) { \
- assert(Vt->successor() == Vt2, "Registers must be ordered"); \
- ld_st(Vt, T, Xn, imm, op1, op2); \
- } \
- void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn, \
- Register Xm) { \
- assert(Vt->successor() == Vt2, "Registers must be ordered"); \
- ld_st(Vt, T, Xn, Xm, op1, op2); \
+
+#define INSN3(NAME, op1, op2) \
+ void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
+ SIMD_Arrangement T, const Address &a) { \
+ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \
+ "Registers must be ordered"); \
+ ld_st(Vt, T, a, op1, op2); \
}
-#define INSN3(NAME, op1, op2) \
- void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
- SIMD_Arrangement T, Register Xn) { \
- assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \
- "Registers must be ordered"); \
- ld_st(Vt, T, Xn, op1, op2); \
- } \
- void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
- SIMD_Arrangement T, Register Xn, int imm) { \
- assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \
- "Registers must be ordered"); \
- ld_st(Vt, T, Xn, imm, op1, op2); \
- } \
- void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
- SIMD_Arrangement T, Register Xn, Register Xm) { \
- assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \
- "Registers must be ordered"); \
- ld_st(Vt, T, Xn, Xm, op1, op2); \
- }
-#define INSN4(NAME, op1, op2) \
- void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
- FloatRegister Vt4, SIMD_Arrangement T, Register Xn) { \
- assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \
- Vt3->successor() == Vt4, "Registers must be ordered"); \
- ld_st(Vt, T, Xn, op1, op2); \
- } \
- void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
- FloatRegister Vt4, SIMD_Arrangement T, Register Xn, int imm) { \
- assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \
- Vt3->successor() == Vt4, "Registers must be ordered"); \
- ld_st(Vt, T, Xn, imm, op1, op2); \
- } \
- void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
- FloatRegister Vt4, SIMD_Arrangement T, Register Xn, Register Xm) { \
- assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \
- Vt3->successor() == Vt4, "Registers must be ordered"); \
- ld_st(Vt, T, Xn, Xm, op1, op2); \
+
+#define INSN4(NAME, op1, op2) \
+ void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \
+ FloatRegister Vt4, SIMD_Arrangement T, const Address &a) { \
+ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \
+ Vt3->successor() == Vt4, "Registers must be ordered"); \
+ ld_st(Vt, T, a, op1, op2); \
}
INSN1(ld1, 0b001100010, 0b0111);
diff -r e2941a6acc55 -r edc520ea0515 src/cpu/aarch64/vm/icache_aarch64.cpp
--- a/src/cpu/aarch64/vm/icache_aarch64.cpp Tue Jul 22 08:45:24 2014 -0400
+++ b/src/cpu/aarch64/vm/icache_aarch64.cpp Tue Jul 22 11:41:09 2014 -0400
@@ -32,7 +32,10 @@
void ICacheStubGenerator::generate_icache_flush(
ICache::flush_icache_stub_t* flush_icache_stub) {
- aarch64TestHook();
// Give anyone who calls this a surprise
*flush_icache_stub = (ICache::flush_icache_stub_t)NULL;
}
+
+void ICache::initialize() {
+ aarch64TestHook();
+}
diff -r e2941a6acc55 -r edc520ea0515 src/cpu/aarch64/vm/icache_aarch64.hpp
--- a/src/cpu/aarch64/vm/icache_aarch64.hpp Tue Jul 22 08:45:24 2014 -0400
+++ b/src/cpu/aarch64/vm/icache_aarch64.hpp Tue Jul 22 11:41:09 2014 -0400
@@ -33,7 +33,7 @@
class ICache : public AbstractICache {
public:
- static void initialize() {}
+ static void initialize();
static void invalidate_word(address addr) {
__clear_cache((char *)addr, (char *)(addr + 3));
}
diff -r e2941a6acc55 -r edc520ea0515 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Jul 22 08:45:24 2014 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Jul 22 11:41:09 2014 -0400
@@ -2245,11 +2245,11 @@
add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants
- ld1(v0, v1, T2D, buf, 32);
- ld1r(v4, T2D, tmp, 8);
- ld1r(v5, T2D, tmp, 8);
- ld1r(v6, T2D, tmp, 8);
- ld1r(v7, T2D, tmp, 8);
+ ld1(v0, v1, T2D, post(buf, 32));
+ ld1r(v4, T2D, post(tmp, 8));
+ ld1r(v5, T2D, post(tmp, 8));
+ ld1r(v6, T2D, post(tmp, 8));
+ ld1r(v7, T2D, post(tmp, 8));
mov(v16, T4S, 0, crc);
eor(v0, T16B, v0, v16);
@@ -2309,7 +2309,7 @@
pmull2(v19, T8H, v1, v4, T16B);
pmull2(v17, T8H, v1, v6, T16B);
- ld1(v0, v1, T2D, buf, 32);
+ ld1(v0, v1, T2D, post(buf, 32));
uzp1(v24, v20, v22, T8H);
uzp2(v25, v20, v22, T8H);
diff -r e2941a6acc55 -r edc520ea0515 src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Tue Jul 22 08:45:24 2014 -0400
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Tue Jul 22 11:41:09 2014 -0400
@@ -1903,7 +1903,7 @@
__ ld1(v0, __ T16B, from); // get 16 bytes of input
- __ ld1(v1, v2, v3, v4, __ T16B, key, 64);
+ __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
__ rev32(v1, __ T16B, v1);
__ rev32(v2, __ T16B, v2);
__ rev32(v3, __ T16B, v3);
@@ -1917,7 +1917,7 @@
__ aese(v0, v4);
__ aesmc(v0, v0);
- __ ld1(v1, v2, v3, v4, __ T16B, key, 64);
+ __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
__ rev32(v1, __ T16B, v1);
__ rev32(v2, __ T16B, v2);
__ rev32(v3, __ T16B, v3);
@@ -1931,7 +1931,7 @@
__ aese(v0, v4);
__ aesmc(v0, v0);
- __ ld1(v1, v2, __ T16B, key, 32);
+ __ ld1(v1, v2, __ T16B, __ post(key, 32));
__ rev32(v1, __ T16B, v1);
__ rev32(v2, __ T16B, v2);
@@ -1943,7 +1943,7 @@
__ aese(v0, v2);
__ aesmc(v0, v0);
- __ ld1(v1, v2, __ T16B, key, 32);
+ __ ld1(v1, v2, __ T16B, __ post(key, 32));
__ rev32(v1, __ T16B, v1);
__ rev32(v2, __ T16B, v2);
@@ -1955,7 +1955,7 @@
__ aese(v0, v2);
__ aesmc(v0, v0);
- __ ld1(v1, v2, __ T16B, key, 32);
+ __ ld1(v1, v2, __ T16B, __ post(key, 32));
__ rev32(v1, __ T16B, v1);
__ rev32(v2, __ T16B, v2);
@@ -2004,10 +2004,10 @@
__ ld1(v0, __ T16B, from); // get 16 bytes of input
- __ ld1(v5, __ T16B, key, 16);
+ __ ld1(v5, __ T16B, __ post(key, 16));
__ rev32(v5, __ T16B, v5);
- __ ld1(v1, v2, v3, v4, __ T16B, key, 64);
+ __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
__ rev32(v1, __ T16B, v1);
__ rev32(v2, __ T16B, v2);
__ rev32(v3, __ T16B, v3);
@@ -2021,7 +2021,7 @@
__ aesd(v0, v4);
__ aesimc(v0, v0);
- __ ld1(v1, v2, v3, v4, __ T16B, key, 64);
+ __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
__ rev32(v1, __ T16B, v1);
__ rev32(v2, __ T16B, v2);
__ rev32(v3, __ T16B, v3);
@@ -2035,7 +2035,7 @@
__ aesd(v0, v4);
__ aesimc(v0, v0);
- __ ld1(v1, v2, __ T16B, key, 32);
+ __ ld1(v1, v2, __ T16B, __ post(key, 32));
__ rev32(v1, __ T16B, v1);
__ rev32(v2, __ T16B, v2);
@@ -2047,7 +2047,7 @@
__ aesd(v0, v2);
__ aesimc(v0, v0);
- __ ld1(v1, v2, __ T16B, key, 32);
+ __ ld1(v1, v2, __ T16B, __ post(key, 32));
__ rev32(v1, __ T16B, v1);
__ rev32(v2, __ T16B, v2);
@@ -2059,7 +2059,7 @@
__ aesd(v0, v2);
__ aesimc(v0, v0);
- __ ld1(v1, v2, __ T16B, key, 32);
+ __ ld1(v1, v2, __ T16B, __ post(key, 32));
__ rev32(v1, __ T16B, v1);
__ rev32(v2, __ T16B, v2);
@@ -2120,20 +2120,20 @@
__ br(Assembler::CC, L_loadkeys_44);
__ br(Assembler::EQ, L_loadkeys_52);
- __ ld1(v17, v18, __ T16B, key, 32);
+ __ ld1(v17, v18, __ T16B, __ post(key, 32));
__ rev32(v17, __ T16B, v17);
__ rev32(v18, __ T16B, v18);
__ BIND(L_loadkeys_52);
- __ ld1(v19, v20, __ T16B, key, 32);
+ __ ld1(v19, v20, __ T16B, __ post(key, 32));
__ rev32(v19, __ T16B, v19);
__ rev32(v20, __ T16B, v20);
__ BIND(L_loadkeys_44);
- __ ld1(v21, v22, v23, v24, __ T16B, key, 64);
+ __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64));
__ rev32(v21, __ T16B, v21);
__ rev32(v22, __ T16B, v22);
__ rev32(v23, __ T16B, v23);
__ rev32(v24, __ T16B, v24);
- __ ld1(v25, v26, v27, v28, __ T16B, key, 64);
+ __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64));
__ rev32(v25, __ T16B, v25);
__ rev32(v26, __ T16B, v26);
__ rev32(v27, __ T16B, v27);
@@ -2144,7 +2144,7 @@
__ rev32(v31, __ T16B, v31);
__ BIND(L_aes_loop);
- __ ld1(v1, __ T16B, from, 16);
+ __ ld1(v1, __ T16B, __ post(from, 16));
__ eor(v0, __ T16B, v0, v1);
__ br(Assembler::CC, L_rounds_44);
@@ -2168,7 +2168,7 @@
__ aese(v0, v30);
__ eor(v0, __ T16B, v0, v31);
- __ st1(v0, __ T16B, to, 16);
+ __ st1(v0, __ T16B, __ post(to, 16));
__ sub(len_reg, len_reg, 16);
__ cbnz(len_reg, L_aes_loop);
@@ -2215,27 +2215,27 @@
__ ld1(v2, __ T16B, rvec);
- __ ld1(v31, __ T16B, key, 16);
+ __ ld1(v31, __ T16B, __ post(key, 16));
__ rev32(v31, __ T16B, v31);
__ cmpw(keylen, 52);
__ br(Assembler::CC, L_loadkeys_44);
__ br(Assembler::EQ, L_loadkeys_52);
- __ ld1(v17, v18, __ T16B, key, 32);
+ __ ld1(v17, v18, __ T16B, __ post(key, 32));
__ rev32(v17, __ T16B, v17);
__ rev32(v18, __ T16B, v18);
__ BIND(L_loadkeys_52);
- __ ld1(v19, v20, __ T16B, key, 32);
+ __ ld1(v19, v20, __ T16B, __ post(key, 32));
__ rev32(v19, __ T16B, v19);
__ rev32(v20, __ T16B, v20);
__ BIND(L_loadkeys_44);
- __ ld1(v21, v22, v23, v24, __ T16B, key, 64);
+ __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64));
__ rev32(v21, __ T16B, v21);
__ rev32(v22, __ T16B, v22);
__ rev32(v23, __ T16B, v23);
__ rev32(v24, __ T16B, v24);
- __ ld1(v25, v26, v27, v28, __ T16B, key, 64);
+ __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64));
__ rev32(v25, __ T16B, v25);
__ rev32(v26, __ T16B, v26);
__ rev32(v27, __ T16B, v27);
@@ -2245,7 +2245,7 @@
__ rev32(v30, __ T16B, v30);
__ BIND(L_aes_loop);
- __ ld1(v0, __ T16B, from, 16);
+ __ ld1(v0, __ T16B, __ post(from, 16));
__ orr(v1, __ T16B, v0, v0);
__ br(Assembler::CC, L_rounds_44);
@@ -2270,7 +2270,7 @@
__ eor(v0, __ T16B, v0, v31);
__ eor(v0, __ T16B, v0, v2);
- __ st1(v0, __ T16B, to, 16);
+ __ st1(v0, __ T16B, __ post(to, 16));
__ orr(v2, __ T16B, v1, v1);
__ sub(len_reg, len_reg, 16);