[aarch64-port-dev ] RFR: Add support for A53 multiply accumulate (take 2)

Tue Dec 2 15:19:00 UTC 2014

Hi,

The following patch overrides madd, msub, maddw, msubw, smaddl, smsubl, umaddl and umsubl in the macro assembler so that a nop is emitted before the instruction if Ra != zr (i.e. it is doing a genuine multiply-accumulate, not just a multiply).

The nop can be bypassed by calling the Assembler:: version of the instruction directly when it is known that the nop is not necessary. I have done this in three cases: the 32-bit and 64-bit mod functions, and some array indexing in InterpreterMacroAssembler::profile_switch_case, which are the only uses of these instructions outside of C2.

OK?
Ed.

--- CUT HERE ---
# HG changeset patch
# User enevill
# Date 1417533030 0
#      Tue Dec 02 15:10:30 2014 +0000
# Node ID 26fc60dd5da8d3f1554fb8f2553f050839a539c6
# Parent  f9a67c52dc334a714139dddd36ecc647aacedc0d
Add support for A53 multiply accumulate

diff -r f9a67c52dc33 -r 26fc60dd5da8 src/cpu/aarch64/vm/interp_masm_aarch64.cpp
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp        Wed Nov 26 15:20:42 2014 +0000
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp        Tue Dec 02 15:10:30 2014 +0000
@@ -1314,7 +1314,7 @@
     // case_array_offset_in_bytes()
     movw(reg2, in_bytes(MultiBranchData::per_case_size()));
     movw(rscratch1, in_bytes(MultiBranchData::case_array_offset()));
-    maddw(index, index, reg2, rscratch1);
+    Assembler::maddw(index, index, reg2, rscratch1);
 
     // Update the case count
     increment_mdp_data_at(mdp,
diff -r f9a67c52dc33 -r 26fc60dd5da8 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp     Wed Nov 26 15:20:42 2014 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp     Tue Dec 02 15:10:30 2014 +0000
@@ -1538,7 +1538,7 @@
     sdivw(result, ra, rb);
   } else {
     sdivw(scratch, ra, rb);
-    msubw(result, scratch, rb, ra);
+    Assembler::msubw(result, scratch, rb, ra);
   }
 
   return idivl_offset;
@@ -1568,7 +1568,7 @@
     sdiv(result, ra, rb);
   } else {
     sdiv(scratch, ra, rb);
-    msub(result, scratch, rb, ra);
+    Assembler::msub(result, scratch, rb, ra);
   }
 
   return idivq_offset;
diff -r f9a67c52dc33 -r 26fc60dd5da8 src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp     Wed Nov 26 15:20:42 2014 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp     Tue Dec 02 15:10:30 2014 +0000
@@ -407,6 +407,16 @@
     umaddl(Rd, Rn, Rm, zr);
   }
 
+#define WRAP(INSN)                                                \
+  void INSN(Register Rd, Register Rn, Register Rm, Register Ra) { \
+    if (Ra != zr) nop();                                          \
+    Assembler::INSN(Rd, Rn, Rm, Ra);                              \
+  }
+
+  WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
+  WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
+#undef WRAP
+
   // macro assembly operations needed for aarch64
 
   // first two private routines for loading 32 bit or 64 bit constants
--- CUT HERE ---