Re: AArch64: optimization for doing remainder on AArch64
Jin Guojie
jinguojie.jgj at alibaba-inc.com
Thu Apr 18 02:43:32 UTC 2024
On 2024/4/ 23:42 Andrew Haley <aph-open at littlepinkcloud.com> wrote:
> If you can get a Github account and an OpenJDK account we can start to do that.
> The first thing for you to do is clone the OpenJDK repo into your own tree,
> then create a local branch, then create a PR.
> See the section https://openjdk.org/guide/#i-have-a-patch-what-do-i-do
According to this guide, a sponsor needs to first create an issue on JBS before submitting a PR.
Could you please create an issue in the JDK Bug System (JBS)?
I have submitted an OpenJDK account application, but Oracle has not approved it yet.
I will submit this PR after my OCA is signed and the issue in JBS is created.
Thanks very much.
Jin Guojie (Alibaba, hotspot developer).
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index af744f39fef..39e91ea3bdb 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -2075,16 +2075,7 @@ int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb,
sdivw(result, ra, rb);
} else {
sdivw(scratch, ra, rb);
- Assembler::msubw(result, scratch, rb, ra);
+ msubw(result, scratch, rb, ra);
}
return idivl_offset;
@@ -2114,16 +2105,7 @@ int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb,
sdiv(result, ra, rb);
} else {
sdiv(scratch, ra, rb);
- Assembler::msub(result, scratch, rb, ra);
+ msub(result, scratch, rb, ra);
}
return idivq_offset;
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index dad7ec4d497..7266b5d92b0 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -437,11 +437,39 @@ class MacroAssembler: public Assembler {
Assembler::INSN(Rd, Rn, Rm, Ra); \
}
- WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
+ WRAP(madd) WRAP(maddw)
WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
#undef WRAP
+ inline void msub(Register Rd, Register Rn, Register Rm, Register Ra) { // 64-bit multiply-subtract wrapper: emits either Assembler::msub or a mul/sub pair, chosen by CPU model
+ if (VM_Version::supports_a53mac() && Ra != zr) // same guard as the WRAP() macro this method replaces for msub
+ nop(); // emitted ahead of whichever sequence is selected below
+ if (VM_Version::model_is(VM_Version::CPU_MODEL_NEOVERSE_N1)
+ || VM_Version::model_is(VM_Version::CPU_MODEL_NEOVERSE_N2)) {
+ /* On Neoverse N-series cores, MSUB uses the same ALU as SDIV.
+ * A separate MUL/SUB pair can make use of multiple ALUs,
+ * and is much faster than MSUB.
+ *
+ * NOTE(review): this path writes rscratch1 before Ra is read by the
+ * sub below, so it presumably requires Ra != rscratch1; it also
+ * clobbers rscratch1, which the single-instruction path does not.
+ * Confirm against all callers. */
+ mul(rscratch1, Rn, Rm);
+ sub(Rd, Ra, rscratch1);
+ } else {
+ Assembler::msub(Rd, Rn, Rm, Ra);
+ }
+ }
+ inline void msubw(Register Rd, Register Rn, Register Rm, Register Ra) { // 32-bit multiply-subtract wrapper: emits either Assembler::msubw or a mulw/subw pair, chosen by CPU model
+ if (VM_Version::supports_a53mac() && Ra != zr) // same guard as the WRAP() macro this method replaces for msubw
+ nop(); // emitted ahead of whichever sequence is selected below
+ if (VM_Version::model_is(VM_Version::CPU_MODEL_NEOVERSE_N1)
+ || VM_Version::model_is(VM_Version::CPU_MODEL_NEOVERSE_N2)) {
+ /* On Neoverse N-series cores, MSUB uses the same ALU as SDIV.
+ * A separate MUL/SUB pair can make use of multiple ALUs,
+ * and is much faster than MSUB.
+ *
+ * NOTE(review): this path writes rscratch1 before Ra is read by the
+ * subw below, so it presumably requires Ra != rscratch1; it also
+ * clobbers rscratch1, which the single-instruction path does not.
+ * Confirm against all callers. */
+ mulw(rscratch1, Rn, Rm);
+ subw(Rd, Ra, rscratch1);
+ } else {
+ Assembler::msubw(Rd, Rn, Rm, Ra);
+ }
+ }
More information about the hotspot-dev
mailing list