RFR: 8348561: Add aarch64 intrinsics for ML-DSA [v6]
Andrew Dinn
adinn at openjdk.org
Mon Feb 24 15:33:08 UTC 2025
On Thu, 20 Feb 2025 17:33:18 GMT, Ferenc Rakoczi <duke at openjdk.org> wrote:
>> By using the aarch64 vector registers the speed of the computation of the ML-DSA algorithms (key generation, document signing, signature verification) can be approximately doubled.
>
> Ferenc Rakoczi has updated the pull request incrementally with four additional commits since the last revision:
>
> - Accepting suggested change from Andrew Dinn
> - Added comments suggested by Andrew Dinn
> - Fixed copyright years
> - renaming a couple of functions
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4683:
> 4681: __ mulv(v19, __ T4S, v7, v19);
> 4682:
> 4683: __ mulv(v16, __ T4S, v16, v30);
__ mulv(v16, __ T4S, v16, v30); // m = aLow * qinv
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4688:
> 4686: __ mulv(v19, __ T4S, v19, v30);
> 4687:
> 4688: __ sqdmulh(v16, __ T4S, v16, v31);
__ sqdmulh(v16, __ T4S, v16, v31); // n = hi32(2 * m * q)
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4693:
> 4691: __ sqdmulh(v19, __ T4S, v19, v31);
> 4692:
> 4693: __ shsubv(v16, __ T4S, v24, v16);
__ shsubv(v16, __ T4S, v24, v16); // a = (aHigh - n) / 2
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4698:
> 4696: __ shsubv(v19, __ T4S, v27, v19);
> 4697:
> 4698: __ subv(v1, __ T4S, v0, v16);
__ subv(v1, __ T4S, v0, v16); // x1 = x - a
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4703:
> 4701: __ subv(v7, __ T4S, v6, v19);
> 4702:
> 4703: __ addv(v0, __ T4S, v0, v16);
__ addv(v0, __ T4S, v0, v16); // x0 = x + a
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4742:
> 4740:
> 4741: for (int i = 0; i < 4; i++) {
> 4742: __ ldpq(v30, v31, Address(dilithiumConsts, 0));
__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4813:
> 4811: // level 5
> 4812: for (int i = 0; i < 1024; i += 256) {
> 4813: __ ldpq(v30, v31, Address(dilithiumConsts, 0));
__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4853:
> 4851: // level 6
> 4852: for (int i = 0; i < 1024; i += 128) {
> 4853: __ ldpq(v30, v31, Address(dilithiumConsts, 0));
__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4876:
> 4874: // level 7
> 4875: for (int i = 0; i < 1024; i += 128) {
> 4876: __ ldpq(v30, v31, Address(dilithiumConsts, 0));
__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4905:
> 4903:
> 4904: void dilithium_sub_add_montmul16() {
> 4905: __ subv(v20, __ T4S, v0, v1);
__ subv(v20, __ T4S, v0, v1); // b = x0 - x1
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4910:
> 4908: __ subv(v23, __ T4S, v6, v7);
> 4909:
> 4910: __ addv(v0, __ T4S, v0, v1);
__ addv(v0, __ T4S, v0, v1); // a0 = x0 + x1
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4915:
> 4913: __ addv(v6, __ T4S, v6, v7);
> 4914:
> 4915: __ sqdmulh(v24, __ T4S, v20, v16);
__ sqdmulh(v24, __ T4S, v20, v16); // aHigh = hi32(2 * b * c)
__ mulv(v1, __ T4S, v20, v16); // aLow = lo32(b * c)
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4924:
> 4922: __ mulv(v7, __ T4S, v23, v19);
> 4923:
> 4924: __ mulv(v1, __ T4S, v1, v30);
__ mulv(v1, __ T4S, v1, v30); // m = aLow * qinv
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4929:
> 4927: __ mulv(v7, __ T4S, v7, v30);
> 4928:
> 4929: __ sqdmulh(v1, __ T4S, v1, v31);
__ sqdmulh(v1, __ T4S, v1, v31); // n = hi32(2 * m * q)
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4934:
> 4932: __ sqdmulh(v7, __ T4S, v7, v31);
> 4933:
> 4934: __ shsubv(v1, __ T4S, v24, v1);
__ shsubv(v1, __ T4S, v24, v1); // a1 = (aHigh - n) / 2
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5044:
> 5042: // level0
> 5043: for (int i = 0; i < 1024; i += 128) {
> 5044: __ ldpq(v30, v31, Address(dilithiumConsts, 0));
__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5115:
> 5113: __ str(v31, __ Q, Address(coeffs, i + 224));
> 5114: dilithium_load32zetas(zetas);
> 5115: __ ldpq(v30, v31, Address(dilithiumConsts, 0));
__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5166:
> 5164: __ lea(dilithiumConsts, ExternalAddress((address) StubRoutines::aarch64::_dilithiumConsts));
> 5165:
> 5166: __ ldpq(v30, v31, Address(dilithiumConsts, 0));
__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q
__ ldr(v29, __ Q, Address(dilithiumConsts, 48)); // rsquare
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5228:
> 5226: __ lea(dilithiumConsts, ExternalAddress((address) StubRoutines::aarch64::_dilithiumConsts));
> 5227:
> 5228: __ ldpq(v30, v31, Address(dilithiumConsts, 0));
__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q
-------------
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967863821
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967864748
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967865658
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967866379
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967866822
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967867752
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967869143
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967870036
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967870373
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967871386
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967871949
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967872681
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967873281
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967873918
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967874418
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967875655
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967876745
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967877717
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967878884
More information about the hotspot-dev
mailing list