RFR: 8348561: Add aarch64 intrinsics for ML-DSA [v6]

Andrew Dinn adinn at openjdk.org
Mon Feb 24 15:33:08 UTC 2025


On Thu, 20 Feb 2025 17:33:18 GMT, Ferenc Rakoczi <duke at openjdk.org> wrote:

>> By using the aarch64 vector registers the speed of the computation of the ML-DSA algorithms (key generation, document signing, signature verification) can be approximately doubled.
>
> Ferenc Rakoczi has updated the pull request incrementally with four additional commits since the last revision:
> 
>  - Accepting suggested change from Andrew Dinn
>  - Added comments suggested by Andrew Dinn
>  - Fixed copyright years
>  - renaming a couple of functions

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4683:

> 4681:     __ mulv(v19, __ T4S, v7, v19);
> 4682: 
> 4683:     __ mulv(v16, __ T4S, v16, v30);

__ mulv(v16, __ T4S, v16, v30);    // m = aLow * qinv

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4688:

> 4686:     __ mulv(v19, __ T4S, v19, v30);
> 4687: 
> 4688:     __ sqdmulh(v16, __ T4S, v16, v31);

__ sqdmulh(v16, __ T4S, v16, v31); // n = hi32(2 * m * q)

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4693:

> 4691:     __ sqdmulh(v19, __ T4S, v19, v31);
> 4692: 
> 4693:     __ shsubv(v16, __ T4S, v24, v16);

__ shsubv(v16, __ T4S, v24, v16);  // a = (aHigh - n) / 2

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4698:

> 4696:     __ shsubv(v19, __ T4S, v27, v19);
> 4697: 
> 4698:     __ subv(v1, __ T4S, v0, v16);

__ subv(v1, __ T4S, v0, v16);      // x1 = x - a

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4703:

> 4701:     __ subv(v7, __ T4S, v6, v19);
> 4702: 
> 4703:     __ addv(v0, __ T4S, v0, v16);

__ addv(v0, __ T4S, v0, v16);      // x0 = x + a

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4742:

> 4740: 
> 4741:       for (int i = 0; i < 4; i++) {
> 4742:         __ ldpq(v30, v31, Address(dilithiumConsts, 0));

__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4813:

> 4811:     // level 5
> 4812:     for (int i = 0; i < 1024; i += 256) {
> 4813:       __ ldpq(v30, v31, Address(dilithiumConsts, 0));

__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4853:

> 4851:     // level 6
> 4852:     for (int i = 0; i < 1024; i += 128) {
> 4853:       __ ldpq(v30, v31, Address(dilithiumConsts, 0));

__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4876:

> 4874:     // level 7
> 4875:     for (int i = 0; i < 1024; i += 128) {
> 4876:       __ ldpq(v30, v31, Address(dilithiumConsts, 0));

__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4905:

> 4903: 
> 4904:   void dilithium_sub_add_montmul16() {
> 4905:     __ subv(v20, __ T4S, v0, v1);

__ subv(v20, __ T4S, v0, v1);      // b = x0 - x1

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4910:

> 4908:     __ subv(v23, __ T4S, v6, v7);
> 4909: 
> 4910:     __ addv(v0, __ T4S, v0, v1);

__ addv(v0, __ T4S, v0, v1);       // a0 = x0 + x1

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4915:

> 4913:     __ addv(v6, __ T4S, v6, v7);
> 4914: 
> 4915:     __ sqdmulh(v24, __ T4S, v20, v16);

__ sqdmulh(v24, __ T4S, v20, v16); // aHigh = hi32(2 * b * c)
    __ mulv(v1, __ T4S, v20, v16);     // aLow = lo32(b * c)

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4924:

> 4922:     __ mulv(v7, __ T4S, v23, v19);
> 4923: 
> 4924:     __ mulv(v1, __ T4S, v1, v30);

__ mulv(v1, __ T4S, v1, v30);      // m = aLow * qinv

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4929:

> 4927:     __ mulv(v7, __ T4S, v7, v30);
> 4928: 
> 4929:     __ sqdmulh(v1, __ T4S, v1, v31);

__ sqdmulh(v1, __ T4S, v1, v31);  // n = hi32(2 * m * q)

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4934:

> 4932:     __ sqdmulh(v7, __ T4S, v7, v31);
> 4933: 
> 4934:     __ shsubv(v1, __ T4S, v24, v1);

__ shsubv(v1, __ T4S, v24, v1);    // a1 = (aHigh - n) / 2

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5044:

> 5042:     // level0
> 5043:     for (int i = 0; i < 1024; i += 128) {
> 5044:       __ ldpq(v30, v31, Address(dilithiumConsts, 0));

__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5115:

> 5113:       __ str(v31, __ Q, Address(coeffs, i + 224));
> 5114:       dilithium_load32zetas(zetas);
> 5115:       __ ldpq(v30, v31, Address(dilithiumConsts, 0));

__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5166:

> 5164:     __ lea(dilithiumConsts, ExternalAddress((address) StubRoutines::aarch64::_dilithiumConsts));
> 5165: 
> 5166:     __ ldpq(v30, v31, Address(dilithiumConsts, 0));

__ ldpq(v30, v31, Address(dilithiumConsts, 0));  // qinv, q
    __ ldr(v29, __ Q, Address(dilithiumConsts, 48)); // rsquare

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5228:

> 5226:     __ lea(dilithiumConsts, ExternalAddress((address) StubRoutines::aarch64::_dilithiumConsts));
> 5227: 
> 5228:     __ ldpq(v30, v31, Address(dilithiumConsts, 0));

__ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q

-------------

PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967863821
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967864748
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967865658
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967866379
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967866822
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967867752
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967869143
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967870036
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967870373
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967871386
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967871949
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967872681
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967873281
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967873918
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967874418
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967875655
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967876745
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967877717
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967878884


More information about the hotspot-dev mailing list