RFR: 8348561: Add aarch64 intrinsics for ML-DSA [v6]

Mon Feb 24 11:53:55 UTC 2025

On Thu, 20 Feb 2025 17:33:18 GMT, Ferenc Rakoczi <duke at openjdk.org> wrote:

>> Using the aarch64 vector registers approximately doubles the speed of the ML-DSA computations (key generation, document signing, signature verification).
>
> Ferenc Rakoczi has updated the pull request incrementally with four additional commits since the last revision:
> 
>  - Accepting suggested change from Andrew Dinn
>  - Added comments suggested by Andrew Dinn
>  - Fixed copyright years
>  - renaming a couple of functions

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4604:

> 4602:     FloatRegister vr7 = by_constant ? v29 : v7;
> 4603: 
> 4604:     __ sqdmulh(v24, __ T4S, vr0, v16);

+    __ sqdmulh(v24, __ T4S, vr0, v16); // aHigh = hi32(2 * b * c)
+    __ mulv(v16, __ T4S, vr0, v16);    // aLow = lo32(b * c)

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4613:

> 4611:     __ mulv(v19, __ T4S, vr3, v19);
> 4612: 
> 4613:     __ mulv(v16, __ T4S, v16, v30);

__ mulv(v16, __ T4S, v16, v30);    // m = aLow * qinv

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4618:

> 4616:     __ mulv(v19, __ T4S, v19, v30);
> 4617: 
> 4618:     __ sqdmulh(v16, __ T4S, v16, v31);

__ sqdmulh(v16, __ T4S, v16, v31); // n = hi32(2 * m * q)

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4623:

> 4621:     __ sqdmulh(v19, __ T4S, v19, v31);
> 4622: 
> 4623:     __ shsubv(v16, __ T4S, v24, v16);

__ shsubv(v16, __ T4S, v24, v16);  // a = (aHigh - n) / 2

-------------

PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967491928
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967492635
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967493031
PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967493643