RFR: JDK-8302027: Port fdlibm trig functions (sin, cos, tan) to Java
Joe Darcy
darcy at openjdk.org
Wed Mar 1 06:24:04 UTC 2023
On Wed, 1 Mar 2023 05:28:34 GMT, Joe Darcy <darcy at openjdk.org> wrote:
> Last and certainly not least in the port of FDLIBM to Java, the transcendental methods for sin, cos, and tan.
>
> Some more tests are to be written in the StrictMath directory to verify that the StrictMath algorihtm for sin/cos/tan is being used rather than a different one. However, I wanted to get the rest of the change out for review first.
>
> The sin/cos/tan methods are grouped together since they share the same argument reduction logic. Argument reduction is the process of mapping an argument of a function to an argument in a restricted range (and possibly returning some function of the reduced argument). For sin, cos, and tan, since they are fundamentally periodic with respect to a multiple of pi, argument reduction is done to find the remainder of the original argument with respect to pi/2.
And the shared remainder-pi-over-2 code:
$ diff -w RemPio2.c RemPio2.translit.java
1c1
< /* __ieee754_rem_pio2(x,y)
---
> /** __ieee754_rem_pio2(x,y)
6,8c6
<
< #include "fdlibm.h"
<
---
> static class RemPio2 {
12,16c10
< #ifdef __STDC__
< static const int two_over_pi[] = {
< #else
< static int two_over_pi[] = {
< #endif
---
> private static final int[] two_over_pi = {
30,34c24
< #ifdef __STDC__
< static const int npio2_hw[] = {
< #else
< static int npio2_hw[] = {
< #endif
---
> private static final int[] npio2_hw = {
53,57c43
< #ifdef __STDC__
< static const double
< #else
< static double
< #endif
---
> private static final double
69,77c55,57
< #ifdef __STDC__
< int __ieee754_rem_pio2(double x, double *y)
< #else
< int __ieee754_rem_pio2(x,y)
< double x,y[];
< #endif
< {
< double z,w,t,r,fn;
< double tx[3];
---
> static int __ieee754_rem_pio2(double x, double[] y) {
> double z = 0.0,w,t,r,fn;
> double[] tx = new double[3];
110c90
< t = fabs(x);
---
> t = Math.abs(x);
148c128,129
< __LO(z) = __LO(x);
---
> // __LO(z) = __LO(x);
> z = __LO(z, __LO(x));
150c131,132
< __HI(z) = ix - (e0<<20);
---
> // __HI(z) = ix - (e0<<20);
> z = __HI(z, ix - (e0<<20));
158c140
< n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
---
> n = KernelRemPio2.__kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
162c144,147
< /*
---
>
> }
>
> /**
268,269c253
<
<
---
> static class KernelRemPio2 {
278c262
< #include "fdlibm.h"
---
> private static final int[] init_jk = {2,3,4,6}; /* initial value for jk */
280,290c264
< #ifdef __STDC__
< static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
< #else
< static int init_jk[] = {2,3,4,6};
< #endif
<
< #ifdef __STDC__
< static const double PIo2[] = {
< #else
< static double PIo2[] = {
< #endif
---
> private static final double[] PIo2 = {
300,305c274
<
< #ifdef __STDC__
< static const double
< #else
< static double
< #endif
---
> static final double
311,319c280,286
< #ifdef __STDC__
< int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2)
< #else
< int __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
< double x[], y[]; int e0,nx,prec; int ipio2[];
< #endif
< {
< int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
< double z,fw,f[20],fq[20],q[20];
---
> static int __kernel_rem_pio2(double[] x, double[] y, int e0, int nx, int prec, final int[] ipio2) {
> int jz,jx,jv,jp,jk,carry,n,i,j,k,m,q0,ih;
> int[] iq = new int[20];
> double z,fw;
> double [] f = new double[20];
> double [] fq= new double[20];
> double [] q = new double[20];
341c308
< recompute:
---
> while(true) {
350,351c317,318
< z = scalbn(z,q0); /* actual value of z */
< z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
---
> z = Math.scalb(z,q0); /* actual value of z */
> z -= 8.0*Math.floor(z*0.125); /* trim off integer >= 8 */
383c350
< if(carry!=0) z -= scalbn(one,q0);
---
> if(carry!=0) z -= Math.scalb(one,q0);
400,401c367,369
< goto recompute;
< }
---
> continue;
> } else { break;}
> } else {break;}
409c377
< z = scalbn(z,-q0);
---
> z = Math.scalb(z,-q0);
419c387
< fw = scalbn(one,q0);
---
> fw = Math.scalb(one,q0);
465a434
> }
Yes, dear reader, the original C code contained a goto to implement a looping contract. To maintain close code correspondence, I wrapped with code in question with a "while(true){...}" and then "break"-ed or "continue"-d to iterate or exit the loop.
$ diff -w RemPio2.translit.java RemPio2.fdlibm.java
44,53c44,50
< zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
< half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
< two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
< invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
< pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
< pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
< pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
< pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
< pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
< pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
---
> invpio2 = 0x1.45f306dc9c883p-1, // 6.36619772367581382433e-01
> pio2_1 = 0x1.921fb544p0, // 1.57079632673412561417e+00
> pio2_1t = 0x1.0b4611a626331p-34, // 6.07710050650619224932e-11
> pio2_2 = 0x1.0b4611a6p-34, // 6.07710050630396597660e-11
> pio2_2t = 0x1.3198a2e037073p-69, // 2.02226624879595063154e-21
> pio2_3 = 0x1.3198a2ep-69, // 2.02226624871116645580e-21
> pio2_3t = 0x1.b839a252049c1p-104; // 8.47842766036889956997e-32
60,64c57,64
< hx = __HI(x); /* high word of x */
< ix = hx&0x7fffffff;
< if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
< {y[0] = x; y[1] = 0; return 0;}
< if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
---
> hx = __HI(x); // high word of x
> ix = hx & 0x7fff_ffff;
> if (ix <= 0x3fe9_21fb) { // |x| ~<= pi/4 , no need for reduction
> y[0] = x;
> y[1] = 0;
> return 0;
> }
> if (ix < 0x4002_d97c) { // |x| < 3pi/4, special case with n=+-1
67c67
< if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
---
> if (ix != 0x3ff9_21fb) { // 33+53 bit pi is good enough
70c70
< } else { /* near pi/2, use 33+33+53 bit pi */
---
> } else { // near pi/2, use 33+33+53 bit pi
76c76
< } else { /* negative x */
---
> } else { // negative x
78c78
< if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
---
> if (ix != 0x3ff_921fb) { // 33+53 bit pi is good enough
81c81
< } else { /* near pi/2, use 33+33+53 bit pi */
---
> } else { // near pi/2, use 33+33+53 bit pi
89c89
< if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
---
> if (ix <= 0x4139_21fb) { // |x| ~<= 2^19*(pi/2), medium size
91c91
< n = (int) (t*invpio2+half);
---
> n = (int) (t*invpio2 + 0.5);
94c94
< w = fn*pio2_1t; /* 1st round good to 85 bit */
---
> w = fn*pio2_1t; // 1st round good to 85 bit
96c96
< y[0] = r-w; /* quick check no cancellation */
---
> y[0] = r - w; // quick check no cancellation
101c101
< if(i>16) { /* 2nd iteration needed, good to 118 */
---
> if (i > 16) { // 2nd iteration needed, good to 118
108,109c108,109
< if(i>49) { /* 3rd iteration need, 151 bits acc */
< t = r; /* will cover all possible cases */
---
> if (i > 49) { // 3rd iteration need, 151 bits acc
> t = r; // will cover all possible cases
118,119c118,124
< if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
< else return n;
---
> if (hx < 0) {
> y[0] = -y[0];
> y[1] = -y[1];
> return -n;
> } else {
> return n;
> }
124,125c129,131
< if(ix>=0x7ff00000) { /* x is inf or NaN */
< y[0]=y[1]=x-x; return 0;
---
> if (ix >= 0x7ff0_0000) { // x is inf or NaN
> y[0] = y[1] = x - x;
> return 0;
127,128c133
< /* set z = scalbn(|x|,ilogb(x)-23) */
< // __LO(z) = __LO(x);
---
> // set z = scalbn(|x|,ilogb(x)-23)
131d135
< // __HI(z) = ix - (e0<<20);
135c139
< z = (z-tx[i])*two24;
---
> z = (z - tx[i])*TWO24;
139c143,145
< while(tx[nx-1]==zero) nx--; /* skip zero term */
---
> while (tx[nx - 1] == 0.0) { // skip zero term
> nx--;
> }
141c147,151
< if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
---
> if (hx < 0) {
> y[0] = -y[0];
> y[1] = -y[1];
> return -n;
> }
144d153
<
262c271
< private static final int[] init_jk = {2,3,4,6}; /* initial value for jk */
---
> private static final int init_jk[] = {2, 3, 4, 6}; // initial value for jk
264,272c273,281
< private static final double[] PIo2 = {
< 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
< 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
< 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
< 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
< 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
< 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
< 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
< 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
---
> private static final double PIo2[] = {
> 0x1.921fb4p0, // 1.57079625129699707031e+00
> 0x1.4442dp-24, // 7.54978941586159635335e-08
> 0x1.846988p-48, // 5.39030252995776476554e-15
> 0x1.8cc516p-72, // 3.28200341580791294123e-22
> 0x1.01b838p-96, // 1.27065575308067607349e-29
> 0x1.a25204p-120, // 1.22933308981111328932e-36
> 0x1.382228p-145, // 2.73370053816464559624e-44
> 0x1.9f31dp-169, // 2.16741683877804819444e-51
273a283
>
275,278c285
< zero = 0.0,
< one = 1.0,
< two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
< twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
---
> twon24 = 0x1.0p-24; // 5.96046447753906250000e-08
288c295
< /* initialize jk*/
---
> // initialize jk
292c299
< /* determine jx,jv,q0, note that 3>q0 */
---
> // determine jx, jv, q0, note that 3 > q0
294c301,304
< jv = (e0-3)/24; if(jv<0) jv=0;
---
> jv = (e0 - 3)/24;
> if (jv < 0) {
> jv = 0;
> }
297,299c307,312
< /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
< j = jv-jx; m = jx+jk;
< for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];
---
> // set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk]
> j = jv - jx;
> m = jx + jk;
> for (i = 0; i <= m; i++, j++) {
> f[i] = (j < 0) ? 0.0 : (double) ipio2[j];
> }
301c314
< /* compute q[0],q[1],...q[jk] */
---
> // compute q[0],q[1],...q[jk]
303c316,318
< for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
---
> for(j = 0, fw = 0.0; j <= jx; j++) {
> fw += x[j]*f[jx + i - j];
> }
309c324
< /* distill q[] into iq[] reversingly */
---
> // distill q[] into iq[] reversingly
312c327
< iq[i] = (int)(z-two24*fw);
---
> iq[i] = (int)(z - TWO24*fw);
316,318c331,333
< /* compute n */
< z = Math.scalb(z,q0); /* actual value of z */
< z -= 8.0*Math.floor(z*0.125); /* trim off integer >= 8 */
---
> // compute n
> z = Math.scalb(z, q0); // actual value of z
> z -= 8.0*Math.floor(z*0.125); // trim off integer >= 8
322,323c337,339
< if(q0>0) { /* need iq[jz-1] to determine n */
< i = (iq[jz-1]>>(24-q0)); n += i;
---
> if (q0 > 0) { // need iq[jz - 1] to determine n
> i = (iq[jz - 1] >> (24 - q0));
> n += i;
325a342,345
> } else if (q0 == 0) {
> ih = iq[jz-1]>>23;
> } else if (z >= 0.5) {
> ih=2;
327,328d346
< else if(q0==0) ih = iq[jz-1]>>23;
< else if(z>=0.5) ih=2;
330,332c348,351
< if(ih>0) { /* q > 0.5 */
< n += 1; carry = 0;
< for(i=0;i<jz ;i++) { /* compute 1-q */
---
> if ( ih > 0) { // q > 0.5
> n += 1;
> carry = 0;
> for (i=0; i < jz; i++) { // compute 1-q
336c355,356
< carry = 1; iq[i] = 0x1000000- j;
---
> carry = 1;
> iq[i] = 0x100_0000 - j;
338c358,359
< } else iq[i] = 0xffffff - j;
---
> } else {
> iq[i] = 0xff_ffff - j;
340c361,362
< if(q0>0) { /* rare case: chance is 1 in 12 */
---
> }
> if (q0 > 0) { // rare case: chance is 1 in 12
343c365,366
< iq[jz-1] &= 0x7fffff; break;
---
> iq[jz-1] &= 0x7f_ffff;
> break;
345c368,369
< iq[jz-1] &= 0x3fffff; break;
---
> iq[jz-1] &= 0x3f_ffff;
> break;
349,350c373,376
< z = one - z;
< if(carry!=0) z -= Math.scalb(one,q0);
---
> z = 1.0 - z;
> if (carry != 0) {
> z -= Math.scalb(1.0, q0);
> }
354,355c380,381
< /* check if recomputation is needed */
< if(z==zero) {
---
> // check if recomputation is needed
> if (z == 0.0) {
357,359c383,387
< for (i=jz-1;i>=jk;i--) j |= iq[i];
< if(j==0) { /* need recomputation */
< for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
---
> for (i = jz - 1; i >= jk; i--) {
> j |= iq[i];
> }
> if (j == 0) { // need recomputation
> for(k=1; iq[jk - k] == 0; k++); // k = no. of terms needed
361c389
< for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
---
> for(i = jz + 1; i <= jz + k; i++) { // add q[jz+1] to q[jz+k]
363c391,393
< for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
---
> for (j=0, fw = 0.0; j <= jx; j++) {
> fw += x[j]*f[jx + i - j];
> }
368,369c398,403
< } else { break;}
< } else {break;}
---
> } else {
> break;
> }
> } else {
> break;
> }
372c406
< /* chop off zero terms */
---
> // chop off zero terms
374,376c408,414
< jz -= 1; q0 -= 24;
< while(iq[jz]==0) { jz--; q0-=24;}
< } else { /* break z into 24-bit if necessary */
---
> jz -= 1;
> q0 -= 24;
> while (iq[jz] == 0) {
> jz--;
> q0-=24;
> }
> } else { // break z into 24-bit if necessary
378c416
< if(z>=two24) {
---
> if (z >= TWO24) {
380,381c418,420
< iq[jz] = (int)(z-two24*fw);
< jz += 1; q0 += 24;
---
> iq[jz] = (int)(z - TWO24*fw);
> jz += 1;
> q0 += 24;
383c422,424
< } else iq[jz] = (int) z ;
---
> } else {
> iq[jz] = (int) z;
> }
386,387c427,428
< /* convert integer "bit" chunk to floating-point value */
< fw = Math.scalb(one,q0);
---
> // convert integer "bit" chunk to floating-point value
> fw = Math.scalb(1.0, q0);
389c430,431
< q[i] = fw*(double)iq[i]; fw*=twon24;
---
> q[i] = fw*(double)iq[i];
> fw *= twon24;
392c434
< /* compute PIo2[0,...,jp]*q[jz,...,0] */
---
> // compute PIo2[0,...,jp]*q[jz,...,0]
394c436,438
< for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
---
> for (fw = 0.0, k = 0; k <= jp && k <= jz-i; k++) {
> fw += PIo2[k]*q[i + k];
> }
398c442
< /* compress fq[] into y[] */
---
> // compress fq[] into y[]
402c446,448
< for (i=jz;i>=0;i--) fw += fq[i];
---
> for (i = jz; i >=0; i--) {
> fw += fq[i];
> }
408c454,456
< for (i=jz;i>=0;i--) fw += fq[i];
---
> for (i = jz; i>=0; i--) {
> fw += fq[i];
> }
411c459,461
< for (i=1;i<=jz;i++) fw += fq[i];
---
> for (i = 1; i <= jz; i++) {
> fw += fq[i];
> }
414c464
< case 3: /* painful */
---
> case 3: // painful
425c475,477
< for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
---
> for (fw = 0.0, i = jz; i >= 2; i--) {
> fw += fq[i];
> }
427c479,481
< y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
---
> y[0] = fq[0];
> y[1] = fq[1];
> y[2] = fw;
429c483,485
< y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
---
> y[0] = -fq[0];
> y[1] = -fq[1];
> y[2] = -fw;
-------------
PR: https://git.openjdk.org/jdk/pull/12800
More information about the core-libs-dev
mailing list