RFR: JDK-8302027: Port fdlibm trig functions (sin, cos, tan) to Java

Wed Mar 1 06:24:04 UTC 2023

On Wed, 1 Mar 2023 05:28:34 GMT, Joe Darcy <darcy at openjdk.org> wrote:

> Last and certainly not least in the port of FDLIBM to Java, the transcendental methods for sin, cos, and tan.
> 
> Some more tests are to be written in the StrictMath directory to verify that the StrictMath algorihtm for sin/cos/tan is being used rather than a different one. However, I wanted to get the rest of the change out for review first.
> 
> The sin/cos/tan methods are grouped together since they share the same argument reduction logic. Argument reduction is the process of mapping an argument of a function to an argument in a restricted range (and possibly returning some function of the reduced argument). For sin, cos, and tan, since they are fundamentally periodic with respect to a multiple of pi, argument reduction is done to find the remainder of the original argument with respect to pi/2.

And the shared remainder-pi-over-2 code:

$ diff -w RemPio2.c RemPio2.translit.java 
1c1
< /* __ieee754_rem_pio2(x,y)
---
>     /** __ieee754_rem_pio2(x,y)
6,8c6
< 
< #include "fdlibm.h"
< 
---
>     static class RemPio2 {
12,16c10
< #ifdef __STDC__
< static const int two_over_pi[] = {
< #else
< static int two_over_pi[] = {
< #endif
---
>         private static final int[] two_over_pi = {
30,34c24
< #ifdef __STDC__
< static const int npio2_hw[] = {
< #else
< static int npio2_hw[] = {
< #endif
---
>         private static final int[] npio2_hw = {
53,57c43
< #ifdef __STDC__
< static const double
< #else
< static double
< #endif
---
>         private static final double
69,77c55,57
< #ifdef __STDC__
<         int __ieee754_rem_pio2(double x, double *y)
< #else
<         int __ieee754_rem_pio2(x,y)
<         double x,y[];
< #endif
< {
<         double z,w,t,r,fn;
<         double tx[3];
---
>         static int __ieee754_rem_pio2(double x, double[] y) {
>             double z = 0.0,w,t,r,fn;
>             double[] tx = new double[3];
110c90
<             t  = fabs(x);
---
>                 t  = Math.abs(x);
148c128,129
<         __LO(z) = __LO(x);
---
>             // __LO(z) = __LO(x);
>             z = __LO(z, __LO(x));
150c131,132
<         __HI(z) = ix - (e0<<20);
---
>             // __HI(z) = ix - (e0<<20);
>             z = __HI(z, ix - (e0<<20));
158c140
<         n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
---
>             n  =  KernelRemPio2.__kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
162c144,147
< /*
---
> 
>     }
> 
>     /**
268,269c253
< 
< 
---
>     static class KernelRemPio2 {
278c262
< #include "fdlibm.h"
---
>         private static final int[] init_jk = {2,3,4,6}; /* initial value for jk */
280,290c264
< #ifdef __STDC__
< static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
< #else
< static int init_jk[] = {2,3,4,6};
< #endif
< 
< #ifdef __STDC__
< static const double PIo2[] = {
< #else
< static double PIo2[] = {
< #endif
---
>         private static final double[] PIo2 = {
300,305c274
< 
< #ifdef __STDC__
< static const double
< #else
< static double
< #endif
---
>         static final double
311,319c280,286
< #ifdef __STDC__
<         int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2)
< #else
<         int __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
<         double x[], y[]; int e0,nx,prec; int ipio2[];
< #endif
< {
<         int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
<         double z,fw,f[20],fq[20],q[20];
---
>         static int __kernel_rem_pio2(double[] x, double[] y, int e0, int nx, int prec, final int[] ipio2) {
>             int jz,jx,jv,jp,jk,carry,n,i,j,k,m,q0,ih;
>             int[] iq = new int[20];
>             double z,fw;
>             double [] f = new double[20];
>             double [] fq= new double[20];
>             double [] q = new double[20];
341c308
< recompute:
---
>             while(true) {
350,351c317,318
<         z  = scalbn(z,q0);              /* actual value of z */
<         z -= 8.0*floor(z*0.125);                /* trim off integer >= 8 */
---
>                 z  = Math.scalb(z,q0);              /* actual value of z */
>                 z -= 8.0*Math.floor(z*0.125);           /* trim off integer >= 8 */
383c350
<                 if(carry!=0) z -= scalbn(one,q0);
---
>                         if(carry!=0) z -= Math.scalb(one,q0);
400,401c367,369
<                 goto recompute;
<             }
---
>                         continue;
>                     } else { break;}
>                 } else {break;}
409c377
<             z = scalbn(z,-q0);
---
>                 z = Math.scalb(z,-q0);
419c387
<         fw = scalbn(one,q0);
---
>             fw = Math.scalb(one,q0);
465a434
>     }

Yes, dear reader, the original C code contained a goto to implement a looping contract. To maintain close code correspondence, I wrapped with  code in question with a "while(true){...}" and then "break"-ed or "continue"-d to iterate or exit the loop.

$ diff -w RemPio2.translit.java RemPio2.fdlibm.java 
44,53c44,50
<             zero =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
<             half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
<             two24 =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
<             invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
<             pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
<             pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
<             pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
<             pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
<             pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
<             pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
---
>             invpio2 =  0x1.45f306dc9c883p-1,   // 6.36619772367581382433e-01
>             pio2_1  =  0x1.921fb544p0,         // 1.57079632673412561417e+00
>             pio2_1t =  0x1.0b4611a626331p-34,  // 6.07710050650619224932e-11
>             pio2_2  =  0x1.0b4611a6p-34,       // 6.07710050630396597660e-11
>             pio2_2t =  0x1.3198a2e037073p-69,  // 2.02226624879595063154e-21
>             pio2_3  =  0x1.3198a2ep-69,        // 2.02226624871116645580e-21
>             pio2_3t =  0x1.b839a252049c1p-104; // 8.47842766036889956997e-32
60,64c57,64
<             hx = __HI(x);           /* high word of x */
<             ix = hx&0x7fffffff;
<             if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
<                 {y[0] = x; y[1] = 0; return 0;}
<             if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
---
>             hx = __HI(x);           // high word of x
>             ix = hx & 0x7fff_ffff;
>             if (ix <= 0x3fe9_21fb) {   // |x| ~<= pi/4 , no need for reduction
>                 y[0] = x;
>                 y[1] = 0;
>                 return 0;
>             }
>             if (ix < 0x4002_d97c) {  // |x| < 3pi/4, special case with n=+-1
67c67
<                     if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
---
>                     if (ix != 0x3ff9_21fb) {    // 33+53 bit pi is good enough
70c70
<                     } else {                /* near pi/2, use 33+33+53 bit pi */
---
>                     } else {                // near pi/2, use 33+33+53 bit pi
76c76
<                 } else {    /* negative x */
---
>                 } else {    // negative x
78c78
<                     if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
---
>                     if (ix != 0x3ff_921fb) {    // 33+53 bit pi is good enough
81c81
<                     } else {                /* near pi/2, use 33+33+53 bit pi */
---
>                     } else {                // near pi/2, use 33+33+53 bit pi
89c89
<             if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
---
>             if (ix <= 0x4139_21fb) { // |x| ~<= 2^19*(pi/2), medium size
91c91
<                 n  = (int) (t*invpio2+half);
---
>                 n  = (int) (t*invpio2 + 0.5);
94c94
<                 w  = fn*pio2_1t;    /* 1st round good to 85 bit */
---
>                 w  = fn*pio2_1t;    // 1st round good to 85 bit
96c96
<                     y[0] = r-w;     /* quick check no cancellation */
---
>                     y[0] = r - w;     // quick check no cancellation
101c101
<                     if(i>16) {  /* 2nd iteration needed, good to 118 */
---
>                     if (i > 16) {  // 2nd iteration needed, good to 118
108,109c108,109
<                         if(i>49)  { /* 3rd iteration need, 151 bits acc */
<                             t  = r; /* will cover all possible cases */
---
>                         if (i > 49)  { // 3rd iteration need, 151 bits acc
>                             t  = r; // will cover all possible cases
118,119c118,124
<                 if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
<                 else         return n;
---
>                 if (hx < 0) {
>                     y[0] = -y[0];
>                     y[1] = -y[1];
>                     return -n;
>                 } else {
>                     return n;
>                 }
124,125c129,131
<             if(ix>=0x7ff00000) {            /* x is inf or NaN */
<                 y[0]=y[1]=x-x; return 0;
---
>             if (ix >= 0x7ff0_0000) {            // x is inf or NaN
>                 y[0] = y[1] = x - x;
>                 return 0;
127,128c133
<             /* set z = scalbn(|x|,ilogb(x)-23) */
<             // __LO(z) = __LO(x);
---
>             // set z = scalbn(|x|,ilogb(x)-23)
131d135
<             // __HI(z) = ix - (e0<<20);
135c139
<                 z     = (z-tx[i])*two24;
---
>                 z     = (z - tx[i])*TWO24;
139c143,145
<             while(tx[nx-1]==zero) nx--;     /* skip zero term */
---
>             while (tx[nx - 1] == 0.0) { // skip zero term
>                 nx--;
>             }
141c147,151
<             if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
---
>             if (hx < 0) {
>                 y[0] = -y[0];
>                 y[1] = -y[1];
>                 return -n;
>             }
144d153
< 
262c271
<         private static final int[] init_jk = {2,3,4,6}; /* initial value for jk */
---
>         private static final int init_jk[] = {2, 3, 4, 6}; // initial value for jk
264,272c273,281
<         private static final double[] PIo2 = {
<             1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
<             7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
<             5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
<             3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
<             1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
<             1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
<             2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
<             2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
---
>         private static final double PIo2[] = {
>             0x1.921fb4p0,    // 1.57079625129699707031e+00
>             0x1.4442dp-24,   // 7.54978941586159635335e-08
>             0x1.846988p-48,  // 5.39030252995776476554e-15
>             0x1.8cc516p-72,  // 3.28200341580791294123e-22
>             0x1.01b838p-96,  // 1.27065575308067607349e-29
>             0x1.a25204p-120, // 1.22933308981111328932e-36
>             0x1.382228p-145, // 2.73370053816464559624e-44
>             0x1.9f31dp-169,  // 2.16741683877804819444e-51
273a283
> 
275,278c285
<             zero   = 0.0,
<             one    = 1.0,
<             two24   =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
<             twon24  =  5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
---
>             twon24  = 0x1.0p-24; // 5.96046447753906250000e-08
288c295
<             /* initialize jk*/
---
>             // initialize jk
292c299
<             /* determine jx,jv,q0, note that 3>q0 */
---
>             // determine jx, jv, q0, note that 3 > q0
294c301,304
<             jv = (e0-3)/24; if(jv<0) jv=0;
---
>             jv = (e0 - 3)/24;
>             if (jv < 0) {
>                 jv = 0;
>             }
297,299c307,312
<             /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
<             j = jv-jx; m = jx+jk;
<             for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];
---
>             // set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk]
>             j = jv - jx;
>             m = jx + jk;
>             for (i = 0; i <= m; i++, j++) {
>                 f[i] = (j < 0) ? 0.0 : (double) ipio2[j];
>             }
301c314
<             /* compute q[0],q[1],...q[jk] */
---
>             // compute q[0],q[1],...q[jk]
303c316,318
<                 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
---
>                 for(j = 0, fw = 0.0; j <= jx; j++) {
>                     fw += x[j]*f[jx + i - j];
>                 }
309c324
<                 /* distill q[] into iq[] reversingly */
---
>                 // distill q[] into iq[] reversingly
312c327
<                     iq[i] =  (int)(z-two24*fw);
---
>                     iq[i] =  (int)(z - TWO24*fw);
316,318c331,333
<                 /* compute n */
<                 z  = Math.scalb(z,q0);              /* actual value of z */
<                 z -= 8.0*Math.floor(z*0.125);           /* trim off integer >= 8 */
---
>                 // compute n
>                 z  = Math.scalb(z, q0);              // actual value of z
>                 z -= 8.0*Math.floor(z*0.125);           // trim off integer >= 8
322,323c337,339
<                 if(q0>0) {      /* need iq[jz-1] to determine n */
<                     i  = (iq[jz-1]>>(24-q0)); n += i;
---
>                 if (q0 > 0) {      // need iq[jz - 1] to determine n
>                     i  = (iq[jz - 1] >> (24 - q0));
>                     n += i;
325a342,345
>                 } else if (q0 == 0) {
>                     ih = iq[jz-1]>>23;
>                 } else if (z >= 0.5) {
>                     ih=2;
327,328d346
<                 else if(q0==0) ih = iq[jz-1]>>23;
<                 else if(z>=0.5) ih=2;
330,332c348,351
<                 if(ih>0) {      /* q > 0.5 */
<                     n += 1; carry = 0;
<                     for(i=0;i<jz ;i++) {        /* compute 1-q */
---
>                 if ( ih > 0) {      // q > 0.5
>                     n += 1;
>                     carry = 0;
>                     for (i=0; i < jz; i++) {        // compute 1-q
336c355,356
<                                 carry = 1; iq[i] = 0x1000000- j;
---
>                                 carry = 1;
>                                 iq[i] = 0x100_0000 - j;
338c358,359
<                         } else  iq[i] = 0xffffff - j;
---
>                         } else {
>                             iq[i] = 0xff_ffff - j;
340c361,362
<                     if(q0>0) {          /* rare case: chance is 1 in 12 */
---
>                     }
>                     if (q0 > 0) {          // rare case: chance is 1 in 12
343c365,366
<                             iq[jz-1] &= 0x7fffff; break;
---
>                             iq[jz-1] &= 0x7f_ffff;
>                             break;
345c368,369
<                             iq[jz-1] &= 0x3fffff; break;
---
>                             iq[jz-1] &= 0x3f_ffff;
>                             break;
349,350c373,376
<                         z = one - z;
<                         if(carry!=0) z -= Math.scalb(one,q0);
---
>                         z = 1.0 - z;
>                         if (carry != 0) {
>                             z -= Math.scalb(1.0, q0);
>                         }
354,355c380,381
<                 /* check if recomputation is needed */
<                 if(z==zero) {
---
>                 // check if recomputation is needed
>                 if (z == 0.0) {
357,359c383,387
<                     for (i=jz-1;i>=jk;i--) j |= iq[i];
<                     if(j==0) { /* need recomputation */
<                         for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
---
>                     for (i = jz - 1; i >= jk; i--) {
>                         j |= iq[i];
>                     }
>                     if (j == 0) { // need recomputation
>                         for(k=1; iq[jk - k] == 0; k++);   // k = no. of terms needed
361c389
<                         for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
---
>                         for(i = jz + 1; i <= jz + k; i++) {   // add q[jz+1] to q[jz+k]
363c391,393
<                             for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
---
>                             for (j=0, fw = 0.0; j <= jx; j++) {
>                                 fw += x[j]*f[jx + i - j];
>                             }
368,369c398,403
<                     } else { break;}
<                 } else {break;}
---
>                     } else {
>                         break;
>                     }
>                 } else {
>                     break;
>                 }
372c406
<             /* chop off zero terms */
---
>             // chop off zero terms
374,376c408,414
<                 jz -= 1; q0 -= 24;
<                 while(iq[jz]==0) { jz--; q0-=24;}
<             } else { /* break z into 24-bit if necessary */
---
>                 jz -= 1;
>                 q0 -= 24;
>                 while (iq[jz] == 0) {
>                     jz--;
>                     q0-=24;
>                 }
>             } else { // break z into 24-bit if necessary
378c416
<                 if(z>=two24) {
---
>                 if (z >= TWO24) {
380,381c418,420
<                     iq[jz] = (int)(z-two24*fw);
<                     jz += 1; q0 += 24;
---
>                     iq[jz] = (int)(z - TWO24*fw);
>                     jz += 1;
>                     q0 += 24;
383c422,424
<                 } else iq[jz] = (int) z ;
---
>                 } else {
>                     iq[jz] = (int) z;
>                 }
386,387c427,428
<             /* convert integer "bit" chunk to floating-point value */
<             fw = Math.scalb(one,q0);
---
>             // convert integer "bit" chunk to floating-point value
>             fw = Math.scalb(1.0, q0);
389c430,431
<                 q[i] = fw*(double)iq[i]; fw*=twon24;
---
>                 q[i] = fw*(double)iq[i];
>                 fw *= twon24;
392c434
<             /* compute PIo2[0,...,jp]*q[jz,...,0] */
---
>             // compute PIo2[0,...,jp]*q[jz,...,0]
394c436,438
<                 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
---
>                 for (fw = 0.0, k = 0; k <= jp && k <= jz-i; k++) {
>                     fw += PIo2[k]*q[i + k];
>                 }
398c442
<             /* compress fq[] into y[] */
---
>             // compress fq[] into y[]
402c446,448
<                 for (i=jz;i>=0;i--) fw += fq[i];
---
>                 for (i = jz; i >=0; i--) {
>                     fw += fq[i];
>                 }
408c454,456
<                 for (i=jz;i>=0;i--) fw += fq[i];
---
>                 for (i = jz; i>=0; i--) {
>                     fw += fq[i];
>                 }
411c459,461
<                 for (i=1;i<=jz;i++) fw += fq[i];
---
>                 for (i = 1; i <= jz; i++) {
>                     fw += fq[i];
>                 }
414c464
<             case 3:     /* painful */
---
>             case 3:     // painful
425c475,477
<                 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
---
>                 for (fw = 0.0, i = jz; i >= 2; i--) {
>                     fw += fq[i];
>                 }
427c479,481
<                     y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
---
>                     y[0] =  fq[0];
>                     y[1] =  fq[1];
>                     y[2] =  fw;
429c483,485
<                     y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
---
>                     y[0] = -fq[0];
>                     y[1] = -fq[1];
>                     y[2] = -fw;

-------------

PR: https://git.openjdk.org/jdk/pull/12800