Line 0
Link Here
|
|
|
1 |
/*- |
2 |
* Copyright (c) 2017 Steven G. Kargl |
3 |
* All rights reserved. |
4 |
* |
5 |
* Redistribution and use in source and binary forms, with or without |
6 |
* modification, are permitted provided that the following conditions |
7 |
* are met: |
8 |
* 1. Redistributions of source code must retain the above copyright |
9 |
* notice unmodified, this list of conditions, and the following |
10 |
* disclaimer. |
11 |
* 2. Redistributions in binary form must reproduce the above copyright |
12 |
* notice, this list of conditions and the following disclaimer in the |
13 |
* documentation and/or other materials provided with the distribution. |
14 |
* |
15 |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
16 |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
17 |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
18 |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
19 |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
20 |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
21 |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
22 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
23 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
24 |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 |
*/ |
26 |
|
27 |
/** |
28 |
* tanpi(x) computes tan(pi*x) without multiplication by pi (almost). First, |
29 |
* note that tanpi(-x) = -tanpi(x), so the algorithm considers only |x| and |
30 |
* includes reflection symmetry by considering the sign of x on output. The |
31 |
* method used depends on the magnitude of x. |
32 |
* |
33 |
* 1. For small |x|, tanpi(x) = pi * x where a sloppy threshold is used. The |
34 |
* threshold is |x| < 0x1pN with N = -(P/2+M). P is the precision of the |
35 |
* floating-point type and M = 3 or 4. To achieve a max ULP less than |
36 |
* 0.59, pi is decomposed into high and low parts with the high part |
37 |
* containing a number of trailing zero bits. x is also split into high |
38 |
* and low parts. |
39 |
* |
40 |
* 2. For |x| < 1, argument reduction is not required and tanpi(x) is |
41 |
* computed by either a polynomial approximation or the definition of |
42 |
* tan in terms of sin and cos. For |x| in [0,0.25), a polynomial |
43 |
* approximation is used. For |x| in (0.25, 0.5], tanpi(x) is computed |
44 |
* by tanpi(x) = cospi(0.5 - x) / sinpi(0.5 - x). |
45 |
* |
46 |
* 3. For 1 <= |x| < 0x1p(P-1), argument reduction is required where |
47 |
* |x| = j0 + r with j0 an integer and 0 <= r < 1 is a remainder. With |
48 |
* the given domain, a simplified inline j0 = floor(x) is used. |
49 |
* |
50 |
* tanpi(x) = tan(pi*(j0+r)) |
51 |
* tan(pi*j0) + tan(pi*r) |
52 |
* = ---------------------------- |
53 |
* 1 - tan(pi*j0) * tan(pi*r) |
54 |
* = tanpi(r) |
55 |
* |
56 |
* 4. For |x| >= 0x1p(P-1), |x| is integral and tanpi(x) = copysign(0,x). |
57 |
* |
58 |
* 5. Special cases: |
59 |
* |
60 |
* tanpif(+-0) = +-0 |
61 |
* tanpif(+-n) = +-0, for positive integers n. |
62 |
* tanpif(+-n+1/4) = +-1, for positive integers n. |
63 |
* tanpif(+-n+1/2) = NaN, for positive integers n. |
64 |
* tanpif(+-inf) = nan. Raises the "invalid" floating-point exception. |
65 |
* tanpif(nan) = nan. Raises the "invalid" floating-point exception. |
66 |
* |
67 |
* Compute tanpi(x) = tan(pi*x) via a polynomial approximation. Coefficients |
68 |
* were determined for the following function: |
69 |
* |
70 |
* f(x) = tan(pi*x) / x = t0 + t1*x**2 + t2*x**4 + ... |
71 |
* |
72 |
* with an Remes algorithm. The Remes algorithm used MPFR with a working |
73 |
* precision of 30 times that of the target precision (e.g., for 53-bit |
74 |
* precision MPFR computed f(x) with 1590-bit precision). To accumulate the |
75 |
* result with sufficient accuracy the t0, t1, and t2 coefficients and final |
76 |
* multiplications and additions needed to be handled specially. |
77 |
* |
78 |
* x2 = x * x |
79 |
* T(x2) = t3 + t4*x2 + t5*x2**2 + ... |
80 |
* = t3 + (t4 + (t5 ...) * x2) * x2 (Horner's method) |
81 |
* tanpi(x) = x * (t0 + x2*(t1 + x2*(t2 + x2*T(x2)))) |
82 |
* |
83 |
* The final multiplications and additions in the above expression for |
84 |
* tanpi(x) are done in an extra precision arithmetic. |
85 |
* |
86 |
* On an Intel(R) Core(TM)2 Duo CPU T7250, limited testing in the |
87 |
* interval [0x1p-28,0.25] gave |
88 |
* |
89 |
* a = 2.4250162020325661e-01 |
90 |
* tanpi(a) = 9.5396227802027667e-01 (0x3fee86db 0xe636df37) |
91 |
* tan(pi*a) = 9.5396227802027678e-01 (0x3fee86db 0xe636df38) |
92 |
* |
93 |
* MAX ULP: 1.37582114 |
94 |
* Total tested: 134217727 |
95 |
* 1.0 < ULP : 309144 |
96 |
* 0.9 < ULP <= 1.0: 1047636 |
97 |
* 0.8 < ULP <= 0.9: 2732819 |
98 |
* 0.7 < ULP <= 0.8: 5491591 |
99 |
* 0.6 < ULP <= 0.7: 8639239 |
100 |
* |
101 |
* In the interval [0.25,0.48], limited testing finds |
102 |
* |
103 |
* a = 4.1875240078199505e-01 |
104 |
* tanpi(a) = 3.8323217592887793e+00 (0x400ea898 0x4f7f27eb) |
105 |
* tan(pi*a) = 3.8323217592887784e+00 (0x400ea898 0x4f7f27e9) |
106 |
* |
107 |
* MAX ULP: 2.01127812 |
108 |
* Total tested: 134217727 |
109 |
* 1.0 < ULP : 5702704 |
110 |
* 0.9 < ULP <= 1.0: 3602502 |
111 |
* 0.8 < ULP <= 0.9: 5207602 |
112 |
* 0.7 < ULP <= 0.8: 7173276 |
113 |
* 0.6 < ULP <= 0.7: 9449882 |
114 |
*/ |
115 |
|
116 |
#include "math.h" |
117 |
#include "math_private.h" |
118 |
#include "k_cospi.c" |
119 |
#include "k_sinpi.c" |
120 |
|
121 |
static const double pihi = 3.1415926814079285e+00; /* 0x400921fb 0x58000000 */ |
122 |
static const double pilo =-2.7818135228334233e-08; /* 0xbe5dde97 0x3dcb3b3a */ |
123 |
|
124 |
static const double |
125 |
t00 = 3.1415926535897931e+00, /* 0x400921fb 0x54442d18 */ |
126 |
t00lo = 1.2267625636978282e-16, /* 0x3ca1adf4 0x7b8ee56b */ |
127 |
t01 = 1.0335425560099939e+01, /* 0x4024abbc 0xe625be52 */ |
128 |
t01lo = -6.0289590484334045e-16, /* 0xbcc5b8bb 0xb4f3d850 */ |
129 |
t02 = 4.0802624638040442e+01, /* 0x404466bc 0x6775ac7c */ |
130 |
t02lo = -3.1181465177803440e-15, /* 0xbcec15f4 0xd355f491 */ |
131 |
t03 = 1.6299995197351416e+02, /* 0x40645fff 0x9b47fa0c */ |
132 |
t04 = 6.5190975669363422e+02, /* 0x40845f47 0x2e8473cf */ |
133 |
t05 = 2.6075994007466215e+03, /* 0x40a45f32 0xe4a797e0 */ |
134 |
t06 = 1.0430393632420044e+04, /* 0x40c45f32 0x628c115e */ |
135 |
t07 = 4.1720374420664804e+04, /* 0x40e45f0b 0xfb410bc9 */ |
136 |
t08 = 1.6695720713150888e+05, /* 0x41046169 0xa8349085 */ |
137 |
t09 = 6.6429163117898861e+05, /* 0x412445c7 0x4329e474 */ |
138 |
t10 = 2.7802499382503941e+06, /* 0x4145362c 0xf81896c3 */ |
139 |
t11 = 7.9185101944778729e+06, /* 0x415e34eb 0x8c725352 */ |
140 |
t12 = 9.3691291294289947e+07, /* 0x41965676 0x6d2d5a58 */ |
141 |
t13 = -5.0585651903137898e+08, /* 0xc1be26c2 0x07080874 */ |
142 |
t14 = 6.8772398519624548e+09, /* 0x41f99ea5 0xa2bf6637 */ |
143 |
t15 = -3.3094464588263161e+10, /* 0xc21ed255 0xd1310d7a */ |
144 |
t16 = 1.1673794006564838e+11; /* 0x423b2e1f 0x9a61a5fc */ |
145 |
|
146 |
static inline double |
147 |
__kernel_tanpi(double x) |
148 |
{ |
149 |
double hi, lo, t, x2, xhi, xlo; |
150 |
uint32_t hx, lx; |
151 |
|
152 |
if (x < 0.25) { |
153 |
x2 = x * x; |
154 |
t = x2 * (x2 * (x2 * (t15 + x2 * t16) + t14) + t13) + t12; |
155 |
t = x2 * (x2 * (x2 * (x2 * t + t11) + t10) + t09) + t08; |
156 |
t = x2 * (x2 * (x2 * (x2 * t + t07) + t06) + t05) + t04; |
157 |
|
158 |
EXTRACT_WORDS(hx, lx, x); |
159 |
INSERT_WORDS(xhi, hx, 0); |
160 |
xlo = x - xhi; |
161 |
xlo *= (xlo + xhi + xhi); |
162 |
xhi *= xhi; |
163 |
|
164 |
t = t02lo + x2 * (x2 * t + t03) + t02; |
165 |
t = t01lo + xlo * t + xhi * t + t01; |
166 |
t = t00lo + xlo * t + xhi * t + t00; |
167 |
return (t * x); |
168 |
} else if (x > 0.25) { |
169 |
x = 0.5 - x; |
170 |
t = __kernel_cospi(x) / __kernel_sinpi(x); |
171 |
} else |
172 |
t = 1; |
173 |
return (t); |
174 |
} |
175 |
|
176 |
static inline double |
177 |
__compute_tanpi(double x) |
178 |
{ |
179 |
|
180 |
return (x < 0.5 ? __kernel_tanpi(x) : -__kernel_tanpi(1 - x)); |
181 |
} |
182 |
|
183 |
double |
184 |
tanpi(double x) |
185 |
{ |
186 |
double ax; |
187 |
uint32_t hx, ix, j0, lx; |
188 |
|
189 |
EXTRACT_WORDS(hx, lx, x); |
190 |
ix = hx & 0x7fffffff; |
191 |
INSERT_WORDS(ax, ix, lx); |
192 |
|
193 |
if (ix < 0x3ff00000) { /* |x| < 1 */ |
194 |
if (ix < 0x3e200000) { /* |x| < 0x1p-29 */ |
195 |
if (x == 0) |
196 |
return (x); |
197 |
INSERT_WORDS(ax, hx, 0); |
198 |
x -= ax; |
199 |
return (pilo * x + pihi * x + pilo * ax + pihi * ax); |
200 |
} |
201 |
if (ix == 0x3fe00000 && !lx) |
202 |
return ((x - x) / (x - x)); |
203 |
ax = __compute_tanpi(ax); |
204 |
return ((hx & 0x80000000) ? -ax : ax); |
205 |
} |
206 |
|
207 |
if (ix < 0x43300000) { /* 1 <= |x| < 0x1p52 */ |
208 |
/* Determine integer part of ax. */ |
209 |
j0 = ((ix >> 20) & 0x7ff) - 0x3ff; |
210 |
if (j0 < 20) { |
211 |
ix &= ~(0x000fffff >> j0); |
212 |
lx = 0; |
213 |
} else { |
214 |
lx &= ~(((uint32_t)(0xffffffff)) >> (j0 - 20)); |
215 |
} |
216 |
INSERT_WORDS(x,ix,lx); |
217 |
|
218 |
ax -= x; |
219 |
|
220 |
if (ax == 0.5) |
221 |
return ((x - x) / (x - x)); |
222 |
|
223 |
ax = ax == 0 ? 0 : __compute_tanpi(ax); |
224 |
return ((hx & 0x80000000) ? -ax : ax); |
225 |
} |
226 |
|
227 |
/* x = +-inf or nan. */ |
228 |
if (ix >= 0x7f800000) |
229 |
return (x - x); |
230 |
|
231 |
/* |
232 |
* |x| >= 0x1p53 is always an integer, so return +-0. |
233 |
* FIXME: should this raise FE_INEXACT or FE_INVALID. |
234 |
*/ |
235 |
return (copysign(0, x)); |
236 |
} |
237 |
|
238 |
#if LDBL_MANT_DIG == 53 |
239 |
__weak_reference(tanpi, tanpil); |
240 |
#endif |