@@ -17,13 +17,27 @@ float logf(float x) { return __devicelib_logf(x); }
17
17
18
18
DEVICE_EXTERN_C
19
19
float expf (float x) { return __devicelib_expf (x); }
20
-
20
+ // On Windows, the math.h includes wrapper definition for functions:
21
+ // frexpf, ldexpf, hypotf, so we can't provide these 3 functions in
22
+ // device libraries which may lead to multiple definition error.
23
+ // The "hypotf" on Windows will call an internal function "_hypotf"
24
+ // and frexpf, ldexpf will call corresponding double version:
25
+ // frexp and ldexp. frexpf and ldexpf can only be used on platforms
26
+ // with fp64 support currently.
27
+ #ifndef _WIN32
21
28
DEVICE_EXTERN_C
22
29
float frexpf (float x, int *exp) { return __devicelib_frexpf (x, exp ); }
23
30
24
31
DEVICE_EXTERN_C
25
32
float ldexpf (float x, int exp) { return __devicelib_ldexpf (x, exp ); }
26
33
34
+ DEVICE_EXTERN_C
35
+ float hypotf (float x, float y) { return __devicelib_hypotf (x, y); }
36
+ #else
37
+ DEVICE_EXTERN_C
38
+ float _hypotf (float x, float y) { return __devicelib_hypotf (x, y); }
39
+ #endif
40
+
27
41
DEVICE_EXTERN_C
28
42
float log10f (float x) { return __devicelib_log10f (x); }
29
43
@@ -54,9 +68,6 @@ float sqrtf(float x) { return __devicelib_sqrtf(x); }
54
68
DEVICE_EXTERN_C
55
69
float cbrtf (float x) { return __devicelib_cbrtf (x); }
56
70
57
- DEVICE_EXTERN_C
58
- float hypotf (float x, float y) { return __devicelib_hypotf (x, y); }
59
-
60
71
DEVICE_EXTERN_C
61
72
float erff (float x) { return __devicelib_erff (x); }
62
73
@@ -128,3 +139,231 @@ float asinhf(float x) { return __devicelib_asinhf(x); }
128
139
129
140
DEVICE_EXTERN_C
130
141
float atanhf (float x) { return __devicelib_atanhf (x); }
142
+
143
+ #if defined(_WIN32)
144
+ #include < math.h>
145
+ union _Fval { // pun floating type as integer array
146
+ unsigned short _Sh[8 ];
147
+ float _Val;
148
+ };
149
+
150
+ union _Dconst { // pun float types as integer array
151
+ unsigned short _Word[2 ]; // TRANSITION, ABI: Twice as large as necessary.
152
+ float _Float;
153
+ };
154
+
155
+ #define _F0 1 // little-endian
156
+ #define _F1 0
157
+
158
+ // IEEE 754 float properties
159
+ #define FHUGE_EXP (int )(_FMAX * 900L / 1000 )
160
+
161
+ #define NBITS (16 + _FOFF)
162
+ #define FSIGN (x ) (((_Fval *)(char *)&(x))->_Sh[_F0] & _FSIGN)
163
+
164
+ #define INIT (w0 ) \
165
+ { 0 , w0 }
166
+
167
+ #define _FXbig (float )((NBITS + 1 ) * 347L / 1000 )
168
+ DEVICE_EXTERN_C
169
+ short _FDtest (float *px) { // categorize *px
170
+ _Fval *ps = (_Fval *)(char *)px;
171
+ short ret = 0 ;
172
+ if ((ps->_Sh [_F0] & _FMASK) == _FMAX << _FOFF)
173
+ ret = (short )((ps->_Sh [_F0] & _FFRAC) != 0 || ps->_Sh [_F1] != 0 ? _NANCODE
174
+ : _INFCODE);
175
+ else if ((ps->_Sh [_F0] & ~_FSIGN) != 0 || ps->_Sh [_F1] != 0 )
176
+ ret = (ps->_Sh [_F0] & _FMASK) == 0 ? _DENORM : _FINITE;
177
+
178
+ return ret;
179
+ }
180
+
181
+ DEVICE_EXTERN_C
182
+ short _FDnorm (_Fval *ps) { // normalize float fraction
183
+ short xchar;
184
+ unsigned short sign = (unsigned short )(ps->_Sh [_F0] & _FSIGN);
185
+
186
+ xchar = 1 ;
187
+ if ((ps->_Sh [_F0] &= _FFRAC) != 0 || ps->_Sh [_F1]) { // nonzero, scale
188
+ if (ps->_Sh [_F0] == 0 ) {
189
+ ps->_Sh [_F0] = ps->_Sh [_F1];
190
+ ps->_Sh [_F1] = 0 ;
191
+ xchar -= 16 ;
192
+ }
193
+
194
+ for (; ps->_Sh [_F0] < 1 << _FOFF; --xchar) { // shift left by 1
195
+ ps->_Sh [_F0] = (unsigned short )(ps->_Sh [_F0] << 1 | ps->_Sh [_F1] >> 15 );
196
+ ps->_Sh [_F1] <<= 1 ;
197
+ }
198
+ for (; 1 << (_FOFF + 1 ) <= ps->_Sh [_F0]; ++xchar) { // shift right by 1
199
+ ps->_Sh [_F1] = (unsigned short )(ps->_Sh [_F1] >> 1 | ps->_Sh [_F0] << 15 );
200
+ ps->_Sh [_F0] >>= 1 ;
201
+ }
202
+ ps->_Sh [_F0] &= _FFRAC;
203
+ }
204
+ ps->_Sh [_F0] |= sign;
205
+ return xchar;
206
+ }
207
+
208
+ DEVICE_EXTERN_C
209
+ short _FDscale (float *px, long lexp) { // scale *px by 2^xexp with checking
210
+ _Dconst _FInf = {INIT (_FMAX << _FOFF)};
211
+ _Fval *ps = (_Fval *)(char *)px;
212
+ short xchar = (short )((ps->_Sh [_F0] & _FMASK) >> _FOFF);
213
+
214
+ if (xchar == _FMAX)
215
+ return (short )((ps->_Sh [_F0] & _FFRAC) != 0 || ps->_Sh [_F1] != 0
216
+ ? _NANCODE
217
+ : _INFCODE);
218
+ if (xchar == 0 && 0 < (xchar = _FDnorm (ps)))
219
+ return 0 ;
220
+
221
+ short ret = 0 ;
222
+ if (0 < lexp && _FMAX - xchar <= lexp) { // overflow, return +/-INF
223
+ *px = ps->_Sh [_F0] & _FSIGN ? -_FInf._Float : _FInf._Float ;
224
+ ret = _INFCODE;
225
+ } else if (-xchar < lexp) { // finite result, repack
226
+ ps->_Sh [_F0] =
227
+ (unsigned short )(ps->_Sh [_F0] & ~_FMASK | (lexp + xchar) << _FOFF);
228
+ ret = _FINITE;
229
+ } else { // denormalized, scale
230
+ unsigned short sign = (unsigned short )(ps->_Sh [_F0] & _FSIGN);
231
+
232
+ ps->_Sh [_F0] = (unsigned short )(1 << _FOFF | ps->_Sh [_F0] & _FFRAC);
233
+ lexp += xchar - 1 ;
234
+ if (lexp < -(16 + 1 + _FOFF) || 0 <= lexp) { // underflow, return +/-0
235
+ ps->_Sh [_F0] = sign;
236
+ ps->_Sh [_F1] = 0 ;
237
+ ret = 0 ;
238
+ } else { // nonzero, align fraction
239
+ ret = _FINITE;
240
+ short xexp = (short )lexp;
241
+ unsigned short psx = 0 ;
242
+
243
+ if (xexp <= -16 ) { // scale by words
244
+ psx = ps->_Sh [_F1] | (psx != 0 ? 1 : 0 );
245
+ ps->_Sh [_F1] = ps->_Sh [_F0];
246
+ ps->_Sh [_F0] = 0 ;
247
+ xexp += 16 ;
248
+ }
249
+ if ((xexp = (short )-xexp) != 0 ) { // scale by bits
250
+ psx = (ps->_Sh [_F1] << (16 - xexp)) | (psx != 0 ? 1 : 0 );
251
+ ps->_Sh [_F1] = (unsigned short )(ps->_Sh [_F1] >> xexp |
252
+ ps->_Sh [_F0] << (16 - xexp));
253
+ ps->_Sh [_F0] >>= xexp;
254
+ }
255
+
256
+ ps->_Sh [_F0] |= sign;
257
+ if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh [_F1] & 0x0001 ) != 0 ) &&
258
+ (++ps->_Sh [_F1] & 0xffff ) == 0 )
259
+ ++ps->_Sh [_F0]; // round up
260
+ else if (ps->_Sh [_F0] == sign && ps->_Sh [_F1] == 0 )
261
+ ret = 0 ;
262
+ }
263
+ }
264
+
265
+ return ret;
266
+ }
267
+
268
+ DEVICE_EXTERN_C
269
+ short _FExp (float *px, float y,
270
+ short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge
271
+ static const float hugexp = FHUGE_EXP;
272
+ _Dconst _FInf = {INIT (_FMAX << _FOFF)};
273
+ static const float p[] = {1 .0F , 60 .09114349F };
274
+ static const float q[] = {12 .01517514F , 120 .18228722F };
275
+ static const float c1 = (22713 .0F / 32768 .0F );
276
+ static const float c2 = 1 .4286068203094172321214581765680755e-6F ;
277
+ static const float invln2 = 1 .4426950408889634073599246810018921F ;
278
+ short ret = 0 ;
279
+ if (*px < -hugexp || y == 0 .0F ) { // certain underflow
280
+ *px = 0 .0F ;
281
+ } else if (hugexp < *px) { // certain overflow
282
+ *px = _FInf._Float ;
283
+ ret = _INFCODE;
284
+ } else { // xexp won't overflow
285
+ float g = *px * invln2;
286
+ short xexp = (short )(g + (g < 0 .0F ? -0 .5F : +0 .5F ));
287
+
288
+ g = xexp;
289
+ g = (float )((*px - g * c1) - g * c2);
290
+ if (-__FLT_EPSILON__ < g && g < __FLT_EPSILON__) {
291
+ *px = y;
292
+ } else { // g * g worth computing
293
+ const float z = g * g;
294
+ const float w = q[0 ] * z + q[1 ];
295
+
296
+ g *= z + p[1 ];
297
+ *px = (w + g) / (w - g) * 2 .0F * y;
298
+ --xexp;
299
+ }
300
+ ret = _FDscale (px, (long )xexp + eoff);
301
+ }
302
+ return ret;
303
+ }
304
+
305
+ DEVICE_EXTERN_C
306
+ float _FCosh (float x, float y) { // compute y * cosh(x), |y| <= 1
307
+ switch (_FDtest (&x)) { // test for special codes
308
+ case _NANCODE:
309
+ case _INFCODE:
310
+ return x;
311
+ case 0 :
312
+ return y;
313
+ default : // finite
314
+ if (y == 0 .0F )
315
+ return y;
316
+
317
+ if (x < 0 .0F )
318
+ x = -x;
319
+
320
+ if (x < _FXbig) { // worth adding in exp(-x)
321
+ _FExp (&x, 1 .0F , -1 );
322
+ return y * (x + 0 .25F / x);
323
+ }
324
+ _FExp (&x, y, -1 );
325
+ return x;
326
+ }
327
+ }
328
+
329
+ DEVICE_EXTERN_C
330
+ float _FSinh (float x, float y) { // compute y * sinh(x), |y| <= 1
331
+ _Dconst _FRteps = {INIT ((_FBIAS - NBITS / 2 ) << _FOFF)};
332
+ static const float p[] = {0 .00020400F , 0 .00832983F , 0 .16666737F , 0 .99999998F };
333
+ short neg;
334
+
335
+ switch (_FDtest (&x)) { // test for special codes
336
+ case _NANCODE:
337
+ return x;
338
+ case _INFCODE:
339
+ return y != 0 .0F ? x : FSIGN (x) ? -y : y;
340
+ case 0 :
341
+ return x * y;
342
+ default : // finite
343
+ if (y == 0 .0F )
344
+ return x < 0 .0F ? -y : y;
345
+
346
+ if (x < 0 .0F ) {
347
+ x = -x;
348
+ neg = 1 ;
349
+ } else
350
+ neg = 0 ;
351
+
352
+ if (x < _FRteps._Float ) {
353
+ x *= y; // x tiny
354
+ } else if (x < 1 .0F ) {
355
+ float w = x * x;
356
+
357
+ x += ((p[0 ] * w + p[1 ]) * w + p[2 ]) * w * x;
358
+ x *= y;
359
+ } else if (x < _FXbig) { // worth adding in exp(-x)
360
+ _FExp (&x, 1 .0F , -1 );
361
+ x = y * (x - 0 .25F / x);
362
+ } else
363
+ _FExp (&x, y, -1 );
364
+
365
+ return neg ? -x : x;
366
+ }
367
+ }
368
+
369
+ #endif
0 commit comments