@@ -17,12 +17,20 @@ float logf(float x) { return __devicelib_logf(x); }
17
17
18
18
DEVICE_EXTERN_C
19
19
float expf (float x) { return __devicelib_expf (x); }
20
-
20
+ // On Windows, the math.h includes wrapper definition for functions:
21
+ // frexpf, ldexpf, hypotf, so we can't provide these 3 functions in
22
+ // device libraries which may lead to multiple definition error.
23
+ // The "hypotf" on Windows will call an internal function "_hypotf"
24
+ // and frexpf, ldexpf will call corresponding double version:
25
+ // frexp and ldexp. frexpf and ldexpf can only be used on platforms
26
+ // with fp64 support currently.
27
+ #ifndef _WIN32
21
28
DEVICE_EXTERN_C
22
29
float frexpf (float x, int *exp) { return __devicelib_frexpf (x, exp ); }
23
30
24
31
DEVICE_EXTERN_C
25
32
float ldexpf (float x, int exp) { return __devicelib_ldexpf (x, exp ); }
33
+ #endif
26
34
27
35
DEVICE_EXTERN_C
28
36
float log10f (float x) { return __devicelib_log10f (x); }
@@ -54,8 +62,13 @@ float sqrtf(float x) { return __devicelib_sqrtf(x); }
54
62
DEVICE_EXTERN_C
55
63
float cbrtf (float x) { return __devicelib_cbrtf (x); }
56
64
65
+ #ifndef _WIN32
57
66
DEVICE_EXTERN_C
58
67
float hypotf (float x, float y) { return __devicelib_hypotf (x, y); }
68
+ #else
69
+ DEVICE_EXTERN_C
70
+ float _hypotf (float x, float y) { return __devicelib_hypotf (x, y); }
71
+ #endif
59
72
60
73
DEVICE_EXTERN_C
61
74
float erff (float x) { return __devicelib_erff (x); }
@@ -128,3 +141,230 @@ float asinhf(float x) { return __devicelib_asinhf(x); }
128
141
129
142
DEVICE_EXTERN_C
130
143
float atanhf (float x) { return __devicelib_atanhf (x); }
144
+
145
+ #if defined(_WIN32)
146
+ #include < math.h>
147
+ union _Fval { // pun floating type as integer array
148
+ unsigned short _Sh[8 ];
149
+ float _Val;
150
+ };
151
+
152
+ union _Dconst { // pun float types as integer array
153
+ unsigned short _Word[2 ]; // TRANSITION, ABI: Twice as large as necessary.
154
+ float _Float;
155
+ };
156
+
157
+ #define _F0 1 // little-endian
158
+ #define _F1 0
159
+
160
+ // IEEE 754 float properties
161
+ #define FHUGE_EXP (int )(_FMAX * 900L / 1000 )
162
+
163
+ #define NBITS (16 + _FOFF)
164
+ #define FSIGN (x ) (((_Fval *)(char *)&(x))->_Sh[_F0] & _FSIGN)
165
+
166
+ #define INIT (w0 ) \
167
+ { 0 , w0 }
168
+
169
+ #define _FXbig (float )((NBITS + 1 ) * 347L / 1000 )
170
+ DEVICE_EXTERN_C
171
+ short _FDtest (float *px) { // categorize *px
172
+ _Fval *ps = (_Fval *)(char *)px;
173
+ short ret = 0 ;
174
+ if ((ps->_Sh [_F0] & _FMASK) == _FMAX << _FOFF)
175
+ ret = (short )((ps->_Sh [_F0] & _FFRAC) != 0 || ps->_Sh [_F1] != 0 ? _NANCODE
176
+ : _INFCODE);
177
+ else if ((ps->_Sh [_F0] & ~_FSIGN) != 0 || ps->_Sh [_F1] != 0 )
178
+ ret = (ps->_Sh [_F0] & _FMASK) == 0 ? _DENORM : _FINITE;
179
+
180
+ return ret;
181
+ }
182
+
183
+ DEVICE_EXTERN_C
184
+ short _FDnorm (_Fval *ps) { // normalize float fraction
185
+ short xchar;
186
+ unsigned short sign = (unsigned short )(ps->_Sh [_F0] & _FSIGN);
187
+
188
+ xchar = 1 ;
189
+ if ((ps->_Sh [_F0] &= _FFRAC) != 0 || ps->_Sh [_F1]) { // nonzero, scale
190
+ if (ps->_Sh [_F0] == 0 ) {
191
+ ps->_Sh [_F0] = ps->_Sh [_F1];
192
+ ps->_Sh [_F1] = 0 ;
193
+ xchar -= 16 ;
194
+ }
195
+
196
+ for (; ps->_Sh [_F0] < 1 << _FOFF; --xchar) { // shift left by 1
197
+ ps->_Sh [_F0] = (unsigned short )(ps->_Sh [_F0] << 1 | ps->_Sh [_F1] >> 15 );
198
+ ps->_Sh [_F1] <<= 1 ;
199
+ }
200
+ for (; 1 << (_FOFF + 1 ) <= ps->_Sh [_F0]; ++xchar) { // shift right by 1
201
+ ps->_Sh [_F1] = (unsigned short )(ps->_Sh [_F1] >> 1 | ps->_Sh [_F0] << 15 );
202
+ ps->_Sh [_F0] >>= 1 ;
203
+ }
204
+ ps->_Sh [_F0] &= _FFRAC;
205
+ }
206
+ ps->_Sh [_F0] |= sign;
207
+ return xchar;
208
+ }
209
+
210
+ DEVICE_EXTERN_C
211
+ short _FDscale (float *px, long lexp) { // scale *px by 2^xexp with checking
212
+ _Dconst _FInf = {INIT (_FMAX << _FOFF)};
213
+ _Fval *ps = (_Fval *)(char *)px;
214
+ short xchar = (short )((ps->_Sh [_F0] & _FMASK) >> _FOFF);
215
+
216
+ if (xchar == _FMAX)
217
+ return (short )((ps->_Sh [_F0] & _FFRAC) != 0 || ps->_Sh [_F1] != 0
218
+ ? _NANCODE
219
+ : _INFCODE);
220
+ if (xchar == 0 && 0 < (xchar = _FDnorm (ps)))
221
+ return 0 ;
222
+
223
+ short ret = 0 ;
224
+ if (0 < lexp && _FMAX - xchar <= lexp) { // overflow, return +/-INF
225
+ *px = ps->_Sh [_F0] & _FSIGN ? -_FInf._Float : _FInf._Float ;
226
+ ret = _INFCODE;
227
+ } else if (-xchar < lexp) { // finite result, repack
228
+ ps->_Sh [_F0] =
229
+ (unsigned short )(ps->_Sh [_F0] & ~_FMASK | (lexp + xchar) << _FOFF);
230
+ ret = _FINITE;
231
+ } else { // denormalized, scale
232
+ unsigned short sign = (unsigned short )(ps->_Sh [_F0] & _FSIGN);
233
+
234
+ ps->_Sh [_F0] = (unsigned short )(1 << _FOFF | ps->_Sh [_F0] & _FFRAC);
235
+ lexp += xchar - 1 ;
236
+ if (lexp < -(16 + 1 + _FOFF) || 0 <= lexp) { // underflow, return +/-0
237
+ ps->_Sh [_F0] = sign;
238
+ ps->_Sh [_F1] = 0 ;
239
+ ret = 0 ;
240
+ } else { // nonzero, align fraction
241
+ ret = _FINITE;
242
+ short xexp = (short )lexp;
243
+ unsigned short psx = 0 ;
244
+
245
+ if (xexp <= -16 ) { // scale by words
246
+ psx = ps->_Sh [_F1] | (psx != 0 ? 1 : 0 );
247
+ ps->_Sh [_F1] = ps->_Sh [_F0];
248
+ ps->_Sh [_F0] = 0 ;
249
+ xexp += 16 ;
250
+ }
251
+ if ((xexp = (short )-xexp) != 0 ) { // scale by bits
252
+ psx = (ps->_Sh [_F1] << (16 - xexp)) | (psx != 0 ? 1 : 0 );
253
+ ps->_Sh [_F1] = (unsigned short )(ps->_Sh [_F1] >> xexp |
254
+ ps->_Sh [_F0] << (16 - xexp));
255
+ ps->_Sh [_F0] >>= xexp;
256
+ }
257
+
258
+ ps->_Sh [_F0] |= sign;
259
+ if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh [_F1] & 0x0001 ) != 0 ) &&
260
+ (++ps->_Sh [_F1] & 0xffff ) == 0 )
261
+ ++ps->_Sh [_F0]; // round up
262
+ else if (ps->_Sh [_F0] == sign && ps->_Sh [_F1] == 0 )
263
+ ret = 0 ;
264
+ }
265
+ }
266
+
267
+ return ret;
268
+ }
269
+
270
+ DEVICE_EXTERN_C
271
+ short _FExp (float *px, float y,
272
+ short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge
273
+ static const float hugexp = FHUGE_EXP;
274
+ _Dconst _FInf = {INIT (_FMAX << _FOFF)};
275
+ static const float p[] = {1 .0F , 60 .09114349F };
276
+ static const float q[] = {12 .01517514F , 120 .18228722F };
277
+ static const float c1 = (22713 .0F / 32768 .0F );
278
+ static const float c2 = 1 .4286068203094172321214581765680755e-6F ;
279
+ static const float invln2 = 1 .4426950408889634073599246810018921F ;
280
+ short ret = 0 ;
281
+ if (*px < -hugexp || y == 0 .0F ) { // certain underflow
282
+ *px = 0 .0F ;
283
+ } else if (hugexp < *px) { // certain overflow
284
+ *px = _FInf._Float ;
285
+ ret = _INFCODE;
286
+ } else { // xexp won't overflow
287
+ float g = *px * invln2;
288
+ short xexp = (short )(g + (g < 0 .0F ? -0 .5F : +0 .5F ));
289
+
290
+ g = xexp;
291
+ g = (float )((*px - g * c1) - g * c2);
292
+ if (-__FLT_EPSILON__ < g && g < __FLT_EPSILON__) {
293
+ *px = y;
294
+ } else { // g * g worth computing
295
+ const float z = g * g;
296
+ const float w = q[0 ] * z + q[1 ];
297
+
298
+ g *= z + p[1 ];
299
+ *px = (w + g) / (w - g) * 2 .0F * y;
300
+ --xexp;
301
+ }
302
+ ret = _FDscale (px, (long )xexp + eoff);
303
+ }
304
+ return ret;
305
+ }
306
+
307
+ DEVICE_EXTERN_C
308
+ float _FCosh (float x, float y) { // compute y * cosh(x), |y| <= 1
309
+ switch (_FDtest (&x)) { // test for special codes
310
+ case _NANCODE:
311
+ case _INFCODE:
312
+ return x;
313
+ case 0 :
314
+ return y;
315
+ default : // finite
316
+ if (y == 0 .0F )
317
+ return y;
318
+
319
+ if (x < 0 .0F )
320
+ x = -x;
321
+
322
+ if (x < _FXbig) { // worth adding in exp(-x)
323
+ _FExp (&x, 1 .0F , -1 );
324
+ return y * (x + 0 .25F / x);
325
+ }
326
+ _FExp (&x, y, -1 );
327
+ return x;
328
+ }
329
+ }
330
+
331
+ DEVICE_EXTERN_C
332
+ float _FSinh (float x, float y) { // compute y * sinh(x), |y| <= 1
333
+ _Dconst _FRteps = {INIT ((_FBIAS - NBITS / 2 ) << _FOFF)};
334
+ static const float p[] = {0 .00020400F , 0 .00832983F , 0 .16666737F , 0 .99999998F };
335
+ short neg;
336
+
337
+ switch (_FDtest (&x)) { // test for special codes
338
+ case _NANCODE:
339
+ return x;
340
+ case _INFCODE:
341
+ return y != 0 .0F ? x : FSIGN (x) ? -y : y;
342
+ case 0 :
343
+ return x * y;
344
+ default : // finite
345
+ if (y == 0 .0F )
346
+ return x < 0 .0F ? -y : y;
347
+
348
+ if (x < 0 .0F ) {
349
+ x = -x;
350
+ neg = 1 ;
351
+ } else
352
+ neg = 0 ;
353
+
354
+ if (x < _FRteps._Float ) {
355
+ x *= y; // x tiny
356
+ } else if (x < 1 .0F ) {
357
+ float w = x * x;
358
+
359
+ x += ((p[0 ] * w + p[1 ]) * w + p[2 ]) * w * x;
360
+ x *= y;
361
+ } else if (x < _FXbig) { // worth adding in exp(-x)
362
+ _FExp (&x, 1 .0F , -1 );
363
+ x = y * (x - 0 .25F / x);
364
+ } else
365
+ _FExp (&x, y, -1 );
366
+
367
+ return neg ? -x : x;
368
+ }
369
+ }
370
+ #endif
0 commit comments