Skip to content

Commit fd14167

Browse files
authored
[SYCL] Add support for MSVC internal math functions in device library (#1441)
Signed-off-by: gejin <[email protected]>
1 parent 4370630 commit fd14167

File tree

7 files changed

+863
-94
lines changed

7 files changed

+863
-94
lines changed

libdevice/cmath_wrapper.cpp

+241-1
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,20 @@ float logf(float x) { return __devicelib_logf(x); }
1717

1818
DEVICE_EXTERN_C
1919
float expf(float x) { return __devicelib_expf(x); }
20-
20+
// On Windows, the math.h includes wrapper definition for functions:
21+
// frexpf, ldexpf, hypotf, so we can't provide these 3 functions in
22+
// device libraries which may lead to multiple definition error.
23+
// The "hypotf" on Windows will call an internal function "_hypotf"
24+
// and frexpf, ldexpf will call corresponding double version:
25+
// frexp and ldexp. frexpf and ldexpf can only be used on platforms
26+
// with fp64 support currently.
27+
#ifndef _WIN32
2128
DEVICE_EXTERN_C
2229
float frexpf(float x, int *exp) { return __devicelib_frexpf(x, exp); }
2330

2431
DEVICE_EXTERN_C
2532
float ldexpf(float x, int exp) { return __devicelib_ldexpf(x, exp); }
33+
#endif
2634

2735
DEVICE_EXTERN_C
2836
float log10f(float x) { return __devicelib_log10f(x); }
@@ -54,8 +62,13 @@ float sqrtf(float x) { return __devicelib_sqrtf(x); }
5462
DEVICE_EXTERN_C
5563
float cbrtf(float x) { return __devicelib_cbrtf(x); }
5664

65+
#ifndef _WIN32
5766
DEVICE_EXTERN_C
5867
float hypotf(float x, float y) { return __devicelib_hypotf(x, y); }
68+
#else
69+
DEVICE_EXTERN_C
70+
float _hypotf(float x, float y) { return __devicelib_hypotf(x, y); }
71+
#endif
5972

6073
DEVICE_EXTERN_C
6174
float erff(float x) { return __devicelib_erff(x); }
@@ -128,3 +141,230 @@ float asinhf(float x) { return __devicelib_asinhf(x); }
128141

129142
DEVICE_EXTERN_C
130143
float atanhf(float x) { return __devicelib_atanhf(x); }
144+
145+
#if defined(_WIN32)
146+
#include <math.h>
147+
union _Fval { // pun floating type as integer array
148+
unsigned short _Sh[8];
149+
float _Val;
150+
};
151+
152+
union _Dconst { // pun float types as integer array
153+
unsigned short _Word[2]; // TRANSITION, ABI: Twice as large as necessary.
154+
float _Float;
155+
};
156+
157+
#define _F0 1 // little-endian
158+
#define _F1 0
159+
160+
// IEEE 754 float properties
161+
#define FHUGE_EXP (int)(_FMAX * 900L / 1000)
162+
163+
#define NBITS (16 + _FOFF)
164+
#define FSIGN(x) (((_Fval *)(char *)&(x))->_Sh[_F0] & _FSIGN)
165+
166+
#define INIT(w0) \
167+
{ 0, w0 }
168+
169+
#define _FXbig (float)((NBITS + 1) * 347L / 1000)
170+
DEVICE_EXTERN_C
171+
short _FDtest(float *px) { // categorize *px
172+
_Fval *ps = (_Fval *)(char *)px;
173+
short ret = 0;
174+
if ((ps->_Sh[_F0] & _FMASK) == _FMAX << _FOFF)
175+
ret = (short)((ps->_Sh[_F0] & _FFRAC) != 0 || ps->_Sh[_F1] != 0 ? _NANCODE
176+
: _INFCODE);
177+
else if ((ps->_Sh[_F0] & ~_FSIGN) != 0 || ps->_Sh[_F1] != 0)
178+
ret = (ps->_Sh[_F0] & _FMASK) == 0 ? _DENORM : _FINITE;
179+
180+
return ret;
181+
}
182+
183+
DEVICE_EXTERN_C
184+
short _FDnorm(_Fval *ps) { // normalize float fraction
185+
short xchar;
186+
unsigned short sign = (unsigned short)(ps->_Sh[_F0] & _FSIGN);
187+
188+
xchar = 1;
189+
if ((ps->_Sh[_F0] &= _FFRAC) != 0 || ps->_Sh[_F1]) { // nonzero, scale
190+
if (ps->_Sh[_F0] == 0) {
191+
ps->_Sh[_F0] = ps->_Sh[_F1];
192+
ps->_Sh[_F1] = 0;
193+
xchar -= 16;
194+
}
195+
196+
for (; ps->_Sh[_F0] < 1 << _FOFF; --xchar) { // shift left by 1
197+
ps->_Sh[_F0] = (unsigned short)(ps->_Sh[_F0] << 1 | ps->_Sh[_F1] >> 15);
198+
ps->_Sh[_F1] <<= 1;
199+
}
200+
for (; 1 << (_FOFF + 1) <= ps->_Sh[_F0]; ++xchar) { // shift right by 1
201+
ps->_Sh[_F1] = (unsigned short)(ps->_Sh[_F1] >> 1 | ps->_Sh[_F0] << 15);
202+
ps->_Sh[_F0] >>= 1;
203+
}
204+
ps->_Sh[_F0] &= _FFRAC;
205+
}
206+
ps->_Sh[_F0] |= sign;
207+
return xchar;
208+
}
209+
210+
DEVICE_EXTERN_C
211+
short _FDscale(float *px, long lexp) { // scale *px by 2^xexp with checking
212+
_Dconst _FInf = {INIT(_FMAX << _FOFF)};
213+
_Fval *ps = (_Fval *)(char *)px;
214+
short xchar = (short)((ps->_Sh[_F0] & _FMASK) >> _FOFF);
215+
216+
if (xchar == _FMAX)
217+
return (short)((ps->_Sh[_F0] & _FFRAC) != 0 || ps->_Sh[_F1] != 0
218+
? _NANCODE
219+
: _INFCODE);
220+
if (xchar == 0 && 0 < (xchar = _FDnorm(ps)))
221+
return 0;
222+
223+
short ret = 0;
224+
if (0 < lexp && _FMAX - xchar <= lexp) { // overflow, return +/-INF
225+
*px = ps->_Sh[_F0] & _FSIGN ? -_FInf._Float : _FInf._Float;
226+
ret = _INFCODE;
227+
} else if (-xchar < lexp) { // finite result, repack
228+
ps->_Sh[_F0] =
229+
(unsigned short)(ps->_Sh[_F0] & ~_FMASK | (lexp + xchar) << _FOFF);
230+
ret = _FINITE;
231+
} else { // denormalized, scale
232+
unsigned short sign = (unsigned short)(ps->_Sh[_F0] & _FSIGN);
233+
234+
ps->_Sh[_F0] = (unsigned short)(1 << _FOFF | ps->_Sh[_F0] & _FFRAC);
235+
lexp += xchar - 1;
236+
if (lexp < -(16 + 1 + _FOFF) || 0 <= lexp) { // underflow, return +/-0
237+
ps->_Sh[_F0] = sign;
238+
ps->_Sh[_F1] = 0;
239+
ret = 0;
240+
} else { // nonzero, align fraction
241+
ret = _FINITE;
242+
short xexp = (short)lexp;
243+
unsigned short psx = 0;
244+
245+
if (xexp <= -16) { // scale by words
246+
psx = ps->_Sh[_F1] | (psx != 0 ? 1 : 0);
247+
ps->_Sh[_F1] = ps->_Sh[_F0];
248+
ps->_Sh[_F0] = 0;
249+
xexp += 16;
250+
}
251+
if ((xexp = (short)-xexp) != 0) { // scale by bits
252+
psx = (ps->_Sh[_F1] << (16 - xexp)) | (psx != 0 ? 1 : 0);
253+
ps->_Sh[_F1] = (unsigned short)(ps->_Sh[_F1] >> xexp |
254+
ps->_Sh[_F0] << (16 - xexp));
255+
ps->_Sh[_F0] >>= xexp;
256+
}
257+
258+
ps->_Sh[_F0] |= sign;
259+
if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh[_F1] & 0x0001) != 0) &&
260+
(++ps->_Sh[_F1] & 0xffff) == 0)
261+
++ps->_Sh[_F0]; // round up
262+
else if (ps->_Sh[_F0] == sign && ps->_Sh[_F1] == 0)
263+
ret = 0;
264+
}
265+
}
266+
267+
return ret;
268+
}
269+
270+
DEVICE_EXTERN_C
271+
short _FExp(float *px, float y,
272+
short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge
273+
static const float hugexp = FHUGE_EXP;
274+
_Dconst _FInf = {INIT(_FMAX << _FOFF)};
275+
static const float p[] = {1.0F, 60.09114349F};
276+
static const float q[] = {12.01517514F, 120.18228722F};
277+
static const float c1 = (22713.0F / 32768.0F);
278+
static const float c2 = 1.4286068203094172321214581765680755e-6F;
279+
static const float invln2 = 1.4426950408889634073599246810018921F;
280+
short ret = 0;
281+
if (*px < -hugexp || y == 0.0F) { // certain underflow
282+
*px = 0.0F;
283+
} else if (hugexp < *px) { // certain overflow
284+
*px = _FInf._Float;
285+
ret = _INFCODE;
286+
} else { // xexp won't overflow
287+
float g = *px * invln2;
288+
short xexp = (short)(g + (g < 0.0F ? -0.5F : +0.5F));
289+
290+
g = xexp;
291+
g = (float)((*px - g * c1) - g * c2);
292+
if (-__FLT_EPSILON__ < g && g < __FLT_EPSILON__) {
293+
*px = y;
294+
} else { // g * g worth computing
295+
const float z = g * g;
296+
const float w = q[0] * z + q[1];
297+
298+
g *= z + p[1];
299+
*px = (w + g) / (w - g) * 2.0F * y;
300+
--xexp;
301+
}
302+
ret = _FDscale(px, (long)xexp + eoff);
303+
}
304+
return ret;
305+
}
306+
307+
DEVICE_EXTERN_C
308+
float _FCosh(float x, float y) { // compute y * cosh(x), |y| <= 1
309+
switch (_FDtest(&x)) { // test for special codes
310+
case _NANCODE:
311+
case _INFCODE:
312+
return x;
313+
case 0:
314+
return y;
315+
default: // finite
316+
if (y == 0.0F)
317+
return y;
318+
319+
if (x < 0.0F)
320+
x = -x;
321+
322+
if (x < _FXbig) { // worth adding in exp(-x)
323+
_FExp(&x, 1.0F, -1);
324+
return y * (x + 0.25F / x);
325+
}
326+
_FExp(&x, y, -1);
327+
return x;
328+
}
329+
}
330+
331+
DEVICE_EXTERN_C
332+
float _FSinh(float x, float y) { // compute y * sinh(x), |y| <= 1
333+
_Dconst _FRteps = {INIT((_FBIAS - NBITS / 2) << _FOFF)};
334+
static const float p[] = {0.00020400F, 0.00832983F, 0.16666737F, 0.99999998F};
335+
short neg;
336+
337+
switch (_FDtest(&x)) { // test for special codes
338+
case _NANCODE:
339+
return x;
340+
case _INFCODE:
341+
return y != 0.0F ? x : FSIGN(x) ? -y : y;
342+
case 0:
343+
return x * y;
344+
default: // finite
345+
if (y == 0.0F)
346+
return x < 0.0F ? -y : y;
347+
348+
if (x < 0.0F) {
349+
x = -x;
350+
neg = 1;
351+
} else
352+
neg = 0;
353+
354+
if (x < _FRteps._Float) {
355+
x *= y; // x tiny
356+
} else if (x < 1.0F) {
357+
float w = x * x;
358+
359+
x += ((p[0] * w + p[1]) * w + p[2]) * w * x;
360+
x *= y;
361+
} else if (x < _FXbig) { // worth adding in exp(-x)
362+
_FExp(&x, 1.0F, -1);
363+
x = y * (x - 0.25F / x);
364+
} else
365+
_FExp(&x, y, -1);
366+
367+
return neg ? -x : x;
368+
}
369+
}
370+
#endif

0 commit comments

Comments
 (0)