Skip to content

Commit 503efc1

Browse files
committed
[SYCL] Add support for MSVC internal math functions in device library
Signed-off-by: gejin <[email protected]>
1 parent f43883b commit 503efc1

File tree

3 files changed

+519
-4
lines changed

3 files changed

+519
-4
lines changed

libdevice/cmath_wrapper.cpp

+243-4
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,27 @@ float logf(float x) { return __devicelib_logf(x); }
1717

1818
DEVICE_EXTERN_C
1919
float expf(float x) { return __devicelib_expf(x); }
20-
20+
// On Windows, the math.h includes wrapper definition for functions:
21+
// frexpf, ldexpf, hypotf, so we can't provide these 3 functions in
22+
// device libraries which may lead to multiple definition error.
23+
// The "hypotf" on Windows will call an internal function "_hypotf"
24+
// and frexpf, ldexpf will call corresponding double version:
25+
// frexp and ldexp. frexpf and ldexpf can only be used on platforms
26+
// with fp64 support currently.
27+
#ifndef _WIN32
2128
DEVICE_EXTERN_C
2229
float frexpf(float x, int *exp) { return __devicelib_frexpf(x, exp); }
2330

2431
DEVICE_EXTERN_C
2532
float ldexpf(float x, int exp) { return __devicelib_ldexpf(x, exp); }
2633

34+
DEVICE_EXTERN_C
35+
float hypotf(float x, float y) { return __devicelib_hypotf(x, y); }
36+
#else
37+
DEVICE_EXTERN_C
38+
float _hypotf(float x, float y) { return __devicelib_hypotf(x, y); }
39+
#endif
40+
2741
DEVICE_EXTERN_C
2842
float log10f(float x) { return __devicelib_log10f(x); }
2943

@@ -54,9 +68,6 @@ float sqrtf(float x) { return __devicelib_sqrtf(x); }
5468
DEVICE_EXTERN_C
5569
float cbrtf(float x) { return __devicelib_cbrtf(x); }
5670

57-
DEVICE_EXTERN_C
58-
float hypotf(float x, float y) { return __devicelib_hypotf(x, y); }
59-
6071
DEVICE_EXTERN_C
6172
float erff(float x) { return __devicelib_erff(x); }
6273

@@ -128,3 +139,231 @@ float asinhf(float x) { return __devicelib_asinhf(x); }
128139

129140
DEVICE_EXTERN_C
130141
float atanhf(float x) { return __devicelib_atanhf(x); }
142+
143+
#if defined(_WIN32)
144+
#include <math.h>
145+
union _Fval { // pun floating type as integer array
146+
unsigned short _Sh[8];
147+
float _Val;
148+
};
149+
150+
union _Dconst { // pun float types as integer array
151+
unsigned short _Word[2]; // TRANSITION, ABI: Twice as large as necessary.
152+
float _Float;
153+
};
154+
155+
#define _F0 1 // little-endian
156+
#define _F1 0
157+
158+
// IEEE 754 float properties
159+
#define FHUGE_EXP (int)(_FMAX * 900L / 1000)
160+
161+
#define NBITS (16 + _FOFF)
162+
#define FSIGN(x) (((_Fval *)(char *)&(x))->_Sh[_F0] & _FSIGN)
163+
164+
#define INIT(w0) \
165+
{ 0, w0 }
166+
167+
#define _FXbig (float)((NBITS + 1) * 347L / 1000)
168+
DEVICE_EXTERN_C
169+
short _FDtest(float *px) { // categorize *px
170+
_Fval *ps = (_Fval *)(char *)px;
171+
short ret = 0;
172+
if ((ps->_Sh[_F0] & _FMASK) == _FMAX << _FOFF)
173+
ret = (short)((ps->_Sh[_F0] & _FFRAC) != 0 || ps->_Sh[_F1] != 0 ? _NANCODE
174+
: _INFCODE);
175+
else if ((ps->_Sh[_F0] & ~_FSIGN) != 0 || ps->_Sh[_F1] != 0)
176+
ret = (ps->_Sh[_F0] & _FMASK) == 0 ? _DENORM : _FINITE;
177+
178+
return ret;
179+
}
180+
181+
DEVICE_EXTERN_C
182+
short _FDnorm(_Fval *ps) { // normalize float fraction
183+
short xchar;
184+
unsigned short sign = (unsigned short)(ps->_Sh[_F0] & _FSIGN);
185+
186+
xchar = 1;
187+
if ((ps->_Sh[_F0] &= _FFRAC) != 0 || ps->_Sh[_F1]) { // nonzero, scale
188+
if (ps->_Sh[_F0] == 0) {
189+
ps->_Sh[_F0] = ps->_Sh[_F1];
190+
ps->_Sh[_F1] = 0;
191+
xchar -= 16;
192+
}
193+
194+
for (; ps->_Sh[_F0] < 1 << _FOFF; --xchar) { // shift left by 1
195+
ps->_Sh[_F0] = (unsigned short)(ps->_Sh[_F0] << 1 | ps->_Sh[_F1] >> 15);
196+
ps->_Sh[_F1] <<= 1;
197+
}
198+
for (; 1 << (_FOFF + 1) <= ps->_Sh[_F0]; ++xchar) { // shift right by 1
199+
ps->_Sh[_F1] = (unsigned short)(ps->_Sh[_F1] >> 1 | ps->_Sh[_F0] << 15);
200+
ps->_Sh[_F0] >>= 1;
201+
}
202+
ps->_Sh[_F0] &= _FFRAC;
203+
}
204+
ps->_Sh[_F0] |= sign;
205+
return xchar;
206+
}
207+
208+
DEVICE_EXTERN_C
209+
short _FDscale(float *px, long lexp) { // scale *px by 2^xexp with checking
210+
_Dconst _FInf = {INIT(_FMAX << _FOFF)};
211+
_Fval *ps = (_Fval *)(char *)px;
212+
short xchar = (short)((ps->_Sh[_F0] & _FMASK) >> _FOFF);
213+
214+
if (xchar == _FMAX)
215+
return (short)((ps->_Sh[_F0] & _FFRAC) != 0 || ps->_Sh[_F1] != 0
216+
? _NANCODE
217+
: _INFCODE);
218+
if (xchar == 0 && 0 < (xchar = _FDnorm(ps)))
219+
return 0;
220+
221+
short ret = 0;
222+
if (0 < lexp && _FMAX - xchar <= lexp) { // overflow, return +/-INF
223+
*px = ps->_Sh[_F0] & _FSIGN ? -_FInf._Float : _FInf._Float;
224+
ret = _INFCODE;
225+
} else if (-xchar < lexp) { // finite result, repack
226+
ps->_Sh[_F0] =
227+
(unsigned short)(ps->_Sh[_F0] & ~_FMASK | (lexp + xchar) << _FOFF);
228+
ret = _FINITE;
229+
} else { // denormalized, scale
230+
unsigned short sign = (unsigned short)(ps->_Sh[_F0] & _FSIGN);
231+
232+
ps->_Sh[_F0] = (unsigned short)(1 << _FOFF | ps->_Sh[_F0] & _FFRAC);
233+
lexp += xchar - 1;
234+
if (lexp < -(16 + 1 + _FOFF) || 0 <= lexp) { // underflow, return +/-0
235+
ps->_Sh[_F0] = sign;
236+
ps->_Sh[_F1] = 0;
237+
ret = 0;
238+
} else { // nonzero, align fraction
239+
ret = _FINITE;
240+
short xexp = (short)lexp;
241+
unsigned short psx = 0;
242+
243+
if (xexp <= -16) { // scale by words
244+
psx = ps->_Sh[_F1] | (psx != 0 ? 1 : 0);
245+
ps->_Sh[_F1] = ps->_Sh[_F0];
246+
ps->_Sh[_F0] = 0;
247+
xexp += 16;
248+
}
249+
if ((xexp = (short)-xexp) != 0) { // scale by bits
250+
psx = (ps->_Sh[_F1] << (16 - xexp)) | (psx != 0 ? 1 : 0);
251+
ps->_Sh[_F1] = (unsigned short)(ps->_Sh[_F1] >> xexp |
252+
ps->_Sh[_F0] << (16 - xexp));
253+
ps->_Sh[_F0] >>= xexp;
254+
}
255+
256+
ps->_Sh[_F0] |= sign;
257+
if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh[_F1] & 0x0001) != 0) &&
258+
(++ps->_Sh[_F1] & 0xffff) == 0)
259+
++ps->_Sh[_F0]; // round up
260+
else if (ps->_Sh[_F0] == sign && ps->_Sh[_F1] == 0)
261+
ret = 0;
262+
}
263+
}
264+
265+
return ret;
266+
}
267+
268+
DEVICE_EXTERN_C
269+
short _FExp(float *px, float y,
270+
short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge
271+
static const float hugexp = FHUGE_EXP;
272+
_Dconst _FInf = {INIT(_FMAX << _FOFF)};
273+
static const float p[] = {1.0F, 60.09114349F};
274+
static const float q[] = {12.01517514F, 120.18228722F};
275+
static const float c1 = (22713.0F / 32768.0F);
276+
static const float c2 = 1.4286068203094172321214581765680755e-6F;
277+
static const float invln2 = 1.4426950408889634073599246810018921F;
278+
short ret = 0;
279+
if (*px < -hugexp || y == 0.0F) { // certain underflow
280+
*px = 0.0F;
281+
} else if (hugexp < *px) { // certain overflow
282+
*px = _FInf._Float;
283+
ret = _INFCODE;
284+
} else { // xexp won't overflow
285+
float g = *px * invln2;
286+
short xexp = (short)(g + (g < 0.0F ? -0.5F : +0.5F));
287+
288+
g = xexp;
289+
g = (float)((*px - g * c1) - g * c2);
290+
if (-__FLT_EPSILON__ < g && g < __FLT_EPSILON__) {
291+
*px = y;
292+
} else { // g * g worth computing
293+
const float z = g * g;
294+
const float w = q[0] * z + q[1];
295+
296+
g *= z + p[1];
297+
*px = (w + g) / (w - g) * 2.0F * y;
298+
--xexp;
299+
}
300+
ret = _FDscale(px, (long)xexp + eoff);
301+
}
302+
return ret;
303+
}
304+
305+
DEVICE_EXTERN_C
306+
float _FCosh(float x, float y) { // compute y * cosh(x), |y| <= 1
307+
switch (_FDtest(&x)) { // test for special codes
308+
case _NANCODE:
309+
case _INFCODE:
310+
return x;
311+
case 0:
312+
return y;
313+
default: // finite
314+
if (y == 0.0F)
315+
return y;
316+
317+
if (x < 0.0F)
318+
x = -x;
319+
320+
if (x < _FXbig) { // worth adding in exp(-x)
321+
_FExp(&x, 1.0F, -1);
322+
return y * (x + 0.25F / x);
323+
}
324+
_FExp(&x, y, -1);
325+
return x;
326+
}
327+
}
328+
329+
DEVICE_EXTERN_C
330+
float _FSinh(float x, float y) { // compute y * sinh(x), |y| <= 1
331+
_Dconst _FRteps = {INIT((_FBIAS - NBITS / 2) << _FOFF)};
332+
static const float p[] = {0.00020400F, 0.00832983F, 0.16666737F, 0.99999998F};
333+
short neg;
334+
335+
switch (_FDtest(&x)) { // test for special codes
336+
case _NANCODE:
337+
return x;
338+
case _INFCODE:
339+
return y != 0.0F ? x : FSIGN(x) ? -y : y;
340+
case 0:
341+
return x * y;
342+
default: // finite
343+
if (y == 0.0F)
344+
return x < 0.0F ? -y : y;
345+
346+
if (x < 0.0F) {
347+
x = -x;
348+
neg = 1;
349+
} else
350+
neg = 0;
351+
352+
if (x < _FRteps._Float) {
353+
x *= y; // x tiny
354+
} else if (x < 1.0F) {
355+
float w = x * x;
356+
357+
x += ((p[0] * w + p[1]) * w + p[2]) * w * x;
358+
x *= y;
359+
} else if (x < _FXbig) { // worth adding in exp(-x)
360+
_FExp(&x, 1.0F, -1);
361+
x = y * (x - 0.25F / x);
362+
} else
363+
_FExp(&x, y, -1);
364+
365+
return neg ? -x : x;
366+
}
367+
}
368+
369+
#endif

0 commit comments

Comments
 (0)