Skip to content

[SYCL] Add support for MSVC internal math functions in device library #1441

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 241 additions & 1 deletion libdevice/cmath_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,20 @@ float logf(float x) { return __devicelib_logf(x); }

DEVICE_EXTERN_C
float expf(float x) { return __devicelib_expf(x); }

// On Windows, the math.h includes wrapper definition for functions:
// frexpf, ldexpf, hypotf, so we can't provide these 3 functions in
// device libraries which may lead to multiple definition error.
// The "hypotf" on Windows will call an internal function "_hypotf"
// and frexpf, ldexpf will call corresponding double version:
// frexp and ldexp. frexpf and ldexpf can only be used on platforms
// with fp64 support currently.
#ifndef _WIN32
DEVICE_EXTERN_C
float frexpf(float x, int *exp) { return __devicelib_frexpf(x, exp); }

DEVICE_EXTERN_C
float ldexpf(float x, int exp) { return __devicelib_ldexpf(x, exp); }
#endif

DEVICE_EXTERN_C
float log10f(float x) { return __devicelib_log10f(x); }
Expand Down Expand Up @@ -54,8 +62,13 @@ float sqrtf(float x) { return __devicelib_sqrtf(x); }
DEVICE_EXTERN_C
float cbrtf(float x) { return __devicelib_cbrtf(x); }

#ifndef _WIN32
DEVICE_EXTERN_C
float hypotf(float x, float y) { return __devicelib_hypotf(x, y); }
#else
DEVICE_EXTERN_C
float _hypotf(float x, float y) { return __devicelib_hypotf(x, y); }
#endif

DEVICE_EXTERN_C
float erff(float x) { return __devicelib_erff(x); }
Expand Down Expand Up @@ -128,3 +141,230 @@ float asinhf(float x) { return __devicelib_asinhf(x); }

DEVICE_EXTERN_C
float atanhf(float x) { return __devicelib_atanhf(x); }

#if defined(_WIN32)
#include <math.h>
union _Fval { // pun floating type as integer array
unsigned short _Sh[8];
float _Val;
};

union _Dconst { // pun float types as integer array
unsigned short _Word[2]; // TRANSITION, ABI: Twice as large as necessary.
float _Float;
};

#define _F0 1 // little-endian
#define _F1 0

// IEEE 754 float properties
#define FHUGE_EXP (int)(_FMAX * 900L / 1000)

#define NBITS (16 + _FOFF)
#define FSIGN(x) (((_Fval *)(char *)&(x))->_Sh[_F0] & _FSIGN)

#define INIT(w0) \
{ 0, w0 }

#define _FXbig (float)((NBITS + 1) * 347L / 1000)
DEVICE_EXTERN_C
short _FDtest(float *px) { // categorize *px
_Fval *ps = (_Fval *)(char *)px;
short ret = 0;
if ((ps->_Sh[_F0] & _FMASK) == _FMAX << _FOFF)
ret = (short)((ps->_Sh[_F0] & _FFRAC) != 0 || ps->_Sh[_F1] != 0 ? _NANCODE
: _INFCODE);
else if ((ps->_Sh[_F0] & ~_FSIGN) != 0 || ps->_Sh[_F1] != 0)
ret = (ps->_Sh[_F0] & _FMASK) == 0 ? _DENORM : _FINITE;

return ret;
}

DEVICE_EXTERN_C
short _FDnorm(_Fval *ps) { // normalize float fraction
short xchar;
unsigned short sign = (unsigned short)(ps->_Sh[_F0] & _FSIGN);

xchar = 1;
if ((ps->_Sh[_F0] &= _FFRAC) != 0 || ps->_Sh[_F1]) { // nonzero, scale
if (ps->_Sh[_F0] == 0) {
ps->_Sh[_F0] = ps->_Sh[_F1];
ps->_Sh[_F1] = 0;
xchar -= 16;
}

for (; ps->_Sh[_F0] < 1 << _FOFF; --xchar) { // shift left by 1
ps->_Sh[_F0] = (unsigned short)(ps->_Sh[_F0] << 1 | ps->_Sh[_F1] >> 15);
ps->_Sh[_F1] <<= 1;
}
for (; 1 << (_FOFF + 1) <= ps->_Sh[_F0]; ++xchar) { // shift right by 1
ps->_Sh[_F1] = (unsigned short)(ps->_Sh[_F1] >> 1 | ps->_Sh[_F0] << 15);
ps->_Sh[_F0] >>= 1;
}
ps->_Sh[_F0] &= _FFRAC;
}
ps->_Sh[_F0] |= sign;
return xchar;
}

DEVICE_EXTERN_C
short _FDscale(float *px, long lexp) { // scale *px by 2^xexp with checking
_Dconst _FInf = {INIT(_FMAX << _FOFF)};
_Fval *ps = (_Fval *)(char *)px;
short xchar = (short)((ps->_Sh[_F0] & _FMASK) >> _FOFF);

if (xchar == _FMAX)
return (short)((ps->_Sh[_F0] & _FFRAC) != 0 || ps->_Sh[_F1] != 0
? _NANCODE
: _INFCODE);
if (xchar == 0 && 0 < (xchar = _FDnorm(ps)))
return 0;

short ret = 0;
if (0 < lexp && _FMAX - xchar <= lexp) { // overflow, return +/-INF
*px = ps->_Sh[_F0] & _FSIGN ? -_FInf._Float : _FInf._Float;
ret = _INFCODE;
} else if (-xchar < lexp) { // finite result, repack
ps->_Sh[_F0] =
(unsigned short)(ps->_Sh[_F0] & ~_FMASK | (lexp + xchar) << _FOFF);
ret = _FINITE;
} else { // denormalized, scale
unsigned short sign = (unsigned short)(ps->_Sh[_F0] & _FSIGN);

ps->_Sh[_F0] = (unsigned short)(1 << _FOFF | ps->_Sh[_F0] & _FFRAC);
lexp += xchar - 1;
if (lexp < -(16 + 1 + _FOFF) || 0 <= lexp) { // underflow, return +/-0
ps->_Sh[_F0] = sign;
ps->_Sh[_F1] = 0;
ret = 0;
} else { // nonzero, align fraction
ret = _FINITE;
short xexp = (short)lexp;
unsigned short psx = 0;

if (xexp <= -16) { // scale by words
psx = ps->_Sh[_F1] | (psx != 0 ? 1 : 0);
ps->_Sh[_F1] = ps->_Sh[_F0];
ps->_Sh[_F0] = 0;
xexp += 16;
}
if ((xexp = (short)-xexp) != 0) { // scale by bits
psx = (ps->_Sh[_F1] << (16 - xexp)) | (psx != 0 ? 1 : 0);
ps->_Sh[_F1] = (unsigned short)(ps->_Sh[_F1] >> xexp |
ps->_Sh[_F0] << (16 - xexp));
ps->_Sh[_F0] >>= xexp;
}

ps->_Sh[_F0] |= sign;
if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh[_F1] & 0x0001) != 0) &&
(++ps->_Sh[_F1] & 0xffff) == 0)
++ps->_Sh[_F0]; // round up
else if (ps->_Sh[_F0] == sign && ps->_Sh[_F1] == 0)
ret = 0;
}
}

return ret;
}

DEVICE_EXTERN_C
short _FExp(float *px, float y,
short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge
static const float hugexp = FHUGE_EXP;
_Dconst _FInf = {INIT(_FMAX << _FOFF)};
static const float p[] = {1.0F, 60.09114349F};
static const float q[] = {12.01517514F, 120.18228722F};
static const float c1 = (22713.0F / 32768.0F);
static const float c2 = 1.4286068203094172321214581765680755e-6F;
static const float invln2 = 1.4426950408889634073599246810018921F;
short ret = 0;
if (*px < -hugexp || y == 0.0F) { // certain underflow
*px = 0.0F;
} else if (hugexp < *px) { // certain overflow
*px = _FInf._Float;
ret = _INFCODE;
} else { // xexp won't overflow
float g = *px * invln2;
short xexp = (short)(g + (g < 0.0F ? -0.5F : +0.5F));

g = xexp;
g = (float)((*px - g * c1) - g * c2);
if (-__FLT_EPSILON__ < g && g < __FLT_EPSILON__) {
*px = y;
} else { // g * g worth computing
const float z = g * g;
const float w = q[0] * z + q[1];

g *= z + p[1];
*px = (w + g) / (w - g) * 2.0F * y;
--xexp;
}
ret = _FDscale(px, (long)xexp + eoff);
}
return ret;
}

DEVICE_EXTERN_C
float _FCosh(float x, float y) { // compute y * cosh(x), |y| <= 1
switch (_FDtest(&x)) { // test for special codes
case _NANCODE:
case _INFCODE:
return x;
case 0:
return y;
default: // finite
if (y == 0.0F)
return y;

if (x < 0.0F)
x = -x;

if (x < _FXbig) { // worth adding in exp(-x)
_FExp(&x, 1.0F, -1);
return y * (x + 0.25F / x);
}
_FExp(&x, y, -1);
return x;
}
}

DEVICE_EXTERN_C
float _FSinh(float x, float y) { // compute y * sinh(x), |y| <= 1
_Dconst _FRteps = {INIT((_FBIAS - NBITS / 2) << _FOFF)};
static const float p[] = {0.00020400F, 0.00832983F, 0.16666737F, 0.99999998F};
short neg;

switch (_FDtest(&x)) { // test for special codes
case _NANCODE:
return x;
case _INFCODE:
return y != 0.0F ? x : FSIGN(x) ? -y : y;
case 0:
return x * y;
default: // finite
if (y == 0.0F)
return x < 0.0F ? -y : y;

if (x < 0.0F) {
x = -x;
neg = 1;
} else
neg = 0;

if (x < _FRteps._Float) {
x *= y; // x tiny
} else if (x < 1.0F) {
float w = x * x;

x += ((p[0] * w + p[1]) * w + p[2]) * w * x;
x *= y;
} else if (x < _FXbig) { // worth adding in exp(-x)
_FExp(&x, 1.0F, -1);
x = y * (x - 0.25F / x);
} else
_FExp(&x, y, -1);

return neg ? -x : x;
}
}
#endif
Loading