[libclc] Move ceil/fabs/floor/rint/trunc to CLC library (#114774)

frasercrmck · web-flow · commit 293c78ba0a93 · 2024-11-04T16:35:14.000Z
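On most targets, each of these functions is mapped directly to its LLVM
intrinsic (llvm.ceil, llvm.fabs, llvm.floor, llvm.rint, llvm.trunc).

The clspv and spirv targets are the exception: they don't declare or define
any of these CLC functions, and instead map each __clc_<fn> name to its
corresponding OpenCL symbol (e.g. __clc_ceil is defined as ceil).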
diff --git a/libclc/clc/include/clc/math/clc_ceil.h b/libclc/clc/include/clc/math/clc_ceil.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_MATH_CLC_CEIL_H__
+#define __CLC_MATH_CLC_CEIL_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible ceil
+#define __clc_ceil ceil
+#else
+
+// Map the function to an LLVM intrinsic
+#define __CLC_FUNCTION __clc_ceil
+#define __CLC_INTRINSIC "llvm.ceil"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_INTRINSIC
+#undef __CLC_FUNCTION
+
+#endif
+
+#endif // __CLC_MATH_CLC_CEIL_H__
diff --git a/libclc/clc/include/clc/math/clc_fabs.h b/libclc/clc/include/clc/math/clc_fabs.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_MATH_CLC_FABS_H__
+#define __CLC_MATH_CLC_FABS_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible fabs
+#define __clc_fabs fabs
+#else
+
+// Map the function to an LLVM intrinsic
+#define __CLC_FUNCTION __clc_fabs
+#define __CLC_INTRINSIC "llvm.fabs"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_INTRINSIC
+#undef __CLC_FUNCTION
+
+#endif
+
+#endif // __CLC_MATH_CLC_FABS_H__
diff --git a/libclc/clc/include/clc/math/clc_floor.h b/libclc/clc/include/clc/math/clc_floor.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_MATH_CLC_FLOOR_H__
+#define __CLC_MATH_CLC_FLOOR_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible floor
+#define __clc_floor floor
+#else
+
+// Map the function to an LLVM intrinsic
+#define __CLC_FUNCTION __clc_floor
+#define __CLC_INTRINSIC "llvm.floor"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_INTRINSIC
+#undef __CLC_FUNCTION
+
+#endif
+
+#endif // __CLC_MATH_CLC_FLOOR_H__
diff --git a/libclc/clc/include/clc/math/clc_rint.h b/libclc/clc/include/clc/math/clc_rint.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_MATH_CLC_RINT_H__
+#define __CLC_MATH_CLC_RINT_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible rint
+#define __clc_rint rint
+#else
+
+// Map the function to an LLVM intrinsic
+#define __CLC_FUNCTION __clc_rint
+#define __CLC_INTRINSIC "llvm.rint"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_INTRINSIC
+#undef __CLC_FUNCTION
+
+#endif
+
+#endif // __CLC_MATH_CLC_RINT_H__
diff --git a/libclc/clc/include/clc/math/clc_trunc.h b/libclc/clc/include/clc/math/clc_trunc.h
@@ -0,0 +1,19 @@
+#ifndef __CLC_MATH_CLC_TRUNC_H__
+#define __CLC_MATH_CLC_TRUNC_H__
+
+#if defined(CLC_CLSPV) || defined(CLC_SPIRV)
+// clspv and spir-v targets provide their own OpenCL-compatible trunc
+#define __clc_trunc trunc
+#else
+
+// Map the function to an LLVM intrinsic
+#define __CLC_FUNCTION __clc_trunc
+#define __CLC_INTRINSIC "llvm.trunc"
+#include <clc/math/unary_intrin.inc>
+
+#undef __CLC_INTRINSIC
+#undef __CLC_FUNCTION
+
+#endif
+
+#endif // __CLC_MATH_CLC_TRUNC_H__
diff --git a/libclc/clc/include/clc/math/unary_decl.inc b/libclc/clc/include/clc/math/unary_decl.inc
diff --git a/libclc/clc/include/clc/math/unary_intrin.inc b/libclc/clc/include/clc/math/unary_intrin.inc
@@ -3,7 +3,8 @@ _CLC_OVERLOAD float2 __CLC_FUNCTION(float2 f) __asm(__CLC_INTRINSIC ".v2f32");
 _CLC_OVERLOAD float3 __CLC_FUNCTION(float3 f) __asm(__CLC_INTRINSIC ".v3f32");
 _CLC_OVERLOAD float4 __CLC_FUNCTION(float4 f) __asm(__CLC_INTRINSIC ".v4f32");
 _CLC_OVERLOAD float8 __CLC_FUNCTION(float8 f) __asm(__CLC_INTRINSIC ".v8f32");
-_CLC_OVERLOAD float16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC ".v16f32");
+_CLC_OVERLOAD float16 __CLC_FUNCTION(float16 f) __asm(__CLC_INTRINSIC
+                                                      ".v16f32");
 
 #ifdef cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -12,11 +13,12 @@ _CLC_OVERLOAD double2 __CLC_FUNCTION(double2 d) __asm(__CLC_INTRINSIC ".v2f64");
 _CLC_OVERLOAD double3 __CLC_FUNCTION(double3 d) __asm(__CLC_INTRINSIC ".v3f64");
 _CLC_OVERLOAD double4 __CLC_FUNCTION(double4 d) __asm(__CLC_INTRINSIC ".v4f64");
 _CLC_OVERLOAD double8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64");
-_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64");
+_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC
+                                                        ".v16f64");
 #endif
 
 #ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16: enable
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
 _CLC_OVERLOAD half __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16");
 _CLC_OVERLOAD half2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16");
 _CLC_OVERLOAD half3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16");
diff --git a/libclc/clc/include/clc/utils.h b/libclc/clc/include/clc/utils.h
@@ -0,0 +1,10 @@
+#ifndef __CLC_UTILS_H__
+#define __CLC_UTILS_H__
+
+#define __CLC_CONCAT(x, y) x##y
+#define __CLC_XCONCAT(x, y) __CLC_CONCAT(x, y)
+
+#define __CLC_STR(x) #x
+#define __CLC_XSTR(x) __CLC_STR(x)
+
+#endif // __CLC_UTILS_H__
diff --git a/libclc/generic/lib/clcmacro.h b/libclc/generic/lib/clcmacro.h
@@ -1,3 +1,4 @@
+#include <clc/clc.h>
 #include <utils.h>
 
 #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
diff --git a/libclc/generic/lib/math/ceil.cl b/libclc/generic/lib/math/ceil.cl
@@ -1,10 +1,6 @@
-#include <clc/clc.h>
 #include "../clcmacro.h"
-
-// Map the llvm intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_ceil
-#define __CLC_INTRINSIC "llvm.ceil"
-#include "math/unary_intrin.inc"
+#include <clc/clc.h>
+#include <clc/math/clc_ceil.h>
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION ceil
diff --git a/libclc/generic/lib/math/clc_fmod.cl b/libclc/generic/lib/math/clc_fmod.cl
@@ -21,6 +21,8 @@
  */
 
 #include <clc/clc.h>
+#include <clc/math/clc_floor.h>
+#include <clc/math/clc_trunc.h>
 
 #include <math/clc_remainder.h>
 #include "../clcmacro.h"
@@ -119,7 +121,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y)
 
     for (i = 0; i < ntimes; i++) {
         // Compute integral multiplier
-        t = trunc(dx / w);
+        t = __clc_trunc(dx / w);
 
         // Compute w * t in quad precision
         p = w * t;
@@ -138,7 +140,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y)
 
     // One more time
     // Variable todd says whether the integer t is odd or not
-    t = floor(dx / w);
+    t = __clc_floor(dx / w);
     long lt = (long)t;
     int todd = lt & 1;
 
diff --git a/libclc/generic/lib/math/clc_pow.cl b/libclc/generic/lib/math/clc_pow.cl
@@ -21,6 +21,7 @@
  */
 
 #include <clc/clc.h>
+#include <clc/math/clc_fabs.h>
 
 #include "config.h"
 #include "math.h"
@@ -80,7 +81,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pow(float x, float y)
      *  First handle case that x is close to 1
      */
     float r = 1.0f - as_float(ax);
-    int near1 = fabs(r) < 0x1.0p-4f;
+    int near1 = __clc_fabs(r) < 0x1.0p-4f;
     float r2 = r*r;
 
     /* Coefficients are just 1/3, 1/4, 1/5 and 1/6 */
diff --git a/libclc/generic/lib/math/clc_pown.cl b/libclc/generic/lib/math/clc_pown.cl
@@ -21,6 +21,7 @@
  */
 
 #include <clc/clc.h>
+#include <clc/math/clc_fabs.h>
 
 #include "config.h"
 #include "math.h"
@@ -78,7 +79,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_pown(float x, int ny)
     // Extra precise log calculation
     // First handle case that x is close to 1
     float r = 1.0f - as_float(ax);
-    int near1 = fabs(r) < 0x1.0p-4f;
+    int near1 = __clc_fabs(r) < 0x1.0p-4f;
     float r2 = r*r;
 
     // Coefficients are just 1/3, 1/4, 1/5 and 1/6
diff --git a/libclc/generic/lib/math/clc_powr.cl b/libclc/generic/lib/math/clc_powr.cl
@@ -21,6 +21,7 @@
  */
 
 #include <clc/clc.h>
+#include <clc/math/clc_fabs.h>
 
 #include "config.h"
 #include "math.h"
@@ -76,7 +77,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_powr(float x, float y)
     // Extra precise log calculation
     // First handle case that x is close to 1
     float r = 1.0f - as_float(ax);
-    int near1 = fabs(r) < 0x1.0p-4f;
+    int near1 = __clc_fabs(r) < 0x1.0p-4f;
     float r2 = r*r;
 
     // Coefficients are just 1/3, 1/4, 1/5 and 1/6
diff --git a/libclc/generic/lib/math/clc_remainder.cl b/libclc/generic/lib/math/clc_remainder.cl
@@ -21,6 +21,8 @@
  */
 
 #include <clc/clc.h>
+#include <clc/math/clc_floor.h>
+#include <clc/math/clc_trunc.h>
 
 #include <math/clc_remainder.h>
 #include "../clcmacro.h"
@@ -129,7 +131,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y)
 
     for (i = 0; i < ntimes; i++) {
         // Compute integral multiplier
-        t = trunc(dx / w);
+        t = __clc_trunc(dx / w);
 
         // Compute w * t in quad precision
         p = w * t;
@@ -148,7 +150,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y)
 
     // One more time
     // Variable todd says whether the integer t is odd or not
-    t = floor(dx / w);
+    t = __clc_floor(dx / w);
     long lt = (long)t;
     int todd = lt & 1;
 
diff --git a/libclc/generic/lib/math/clc_remquo.cl b/libclc/generic/lib/math/clc_remquo.cl
@@ -21,6 +21,8 @@
  */
 
 #include <clc/clc.h>
+#include <clc/math/clc_floor.h>
+#include <clc/math/clc_trunc.h>
 
 #include <math/clc_remainder.h>
 #include "../clcmacro.h"
@@ -154,7 +156,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, __private int *pq
 
     for (i = 0; i < ntimes; i++) {
         // Compute integral multiplier
-        t = trunc(dx / w);
+        t = __clc_trunc(dx / w);
 
         // Compute w * t in quad precision
         p = w * t;
@@ -173,7 +175,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remquo(double x, double y, __private int *pq
 
     // One more time
     // Variable todd says whether the integer t is odd or not
-    t = floor(dx / w);
+    t = __clc_floor(dx / w);
     long lt = (long)t;
     int todd = lt & 1;
 
diff --git a/libclc/generic/lib/math/clc_rootn.cl b/libclc/generic/lib/math/clc_rootn.cl
@@ -21,6 +21,7 @@
  */
 
 #include <clc/clc.h>
+#include <clc/math/clc_fabs.h>
 
 #include "config.h"
 #include "math.h"
@@ -78,7 +79,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rootn(float x, int ny)
     // Extra precise log calculation
     // First handle case that x is close to 1
     float r = 1.0f - as_float(ax);
-    int near1 = fabs(r) < 0x1.0p-4f;
+    int near1 = __clc_fabs(r) < 0x1.0p-4f;
     float r2 = r*r;
 
     // Coefficients are just 1/3, 1/4, 1/5 and 1/6
diff --git a/libclc/generic/lib/math/clc_sqrt.cl b/libclc/generic/lib/math/clc_sqrt.cl
@@ -25,7 +25,7 @@
 // Map the llvm sqrt intrinsic to an OpenCL function.
 #define __CLC_FUNCTION __clc_llvm_intr_sqrt
 #define __CLC_INTRINSIC "llvm.sqrt"
-#include <math/unary_intrin.inc>
+#include <clc/math/unary_intrin.inc>
 #undef __CLC_FUNCTION
 #undef __CLC_INTRINSIC
 
diff --git a/libclc/generic/lib/math/clc_tan.cl b/libclc/generic/lib/math/clc_tan.cl
@@ -20,6 +20,7 @@
  * THE SOFTWARE.
  */
 #include <clc/clc.h>
+#include <clc/math/clc_fabs.h>
 
 #include "math.h"
 #include "sincos_helpers.h"
@@ -48,7 +49,7 @@ _CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tan, float);
 #include "sincosD_piby4.h"
 
 _CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) {
-  double y = fabs(x);
+  double y = __clc_fabs(x);
 
   double r, rr;
   int regn;
@@ -66,4 +67,5 @@ _CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) {
   return isnan(x) || isinf(x) ? as_double(QNANBITPATT_DP64) : as_double(t);
 }
 _CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_tan, double);
+
 #endif
diff --git a/libclc/generic/lib/math/fabs.cl b/libclc/generic/lib/math/fabs.cl
@@ -1,10 +1,6 @@
-#include <clc/clc.h>
 #include "../clcmacro.h"
-
-// Map the llvm intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_fabs
-#define __CLC_INTRINSIC "llvm.fabs"
-#include "math/unary_intrin.inc"
+#include <clc/clc.h>
+#include <clc/math/clc_fabs.h>
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION fabs
diff --git a/libclc/generic/lib/math/floor.cl b/libclc/generic/lib/math/floor.cl
@@ -1,10 +1,6 @@
-#include <clc/clc.h>
 #include "../clcmacro.h"
-
-// Map the llvm intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_floor
-#define __CLC_INTRINSIC "llvm.floor"
-#include "math/unary_intrin.inc"
+#include <clc/clc.h>
+#include <clc/math/clc_floor.h>
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION floor
diff --git a/libclc/generic/lib/math/native_unary_intrinsic.inc b/libclc/generic/lib/math/native_unary_intrinsic.inc
@@ -27,7 +27,7 @@
 #define __CLC_INTRINSIC "llvm." __CLC_XSTR(__CLC_NATIVE_INTRINSIC)
 
 #undef cl_khr_fp64
-#include <math/unary_intrin.inc>
+#include <clc/math/unary_intrin.inc>
 
 #endif
 
diff --git a/libclc/generic/lib/math/rint.cl b/libclc/generic/lib/math/rint.cl
@@ -1,9 +1,5 @@
 #include <clc/clc.h>
-
-// Map the llvm intrinsic to an OpenCL function.
-#define __CLC_FUNCTION __clc_rint
-#define __CLC_INTRINSIC "llvm.rint"
-#include "math/unary_intrin.inc"
+#include <clc/math/clc_rint.h>
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION rint
diff --git a/libclc/generic/lib/math/round.cl b/libclc/generic/lib/math/round.cl
@@ -3,7 +3,7 @@
 // Map the llvm intrinsic to an OpenCL function.
 #define __CLC_FUNCTION __clc_round
 #define __CLC_INTRINSIC "llvm.round"
-#include "math/unary_intrin.inc"
+#include <clc/math/unary_intrin.inc>
 
 #undef __CLC_FUNCTION
 #define __CLC_FUNCTION round
diff --git a/libclc/generic/lib/math/trunc.cl b/libclc/generic/lib/math/trunc.cl

@@ -1,3 +1,4 @@
+#include <clc/clc.h>
 #include <utils.h>
 
 #define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \