From 91f1801bd3102b7b349818e015353cf9e7e7449a Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Wed, 29 Mar 2023 12:11:51 -0400 Subject: [PATCH 1/4] [SYCL] Add marray support to integer built-in functions This patch adds support of sycl::marray to integer built-in functions (SYCL 2020 spec, Table 178). --- sycl/include/sycl/builtins.hpp | 277 +++++++++++++++++- .../sycl/detail/generic_type_traits.hpp | 6 +- sycl/include/sycl/detail/type_traits.hpp | 18 ++ .../DeviceLib/built-ins/marray_integer.cpp | 169 +++++++++++ 4 files changed, 455 insertions(+), 15 deletions(-) create mode 100644 sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 9e808e60ddebc..4d6948f637ca0 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -933,15 +933,16 @@ __SYCL_MARRAY_COMMON_FUNCTION_UNOP_OVERLOAD(sign, T x, x[i]) // errors. __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD((min), T x, T y, x[i], y[i]) __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD((min), T x, - detail::marray_element_type y, + detail::marray_element_t y, x[i], y) __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD((max), T x, T y, x[i], y[i]) __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD((max), T x, - detail::marray_element_type y, + detail::marray_element_t y, x[i], y) __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(step, T edge, T x, edge[i], x[i]) -__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD( - step, detail::marray_element_type edge, T x, edge, x[i]) +__SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD(step, + detail::marray_element_t edge, + T x, edge, x[i]) #undef __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD @@ -956,18 +957,18 @@ __SYCL_MARRAY_COMMON_FUNCTION_BINOP_OVERLOAD( __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(clamp, T x, T minval, T maxval, x[i], minval[i], maxval[i]) __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD( - clamp, T x, detail::marray_element_type minval, - detail::marray_element_type maxval, x[i], minval, maxval) + clamp, T x, detail::marray_element_t minval, + detail::marray_element_t maxval, x[i], minval, maxval) __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(mix, T x, T y, T a, x[i], y[i], a[i]) __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(mix, T x, T y, - detail::marray_element_type a, + detail::marray_element_t a, x[i], y[i], a) __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD(smoothstep, T edge0, T edge1, T x, edge0[i], edge1[i], x[i]) __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD( - smoothstep, detail::marray_element_type edge0, - detail::marray_element_type edge1, T x, edge0, edge1, x[i]) + smoothstep, detail::marray_element_t edge0, + detail::marray_element_t edge1, T x, edge0, edge1, x[i]) #undef __SYCL_MARRAY_COMMON_FUNCTION_TEROP_OVERLOAD #undef __SYCL_MARRAY_COMMON_FUNCTION_OVERLOAD_IMPL @@ -1310,6 +1311,260 @@ mul24(T x, T y) __NOEXC { return __sycl_std::__invoke_u_mul24(x, y); } +// marray integer functions + +// TODO: can be optimized in the way math functions are optimized (usage of +// vec) +#define __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, ...) \ + marray res; \ + for (int j = 0; j < N; j++) { \ + res[j] = NAME(__VA_ARGS__); \ + } \ + return res; + +// Keep NAME for readability +#define __SYCL_MARRAY_INTEGER_FUNCTION_ABS_U_OVERLOAD(NAME, ARG, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_ABS_I_OVERLOAD(NAME, ARG, ...) \ + template \ + std::enable_if_t::value, \ + marray, N>> \ + NAME(marray ARG) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_ABS_U_OVERLOAD(abs, x, x[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_ABS_I_OVERLOAD(abs, x, x[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_ABS_I_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_ABS_U_OVERLOAD + +#define __SYCL_MARRAY_INTEGER_FUNCTION_UNOP_OVERLOAD(NAME, ARG, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_UNOP_OVERLOAD(clz, x, x[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_UNOP_OVERLOAD(ctz, x, x[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_UNOP_OVERLOAD(popcount, x, x[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_UNOP_OVERLOAD + +#define __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, marray ARG2) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_RET_U_OVERLOAD(NAME, ARG1, \ + ARG2, ...) \ + template \ + std::enable_if_t::value, \ + marray, N>> \ + NAME(marray ARG1, marray ARG2) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, marray ARG2) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_2ND_ARG_SCALAR_OVERLOAD( \ + NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, T ARG2) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_2ND_ARG_SCALAR_OVERLOAD( \ + NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, T ARG2) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(abs_diff, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_RET_U_OVERLOAD(abs_diff, x, y, x[j], + y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(add_sat, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(add_sat, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(hadd, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(hadd, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(rhadd, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(rhadd, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD((max), x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD((max), x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_2ND_ARG_SCALAR_OVERLOAD((max), x, y, + x[j], y) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_2ND_ARG_SCALAR_OVERLOAD((max), x, y, + x[j], y) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD((min), x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD((min), x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_2ND_ARG_SCALAR_OVERLOAD((min), x, y, + x[j], y) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_2ND_ARG_SCALAR_OVERLOAD((min), x, y, + x[j], y) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(mul_hi, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(mul_hi, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(rotate, v, i, v[j], i[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(rotate, v, i, v[j], i[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(sub_sat, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(sub_sat, x, y, x[j], y[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_RET_U_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD + +#define __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD(NAME, ARG1, ARG2, \ + ARG3, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, marray ARG2, marray ARG3) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD(NAME, ARG1, ARG2, \ + ARG3, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, marray ARG2, marray ARG3) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_2ND_3RD_ARGS_SCALAR_OVERLOAD( \ + NAME, ARG1, ARG2, ARG3, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, T ARG2, T ARG3) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_2ND_3RD_ARGS_SCALAR_OVERLOAD( \ + NAME, ARG1, ARG2, ARG3, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, T ARG2, T ARG3) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD(clamp, x, minval, maxval, x[j], + minval[j], maxval[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD(clamp, x, minval, maxval, x[j], + minval[j], maxval[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_2ND_3RD_ARGS_SCALAR_OVERLOAD( + clamp, x, minval, maxval, x[j], minval, maxval) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_2ND_3RD_ARGS_SCALAR_OVERLOAD( + clamp, x, minval, maxval, x[j], minval, maxval) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD(mad_hi, a, b, c, a[j], b[j], + c[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD(mad_hi, a, b, c, a[j], b[j], + c[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD(mad_sat, a, b, c, a[j], b[j], + c[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD(mad_sat, a, b, c, a[j], b[j], + c[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD + +// Keep NAME for readability +#define __SYCL_MARRAY_INTEGER_FUNCTION_MAD24_U_OVERLOAD(NAME, ARG1, ARG2, \ + ARG3, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, marray ARG2, marray ARG3) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_MAD24_I_OVERLOAD(NAME, ARG1, ARG2, \ + ARG3, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, marray ARG2, marray ARG3) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_MAD24_U_OVERLOAD(mad24, x, y, z, x[j], y[j], + z[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_MAD24_I_OVERLOAD(mad24, x, y, z, x[j], y[j], + z[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_MAD24_I_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_MAD24_U_OVERLOAD + +// Keep NAME for readability +#define __SYCL_MARRAY_INTEGER_FUNCTION_MUL24_U_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, marray ARG2) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_MUL24_I_OVERLOAD(NAME, ARG1, ARG2, ...) \ + template \ + std::enable_if_t::value, marray> NAME( \ + marray ARG1, marray ARG2) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL(NAME, __VA_ARGS__) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_MUL24_U_OVERLOAD(mul24, x, y, x[j], y[j]) +__SYCL_MARRAY_INTEGER_FUNCTION_MUL24_I_OVERLOAD(mul24, x, y, x[j], y[j]) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_MUL24_I_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_MUL24_U_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_OVERLOAD_IMPL + +// TODO: can be optimized in the way math functions are optimized (usage of +// vec) +#define __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_OVERLOAD_IMPL(NAME) \ + detail::make_larger_t> res; \ + for (int j = 0; j < N; j++) { \ + res[j] = NAME(hi[j], lo[j]); \ + } \ + return res; + +// Keep NAME for readability +#define __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_UU_OVERLOAD(NAME, KBIT) \ + template \ + std::enable_if_t::value, \ + detail::make_larger_t>> \ + NAME(marray hi, marray lo) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_OVERLOAD_IMPL(NAME) \ + } + +#define __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_IU_OVERLOAD(NAME, KBIT) \ + template \ + std::enable_if_t::value && \ + detail::is_ugeninteger##KBIT::value, \ + detail::make_larger_t>> \ + NAME(marray hi, marray lo) __NOEXC { \ + __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_OVERLOAD_IMPL(NAME) \ + } + +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_UU_OVERLOAD(upsample, 8bit) +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_IU_OVERLOAD(upsample, 8bit) +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_UU_OVERLOAD(upsample, 16bit) +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_IU_OVERLOAD(upsample, 16bit) +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_UU_OVERLOAD(upsample, 32bit) +__SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_IU_OVERLOAD(upsample, 32bit) + +#undef __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_IU_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_UU_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_UPSAMPLE_OVERLOAD_IMPL + /* --------------- 4.13.6 Geometric Functions. ------------------------------*/ // float3 cross (float3 p0, float3 p1) // float4 cross (float4 p0, float4 p1) @@ -1654,9 +1909,9 @@ detail::enable_if_t::value, T> select(T a, T b, // mgentype select (mgentype a, mgentype b, marray c) template ::value>> -sycl::marray, T::size()> +sycl::marray, T::size()> select(T a, T b, sycl::marray c) __NOEXC { - sycl::marray, T::size()> res; + sycl::marray, T::size()> res; for (int i = 0; i < a.size(); i++) { res[i] = select(a[i], b[i], c[i]); } diff --git a/sycl/include/sycl/detail/generic_type_traits.hpp b/sycl/include/sycl/detail/generic_type_traits.hpp index a8c479b816562..62fe70e0491e9 100644 --- a/sycl/include/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/sycl/detail/generic_type_traits.hpp @@ -59,12 +59,10 @@ using is_vgenfloat = is_contained; template using is_svgenfloat = is_contained; -template using marray_element_type = typename T::value_type; - template using is_mgenfloat = bool_constant< - std::is_same, T::size()>>::value && - is_svgenfloat>::value>; + std::is_same, T::size()>>::value && + is_svgenfloat>::value>; template using is_gengeofloat = is_contained; diff --git a/sycl/include/sycl/detail/type_traits.hpp b/sycl/include/sycl/detail/type_traits.hpp index 474ced1a5fe91..2e229cb0de69d 100644 --- a/sycl/include/sycl/detail/type_traits.hpp +++ b/sycl/include/sycl/detail/type_traits.hpp @@ -111,6 +111,8 @@ template struct vector_element { }; template using vector_element_t = typename vector_element::type; +template using marray_element_t = typename T::value_type; + // change_base_type_t template struct change_base_type { using type = B; @@ -212,6 +214,13 @@ template struct make_unsigned { using type = copy_cv_qualifiers_t; }; +template struct make_unsigned> { + using base_type = marray_element_t>; + using new_type_wo_cv_qualifiers = + make_unsigned_impl_t>; + using type = marray, N>; +}; + template using make_unsigned_t = typename make_unsigned::type; // Checks that sizeof base type of T equal N and T satisfies S::value @@ -363,6 +372,15 @@ template struct make_larger_impl, vec> { using type = conditional_t; }; +template +struct make_larger_impl, marray> { + using base_type = marray_element_t>; + using upper_type = typename make_larger_impl::type; + using new_type = marray; + static constexpr bool found = !std::is_same::value; + using type = conditional_t; +}; + template struct make_larger { using type = typename make_larger_impl::type; }; diff --git a/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp b/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp new file mode 100644 index 0000000000000..2316767bd2726 --- /dev/null +++ b/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp @@ -0,0 +1,169 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +#define TEST(FUNC, MARRAY_RET_TYPE, DIM, EXPECTED, ...) \ + { \ + { \ + MARRAY_RET_TYPE result[DIM]; \ + { \ + sycl::buffer b(result, sycl::range{DIM}); \ + deviceQueue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + cgh.single_task([=]() { \ + sycl::marray res = FUNC(__VA_ARGS__); \ + for (int i = 0; i < DIM; i++) \ + res_access[i] = res[i]; \ + }); \ + }); \ + } \ + for (int i = 0; i < DIM; i++) \ + assert(result[i] == EXPECTED[i]); \ + } \ + } + +#define EXPECTED(TYPE, ...) ((TYPE[]){__VA_ARGS__}) + +int main() { + sycl::queue deviceQueue; + + sycl::marray ma1{1, -5, 5}; + sycl::marray ma1_char{1, -5, 5}; + sycl::marray ma1_signed_char{1, -5, 5}; + sycl::marray ma1_short{1, -5, 5}; + sycl::marray ma1_long_int{1, -5, 5}; + sycl::marray ma1_long_long_int{1, -5, 5}; + sycl::marray ma2{2, 3, 8}; + sycl::marray ma3{8, 0, 9}; + sycl::marray ma4{1, 5, 5}; + sycl::marray ma4_uchar{1, 5, 5}; + sycl::marray ma4_ushort{1, 5, 5}; + sycl::marray ma4_ulong_int{1, 5, 5}; + sycl::marray ma4_ulong_long_int{1, 5, 5}; + sycl::marray ma5{2, 3, 8}; + sycl::marray ma6{8, 0, 9}; + sycl::marray ma7{1, 5, 5}; + sycl::marray ma8{1, 5, 5}; + sycl::marray ma9{2, 3, 8}; + sycl::marray ma10{1, 5, 5}; + sycl::marray ma11{1, 5, 5}; + sycl::marray ma12{2, 3, 8}; + sycl::marray ma13{1, 5, 5}; + sycl::marray ma14{2, 3, 8}; + sycl::marray ma15{1, 5, 5}; + sycl::marray ma16{2, 3, 8}; + + // geninteger abs(geninteger x) + TEST(sycl::abs, int, 3, EXPECTED(int, 1, 5, 5), ma1); + TEST(sycl::abs, int, 3, EXPECTED(int, 1, 5, 5), ma4); + TEST(sycl::abs, char, 3, EXPECTED(char, 1, 5, 5), ma1_char); + TEST(sycl::abs, signed char, 3, EXPECTED(signed char, 1, 5, 5), + ma1_signed_char); + TEST(sycl::abs, unsigned char, 3, EXPECTED(unsigned char, 1, 5, 5), + ma4_uchar); + TEST(sycl::abs, short, 3, EXPECTED(short, 1, 5, 5), ma1_short); + TEST(sycl::abs, unsigned short, 3, EXPECTED(unsigned short, 1, 5, 5), + ma4_ushort); + TEST(sycl::abs, long int, 3, EXPECTED(long int, 1, 5, 5), ma1_long_int); + TEST(sycl::abs, unsigned long int, 3, EXPECTED(unsigned long int, 1, 5, 5), + ma4_ulong_int); + TEST(sycl::abs, long long int, 3, EXPECTED(long long int, 1, 5, 5), + ma1_long_long_int); + TEST(sycl::abs, unsigned long long int, 3, + EXPECTED(unsigned long long int, 1, 5, 5), ma4_ulong_long_int); + // ugeninteger abs_diff(geninteger x, geninteger y) + TEST(sycl::abs_diff, int, 3, EXPECTED(int, 1, 8, 3), ma1, ma2); + TEST(sycl::abs_diff, int, 3, EXPECTED(int, 1, 2, 3), ma4, ma5); + // geninteger add_sat(geninteger x, geninteger y) + TEST(sycl::add_sat, int, 3, EXPECTED(int, 3, -2, 13), ma1, ma2); + TEST(sycl::add_sat, unsigned int, 3, EXPECTED(unsigned int, 3, 8, 13), ma4, + ma5); + // geninteger hadd(geninteger x, geninteger y) + TEST(sycl::hadd, int, 3, EXPECTED(int, 1, -1, 6), ma1, ma2); + TEST(sycl::hadd, unsigned int, 3, EXPECTED(unsigned int, 1, 4, 6), ma4, ma5); + // geninteger rhadd(geninteger x, geninteger y) + TEST(sycl::rhadd, int, 3, EXPECTED(int, 2, -1, 7), ma1, ma2); + TEST(sycl::rhadd, unsigned int, 3, EXPECTED(unsigned int, 2, 4, 7), ma4, ma5); + // geninteger clamp(geninteger x, geninteger minval, geninteger maxval) + TEST(sycl::clamp, int, 3, EXPECTED(int, 2, 0, 8), ma1, ma2, ma3); + TEST(sycl::clamp, unsigned int, 3, EXPECTED(unsigned int, 2, 0, 8), ma4, ma5, + ma6); + // geninteger clamp(geninteger x, sgeninteger minval, sgeninteger maxval) + TEST(sycl::clamp, int, 3, EXPECTED(int, 4, 4, 4), ma1, 4, 4); + TEST(sycl::clamp, unsigned int, 3, EXPECTED(unsigned int, 4, 4, 4), ma4, + (unsigned int)4, (unsigned int)4); + // geninteger clz(geninteger x) + TEST(sycl::clz, int, 3, EXPECTED(int, 31, 0, 29), ma1); + TEST(sycl::clz, unsigned int, 3, EXPECTED(unsigned int, 31, 29, 29), ma4); + // geninteger ctz(geninteger x) + TEST(sycl::ctz, int, 3, EXPECTED(int, 0, 0, 0), ma1); + TEST(sycl::ctz, unsigned int, 3, EXPECTED(unsigned int, 0, 0, 0), ma4); + // geninteger mad_hi(geninteger a, geninteger b, geninteger c) + TEST(sycl::mad_hi, int, 3, EXPECTED(int, 8, -1, 9), ma1, ma2, ma3); + TEST(sycl::mad_hi, unsigned int, 3, EXPECTED(unsigned int, 8, 0, 9), ma4, ma5, + ma6); + // geninteger mad_sat(geninteger a, geninteger b, geninteger c) + TEST(sycl::mad_sat, int, 3, EXPECTED(int, 10, -15, 49), ma1, ma2, ma3); + TEST(sycl::mad_sat, unsigned int, 3, EXPECTED(unsigned int, 10, 15, 49), ma4, + ma5, ma6); + // geninteger max(geninteger x, geninteger y) + TEST(sycl::max, int, 3, EXPECTED(int, 2, 3, 8), ma1, ma2); + TEST(sycl::max, unsigned int, 3, EXPECTED(unsigned int, 2, 5, 8), ma4, ma5); + // geninteger max(geninteger x, sgeninteger y) + TEST(sycl::max, int, 3, EXPECTED(int, 4, 4, 5), ma1, 4); + TEST(sycl::max, unsigned int, 3, EXPECTED(unsigned int, 4, 5, 5), ma4, + (unsigned int)4); + // geninteger min(geninteger x, geninteger y) + TEST(sycl::min, int, 3, EXPECTED(int, 1, -5, 5), ma1, ma2); + TEST(sycl::min, unsigned int, 3, EXPECTED(unsigned int, 1, 3, 5), ma4, ma5); + // geninteger min(geninteger x, sgeninteger y) + TEST(sycl::min, int, 3, EXPECTED(int, 1, -5, 4), ma1, 4); + TEST(sycl::min, unsigned int, 3, EXPECTED(unsigned int, 1, 4, 4), ma4, + (unsigned int)4); + // geninteger mul_hi(geninteger x, geninteger y) + TEST(sycl::mul_hi, int, 3, EXPECTED(int, 0, -1, 0), ma1, ma2); + TEST(sycl::mul_hi, unsigned int, 3, EXPECTED(unsigned int, 0, 0, 0), ma4, + ma5); + // geninteger rotate(geninteger v, geninteger i) + TEST(sycl::rotate, int, 3, EXPECTED(int, 4, -33, 1280), ma1, ma2); + TEST(sycl::rotate, unsigned int, 3, EXPECTED(unsigned int, 4, 40, 1280), ma4, + ma5); + // geninteger sub_sat(geninteger x, geninteger y) + TEST(sycl::sub_sat, int, 3, EXPECTED(int, -1, -8, -3), ma1, ma2); + TEST(sycl::sub_sat, unsigned int, 3, EXPECTED(unsigned int, 0, 2, 0), ma4, + ma5); + // ugeninteger16bit upsample(ugeninteger8bit hi, ugeninteger8bit lo) + TEST(sycl::upsample, uint16_t, 3, EXPECTED(uint16_t, 258, 1283, 1288), ma8, + ma9); + // igeninteger16bit upsample(igeninteger8bit hi, ugeninteger8bit lo) + TEST(sycl::upsample, int16_t, 3, EXPECTED(int16_t, 258, 1283, 1288), ma7, + ma9); + // ugeninteger32bit upsample(ugeninteger16bit hi, ugeninteger16bit lo) + TEST(sycl::upsample, uint32_t, 3, EXPECTED(uint32_t, 65538, 327683, 327688), + ma11, ma12); + // igeninteger32bit upsample(igeninteger16bit hi, ugeninteger16bit lo) + TEST(sycl::upsample, int32_t, 3, EXPECTED(int32_t, 65538, 327683, 327688), + ma10, ma12); + // ugeninteger64bit upsample(ugeninteger32bit hi, ugeninteger32bit lo) + TEST(sycl::upsample, uint64_t, 3, + EXPECTED(uint64_t, 4294967298, 21474836483, 21474836488), ma15, ma16); + // igeninteger64bit upsample(igeninteger32bit hi, ugeninteger32bit lo) + TEST(sycl::upsample, int64_t, 3, + EXPECTED(int64_t, 4294967298, 21474836483, 21474836488), ma13, ma16); + // geninteger popcount(geninteger x) + TEST(sycl::popcount, int, 3, EXPECTED(int, 1, 31, 2), ma1); + TEST(sycl::popcount, unsigned int, 3, EXPECTED(unsigned int, 1, 2, 2), ma4); + // geninteger32bit mad24(geninteger32bit x, geninteger32bit y, geninteger32bit + // z) + TEST(sycl::mad24, int32_t, 3, EXPECTED(int32_t, 4, 18, 48), ma13, ma14, ma14); + TEST(sycl::mad24, uint32_t, 3, EXPECTED(uint32_t, 4, 18, 48), ma15, ma16, + ma16); + // geninteger32bit mul24(geninteger32bit x, geninteger32bit y) + TEST(sycl::mul24, int32_t, 3, EXPECTED(int32_t, 2, 15, 40), ma13, ma14); + TEST(sycl::mul24, uint32_t, 3, EXPECTED(uint32_t, 2, 15, 40), ma15, ma16); + + return 0; +} From d94d6a0528e5541f4606954672cfbb5f0544130b Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Thu, 30 Mar 2023 07:07:45 -0400 Subject: [PATCH 2/4] Fix expected result for clamp test --- sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp b/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp index 2316767bd2726..08c14b9e9925a 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp @@ -88,7 +88,7 @@ int main() { TEST(sycl::rhadd, int, 3, EXPECTED(int, 2, -1, 7), ma1, ma2); TEST(sycl::rhadd, unsigned int, 3, EXPECTED(unsigned int, 2, 4, 7), ma4, ma5); // geninteger clamp(geninteger x, geninteger minval, geninteger maxval) - TEST(sycl::clamp, int, 3, EXPECTED(int, 2, 0, 8), ma1, ma2, ma3); + TEST(sycl::clamp, int, 3, EXPECTED(int, 2, 3, 8), ma1, ma2, ma3); TEST(sycl::clamp, unsigned int, 3, EXPECTED(unsigned int, 2, 0, 8), ma4, ma5, ma6); // geninteger clamp(geninteger x, sgeninteger minval, sgeninteger maxval) From 82c6081934e91af126c19086a33895617092f0b3 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Thu, 30 Mar 2023 08:27:57 -0400 Subject: [PATCH 3/4] Fix clamp test to avoid UB --- sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp b/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp index 08c14b9e9925a..e58cf5b7df074 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp @@ -88,9 +88,9 @@ int main() { TEST(sycl::rhadd, int, 3, EXPECTED(int, 2, -1, 7), ma1, ma2); TEST(sycl::rhadd, unsigned int, 3, EXPECTED(unsigned int, 2, 4, 7), ma4, ma5); // geninteger clamp(geninteger x, geninteger minval, geninteger maxval) - TEST(sycl::clamp, int, 3, EXPECTED(int, 2, 3, 8), ma1, ma2, ma3); - TEST(sycl::clamp, unsigned int, 3, EXPECTED(unsigned int, 2, 0, 8), ma4, ma5, - ma6); + TEST(sycl::clamp, int, 3, EXPECTED(int, 2, 3, 8), ma1, ma2, ma2); + TEST(sycl::clamp, unsigned int, 3, EXPECTED(unsigned int, 2, 3, 8), ma4, ma5, + ma5); // geninteger clamp(geninteger x, sgeninteger minval, sgeninteger maxval) TEST(sycl::clamp, int, 3, EXPECTED(int, 4, 4, 4), ma1, 4, 4); TEST(sycl::clamp, unsigned int, 3, EXPECTED(unsigned int, 4, 4, 4), ma4, From fa951d1d537f1e3451f665e6bd5e190c10ccaeea Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Thu, 30 Mar 2023 09:37:05 -0400 Subject: [PATCH 4/4] Add missing undefs --- sycl/include/sycl/builtins.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 4d6948f637ca0..67083ea87d447 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -1424,6 +1424,8 @@ __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(rotate, v, i, v[j], i[j]) __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD(sub_sat, x, y, x[j], y[j]) __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD(sub_sat, x, y, x[j], y[j]) +#undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_2ND_ARG_SCALAR_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_2ND_ARG_SCALAR_OVERLOAD #undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_OVERLOAD #undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_I_RET_U_OVERLOAD #undef __SYCL_MARRAY_INTEGER_FUNCTION_BINOP_U_OVERLOAD @@ -1477,6 +1479,8 @@ __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD(mad_sat, a, b, c, a[j], b[j], __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD(mad_sat, a, b, c, a[j], b[j], c[j]) +#undef __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_2ND_3RD_ARGS_SCALAR_OVERLOAD +#undef __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_2ND_3RD_ARGS_SCALAR_OVERLOAD #undef __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_I_OVERLOAD #undef __SYCL_MARRAY_INTEGER_FUNCTION_TEROP_U_OVERLOAD