diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 7e86a024a094b..15d7e50a876ec 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10977,6 +10977,7 @@ def err_sycl_kernel_incorrectly_named : Error< "kernel %select{name is missing" "|needs to have a globally-visible name" "|name is invalid. Unscoped enum requires fixed underlying type" + "|name cannot be a type in the \"std\" namespace" "}0">; def err_sycl_kernel_not_function_object : Error<"kernel parameter must be a lambda or function object">; diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index a207c91cef3c1..6c632b022af88 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -2755,6 +2755,13 @@ void SYCLIntegrationHeader::emitFwdDecl(raw_ostream &O, const Decl *D, } break; } + + if (NS->isStdNamespace()) { + Diag.Report(KernelLocation, diag::err_sycl_kernel_incorrectly_named) + << /* name cannot be a type in the std namespace */ 3; + return; + } + ++NamespaceCnt; const StringRef NSInlinePrefix = NS->isInline() ? "inline " : ""; NSStr.insert( @@ -2837,8 +2844,13 @@ void SYCLIntegrationHeader::emitForwardClassDecls( ; const CXXRecordDecl *RD = T->getAsCXXRecordDecl(); - if (!RD) + if (!RD) { + if (T->isNullPtrType()) + Diag.Report(KernelLocation, diag::err_sycl_kernel_incorrectly_named) + << /* name cannot be a type in the std namespace */ 3; + return; + } // see if this is a template specialization ... if (const auto *TSD = dyn_cast(RD)) { diff --git a/clang/test/CodeGenSYCL/stdtypes_kernel_type.cpp b/clang/test/CodeGenSYCL/stdtypes_kernel_type.cpp new file mode 100644 index 0000000000000..1509a78741292 --- /dev/null +++ b/clang/test/CodeGenSYCL/stdtypes_kernel_type.cpp @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-int-header=%t.h -DCHECK_ERROR -verify %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s +// RUN: FileCheck -input-file=%t.h %s +// +// CHECK: #include +// CHECK-NEXT: #include +// +// CHECK: static constexpr +// CHECK-NEXT: const char* const kernel_names[] = { +// CHECK-NEXT: "_ZTSm", +// CHECK-NEXT: "_ZTSl" +// CHECK-NEXT: }; +// +// CHECK: static constexpr +// CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-NEXT: //--- _ZTSm +// CHECK-EMPTY: +// CHECK-NEXT: //--- _ZTSl +// CHECK-EMPTY: +// CHECK-NEXT: }; +// +// CHECK: static constexpr +// CHECK-NEXT: const unsigned kernel_signature_start[] = { +// CHECK-NEXT: 0, // _ZTSm +// CHECK-NEXT: 1 // _ZTSl +// CHECK-NEXT: }; + +// CHECK: template <> struct KernelInfo { +// CHECK: template <> struct KernelInfo { + +void usage() { +} + +namespace std { +typedef long unsigned int size_t; +typedef long int ptrdiff_t; +typedef decltype(nullptr) nullptr_t; +class T; +class U; +} // namespace std + +template +struct Templated_kernel_name; + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +int main() { +#ifdef CHECK_ERROR + kernel_single_task([=]() {}); // expected-error {{kernel name cannot be a type in the "std" namespace}} + kernel_single_task([=]() {}); // expected-error {{kernel name cannot be a type in the "std" namespace}} + kernel_single_task>([=]() {}); // expected-error {{kernel name cannot be a type in the "std" namespace}} + kernel_single_task>([=]() {}); // expected-error {{kernel name cannot be a type in the "std" namespace}} +#endif + + // Although in the std namespace, these resolve to builtins such as `int` that are allowed in kernel names + kernel_single_task([=]() {}); + kernel_single_task([=]() {}); + + return 0; +} diff --git a/clang/test/SemaSYCL/unnamed-kernel.cpp b/clang/test/SemaSYCL/unnamed-kernel.cpp index 45954f325b4d6..c3790cbab3de3 100644 --- a/clang/test/SemaSYCL/unnamed-kernel.cpp +++ b/clang/test/SemaSYCL/unnamed-kernel.cpp @@ -11,6 +11,11 @@ template class KernelName; } +namespace std { +typedef struct { +} max_align_t; +} // namespace std + struct MyWrapper { private: class InvalidKernelName0 {}; @@ -41,7 +46,7 @@ struct MyWrapper { #ifndef __SYCL_UNNAMED_LAMBDA__ // expected-error@+4 {{kernel needs to have a globally-visible name}} - // expected-note@16 {{InvalidKernelName0 declared here}} + // expected-note@21 {{InvalidKernelName0 declared here}} #endif q.submit([&](cl::sycl::handler &h) { h.single_task([] {}); @@ -49,7 +54,7 @@ struct MyWrapper { #ifndef __SYCL_UNNAMED_LAMBDA__ // expected-error@+4 {{kernel needs to have a globally-visible name}} - // expected-note@17 {{InvalidKernelName3 declared here}} + // expected-note@22 {{InvalidKernelName3 declared here}} #endif q.submit([&](cl::sycl::handler &h) { h.single_task>([] {}); @@ -60,10 +65,17 @@ struct MyWrapper { h.single_task([] {}); }); +#ifndef __SYCL_UNNAMED_LAMBDA__ + // expected-error@+3 {{kernel name cannot be a type in the "std" namespace}} +#endif + q.submit([&](cl::sycl::handler &h) { + h.single_task([] {}); + }); + using InvalidAlias = InvalidKernelName4; #ifndef __SYCL_UNNAMED_LAMBDA__ // expected-error@+4 {{kernel needs to have a globally-visible name}} - // expected-note@18 {{InvalidKernelName4 declared here}} + // expected-note@23 {{InvalidKernelName4 declared here}} #endif q.submit([&](cl::sycl::handler &h) { h.single_task([] {}); @@ -72,7 +84,7 @@ struct MyWrapper { using InvalidAlias1 = InvalidKernelName5; #ifndef __SYCL_UNNAMED_LAMBDA__ // expected-error@+4 {{kernel needs to have a globally-visible name}} - // expected-note@19 {{InvalidKernelName5 declared here}} + // expected-note@24 {{InvalidKernelName5 declared here}} #endif q.submit([&](cl::sycl::handler &h) { h.single_task>([] {}); diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp index d0320701010f1..b9d05701113d4 100644 --- a/sycl/test/group-algorithm/broadcast.cpp +++ b/sycl/test/group-algorithm/broadcast.cpp @@ -15,14 +15,11 @@ using namespace sycl; using namespace sycl::ONEAPI; -template -class broadcast_kernel; - -template +template void test(queue q, InputContainer input, OutputContainer output) { typedef typename InputContainer::value_type InputT; typedef typename OutputContainer::value_type OutputT; - typedef class broadcast_kernel kernel_name; size_t N = input.size(); size_t G = 4; range<2> R(G, G); @@ -63,7 +60,7 @@ int main() { std::array output; std::iota(input.begin(), input.end(), 1); std::fill(output.begin(), output.end(), false); - test(q, input, output); + test(q, input, output); } // Test pointer type @@ -74,7 +71,7 @@ int main() { input[i] = static_cast(0x0) + i; } std::fill(output.begin(), output.end(), static_cast(0x0)); - test(q, input, output); + test(q, input, output); } // Test user-defined type @@ -88,7 +85,7 @@ int main() { std::complex(0, 1) + (float)i * std::complex(2, 2); } std::fill(output.begin(), output.end(), std::complex(0, 0)); - test(q, input, output); + test(q, input, output); } { std::array, N> input; @@ -98,7 +95,7 @@ int main() { std::complex(0, 1) + (double)i * std::complex(2, 2); } std::fill(output.begin(), output.end(), std::complex(0, 0)); - test(q, input, output); + test(q, input, output); } std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp index 1b831a18c0073..b18a16af1b816 100644 --- a/sycl/test/group-algorithm/exclusive_scan.cpp +++ b/sycl/test/group-algorithm/exclusive_scan.cpp @@ -24,7 +24,7 @@ using namespace sycl; using namespace sycl::ONEAPI; -template +template class exclusive_scan_kernel; // std::exclusive_scan isn't implemented yet, so use serial implementation @@ -44,17 +44,17 @@ OutputIterator exclusive_scan(InputIterator first, InputIterator last, } } // namespace emu -template +template void test(queue q, InputContainer input, OutputContainer output, BinaryOperation binary_op, typename OutputContainer::value_type identity) { typedef typename InputContainer::value_type InputT; typedef typename OutputContainer::value_type OutputT; - typedef class exclusive_scan_kernel kernel_name0; - typedef class exclusive_scan_kernel kernel_name1; - typedef class exclusive_scan_kernel kernel_name2; - typedef class exclusive_scan_kernel kernel_name3; + typedef class exclusive_scan_kernel kernel_name0; + typedef class exclusive_scan_kernel kernel_name1; + typedef class exclusive_scan_kernel kernel_name2; + typedef class exclusive_scan_kernel kernel_name3; OutputT init = 42; size_t N = input.size(); size_t G = 16; @@ -159,19 +159,24 @@ int main() { std::iota(input.begin(), input.end(), 0); std::fill(output.begin(), output.end(), 0); - test(q, input, output, plus<>(), 0); - test(q, input, output, minimum<>(), std::numeric_limits::max()); - test(q, input, output, maximum<>(), std::numeric_limits::lowest()); + test(q, input, output, plus<>(), 0); + test(q, input, output, minimum<>(), + std::numeric_limits::max()); + test(q, input, output, maximum<>(), + std::numeric_limits::lowest()); - test(q, input, output, plus(), 0); - test(q, input, output, minimum(), std::numeric_limits::max()); - test(q, input, output, maximum(), std::numeric_limits::lowest()); + test(q, input, output, plus(), 0); + test(q, input, output, minimum(), + std::numeric_limits::max()); + test(q, input, output, maximum(), + std::numeric_limits::lowest()); #ifdef SPIRV_1_3 - test(q, input, output, multiplies(), 1); - test(q, input, output, bit_or(), 0); - test(q, input, output, bit_xor(), 0); - test(q, input, output, bit_and(), ~0); + test(q, input, output, multiplies(), + 1); + test(q, input, output, bit_or(), 0); + test(q, input, output, bit_xor(), 0); + test(q, input, output, bit_and(), ~0); #endif // SPIRV_1_3 std::cout << "Test passed." << std::endl; diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp index e6fddd1c2d4aa..ea398bf2d0273 100644 --- a/sycl/test/group-algorithm/inclusive_scan.cpp +++ b/sycl/test/group-algorithm/inclusive_scan.cpp @@ -24,7 +24,7 @@ using namespace sycl; using namespace sycl::ONEAPI; -template +template class inclusive_scan_kernel; // std::inclusive_scan isn't implemented yet, so use serial implementation @@ -44,17 +44,17 @@ OutputIterator inclusive_scan(InputIterator first, InputIterator last, } } // namespace emu -template +template void test(queue q, InputContainer input, OutputContainer output, BinaryOperation binary_op, typename OutputContainer::value_type identity) { typedef typename InputContainer::value_type InputT; typedef typename OutputContainer::value_type OutputT; - typedef class inclusive_scan_kernel kernel_name0; - typedef class inclusive_scan_kernel kernel_name1; - typedef class inclusive_scan_kernel kernel_name2; - typedef class inclusive_scan_kernel kernel_name3; + typedef class inclusive_scan_kernel kernel_name0; + typedef class inclusive_scan_kernel kernel_name1; + typedef class inclusive_scan_kernel kernel_name2; + typedef class inclusive_scan_kernel kernel_name3; OutputT init = 42; size_t N = input.size(); size_t G = 16; @@ -159,19 +159,25 @@ int main() { std::iota(input.begin(), input.end(), 0); std::fill(output.begin(), output.end(), 0); - test(q, input, output, plus<>(), 0); - test(q, input, output, minimum<>(), std::numeric_limits::max()); - test(q, input, output, maximum<>(), std::numeric_limits::lowest()); + test(q, input, output, plus<>(), 0); + test(q, input, output, minimum<>(), + std::numeric_limits::max()); + test(q, input, output, maximum<>(), + std::numeric_limits::lowest()); - test(q, input, output, plus(), 0); - test(q, input, output, minimum(), std::numeric_limits::max()); - test(q, input, output, maximum(), std::numeric_limits::lowest()); + test(q, input, output, plus(), 0); + test(q, input, output, minimum(), + std::numeric_limits::max()); + test(q, input, output, maximum(), + std::numeric_limits::lowest()); #ifdef SPIRV_1_3 - test(q, input, output, multiplies(), 1); - test(q, input, output, bit_or(), 0); - test(q, input, output, bit_xor(), 0); - test(q, input, output, bit_and(), ~0); + test(q, input, output, + multiplies(), 1); + test(q, input, output, bit_or(), 0); + test(q, input, output, bit_xor(), + 0); + test(q, input, output, bit_and(), ~0); #endif // SPIRV_1_3 std::cout << "Test passed." << std::endl; diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp index 010a0ce75b2b4..708de23653d2d 100644 --- a/sycl/test/group-algorithm/reduce.cpp +++ b/sycl/test/group-algorithm/reduce.cpp @@ -23,17 +23,13 @@ using namespace sycl; using namespace sycl::ONEAPI; -template -class reduce_kernel; - -template +template void test(queue q, InputContainer input, OutputContainer output, BinaryOperation binary_op, typename OutputContainer::value_type identity) { typedef typename InputContainer::value_type InputT; typedef typename OutputContainer::value_type OutputT; - typedef class reduce_kernel kernel_name; OutputT init = 42; size_t N = input.size(); size_t G = 16; @@ -44,15 +40,17 @@ void test(queue q, InputContainer input, OutputContainer output, q.submit([&](handler &cgh) { auto in = in_buf.template get_access(cgh); auto out = out_buf.template get_access(cgh); - cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { - group<1> g = it.get_group(); - int lid = it.get_local_id(0); - out[0] = reduce(g, in[lid], binary_op); - out[1] = reduce(g, in[lid], init, binary_op); - out[2] = reduce(g, in.get_pointer(), in.get_pointer() + N, binary_op); - out[3] = - reduce(g, in.get_pointer(), in.get_pointer() + N, init, binary_op); - }); + cgh.parallel_for( + nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[0] = reduce(g, in[lid], binary_op); + out[1] = reduce(g, in[lid], init, binary_op); + out[2] = + reduce(g, in.get_pointer(), in.get_pointer() + N, binary_op); + out[3] = reduce(g, in.get_pointer(), in.get_pointer() + N, init, + binary_op); + }); }); } // std::reduce is not implemented yet, so use std::accumulate instead @@ -97,19 +95,24 @@ int main() { std::iota(input.begin(), input.end(), 0); std::fill(output.begin(), output.end(), 0); - test(q, input, output, plus<>(), 0); - test(q, input, output, minimum<>(), std::numeric_limits::max()); - test(q, input, output, maximum<>(), std::numeric_limits::lowest()); + test(q, input, output, plus<>(), 0); + test(q, input, output, minimum<>(), + std::numeric_limits::max()); + test(q, input, output, maximum<>(), + std::numeric_limits::lowest()); - test(q, input, output, plus(), 0); - test(q, input, output, minimum(), std::numeric_limits::max()); - test(q, input, output, maximum(), std::numeric_limits::lowest()); + test(q, input, output, plus(), 0); + test(q, input, output, minimum(), + std::numeric_limits::max()); + test(q, input, output, maximum(), + std::numeric_limits::lowest()); #ifdef SPIRV_1_3 - test(q, input, output, multiplies(), 1); - test(q, input, output, bit_or(), 0); - test(q, input, output, bit_xor(), 0); - test(q, input, output, bit_and(), ~0); + test(q, input, output, + multiplies(), 1); + test(q, input, output, bit_or(), 0); + test(q, input, output, bit_xor(), 0); + test(q, input, output, bit_and(), ~0); #endif // SPIRV_1_3 std::cout << "Test passed." << std::endl; diff --git a/sycl/test/reduction/reduction_ctor.cpp b/sycl/test/reduction/reduction_ctor.cpp index 4828b1dc94535..c10fe74d7643c 100644 --- a/sycl/test/reduction/reduction_ctor.cpp +++ b/sycl/test/reduction/reduction_ctor.cpp @@ -10,7 +10,6 @@ using namespace cl::sycl; - template void test_reducer(Reduction &Redu, T A, T B) { typename Reduction::reducer_type Reducer; @@ -34,12 +33,10 @@ void test_reducer(Reduction &Redu, T Identity, BinaryOperation BOp, T A, T B) { "Wrong result of binary operation."); } -template -class Known; -template -class Unknown; +template class KernelNameGroup; -template +template void testKnown(T Identity, BinaryOperation BOp, T A, T B) { buffer ReduBuf(1); @@ -50,17 +47,17 @@ void testKnown(T Identity, BinaryOperation BOp, T A, T B) { accessor ReduAcc(ReduBuf, CGH); auto Redu = ONEAPI::reduction(ReduAcc, BOp); - assert(Redu.getIdentity() == Identity && - "Failed getIdentity() check()."); + assert(Redu.getIdentity() == Identity && "Failed getIdentity() check()."); test_reducer(Redu, A, B); test_reducer(Redu, Identity, BOp, A, B); // Command group must have at least one task in it. Use an empty one. - CGH.single_task>([=]() {}); + CGH.single_task([=]() {}); }); } -template +template void testUnknown(T Identity, BinaryOperation BOp, T A, T B) { buffer ReduBuf(1); queue Q; @@ -70,49 +67,61 @@ void testUnknown(T Identity, BinaryOperation BOp, T A, T B) { accessor ReduAcc(ReduBuf, CGH); auto Redu = ONEAPI::reduction(ReduAcc, Identity, BOp); - assert(Redu.getIdentity() == Identity && - "Failed getIdentity() check()."); + assert(Redu.getIdentity() == Identity && "Failed getIdentity() check()."); test_reducer(Redu, Identity, BOp, A, B); // Command group must have at least one task in it. Use an empty one. - CGH.single_task([=]() {}); + CGH.single_task([=]() {}); }); } -template +template void testBoth(T Identity, BinaryOperation BOp, T A, T B) { - testKnown(Identity, BOp, A, B); - testKnown(Identity, BOp, A, B); - testUnknown>(Identity, BOp, A, B); - testUnknown>(Identity, BOp, A, B); + testKnown, + T, 0>(Identity, BOp, A, B); + testKnown< + KernelNameGroup, + T, 1>(Identity, BOp, A, B); + testUnknown< + KernelNameGroup, T, + 0>(Identity, BOp, A, B); + testUnknown, + T, 1>(Identity, BOp, A, B); } int main() { - testBoth(0, ONEAPI::plus(), 1, 7); - testBoth(1, std::multiplies(), 1, 7); - testBoth(0, ONEAPI::bit_or(), 1, 8); - testBoth(0, ONEAPI::bit_xor(), 7, 3); - testBoth(~0, ONEAPI::bit_and(), 7, 3); - testBoth((std::numeric_limits::max)(), ONEAPI::minimum(), 7, - 3); - testBoth((std::numeric_limits::min)(), ONEAPI::maximum(), 7, - 3); - - testBoth(0, ONEAPI::plus(), 1, 7); - testBoth(1, std::multiplies(), 1, 7); - testBoth(getMaximumFPValue(), ONEAPI::minimum(), 7, 3); - testBoth(getMinimumFPValue(), ONEAPI::maximum(), 7, 3); - - testUnknown, 0, - Unknown, 0, CustomVecPlus>>( - CustomVec(0), CustomVecPlus(), CustomVec(1), - CustomVec(7)); - testUnknown, 1, - Unknown, 1, CustomVecPlus>>( + testBoth(0, ONEAPI::plus(), 1, + 7); + testBoth(1, std::multiplies(), 1, 7); + testBoth(0, ONEAPI::bit_or(), + 1, 8); + testBoth(0, ONEAPI::bit_xor(), + 7, 3); + testBoth(~0, ONEAPI::bit_and(), 7, + 3); + testBoth( + (std::numeric_limits::max)(), ONEAPI::minimum(), 7, 3); + testBoth((std::numeric_limits::min)(), + ONEAPI::maximum(), 7, 3); + + testBoth(0, ONEAPI::plus(), 1, + 7); + testBoth( + 1, std::multiplies(), 1, 7); + testBoth( + getMaximumFPValue(), ONEAPI::minimum(), 7, 3); + testBoth(getMinimumFPValue(), + ONEAPI::maximum(), 7, 3); + + testUnknown, 0, + CustomVecPlus>(CustomVec(0), CustomVecPlus(), + CustomVec(1), CustomVec(7)); + testUnknown, 1>( CustomVec(0), CustomVecPlus(), CustomVec(1), CustomVec(7)); - testUnknown( + testUnknown( 0, [](auto a, auto b) { return a | b; }, 1, 8); std::cout << "Test passed\n"; diff --git a/sycl/test/reduction/reduction_nd_conditional.cpp b/sycl/test/reduction/reduction_nd_conditional.cpp index 985f77cbc77f4..9db6fac910809 100644 --- a/sycl/test/reduction/reduction_nd_conditional.cpp +++ b/sycl/test/reduction/reduction_nd_conditional.cpp @@ -35,25 +35,16 @@ void initInputData(buffer &InBuf, T &ExpectedOut, T Identity, } }; -template -class SomeClass; - -template -struct Vec { +template struct Vec { Vec() : X(0), Y(0) {} Vec(T X, T Y) : X(X), Y(Y) {} Vec(T V) : X(V), Y(V) {} - bool operator==(const Vec &P) const { - return P.X == X && P.Y == Y; - } - bool operator!=(const Vec &P) const { - return !(*this == P); - } + bool operator==(const Vec &P) const { return P.X == X && P.Y == Y; } + bool operator!=(const Vec &P) const { return !(*this == P); } T X; T Y; }; -template -bool operator==(const Vec &A, const Vec &B) { +template bool operator==(const Vec &A, const Vec &B) { return A.X == B.X && A.Y == B.Y; } template @@ -61,15 +52,13 @@ std::ostream &operator<<(std::ostream &OS, const Vec &P) { return OS << "(" << P.X << ", " << P.Y << ")"; } -template -struct VecPlus { +template struct VecPlus { using P = Vec; - P operator()(const P &A, const P &B) const { - return P(A.X + B.X, A.Y + B.Y); - } + P operator()(const P &A, const P &B) const { return P(A.X + B.X, A.Y + B.Y); } }; -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -90,7 +79,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { size_t I = NDIt.get_global_linear_id(); if (I < 2) @@ -114,10 +103,11 @@ void test(T Identity, size_t WGSize, size_t NWItems) { } int main() { - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 2, 64); - test>(0, 16, 256); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 2, + 64); + test>(0, 16, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_ext_type.hpp b/sycl/test/reduction/reduction_nd_ext_type.hpp index f81a913837d46..4cb9046092c82 100644 --- a/sycl/test/reduction/reduction_nd_ext_type.hpp +++ b/sycl/test/reduction/reduction_nd_ext_type.hpp @@ -8,10 +8,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -28,14 +26,13 @@ void test(T Identity, size_t WGSize, size_t NWItems) { queue Q; Q.submit([&](handler &CGH) { auto In = InBuf.template get_access(CGH); - accessor - Out(OutBuf, CGH); + accessor Out(OutBuf, CGH); auto Redu = ONEAPI::reduction(Out, Identity, BOp); range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -44,18 +41,18 @@ void test(T Identity, size_t WGSize, size_t NWItems) { // Check correctness. auto Out = OutBuf.template get_access(); T ComputedOut = *(Out.get_pointer()); - T MaxDiff = 3 * std::numeric_limits::epsilon() * std::fabs(ComputedOut + CorrectOut); + T MaxDiff = 3 * std::numeric_limits::epsilon() * + std::fabs(ComputedOut + CorrectOut); if (std::fabs(static_cast(ComputedOut - CorrectOut)) > MaxDiff) { std::cout << "NWItems = " << NWItems << ", WGSize = " << WGSize << "\n"; std::cout << "Computed value: " << ComputedOut - << ", Expected value: " << CorrectOut - << ", MaxDiff = " << MaxDiff << "\n"; + << ", Expected value: " << CorrectOut << ", MaxDiff = " << MaxDiff + << "\n"; assert(0 && "Wrong value."); } } -template -int runTests(const string_class &ExtensionName) { +template int runTests(const string_class &ExtensionName) { device D = default_selector().select_device(); if (!D.is_host() && !D.has_extension(ExtensionName)) { std::cout << "Test skipped\n"; @@ -63,20 +60,23 @@ int runTests(const string_class &ExtensionName) { } // Check some less standards WG sizes and corner cases first. - test>(0, 4, 4); - test>(0, 4, 64); + test>(0, 4, 4); + test>(0, 4, 64); - test>( - getMaximumFPValue(), 7, 7); - test>( - getMinimumFPValue(), 7, 7 * 5); + test>(getMaximumFPValue(), 7, 7); + test>(getMinimumFPValue(), 7, 7 * 5); #if __cplusplus >= 201402L - test>(1, 3, 3 * 5); - test>( - getMaximumFPValue(), 3, 3); - test>( - getMinimumFPValue(), 3, 3); + test>(1, 3, 3 * 5); + test>(getMaximumFPValue(), 3, 3); + test>(getMinimumFPValue(), 3, 3); #endif // __cplusplus >= 201402L std::cout << "Test passed\n"; diff --git a/sycl/test/reduction/reduction_nd_s0_dw.cpp b/sycl/test/reduction/reduction_nd_s0_dw.cpp index 8b900a3a1fd9a..0a4a9032a3b30 100644 --- a/sycl/test/reduction/reduction_nd_s0_dw.cpp +++ b/sycl/test/reduction/reduction_nd_s0_dw.cpp @@ -16,10 +16,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -40,7 +38,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -59,36 +57,44 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, 64); + test>(0, 8, + 128); + test>(0, 16, + 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, 256); // Check with various operations. - test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(1, 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>( + (std::numeric_limits::max)(), 8, 256); + test>( + (std::numeric_limits::min)(), 8, 256); // Check with various types. - test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(1, 8, + 256); + test>( + getMaximumFPValue(), 8, 256); + test>( + getMinimumFPValue(), 8, 256); // Check with CUSTOM type. - test, 0, CustomVecPlus>(CustomVec(0), 8, 256); + test, 0, + CustomVecPlus>(CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_s0_rw.cpp b/sycl/test/reduction/reduction_nd_s0_rw.cpp index 029458942390a..d346016ae4bfd 100644 --- a/sycl/test/reduction/reduction_nd_s0_rw.cpp +++ b/sycl/test/reduction/reduction_nd_s0_rw.cpp @@ -16,10 +16,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -42,7 +40,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -61,36 +59,47 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, + 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, + 64); + test>(0, 8, + 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, + 256); // Check with various operations. - test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(1, 8, 256); + test>(0, 8, + 256); + test>(0, 8, + 256); + test>(~0, 8, 256); + test>( + (std::numeric_limits::max)(), 8, 256); + test>( + (std::numeric_limits::min)(), 8, 256); // Check with various types. - test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(1, 8, 256); + test>( + getMaximumFPValue(), 8, 256); + test>( + getMinimumFPValue(), 8, 256); // Check with CUSTOM type. - test, 0, CustomVecPlus>(CustomVec(0), 8, 256); + test, 0, + CustomVecPlus>(CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_s1_dw.cpp b/sycl/test/reduction/reduction_nd_s1_dw.cpp index 7cc975e261dc2..df31961a1c908 100644 --- a/sycl/test/reduction/reduction_nd_s1_dw.cpp +++ b/sycl/test/reduction/reduction_nd_s1_dw.cpp @@ -17,10 +17,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -41,7 +39,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -60,36 +58,48 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, + 2); + test>(0, 7, + 7); + test>(0, 9, + 18); + test>(0, 49, 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, + 64); + test>(0, 4, 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, + 256); + test>(0, 64, 256); + test>(0, 128, 256); + test>(0, 256, + 256); // Check with various operations. - test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(1, 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>( + (std::numeric_limits::max)(), 8, 256); + test>( + (std::numeric_limits::min)(), 8, 256); // Check with various types. - test>(1, 8, 256); - test>(getMaximumFPValue(), 8, 256); - test>(getMinimumFPValue(), 8, 256); + test>(1, 8, + 256); + test>( + getMaximumFPValue(), 8, 256); + test>( + getMinimumFPValue(), 8, 256); // Check with CUSTOM type. - test, 1, CustomVecPlus>(CustomVec(0), 8, 256); + test, 1, + CustomVecPlus>(CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_nd_s1_rw.cpp b/sycl/test/reduction/reduction_nd_s1_rw.cpp index 2c8f6a8343e83..ec7044f1c12f8 100644 --- a/sycl/test/reduction/reduction_nd_s1_rw.cpp +++ b/sycl/test/reduction/reduction_nd_s1_rw.cpp @@ -17,10 +17,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -43,7 +41,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -62,36 +60,45 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // Check some less standards WG sizes and corner cases first. - test>(0, 2, 2); - test>(0, 7, 7); - test>(0, 9, 18); - test>(0, 49, 49 * 5); + test>(0, 2, 2); + test>(0, 7, 7); + test>(0, 9, 18); + test>(0, 49, + 49 * 5); // Try some power-of-two work-group sizes. - test>(0, 2, 64); - test>(0, 4, 64); - test>(0, 8, 128); - test>(0, 16, 256); - test>(0, 32, 256); - test>(0, 64, 256); - test>(0, 128, 256); - test>(0, 256, 256); + test>(0, 2, 64); + test>(0, 4, + 64); + test>(0, 8, 128); + test>(0, 16, 256); + test>(0, 32, 256); + test>(0, 64, + 256); + test>(0, 128, + 256); + test>(0, 256, 256); // Check with various operations. - test>(1, 8, 256); - test>(0, 8, 256); - test>(0, 8, 256); - test>(~0, 8, 256); - test>((std::numeric_limits::max)(), 8, 256); - test>((std::numeric_limits::min)(), 8, 256); + test>(1, 8, 256); + test>(0, 8, 256); + test>(0, 8, 256); + test>(~0, 8, 256); + test>( + (std::numeric_limits::max)(), 8, 256); + test>( + (std::numeric_limits::min)(), 8, 256); // Check with various types. - test>(1, 8, 256); - test>(getMaximumFPValue(), 1, 16); - test>(getMinimumFPValue(), 8, 256); + test>(1, 8, 256); + test>( + getMaximumFPValue(), 1, 16); + test>( + getMinimumFPValue(), 8, 256); // Check with CUSTOM type. - test, 1, CustomVecPlus>(CustomVec(0), 8, 256); + test, 1, + CustomVecPlus>(CustomVec(0), 8, 256); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_placeholder.cpp b/sycl/test/reduction/reduction_placeholder.cpp index 77633992ea2df..b0ce9ddfa88b8 100644 --- a/sycl/test/reduction/reduction_placeholder.cpp +++ b/sycl/test/reduction/reduction_placeholder.cpp @@ -19,10 +19,8 @@ using namespace cl::sycl; -template -class SomeClass; - -template +template void test(T Identity, size_t WGSize, size_t NWItems) { // Initialize. T CorrectOut; @@ -34,9 +32,9 @@ void test(T Identity, size_t WGSize, size_t NWItems) { (OutBuf.template get_access())[0] = Identity; - auto Out = accessor(OutBuf); + auto Out = + accessor(OutBuf); // Compute. queue Q; Q.submit([&](handler &CGH) { @@ -46,7 +44,7 @@ void test(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -65,21 +63,26 @@ void test(T Identity, size_t WGSize, size_t NWItems) { int main() { // fast atomics and fast reduce - test>(0, 49, 49 * 5); - test>(0, 8, 8); + test>(0, 49, 49 * 5); + test>(0, 8, + 8); // fast atomics - test>(0, 7, 7 * 3); - test>(0, 4, 128); + test>(0, 7, + 7 * 3); + test>(0, 4, 128); // fast reduce - test>(getMaximumFPValue(), 5, 5 * 7); - test>(getMinimumFPValue(), 4, 128); + test>( + getMaximumFPValue(), 5, 5 * 7); + test>( + getMinimumFPValue(), 4, 128); // generic algorithm - test>(1, 7, 7 * 5); - test>(1, 8, 16); - test, 0, CustomVecPlus>(CustomVec(0), 8, 8 * 3); + test>(1, 7, 7 * 5); + test>(1, 8, 16); + test, 0, + CustomVecPlus>(CustomVec(0), 8, 8 * 3); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/reduction/reduction_transparent.cpp b/sycl/test/reduction/reduction_transparent.cpp index dea789b395401..5c7608b8930ab 100644 --- a/sycl/test/reduction/reduction_transparent.cpp +++ b/sycl/test/reduction/reduction_transparent.cpp @@ -19,14 +19,12 @@ using namespace cl::sycl; -template -class SomeIdClass; -template -class SomeNoIdClass; +template class KernelNameGroup; // Checks reductions initialized with transparent functor and explicitly set // identity value. -template +template void testId(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -46,7 +44,7 @@ void testId(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, ONEAPI::reduction(Out, Identity, BOp), [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); @@ -67,7 +65,8 @@ void testId(T Identity, size_t WGSize, size_t NWItems) { // Checks reductions initialized with transparent functor and identity // value not explicitly specified. The parameter 'Identity' is passed here // only to pre-initialize input data correctly. -template +template void testNoId(T Identity, size_t WGSize, size_t NWItems) { buffer InBuf(NWItems); buffer OutBuf(1); @@ -87,7 +86,7 @@ void testNoId(T Identity, size_t WGSize, size_t NWItems) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for( NDRange, ONEAPI::reduction(Out, BOp), [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -104,18 +103,26 @@ void testNoId(T Identity, size_t WGSize, size_t NWItems) { } } -template +template void test(T Identity, size_t WGSize, size_t NWItems) { - testId(Identity, WGSize, NWItems); - testNoId(Identity, WGSize, NWItems); + testId, + T, Dim, BinaryOperation>(Identity, WGSize, NWItems); + testNoId, + T, Dim, BinaryOperation>(Identity, WGSize, NWItems); } int main() { #if __cplusplus >= 201402L - test>(getMinimumFPValue(), 7, 7 * 5); - test>(0, 7, 49); - test>(1, 4, 16); - test>(0, 1, 512 + 32); + test>( + getMinimumFPValue(), 7, 7 * 5); + test>(0, 7, 49); + test>(1, 4, 16); + test>( + 0, 1, 512 + 32); #endif // __cplusplus >= 201402L std::cout << "Test passed\n"; diff --git a/sycl/test/reduction/reduction_usm.cpp b/sycl/test/reduction/reduction_usm.cpp index 0ada4c515b615..d3d96b2cbcaa7 100644 --- a/sycl/test/reduction/reduction_usm.cpp +++ b/sycl/test/reduction/reduction_usm.cpp @@ -19,12 +19,10 @@ using namespace cl::sycl; -template -class SomeClass; -template -class Copy1; +template class KernelNameGroup; -template +template void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) { queue Q; auto Dev = Q.get_device(); @@ -44,9 +42,9 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) { return; if (AllocType == usm::alloc::device) { event E = Q.submit([&](handler &CGH) { - CGH.single_task>([=]() { - *ReduVarPtr = Identity; - }); + CGH.single_task>( + [=]() { *ReduVarPtr = Identity; }); }); E.wait(); } else { @@ -67,7 +65,8 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) { range<1> GlobalRange(NWItems); range<1> LocalRange(WGSize); nd_range<1> NDRange(GlobalRange, LocalRange); - CGH.parallel_for>( + CGH.parallel_for>( NDRange, Redu, [=](nd_item<1> NDIt, auto &Sum) { Sum.combine(In[NDIt.get_global_linear_id()]); }); @@ -97,32 +96,41 @@ void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) { free(ReduVarPtr, Q.get_context()); } -template +template void testUSM(T Identity, size_t WGSize, size_t NWItems) { - test(Identity, WGSize, NWItems, usm::alloc::shared); - test(Identity, WGSize, NWItems, usm::alloc::host); - test(Identity, WGSize, NWItems, usm::alloc::device); + test, T, Dim, + BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::shared); + test, T, + Dim, BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::host); + test, T, + Dim, BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::device); } int main() { // fast atomics and fast reduce - testUSM>(0, 49, 49 * 5); - testUSM>(0, 8, 128); + testUSM>(0, 49, + 49 * 5); + testUSM>(0, 8, 128); // fast atomics - testUSM>(0, 7, 7 * 3); - testUSM>(0, 4, 128); + testUSM>(0, 7, + 7 * 3); + testUSM>(0, 4, + 128); // fast reduce - testUSM>(getMaximumFPValue(), 5, - 5 * 7); - testUSM>(getMinimumFPValue(), 4, 128); + testUSM>( + getMaximumFPValue(), 5, 5 * 7); + testUSM>( + getMinimumFPValue(), 4, 128); // generic algorithm - testUSM>(1, 7, 7 * 5); - testUSM>(1, 8, 16); - testUSM, 0, CustomVecPlus>( - CustomVec(0), 8, 8 * 3); + testUSM>(1, 7, 7 * 5); + testUSM>( + 1, 8, 16); + testUSM, 0, + CustomVecPlus>(CustomVec(0), 8, 8 * 3); std::cout << "Test passed\n"; return 0; diff --git a/sycl/test/sub_group/generic-shuffle.cpp b/sycl/test/sub_group/generic-shuffle.cpp index 786a9b57a0ec2..3363e44d88d8c 100644 --- a/sycl/test/sub_group/generic-shuffle.cpp +++ b/sycl/test/sub_group/generic-shuffle.cpp @@ -25,7 +25,7 @@ template class pointer_kernel; using namespace cl::sycl; -template +template void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) { try { nd_range<1> NdRange(G, L); @@ -42,29 +42,30 @@ void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) { auto acc_xor = buf_xor.template get_access(cgh); auto sgsizeacc = sgsizebuf.get_access(cgh); - cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - ONEAPI::sub_group SG = NdItem.get_sub_group(); - uint32_t wggid = NdItem.get_global_id(0); - uint32_t sgid = SG.get_group_id().get(0); - if (wggid == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; + cgh.parallel_for( + NdRange, [=](nd_item<1> NdItem) { + ONEAPI::sub_group SG = NdItem.get_sub_group(); + uint32_t wggid = NdItem.get_global_id(0); + uint32_t sgid = SG.get_group_id().get(0); + if (wggid == 0) + sgsizeacc[0] = SG.get_max_local_range()[0]; - T *ptr = static_cast(0x0) + wggid; + T *ptr = static_cast(0x0) + wggid; - /*GID of middle element in every subgroup*/ - acc[NdItem.get_global_id()] = - SG.shuffle(ptr, SG.get_max_local_range()[0] / 2); + /*GID of middle element in every subgroup*/ + acc[NdItem.get_global_id()] = + SG.shuffle(ptr, SG.get_max_local_range()[0] / 2); - /* Save GID-SGID */ - acc_up[NdItem.get_global_id()] = SG.shuffle_up(ptr, sgid); + /* Save GID-SGID */ + acc_up[NdItem.get_global_id()] = SG.shuffle_up(ptr, sgid); - /* Save GID+SGID */ - acc_down[NdItem.get_global_id()] = SG.shuffle_down(ptr, sgid); + /* Save GID+SGID */ + acc_down[NdItem.get_global_id()] = SG.shuffle_down(ptr, sgid); - /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - acc_xor[NdItem.get_global_id()] = - SG.shuffle_xor(ptr, sgid % SG.get_max_local_range()[0]); - }); + /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ + acc_xor[NdItem.get_global_id()] = + SG.shuffle_xor(ptr, sgid % SG.get_max_local_range()[0]); + }); }); auto acc = buf.template get_access(); auto acc_up = buf_up.template get_access(); @@ -119,7 +120,7 @@ void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) { } } -template +template void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) { // Fill a vector with values that will be shuffled @@ -143,29 +144,30 @@ void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) { auto sgsizeacc = sgsizebuf.get_access(cgh); auto in = buf_in.template get_access(cgh); - cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { - ONEAPI::sub_group SG = NdItem.get_sub_group(); - uint32_t wggid = NdItem.get_global_id(0); - uint32_t sgid = SG.get_group_id().get(0); - if (wggid == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; + cgh.parallel_for( + NdRange, [=](nd_item<1> NdItem) { + ONEAPI::sub_group SG = NdItem.get_sub_group(); + uint32_t wggid = NdItem.get_global_id(0); + uint32_t sgid = SG.get_group_id().get(0); + if (wggid == 0) + sgsizeacc[0] = SG.get_max_local_range()[0]; - T val = in[wggid]; + T val = in[wggid]; - /*GID of middle element in every subgroup*/ - acc[NdItem.get_global_id()] = - SG.shuffle(val, SG.get_max_local_range()[0] / 2); + /*GID of middle element in every subgroup*/ + acc[NdItem.get_global_id()] = + SG.shuffle(val, SG.get_max_local_range()[0] / 2); - /* Save GID-SGID */ - acc_up[NdItem.get_global_id()] = SG.shuffle_up(val, sgid); + /* Save GID-SGID */ + acc_up[NdItem.get_global_id()] = SG.shuffle_up(val, sgid); - /* Save GID+SGID */ - acc_down[NdItem.get_global_id()] = SG.shuffle_down(val, sgid); + /* Save GID+SGID */ + acc_down[NdItem.get_global_id()] = SG.shuffle_down(val, sgid); - /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - acc_xor[NdItem.get_global_id()] = - SG.shuffle_xor(val, sgid % SG.get_max_local_range()[0]); - }); + /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ + acc_xor[NdItem.get_global_id()] = + SG.shuffle_xor(val, sgid % SG.get_max_local_range()[0]); + }); }); auto acc = buf.template get_access(); auto acc_up = buf_up.template get_access(); @@ -223,18 +225,20 @@ int main() { } // Test shuffle of pointer types - check_pointer(Queue); + check_pointer(Queue); // Test shuffle of non-native types auto ComplexFloatGenerator = [state = std::complex(0, 1)]() mutable { return state += std::complex(2, 2); }; - check_struct>(Queue, ComplexFloatGenerator); + check_struct>( + Queue, ComplexFloatGenerator); auto ComplexDoubleGenerator = [state = std::complex(0, 1)]() mutable { return state += std::complex(2, 2); }; - check_struct>(Queue, ComplexDoubleGenerator); + check_struct>( + Queue, ComplexDoubleGenerator); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test/sub_group/reduce.cpp b/sycl/test/sub_group/reduce.cpp index f7a324f10b38c..6644516f52158 100644 --- a/sycl/test/sub_group/reduce.cpp +++ b/sycl/test/sub_group/reduce.cpp @@ -24,11 +24,11 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; } diff --git a/sycl/test/sub_group/reduce.hpp b/sycl/test/sub_group/reduce.hpp index 66a737a95c8c4..f606dcf5e9898 100644 --- a/sycl/test/sub_group/reduce.hpp +++ b/sycl/test/sub_group/reduce.hpp @@ -9,11 +9,11 @@ #include "helper.hpp" #include -template class sycl_subgr; +template class sycl_subgr; using namespace cl::sycl; -template +template void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, size_t G = 256, size_t L = 64) { try { @@ -23,7 +23,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, Queue.submit([&](handler &cgh) { auto sgsizeacc = sgsizebuf.get_access(cgh); auto acc = buf.template get_access(cgh); - cgh.parallel_for>( + cgh.parallel_for( NdRange, [=](nd_item<1> NdItem) { ONEAPI::sub_group sg = NdItem.get_sub_group(); if (skip_init) { @@ -64,30 +64,48 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, } } -template void check(queue &Queue, size_t G = 256, size_t L = 64) { +template +void check(queue &Queue, size_t G = 256, size_t L = 64) { // limit data range for half to avoid rounding issues if (std::is_same::value) { G = 64; L = 32; } - check_op(Queue, T(L), ONEAPI::plus(), false, G, L); - check_op(Queue, T(0), ONEAPI::plus(), true, G, L); + check_op< + sycl_subgr, T>( + Queue, T(L), ONEAPI::plus(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::plus(), true, G, L); - check_op(Queue, T(0), ONEAPI::minimum(), false, G, L); - check_op(Queue, T(G), ONEAPI::minimum(), true, G, L); + check_op, + T>(Queue, T(0), ONEAPI::minimum(), false, G, L); + check_op, + T>(Queue, T(G), ONEAPI::minimum(), true, G, L); - check_op(Queue, T(G), ONEAPI::maximum(), false, G, L); - check_op(Queue, T(0), ONEAPI::maximum(), true, G, L); + check_op, + T>(Queue, T(G), ONEAPI::maximum(), false, G, L); + check_op, + T>(Queue, T(0), ONEAPI::maximum(), true, G, L); #if __cplusplus >= 201402L - check_op(Queue, T(L), ONEAPI::plus<>(), false, G, L); - check_op(Queue, T(0), ONEAPI::plus<>(), true, G, L); + check_op, + T>(Queue, T(L), ONEAPI::plus<>(), false, G, L); + check_op, + T>(Queue, T(0), ONEAPI::plus<>(), true, G, L); - check_op(Queue, T(0), ONEAPI::minimum<>(), false, G, L); - check_op(Queue, T(G), ONEAPI::minimum<>(), true, G, L); + check_op, + T>(Queue, T(0), ONEAPI::minimum<>(), false, G, L); + check_op, + T>(Queue, T(G), ONEAPI::minimum<>(), true, G, L); - check_op(Queue, T(G), ONEAPI::maximum<>(), false, G, L); - check_op(Queue, T(0), ONEAPI::maximum<>(), true, G, L); + check_op, + T>(Queue, T(G), ONEAPI::maximum<>(), false, G, L); + check_op< + sycl_subgr, + T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L); #endif } diff --git a/sycl/test/sub_group/reduce_fp16.cpp b/sycl/test/sub_group/reduce_fp16.cpp index 17bd8507a7c90..dfe9299bfcd9e 100644 --- a/sycl/test/sub_group/reduce_fp16.cpp +++ b/sycl/test/sub_group/reduce_fp16.cpp @@ -19,7 +19,7 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; } diff --git a/sycl/test/sub_group/reduce_fp64.cpp b/sycl/test/sub_group/reduce_fp64.cpp index 1af1ae040e02e..958052e979324 100644 --- a/sycl/test/sub_group/reduce_fp64.cpp +++ b/sycl/test/sub_group/reduce_fp64.cpp @@ -24,7 +24,7 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; } diff --git a/sycl/test/sub_group/scan.cpp b/sycl/test/sub_group/scan.cpp index 32df9d63e9faa..03d3f2ab90f53 100644 --- a/sycl/test/sub_group/scan.cpp +++ b/sycl/test/sub_group/scan.cpp @@ -25,11 +25,11 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; } diff --git a/sycl/test/sub_group/scan.hpp b/sycl/test/sub_group/scan.hpp index ae9b4ced66ab0..c9d630dea2ea3 100644 --- a/sycl/test/sub_group/scan.hpp +++ b/sycl/test/sub_group/scan.hpp @@ -10,11 +10,11 @@ #include #include -template class sycl_subgr; +template class sycl_subgr; using namespace cl::sycl; -template +template void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, size_t G = 256, size_t L = 64) { try { @@ -25,7 +25,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, auto sgsizeacc = sgsizebuf.get_access(cgh); auto exacc = exbuf.template get_access(cgh); auto inacc = inbuf.template get_access(cgh); - cgh.parallel_for>( + cgh.parallel_for( NdRange, [=](nd_item<1> NdItem) { ONEAPI::sub_group sg = NdItem.get_sub_group(); if (skip_init) { @@ -72,58 +72,83 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false, } } -template void check(queue &Queue, size_t G = 256, size_t L = 64) { +template +void check(queue &Queue, size_t G = 256, size_t L = 64) { // limit data range for half to avoid rounding issues if (std::is_same::value) { G = 64; L = 32; } - check_op(Queue, T(L), ONEAPI::plus(), false, G, L); - check_op(Queue, T(0), ONEAPI::plus(), true, G, L); + check_op, + T>(Queue, T(L), ONEAPI::plus(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::plus(), true, G, L); - check_op(Queue, T(0), ONEAPI::minimum(), false, G, L); + check_op< + sycl_subgr, + T>(Queue, T(0), ONEAPI::minimum(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, std::numeric_limits::infinity(), ONEAPI::minimum(), - true, G, L); + check_op< + sycl_subgr, + T>(Queue, std::numeric_limits::infinity(), ONEAPI::minimum(), + true, G, L); } else { - check_op(Queue, std::numeric_limits::max(), ONEAPI::minimum(), + check_op, + T>(Queue, std::numeric_limits::max(), ONEAPI::minimum(), true, G, L); } - check_op(Queue, T(G), ONEAPI::maximum(), false, G, L); + check_op< + sycl_subgr, + T>(Queue, T(G), ONEAPI::maximum(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, -std::numeric_limits::infinity(), - ONEAPI::maximum(), true, G, L); + check_op< + sycl_subgr, + T>(Queue, -std::numeric_limits::infinity(), ONEAPI::maximum(), + true, G, L); } else { - check_op(Queue, std::numeric_limits::min(), ONEAPI::maximum(), - true, G, L); + check_op, T>( + Queue, std::numeric_limits::min(), ONEAPI::maximum(), true, G, L); } #if __cplusplus >= 201402L - check_op(Queue, T(L), ONEAPI::plus<>(), false, G, L); - check_op(Queue, T(0), ONEAPI::plus<>(), true, G, L); + check_op, T>( + Queue, T(L), ONEAPI::plus<>(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::plus<>(), true, G, L); - check_op(Queue, T(0), ONEAPI::minimum<>(), false, G, L); + check_op< + sycl_subgr, + T>(Queue, T(0), ONEAPI::minimum<>(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, std::numeric_limits::infinity(), ONEAPI::minimum<>(), - true, G, L); + check_op< + sycl_subgr, + T>(Queue, std::numeric_limits::infinity(), ONEAPI::minimum<>(), true, + G, L); } else { - check_op(Queue, std::numeric_limits::max(), ONEAPI::minimum<>(), true, + check_op, + T>(Queue, std::numeric_limits::max(), ONEAPI::minimum<>(), true, G, L); } - check_op(Queue, T(G), ONEAPI::maximum<>(), false, G, L); + check_op, T>( + Queue, T(G), ONEAPI::maximum<>(), false, G, L); if (std::is_floating_point::value || std::is_same::value) { - check_op(Queue, -std::numeric_limits::infinity(), ONEAPI::maximum<>(), - true, G, L); + check_op, T>( + Queue, -std::numeric_limits::infinity(), ONEAPI::maximum<>(), true, + G, L); } else { - check_op(Queue, std::numeric_limits::min(), ONEAPI::maximum<>(), true, - G, L); + check_op< + sycl_subgr, + T>(Queue, std::numeric_limits::min(), ONEAPI::maximum<>(), true, G, + L); } #endif } diff --git a/sycl/test/sub_group/scan_fp16.cpp b/sycl/test/sub_group/scan_fp16.cpp index 13e3f45b27944..4c0570fd4bdf7 100644 --- a/sycl/test/sub_group/scan_fp16.cpp +++ b/sycl/test/sub_group/scan_fp16.cpp @@ -23,7 +23,7 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; } diff --git a/sycl/test/sub_group/scan_fp64.cpp b/sycl/test/sub_group/scan_fp64.cpp index f2ecd6024e7a4..de7bbd9707464 100644 --- a/sycl/test/sub_group/scan_fp64.cpp +++ b/sycl/test/sub_group/scan_fp64.cpp @@ -25,7 +25,7 @@ int main() { std::cout << "Skipping test\n"; return 0; } - check(Queue); + check(Queue); std::cout << "Test passed." << std::endl; return 0; }