From 9ada7f4d121cee8f6d2766d4d674e42059c89771 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 24 Feb 2020 15:49:37 -0800 Subject: [PATCH 01/19] [SYCL] Refactor sub-group code for reuse Preparation to re-use code for work-group collectives: - Move calc to functional.hpp - Make SPIR-V scope an explicit argument to calc - Add C++ helper for __spirv_GroupBroadcast Signed-off-by: John Pennycook --- sycl/include/CL/sycl/detail/spirv.hpp | 51 ++++++++++++ sycl/include/CL/sycl/detail/type_traits.hpp | 8 ++ sycl/include/CL/sycl/intel/functional.hpp | 58 +++++++++++++- sycl/include/CL/sycl/intel/sub_group.hpp | 88 +++++---------------- 4 files changed, 134 insertions(+), 71 deletions(-) create mode 100644 sycl/include/CL/sycl/detail/spirv.hpp diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp new file mode 100644 index 0000000000000..2c53bd538bd88 --- /dev/null +++ b/sycl/include/CL/sycl/detail/spirv.hpp @@ -0,0 +1,51 @@ +//===-- spirv.hpp - Helpers to generate SPIR-V instructions ----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#ifdef __SYCL_DEVICE_ONLY__ +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { +namespace spirv { +template <__spv::Scope S, typename T, typename IdT> +detail::enable_if_t::value, T> +GroupBroadcast(T x, IdT local_id) { + using OCLT = detail::ConvertToOpenCLType_t; + using OCLIdT = detail::ConvertToOpenCLType_t; + OCLT ocl_x = detail::convertDataToType(x); + OCLIdT ocl_id = detail::convertDataToType(local_id); + return __spirv_GroupBroadcast(S, ocl_x, ocl_id); +} + +template <__spv::Scope S, typename T, int dimensions> +T GroupBroadcast(T x, id local_id) { + if (dimensions == 1) { + return GroupBroadcast(x, local_id[0]); + } + using IdT = vec; + using OCLT = detail::ConvertToOpenCLType_t; + using OCLIdT = detail::ConvertToOpenCLType_t; + IdT vec_id; + for (int i = 0; i < dimensions; ++i) { + vec_id[i] = local_id[dimensions - i - 1]; + } + OCLT ocl_x = detail::convertDataToType(x); + OCLIdT ocl_id = detail::convertDataToType(vec_id); + return __spirv_GroupBroadcast(S, ocl_x, ocl_id); +} +} // namespace spirv +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) +#endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/CL/sycl/detail/type_traits.hpp b/sycl/include/CL/sycl/detail/type_traits.hpp index 4050c2742b66d..6b0bbd255f103 100644 --- a/sycl/include/CL/sycl/detail/type_traits.hpp +++ b/sycl/include/CL/sycl/detail/type_traits.hpp @@ -195,6 +195,14 @@ template struct is_arithmetic : bool_constant::value || is_floating_point::value> {}; +template +struct is_scalar_arithmetic + : bool_constant::value && is_arithmetic::value> {}; + +template +struct is_vector_arithmetic + : bool_constant::value && is_arithmetic::value> {}; + // is_pointer template struct is_pointer_impl : std::false_type {}; diff --git a/sycl/include/CL/sycl/intel/functional.hpp b/sycl/include/CL/sycl/intel/functional.hpp index 0971d9089205c..8c4bfd0025994 100644 --- a/sycl/include/CL/sycl/intel/functional.hpp +++ b/sycl/include/CL/sycl/intel/functional.hpp @@ -44,7 +44,8 @@ template <> struct maximum { template auto operator()(T &&lhs, U &&rhs) const -> typename std::common_type::type { - return std::greater<>()(std::forward(lhs), std::forward(rhs)) + return std::greater<>()(std::forward(lhs), + std::forward(rhs)) ? std::forward(lhs) : std::forward(rhs); } @@ -54,5 +55,60 @@ template <> struct maximum { template using plus = std::plus; } // namespace intel + +#ifdef __SYCL_DEVICE_ONLY__ +namespace detail { + +struct GroupOpISigned {}; +struct GroupOpIUnsigned {}; +struct GroupOpFP {}; + +template struct GroupOpTag; + +template +struct GroupOpTag::value>> { + using type = GroupOpISigned; +}; + +template +struct GroupOpTag::value>> { + using type = GroupOpIUnsigned; +}; + +template +struct GroupOpTag::value>> { + using type = GroupOpFP; +}; + +#define __SYCL_CALC_OVERLOAD(GroupTag, SPIRVOperation, BinaryOperation) \ + template \ + static T calc(GroupTag, T x, BinaryOperation op) { \ + using OCLT = detail::ConvertToOpenCLType_t; \ + OCLT Arg = x; \ + OCLT Ret = __spirv_Group##SPIRVOperation(S, O, Arg); \ + return Ret; \ + } + +__SYCL_CALC_OVERLOAD(GroupOpISigned, SMin, intel::minimum) +__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMin, intel::minimum) +__SYCL_CALC_OVERLOAD(GroupOpFP, FMin, intel::minimum) +__SYCL_CALC_OVERLOAD(GroupOpISigned, SMax, intel::maximum) +__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, UMax, intel::maximum) +__SYCL_CALC_OVERLOAD(GroupOpFP, FMax, intel::maximum) +__SYCL_CALC_OVERLOAD(GroupOpISigned, IAdd, intel::plus) +__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, intel::plus) +__SYCL_CALC_OVERLOAD(GroupOpFP, FAdd, intel::plus) + +#undef __SYCL_CALC_OVERLOAD + +template class BinaryOperation> +static T calc(typename GroupOpTag::type, T x, BinaryOperation) { + return calc(typename GroupOpTag::type(), x, BinaryOperation()); +} + +} // namespace detail +#endif + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/sub_group.hpp b/sycl/include/CL/sycl/intel/sub_group.hpp index c8326eaa0728b..12dfb0eb262f7 100644 --- a/sycl/include/CL/sycl/intel/sub_group.hpp +++ b/sycl/include/CL/sycl/intel/sub_group.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -33,12 +34,6 @@ namespace detail { namespace sub_group { -template T broadcast(T x, id<1> local_id) { - using OCLT = detail::ConvertToOpenCLType_t; - return __spirv_GroupBroadcast(__spv::Scope::Subgroup, OCLT(x), - local_id.get(0)); -} - #define __SYCL_SG_GENERATE_BODY_1ARG(name, SPIRVOperation) \ template T name(T x, id<1> local_id) { \ using OCLT = detail::ConvertToOpenCLType_t; \ @@ -130,52 +125,6 @@ void store(multi_ptr dst, const vec &x) { bit_cast(x)); } -struct GroupOpISigned {}; struct GroupOpIUnsigned {}; struct GroupOpFP {}; - -template struct GroupOpTag; - -template -struct GroupOpTag::value>> { - using type = GroupOpISigned; -}; - -template -struct GroupOpTag::value>> { - using type = GroupOpIUnsigned; -}; - -template -struct GroupOpTag::value>> { - using type = GroupOpFP; -}; - -#define __SYCL_SG_CALC_OVERLOAD(GroupTag, SPIRVOperation, BinaryOperation) \ - template \ - static T calc(GroupTag, T x, BinaryOperation op) { \ - using OCLT = detail::ConvertToOpenCLType_t; \ - OCLT Arg = x; \ - OCLT Ret = __spirv_Group##SPIRVOperation(__spv::Scope::Subgroup, O, Arg); \ - return Ret; \ - } - -__SYCL_SG_CALC_OVERLOAD(GroupOpISigned, SMin, intel::minimum) -__SYCL_SG_CALC_OVERLOAD(GroupOpIUnsigned, UMin, intel::minimum) -__SYCL_SG_CALC_OVERLOAD(GroupOpFP, FMin, intel::minimum) -__SYCL_SG_CALC_OVERLOAD(GroupOpISigned, SMax, intel::maximum) -__SYCL_SG_CALC_OVERLOAD(GroupOpIUnsigned, UMax, intel::maximum) -__SYCL_SG_CALC_OVERLOAD(GroupOpFP, FMax, intel::maximum) -__SYCL_SG_CALC_OVERLOAD(GroupOpISigned, IAdd, intel::plus) -__SYCL_SG_CALC_OVERLOAD(GroupOpIUnsigned, IAdd, intel::plus) -__SYCL_SG_CALC_OVERLOAD(GroupOpFP, FAdd, intel::plus) - -#undef __SYCL_SG_CALC_OVERLOAD - -template class BinaryOperation> -static T calc(typename GroupOpTag::type, T x, BinaryOperation) { - return calc(typename GroupOpTag::type(), x, BinaryOperation()); -} - } // namespace sub_group } // namespace detail @@ -213,20 +162,21 @@ struct sub_group { } template - using EnableIfIsScalarArithmetic = detail::enable_if_t< - !detail::is_vec::value && detail::is_arithmetic::value, T>; + using EnableIfIsScalarArithmetic = + detail::enable_if_t::value, T>; /* --- collectives --- */ template EnableIfIsScalarArithmetic broadcast(T x, id<1> local_id) const { - return detail::sub_group::broadcast(x, local_id); + return detail::spirv::GroupBroadcast<__spv::Scope::Subgroup>(x, local_id); } template EnableIfIsScalarArithmetic reduce(T x, BinaryOperation op) const { - return detail::sub_group::calc( - typename detail::sub_group::GroupOpTag::type(), x, op); + return detail::calc( + typename detail::GroupOpTag::type(), x, op); } template @@ -236,8 +186,9 @@ struct sub_group { template EnableIfIsScalarArithmetic exclusive_scan(T x, BinaryOperation op) const { - return detail::sub_group::calc( - typename detail::sub_group::GroupOpTag::type(), x, op); + return detail::calc( + typename detail::GroupOpTag::type(), x, op); } template @@ -255,13 +206,14 @@ struct sub_group { template EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op) const { - return detail::sub_group::calc( - typename detail::sub_group::GroupOpTag::type(), x, op); + return detail::calc( + typename detail::GroupOpTag::type(), x, op); } template EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op, - T init) const { + T init) const { if (get_local_id().get(0) == 0) { x = op(init, x); } @@ -271,8 +223,7 @@ struct sub_group { /* --- one-input shuffles --- */ /* indices in [0 , sub_group size) */ - template - T shuffle(T x, id<1> local_id) const { + template T shuffle(T x, id<1> local_id) const { return detail::sub_group::shuffle(x, local_id); } @@ -280,21 +231,18 @@ struct sub_group { return detail::sub_group::shuffle_down(x, x, delta); } - template - T shuffle_up(T x, uint32_t delta) const { + template T shuffle_up(T x, uint32_t delta) const { return detail::sub_group::shuffle_up(x, x, delta); } - template - T shuffle_xor(T x, id<1> value) const { + template T shuffle_xor(T x, id<1> value) const { return detail::sub_group::shuffle_xor(x, value); } /* --- two-input shuffles --- */ /* indices in [0 , 2 * sub_group size) */ - template - T shuffle(T x, T y, id<1> local_id) const { + template T shuffle(T x, T y, id<1> local_id) const { return detail::sub_group::shuffle_down(x, y, (local_id - get_local_id()).get(0)); } From a61ecd0303f953163cdb66d114f2f3c34cfdb5a8 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 24 Feb 2020 15:51:04 -0800 Subject: [PATCH 02/19] [SYCL] Add static members to group class Simplifies definition of library functions by providing: - id_type - range_type - linear_id_type - dimensions Signed-off-by: John Pennycook --- sycl/include/CL/sycl/group.hpp | 139 +++++++++++++++++---------------- 1 file changed, 73 insertions(+), 66 deletions(-) diff --git a/sycl/include/CL/sycl/group.hpp b/sycl/include/CL/sycl/group.hpp index 7fc777a53e644..733fe49473aa8 100644 --- a/sycl/include/CL/sycl/group.hpp +++ b/sycl/include/CL/sycl/group.hpp @@ -81,25 +81,32 @@ template class private_memory { #endif // #ifdef __SYCL_DEVICE_ONLY__ }; -template class group { +template class group { public: +#ifdef __SYCL_INTEL_GROUP_ALGORITHMS__ + typedef id id_type; + typedef range range_type; + typedef size_t linear_id_type; + static constexpr int dimensions = Dimensions; +#endif + group() = delete; - id get_id() const { return index; } + id get_id() const { return index; } size_t get_id(int dimension) const { return index[dimension]; } - range get_global_range() const { return globalRange; } + range get_global_range() const { return globalRange; } size_t get_global_range(int dimension) const { return globalRange[dimension]; } - range get_local_range() const { return localRange; } + range get_local_range() const { return localRange; } size_t get_local_range(int dimension) const { return localRange[dimension]; } - range get_group_range() const { return groupRange; } + range get_group_range() const { return groupRange; } size_t get_group_range(int dimension) const { return get_group_range()[dimension]; @@ -107,12 +114,12 @@ template class group { size_t operator[](int dimension) const { return index[dimension]; } - template + template typename std::enable_if<(dims == 1), size_t>::type get_linear_id() const { return index[0]; } - template + template typename std::enable_if<(dims == 2), size_t>::type get_linear_id() const { return index[0] * groupRange[1] + index[1]; } @@ -127,7 +134,7 @@ template class group { // size_t get_linear_id()const // Get a linearized version of the work-group id. Calculating a linear // work-group id from a multi-dimensional index follows the equation 4.3. - template + template typename std::enable_if<(dims == 3), size_t>::type get_linear_id() const { return (index[0] * groupRange[1] * groupRange[2]) + (index[1] * groupRange[2]) + index[2]; @@ -139,41 +146,41 @@ template class group { // compilers are expected to optimize when possible detail::workGroupBarrier(); #ifdef __SYCL_DEVICE_ONLY__ - range GlobalSize{ - __spirv::initGlobalSize>()}; - range LocalSize{ - __spirv::initWorkgroupSize>()}; - id GlobalId{ - __spirv::initGlobalInvocationId>()}; - id LocalId{ - __spirv::initLocalInvocationId>()}; + range GlobalSize{ + __spirv::initGlobalSize>()}; + range LocalSize{ + __spirv::initWorkgroupSize>()}; + id GlobalId{ + __spirv::initGlobalInvocationId>()}; + id LocalId{ + __spirv::initLocalInvocationId>()}; // no 'iterate' in the device code variant, because // (1) this code is already invoked by each work item as a part of the // enclosing parallel_for_work_group kernel // (2) the range this pfwi iterates over matches work group size exactly - item GlobalItem = - detail::Builder::createItem(GlobalSize, GlobalId); - item LocalItem = - detail::Builder::createItem(LocalSize, LocalId); - h_item HItem = - detail::Builder::createHItem(GlobalItem, LocalItem); + item GlobalItem = + detail::Builder::createItem(GlobalSize, GlobalId); + item LocalItem = + detail::Builder::createItem(LocalSize, LocalId); + h_item HItem = + detail::Builder::createHItem(GlobalItem, LocalItem); Func(HItem); #else - id GroupStartID = index * localRange; + id GroupStartID = index * localRange; // ... host variant needs explicit 'iterate' because it is serial - detail::NDLoop::iterate( - localRange, [&](const id &LocalID) { - item GlobalItem = - detail::Builder::createItem( + detail::NDLoop::iterate( + localRange, [&](const id &LocalID) { + item GlobalItem = + detail::Builder::createItem( globalRange, GroupStartID + LocalID); - item LocalItem = - detail::Builder::createItem(localRange, + item LocalItem = + detail::Builder::createItem(localRange, LocalID); - h_item HItem = - detail::Builder::createHItem(GlobalItem, LocalItem); + h_item HItem = + detail::Builder::createHItem(GlobalItem, LocalItem); Func(HItem); }); #endif // __SYCL_DEVICE_ONLY__ @@ -185,52 +192,52 @@ template class group { } template - void parallel_for_work_item(range flexibleRange, + void parallel_for_work_item(range flexibleRange, WorkItemFunctionT Func) const { detail::workGroupBarrier(); #ifdef __SYCL_DEVICE_ONLY__ - range GlobalSize{ - __spirv::initGlobalSize>()}; - range LocalSize{ - __spirv::initWorkgroupSize>()}; - id GlobalId{ - __spirv::initGlobalInvocationId>()}; - id LocalId{ - __spirv::initLocalInvocationId>()}; - - item GlobalItem = - detail::Builder::createItem(GlobalSize, GlobalId); - item LocalItem = - detail::Builder::createItem(LocalSize, LocalId); - h_item HItem = detail::Builder::createHItem( + range GlobalSize{ + __spirv::initGlobalSize>()}; + range LocalSize{ + __spirv::initWorkgroupSize>()}; + id GlobalId{ + __spirv::initGlobalInvocationId>()}; + id LocalId{ + __spirv::initLocalInvocationId>()}; + + item GlobalItem = + detail::Builder::createItem(GlobalSize, GlobalId); + item LocalItem = + detail::Builder::createItem(LocalSize, LocalId); + h_item HItem = detail::Builder::createHItem( GlobalItem, LocalItem, flexibleRange); // iterate over flexible range with work group size stride; each item // performs flexibleRange/LocalSize iterations (if the former is divisible // by the latter) - detail::NDLoop::iterate( + detail::NDLoop::iterate( LocalId, LocalSize, flexibleRange, - [&](const id &LogicalLocalID) { + [&](const id &LogicalLocalID) { HItem.setLogicalLocalID(LogicalLocalID); Func(HItem); }); #else - id GroupStartID = index * localRange; + id GroupStartID = index * localRange; - detail::NDLoop::iterate( - localRange, [&](const id &LocalID) { - item GlobalItem = - detail::Builder::createItem( + detail::NDLoop::iterate( + localRange, [&](const id &LocalID) { + item GlobalItem = + detail::Builder::createItem( globalRange, GroupStartID + LocalID); - item LocalItem = - detail::Builder::createItem(localRange, + item LocalItem = + detail::Builder::createItem(localRange, LocalID); - h_item HItem = detail::Builder::createHItem( + h_item HItem = detail::Builder::createHItem( GlobalItem, LocalItem, flexibleRange); - detail::NDLoop::iterate( + detail::NDLoop::iterate( LocalID, localRange, flexibleRange, - [&](const id &LogicalLocalID) { + [&](const id &LogicalLocalID) { HItem.setLogicalLocalID(LogicalLocalID); Func(HItem); }); @@ -311,7 +318,7 @@ template class group { waitForHelper(Events...); } - bool operator==(const group &rhs) const { + bool operator==(const group &rhs) const { bool Result = (rhs.globalRange == globalRange) && (rhs.localRange == localRange) && (rhs.index == index); __SYCL_ASSERT(rhs.groupRange == groupRange && @@ -319,15 +326,15 @@ template class group { return Result; } - bool operator!=(const group &rhs) const { + bool operator!=(const group &rhs) const { return !((*this) == rhs); } private: - range globalRange; - range localRange; - range groupRange; - id index; + range globalRange; + range localRange; + range groupRange; + id index; void waitForHelper() const {} @@ -343,8 +350,8 @@ template class group { protected: friend class detail::Builder; - group(const range &G, const range &L, - const range GroupRange, const id &I) + group(const range &G, const range &L, + const range GroupRange, const id &I) : globalRange(G), localRange(L), groupRange(GroupRange), index(I) { // Make sure local range divides global without remainder: __SYCL_ASSERT(((G % L).size() == 0) && From cea1028ae425d8677423bcd345526007b1f45ca3 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Mon, 24 Feb 2020 15:52:14 -0800 Subject: [PATCH 03/19] [SYCL] Add prototype of group algorithms Exposes group collectives: - all_of - any_of - none_of - reduce - exclusive_scan - inclusive_scan This prototype does not support the host device. Co-Authored-By: Roland Schulz Co-Authored-By: Alexey Sachkov Signed-off-by: John Pennycook --- sycl/include/CL/sycl.hpp | 1 + .../include/CL/sycl/intel/group_algorithm.hpp | 620 ++++++++++++++++++ sycl/test/group-algorithm/all_of.cpp | 80 +++ sycl/test/group-algorithm/any_of.cpp | 82 +++ sycl/test/group-algorithm/broadcast.cpp | 70 ++ sycl/test/group-algorithm/exclusive_scan.cpp | 145 ++++ sycl/test/group-algorithm/inclusive_scan.cpp | 145 ++++ sycl/test/group-algorithm/none_of.cpp | 80 +++ sycl/test/group-algorithm/reduce.cpp | 83 +++ 9 files changed, 1306 insertions(+) create mode 100644 sycl/include/CL/sycl/intel/group_algorithm.hpp create mode 100644 sycl/test/group-algorithm/all_of.cpp create mode 100644 sycl/test/group-algorithm/any_of.cpp create mode 100644 sycl/test/group-algorithm/broadcast.cpp create mode 100644 sycl/test/group-algorithm/exclusive_scan.cpp create mode 100644 sycl/test/group-algorithm/inclusive_scan.cpp create mode 100644 sycl/test/group-algorithm/none_of.cpp create mode 100644 sycl/test/group-algorithm/reduce.cpp diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp index 4c6e6e8790052..b4ef5514e699a 100644 --- a/sycl/include/CL/sycl.hpp +++ b/sycl/include/CL/sycl.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp new file mode 100644 index 0000000000000..6ed55956108a8 --- /dev/null +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -0,0 +1,620 @@ +//==----------- group_algorithm.hpp --- SYCL group algorithm----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#ifdef __SYCL_INTEL_GROUP_ALGORITHMS__ +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +template size_t get_local_linear_range(Group g); +template <> size_t get_local_linear_range>(group<1> g) { + return g.get_local_range(0); +} +template <> size_t get_local_linear_range>(group<2> g) { + return g.get_local_range(0) * g.get_local_range(1); +} +template <> size_t get_local_linear_range>(group<3> g) { + return g.get_local_range(0) * g.get_local_range(1) * g.get_local_range(2); +} + +template +id linear_id_to_id(range, size_t i); +template <> id<1> linear_id_to_id(range<1> r, size_t i) { return id<1>(i); } +template <> id<2> linear_id_to_id(range<2> r, size_t i) { + id<2> result; + result[0] = i / r[1]; + result[1] = i % r[1]; + return result; +} +template <> id<3> linear_id_to_id(range<3> r, size_t i) { + id<3> result; + result[0] = i / (r[1] * r[2]); + result[1] = (i % (r[1] * r[2])) / r[2]; + result[2] = i % r[2]; + return result; +} + +template struct is_group : std::false_type {}; + +template +struct is_group> : std::true_type {}; + +template struct identity {}; + +template struct identity> { + static constexpr T value = 0; +}; + +template struct identity> { + static constexpr T value = std::numeric_limits::max(); +}; + +template struct identity> { + static constexpr T value = std::numeric_limits::lowest(); +}; + +template +Function for_each(Group g, Ptr first, Ptr last, Function f) { +#ifdef __SYCL_DEVICE_ONLY__ + nd_item it = + ::sycl::detail::Builder::getNDItem(); + ptrdiff_t offset = it.get_local_linear_id(); + ptrdiff_t stride = detail::get_local_linear_range(g); + for (Ptr p = first + offset; p < last; p += stride) { + f(*p); + } + return f; +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +} // namespace detail + +namespace intel { + +template +using EnableIfIsScalarArithmetic = + ::sycl::detail::enable_if_t<::sycl::detail::is_scalar_arithmetic::value, + T>; + +template +using EnableIfIsVectorArithmetic = + ::sycl::detail::enable_if_t<::sycl::detail::is_vector_arithmetic::value, + T>; + +template +using EnableIfIsPointer = + ::sycl::detail::enable_if_t<::sycl::detail::is_pointer::value, T>; + +template bool all_of(Group g, bool pred) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv_GroupAll(__spv::Scope::Workgroup, pred); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +bool all_of(Group g, T x, Predicate pred) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + return all_of(g, pred(x)); +} + +template +EnableIfIsPointer all_of(Group g, Ptr first, Ptr last, + Predicate pred) { +#ifdef __SYCL_DEVICE_ONLY__ + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + bool partial = true; + detail::for_each(g, first, last, [&](auto x) { partial &= pred(x); }); + return all_of(g, partial); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template bool any_of(Group g, bool pred) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv_GroupAny(__spv::Scope::Workgroup, pred); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +bool any_of(Group g, T x, Predicate pred) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + return any_of(g, pred(x)); +} + +template +EnableIfIsPointer any_of(Group g, Ptr first, Ptr last, + Predicate pred) { +#ifdef __SYCL_DEVICE_ONLY__ + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + bool partial = false; + detail::for_each(g, first, last, [&](auto x) { partial |= pred(x); }); + return any_of(g, partial); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template bool none_of(Group g, bool pred) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv_GroupAll(__spv::Scope::Workgroup, not pred); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +bool none_of(Group g, T x, Predicate pred) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + return none_of(g, pred(x)); +} + +template +EnableIfIsPointer none_of(Group g, Ptr first, Ptr last, + Predicate pred) { +#ifdef __SYCL_DEVICE_ONLY__ + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + return not any_of(g, first, last, pred); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsScalarArithmetic broadcast(Group g, T x, + typename Group::id_type local_id) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); +#ifdef __SYCL_DEVICE_ONLY__ + return detail::spirv::GroupBroadcast<__spv::Scope::Workgroup>(x, local_id); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsVectorArithmetic broadcast(Group g, T x, + typename Group::id_type local_id) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); +#ifdef __SYCL_DEVICE_ONLY__ + T result; + for (int s = 0; s < x.get_size(); ++s) { + result[s] = broadcast(g, x[s], local_id); + } + return result; +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsScalarArithmetic +broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); +#ifdef __SYCL_DEVICE_ONLY__ + return broadcast( + g, x, detail::linear_id_to_id(g.get_local_range(), linear_local_id)); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsVectorArithmetic +broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); +#ifdef __SYCL_DEVICE_ONLY__ + T result; + for (int s = 0; s < x.get_size(); ++s) { + result[s] = broadcast(g, x[s], linear_local_id); + } + return result; +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsScalarArithmetic broadcast(Group g, T x) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); +#ifdef __SYCL_DEVICE_ONLY__ + return broadcast(g, x, 0); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsVectorArithmetic broadcast(Group g, T x) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); +#ifdef __SYCL_DEVICE_ONLY__ + T result; + for (int s = 0; s < x.get_size(); ++s) { + result[s] = broadcast(g, x[s]); + } + return result; +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsScalarArithmetic reduce(Group g, T x, BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert( + std::is_same::value, + "Result type of binary_op must match reduction accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + return detail::calc( + typename detail::GroupOpTag::type(), x, binary_op); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsVectorArithmetic reduce(Group g, T x, BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert( + std::is_same::value, + "Result type of binary_op must match reduction accumulation type."); + T result; + for (int s = 0; s < x.get_size(); ++s) { + result[s] = reduce(g, x[s], binary_op); + } + return result; +} + +template +EnableIfIsScalarArithmetic reduce(Group g, V x, T init, + BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert( + std::is_same::value, + "Result type of binary_op must match reduction accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + return binary_op(init, reduce(g, x, binary_op)); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsVectorArithmetic reduce(Group g, V x, T init, + BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert( + std::is_same::value, + "Result type of binary_op must match reduction accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + T result = init; + for (int s = 0; s < x.get_size(); ++s) { + result[s] = binary_op(init[s], reduce(g, x[s], binary_op)); + } + return result; +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsPointer +reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert( + std::is_same::value, + "Result type of binary_op must match reduction accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + typename Ptr::element_type partial = + detail::identity::value; + detail::for_each(g, first, last, + [&](auto x) { partial = binary_op(partial, x); }); + return reduce(g, partial, binary_op); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsPointer reduce(Group g, Ptr first, Ptr last, T init, + BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert( + std::is_same::value, + "Result type of binary_op must match reduction accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + T partial = + detail::identity::value; + detail::for_each(g, first, last, + [&](auto x) { partial = binary_op(partial, x); }); + return reduce(g, partial, init, binary_op); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsScalarArithmetic exclusive_scan(Group g, T x, + BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + return detail::calc( + typename detail::GroupOpTag::type(), x, binary_op); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsVectorArithmetic exclusive_scan(Group g, T x, + BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); + T result; + for (int s = 0; s < x.get_size(); ++s) { + result[s] = exclusive_scan(g, x[s], binary_op); + } + return result; +} + +template +EnableIfIsVectorArithmetic exclusive_scan(Group g, V x, T init, + BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); + T result; + for (int s = 0; s < x.get_size(); ++s) { + result[s] = exclusive_scan(g, x[s], init[s], binary_op); + } + return result; +} + +template +EnableIfIsScalarArithmetic exclusive_scan(Group g, V x, T init, + BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + nd_item it = + detail::Builder::getNDItem(); + if (it.get_local_linear_id() == 0) { + x = binary_op(init, x); + } + T scan = exclusive_scan(g, x, binary_op); + if (it.get_local_linear_id() == 0) { + scan = init; + } + return scan; +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsPointer +exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, + BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + nd_item it = + ::sycl::detail::Builder::getNDItem(); + ptrdiff_t offset = it.get_local_linear_id(); + ptrdiff_t stride = detail::get_local_linear_range(g); + ptrdiff_t N = last - first; + auto roundup = [=](const ptrdiff_t &v, const ptrdiff_t &divisor) -> ptrdiff_t { + return ((v + divisor - 1) / divisor) * divisor; + }; + typename InPtr::element_type x; + typename OutPtr::element_type carry = init; + for (ptrdiff_t chunk = 0; chunk < roundup(N, stride); chunk += stride) { + ptrdiff_t i = chunk + offset; + if (i < N) { + x = first[i]; + } + typename OutPtr::element_type out = exclusive_scan(g, x, carry, binary_op); + if (i < N) { + result[i] = out; + } + carry = broadcast(g, binary_op(out, x), stride - 1); + } + return result + N; +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsPointer exclusive_scan(Group g, InPtr first, + InPtr last, OutPtr result, + BinaryOperation binary_op) { + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); + return exclusive_scan( + g, first, last, result, + detail::identity::value, + binary_op); +} + +template +EnableIfIsVectorArithmetic inclusive_scan(Group g, T x, + BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); + T result; + for (int s = 0; s < x.get_size(); ++s) { + result[s] = inclusive_scan(g, x[s], binary_op); + } + return result; +} + +template +EnableIfIsScalarArithmetic inclusive_scan(Group g, T x, + BinaryOperation binary_op) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + return detail::calc( + typename detail::GroupOpTag::type(), x, binary_op); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsScalarArithmetic +inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + nd_item it = + detail::Builder::getNDItem(); + if (it.get_local_linear_id() == 0) { + x = binary_op(init, x); + } + return inclusive_scan(g, x, binary_op); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsVectorArithmetic +inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); + T result; + for (int s = 0; s < x.get_size(); ++s) { + result[s] = inclusive_scan(g, x[s], binary_op, init[s]); + } + return result; +} + +template +EnableIfIsPointer +inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, + BinaryOperation binary_op, T init) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); +#ifdef __SYCL_DEVICE_ONLY__ + nd_item it = + ::sycl::detail::Builder::getNDItem(); + ptrdiff_t offset = it.get_local_linear_id(); + ptrdiff_t stride = detail::get_local_linear_range(g); + ptrdiff_t N = last - first; + auto roundup = [=](const ptrdiff_t &v, const ptrdiff_t &divisor) -> ptrdiff_t { + return ((v + divisor - 1) / divisor) * divisor; + }; + typename InPtr::element_type x; + typename OutPtr::element_type carry = init; + for (ptrdiff_t chunk = 0; chunk < roundup(N, stride); chunk += stride) { + ptrdiff_t i = chunk + offset; + if (i < N) { + x = first[i]; + } + typename OutPtr::element_type out = inclusive_scan(g, x, binary_op, carry); + if (i < N) { + result[i] = out; + } + carry = broadcast(g, out, stride - 1); + } + return result + N; +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + +template +EnableIfIsPointer inclusive_scan(Group g, InPtr first, + InPtr last, OutPtr result, + BinaryOperation binary_op) { + static_assert(std::is_same::value, + "Result type of binary_op must match scan accumulation type."); + return inclusive_scan( + g, first, last, result, binary_op, + detail::identity::value); +} + +} // namespace intel +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) +#endif // __SYCL_INTEL_GROUP_ALGORITHMS__ diff --git a/sycl/test/group-algorithm/all_of.cpp b/sycl/test/group-algorithm/all_of.cpp new file mode 100644 index 0000000000000..5e0314af09e83 --- /dev/null +++ b/sycl/test/group-algorithm/all_of.cpp @@ -0,0 +1,80 @@ +// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==------------- all_of.cpp - SYCL group all_of test --*- C++ -*-----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template class all_of_kernel; + +struct GeZero { + bool operator()(int i) const { return i >= 0; } +}; +struct IsEven { + bool operator()(int i) const { return (i % 2) == 0; } +}; +struct LtZero { + bool operator()(int i) const { return i < 0; } +}; + +template +void test(queue q, InputContainer input, OutputContainer output, + Predicate pred) { + typedef class all_of_kernel kernel_name; + size_t N = input.size(); + size_t G = 16; + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto in = in_buf.get_access(cgh); + auto out = out_buf.get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[0] = all_of(g, pred(in[lid])); + out[1] = all_of(g, in[lid], pred); + out[2] = all_of(g, in.get_pointer(), in.get_pointer() + N, pred); + }); + }); + } + bool expected = std::all_of(input.begin(), input.end(), pred); + assert(output[0] == expected); + assert(output[1] == expected); + assert(output[2] == expected); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + std::array input; + std::array output; + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), false); + + test(q, input, output, GeZero()); + test(q, input, output, IsEven()); + test(q, input, output, LtZero()); + + std::cout << "Test passed." << std::endl; +} diff --git a/sycl/test/group-algorithm/any_of.cpp b/sycl/test/group-algorithm/any_of.cpp new file mode 100644 index 0000000000000..ad68a793f31c0 --- /dev/null +++ b/sycl/test/group-algorithm/any_of.cpp @@ -0,0 +1,82 @@ +// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==------------- any_of.cpp - SYCL group any_of test --*- C++ -*-----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template class any_of_kernel; + +struct GeZero { + bool operator()(int i) const { return i >= 0; } +}; +struct IsEven { + bool operator()(int i) const { return (i % 2) == 0; } +}; +struct LtZero { + bool operator()(int i) const { return i < 0; } +}; + +template +void test(queue q, InputContainer input, OutputContainer output, + Predicate pred) { + typedef typename InputContainer::value_type InputT; + typedef typename OutputContainer::value_type OutputT; + typedef class any_of_kernel kernel_name; + size_t N = input.size(); + size_t G = 16; + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[0] = any_of(g, pred(in[lid])); + out[1] = any_of(g, in[lid], pred); + out[2] = any_of(g, in.get_pointer(), in.get_pointer() + N, pred); + }); + }); + } + bool expected = std::any_of(input.begin(), input.end(), pred); + assert(output[0] == expected); + assert(output[1] == expected); + assert(output[2] == expected); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + std::array input; + std::array output; + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), false); + + test(q, input, output, GeZero()); + test(q, input, output, IsEven()); + test(q, input, output, LtZero()); + + std::cout << "Test passed." << std::endl; +} diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp new file mode 100644 index 0000000000000..374b666b37923 --- /dev/null +++ b/sycl/test/group-algorithm/broadcast.cpp @@ -0,0 +1,70 @@ +// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==------------- broadcast.cpp - SYCL group broadcast test --*- C++ +//-*-----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +class broadcast_kernel; + +template +void test(queue q, InputContainer input, OutputContainer output) { + typedef typename InputContainer::value_type InputT; + typedef typename OutputContainer::value_type OutputT; + typedef class broadcast_kernel kernel_name; + size_t N = input.size(); + size_t G = 4; + range<2> R(G, G); + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<2>(R, R), [=](nd_item<2> it) { + group<2> g = it.get_group(); + int lid = it.get_local_linear_id(); + out[0] = broadcast(g, in[lid]); + out[1] = broadcast(g, in[lid], group<2>::id_type(1, 2)); + out[2] = broadcast(g, in[lid], group<2>::linear_id_type(2 * G + 1)); + }); + }); + } + assert(output[0] == input[0]); + assert(output[1] == input[1 * G + 2]); + assert(output[2] == input[2 * G + 1]); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 16; + std::array input; + std::array output; + std::iota(input.begin(), input.end(), 1); + std::fill(output.begin(), output.end(), false); + + test(q, input, output); + + std::cout << "Test passed." << std::endl; +} diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp new file mode 100644 index 0000000000000..965d0c1089784 --- /dev/null +++ b/sycl/test/group-algorithm/exclusive_scan.cpp @@ -0,0 +1,145 @@ +// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==---- exclusive_scan.cpp - SYCL group exclusive_scan test --*- C++ -*----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template class exclusive_scan_kernel; + +// std::exclusive_scan isn't implemented yet, so use serial implementation +// instead +namespace emu { +template +OutputIterator exclusive_scan(InputIterator first, InputIterator last, + OutputIterator result, T init, + BinaryOperation binary_op) { + T partial = init; + for (InputIterator it = first; it != last; ++it) { + *(result++) = partial; + partial = binary_op(partial, *it); + } + return result; +} +} // namespace emu + +template +void test(queue q, InputContainer input, OutputContainer output, + BinaryOperation binary_op, + typename OutputContainer::value_type identity) { + typedef typename InputContainer::value_type InputT; + typedef typename OutputContainer::value_type OutputT; + typedef class exclusive_scan_kernel kernel_name0; + typedef class exclusive_scan_kernel kernel_name1; + typedef class exclusive_scan_kernel kernel_name2; + typedef class exclusive_scan_kernel kernel_name3; + OutputT init = 42; + size_t N = input.size(); + size_t G = 16; + std::vector expected(N); + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[lid] = exclusive_scan(g, in[lid], binary_op); + }); + }); + } + emu::exclusive_scan(input.begin(), input.begin() + G, expected.begin(), + identity, binary_op); + assert(std::equal(output.begin(), output.begin() + G, expected.begin())); + + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[lid] = exclusive_scan(g, in[lid], init, binary_op); + }); + }); + } + emu::exclusive_scan(input.begin(), input.begin() + G, expected.begin(), init, + binary_op); + assert(std::equal(output.begin(), output.begin() + G, expected.begin())); + + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + exclusive_scan(g, in.get_pointer(), in.get_pointer() + N, + out.get_pointer(), binary_op); + }); + }); + } + emu::exclusive_scan(input.begin(), input.begin() + N, expected.begin(), + identity, binary_op); + assert(std::equal(output.begin(), output.begin() + N, expected.begin())); + + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + exclusive_scan(g, in.get_pointer(), in.get_pointer() + N, + out.get_pointer(), init, binary_op); + }); + }); + } + emu::exclusive_scan(input.begin(), input.begin() + N, expected.begin(), init, + binary_op); + assert(std::equal(output.begin(), output.begin() + N, expected.begin())); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + std::array input; + std::array output; + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), 0); + + test(q, input, output, plus<>(), 0); + test(q, input, output, minimum<>(), std::numeric_limits::max()); + test(q, input, output, maximum<>(), std::numeric_limits::lowest()); + + std::cout << "Test passed." << std::endl; +} diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp new file mode 100644 index 0000000000000..5c530fb69f173 --- /dev/null +++ b/sycl/test/group-algorithm/inclusive_scan.cpp @@ -0,0 +1,145 @@ +// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==---- inclusive_scan.cpp - SYCL group inclusive_scan test --*- C++ -*----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template class inclusive_scan_kernel; + +// std::inclusive_scan isn't implemented yet, so use serial implementation +// instead +namespace emu { +template +OutputIterator inclusive_scan(InputIterator first, InputIterator last, + OutputIterator result, BinaryOperation binary_op, + T init) { + T partial = init; + for (InputIterator it = first; it != last; ++it) { + partial = binary_op(partial, *it); + *(result++) = partial; + } + return result; +} +} // namespace emu + +template +void test(queue q, InputContainer input, OutputContainer output, + BinaryOperation binary_op, + typename OutputContainer::value_type identity) { + typedef typename InputContainer::value_type InputT; + typedef typename OutputContainer::value_type OutputT; + typedef class inclusive_scan_kernel kernel_name0; + typedef class inclusive_scan_kernel kernel_name1; + typedef class inclusive_scan_kernel kernel_name2; + typedef class inclusive_scan_kernel kernel_name3; + OutputT init = 42; + size_t N = input.size(); + size_t G = 16; + std::vector expected(N); + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[lid] = inclusive_scan(g, in[lid], binary_op); + }); + }); + } + emu::inclusive_scan(input.begin(), input.begin() + G, expected.begin(), + binary_op, identity); + assert(std::equal(output.begin(), output.begin() + G, expected.begin())); + + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[lid] = inclusive_scan(g, in[lid], binary_op, init); + }); + }); + } + emu::inclusive_scan(input.begin(), input.begin() + G, expected.begin(), + binary_op, init); + assert(std::equal(output.begin(), output.begin() + G, expected.begin())); + + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + inclusive_scan(g, in.get_pointer(), in.get_pointer() + N, + out.get_pointer(), binary_op); + }); + }); + } + emu::inclusive_scan(input.begin(), input.begin() + N, expected.begin(), + binary_op, identity); + assert(std::equal(output.begin(), output.begin() + N, expected.begin())); + + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + inclusive_scan(g, in.get_pointer(), in.get_pointer() + N, + out.get_pointer(), binary_op, init); + }); + }); + } + emu::inclusive_scan(input.begin(), input.begin() + N, expected.begin(), + binary_op, init); + assert(std::equal(output.begin(), output.begin() + N, expected.begin())); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + std::array input; + std::array output; + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), 0); + + test(q, input, output, plus<>(), 0); + test(q, input, output, minimum<>(), std::numeric_limits::max()); + test(q, input, output, maximum<>(), std::numeric_limits::lowest()); + + std::cout << "Test passed." << std::endl; +} diff --git a/sycl/test/group-algorithm/none_of.cpp b/sycl/test/group-algorithm/none_of.cpp new file mode 100644 index 0000000000000..384d452aaea5f --- /dev/null +++ b/sycl/test/group-algorithm/none_of.cpp @@ -0,0 +1,80 @@ +// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==------------- none_of.cpp - SYCL group none_of test --*- C++ -*---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template class none_of_kernel; + +struct GeZero { + bool operator()(int i) const { return i >= 0; } +}; +struct IsEven { + bool operator()(int i) const { return (i % 2) == 0; } +}; +struct LtZero { + bool operator()(int i) const { return i < 0; } +}; + +template +void test(queue q, InputContainer input, OutputContainer output, + Predicate pred) { + typedef class none_of_kernel kernel_name; + size_t N = input.size(); + size_t G = 16; + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto in = in_buf.get_access(cgh); + auto out = out_buf.get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[0] = none_of(g, pred(in[lid])); + out[1] = none_of(g, in[lid], pred); + out[2] = none_of(g, in.get_pointer(), in.get_pointer() + N, pred); + }); + }); + } + bool expected = std::none_of(input.begin(), input.end(), pred); + assert(output[0] == expected); + assert(output[1] == expected); + assert(output[2] == expected); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + std::array input; + std::array output; + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), false); + + test(q, input, output, GeZero()); + test(q, input, output, IsEven()); + test(q, input, output, LtZero()); + + std::cout << "Test passed." << std::endl; +} diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp new file mode 100644 index 0000000000000..2f7b79fe7119b --- /dev/null +++ b/sycl/test/group-algorithm/reduce.cpp @@ -0,0 +1,83 @@ +// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +//==------------- reduce.cpp - SYCL group reduce test --*- C++ -*-----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template class reduce_kernel; + +template +void test(queue q, InputContainer input, OutputContainer output, + BinaryOperation binary_op, + typename OutputContainer::value_type identity) { + typedef typename InputContainer::value_type InputT; + typedef typename OutputContainer::value_type OutputT; + typedef class reduce_kernel kernel_name; + OutputT init = 42; + size_t N = input.size(); + size_t G = 16; + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto in = in_buf.template get_access(cgh); + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[0] = reduce(g, in[lid], binary_op); + out[1] = reduce(g, in[lid], init, binary_op); + out[2] = reduce(g, in.get_pointer(), in.get_pointer() + N, binary_op); + out[3] = + reduce(g, in.get_pointer(), in.get_pointer() + N, init, binary_op); + }); + }); + } + // std::reduce is not implemented yet, so use std::accumulate instead + assert(output[0] == std::accumulate(input.begin(), input.begin() + G, + identity, binary_op)); + assert(output[1] == + std::accumulate(input.begin(), input.begin() + G, init, binary_op)); + assert(output[2] == + std::accumulate(input.begin(), input.end(), identity, binary_op)); + assert(output[3] == + std::accumulate(input.begin(), input.end(), init, binary_op)); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + std::array input; + std::array output; + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), 0); + + test(q, input, output, plus<>(), 0); + test(q, input, output, minimum<>(), std::numeric_limits::max()); + test(q, input, output, maximum<>(), std::numeric_limits::lowest()); + + std::cout << "Test passed." << std::endl; +} From 58ac82dba3c8ee5e413941895ff29eb309cecd67 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 3 Mar 2020 10:14:44 -0800 Subject: [PATCH 04/19] Fix clang-format errors in group algorithms Signed-off-by: John Pennycook --- sycl/include/CL/sycl/intel/group_algorithm.hpp | 6 ++++-- sycl/test/group-algorithm/all_of.cpp | 3 ++- sycl/test/group-algorithm/any_of.cpp | 3 ++- sycl/test/group-algorithm/exclusive_scan.cpp | 3 ++- sycl/test/group-algorithm/inclusive_scan.cpp | 3 ++- sycl/test/group-algorithm/none_of.cpp | 3 ++- sycl/test/group-algorithm/reduce.cpp | 3 ++- 7 files changed, 16 insertions(+), 8 deletions(-) diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp index 6ed55956108a8..b2eb91da1843a 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -464,7 +464,8 @@ exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, ptrdiff_t offset = it.get_local_linear_id(); ptrdiff_t stride = detail::get_local_linear_range(g); ptrdiff_t N = last - first; - auto roundup = [=](const ptrdiff_t &v, const ptrdiff_t &divisor) -> ptrdiff_t { + auto roundup = [=](const ptrdiff_t &v, + const ptrdiff_t &divisor) -> ptrdiff_t { return ((v + divisor - 1) / divisor) * divisor; }; typename InPtr::element_type x; @@ -579,7 +580,8 @@ inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, ptrdiff_t offset = it.get_local_linear_id(); ptrdiff_t stride = detail::get_local_linear_range(g); ptrdiff_t N = last - first; - auto roundup = [=](const ptrdiff_t &v, const ptrdiff_t &divisor) -> ptrdiff_t { + auto roundup = [=](const ptrdiff_t &v, + const ptrdiff_t &divisor) -> ptrdiff_t { return ((v + divisor - 1) / divisor) * divisor; }; typename InPtr::element_type x; diff --git a/sycl/test/group-algorithm/all_of.cpp b/sycl/test/group-algorithm/all_of.cpp index 5e0314af09e83..4454e9f996a54 100644 --- a/sycl/test/group-algorithm/all_of.cpp +++ b/sycl/test/group-algorithm/all_of.cpp @@ -18,7 +18,8 @@ using namespace sycl; using namespace sycl::intel; -template class all_of_kernel; +template +class all_of_kernel; struct GeZero { bool operator()(int i) const { return i >= 0; } diff --git a/sycl/test/group-algorithm/any_of.cpp b/sycl/test/group-algorithm/any_of.cpp index ad68a793f31c0..7cd90a07e6344 100644 --- a/sycl/test/group-algorithm/any_of.cpp +++ b/sycl/test/group-algorithm/any_of.cpp @@ -18,7 +18,8 @@ using namespace sycl; using namespace sycl::intel; -template class any_of_kernel; +template +class any_of_kernel; struct GeZero { bool operator()(int i) const { return i >= 0; } diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp index 965d0c1089784..07c00daacd27d 100644 --- a/sycl/test/group-algorithm/exclusive_scan.cpp +++ b/sycl/test/group-algorithm/exclusive_scan.cpp @@ -20,7 +20,8 @@ using namespace sycl; using namespace sycl::intel; -template class exclusive_scan_kernel; +template +class exclusive_scan_kernel; // std::exclusive_scan isn't implemented yet, so use serial implementation // instead diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp index 5c530fb69f173..f36072d6df88d 100644 --- a/sycl/test/group-algorithm/inclusive_scan.cpp +++ b/sycl/test/group-algorithm/inclusive_scan.cpp @@ -20,7 +20,8 @@ using namespace sycl; using namespace sycl::intel; -template class inclusive_scan_kernel; +template +class inclusive_scan_kernel; // std::inclusive_scan isn't implemented yet, so use serial implementation // instead diff --git a/sycl/test/group-algorithm/none_of.cpp b/sycl/test/group-algorithm/none_of.cpp index 384d452aaea5f..47a8f0a3d41bc 100644 --- a/sycl/test/group-algorithm/none_of.cpp +++ b/sycl/test/group-algorithm/none_of.cpp @@ -18,7 +18,8 @@ using namespace sycl; using namespace sycl::intel; -template class none_of_kernel; +template +class none_of_kernel; struct GeZero { bool operator()(int i) const { return i >= 0; } diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp index 2f7b79fe7119b..5d7837dc4bbb0 100644 --- a/sycl/test/group-algorithm/reduce.cpp +++ b/sycl/test/group-algorithm/reduce.cpp @@ -19,7 +19,8 @@ using namespace sycl; using namespace sycl::intel; -template class reduce_kernel; +template +class reduce_kernel; template From 546101e50cd9a439d99784c3cabb73f6b47fd834 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 3 Mar 2020 10:21:55 -0800 Subject: [PATCH 05/19] Fix more clang-format errors in group algorithms Signed-off-by: John Pennycook --- sycl/include/CL/sycl.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp index b4ef5514e699a..3455e0ab07219 100644 --- a/sycl/include/CL/sycl.hpp +++ b/sycl/include/CL/sycl.hpp @@ -30,16 +30,15 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include #include #include #include - From 29cf3f026ba5063c7f1889042696a28963fafdfd Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 11 Mar 2020 08:26:46 -0700 Subject: [PATCH 06/19] [SYCL] Convert typedefs to using syntax Signed-off-by: John Pennycook --- sycl/include/CL/sycl/group.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/group.hpp b/sycl/include/CL/sycl/group.hpp index 733fe49473aa8..be225201e4d60 100644 --- a/sycl/include/CL/sycl/group.hpp +++ b/sycl/include/CL/sycl/group.hpp @@ -84,9 +84,9 @@ template class private_memory { template class group { public: #ifdef __SYCL_INTEL_GROUP_ALGORITHMS__ - typedef id id_type; - typedef range range_type; - typedef size_t linear_id_type; + using id_type = id; + using range_type = range; + using linear_id_type = size_t; static constexpr int dimensions = Dimensions; #endif From 3100d90d048b06517547a580ce295d7cca517eae Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 11 Mar 2020 08:32:58 -0700 Subject: [PATCH 07/19] [SYCL] Remove copyright header from tests Signed-off-by: John Pennycook --- sycl/test/group-algorithm/all_of.cpp | 7 ------- sycl/test/group-algorithm/any_of.cpp | 7 ------- sycl/test/group-algorithm/broadcast.cpp | 8 -------- sycl/test/group-algorithm/exclusive_scan.cpp | 7 ------- sycl/test/group-algorithm/inclusive_scan.cpp | 7 ------- sycl/test/group-algorithm/none_of.cpp | 7 ------- sycl/test/group-algorithm/reduce.cpp | 7 ------- 7 files changed, 50 deletions(-) diff --git a/sycl/test/group-algorithm/all_of.cpp b/sycl/test/group-algorithm/all_of.cpp index 4454e9f996a54..6c2234616443f 100644 --- a/sycl/test/group-algorithm/all_of.cpp +++ b/sycl/test/group-algorithm/all_of.cpp @@ -3,13 +3,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -//==------------- all_of.cpp - SYCL group all_of test --*- C++ -*-----------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// #include #include diff --git a/sycl/test/group-algorithm/any_of.cpp b/sycl/test/group-algorithm/any_of.cpp index 7cd90a07e6344..5617381a128a1 100644 --- a/sycl/test/group-algorithm/any_of.cpp +++ b/sycl/test/group-algorithm/any_of.cpp @@ -3,13 +3,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -//==------------- any_of.cpp - SYCL group any_of test --*- C++ -*-----------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// #include #include diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp index 374b666b37923..ed2dc2a83b282 100644 --- a/sycl/test/group-algorithm/broadcast.cpp +++ b/sycl/test/group-algorithm/broadcast.cpp @@ -3,14 +3,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -//==------------- broadcast.cpp - SYCL group broadcast test --*- C++ -//-*-----------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// #include #include diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp index 07c00daacd27d..411313570214e 100644 --- a/sycl/test/group-algorithm/exclusive_scan.cpp +++ b/sycl/test/group-algorithm/exclusive_scan.cpp @@ -3,13 +3,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -//==---- exclusive_scan.cpp - SYCL group exclusive_scan test --*- C++ -*----==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// #include #include diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp index f36072d6df88d..0d9a724882a9c 100644 --- a/sycl/test/group-algorithm/inclusive_scan.cpp +++ b/sycl/test/group-algorithm/inclusive_scan.cpp @@ -3,13 +3,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -//==---- inclusive_scan.cpp - SYCL group inclusive_scan test --*- C++ -*----==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// #include #include diff --git a/sycl/test/group-algorithm/none_of.cpp b/sycl/test/group-algorithm/none_of.cpp index 47a8f0a3d41bc..31b486a62315c 100644 --- a/sycl/test/group-algorithm/none_of.cpp +++ b/sycl/test/group-algorithm/none_of.cpp @@ -3,13 +3,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -//==------------- none_of.cpp - SYCL group none_of test --*- C++ -*---------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// #include #include diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp index 5d7837dc4bbb0..447ddd6f255d8 100644 --- a/sycl/test/group-algorithm/reduce.cpp +++ b/sycl/test/group-algorithm/reduce.cpp @@ -3,13 +3,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -//==------------- reduce.cpp - SYCL group reduce test --*- C++ -*-----------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// #include #include From cf1c593f94c553fa5c317760cc07ded9d2930c5f Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 11 Mar 2020 08:33:59 -0700 Subject: [PATCH 08/19] [SYCL] Capitalize template argument dimensions => Dimensions Signed-off-by: John Pennycook --- sycl/include/CL/sycl/detail/spirv.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp index 2c53bd538bd88..c1e87547df189 100644 --- a/sycl/include/CL/sycl/detail/spirv.hpp +++ b/sycl/include/CL/sycl/detail/spirv.hpp @@ -28,17 +28,17 @@ GroupBroadcast(T x, IdT local_id) { return __spirv_GroupBroadcast(S, ocl_x, ocl_id); } -template <__spv::Scope S, typename T, int dimensions> -T GroupBroadcast(T x, id local_id) { - if (dimensions == 1) { +template <__spv::Scope S, typename T, int Dimensions> +T GroupBroadcast(T x, id local_id) { + if (Dimensions == 1) { return GroupBroadcast(x, local_id[0]); } - using IdT = vec; + using IdT = vec; using OCLT = detail::ConvertToOpenCLType_t; using OCLIdT = detail::ConvertToOpenCLType_t; IdT vec_id; - for (int i = 0; i < dimensions; ++i) { - vec_id[i] = local_id[dimensions - i - 1]; + for (int i = 0; i < Dimensions; ++i) { + vec_id[i] = local_id[Dimensions - i - 1]; } OCLT ocl_x = detail::convertDataToType(x); OCLIdT ocl_id = detail::convertDataToType(vec_id); From 3ffd44164ba23d48767d558c1f5b8e93c2c4d548 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 11 Mar 2020 08:36:37 -0700 Subject: [PATCH 09/19] [SYCL] Add comment to clarify broadcast overloads Signed-off-by: John Pennycook --- sycl/include/CL/sycl/detail/spirv.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp index c1e87547df189..8c5f80f3674b2 100644 --- a/sycl/include/CL/sycl/detail/spirv.hpp +++ b/sycl/include/CL/sycl/detail/spirv.hpp @@ -18,6 +18,8 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { namespace spirv { + +// Broadcast with scalar local index template <__spv::Scope S, typename T, typename IdT> detail::enable_if_t::value, T> GroupBroadcast(T x, IdT local_id) { @@ -28,6 +30,7 @@ GroupBroadcast(T x, IdT local_id) { return __spirv_GroupBroadcast(S, ocl_x, ocl_id); } +// Broadcast with vector local index template <__spv::Scope S, typename T, int Dimensions> T GroupBroadcast(T x, id local_id) { if (Dimensions == 1) { @@ -44,6 +47,7 @@ T GroupBroadcast(T x, id local_id) { OCLIdT ocl_id = detail::convertDataToType(vec_id); return __spirv_GroupBroadcast(S, ocl_x, ocl_id); } + } // namespace spirv } // namespace detail } // namespace sycl From 372ba3310c128809e602468235e470511fbaa800 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 11 Mar 2020 08:38:25 -0700 Subject: [PATCH 10/19] [SYCL] Capitalize template argument dimensions => Dimensions Signed-off-by: John Pennycook --- sycl/include/CL/sycl/intel/group_algorithm.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp index b2eb91da1843a..386fa23b7400b 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -31,8 +31,8 @@ template <> size_t get_local_linear_range>(group<3> g) { return g.get_local_range(0) * g.get_local_range(1) * g.get_local_range(2); } -template -id linear_id_to_id(range, size_t i); +template +id linear_id_to_id(range, size_t i); template <> id<1> linear_id_to_id(range<1> r, size_t i) { return id<1>(i); } template <> id<2> linear_id_to_id(range<2> r, size_t i) { id<2> result; From 8704202a41b7b5c96474112f9254b8fe9bf9b5ba Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 11 Mar 2020 08:39:13 -0700 Subject: [PATCH 11/19] [SYCL] Add comment to #endif Signed-off-by: John Pennycook --- sycl/include/CL/sycl/intel/functional.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/intel/functional.hpp b/sycl/include/CL/sycl/intel/functional.hpp index 8c4bfd0025994..018f6d0c2a28b 100644 --- a/sycl/include/CL/sycl/intel/functional.hpp +++ b/sycl/include/CL/sycl/intel/functional.hpp @@ -108,7 +108,7 @@ static T calc(typename GroupOpTag::type, T x, BinaryOperation) { } } // namespace detail -#endif +#endif // __SYCL_DEVICE_ONLY__ } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) From febf2179d4622139bc6da742716f210ce05b4234 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Wed, 11 Mar 2020 08:50:41 -0700 Subject: [PATCH 12/19] [SYCL] Invert #ifdef logic for group algorithms Now enabled by default, disabled by: __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__. Signed-off-by: John Pennycook --- sycl/include/CL/sycl/group.hpp | 4 ++-- sycl/include/CL/sycl/intel/group_algorithm.hpp | 4 ++-- sycl/test/group-algorithm/all_of.cpp | 2 +- sycl/test/group-algorithm/any_of.cpp | 2 +- sycl/test/group-algorithm/broadcast.cpp | 2 +- sycl/test/group-algorithm/exclusive_scan.cpp | 2 +- sycl/test/group-algorithm/inclusive_scan.cpp | 2 +- sycl/test/group-algorithm/none_of.cpp | 2 +- sycl/test/group-algorithm/reduce.cpp | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sycl/include/CL/sycl/group.hpp b/sycl/include/CL/sycl/group.hpp index be225201e4d60..c871ec95bd0bf 100644 --- a/sycl/include/CL/sycl/group.hpp +++ b/sycl/include/CL/sycl/group.hpp @@ -83,12 +83,12 @@ template class private_memory { template class group { public: -#ifdef __SYCL_INTEL_GROUP_ALGORITHMS__ +#ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__ using id_type = id; using range_type = range; using linear_id_type = size_t; static constexpr int dimensions = Dimensions; -#endif +#endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__ group() = delete; diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp index 386fa23b7400b..64e689f498b9d 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -15,7 +15,7 @@ #include #include -#ifdef __SYCL_INTEL_GROUP_ALGORITHMS__ +#ifndef __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -619,4 +619,4 @@ EnableIfIsPointer inclusive_scan(Group g, InPtr first, } // namespace intel } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) -#endif // __SYCL_INTEL_GROUP_ALGORITHMS__ +#endif // __DISABLE_SYCL_INTEL_GROUP_ALGORITHMS__ diff --git a/sycl/test/group-algorithm/all_of.cpp b/sycl/test/group-algorithm/all_of.cpp index 6c2234616443f..a8b4fc4bfff2b 100644 --- a/sycl/test/group-algorithm/all_of.cpp +++ b/sycl/test/group-algorithm/all_of.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/group-algorithm/any_of.cpp b/sycl/test/group-algorithm/any_of.cpp index 5617381a128a1..4e5391b5b01be 100644 --- a/sycl/test/group-algorithm/any_of.cpp +++ b/sycl/test/group-algorithm/any_of.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp index ed2dc2a83b282..9fcce3b938673 100644 --- a/sycl/test/group-algorithm/broadcast.cpp +++ b/sycl/test/group-algorithm/broadcast.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp index 411313570214e..ebbe35b0e6a31 100644 --- a/sycl/test/group-algorithm/exclusive_scan.cpp +++ b/sycl/test/group-algorithm/exclusive_scan.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp index 0d9a724882a9c..4d554e172e233 100644 --- a/sycl/test/group-algorithm/inclusive_scan.cpp +++ b/sycl/test/group-algorithm/inclusive_scan.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/group-algorithm/none_of.cpp b/sycl/test/group-algorithm/none_of.cpp index 31b486a62315c..d0ef19b8ed3ea 100644 --- a/sycl/test/group-algorithm/none_of.cpp +++ b/sycl/test/group-algorithm/none_of.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp index 447ddd6f255d8..85a3afdc7abfc 100644 --- a/sycl/test/group-algorithm/reduce.cpp +++ b/sycl/test/group-algorithm/reduce.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -D __SYCL_INTEL_GROUP_ALGORITHMS__ %s -o %t.out +// RUN: %clangxx -fsycl %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out From 199ff4189eafe7da4c5422395d00e33e91110ca5 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 12 Mar 2020 11:00:26 -0700 Subject: [PATCH 13/19] [SYCL] Add prototype of leader algorithm Signed-off-by: John Pennycook --- .../include/CL/sycl/intel/group_algorithm.hpp | 13 +++++ sycl/test/group-algorithm/leader.cpp | 47 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 sycl/test/group-algorithm/leader.cpp diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp index 64e689f498b9d..49dd7f5cd5388 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -616,6 +616,19 @@ EnableIfIsPointer inclusive_scan(Group g, InPtr first, detail::identity::value); } +template bool leader(Group g) { + static_assert(detail::is_group::value, + "Group algorithms only support the sycl::group class."); +#ifdef __SYCL_DEVICE_ONLY__ + nd_item it = + ::sycl::detail::Builder::getNDItem(); + typename Group::linear_id_type linear_id = it.get_local_linear_id(); + return (linear_id == 0); +#else + throw runtime_error("Group algorithms are not supported on host device."); +#endif +} + } // namespace intel } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/group-algorithm/leader.cpp b/sycl/test/group-algorithm/leader.cpp new file mode 100644 index 0000000000000..3e0bad4706cfc --- /dev/null +++ b/sycl/test/group-algorithm/leader.cpp @@ -0,0 +1,47 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include +#include +using namespace sycl; +using namespace sycl::intel; + +class leader_kernel; + +void test(queue q) { + typedef class leader_kernel kernel_name; + int out = 0; + size_t G = 4; + + range<2> R(G, G); + { + buffer out_buf(&out, 1); + + q.submit([&](handler &cgh) { + auto out = out_buf.template get_access(cgh); + cgh.parallel_for(nd_range<2>(R, R), [=](nd_item<2> it) { + group<2> g = it.get_group(); + if (leader(g)) { + out[0] += 1; + } + }); + }); + } + assert(out == 1); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + test(q); + + std::cout << "Test passed." << std::endl; +} From a7d6504529809751ad544a22bea5445f27c89d8a Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Thu, 12 Mar 2020 14:35:44 -0700 Subject: [PATCH 14/19] [SYCL] Add PI_INVALID_DEVICE to runtime_error Signed-off-by: John Pennycook --- .../include/CL/sycl/intel/group_algorithm.hpp | 75 ++++++++++++------- 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp index 49dd7f5cd5388..afa6cda16b3a6 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -79,7 +79,8 @@ Function for_each(Group g, Ptr first, Ptr last, Function f) { } return f; #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -107,7 +108,8 @@ template bool all_of(Group g, bool pred) { #ifdef __SYCL_DEVICE_ONLY__ return __spirv_GroupAll(__spv::Scope::Workgroup, pred); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -128,7 +130,8 @@ EnableIfIsPointer all_of(Group g, Ptr first, Ptr last, detail::for_each(g, first, last, [&](auto x) { partial &= pred(x); }); return all_of(g, partial); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -138,7 +141,8 @@ template bool any_of(Group g, bool pred) { #ifdef __SYCL_DEVICE_ONLY__ return __spirv_GroupAny(__spv::Scope::Workgroup, pred); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -159,7 +163,8 @@ EnableIfIsPointer any_of(Group g, Ptr first, Ptr last, detail::for_each(g, first, last, [&](auto x) { partial |= pred(x); }); return any_of(g, partial); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -169,7 +174,8 @@ template bool none_of(Group g, bool pred) { #ifdef __SYCL_DEVICE_ONLY__ return __spirv_GroupAll(__spv::Scope::Workgroup, not pred); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -188,7 +194,8 @@ EnableIfIsPointer none_of(Group g, Ptr first, Ptr last, "Group algorithms only support the sycl::group class."); return not any_of(g, first, last, pred); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -200,7 +207,8 @@ EnableIfIsScalarArithmetic broadcast(Group g, T x, #ifdef __SYCL_DEVICE_ONLY__ return detail::spirv::GroupBroadcast<__spv::Scope::Workgroup>(x, local_id); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -216,7 +224,8 @@ EnableIfIsVectorArithmetic broadcast(Group g, T x, } return result; #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -229,7 +238,8 @@ broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { return broadcast( g, x, detail::linear_id_to_id(g.get_local_range(), linear_local_id)); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -245,7 +255,8 @@ broadcast(Group g, T x, typename Group::linear_id_type linear_local_id) { } return result; #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -256,7 +267,8 @@ EnableIfIsScalarArithmetic broadcast(Group g, T x) { #ifdef __SYCL_DEVICE_ONLY__ return broadcast(g, x, 0); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -271,7 +283,8 @@ EnableIfIsVectorArithmetic broadcast(Group g, T x) { } return result; #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -287,7 +300,8 @@ EnableIfIsScalarArithmetic reduce(Group g, T x, BinaryOperation binary_op) { __spv::Scope::Workgroup>( typename detail::GroupOpTag::type(), x, binary_op); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -317,7 +331,8 @@ EnableIfIsScalarArithmetic reduce(Group g, V x, T init, #ifdef __SYCL_DEVICE_ONLY__ return binary_op(init, reduce(g, x, binary_op)); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -337,7 +352,8 @@ EnableIfIsVectorArithmetic reduce(Group g, V x, T init, } return result; #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -357,7 +373,8 @@ reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { [&](auto x) { partial = binary_op(partial, x); }); return reduce(g, partial, binary_op); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -376,7 +393,8 @@ EnableIfIsPointer reduce(Group g, Ptr first, Ptr last, T init, [&](auto x) { partial = binary_op(partial, x); }); return reduce(g, partial, init, binary_op); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -392,7 +410,8 @@ EnableIfIsScalarArithmetic exclusive_scan(Group g, T x, __spv::Scope::Workgroup>( typename detail::GroupOpTag::type(), x, binary_op); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -445,7 +464,8 @@ EnableIfIsScalarArithmetic exclusive_scan(Group g, V x, T init, } return scan; #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -483,7 +503,8 @@ exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, } return result + N; #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -528,7 +549,8 @@ EnableIfIsScalarArithmetic inclusive_scan(Group g, T x, __spv::Scope::Workgroup>( typename detail::GroupOpTag::type(), x, binary_op); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -547,7 +569,8 @@ inclusive_scan(Group g, V x, BinaryOperation binary_op, T init) { } return inclusive_scan(g, x, binary_op); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -599,7 +622,8 @@ inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, } return result + N; #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } @@ -625,7 +649,8 @@ template bool leader(Group g) { typename Group::linear_id_type linear_id = it.get_local_linear_id(); return (linear_id == 0); #else - throw runtime_error("Group algorithms are not supported on host device."); + throw runtime_error("Group algorithms are not supported on host device.", + PI_INVALID_DEVICE); #endif } From 065f2b30a7fb0a3bb54e898271a6998fa43b72bd Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 13 Mar 2020 08:10:40 -0700 Subject: [PATCH 15/19] [SYCL] Fix compilation with C++11 - Remove generic lambdas - Guard usage of transparent functors Signed-off-by: John Pennycook --- sycl/include/CL/sycl/intel/group_algorithm.hpp | 18 ++++++++++++------ sycl/test/group-algorithm/exclusive_scan.cpp | 5 +++++ sycl/test/group-algorithm/inclusive_scan.cpp | 5 +++++ sycl/test/group-algorithm/reduce.cpp | 5 +++++ 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp index afa6cda16b3a6..0a5bc04ff85d9 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -127,7 +127,9 @@ EnableIfIsPointer all_of(Group g, Ptr first, Ptr last, static_assert(detail::is_group::value, "Group algorithms only support the sycl::group class."); bool partial = true; - detail::for_each(g, first, last, [&](auto x) { partial &= pred(x); }); + detail::for_each(g, first, last, [&](const typename Ptr::element_type &x) { + partial &= pred(x); + }); return all_of(g, partial); #else throw runtime_error("Group algorithms are not supported on host device.", @@ -160,7 +162,9 @@ EnableIfIsPointer any_of(Group g, Ptr first, Ptr last, static_assert(detail::is_group::value, "Group algorithms only support the sycl::group class."); bool partial = false; - detail::for_each(g, first, last, [&](auto x) { partial |= pred(x); }); + detail::for_each(g, first, last, [&](const typename Ptr::element_type &x) { + partial |= pred(x); + }); return any_of(g, partial); #else throw runtime_error("Group algorithms are not supported on host device.", @@ -369,8 +373,9 @@ reduce(Group g, Ptr first, Ptr last, BinaryOperation binary_op) { #ifdef __SYCL_DEVICE_ONLY__ typename Ptr::element_type partial = detail::identity::value; - detail::for_each(g, first, last, - [&](auto x) { partial = binary_op(partial, x); }); + detail::for_each(g, first, last, [&](const typename Ptr::element_type &x) { + partial = binary_op(partial, x); + }); return reduce(g, partial, binary_op); #else throw runtime_error("Group algorithms are not supported on host device.", @@ -389,8 +394,9 @@ EnableIfIsPointer reduce(Group g, Ptr first, Ptr last, T init, #ifdef __SYCL_DEVICE_ONLY__ T partial = detail::identity::value; - detail::for_each(g, first, last, - [&](auto x) { partial = binary_op(partial, x); }); + detail::for_each(g, first, last, [&](const typename Ptr::element_type &x) { + partial = binary_op(partial, x); + }); return reduce(g, partial, init, binary_op); #else throw runtime_error("Group algorithms are not supported on host device.", diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp index ebbe35b0e6a31..fad4777a7cec1 100644 --- a/sycl/test/group-algorithm/exclusive_scan.cpp +++ b/sycl/test/group-algorithm/exclusive_scan.cpp @@ -131,9 +131,14 @@ int main() { std::iota(input.begin(), input.end(), 0); std::fill(output.begin(), output.end(), 0); +#if __cplusplus >= 201402L test(q, input, output, plus<>(), 0); test(q, input, output, minimum<>(), std::numeric_limits::max()); test(q, input, output, maximum<>(), std::numeric_limits::lowest()); +#endif + test(q, input, output, plus(), 0); + test(q, input, output, minimum(), std::numeric_limits::max()); + test(q, input, output, maximum(), std::numeric_limits::lowest()); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp index 4d554e172e233..54d79f72e5395 100644 --- a/sycl/test/group-algorithm/inclusive_scan.cpp +++ b/sycl/test/group-algorithm/inclusive_scan.cpp @@ -131,9 +131,14 @@ int main() { std::iota(input.begin(), input.end(), 0); std::fill(output.begin(), output.end(), 0); +#if __cplusplus >= 201402L test(q, input, output, plus<>(), 0); test(q, input, output, minimum<>(), std::numeric_limits::max()); test(q, input, output, maximum<>(), std::numeric_limits::lowest()); +#endif + test(q, input, output, plus(), 0); + test(q, input, output, minimum(), std::numeric_limits::max()); + test(q, input, output, maximum(), std::numeric_limits::lowest()); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp index 85a3afdc7abfc..988c40f245ff7 100644 --- a/sycl/test/group-algorithm/reduce.cpp +++ b/sycl/test/group-algorithm/reduce.cpp @@ -69,9 +69,14 @@ int main() { std::iota(input.begin(), input.end(), 0); std::fill(output.begin(), output.end(), 0); +#if __cplusplus >= 201402L test(q, input, output, plus<>(), 0); test(q, input, output, minimum<>(), std::numeric_limits::max()); test(q, input, output, maximum<>(), std::numeric_limits::lowest()); +#endif + test(q, input, output, plus(), 0); + test(q, input, output, minimum(), std::numeric_limits::max()); + test(q, input, output, maximum(), std::numeric_limits::lowest()); std::cout << "Test passed." << std::endl; } From 638fdedcc22c654f155e3f82c4427742f2e45664 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 13 Mar 2020 08:15:32 -0700 Subject: [PATCH 16/19] [SYCL] Rename function parameter id => linear_id Signed-off-by: John Pennycook --- .../include/CL/sycl/intel/group_algorithm.hpp | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp index 0a5bc04ff85d9..ea9a2e08f93cc 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -32,19 +32,21 @@ template <> size_t get_local_linear_range>(group<3> g) { } template -id linear_id_to_id(range, size_t i); -template <> id<1> linear_id_to_id(range<1> r, size_t i) { return id<1>(i); } -template <> id<2> linear_id_to_id(range<2> r, size_t i) { +id linear_id_to_id(range, size_t linear_id); +template <> id<1> linear_id_to_id(range<1> r, size_t linear_id) { + return id<1>(linear_id); +} +template <> id<2> linear_id_to_id(range<2> r, size_t linear_id) { id<2> result; - result[0] = i / r[1]; - result[1] = i % r[1]; + result[0] = linear_id / r[1]; + result[1] = linear_id % r[1]; return result; } -template <> id<3> linear_id_to_id(range<3> r, size_t i) { +template <> id<3> linear_id_to_id(range<3> r, size_t linear_id) { id<3> result; - result[0] = i / (r[1] * r[2]); - result[1] = (i % (r[1] * r[2])) / r[2]; - result[2] = i % r[2]; + result[0] = linear_id / (r[1] * r[2]); + result[1] = (linear_id % (r[1] * r[2])) / r[2]; + result[2] = linear_id % r[2]; return result; } From 7927e7e3d2c1f0142b44289ae50786269af1f9ec Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 13 Mar 2020 08:24:02 -0700 Subject: [PATCH 17/19] [SYCL] Make template specialization inline Avoids multiple definition errors. Signed-off-by: John Pennycook --- sycl/include/CL/sycl/intel/group_algorithm.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp index ea9a2e08f93cc..f90dfe6eeee6c 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -21,28 +21,28 @@ namespace sycl { namespace detail { template size_t get_local_linear_range(Group g); -template <> size_t get_local_linear_range>(group<1> g) { +template <> inline size_t get_local_linear_range>(group<1> g) { return g.get_local_range(0); } -template <> size_t get_local_linear_range>(group<2> g) { +template <> inline size_t get_local_linear_range>(group<2> g) { return g.get_local_range(0) * g.get_local_range(1); } -template <> size_t get_local_linear_range>(group<3> g) { +template <> inline size_t get_local_linear_range>(group<3> g) { return g.get_local_range(0) * g.get_local_range(1) * g.get_local_range(2); } template id linear_id_to_id(range, size_t linear_id); -template <> id<1> linear_id_to_id(range<1> r, size_t linear_id) { +template <> inline id<1> linear_id_to_id(range<1> r, size_t linear_id) { return id<1>(linear_id); } -template <> id<2> linear_id_to_id(range<2> r, size_t linear_id) { +template <> inline id<2> linear_id_to_id(range<2> r, size_t linear_id) { id<2> result; result[0] = linear_id / r[1]; result[1] = linear_id % r[1]; return result; } -template <> id<3> linear_id_to_id(range<3> r, size_t linear_id) { +template <> inline id<3> linear_id_to_id(range<3> r, size_t linear_id) { id<3> result; result[0] = linear_id / (r[1] * r[2]); result[1] = (linear_id % (r[1] * r[2])) / r[2]; From 35b9c8ac57dddba6adb1287045017cf75b7d10b3 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 13 Mar 2020 11:41:26 -0700 Subject: [PATCH 18/19] [SYCL] Do not rely on inline namespace Signed-off-by: John Pennycook --- .../include/CL/sycl/intel/group_algorithm.hpp | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/sycl/include/CL/sycl/intel/group_algorithm.hpp b/sycl/include/CL/sycl/intel/group_algorithm.hpp index f90dfe6eeee6c..ad8fa67313d91 100644 --- a/sycl/include/CL/sycl/intel/group_algorithm.hpp +++ b/sycl/include/CL/sycl/intel/group_algorithm.hpp @@ -73,7 +73,7 @@ template Function for_each(Group g, Ptr first, Ptr last, Function f) { #ifdef __SYCL_DEVICE_ONLY__ nd_item it = - ::sycl::detail::Builder::getNDItem(); + cl::sycl::detail::Builder::getNDItem(); ptrdiff_t offset = it.get_local_linear_id(); ptrdiff_t stride = detail::get_local_linear_range(g); for (Ptr p = first + offset; p < last; p += stride) { @@ -91,18 +91,16 @@ Function for_each(Group g, Ptr first, Ptr last, Function f) { namespace intel { template -using EnableIfIsScalarArithmetic = - ::sycl::detail::enable_if_t<::sycl::detail::is_scalar_arithmetic::value, - T>; +using EnableIfIsScalarArithmetic = cl::sycl::detail::enable_if_t< + cl::sycl::detail::is_scalar_arithmetic::value, T>; template -using EnableIfIsVectorArithmetic = - ::sycl::detail::enable_if_t<::sycl::detail::is_vector_arithmetic::value, - T>; +using EnableIfIsVectorArithmetic = cl::sycl::detail::enable_if_t< + cl::sycl::detail::is_vector_arithmetic::value, T>; template using EnableIfIsPointer = - ::sycl::detail::enable_if_t<::sycl::detail::is_pointer::value, T>; + cl::sycl::detail::enable_if_t::value, T>; template bool all_of(Group g, bool pred) { static_assert(detail::is_group::value, @@ -488,7 +486,7 @@ exclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, T init, "Result type of binary_op must match scan accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ nd_item it = - ::sycl::detail::Builder::getNDItem(); + cl::sycl::detail::Builder::getNDItem(); ptrdiff_t offset = it.get_local_linear_id(); ptrdiff_t stride = detail::get_local_linear_range(g); ptrdiff_t N = last - first; @@ -607,7 +605,7 @@ inclusive_scan(Group g, InPtr first, InPtr last, OutPtr result, "Result type of binary_op must match scan accumulation type."); #ifdef __SYCL_DEVICE_ONLY__ nd_item it = - ::sycl::detail::Builder::getNDItem(); + cl::sycl::detail::Builder::getNDItem(); ptrdiff_t offset = it.get_local_linear_id(); ptrdiff_t stride = detail::get_local_linear_range(g); ptrdiff_t N = last - first; @@ -653,7 +651,7 @@ template bool leader(Group g) { "Group algorithms only support the sycl::group class."); #ifdef __SYCL_DEVICE_ONLY__ nd_item it = - ::sycl::detail::Builder::getNDItem(); + cl::sycl::detail::Builder::getNDItem(); typename Group::linear_id_type linear_id = it.get_local_linear_id(); return (linear_id == 0); #else From 760761f20a246dbf33f07e276a6cf4c553f0c55b Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Fri, 13 Mar 2020 12:40:05 -0700 Subject: [PATCH 19/19] [SYCL] Fix SPIR-V broadcast index uint32_t => size_t Signed-off-by: John Pennycook --- sycl/include/CL/__spirv/spirv_ops.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp index 38bcba20d8a27..a773694f69db7 100644 --- a/sycl/include/CL/__spirv/spirv_ops.hpp +++ b/sycl/include/CL/__spirv/spirv_ops.hpp @@ -191,7 +191,7 @@ extern SYCL_EXTERNAL bool __spirv_GroupAny(__spv::Scope Execution, template extern SYCL_EXTERNAL dataT __spirv_GroupBroadcast(__spv::Scope Execution, dataT Value, - uint32_t LocalId) noexcept; + size_t LocalId) noexcept; template extern SYCL_EXTERNAL dataT