From 6505024961e1b3ee4b71b840d69f5f830042ad55 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Fri, 22 Jan 2021 13:33:20 -0800 Subject: [PATCH 1/3] [SYCL] Allow user control over range rounding, and disable for older SYCL spec. Signed-off-by: rdeodhar --- sycl/include/CL/sycl/handler.hpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index c952e786bb295..ddbd0d31675c6 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -748,9 +748,13 @@ class __SYCL_EXPORT handler { using NameT = typename detail::get_kernel_name_t::name; - // FIXME Remove this ifndef once rounding of execution range works well with - // ESIMD compilation flow. -#ifndef __SYCL_EXPLICIT_SIMD__ + // FIXME Remove the ESIMD check once rounding of execution range works well + // with ESIMD compilation flow. + // Range rounding is supported only for newer SYCL standards. + // Range rounding can also be disabled by the user. +#if !defined(__SYCL_EXPLICIT_SIMD__) && \ + !defined(SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING) && \ + SYCL_LANGUAGE_VERSION > 202001 // The work group size preferred by this device. // A reasonable choice for rounding up the range is 32. constexpr size_t GoodLocalSizeX = 32; @@ -816,7 +820,8 @@ class __SYCL_EXPORT handler { MCGType = detail::CG::KERNEL; #endif } else -#endif // __SYCL_EXPLICIT_SIMD__ +#endif // !__SYCL_EXPLICIT_SIMD__ && !SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING + // && SYCL_LANGUAGE_VERSION > 202001 { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkItems; From 7e0b60bef82bc4427e151c822e82c295ee578f00 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Fri, 22 Jan 2021 13:33:20 -0800 Subject: [PATCH 2/3] [SYCL] Allow user control over range rounding, and disable for older SYCL spec. 
Signed-off-by: rdeodhar --- sycl/include/CL/sycl/handler.hpp | 13 +++-- .../parallel_for_disable_range_roundup.cpp | 53 +++++++++++++++++++ ...llel_for_disable_range_roundup_syclstd.cpp | 51 ++++++++++++++++++ 3 files changed, 113 insertions(+), 4 deletions(-) create mode 100755 sycl/test/basic_tests/parallel_for_disable_range_roundup.cpp create mode 100755 sycl/test/basic_tests/parallel_for_disable_range_roundup_syclstd.cpp diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index c952e786bb295..e5b434eaa6441 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -748,9 +748,13 @@ class __SYCL_EXPORT handler { using NameT = typename detail::get_kernel_name_t::name; - // FIXME Remove this ifndef once rounding of execution range works well with - // ESIMD compilation flow. -#ifndef __SYCL_EXPLICIT_SIMD__ + // FIXME Remove the ESIMD check once rounding of execution range works well + // with ESIMD compilation flow. + // Range rounding is supported only for newer SYCL standards. + // Range rounding can also be disabled by the user. +#if !defined(__SYCL_EXPLICIT_SIMD__) && \ + !defined(SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING) && \ + SYCL_LANGUAGE_VERSION >= 202001 // The work group size preferred by this device. // A reasonable choice for rounding up the range is 32. 
constexpr size_t GoodLocalSizeX = 32; @@ -816,7 +820,8 @@ class __SYCL_EXPORT handler { MCGType = detail::CG::KERNEL; #endif } else -#endif // __SYCL_EXPLICIT_SIMD__ +#endif // !__SYCL_EXPLICIT_SIMD__ && !SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING + // && SYCL_LANGUAGE_VERSION >= 202001 { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkItems; diff --git a/sycl/test/basic_tests/parallel_for_disable_range_roundup.cpp b/sycl/test/basic_tests/parallel_for_disable_range_roundup.cpp new file mode 100755 index 0000000000000..9bf59e12c96e9 --- /dev/null +++ b/sycl/test/basic_tests/parallel_for_disable_range_roundup.cpp @@ -0,0 +1,53 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING %s -o %t.out +// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %RUN_ON_HOST %t.out + +#include + +using namespace sycl; + +range<1> Range1 = {0}; + +void check(const char *msg, size_t v, size_t ref) { + std::cout << msg << v << std::endl; + assert(v == ref); +} + +int try_rounding_off(size_t size) { + range<1> Size{size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range1, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + std::cout + << "Run parallel_for with SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING" + << std::endl; + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](item<1> ITEM) { + AccCounter[0].fetch_add(1); + AccRange[0] = ITEM.get_range(0); + }); + }); + myQueue.wait(); + } + check("Size seen by user = ", Range1.get(0), size); + check("Counter = ", Counter, size); + return 0; +} + +int main() { + int x; + + x = 10; + try_rounding_off(x); + + return 0; +} + +// CHECK: Run parallel_for with SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING +// CHECK-NOT: parallel_for range adjusted from 10 to 32 +// CHECK: Size seen by user = 10 +// CHECK-NEXT: Counter = 10 diff --git a/sycl/test/basic_tests/parallel_for_disable_range_roundup_syclstd.cpp 
b/sycl/test/basic_tests/parallel_for_disable_range_roundup_syclstd.cpp new file mode 100755 index 0000000000000..a5c7280af8217 --- /dev/null +++ b/sycl/test/basic_tests/parallel_for_disable_range_roundup_syclstd.cpp @@ -0,0 +1,51 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -sycl-std=2017 %s -o %t.out +// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %RUN_ON_HOST %t.out + +#include + +using namespace sycl; + +range<1> Range1 = {0}; + +void check(const char *msg, size_t v, size_t ref) { + std::cout << msg << v << std::endl; + assert(v == ref); +} + +int try_rounding_off(size_t size) { + range<1> Size{size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range1, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + std::cout << "Run parallel_for with -sycl-std=2017" << std::endl; + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](item<1> ITEM) { + AccCounter[0].fetch_add(1); + AccRange[0] = ITEM.get_range(0); + }); + }); + myQueue.wait(); + } + check("Size seen by user = ", Range1.get(0), size); + check("Counter = ", Counter, size); + return 0; +} + +int main() { + int x; + + x = 10; + try_rounding_off(x); + + return 0; +} + +// CHECK: Run parallel_for with -sycl-std=2017 +// CHECK-NOT: parallel_for range adjusted from 10 to 32 +// CHECK: Size seen by user = 10 +// CHECK-NEXT: Counter = 10 From 473c9e78a00bd612e7aa45cdb8a809766219a3d1 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Mon, 25 Jan 2021 15:20:02 -0800 Subject: [PATCH 3/3] [SYCL] Allow disabling range rounding, and disable for SYCL specs < 2020. 
--- .../parallel_for_disable_range_roundup.cpp | 53 ------------------- ...llel_for_disable_range_roundup_syclstd.cpp | 51 ------------------ 2 files changed, 104 deletions(-) delete mode 100755 sycl/test/basic_tests/parallel_for_disable_range_roundup.cpp delete mode 100755 sycl/test/basic_tests/parallel_for_disable_range_roundup_syclstd.cpp diff --git a/sycl/test/basic_tests/parallel_for_disable_range_roundup.cpp b/sycl/test/basic_tests/parallel_for_disable_range_roundup.cpp deleted file mode 100755 index 9bf59e12c96e9..0000000000000 --- a/sycl/test/basic_tests/parallel_for_disable_range_roundup.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING %s -o %t.out -// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %RUN_ON_HOST %t.out - -#include - -using namespace sycl; - -range<1> Range1 = {0}; - -void check(const char *msg, size_t v, size_t ref) { - std::cout << msg << v << std::endl; - assert(v == ref); -} - -int try_rounding_off(size_t size) { - range<1> Size{size}; - int Counter = 0; - { - buffer, 1> BufRange(&Range1, 1); - buffer BufCounter(&Counter, 1); - queue myQueue; - - std::cout - << "Run parallel_for with SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING" - << std::endl; - myQueue.submit([&](handler &cgh) { - auto AccRange = BufRange.get_access(cgh); - auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](item<1> ITEM) { - AccCounter[0].fetch_add(1); - AccRange[0] = ITEM.get_range(0); - }); - }); - myQueue.wait(); - } - check("Size seen by user = ", Range1.get(0), size); - check("Counter = ", Counter, size); - return 0; -} - -int main() { - int x; - - x = 10; - try_rounding_off(x); - - return 0; -} - -// CHECK: Run parallel_for with SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING -// CHECK-NOT: parallel_for range adjusted from 10 to 32 -// CHECK: Size seen by user = 10 -// CHECK-NEXT: Counter = 10 diff --git 
a/sycl/test/basic_tests/parallel_for_disable_range_roundup_syclstd.cpp b/sycl/test/basic_tests/parallel_for_disable_range_roundup_syclstd.cpp deleted file mode 100755 index a5c7280af8217..0000000000000 --- a/sycl/test/basic_tests/parallel_for_disable_range_roundup_syclstd.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -sycl-std=2017 %s -o %t.out -// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %RUN_ON_HOST %t.out - -#include - -using namespace sycl; - -range<1> Range1 = {0}; - -void check(const char *msg, size_t v, size_t ref) { - std::cout << msg << v << std::endl; - assert(v == ref); -} - -int try_rounding_off(size_t size) { - range<1> Size{size}; - int Counter = 0; - { - buffer, 1> BufRange(&Range1, 1); - buffer BufCounter(&Counter, 1); - queue myQueue; - - std::cout << "Run parallel_for with -sycl-std=2017" << std::endl; - myQueue.submit([&](handler &cgh) { - auto AccRange = BufRange.get_access(cgh); - auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](item<1> ITEM) { - AccCounter[0].fetch_add(1); - AccRange[0] = ITEM.get_range(0); - }); - }); - myQueue.wait(); - } - check("Size seen by user = ", Range1.get(0), size); - check("Counter = ", Counter, size); - return 0; -} - -int main() { - int x; - - x = 10; - try_rounding_off(x); - - return 0; -} - -// CHECK: Run parallel_for with -sycl-std=2017 -// CHECK-NOT: parallel_for range adjusted from 10 to 32 -// CHECK: Size seen by user = 10 -// CHECK-NEXT: Counter = 10