Skip to content

Commit a2a6ded

Browse files
committed
Enabled rounding for CPU also.
1 parent 4b9093e commit a2a6ded

File tree

3 files changed

+6
-20
lines changed

3 files changed

+6
-20
lines changed

sycl/include/CL/sycl/handler.hpp

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -765,15 +765,14 @@ class __SYCL_EXPORT handler {
765765
constexpr size_t GoodLocalSizeX = 32;
766766

767767
// Disable the rounding-up optimizations under these conditions:
768-
// 1. The device is not a GPU. Only GPUs benefit from rounding.
769-
// 2. The env var SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is set.
770-
// 3. The string SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is in
768+
// 1. The env var SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is set.
769+
// 2. The string SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is in
771770
// the kernel name.
772-
// 4. The kernel is provided via an interoperability method.
773-
// 5. The API "this_item" is used inside the kernel.
774-
// 6. The range is already a multiple of the rounding factor.
771+
// 3. The kernel is provided via an interoperability method.
772+
// 4. The API "this_item" is used inside the kernel.
773+
// 5. The range is already a multiple of the rounding factor.
775774
//
776-
// Cases 4 and 5 could be supported with extra effort.
775+
// Cases 3 and 4 could be supported with extra effort.
777776
// As an optimization for the common case it is an
778777
// implementation choice to not support those scenarios.
779778
// Note that "this_item" is a free function, i.e. not tied to any
@@ -787,7 +786,6 @@ class __SYCL_EXPORT handler {
787786
std::string KName = typeid(NameT *).name();
788787
using KI = detail::KernelInfo<KernelName>;
789788
bool DisableRounding =
790-
!is_gpu(MQueue) ||
791789
(getenv("SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING") != nullptr) ||
792790
(KName.find("SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING") !=
793791
std::string::npos) ||
@@ -899,12 +897,6 @@ class __SYCL_EXPORT handler {
899897

900898
#endif
901899

902-
/// Check if the queue being used is for a GPU device
903-
///
904-
/// \param Queue is the queue for this handler.
905-
/// \return Whether the device is a GPU.
906-
bool is_gpu(const shared_ptr_class<sycl::detail::queue_impl> Queue);
907-
908900
public:
909901
handler(const handler &) = delete;
910902
handler(handler &&) = delete;

sycl/source/handler.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -343,10 +343,5 @@ void handler::prefetch(const void *Ptr, size_t Count) {
343343
MLength = Count;
344344
MCGType = detail::CG::PREFETCH_USM;
345345
}
346-
347-
bool handler::is_gpu(const shared_ptr_class<sycl::detail::queue_impl> Queue) {
348-
device Dev = Queue->get_device();
349-
return Dev.is_gpu();
350-
}
351346
} // namespace sycl
352347
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3838,7 +3838,6 @@ _ZN2cl4sycl7handler18extractArgsAndReqsEv
38383838
_ZN2cl4sycl7handler20associateWithHandlerEPNS0_6detail16AccessorBaseHostENS0_6access6targetE
38393839
_ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tE
38403840
_ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tEb
3841-
_ZN2cl4sycl7handler6is_gpuESt10shared_ptrINS0_6detail10queue_implEE
38423841
_ZN2cl4sycl7handler6memcpyEPvPKvm
38433842
_ZN2cl4sycl7handler6memsetEPvim
38443843
_ZN2cl4sycl7handler7barrierERKSt6vectorINS0_5eventESaIS3_EE

0 commit comments

Comments
 (0)