Skip to content

Commit 6d3be5d

Browse files
Use sycl::ext::oneapi::experimental for complex types
This works around the use of double-precision functions/literals in the implementations of these functions in MSVC headers, which caused failures to offload on Iris Xe for single-precision input, citing lack of fp64 support by the hardware. Changes include: CL/sycl.hpp is replaced by sycl/sycl.hpp, per the SYCL-2020 spec. For every CMake target where add_sycl_to_target is used, we also run target_compile_options( ${target_name} PRIVATE -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda ). Add the DPCTL_TARGET_CUDA Boolean CMake option. The DPCTL_SYCL_TARGETS parameter can also be used to specify the targets to build for. DPCTL_TARGET_CUDA can be set via a CMake option or via an environment variable, e.g. ``` $ DPCTL_TARGET_CUDA=1 python scripts/build_locally.py --verbose ``` This calls `target_compile_options` to set the SYCL targets for the CMake targets that need SYCL.
1 parent f772888 commit 6d3be5d

File tree

160 files changed

+6871
-218
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

160 files changed

+6871
-218
lines changed

Diff for: CMakeLists.txt

+21
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,30 @@ option(DPCTL_GENERATE_COVERAGE
1717
"Build dpctl with coverage instrumentation"
1818
OFF
1919
)
20+
option(DPCTL_TARGET_CUDA
21+
"Build DPCTL to target CUDA devices"
22+
OFF
23+
)
2024

2125
find_package(IntelSYCL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/cmake NO_DEFAULT_PATH)
2226

27+
set(_dpctl_sycl_targets)
28+
if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x")
29+
if(DPCTL_TARGET_CUDA)
30+
set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
31+
else()
32+
if(DEFINED ENV{DPCTL_TARGET_CUDA})
33+
set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
34+
endif()
35+
endif()
36+
else()
37+
set(_dpctl_sycl_targets ${DPCTL_SYCL_TARGETS})
38+
endif()
39+
40+
if(_dpctl_sycl_targets)
41+
message(STATUS "Compiling for -fsycl-targets=${_dpctl_sycl_targets}")
42+
endif()
43+
2344
add_subdirectory(libsyclinterface)
2445

2546
file(GLOB _dpctl_capi_headers dpctl/apis/include/*.h*)

Diff for: dpctl/CMakeLists.txt

+14-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,20 @@ function(build_dpctl_ext _trgt _src _dest)
143143
add_custom_target(${_cythonize_trgt} DEPENDS ${_src})
144144
Python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src})
145145
if (BUILD_DPCTL_EXT_SYCL)
146-
add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
146+
add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
147+
if(_dpctl_sycl_targets)
148+
# make fat binary
149+
target_compile_options(
150+
${_trgt}
151+
PRIVATE
152+
-fsycl-targets=${_dpctl_sycl_targets}
153+
)
154+
target_link_options(
155+
${_trgt}
156+
PRIVATE
157+
-fsycl-targets=${_dpctl_sycl_targets}
158+
)
159+
endif()
147160
endif()
148161
target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR} ${DPCTL_INCLUDE_DIR})
149162
add_dependencies(${_trgt} _build_time_create_dpctl_include_copy ${_cythonize_trgt})

Diff for: dpctl/_host_task_util.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
#include "Python.h"
3434
#include "syclinterface/dpctl_data_types.h"
3535
#include "syclinterface/dpctl_sycl_type_casters.hpp"
36-
#include <CL/sycl.hpp>
36+
#include <sycl/sycl.hpp>
3737

3838
DPCTLSyclEventRef async_dec_ref(DPCTLSyclQueueRef QRef,
3939
PyObject **obj_array,

Diff for: dpctl/apis/include/dpctl4pybind11.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@
2626
#pragma once
2727

2828
#include "dpctl_capi.h"
29-
#include <CL/sycl.hpp>
3029
#include <complex>
3130
#include <memory>
3231
#include <pybind11/pybind11.h>
32+
#include <sycl/sycl.hpp>
3333
#include <utility>
3434
#include <vector>
3535

Diff for: dpctl/sycl.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from . cimport _backend as dpctl_backend
2121

2222

23-
cdef extern from "CL/sycl.hpp" namespace "sycl":
23+
cdef extern from "sycl/sycl.hpp" namespace "sycl":
2424
cdef cppclass queue "sycl::queue":
2525
pass
2626

Diff for: dpctl/tensor/CMakeLists.txt

+25-4
Original file line numberDiff line numberDiff line change
@@ -188,12 +188,20 @@ foreach(_src_fn ${_no_fast_math_sources})
188188
)
189189
endforeach()
190190
if (UNIX)
191-
set_source_files_properties(
192-
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/abs.cpp
193-
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp
194-
PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES")
191+
set(_compiler_definitions "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES;SYCL_EXT_ONEAPI_COMPLEX")
192+
else()
193+
set(_compiler_definitions "SYCL_EXT_ONEAPI_COMPLEX")
195194
endif()
196195

196+
foreach(_src_fn ${_elementwise_sources})
197+
get_source_file_property(_cmpl_options_defs ${_src_fn} COMPILE_DEFINITIONS)
198+
set(_combined_options_defs ${_cmpl_options_defs} "${_compiler_definitions}")
199+
set_source_files_properties(
200+
${_src_fn}
201+
PROPERTIES COMPILE_DEFINITIONS "${_combined_options_defs}"
202+
)
203+
endforeach()
204+
197205
set(_linker_options "LINKER:${DPCTL_LDFLAGS}")
198206
foreach(python_module_name ${_py_trgts})
199207
target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int)
@@ -209,6 +217,19 @@ foreach(python_module_name ${_py_trgts})
209217
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/
210218
)
211219
target_link_options(${python_module_name} PRIVATE ${_linker_options})
220+
if(_dpctl_sycl_targets)
221+
# make fat binary
222+
target_compile_options(
223+
${python_module_name}
224+
PRIVATE
225+
-fsycl-targets=${_dpctl_sycl_targets}
226+
)
227+
target_link_options(
228+
${python_module_name}
229+
PRIVATE
230+
-fsycl-targets=${_dpctl_sycl_targets}
231+
)
232+
endif()
212233
add_dependencies(${python_module_name} _dpctl4pybind11_deps)
213234
install(TARGETS ${python_module_name} DESTINATION "dpctl/tensor")
214235
endforeach()

Diff for: dpctl/tensor/libtensor/include/kernels/accumulators.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <array>
2827
#include <cstdint>
2928
#include <limits>
3029
#include <pybind11/pybind11.h>
30+
#include <sycl/sycl.hpp>
3131
#include <utility>
3232
#include <vector>
3333

Diff for: dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <cstdint>
2827
#include <limits>
2928
#include <pybind11/pybind11.h>
29+
#include <sycl/sycl.hpp>
3030
#include <utility>
3131
#include <vector>
3232

Diff for: dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
//===----------------------------------------------------------------------===//
2525

2626
#pragma once
27-
#include <CL/sycl.hpp>
27+
#include <sycl/sycl.hpp>
2828

2929
#include <complex>
3030
#include <cstdint>

Diff for: dpctl/tensor/libtensor/include/kernels/constructors.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727
#include "utils/offset_utils.hpp"
2828
#include "utils/strided_iters.hpp"
2929
#include "utils/type_utils.hpp"
30-
#include <CL/sycl.hpp>
3130
#include <complex>
3231
#include <pybind11/pybind11.h>
32+
#include <sycl/sycl.hpp>
3333

3434
namespace dpctl
3535
{

Diff for: dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
//===----------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <complex>
2827
#include <cstdint>
2928
#include <pybind11/pybind11.h>
29+
#include <sycl/sycl.hpp>
3030
#include <type_traits>
3131

3232
#include "utils/offset_utils.hpp"

Diff for: dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,13 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <cmath>
2827
#include <complex>
2928
#include <cstddef>
3029
#include <cstdint>
3130
#include <limits>
31+
#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
32+
#include <sycl/sycl.hpp>
3233
#include <type_traits>
3334

3435
#include "kernels/elementwise_functions/common.hpp"
@@ -49,6 +50,7 @@ namespace abs
4950

5051
namespace py = pybind11;
5152
namespace td_ns = dpctl::tensor::type_dispatch;
53+
namespace exprm_ns = sycl::ext::oneapi::experimental;
5254

5355
using dpctl::tensor::type_utils::is_complex;
5456

@@ -120,7 +122,7 @@ template <typename argT, typename resT> struct AbsFunctor
120122
}
121123
else {
122124
#ifdef USE_STD_ABS_FOR_COMPLEX_TYPES
123-
return std::abs(z);
125+
return exprm_ns::abs(exprm_ns::complex<realT>(z));
124126
#else
125127
return std::hypot(std::real(z), std::imag(z));
126128
#endif

Diff for: dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp

+10-5
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <cmath>
2827
#include <cstddef>
2928
#include <cstdint>
29+
#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
30+
#include <sycl/sycl.hpp>
3031
#include <type_traits>
3132

3233
#include "kernels/elementwise_functions/common.hpp"
@@ -47,6 +48,7 @@ namespace acos
4748

4849
namespace py = pybind11;
4950
namespace td_ns = dpctl::tensor::type_dispatch;
51+
namespace exprm_ns = sycl::ext::oneapi::experimental;
5052

5153
using dpctl::tensor::type_utils::is_complex;
5254

@@ -103,18 +105,21 @@ template <typename argT, typename resT> struct AcosFunctor
103105
constexpr realT r_eps =
104106
realT(1) / std::numeric_limits<realT>::epsilon();
105107
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
106-
argT log_in = std::log(in);
108+
using sycl_complexT = exprm_ns::complex<realT>;
109+
sycl_complexT log_in =
110+
exprm_ns::log(exprm_ns::complex<realT>(in));
107111

108-
const realT wx = std::real(log_in);
109-
const realT wy = std::imag(log_in);
112+
const realT wx = log_in.real();
113+
const realT wy = log_in.imag();
110114
const realT rx = std::abs(wy);
111115

112116
realT ry = wx + std::log(realT(2));
113117
return resT{rx, (std::signbit(y)) ? ry : -ry};
114118
}
115119

116120
/* ordinary cases */
117-
return std::acos(in);
121+
return exprm_ns::acos(
122+
exprm_ns::complex<realT>(in)); // std::acos(in);
118123
}
119124
else {
120125
static_assert(std::is_floating_point_v<argT> ||

Diff for: dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp

+9-4
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <cmath>
2827
#include <cstddef>
2928
#include <cstdint>
29+
#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
30+
#include <sycl/sycl.hpp>
3031
#include <type_traits>
3132

3233
#include "kernels/elementwise_functions/common.hpp"
@@ -47,6 +48,7 @@ namespace acosh
4748

4849
namespace py = pybind11;
4950
namespace td_ns = dpctl::tensor::type_dispatch;
51+
namespace exprm_ns = sycl::ext::oneapi::experimental;
5052

5153
using dpctl::tensor::type_utils::is_complex;
5254

@@ -110,15 +112,18 @@ template <typename argT, typename resT> struct AcoshFunctor
110112
* For large x or y including acos(+-Inf + I*+-Inf)
111113
*/
112114
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
113-
const realT wx = std::real(std::log(in));
114-
const realT wy = std::imag(std::log(in));
115+
using sycl_complexT = typename exprm_ns::complex<realT>;
116+
const sycl_complexT log_in = exprm_ns::log(sycl_complexT(in));
117+
const realT wx = log_in.real();
118+
const realT wy = log_in.imag();
115119
const realT rx = std::abs(wy);
116120
realT ry = wx + std::log(realT(2));
117121
acos_in = resT{rx, (std::signbit(y)) ? ry : -ry};
118122
}
119123
else {
120124
/* ordinary cases */
121-
acos_in = std::acos(in);
125+
acos_in = exprm_ns::acos(
126+
exprm_ns::complex<realT>(in)); // std::acos(in);
122127
}
123128

124129
/* Now we calculate acosh(z) */

Diff for: dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp

+28-2
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@
2424
//===---------------------------------------------------------------------===//
2525

2626
#pragma once
27-
#include <CL/sycl.hpp>
2827
#include <cstddef>
2928
#include <cstdint>
29+
#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
30+
#include <sycl/sycl.hpp>
3031
#include <type_traits>
3132

3233
#include "utils/offset_utils.hpp"
@@ -49,6 +50,7 @@ namespace add
4950
namespace py = pybind11;
5051
namespace td_ns = dpctl::tensor::type_dispatch;
5152
namespace tu_ns = dpctl::tensor::type_utils;
53+
namespace exprm_ns = sycl::ext::oneapi::experimental;
5254

5355
template <typename argT1, typename argT2, typename resT> struct AddFunctor
5456
{
@@ -60,7 +62,31 @@ template <typename argT1, typename argT2, typename resT> struct AddFunctor
6062

6163
resT operator()(const argT1 &in1, const argT2 &in2) const
6264
{
63-
return in1 + in2;
65+
if constexpr (tu_ns::is_complex<argT1>::value &&
66+
tu_ns::is_complex<argT2>::value)
67+
{
68+
using rT1 = typename argT1::value_type;
69+
using rT2 = typename argT2::value_type;
70+
71+
return exprm_ns::complex<rT1>(in1) + exprm_ns::complex<rT2>(in2);
72+
}
73+
else if constexpr (tu_ns::is_complex<argT1>::value &&
74+
!tu_ns::is_complex<argT2>::value)
75+
{
76+
using rT1 = typename argT1::value_type;
77+
78+
return exprm_ns::complex<rT1>(in1) + in2;
79+
}
80+
else if constexpr (!tu_ns::is_complex<argT1>::value &&
81+
tu_ns::is_complex<argT2>::value)
82+
{
83+
using rT2 = typename argT2::value_type;
84+
85+
return in1 + exprm_ns::complex<rT2>(in2);
86+
}
87+
else {
88+
return in1 + in2;
89+
}
6490
}
6591

6692
template <int vec_sz>

0 commit comments

Comments
 (0)