Skip to content

Commit 5ec9fd5

Browse files
Merge pull request #1411 from IntelPython/use-sycl-ext-oneapi-experimental-for-complex
Use sycl ext oneapi experimental for complex
2 parents f772888 + 0efe28b commit 5ec9fd5

File tree

161 files changed

+7016
-200
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

161 files changed

+7016
-200
lines changed

Diff for: CMakeLists.txt

+21
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,30 @@ option(DPCTL_GENERATE_COVERAGE
1717
"Build dpctl with coverage instrumentation"
1818
OFF
1919
)
20+
option(DPCTL_TARGET_CUDA  # boolean cache option, OFF by default
21+
"Build DPCTL to target CUDA devices"
22+
OFF
23+
)  # when ON and DPCTL_SYCL_TARGETS is empty, nvptx64-nvidia-cuda is added to the default SYCL target list
2024

2125
find_package(IntelSYCL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/cmake NO_DEFAULT_PATH)
2226

27+
set(_dpctl_sycl_targets)  # start with the variable unset; it stays unset unless a target list is selected below
28+
if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x")  # true when DPCTL_SYCL_TARGETS is unset or empty
29+
if(DPCTL_TARGET_CUDA)  # CUDA requested through the CMake option
30+
set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
31+
else()
32+
if(DEFINED ENV{DPCTL_TARGET_CUDA})  # NOTE(review): only checks that the environment variable exists at configure time; its value (even 0 or OFF) is not examined - confirm this is intended
33+
set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
34+
endif()
35+
endif()
36+
else()
37+
set(_dpctl_sycl_targets ${DPCTL_SYCL_TARGETS})  # an explicit DPCTL_SYCL_TARGETS value takes precedence over DPCTL_TARGET_CUDA
38+
endif()
39+
40+
if(_dpctl_sycl_targets)  # report the chosen list only when one was selected
41+
message(STATUS "Compiling for -fsycl-targets=${_dpctl_sycl_targets}")
42+
endif()
43+
2344
add_subdirectory(libsyclinterface)
2445

2546
file(GLOB _dpctl_capi_headers dpctl/apis/include/*.h*)

Diff for: dpctl/CMakeLists.txt

+14-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,20 @@ function(build_dpctl_ext _trgt _src _dest)
143143
add_custom_target(${_cythonize_trgt} DEPENDS ${_src})
144144
Python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src})
145145
if (BUILD_DPCTL_EXT_SYCL)
146-
add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
146+
add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
147+
if(_dpctl_sycl_targets)
148+
# make fat binary
149+
target_compile_options(
150+
${_trgt}
151+
PRIVATE
152+
-fsycl-targets=${_dpctl_sycl_targets}
153+
)
154+
target_link_options(
155+
${_trgt}
156+
PRIVATE
157+
-fsycl-targets=${_dpctl_sycl_targets}
158+
)
159+
endif()
147160
endif()
148161
target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR} ${DPCTL_INCLUDE_DIR})
149162
add_dependencies(${_trgt} _build_time_create_dpctl_include_copy ${_cythonize_trgt})

Diff for: dpctl/_host_task_util.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
#include "Python.h"
3434
#include "syclinterface/dpctl_data_types.h"
3535
#include "syclinterface/dpctl_sycl_type_casters.hpp"
36-
#include <CL/sycl.hpp>
36+
#include <sycl/sycl.hpp>
3737

3838
DPCTLSyclEventRef async_dec_ref(DPCTLSyclQueueRef QRef,
3939
PyObject **obj_array,

Diff for: dpctl/apis/include/dpctl4pybind11.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@
2626
#pragma once
2727

2828
#include "dpctl_capi.h"
29-
#include <CL/sycl.hpp>
3029
#include <complex>
3130
#include <memory>
3231
#include <pybind11/pybind11.h>
32+
#include <sycl/sycl.hpp>
3333
#include <utility>
3434
#include <vector>
3535

Diff for: dpctl/sycl.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from . cimport _backend as dpctl_backend
2121

2222

23-
cdef extern from "CL/sycl.hpp" namespace "sycl":
23+
cdef extern from "sycl/sycl.hpp" namespace "sycl":
2424
cdef cppclass queue "sycl::queue":
2525
pass
2626

Diff for: dpctl/tensor/CMakeLists.txt

+23-5
Original file line numberDiff line numberDiff line change
@@ -187,12 +187,17 @@ foreach(_src_fn ${_no_fast_math_sources})
187187
PROPERTIES COMPILE_OPTIONS "${_combined_options_prop}"
188188
)
189189
endforeach()
190-
if (UNIX)
190+
191+
set(_compiler_definitions "USE_SYCL_FOR_COMPLEX_TYPES")  # definition appended to every elementwise source below
192+
193+
foreach(_src_fn ${_elementwise_sources})
194+
get_property(_cmpl_options_defs SOURCE ${_src_fn} PROPERTY COMPILE_DEFINITIONS)  # get_property returns an empty value when the property is unset; get_source_file_property would return the literal NOTFOUND, which then leaked into the definitions list
195+
set(_combined_options_defs ${_cmpl_options_defs} "${_compiler_definitions}")  # keep definitions already attached to the file and add the new one
191196
set_source_files_properties(
192-
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/abs.cpp
193-
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp
194-
PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES")
195-
endif()
197+
${_src_fn}
198+
PROPERTIES COMPILE_DEFINITIONS "${_combined_options_defs}"
199+
)
200+
endforeach()
196201

197202
set(_linker_options "LINKER:${DPCTL_LDFLAGS}")
198203
foreach(python_module_name ${_py_trgts})
@@ -209,6 +214,19 @@ foreach(python_module_name ${_py_trgts})
209214
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/
210215
)
211216
target_link_options(${python_module_name} PRIVATE ${_linker_options})
217+
if(_dpctl_sycl_targets)
218+
# make fat binary
219+
target_compile_options(
220+
${python_module_name}
221+
PRIVATE
222+
-fsycl-targets=${_dpctl_sycl_targets}
223+
)
224+
target_link_options(
225+
${python_module_name}
226+
PRIVATE
227+
-fsycl-targets=${_dpctl_sycl_targets}
228+
)
229+
endif()
212230
add_dependencies(${python_module_name} _dpctl4pybind11_deps)
213231
install(TARGETS ${python_module_name} DESTINATION "dpctl/tensor")
214232
endforeach()

Diff for: dpctl/tensor/libtensor/include/kernels/accumulators.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <array>
2827
#include <cstdint>
2928
#include <limits>
3029
#include <pybind11/pybind11.h>
30+
#include <sycl/sycl.hpp>
3131
#include <utility>
3232
#include <vector>
3333

Diff for: dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <cstdint>
2827
#include <limits>
2928
#include <pybind11/pybind11.h>
29+
#include <sycl/sycl.hpp>
3030
#include <utility>
3131
#include <vector>
3232

Diff for: dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
//===----------------------------------------------------------------------===//
2525

2626
#pragma once
27-
#include <CL/sycl.hpp>
27+
#include <sycl/sycl.hpp>
2828

2929
#include <complex>
3030
#include <cstdint>

Diff for: dpctl/tensor/libtensor/include/kernels/constructors.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727
#include "utils/offset_utils.hpp"
2828
#include "utils/strided_iters.hpp"
2929
#include "utils/type_utils.hpp"
30-
#include <CL/sycl.hpp>
3130
#include <complex>
3231
#include <pybind11/pybind11.h>
32+
#include <sycl/sycl.hpp>
3333

3434
namespace dpctl
3535
{

Diff for: dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
//===----------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <complex>
2827
#include <cstdint>
2928
#include <pybind11/pybind11.h>
29+
#include <sycl/sycl.hpp>
3030
#include <type_traits>
3131

3232
#include "utils/offset_utils.hpp"

Diff for: dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,16 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <cmath>
2827
#include <complex>
2928
#include <cstddef>
3029
#include <cstdint>
3130
#include <limits>
31+
#include <sycl/sycl.hpp>
3232
#include <type_traits>
3333

3434
#include "kernels/elementwise_functions/common.hpp"
35+
#include "sycl_complex.hpp"
3536

3637
#include "utils/offset_utils.hpp"
3738
#include "utils/type_dispatch.hpp"
@@ -119,8 +120,8 @@ template <typename argT, typename resT> struct AbsFunctor
119120
return q_nan;
120121
}
121122
else {
122-
#ifdef USE_STD_ABS_FOR_COMPLEX_TYPES
123-
return std::abs(z);
123+
#ifdef USE_SYCL_FOR_COMPLEX_TYPES
124+
return exprm_ns::abs(exprm_ns::complex<realT>(z));
124125
#else
125126
return std::hypot(std::real(z), std::imag(z));
126127
#endif

Diff for: dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp

+20-2
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,14 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <cmath>
2827
#include <cstddef>
2928
#include <cstdint>
29+
#include <sycl/sycl.hpp>
3030
#include <type_traits>
3131

3232
#include "kernels/elementwise_functions/common.hpp"
33+
#include "sycl_complex.hpp"
3334

3435
#include "utils/offset_utils.hpp"
3536
#include "utils/type_dispatch.hpp"
@@ -103,18 +104,35 @@ template <typename argT, typename resT> struct AcosFunctor
103104
constexpr realT r_eps =
104105
realT(1) / std::numeric_limits<realT>::epsilon();
105106
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
106-
argT log_in = std::log(in);
107+
#ifdef USE_SYCL_FOR_COMPLEX_TYPES
108+
using sycl_complexT = exprm_ns::complex<realT>;
109+
sycl_complexT log_in =
110+
exprm_ns::log(exprm_ns::complex<realT>(in));
107111

112+
const realT wx = log_in.real();
113+
const realT wy = log_in.imag();
114+
const realT rx = std::abs(wy);
115+
116+
realT ry = wx + std::log(realT(2));
117+
return resT{rx, (std::signbit(y)) ? ry : -ry};
118+
#else
119+
resT log_in = std::log(in);
108120
const realT wx = std::real(log_in);
109121
const realT wy = std::imag(log_in);
110122
const realT rx = std::abs(wy);
111123

112124
realT ry = wx + std::log(realT(2));
113125
return resT{rx, (std::signbit(y)) ? ry : -ry};
126+
#endif
114127
}
115128

116129
/* ordinary cases */
130+
#ifdef USE_SYCL_FOR_COMPLEX_TYPES // presence test, consistent with the #ifdef guard used earlier in this functor
131+
return exprm_ns::acos(
132+
exprm_ns::complex<realT>(in)); // SYCL complex implementation used in place of std::acos(in)
133+
#else
117134
return std::acos(in);
135+
#endif
118136
}
119137
else {
120138
static_assert(std::is_floating_point_v<argT> ||

Diff for: dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp

+17-3
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,14 @@
2323
//===---------------------------------------------------------------------===//
2424

2525
#pragma once
26-
#include <CL/sycl.hpp>
2726
#include <cmath>
2827
#include <cstddef>
2928
#include <cstdint>
29+
#include <sycl/sycl.hpp>
3030
#include <type_traits>
3131

3232
#include "kernels/elementwise_functions/common.hpp"
33+
#include "sycl_complex.hpp"
3334

3435
#include "utils/offset_utils.hpp"
3536
#include "utils/type_dispatch.hpp"
@@ -110,15 +111,28 @@ template <typename argT, typename resT> struct AcoshFunctor
110111
* For large x or y including acos(+-Inf + I*+-Inf)
111112
*/
112113
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
113-
const realT wx = std::real(std::log(in));
114-
const realT wy = std::imag(std::log(in));
114+
#ifdef USE_SYCL_FOR_COMPLEX_TYPES
115+
using sycl_complexT = typename exprm_ns::complex<realT>;
116+
const sycl_complexT log_in = exprm_ns::log(sycl_complexT(in));
117+
const realT wx = log_in.real();
118+
const realT wy = log_in.imag();
119+
#else
120+
const resT log_in = std::log(in);
121+
const realT wx = std::real(log_in);
122+
const realT wy = std::imag(log_in);
123+
#endif
115124
const realT rx = std::abs(wy);
116125
realT ry = wx + std::log(realT(2));
117126
acos_in = resT{rx, (std::signbit(y)) ? ry : -ry};
118127
}
119128
else {
120129
/* ordinary cases */
130+
#ifdef USE_SYCL_FOR_COMPLEX_TYPES // presence test, consistent with the #ifdef guard used earlier in this functor
131+
acos_in = exprm_ns::acos(
132+
exprm_ns::complex<realT>(in)); // SYCL complex implementation used in place of std::acos(in)
133+
#else
121134
acos_in = std::acos(in);
135+
#endif
122136
}
123137

124138
/* Now we calculate acosh(z) */

Diff for: dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp

+39-2
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,12 @@
2424
//===---------------------------------------------------------------------===//
2525

2626
#pragma once
27-
#include <CL/sycl.hpp>
2827
#include <cstddef>
2928
#include <cstdint>
29+
#include <sycl/sycl.hpp>
3030
#include <type_traits>
3131

32+
#include "sycl_complex.hpp"
3233
#include "utils/offset_utils.hpp"
3334
#include "utils/type_dispatch.hpp"
3435
#include "utils/type_utils.hpp"
@@ -60,7 +61,43 @@ template <typename argT1, typename argT2, typename resT> struct AddFunctor
6061

6162
resT operator()(const argT1 &in1, const argT2 &in2) const // returns in1 + in2; complex operands are wrapped in exprm_ns::complex when USE_SYCL_FOR_COMPLEX_TYPES is defined
6263
{
63-
return in1 + in2;
64+
if constexpr (tu_ns::is_complex<argT1>::value &&
65+
tu_ns::is_complex<argT2>::value) // case 1: both operands are complex
66+
{
67+
#ifdef USE_SYCL_FOR_COMPLEX_TYPES
68+
using rT1 = typename argT1::value_type;
69+
using rT2 = typename argT2::value_type;
70+
71+
return exprm_ns::complex<rT1>(in1) + exprm_ns::complex<rT2>(in2); // both sides converted before adding
72+
#else
73+
return in1 + in2;
74+
#endif
75+
}
76+
else if constexpr (tu_ns::is_complex<argT1>::value &&
77+
!tu_ns::is_complex<argT2>::value) // case 2: only the left operand is complex
78+
{
79+
#ifdef USE_SYCL_FOR_COMPLEX_TYPES
80+
using rT1 = typename argT1::value_type;
81+
82+
return exprm_ns::complex<rT1>(in1) + in2; // only the complex side is converted
83+
#else
84+
return in1 + in2;
85+
#endif
86+
}
87+
else if constexpr (!tu_ns::is_complex<argT1>::value &&
88+
tu_ns::is_complex<argT2>::value) // case 3: only the right operand is complex
89+
{
90+
#ifdef USE_SYCL_FOR_COMPLEX_TYPES
91+
using rT2 = typename argT2::value_type;
92+
93+
return in1 + exprm_ns::complex<rT2>(in2); // only the complex side is converted
94+
#else
95+
return in1 + in2;
96+
#endif
97+
}
98+
else { // case 4: neither operand is complex
99+
return in1 + in2;
100+
}
64101
}
65102

66103
template <int vec_sz>

0 commit comments

Comments
 (0)