Skip to content

Commit 479a969

Browse files
Merge pull request #1454 from IntelPython/compile-for-cuda
Enable compiling for cuda
2 parents 71b85ab + 986dc6f commit 479a969

26 files changed

+1732
-31
lines changed

CMakeLists.txt

+21
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,30 @@ option(DPCTL_GENERATE_COVERAGE
1717
"Build dpctl with coverage instrumentation"
1818
OFF
1919
)
20+
option(DPCTL_TARGET_CUDA
21+
"Build DPCTL to target CUDA devices"
22+
OFF
23+
)
2024

2125
find_package(IntelSYCL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/cmake NO_DEFAULT_PATH)
2226

27+
set(_dpctl_sycl_targets)
28+
if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x")
29+
if(DPCTL_TARGET_CUDA)
30+
set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
31+
else()
32+
if(DEFINED ENV{DPCTL_TARGET_CUDA})
33+
set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
34+
endif()
35+
endif()
36+
else()
37+
set(_dpctl_sycl_targets ${DPCTL_SYCL_TARGETS})
38+
endif()
39+
40+
if(_dpctl_sycl_targets)
41+
message(STATUS "Compiling for -fsycl-targets=${_dpctl_sycl_targets}")
42+
endif()
43+
2344
add_subdirectory(libsyclinterface)
2445

2546
file(GLOB _dpctl_capi_headers dpctl/apis/include/*.h*)

dpctl/CMakeLists.txt

+14-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,20 @@ function(build_dpctl_ext _trgt _src _dest)
143143
add_custom_target(${_cythonize_trgt} DEPENDS ${_src})
144144
Python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src})
145145
if (BUILD_DPCTL_EXT_SYCL)
146-
add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
146+
add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
147+
if(_dpctl_sycl_targets)
148+
# make fat binary
149+
target_compile_options(
150+
${_trgt}
151+
PRIVATE
152+
-fsycl-targets=${_dpctl_sycl_targets}
153+
)
154+
target_link_options(
155+
${_trgt}
156+
PRIVATE
157+
-fsycl-targets=${_dpctl_sycl_targets}
158+
)
159+
endif()
147160
endif()
148161
target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR} ${DPCTL_INCLUDE_DIR})
149162
add_dependencies(${_trgt} _build_time_create_dpctl_include_copy ${_cythonize_trgt})

dpctl/tensor/CMakeLists.txt

+14
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,20 @@ set(_tensor_impl_sources
5555
set(python_module_name _tensor_impl)
5656
pybind11_add_module(${python_module_name} MODULE ${_tensor_impl_sources})
5757
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_tensor_impl_sources})
58+
if(_dpctl_sycl_targets)
59+
# make fat binary
60+
target_compile_options(
61+
${python_module_name}
62+
PRIVATE
63+
-fsycl-targets=${_dpctl_sycl_targets}
64+
)
65+
target_link_options(
66+
${python_module_name}
67+
PRIVATE
68+
-fsycl-targets=${_dpctl_sycl_targets}
69+
)
70+
endif()
71+
5872
set(_clang_prefix "")
5973
if (WIN32)
6074
set(_clang_prefix "/clang:")

dpctl/tensor/__init__.py

+8
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,16 @@
110110
bitwise_or,
111111
bitwise_right_shift,
112112
bitwise_xor,
113+
cbrt,
113114
ceil,
114115
conj,
116+
copysign,
115117
cos,
116118
cosh,
117119
divide,
118120
equal,
119121
exp,
122+
exp2,
120123
expm1,
121124
floor,
122125
floor_divide,
@@ -149,6 +152,7 @@
149152
real,
150153
remainder,
151154
round,
155+
rsqrt,
152156
sign,
153157
signbit,
154158
sin,
@@ -314,4 +318,8 @@
314318
"argmax",
315319
"argmin",
316320
"prod",
321+
"cbrt",
322+
"exp2",
323+
"copysign",
324+
"rsqrt",
317325
]

dpctl/tensor/_elementwise_funcs.py

+113
Original file line numberDiff line numberDiff line change
@@ -1761,3 +1761,116 @@
17611761
hypot = BinaryElementwiseFunc(
17621762
"hypot", ti._hypot_result_type, ti._hypot, _hypot_docstring_
17631763
)
1764+
1765+
1766+
# U37: ==== CBRT (x)
1767+
_cbrt_docstring_ = """
1768+
cbrt(x, out=None, order='K')
1769+
1770+
Computes positive cube-root for each element `x_i` for input array `x`.
1771+
1772+
Args:
1773+
x (usm_ndarray):
1774+
Input array, expected to have a real floating-point data type.
1775+
out ({None, usm_ndarray}, optional):
1776+
Output array to populate.
1777+
Array have the correct shape and the expected data type.
1778+
order ("C","F","A","K", optional):
1779+
Memory layout of the newly output array, if parameter `out` is `None`.
1780+
Default: "K".
1781+
Returns:
1782+
usm_narray:
1783+
An array containing the element-wise positive cube-root.
1784+
The data type of the returned array is determined by
1785+
the Type Promotion Rules.
1786+
"""
1787+
1788+
cbrt = UnaryElementwiseFunc(
1789+
"cbrt", ti._cbrt_result_type, ti._cbrt, _cbrt_docstring_
1790+
)
1791+
1792+
1793+
# U38: ==== EXP2 (x)
1794+
_exp2_docstring_ = """
1795+
exp2(x, out=None, order='K')
1796+
1797+
Computes the base-2 exponential for each element `x_i` for input array `x`.
1798+
1799+
Args:
1800+
x (usm_ndarray):
1801+
Input array, expected to have a floating-point data type.
1802+
out ({None, usm_ndarray}, optional):
1803+
Output array to populate.
1804+
Array have the correct shape and the expected data type.
1805+
order ("C","F","A","K", optional):
1806+
Memory layout of the newly output array, if parameter `out` is `None`.
1807+
Default: "K".
1808+
Returns:
1809+
usm_narray:
1810+
An array containing the element-wise base-2 exponentials.
1811+
The data type of the returned array is determined by
1812+
the Type Promotion Rules.
1813+
"""
1814+
1815+
exp2 = UnaryElementwiseFunc(
1816+
"exp2", ti._exp2_result_type, ti._exp2, _exp2_docstring_
1817+
)
1818+
1819+
1820+
# B25: ==== COPYSIGN (x1, x2)
1821+
_copysign_docstring_ = """
1822+
copysign(x1, x2, out=None, order='K')
1823+
1824+
Composes a floating-point value with the magnitude of `x1_i` and the sign of
1825+
`x2_i` for each element of input arrays `x1` and `x2`.
1826+
1827+
Args:
1828+
x1 (usm_ndarray):
1829+
First input array, expected to have a real floating-point data type.
1830+
x2 (usm_ndarray):
1831+
Second input array, also expected to have a real floating-point data
1832+
type.
1833+
out ({None, usm_ndarray}, optional):
1834+
Output array to populate.
1835+
Array have the correct shape and the expected data type.
1836+
order ("C","F","A","K", optional):
1837+
Memory layout of the newly output array, if parameter `out` is `None`.
1838+
Default: "K".
1839+
Returns:
1840+
usm_narray:
1841+
An array containing the element-wise results. The data type
1842+
of the returned array is determined by the Type Promotion Rules.
1843+
"""
1844+
copysign = BinaryElementwiseFunc(
1845+
"copysign",
1846+
ti._copysign_result_type,
1847+
ti._copysign,
1848+
_copysign_docstring_,
1849+
)
1850+
1851+
1852+
# U39: ==== RSQRT (x)
1853+
_rsqrt_docstring_ = """
1854+
rsqrt(x, out=None, order='K')
1855+
1856+
Computes the reciprocal square-root for each element `x_i` for input array `x`.
1857+
1858+
Args:
1859+
x (usm_ndarray):
1860+
Input array, expected to have a real floating-point data type.
1861+
out ({None, usm_ndarray}, optional):
1862+
Output array to populate.
1863+
Array have the correct shape and the expected data type.
1864+
order ("C","F","A","K", optional):
1865+
Memory layout of the newly output array, if parameter `out` is `None`.
1866+
Default: "K".
1867+
Returns:
1868+
usm_narray:
1869+
An array containing the element-wise reciprocal square-root.
1870+
The data type of the returned array is determined by
1871+
the Type Promotion Rules.
1872+
"""
1873+
1874+
rsqrt = UnaryElementwiseFunc(
1875+
"rsqrt", ti._rsqrt_result_type, ti._rsqrt, _rsqrt_docstring_
1876+
)

dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,12 @@ template <typename argT, typename resT> struct AcosFunctor
105105
constexpr realT r_eps =
106106
realT(1) / std::numeric_limits<realT>::epsilon();
107107
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
108-
argT log_in = std::log(in);
108+
using sycl_complexT = exprm_ns::complex<realT>;
109+
sycl_complexT log_in =
110+
exprm_ns::log(exprm_ns::complex<realT>(in));
109111

110-
const realT wx = std::real(log_in);
111-
const realT wy = std::imag(log_in);
112+
const realT wx = log_in.real();
113+
const realT wy = log_in.imag();
112114
const realT rx = std::abs(wy);
113115

114116
realT ry = wx + std::log(realT(2));

dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp

+7-5
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ namespace acosh
4848

4949
namespace py = pybind11;
5050
namespace td_ns = dpctl::tensor::type_dispatch;
51-
namespace cmplx_ns = sycl::ext::oneapi::experimental;
51+
namespace exprm_ns = sycl::ext::oneapi::experimental;
5252

5353
using dpctl::tensor::type_utils::is_complex;
5454

@@ -112,16 +112,18 @@ template <typename argT, typename resT> struct AcoshFunctor
112112
* For large x or y including acos(+-Inf + I*+-Inf)
113113
*/
114114
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
115-
const realT wx = std::real(std::log(in));
116-
const realT wy = std::imag(std::log(in));
115+
using sycl_complexT = typename exprm_ns::complex<realT>;
116+
const sycl_complexT log_in = exprm_ns::log(sycl_complexT(in));
117+
const realT wx = log_in.real();
118+
const realT wy = log_in.imag();
117119
const realT rx = std::abs(wy);
118120
realT ry = wx + std::log(realT(2));
119121
acos_in = resT{rx, (std::signbit(y)) ? ry : -ry};
120122
}
121123
else {
122124
/* ordinary cases */
123-
acos_in = cmplx_ns::acos(
124-
cmplx_ns::complex<realT>(in)); // std::acos(in);
125+
acos_in = exprm_ns::acos(
126+
exprm_ns::complex<realT>(in)); // std::acos(in);
125127
}
126128

127129
/* Now we calculate acosh(z) */

dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp

+8-7
Original file line numberDiff line numberDiff line change
@@ -119,17 +119,18 @@ template <typename argT, typename resT> struct AsinFunctor
119119
constexpr realT r_eps =
120120
realT(1) / std::numeric_limits<realT>::epsilon();
121121
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
122-
const resT z = {x, y};
122+
using sycl_complexT = exprm_ns::complex<realT>;
123+
const sycl_complexT z{x, y};
123124
realT wx, wy;
124125
if (!std::signbit(x)) {
125-
auto log_z = std::log(z);
126-
wx = std::real(log_z) + std::log(realT(2));
127-
wy = std::imag(log_z);
126+
auto log_z = exprm_ns::log(z);
127+
wx = log_z.real() + std::log(realT(2));
128+
wy = log_z.imag();
128129
}
129130
else {
130-
auto log_mz = std::log(-z);
131-
wx = std::real(log_mz) + std::log(realT(2));
132-
wy = std::imag(log_mz);
131+
auto log_mz = exprm_ns::log(-z);
132+
wx = log_mz.real() + std::log(realT(2));
133+
wy = log_mz.imag();
133134
}
134135
const realT asinh_re = std::copysign(wx, x);
135136
const realT asinh_im = std::copysign(wy, y);

dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,12 @@ template <typename argT, typename resT> struct AsinhFunctor
108108
realT(1) / std::numeric_limits<realT>::epsilon();
109109

110110
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
111-
resT log_in = (std::signbit(x)) ? std::log(-in) : std::log(in);
112-
realT wx = std::real(log_in) + std::log(realT(2));
113-
realT wy = std::imag(log_in);
111+
using sycl_complexT = exprm_ns::complex<realT>;
112+
sycl_complexT log_in = (std::signbit(x))
113+
? exprm_ns::log(sycl_complexT(-in))
114+
: exprm_ns::log(sycl_complexT(in));
115+
realT wx = log_in.real() + std::log(realT(2));
116+
realT wy = log_in.imag();
114117
const realT res_re = std::copysign(wx, x);
115118
const realT res_im = std::copysign(wy, y);
116119
return resT{res_re, res_im};

0 commit comments

Comments
 (0)