From 859dc82884458357beee12babfdc892d3901b444 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Mar 2024 19:18:53 -0600 Subject: [PATCH 1/3] Use const references for method arguments in indexers This should avoid unnecessary copying, even though compiler might be optimizing them in the end anyway. --- .../libtensor/include/utils/strided_iters.hpp | 74 ++++++++++--------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/dpctl/tensor/libtensor/include/utils/strided_iters.hpp b/dpctl/tensor/libtensor/include/utils/strided_iters.hpp index 4cfa814130..e12bbebbd5 100644 --- a/dpctl/tensor/libtensor/include/utils/strided_iters.hpp +++ b/dpctl/tensor/libtensor/include/utils/strided_iters.hpp @@ -73,7 +73,7 @@ template class CIndexer_vector public: CIndexer_vector(int dim) : nd(dim) {} - template indT size(ShapeTy shape) const + template indT size(const ShapeTy &shape) const { indT s = static_cast(1); for (int i = 0; i < nd; ++i) { @@ -83,8 +83,10 @@ template class CIndexer_vector } template - void - get_displacement(indT i, ShapeTy shape, StridesTy stride, indT &disp) const + void get_displacement(const indT i, + const ShapeTy &shape, + const StridesTy &stride, + indT &disp) const { if (nd == 1) { disp = i * stride[0]; @@ -104,10 +106,10 @@ template class CIndexer_vector } template - void get_displacement(indT i, - ShapeTy shape, - StridesTy stride1, - StridesTy stride2, + void get_displacement(const indT i, + const ShapeTy &shape, + const StridesTy &stride1, + const StridesTy &stride2, indT &disp1, indT &disp2) const { @@ -133,11 +135,11 @@ template class CIndexer_vector } template - void get_displacement(indT i, - ShapeTy shape, - StridesTy stride1, - StridesTy stride2, - StridesTy stride3, + void get_displacement(const indT i, + const ShapeTy &shape, + const StridesTy &stride1, + const StridesTy &stride2, + const StridesTy &stride3, indT &disp1, indT &disp2, indT &disp3) const @@ -167,12 +169,12 @@ template class CIndexer_vector } template - void get_displacement(indT i, - ShapeTy shape, - StridesTy stride1, - StridesTy stride2, - StridesTy stride3, - StridesTy stride4, + void get_displacement(const indT i, + const ShapeTy &shape, + const StridesTy &stride1, + const StridesTy &stride2, + const StridesTy &stride3, + const StridesTy &stride4, indT &disp1, indT &disp2, indT &disp3, @@ -206,9 +208,9 @@ template class CIndexer_vector } template - void get_displacement(indT i, - ShapeTy shape, - const std::array strides, + void get_displacement(const indT i, + const ShapeTy &shape, + const std::array &strides, std::array &disps) const { if (nd == 1) { @@ -240,14 +242,14 @@ template class CIndexer_vector } template - void get_left_rolled_displacement(indT i, - ShapeTy shape, - StridesTy stride, - StridesTy shifts, + void get_left_rolled_displacement(const indT i, + const ShapeTy &shape, + const StridesTy &stride, + const StridesTy &shifts, indT &disp) const { indT i_ = i; - indT d = 0; + indT d(0); for (int dim = nd; --dim > 0;) { const indT si = shape[dim]; const indT q = i_ / si; @@ -275,7 +277,7 @@ template class CIndexer_vector template class CIndexer_array { - static const int ndim = _ndim; + static constexpr int ndim = _ndim; static_assert(std::is_integral::value, "Integral type is required"); static_assert(std::is_signed::value, @@ -295,7 +297,7 @@ template class CIndexer_array explicit CIndexer_array(const index_t &input_shape) : elem_count(0), shape{}, multi_index{} { - indT s = static_cast(1); + indT s(1); for (int i = 0; i < ndim; ++i) { shape[i] = input_shape[i]; s *= input_shape[i]; @@ -312,7 +314,7 @@ template class CIndexer_array return ndim; } - void set(indT i) + void set(const indT i) { if (ndim == 1) { multi_index[0] = i; @@ -366,7 +368,7 @@ int simplify_iteration_stride(const int nd, StridesTy *strides, StridesTy &disp) { - disp = std::ptrdiff_t(0); + disp = StridesTy(0); if (nd < 2) return nd; @@ -451,8 +453,8 @@ int simplify_iteration_two_strides(const int nd, StridesTy &disp1, StridesTy &disp2) { - disp1 = std::ptrdiff_t(0); - disp2 = std::ptrdiff_t(0); + disp1 = StridesTy(0); + disp2 = StridesTy(0); if (nd < 2) return nd; @@ -603,8 +605,8 @@ int simplify_iteration_three_strides(const int nd, StridesTy &disp2, StridesTy &disp3) { - disp1 = std::ptrdiff_t(0); - disp2 = std::ptrdiff_t(0); + disp1 = StridesTy(0); + disp2 = StridesTy(0); if (nd < 2) return nd; @@ -768,8 +770,8 @@ int simplify_iteration_four_strides(const int nd, StridesTy &disp3, StridesTy &disp4) { - disp1 = std::ptrdiff_t(0); - disp2 = std::ptrdiff_t(0); + disp1 = StridesTy(0); + disp2 = StridesTy(0); if (nd < 2) return nd; From 074ab3d507b63ab69ef4c712361dce49c12ce158 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 5 Mar 2024 19:19:47 -0600 Subject: [PATCH 2/3] Handle allocation error for USM-host for strides --- dpctl/tensor/libtensor/source/clip.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dpctl/tensor/libtensor/source/clip.cpp b/dpctl/tensor/libtensor/source/clip.cpp index e0a5ee15ad..ee54edc88e 100644 --- a/dpctl/tensor/libtensor/source/clip.cpp +++ b/dpctl/tensor/libtensor/source/clip.cpp @@ -225,6 +225,9 @@ py_clip(const dpctl::tensor::usm_ndarray &src, simplified_shape, simplified_src_strides, simplified_min_strides, simplified_max_strides, simplified_dst_strides); py::ssize_t *packed_shape_strides = std::get<0>(ptr_size_event_tuple); + if (!packed_shape_strides) { + throw std::runtime_error("USM-host memory allocation failure"); + } sycl::event copy_shape_strides_ev = std::get<2>(ptr_size_event_tuple); std::vector all_deps; From b030a797ea1020a20596ee560b5e216199405d0c Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 5 Mar 2024 23:27:05 -0800 Subject: [PATCH 3/3] Handle allocation error for strides for where --- dpctl/tensor/libtensor/source/where.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dpctl/tensor/libtensor/source/where.cpp b/dpctl/tensor/libtensor/source/where.cpp index d938c17685..e3dbbfed6c 100644 --- a/dpctl/tensor/libtensor/source/where.cpp +++ b/dpctl/tensor/libtensor/source/where.cpp @@ -203,6 +203,9 @@ py_where(const dpctl::tensor::usm_ndarray &condition, simplified_shape, simplified_cond_strides, simplified_x1_strides, simplified_x2_strides, simplified_dst_strides); py::ssize_t *packed_shape_strides = std::get<0>(ptr_size_event_tuple); + if (!packed_shape_strides) { + throw std::runtime_error("USM-host memory allocation failure"); + } sycl::event copy_shape_strides_ev = std::get<2>(ptr_size_event_tuple); std::vector all_deps;