Skip to content

Make constexpr variables static or inline #2094

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions dpctl/_host_task_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,19 @@ DPCTLSyclEventRef async_dec_ref(DPCTLSyclQueueRef QRef,
});
});

constexpr int result_ok = 0;
static constexpr int result_ok = 0;

*status = result_ok;
auto e_ptr = new sycl::event(ht_ev);
return wrap<sycl::event>(e_ptr);
} catch (const std::exception &e) {
constexpr int result_std_exception = 1;
static constexpr int result_std_exception = 1;

*status = result_std_exception;
return nullptr;
}

constexpr int result_other_abnormal = 2;
static constexpr int result_other_abnormal = 2;

*status = result_other_abnormal;
return nullptr;
Expand Down
92 changes: 47 additions & 45 deletions dpctl/tensor/libtensor/include/kernels/accumulators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ template <typename inputT, typename outputT> struct NonZeroIndicator

outputT operator()(const inputT &val) const
{
constexpr outputT out_one(1);
constexpr outputT out_zero(0);
constexpr inputT val_zero(0);
static constexpr outputT out_one(1);
static constexpr outputT out_zero(0);
static constexpr inputT val_zero(0);

return (val == val_zero) ? out_zero : out_one;
}
Expand Down Expand Up @@ -583,7 +583,7 @@ sycl::event update_local_chunks_1d(sycl::queue &exec_q,
cgh.depends_on(dependent_event);
cgh.use_kernel_bundle(kb);

constexpr nwiT updates_per_wi = n_wi;
static constexpr nwiT updates_per_wi = n_wi;
const std::size_t n_items =
ceiling_quotient<std::size_t>(src_size, sg_size * n_wi) * sg_size;

Expand All @@ -594,8 +594,8 @@ sycl::event update_local_chunks_1d(sycl::queue &exec_q,
cgh.parallel_for<UpdateKernelName>(
ndRange,
[chunk_size, src, src_size, local_scans](sycl::nd_item<1> ndit) {
constexpr ScanOpT scan_op{};
constexpr outputT identity =
static constexpr ScanOpT scan_op{};
static constexpr outputT identity =
su_ns::Identity<ScanOpT, outputT>::value;

const std::uint32_t lws = ndit.get_local_range(0);
Expand Down Expand Up @@ -640,16 +640,17 @@ sycl::event inclusive_scan_iter_1d(sycl::queue &exec_q,
std::vector<sycl::event> &host_tasks,
const std::vector<sycl::event> &depends = {})
{
constexpr ScanOpT scan_op{};
constexpr outputT identity = su_ns::Identity<ScanOpT, outputT>::value;
static constexpr ScanOpT scan_op{};
static constexpr outputT identity =
su_ns::Identity<ScanOpT, outputT>::value;

constexpr std::size_t _iter_nelems = 1;
static constexpr std::size_t _iter_nelems = 1;

using IterIndexerT = dpctl::tensor::offset_utils::TwoZeroOffsets_Indexer;
constexpr IterIndexerT _no_op_iter_indexer{};
static constexpr IterIndexerT _no_op_iter_indexer{};

using NoOpIndexerT = dpctl::tensor::offset_utils::NoOpIndexer;
constexpr NoOpIndexerT _no_op_indexer{};
static constexpr NoOpIndexerT _no_op_indexer{};

std::size_t n_groups;
sycl::event inc_scan_phase1_ev =
Expand Down Expand Up @@ -687,7 +688,7 @@ sycl::event inclusive_scan_iter_1d(sycl::queue &exec_q,
outputT *local_scans = temp;

using NoOpTransformerT = NoOpTransformer<outputT>;
constexpr NoOpTransformerT _no_op_transformer{};
static constexpr NoOpTransformerT _no_op_transformer{};
std::size_t size_to_update = n_elems;
while (n_groups_ > 1) {

Expand Down Expand Up @@ -761,16 +762,16 @@ accumulate_1d_contig_impl(sycl::queue &q,
dstT *dst_data_ptr = reinterpret_cast<dstT *>(dst);

using NoOpIndexerT = dpctl::tensor::offset_utils::NoOpIndexer;
constexpr NoOpIndexerT flat_indexer{};
constexpr transformerT transformer{};
static constexpr NoOpIndexerT flat_indexer{};
static constexpr transformerT transformer{};

constexpr std::size_t s0 = 0;
constexpr std::size_t s1 = 1;
static constexpr std::size_t s0 = 0;
static constexpr std::size_t s1 = 1;

sycl::event comp_ev;
const sycl::device &dev = q.get_device();
if (dev.has(sycl::aspect::cpu)) {
constexpr nwiT n_wi_for_cpu = 8;
static constexpr nwiT n_wi_for_cpu = 8;
const std::uint32_t wg_size = 256;
comp_ev = inclusive_scan_iter_1d<srcT, dstT, n_wi_for_cpu, NoOpIndexerT,
transformerT, AccumulateOpT,
Expand All @@ -779,7 +780,7 @@ accumulate_1d_contig_impl(sycl::queue &q,
flat_indexer, transformer, host_tasks, depends);
}
else {
constexpr nwiT n_wi_for_gpu = 4;
static constexpr nwiT n_wi_for_gpu = 4;
// base_scan_striped algorithm does not execute correctly
// on HIP device with wg_size > 64
const std::uint32_t wg_size =
Expand Down Expand Up @@ -829,7 +830,7 @@ sycl::event final_update_local_chunks(sycl::queue &exec_q,
const std::uint32_t sg_size = krn.template get_info<
sycl::info::kernel_device_specific::max_sub_group_size>(dev);

constexpr nwiT updates_per_wi = n_wi;
static constexpr nwiT updates_per_wi = n_wi;
const std::size_t updates_per_sg = sg_size * updates_per_wi;
const std::size_t update_nelems =
ceiling_quotient(src_size, updates_per_sg) * sg_size;
Expand All @@ -845,8 +846,8 @@ sycl::event final_update_local_chunks(sycl::queue &exec_q,
cgh.parallel_for<UpdateKernelName>(
ndRange, [chunk_size, src_size, local_stride, src, local_scans,
out_iter_indexer, out_indexer](sycl::nd_item<2> ndit) {
constexpr ScanOpT scan_op{};
constexpr outputT identity =
static constexpr ScanOpT scan_op{};
static constexpr outputT identity =
su_ns::Identity<ScanOpT, outputT>::value;

const std::uint32_t lws = ndit.get_local_range(1);
Expand Down Expand Up @@ -898,8 +899,8 @@ sycl::event update_local_chunks(sycl::queue &exec_q,
std::size_t local_stride,
sycl::event dependent_event)
{
constexpr NoOpIndexer out_indexer{};
constexpr NoOpIndexer iter_out_indexer{};
static constexpr NoOpIndexer out_indexer{};
static constexpr NoOpIndexer iter_out_indexer{};

return final_update_local_chunks<UpdateKernelName, outputT, n_wi,
NoOpIndexer, NoOpIndexer, ScanOpT>(
Expand Down Expand Up @@ -933,8 +934,9 @@ sycl::event inclusive_scan_iter(sycl::queue &exec_q,
std::vector<sycl::event> &host_tasks,
const std::vector<sycl::event> &depends = {})
{
constexpr ScanOpT scan_op{};
constexpr outputT identity = su_ns::Identity<ScanOpT, outputT>::value;
static constexpr ScanOpT scan_op{};
static constexpr outputT identity =
su_ns::Identity<ScanOpT, outputT>::value;

using IterIndexerT =
dpctl::tensor::offset_utils::TwoOffsets_CombinedIndexer<
Expand Down Expand Up @@ -977,9 +979,9 @@ sycl::event inclusive_scan_iter(sycl::queue &exec_q,
outputT *local_scans = temp;

using NoOpIndexerT = dpctl::tensor::offset_utils::NoOpIndexer;
constexpr NoOpIndexerT _no_op_indexer{};
static constexpr NoOpIndexerT _no_op_indexer{};
using NoOpTransformerT = NoOpTransformer<outputT>;
constexpr NoOpTransformerT _no_op_transformer{};
static constexpr NoOpTransformerT _no_op_transformer{};
std::size_t size_to_update = acc_nelems;

{
Expand Down Expand Up @@ -1142,15 +1144,15 @@ accumulate_strided_impl(sycl::queue &q,
iter_shape_strides,
iter_shape_strides + 2 * iter_nd};

constexpr transformerT transformer{};
static constexpr transformerT transformer{};

constexpr std::size_t s0 = 0;
constexpr std::size_t s1 = 1;
static constexpr std::size_t s0 = 0;
static constexpr std::size_t s1 = 1;

const sycl::device &dev = q.get_device();
sycl::event comp_ev;
if (dev.has(sycl::aspect::cpu)) {
constexpr nwiT n_wi_for_cpu = 8;
static constexpr nwiT n_wi_for_cpu = 8;
const std::uint32_t wg_size = 256;
comp_ev =
inclusive_scan_iter<srcT, dstT, n_wi_for_cpu, InpIndexerT,
Expand All @@ -1161,7 +1163,7 @@ accumulate_strided_impl(sycl::queue &q,
out_axis_indexer, transformer, host_tasks, depends);
}
else {
constexpr nwiT n_wi_for_gpu = 4;
static constexpr nwiT n_wi_for_gpu = 4;
// base_scan_striped algorithm does not execute correctly
// on HIP device with wg_size > 64
const std::uint32_t wg_size =
Expand Down Expand Up @@ -1198,18 +1200,18 @@ std::size_t cumsum_val_contig_impl(sycl::queue &q,
cumsumT *cumsum_data_ptr = reinterpret_cast<cumsumT *>(cumsum);

using NoOpIndexerT = dpctl::tensor::offset_utils::NoOpIndexer;
constexpr NoOpIndexerT flat_indexer{};
constexpr transformerT transformer{};
static constexpr NoOpIndexerT flat_indexer{};
static constexpr transformerT transformer{};

constexpr std::size_t s0 = 0;
constexpr std::size_t s1 = 1;
constexpr bool include_initial = false;
static constexpr std::size_t s0 = 0;
static constexpr std::size_t s1 = 1;
static constexpr bool include_initial = false;
using AccumulateOpT = sycl::plus<cumsumT>;

sycl::event comp_ev;
const sycl::device &dev = q.get_device();
if (dev.has(sycl::aspect::cpu)) {
constexpr nwiT n_wi_for_cpu = 8;
static constexpr nwiT n_wi_for_cpu = 8;
const std::uint32_t wg_size = 256;
comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_cpu,
NoOpIndexerT, transformerT,
Expand All @@ -1218,7 +1220,7 @@ std::size_t cumsum_val_contig_impl(sycl::queue &q,
flat_indexer, transformer, host_tasks, depends);
}
else {
constexpr nwiT n_wi_for_gpu = 4;
static constexpr nwiT n_wi_for_gpu = 4;
// base_scan_striped algorithm does not execute correctly
// on HIP device with wg_size > 64
const std::uint32_t wg_size =
Expand Down Expand Up @@ -1313,17 +1315,17 @@ cumsum_val_strided_impl(sycl::queue &q,

using StridedIndexerT = dpctl::tensor::offset_utils::StridedIndexer;
const StridedIndexerT strided_indexer{nd, 0, shape_strides};
constexpr transformerT transformer{};
static constexpr transformerT transformer{};

constexpr std::size_t s0 = 0;
constexpr std::size_t s1 = 1;
constexpr bool include_initial = false;
static constexpr std::size_t s0 = 0;
static constexpr std::size_t s1 = 1;
static constexpr bool include_initial = false;
using AccumulateOpT = sycl::plus<cumsumT>;

const sycl::device &dev = q.get_device();
sycl::event comp_ev;
if (dev.has(sycl::aspect::cpu)) {
constexpr nwiT n_wi_for_cpu = 8;
static constexpr nwiT n_wi_for_cpu = 8;
const std::uint32_t wg_size = 256;
comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_cpu,
StridedIndexerT, transformerT,
Expand All @@ -1332,7 +1334,7 @@ cumsum_val_strided_impl(sycl::queue &q,
strided_indexer, transformer, host_tasks, depends);
}
else {
constexpr nwiT n_wi_for_gpu = 4;
static constexpr nwiT n_wi_for_gpu = 4;
// base_scan_striped algorithm does not execute correctly
// on HIP device with wg_size > 64
const std::uint32_t wg_size =
Expand Down
2 changes: 1 addition & 1 deletion dpctl/tensor/libtensor/include/kernels/alignment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace kernels
namespace alignment_utils
{

static constexpr std::size_t required_alignment = 64UL;
inline constexpr std::size_t required_alignment = 64UL;

template <std::uintptr_t alignment, typename Ptr> bool is_aligned(Ptr p)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,11 @@ std::size_t _get_lws_impl(std::size_t n)
}
}

std::size_t get_lws(std::size_t n)
inline std::size_t get_lws(std::size_t n)
{
constexpr std::size_t lws0 = 256u;
constexpr std::size_t lws1 = 128u;
constexpr std::size_t lws2 = 64u;
static constexpr std::size_t lws0 = 256u;
static constexpr std::size_t lws1 = 128u;
static constexpr std::size_t lws2 = 64u;
return _get_lws_impl<lws0, lws1, lws2>(n);
}

Expand Down Expand Up @@ -261,9 +261,9 @@ sycl::event masked_extract_all_slices_contig_impl(
ssize_t dst_stride,
const std::vector<sycl::event> &depends = {})
{
constexpr TwoZeroOffsets_Indexer orthog_src_dst_indexer{};
static constexpr TwoZeroOffsets_Indexer orthog_src_dst_indexer{};

constexpr NoOpIndexer masked_src_indexer{};
static constexpr NoOpIndexer masked_src_indexer{};
const Strided1DIndexer masked_dst_indexer(/* size */ dst_size,
/* step */ dst_stride);

Expand Down Expand Up @@ -339,7 +339,7 @@ sycl::event masked_extract_all_slices_strided_impl(
ssize_t dst_stride,
const std::vector<sycl::event> &depends = {})
{
constexpr TwoZeroOffsets_Indexer orthog_src_dst_indexer{};
static constexpr TwoZeroOffsets_Indexer orthog_src_dst_indexer{};

/* StridedIndexer(int _nd, ssize_t _offset, ssize_t const
* *_packed_shape_strides) */
Expand Down Expand Up @@ -578,7 +578,7 @@ sycl::event masked_place_all_slices_strided_impl(
ssize_t rhs_stride,
const std::vector<sycl::event> &depends = {})
{
constexpr TwoZeroOffsets_Indexer orthog_dst_rhs_indexer{};
static constexpr TwoZeroOffsets_Indexer orthog_dst_rhs_indexer{};

/* StridedIndexer(int _nd, ssize_t _offset, ssize_t const
* *_packed_shape_strides) */
Expand All @@ -589,7 +589,7 @@ sycl::event masked_place_all_slices_strided_impl(
TwoZeroOffsets_Indexer, StridedIndexer, Strided1DCyclicIndexer, dataT,
indT>;

constexpr std::size_t nominal_lws = 256;
static constexpr std::size_t nominal_lws = 256;
const std::size_t masked_extent = iteration_size;
const std::size_t lws = std::min(masked_extent, nominal_lws);

Expand Down Expand Up @@ -685,7 +685,7 @@ sycl::event masked_place_some_slices_strided_impl(
TwoOffsets_StridedIndexer, StridedIndexer, Strided1DCyclicIndexer,
dataT, indT>;

constexpr std::size_t nominal_lws = 256;
static constexpr std::size_t nominal_lws = 256;
const std::size_t orthog_extent = orthog_nelems;
const std::size_t masked_extent = masked_nelems;
const std::size_t lws = std::min(masked_extent, nominal_lws);
Expand Down Expand Up @@ -788,7 +788,7 @@ sycl::event non_zero_indexes_impl(sycl::queue &exec_q,
const indT1 *cumsum_data = reinterpret_cast<const indT1 *>(cumsum_cp);
indT2 *indexes_data = reinterpret_cast<indT2 *>(indexes_cp);

constexpr std::size_t nominal_lws = 256u;
static constexpr std::size_t nominal_lws = 256u;
const std::size_t masked_extent = iter_size;
const std::size_t lws = std::min(masked_extent, nominal_lws);

Expand Down
10 changes: 5 additions & 5 deletions dpctl/tensor/libtensor/include/kernels/clip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class ClipContigFunctor

void operator()(sycl::nd_item<1> ndit) const
{
constexpr std::uint8_t nelems_per_wi = n_vecs * vec_sz;
static constexpr std::uint8_t nelems_per_wi = n_vecs * vec_sz;

using dpctl::tensor::type_utils::is_complex;
if constexpr (is_complex<T>::value || !enable_sg_loadstore) {
Expand Down Expand Up @@ -202,8 +202,8 @@ sycl::event clip_contig_impl(sycl::queue &q,
cgh.depends_on(depends);

std::size_t lws = 64;
constexpr std::uint8_t vec_sz = 4;
constexpr std::uint8_t n_vecs = 2;
static constexpr std::uint8_t vec_sz = 4;
static constexpr std::uint8_t n_vecs = 2;
const std::size_t n_groups =
((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz));
const auto gws_range = sycl::range<1>(n_groups * lws);
Expand All @@ -214,7 +214,7 @@ sycl::event clip_contig_impl(sycl::queue &q,
is_aligned<required_alignment>(max_cp) &&
is_aligned<required_alignment>(dst_cp))
{
constexpr bool enable_sg_loadstore = true;
static constexpr bool enable_sg_loadstore = true;
using KernelName = clip_contig_kernel<T, vec_sz, n_vecs>;
using Impl =
ClipContigFunctor<T, vec_sz, n_vecs, enable_sg_loadstore>;
Expand All @@ -224,7 +224,7 @@ sycl::event clip_contig_impl(sycl::queue &q,
Impl(nelems, x_tp, min_tp, max_tp, dst_tp));
}
else {
constexpr bool disable_sg_loadstore = false;
static constexpr bool disable_sg_loadstore = false;
using InnerKernelName = clip_contig_kernel<T, vec_sz, n_vecs>;
using KernelName =
disabled_sg_loadstore_wrapper_krn<InnerKernelName>;
Expand Down
2 changes: 1 addition & 1 deletion dpctl/tensor/libtensor/include/kernels/constructors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ sycl::event tri_impl(sycl::queue &exec_q,
const std::vector<sycl::event> &depends,
const std::vector<sycl::event> &additional_depends)
{
constexpr int d2 = 2;
static constexpr int d2 = 2;
ssize_t src_s = nd;
ssize_t dst_s = 2 * nd;
ssize_t nd_1 = nd - 1;
Expand Down
Loading
Loading