Skip to content

Commit 5d72e6b

Browse files
authored
[SYCL][PI][CUDA] Implement kernel and kernel-group information queries (#1180)
Implements the PI info query for `PI_KERNEL_INFO_ATTRIBUTES`, `PI_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE`, and `PI_KERNEL_PRIVATE_MEM_SIZE`. Signed-off-by: Steffen Larsen <[email protected]>
1 parent 04ee17c commit 5d72e6b

File tree

3 files changed

+93
-2
lines changed

3 files changed

+93
-2
lines changed

sycl/include/CL/sycl/detail/pi.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,13 +257,17 @@ typedef enum {
257257
PI_KERNEL_INFO_NUM_ARGS = CL_KERNEL_NUM_ARGS,
258258
PI_KERNEL_INFO_REFERENCE_COUNT = CL_KERNEL_REFERENCE_COUNT,
259259
PI_KERNEL_INFO_CONTEXT = CL_KERNEL_CONTEXT,
260-
PI_KERNEL_INFO_PROGRAM = CL_KERNEL_PROGRAM
260+
PI_KERNEL_INFO_PROGRAM = CL_KERNEL_PROGRAM,
261+
PI_KERNEL_INFO_ATTRIBUTES = CL_KERNEL_ATTRIBUTES
261262
} _pi_kernel_info;
262263

263264
typedef enum {
264265
PI_KERNEL_GROUP_INFO_SIZE = CL_KERNEL_WORK_GROUP_SIZE,
265266
PI_KERNEL_COMPILE_GROUP_INFO_SIZE = CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
266-
PI_KERNEL_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE
267+
PI_KERNEL_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE,
268+
PI_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE =
269+
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
270+
PI_KERNEL_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE
267271
} _pi_kernel_group_info;
268272

269273
typedef enum {

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2107,6 +2107,9 @@ pi_result cuda_piKernelGetInfo(
21072107
return getInfo(param_value_size, param_value, param_value_size_ret,
21082108
kernel->get_program());
21092109
}
2110+
case PI_KERNEL_INFO_ATTRIBUTES: {
2111+
return getInfo(param_value_size, param_value, param_value_size_ret, "");
2112+
}
21102113
default: {
21112114
PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
21122115
}
@@ -2154,6 +2157,24 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
21542157
return getInfo(param_value_size, param_value, param_value_size_ret,
21552158
pi_uint64(bytes));
21562159
}
2160+
case PI_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
2161+
// Work groups should be multiples of the warp size
2162+
int warpSize = 0;
2163+
cl::sycl::detail::pi::assertion(
2164+
cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE,
2165+
device->get()) == CUDA_SUCCESS);
2166+
return getInfo(param_value_size, param_value, param_value_size_ret,
2167+
static_cast<size_t>(warpSize));
2168+
}
2169+
case PI_KERNEL_PRIVATE_MEM_SIZE: {
2170+
// OpenCL PRIVATE == CUDA LOCAL
2171+
int bytes = 0;
2172+
cl::sycl::detail::pi::assertion(
2173+
cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,
2174+
kernel->get()) == CUDA_SUCCESS);
2175+
return getInfo(param_value_size, param_value, param_value_size_ret,
2176+
pi_uint64(bytes));
2177+
}
21572178
default:
21582179
PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
21592180
}

sycl/test/basic_tests/kernel_info.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
5+
6+
//==--- kernel_info.cpp - SYCL kernel info test ----------------------------==//
7+
//
8+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9+
// See https://llvm.org/LICENSE.txt for license information.
10+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include <CL/sycl.hpp>
15+
16+
using namespace cl::sycl;
17+
18+
void check(bool condition, const char *conditionString, const char *filename,
19+
const long line) noexcept {
20+
if (!condition) {
21+
std::cerr << "CHECK failed in " << filename << "#" << line << " "
22+
<< conditionString << "\n";
23+
std::abort();
24+
}
25+
}
26+
27+
#define CHECK(CONDITION) check(CONDITION, #CONDITION, __FILE__, __LINE__)
28+
29+
int main() {
30+
queue q;
31+
32+
buffer<int, 1> buf(range<1>(1));
33+
program prg(q.get_context());
34+
35+
prg.build_with_kernel_type<class SingleTask>();
36+
kernel krn = prg.get_kernel<class SingleTask>();
37+
38+
q.submit([&](handler &cgh) {
39+
auto acc = buf.get_access<access::mode::read_write>(cgh);
40+
cgh.single_task<class SingleTask>(krn, [=]() { acc[0] = acc[0] + 1; });
41+
});
42+
43+
const string_class krnName = krn.get_info<info::kernel::function_name>();
44+
CHECK(!krnName.empty());
45+
const cl_uint krnArgCount = krn.get_info<info::kernel::num_args>();
46+
CHECK(krnArgCount > 0);
47+
const context krnCtx = krn.get_info<info::kernel::context>();
48+
CHECK(krnCtx == q.get_context());
49+
const program krnPrg = krn.get_info<info::kernel::program>();
50+
CHECK(krnPrg == prg);
51+
const cl_uint krnRefCount = krn.get_info<info::kernel::reference_count>();
52+
CHECK(krnRefCount > 0);
53+
const string_class krnAttr = krn.get_info<info::kernel::attributes>();
54+
CHECK(krnAttr.empty());
55+
56+
device dev = q.get_device();
57+
const size_t wgSize =
58+
krn.get_work_group_info<info::kernel_work_group::work_group_size>(dev);
59+
CHECK(wgSize > 0);
60+
const size_t prefWGSizeMult = krn.get_work_group_info<
61+
info::kernel_work_group::preferred_work_group_size_multiple>(dev);
62+
CHECK(prefWGSizeMult > 0);
63+
const cl_ulong prvMemSize =
64+
krn.get_work_group_info<info::kernel_work_group::private_mem_size>(dev);
65+
CHECK(prvMemSize == 0);
66+
}

0 commit comments

Comments
 (0)