Skip to content

[SYCL] Enable querying kernel's number of registers #4665

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 14, 2021
4 changes: 3 additions & 1 deletion sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,9 @@ typedef enum {
PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE,
PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE =
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE
PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE,
// The number of registers used by the compiled kernel (device specific)
PI_KERNEL_GROUP_INFO_NUM_REGS = 0x10112
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this one doesn't have analog in OpenCL it needs to be documented here: what exactly is being queried/returned.
Also let's follow

PI_ERROR_UNKNOWN = -999
and add new values from the "top" of the values set to avoid accidental overlap.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added a comment to explain what is being returned. Could you explain a little more what you mean about adding new values from the top? I thought I had done this - the largest value I found was

PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111
so I added one after that. Should I have left more of a gap?

} _pi_kernel_group_info;

typedef enum {
Expand Down
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/info/info_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ enum class kernel_device_specific : cl_kernel_work_group_info {
preferred_work_group_size_multiple =
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
private_mem_size = CL_KERNEL_PRIVATE_MEM_SIZE,
ext_codeplay_num_regs = PI_KERNEL_GROUP_INFO_NUM_REGS,
max_sub_group_size = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
max_num_sub_groups = CL_KERNEL_MAX_NUM_SUB_GROUPS,
compile_num_sub_groups = CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, global_work_size, cl::sycl::ran
__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific,
preferred_work_group_size_multiple, size_t)
__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, private_mem_size, cl_ulong)
__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, ext_codeplay_num_regs, uint32_t)
__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, work_group_size, size_t)
__SYCL_PARAM_TRAITS_SPEC_WITH_INPUT(kernel_device_specific, max_sub_group_size,
uint32_t, cl::sycl::range<3>)
Expand Down
8 changes: 8 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2548,6 +2548,14 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
return getInfo(param_value_size, param_value, param_value_size_ret,
pi_uint64(bytes));
}
case PI_KERNEL_GROUP_INFO_NUM_REGS: {
int numRegs = 0;
cl::sycl::detail::pi::assertion(
cuFuncGetAttribute(&numRegs, CU_FUNC_ATTRIBUTE_NUM_REGS,
kernel->get()) == CUDA_SUCCESS);
return getInfo(param_value_size, param_value, param_value_size_ret,
pi_uint32(numRegs));
}
default:
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
}
Expand Down
6 changes: 6 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3129,6 +3129,12 @@ pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
return getInfo(param_value_size, param_value, param_value_size_ret,
pi_uint64(bytes));
}
case PI_KERNEL_GROUP_INFO_NUM_REGS: {
cl::sycl::detail::pi::die("PI_KERNEL_GROUP_INFO_NUM_REGS in "
"piKernelGetGroupInfo not implemented\n");
return {};
}

default:
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
}
Expand Down
5 changes: 5 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4258,6 +4258,11 @@ pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device,
}
case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE:
return ReturnValue(pi_uint32{Kernel->ZeKernelProperties->privateMemSize});
case PI_KERNEL_GROUP_INFO_NUM_REGS: {
die("PI_KERNEL_GROUP_INFO_NUM_REGS in piKernelGetGroupInfo not "
"implemented\n");
break;
}
default:
zePrint("Unknown ParamName in piKernelGetGroupInfo: ParamName=%d(0x%x)\n",
ParamName, ParamName);
Expand Down
22 changes: 21 additions & 1 deletion sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,26 @@ pi_result piKernelCreate(pi_program program, const char *kernel_name,
return ret_err;
}

pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
pi_kernel_group_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) {
if (kernel == nullptr) {
return PI_INVALID_KERNEL;
}

switch (param_name) {
case PI_KERNEL_GROUP_INFO_NUM_REGS:
return PI_INVALID_VALUE;
default:
cl_int result = clGetKernelWorkGroupInfo(
cast<cl_kernel>(kernel), cast<cl_device_id>(device),
cast<cl_kernel_work_group_info>(param_name), param_value_size,
param_value, param_value_size_ret);
return static_cast<pi_result>(result);
}
}

pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device,
pi_kernel_sub_group_info param_name,
size_t input_value_size,
Expand Down Expand Up @@ -1308,7 +1328,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piKernelCreate, piKernelCreate)
_PI_CL(piKernelSetArg, clSetKernelArg)
_PI_CL(piKernelGetInfo, clGetKernelInfo)
_PI_CL(piKernelGetGroupInfo, clGetKernelWorkGroupInfo)
_PI_CL(piKernelGetGroupInfo, piKernelGetGroupInfo)
_PI_CL(piKernelGetSubGroupInfo, piKernelGetSubGroupInfo)
_PI_CL(piKernelRetain, clRetainKernel)
_PI_CL(piKernelRelease, clReleaseKernel)
Expand Down
10 changes: 10 additions & 0 deletions sycl/source/detail/kernel_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ struct IsWorkGroupInfo<
template <>
struct IsWorkGroupInfo<info::kernel_device_specific::private_mem_size>
: std::true_type {};
template <>
struct IsWorkGroupInfo<info::kernel_device_specific::ext_codeplay_num_regs>
: std::true_type {};

template <typename T, info::kernel_device_specific Param>
struct get_kernel_device_specific_info {
Expand Down Expand Up @@ -143,6 +146,13 @@ inline cl_ulong get_kernel_device_specific_info_host<
return 0;
}

template <>
inline uint32_t get_kernel_device_specific_info_host<
info::kernel_device_specific::ext_codeplay_num_regs>(
const cl::sycl::device &) {
return 0;
}

template <>
inline uint32_t get_kernel_device_specific_info_host<
info::kernel_device_specific::max_num_sub_groups>(
Expand Down