Skip to content

Commit 97d33b7

Browse files
authored
[SYCL] Enable querying kernel's number of registers (intel#4665)
1 parent 25d92a7 commit 97d33b7

File tree

11 files changed

+58
-2
lines changed

11 files changed

+58
-2
lines changed

sycl/include/CL/sycl/detail/pi.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,9 @@ typedef enum {
350350
PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE,
351351
PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE =
352352
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
353-
PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE
353+
PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE,
354+
// The number of registers used by the compiled kernel (device specific)
355+
PI_KERNEL_GROUP_INFO_NUM_REGS = 0x10112
354356
} _pi_kernel_group_info;
355357

356358
typedef enum {

sycl/include/CL/sycl/info/info_desc.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ enum class kernel_device_specific : cl_kernel_work_group_info {
250250
preferred_work_group_size_multiple =
251251
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
252252
private_mem_size = CL_KERNEL_PRIVATE_MEM_SIZE,
253+
ext_codeplay_num_regs = PI_KERNEL_GROUP_INFO_NUM_REGS,
253254
max_sub_group_size = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
254255
max_num_sub_groups = CL_KERNEL_MAX_NUM_SUB_GROUPS,
255256
compile_num_sub_groups = CL_KERNEL_COMPILE_NUM_SUB_GROUPS,

sycl/include/CL/sycl/info/kernel_device_specific_traits.def

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, global_work_size, cl::sycl::ran
44
__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific,
55
preferred_work_group_size_multiple, size_t)
66
__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, private_mem_size, cl_ulong)
7+
__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, ext_codeplay_num_regs, uint32_t)
78
__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, work_group_size, size_t)
89
__SYCL_PARAM_TRAITS_SPEC_WITH_INPUT(kernel_device_specific, max_sub_group_size,
910
uint32_t, cl::sycl::range<3>)

sycl/plugins/cuda/pi_cuda.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -2552,6 +2552,14 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
25522552
return getInfo(param_value_size, param_value, param_value_size_ret,
25532553
pi_uint64(bytes));
25542554
}
2555+
case PI_KERNEL_GROUP_INFO_NUM_REGS: {
2556+
int numRegs = 0;
2557+
cl::sycl::detail::pi::assertion(
2558+
cuFuncGetAttribute(&numRegs, CU_FUNC_ATTRIBUTE_NUM_REGS,
2559+
kernel->get()) == CUDA_SUCCESS);
2560+
return getInfo(param_value_size, param_value, param_value_size_ret,
2561+
pi_uint32(numRegs));
2562+
}
25552563
default:
25562564
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
25572565
}

sycl/plugins/hip/pi_hip.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -3136,6 +3136,12 @@ pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
31363136
return getInfo(param_value_size, param_value, param_value_size_ret,
31373137
pi_uint64(bytes));
31383138
}
3139+
case PI_KERNEL_GROUP_INFO_NUM_REGS: {
3140+
cl::sycl::detail::pi::die("PI_KERNEL_GROUP_INFO_NUM_REGS in "
3141+
"piKernelGetGroupInfo not implemented\n");
3142+
return {};
3143+
}
3144+
31393145
default:
31403146
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
31413147
}

sycl/plugins/level_zero/pi_level_zero.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -4270,6 +4270,11 @@ pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device,
42704270
}
42714271
case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE:
42724272
return ReturnValue(pi_uint32{Kernel->ZeKernelProperties->privateMemSize});
4273+
case PI_KERNEL_GROUP_INFO_NUM_REGS: {
4274+
die("PI_KERNEL_GROUP_INFO_NUM_REGS in piKernelGetGroupInfo not "
4275+
"implemented\n");
4276+
break;
4277+
}
42734278
default:
42744279
zePrint("Unknown ParamName in piKernelGetGroupInfo: ParamName=%d(0x%x)\n",
42754280
ParamName, ParamName);

sycl/plugins/opencl/pi_opencl.cpp

+21-1
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,26 @@ pi_result piKernelCreate(pi_program program, const char *kernel_name,
747747
return ret_err;
748748
}
749749

750+
pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
751+
pi_kernel_group_info param_name,
752+
size_t param_value_size, void *param_value,
753+
size_t *param_value_size_ret) {
754+
if (kernel == nullptr) {
755+
return PI_INVALID_KERNEL;
756+
}
757+
758+
switch (param_name) {
759+
case PI_KERNEL_GROUP_INFO_NUM_REGS:
760+
return PI_INVALID_VALUE;
761+
default:
762+
cl_int result = clGetKernelWorkGroupInfo(
763+
cast<cl_kernel>(kernel), cast<cl_device_id>(device),
764+
cast<cl_kernel_work_group_info>(param_name), param_value_size,
765+
param_value, param_value_size_ret);
766+
return static_cast<pi_result>(result);
767+
}
768+
}
769+
750770
pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device,
751771
pi_kernel_sub_group_info param_name,
752772
size_t input_value_size,
@@ -1371,7 +1391,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
13711391
_PI_CL(piKernelCreate, piKernelCreate)
13721392
_PI_CL(piKernelSetArg, clSetKernelArg)
13731393
_PI_CL(piKernelGetInfo, clGetKernelInfo)
1374-
_PI_CL(piKernelGetGroupInfo, clGetKernelWorkGroupInfo)
1394+
_PI_CL(piKernelGetGroupInfo, piKernelGetGroupInfo)
13751395
_PI_CL(piKernelGetSubGroupInfo, piKernelGetSubGroupInfo)
13761396
_PI_CL(piKernelRetain, clRetainKernel)
13771397
_PI_CL(piKernelRelease, clReleaseKernel)

sycl/source/detail/kernel_info.hpp

+10
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ struct IsWorkGroupInfo<
7171
template <>
7272
struct IsWorkGroupInfo<info::kernel_device_specific::private_mem_size>
7373
: std::true_type {};
74+
template <>
75+
struct IsWorkGroupInfo<info::kernel_device_specific::ext_codeplay_num_regs>
76+
: std::true_type {};
7477

7578
template <typename T, info::kernel_device_specific Param>
7679
struct get_kernel_device_specific_info {
@@ -143,6 +146,13 @@ inline cl_ulong get_kernel_device_specific_info_host<
143146
return 0;
144147
}
145148

149+
template <>
150+
inline uint32_t get_kernel_device_specific_info_host<
151+
info::kernel_device_specific::ext_codeplay_num_regs>(
152+
const cl::sycl::device &) {
153+
return 0;
154+
}
155+
146156
template <>
147157
inline uint32_t get_kernel_device_specific_info_host<
148158
info::kernel_device_specific::max_num_sub_groups>(

sycl/test/abi/pi_opencl_symbol_check.dump

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ piDevicesGet
1313
piEnqueueMemBufferMap
1414
piEventCreate
1515
piKernelCreate
16+
piKernelGetGroupInfo
1617
piKernelGetSubGroupInfo
1718
piKernelSetExecInfo
1819
piMemBufferCreate

sycl/test/abi/sycl_symbols_linux.dump

+1
Original file line numberDiff line numberDiff line change
@@ -4229,6 +4229,7 @@ _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4532EEENS3_12par
42294229
_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4533EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
42304230
_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4537EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
42314231
_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4538EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
4232+
_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE65810EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
42324233
_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE8243EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE
42334234
_ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4496EEENS3_12param_traitsIS4_XT_EE11return_typeEv
42344235
_ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4497EEENS3_12param_traitsIS4_XT_EE11return_typeEv

sycl/test/abi/sycl_symbols_windows.dump

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
??$get_info@$0BABB@@device@sycl@cl@@QEBA_KXZ
5050
??$get_info@$0BABBA@@device@sycl@cl@@QEBA_NXZ
5151
??$get_info@$0BABBB@@device@sycl@cl@@QEBA?AV?$vector@W4memory_order@sycl@cl@@V?$allocator@W4memory_order@sycl@cl@@@std@@@std@@XZ
52+
??$get_info@$0BABBC@@kernel@sycl@cl@@QEBAIAEBVdevice@12@@Z
5253
??$get_info@$0BABC@@device@sycl@cl@@QEBA_KXZ
5354
??$get_info@$0BABD@@device@sycl@cl@@QEBA_KXZ
5455
??$get_info@$0BABE@@device@sycl@cl@@QEBA_KXZ

0 commit comments

Comments
 (0)