diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index a38f1ab0b054f..3f1b3a95d2e5a 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -346,7 +346,9 @@ typedef enum { PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE, PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, - PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE + PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE, + // The number of registers used by the compiled kernel (device specific) + PI_KERNEL_GROUP_INFO_NUM_REGS = 0x10112 } _pi_kernel_group_info; typedef enum { diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index cc74ed463dd3e..9348ce670aaff 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -250,6 +250,7 @@ enum class kernel_device_specific : cl_kernel_work_group_info { preferred_work_group_size_multiple = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, private_mem_size = CL_KERNEL_PRIVATE_MEM_SIZE, + ext_codeplay_num_regs = PI_KERNEL_GROUP_INFO_NUM_REGS, max_sub_group_size = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, max_num_sub_groups = CL_KERNEL_MAX_NUM_SUB_GROUPS, compile_num_sub_groups = CL_KERNEL_COMPILE_NUM_SUB_GROUPS, diff --git a/sycl/include/CL/sycl/info/kernel_device_specific_traits.def b/sycl/include/CL/sycl/info/kernel_device_specific_traits.def index 8ff826fd95ac4..90496dd78d461 100644 --- a/sycl/include/CL/sycl/info/kernel_device_specific_traits.def +++ b/sycl/include/CL/sycl/info/kernel_device_specific_traits.def @@ -4,6 +4,7 @@ __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, global_work_size, cl::sycl::ran __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, preferred_work_group_size_multiple, size_t) __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, private_mem_size, cl_ulong) +__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, ext_codeplay_num_regs, uint32_t) __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, work_group_size, size_t) __SYCL_PARAM_TRAITS_SPEC_WITH_INPUT(kernel_device_specific, max_sub_group_size, uint32_t, cl::sycl::range<3>) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 20d45b55998fe..2060cdcd6c5bb 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -2548,6 +2548,14 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(bytes)); } + case PI_KERNEL_GROUP_INFO_NUM_REGS: { + int numRegs = 0; + cl::sycl::detail::pi::assertion( + cuFuncGetAttribute(&numRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, + kernel->get()) == CUDA_SUCCESS); + return getInfo(param_value_size, param_value, param_value_size_ret, + pi_uint32(numRegs)); + } default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index aaac94b1fc362..fa3d3ccd745e6 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -3129,6 +3129,12 @@ pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(bytes)); } + case PI_KERNEL_GROUP_INFO_NUM_REGS: { + cl::sycl::detail::pi::die("PI_KERNEL_GROUP_INFO_NUM_REGS in " + "piKernelGetGroupInfo not implemented\n"); + return {}; + } + default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 5ab3384f7b248..3cb211b5c86fd 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -4258,6 +4258,11 @@ pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, } case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: return ReturnValue(pi_uint32{Kernel->ZeKernelProperties->privateMemSize}); + case PI_KERNEL_GROUP_INFO_NUM_REGS: { + die("PI_KERNEL_GROUP_INFO_NUM_REGS in piKernelGetGroupInfo not " + "implemented\n"); + break; + } default: zePrint("Unknown ParamName in piKernelGetGroupInfo: ParamName=%d(0x%x)\n", ParamName, ParamName); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 0736a32935bc7..1c7972c8ae24d 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -684,6 +684,26 @@ pi_result piKernelCreate(pi_program program, const char *kernel_name, return ret_err; } +pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, + pi_kernel_group_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + if (kernel == nullptr) { + return PI_INVALID_KERNEL; + } + + switch (param_name) { + case PI_KERNEL_GROUP_INFO_NUM_REGS: + return PI_INVALID_VALUE; + default: + cl_int result = clGetKernelWorkGroupInfo( + cast(kernel), cast(device), + cast(param_name), param_value_size, + param_value, param_value_size_ret); + return static_cast(result); + } +} + pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, @@ -1308,7 +1328,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piKernelCreate, piKernelCreate) _PI_CL(piKernelSetArg, clSetKernelArg) _PI_CL(piKernelGetInfo, clGetKernelInfo) - _PI_CL(piKernelGetGroupInfo, clGetKernelWorkGroupInfo) + _PI_CL(piKernelGetGroupInfo, piKernelGetGroupInfo) _PI_CL(piKernelGetSubGroupInfo, piKernelGetSubGroupInfo) _PI_CL(piKernelRetain, clRetainKernel) _PI_CL(piKernelRelease, clReleaseKernel) diff --git a/sycl/source/detail/kernel_info.hpp b/sycl/source/detail/kernel_info.hpp index 85f46e00637e3..6ada88952cf1d 100644 --- a/sycl/source/detail/kernel_info.hpp +++ b/sycl/source/detail/kernel_info.hpp @@ -71,6 +71,9 @@ struct IsWorkGroupInfo< template <> struct IsWorkGroupInfo : std::true_type {}; +template <> +struct IsWorkGroupInfo + : std::true_type {}; template struct get_kernel_device_specific_info { @@ -143,6 +146,13 @@ inline cl_ulong get_kernel_device_specific_info_host< return 0; } +template <> +inline uint32_t get_kernel_device_specific_info_host< + info::kernel_device_specific::ext_codeplay_num_regs>( + const cl::sycl::device &) { + return 0; +} + template <> inline uint32_t get_kernel_device_specific_info_host< info::kernel_device_specific::max_num_sub_groups>( diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 23f7a3992bf60..caf4f72b48adb 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -13,6 +13,7 @@ piDevicesGet piEnqueueMemBufferMap piEventCreate piKernelCreate +piKernelGetGroupInfo piKernelGetSubGroupInfo piKernelSetExecInfo piMemBufferCreate diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 7bbd5f580fa52..9c8fb983a11ff 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4218,6 +4218,7 @@ _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4532EEENS3_12par _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4533EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4537EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4538EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE65810EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE8243EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE _ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4496EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4497EEENS3_12param_traitsIS4_XT_EE11return_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 9b347ceb7b47a..cf1b2c59e370b 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -49,6 +49,7 @@ ??$get_info@$0BABB@@device@sycl@cl@@QEBA_KXZ ??$get_info@$0BABBA@@device@sycl@cl@@QEBA_NXZ ??$get_info@$0BABBB@@device@sycl@cl@@QEBA?AV?$vector@W4memory_order@sycl@cl@@V?$allocator@W4memory_order@sycl@cl@@@std@@@std@@XZ +??$get_info@$0BABBC@@kernel@sycl@cl@@QEBAIAEBVdevice@12@@Z ??$get_info@$0BABC@@device@sycl@cl@@QEBA_KXZ ??$get_info@$0BABD@@device@sycl@cl@@QEBA_KXZ ??$get_info@$0BABE@@device@sycl@cl@@QEBA_KXZ