Skip to content

Commit 54e5b6a

Browse files
authored
Merge pull request #2258 from aarongreig/aaron/tryUseExtensionSubgroupInfo
Use extension version of clGetKernelSubGroupInfo when necessary.
2 parents e004a38 + 51f43d3 commit 54e5b6a

File tree

5 files changed

+54
-14
lines changed

5 files changed

+54
-14
lines changed

source/adapters/opencl/common.hpp

+9
Original file line number | Diff line number | Diff line change
@@ -218,6 +218,8 @@ CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR";
218218
CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR";
219219
CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR";
220220
CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR";
221+
CONSTFIX char CreateProgramWithILName[] = "clCreateProgramWithILKHR";
222+
CONSTFIX char GetKernelSubGroupInfoName[] = "clGetKernelSubGroupInfoKHR";
221223

222224
#undef CONSTFIX
223225

@@ -316,6 +318,13 @@ cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer, cl_uint num_configs,
316318
const cl_command_buffer_update_type_khr *config_types,
317319
const void **configs);
318320

321+
using clCreateProgramWithILKHR_fn = CL_API_ENTRY
322+
cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *);
323+
324+
using clGetKernelSubGroupInfoKHR_fn = CL_API_ENTRY
325+
cl_int(CL_API_CALL *)(cl_kernel, cl_device_id, cl_kernel_sub_group_info, size_t,
326+
const void *, size_t, void *, size_t *);
327+
319328
template <typename T> struct FuncPtrCache {
320329
std::map<cl_context, T> Map;
321330
std::mutex Mutex;

source/adapters/opencl/device.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,8 @@ ur_result_t cl_adapter::checkDeviceExtensions(
6161
// doesn't report them.
6262
if (isIntelFPGAEmuDevice(Dev) &&
6363
(Ext == "cl_intel_device_attribute_query" ||
64-
Ext == "cl_intel_required_subgroup_size")) {
64+
Ext == "cl_intel_required_subgroup_size" ||
65+
Ext == "cl_khr_subgroups")) {
6566
Supported = true;
6667
continue;
6768
}

source/adapters/opencl/extension_functions.def

+2
Original file line number | Diff line number | Diff line change
@@ -24,3 +24,5 @@ CL_EXTENSION_FUNC(clCommandFillBufferKHR)
2424
CL_EXTENSION_FUNC(clEnqueueCommandBufferKHR)
2525
CL_EXTENSION_FUNC(clGetCommandBufferInfoKHR)
2626
CL_EXTENSION_FUNC(clUpdateMutableCommandsKHR)
27+
CL_EXTENSION_FUNC(clCreateProgramWithILKHR)
28+
CL_EXTENSION_FUNC(clGetKernelSubGroupInfoKHR)

source/adapters/opencl/kernel.cpp

+34-5
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
//
99
//===----------------------------------------------------------------------===//
1010
#include "common.hpp"
11+
#include "device.hpp"
1112

1213
#include <algorithm>
1314
#include <cstddef>
@@ -189,11 +190,39 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
189190
InputValueSize = MaxDims * sizeof(size_t);
190191
}
191192

192-
cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast<cl_kernel>(hKernel),
193-
cl_adapter::cast<cl_device_id>(hDevice),
194-
mapURKernelSubGroupInfoToCL(propName),
195-
InputValueSize, InputValue.get(),
196-
sizeof(size_t), &RetVal, pPropSizeRet);
193+
// We need to allow for the possibility that this device runs an older CL and
194+
// supports the original khr subgroup extension.
195+
cl_ext::clGetKernelSubGroupInfoKHR_fn GetKernelSubGroupInfo = nullptr;
196+
197+
oclv::OpenCLVersion DevVer;
198+
CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(
199+
cl_adapter::cast<cl_device_id>(hDevice), DevVer));
200+
201+
if (DevVer < oclv::V2_1) {
202+
bool SubgroupExtSupported = false;
203+
204+
UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions(
205+
cl_adapter::cast<cl_device_id>(hDevice), {"cl_khr_subgroups"},
206+
SubgroupExtSupported));
207+
if (!SubgroupExtSupported) {
208+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
209+
}
210+
cl_context Context = nullptr;
211+
CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast<cl_kernel>(hKernel),
212+
CL_KERNEL_CONTEXT, sizeof(Context),
213+
&Context, nullptr));
214+
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext(
215+
Context, cl_ext::ExtFuncPtrCache->clGetKernelSubGroupInfoKHRCache,
216+
cl_ext::GetKernelSubGroupInfoName, &GetKernelSubGroupInfo));
217+
} else {
218+
GetKernelSubGroupInfo = clGetKernelSubGroupInfo;
219+
}
220+
221+
cl_int Ret = GetKernelSubGroupInfo(cl_adapter::cast<cl_kernel>(hKernel),
222+
cl_adapter::cast<cl_device_id>(hDevice),
223+
mapURKernelSubGroupInfoToCL(propName),
224+
InputValueSize, InputValue.get(),
225+
sizeof(size_t), &RetVal, pPropSizeRet);
197226

198227
if (Ret == CL_INVALID_OPERATION) {
199228
// clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does

source/adapters/opencl/program.cpp

+7-8
Original file line number | Diff line number | Diff line change
@@ -99,16 +99,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL(
9999
}
100100
}
101101

102-
using ApiFuncT =
103-
cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *);
104-
ApiFuncT FuncPtr =
105-
reinterpret_cast<ApiFuncT>(clGetExtensionFunctionAddressForPlatform(
106-
CurPlatform, "clCreateProgramWithILKHR"));
102+
cl_ext::clCreateProgramWithILKHR_fn CreateProgramWithIL = nullptr;
107103

108-
assert(FuncPtr != nullptr);
104+
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext(
105+
cl_adapter::cast<cl_context>(hContext),
106+
cl_ext::ExtFuncPtrCache->clCreateProgramWithILKHRCache,
107+
cl_ext::CreateProgramWithILName, &CreateProgramWithIL));
109108

110-
*phProgram = cl_adapter::cast<ur_program_handle_t>(
111-
FuncPtr(cl_adapter::cast<cl_context>(hContext), pIL, length, &Err));
109+
*phProgram = cl_adapter::cast<ur_program_handle_t>(CreateProgramWithIL(
110+
cl_adapter::cast<cl_context>(hContext), pIL, length, &Err));
112111
}
113112

114113
// INVALID_VALUE is only returned in three circumstances according to the cl

0 commit comments

Comments
 (0)