Skip to content

Commit 5df2e7f

Browse files
aarongreig authored and
callumfare committed
Use extension version of clGetKernelSubGroupInfo when necessary.
We support at least one device (the FPGA emulator) that reports an OpenCL version lower than 2.1 but still supports subgroups. In that scenario we need to query for and use the KHR version of the entry point (clGetKernelSubGroupInfoKHR).
1 parent 76b5df4 commit 5df2e7f

File tree

2 files changed

+42
-6
lines changed

2 files changed

+42
-6
lines changed

source/adapters/opencl/device.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,8 @@ ur_result_t cl_adapter::checkDeviceExtensions(
6161
// doesn't report them.
6262
if (isIntelFPGAEmuDevice(Dev) &&
6363
(Ext == "cl_intel_device_attribute_query" ||
64-
Ext == "cl_intel_required_subgroup_size")) {
64+
Ext == "cl_intel_required_subgroup_size" ||
65+
Ext == "cl_khr_subgroups")) {
6566
Supported = true;
6667
continue;
6768
}

source/adapters/opencl/kernel.cpp

+40-5
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
//
99
//===----------------------------------------------------------------------===//
1010
#include "common.hpp"
11+
#include "device.hpp"
1112

1213
#include <algorithm>
1314
#include <cstddef>
@@ -189,11 +190,45 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice,
189190
InputValueSize = MaxDims * sizeof(size_t);
190191
}
191192

192-
cl_int Ret = clGetKernelSubGroupInfo(cl_adapter::cast<cl_kernel>(hKernel),
193-
cl_adapter::cast<cl_device_id>(hDevice),
194-
mapURKernelSubGroupInfoToCL(propName),
195-
InputValueSize, InputValue.get(),
196-
sizeof(size_t), &RetVal, pPropSizeRet);
193+
// We need to allow for the possibility that this device runs an older CL and
194+
// supports the original khr subgroup extension.
195+
using ApiFuncT =
196+
cl_int(CL_API_CALL *)(cl_kernel, cl_device_id, cl_kernel_sub_group_info,
197+
size_t, const void *, size_t, void *, size_t *);
198+
ApiFuncT GetKernelSubGroupInfo = nullptr;
199+
200+
oclv::OpenCLVersion DevVer;
201+
CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(
202+
cl_adapter::cast<cl_device_id>(hDevice), DevVer));
203+
204+
if (DevVer < oclv::V2_1) {
205+
bool SubgroupExtSupported = false;
206+
207+
UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions(
208+
cl_adapter::cast<cl_device_id>(hDevice), {"cl_khr_subgroups"},
209+
SubgroupExtSupported));
210+
if (!SubgroupExtSupported) {
211+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
212+
}
213+
cl_platform_id Platform;
214+
CL_RETURN_ON_FAILURE(clGetDeviceInfo(
215+
cl_adapter::cast<cl_device_id>(hDevice), CL_DEVICE_PLATFORM,
216+
sizeof(Platform), &Platform, nullptr));
217+
GetKernelSubGroupInfo =
218+
reinterpret_cast<ApiFuncT>(clGetExtensionFunctionAddressForPlatform(
219+
Platform, "clGetKernelSubGroupInfoKHR"));
220+
if (!GetKernelSubGroupInfo) {
221+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
222+
}
223+
} else {
224+
GetKernelSubGroupInfo = clGetKernelSubGroupInfo;
225+
}
226+
227+
cl_int Ret = GetKernelSubGroupInfo(cl_adapter::cast<cl_kernel>(hKernel),
228+
cl_adapter::cast<cl_device_id>(hDevice),
229+
mapURKernelSubGroupInfoToCL(propName),
230+
InputValueSize, InputValue.get(),
231+
sizeof(size_t), &RetVal, pPropSizeRet);
197232

198233
if (Ret == CL_INVALID_OPERATION) {
199234
// clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does

0 commit comments

Comments
 (0)