Skip to content

Commit 2cd1cc3

Browse files
committed
[EXP][Command-buffer] OpenCL kernel command update
Implement the API for updating the kernel commands in a command-buffer, as defined by oneapi-src#1089, for the OpenCL adapter. This depends on support for the [cl_khr_command_buffer_mutable_dispatch](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer_mutable_dispatch) extension.

Tested on Intel GPU OpenCL implementations with the [command-buffer emulation layer](https://github.com/bashbaug/SimpleOpenCLSamples/tree/main/layers/10_cmdbufemu).

```bash
$ OPENCL_LAYERS=<path/to/SimpleOpenCLSamples/build/layers/10_cmdbufemu/libCmdBufEmu.so> ./bin/test-exp_command_buffer --platform="Intel(R) OpenCL Graphics"
```
1 parent f15234d commit 2cd1cc3

11 files changed

+478
-82
lines changed

source/adapters/opencl/command_buffer.cpp

Lines changed: 279 additions & 68 deletions
Large diffs are not rendered by default.

source/adapters/opencl/command_buffer.hpp

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,93 @@
1111
#include <CL/cl_ext.h>
1212
#include <ur/ur.hpp>
1313

14+
/// Handle to a kernel command.
15+
struct ur_exp_command_buffer_command_handle_t_ {
16+
/// Command-buffer this command belongs to.
17+
ur_exp_command_buffer_handle_t hCommandBuffer;
18+
/// OpenCL command-handle.
19+
cl_mutable_command_khr CLMutableCommand;
20+
/// Work-dimension the command was originally created with.
21+
cl_uint WorkDim;
22+
/// Internal & External reference counts.
23+
/// We need to maintain these because in OpenCL a command-handle isn't
24+
/// reference counting, but is tied to the lifetime of the parent
25+
/// command-buffer. This is not the case in UR where a command-handle is
26+
/// reference counted.
27+
std::atomic_uint32_t RefCountInternal;
28+
std::atomic_uint32_t RefCountExternal;
29+
30+
ur_exp_command_buffer_command_handle_t_(
31+
ur_exp_command_buffer_handle_t hCommandBuffer,
32+
cl_mutable_command_khr CLMutableCommand, cl_uint WorkDim)
33+
: hCommandBuffer(hCommandBuffer), CLMutableCommand(CLMutableCommand),
34+
WorkDim(WorkDim), RefCountInternal(0), RefCountExternal(0) {}
35+
36+
uint32_t incrementInternalReferenceCount() noexcept {
37+
return ++RefCountInternal;
38+
}
39+
uint32_t decrementInternalReferenceCount() noexcept {
40+
return --RefCountInternal;
41+
}
42+
43+
uint32_t incrementExternalReferenceCount() noexcept {
44+
return ++RefCountExternal;
45+
}
46+
uint32_t decrementExternalReferenceCount() noexcept {
47+
return --RefCountExternal;
48+
}
49+
uint32_t getExternalReferenceCount() const noexcept {
50+
return RefCountExternal;
51+
}
52+
};
53+
54+
/// Handle to a command-buffer object.
1455
struct ur_exp_command_buffer_handle_t_ {
56+
/// UR queue belonging to the command-buffer, required for OpenCL creation.
1557
ur_queue_handle_t hInternalQueue;
58+
/// Context the command-buffer is created for.
1659
ur_context_handle_t hContext;
60+
/// OpenCL command-buffer object.
1761
cl_command_buffer_khr CLCommandBuffer;
62+
/// Set to true if the kernel commands in the command-buffer can be updated,
63+
/// false otherwise
64+
bool IsUpdatable;
65+
/// Set to true if the command-buffer has been finalized, false otherwise
66+
bool IsFinalized;
67+
/// List of commands in the command-buffer.
68+
std::vector<ur_exp_command_buffer_command_handle_t> CommandHandles;
69+
/// Internal & External reference counts of the command-buffer. We do this
70+
/// manually rather than forward to the OpenCL retain/release APIs because
71+
/// we also need to track the lifetimes of command handle objects, which
72+
/// extend the lifetime of a UR command-buffer even if its reference
73+
/// count is zero.
74+
std::atomic_uint32_t RefCountInternal;
75+
std::atomic_uint32_t RefCountExternal;
1876

1977
ur_exp_command_buffer_handle_t_(ur_queue_handle_t hQueue,
2078
ur_context_handle_t hContext,
21-
cl_command_buffer_khr CLCommandBuffer)
79+
cl_command_buffer_khr CLCommandBuffer,
80+
bool IsUpdatable)
2281
: hInternalQueue(hQueue), hContext(hContext),
23-
CLCommandBuffer(CLCommandBuffer) {}
82+
CLCommandBuffer(CLCommandBuffer), IsUpdatable(IsUpdatable),
83+
IsFinalized(false), RefCountInternal(0), RefCountExternal(0) {}
84+
85+
~ur_exp_command_buffer_handle_t_();
86+
87+
uint32_t incrementInternalReferenceCount() noexcept {
88+
return ++RefCountInternal;
89+
}
90+
uint32_t decrementInternalReferenceCount() noexcept {
91+
return --RefCountInternal;
92+
}
93+
94+
uint32_t incrementExternalReferenceCount() noexcept {
95+
return ++RefCountExternal;
96+
}
97+
uint32_t decrementExternalReferenceCount() noexcept {
98+
return --RefCountExternal;
99+
}
100+
uint32_t getExternalReferenceCount() const noexcept {
101+
return RefCountExternal;
102+
}
24103
};

source/adapters/opencl/common.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,35 @@ ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) {
101101
*NativeHandle = reinterpret_cast<ur_native_handle_t>(URObj);
102102
return UR_RESULT_SUCCESS;
103103
}
104+
105+
/// Query whether an OpenCL device can back UR command-buffer kernel update.
///
/// The device must list the cl_khr_command_buffer_mutable_dispatch extension
/// in CL_DEVICE_EXTENSIONS and must report every mutable-dispatch capability
/// (arguments, global offset, global size, local size, exec info) in
/// CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR.
///
/// @param[in] Dev OpenCL device to query.
/// @param[out] Result Set to true when update is supported, false otherwise.
/// Not written if CL_RETURN_ON_FAILURE returns early on a failed query.
/// @return CL_SUCCESS when the queries succeed; otherwise the value produced
/// by CL_RETURN_ON_FAILURE for the failing clGetDeviceInfo call.
cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev,
                                                 bool &Result) {
  // First call only asks for the size of the extension string.
  size_t ExtSize = 0;
  CL_RETURN_ON_FAILURE(
      clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize));

  std::string ExtStr(ExtSize, '\0');
  CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize,
                                       ExtStr.data(), nullptr));

  if (ExtStr.find("cl_khr_command_buffer_mutable_dispatch") ==
      std::string::npos) {
    Result = false;
    return CL_SUCCESS;
  }

  // All the CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR capabilities must
  // be supported by a device for UR update.
  cl_mutable_dispatch_fields_khr MutableCapabilities = 0;
  CL_RETURN_ON_FAILURE(clGetDeviceInfo(
      Dev, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
      sizeof(MutableCapabilities), &MutableCapabilities, nullptr));
  const cl_mutable_dispatch_fields_khr RequiredCaps =
      CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
      CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR |
      CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR | CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR |
      CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
  Result = (MutableCapabilities & RequiredCaps) == RequiredCaps;
  return CL_SUCCESS;
}

source/adapters/opencl/common.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR";
215215
CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR";
216216
CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR";
217217
CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR";
218+
CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR";
218219

219220
#undef CONSTFIX
220221

@@ -305,6 +306,10 @@ using clGetCommandBufferInfoKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)(
305306
cl_command_buffer_khr command_buffer, cl_command_buffer_info_khr param_name,
306307
size_t param_value_size, void *param_value, size_t *param_value_size_ret);
307308

309+
using clUpdateMutableCommandsKHR_fn = CL_API_ENTRY
310+
cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer,
311+
const cl_mutable_base_config_khr *mutable_config);
312+
308313
template <typename T> struct FuncPtrCache {
309314
std::map<cl_context, T> Map;
310315
std::mutex Mutex;
@@ -344,6 +349,7 @@ struct ExtFuncPtrCacheT {
344349
FuncPtrCache<clCommandFillBufferKHR_fn> clCommandFillBufferKHRCache;
345350
FuncPtrCache<clEnqueueCommandBufferKHR_fn> clEnqueueCommandBufferKHRCache;
346351
FuncPtrCache<clGetCommandBufferInfoKHR_fn> clGetCommandBufferInfoKHRCache;
352+
FuncPtrCache<clUpdateMutableCommandsKHR_fn> clUpdateMutableCommandsKHRCache;
347353
};
348354
// A raw pointer is used here since the lifetime of this map has to be tied to
349355
// piTeardown to avoid issues with static destruction order (a user application
@@ -414,3 +420,6 @@ static ur_result_t getExtFuncFromContext(cl_context Context,
414420
ur_result_t mapCLErrorToUR(cl_int Result);
415421

416422
ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle);
423+
424+
cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev,
425+
bool &Result);

source/adapters/opencl/device.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -967,7 +967,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
967967
std::string::npos);
968968
}
969969
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
970-
return ReturnValue(false);
970+
cl_device_id Dev = cl_adapter::cast<cl_device_id>(hDevice);
971+
bool Supported = false;
972+
CL_RETURN_ON_FAILURE(
973+
deviceSupportsURCommandBufferKernelUpdate(Dev, Supported));
974+
return ReturnValue(Supported);
971975
}
972976
default: {
973977
return UR_RESULT_ERROR_INVALID_ENUMERATION;

test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,30 @@ struct BufferFillCommandTest
1414
UUR_RETURN_ON_FATAL_FAILURE(
1515
urUpdatableCommandBufferExpExecutionTest::SetUp());
1616

17-
// First argument is buffer to fill (will also be hidden accessor arg)
18-
AddBuffer1DArg(sizeof(val) * global_size, &buffer);
17+
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE,
18+
sizeof(val) * global_size, nullptr,
19+
&buffer));
20+
21+
// TODO - Enable single code path after https://github.com/oneapi-src/unified-runtime/pull/1176
22+
// is merged
23+
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
24+
// First argument is buffer to fill
25+
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffer));
26+
} else {
27+
// First argument is buffer to fill
28+
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 0, sizeof(buffer),
29+
nullptr, &buffer));
30+
}
31+
// second arg is hidden accessor
32+
struct {
33+
size_t offsets[1] = {0};
34+
} accessor;
35+
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(accessor), nullptr,
36+
&accessor));
37+
1938
// Third argument is scalar to fill with.
20-
AddPodArg(val);
39+
ASSERT_SUCCESS(
40+
urKernelSetArgValue(kernel, 2, sizeof(val), nullptr, &val));
2141

2242
// Append kernel command to command-buffer and close command-buffer
2343
ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp(

test/conformance/exp_command_buffer/buffer_saxpy_kernel_update.cpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,17 @@ struct BufferSaxpyKernelTest
2929
0, nullptr, nullptr));
3030
}
3131

32-
// Index 0 is output buffer
33-
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0]));
32+
// TODO: Enable single code path once https://github.com/oneapi-src/unified-runtime/pull/1176
33+
// is merged
34+
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
35+
// Index 0 is output buffer
36+
ASSERT_SUCCESS(
37+
urKernelSetArgMemObj(kernel, 0, nullptr, buffers[0]));
38+
} else {
39+
// Index 0 is output buffer
40+
ASSERT_SUCCESS(urKernelSetArgValue(
41+
kernel, 0, sizeof(ur_mem_handle_t), nullptr, &buffers[0]));
42+
}
3443
// Index 1 is output accessor
3544
struct {
3645
size_t offsets[1] = {0};
@@ -41,13 +50,25 @@ struct BufferSaxpyKernelTest
4150
// Index 2 is A
4251
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 2, sizeof(A), nullptr, &A));
4352
// Index 3 is X buffer
44-
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 3, nullptr, buffers[1]));
53+
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
54+
ASSERT_SUCCESS(
55+
urKernelSetArgMemObj(kernel, 3, nullptr, buffers[1]));
56+
} else {
57+
ASSERT_SUCCESS(urKernelSetArgValue(
58+
kernel, 3, sizeof(ur_mem_handle_t), nullptr, &buffers[1]));
59+
}
4560

4661
// Index 4 is X buffer accessor
4762
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 4, sizeof(accessor), nullptr,
4863
&accessor));
4964
// Index 5 is Y buffer
50-
ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2]));
65+
if (backend != UR_PLATFORM_BACKEND_OPENCL) {
66+
ASSERT_SUCCESS(
67+
urKernelSetArgMemObj(kernel, 5, nullptr, buffers[2]));
68+
} else {
69+
ASSERT_SUCCESS(urKernelSetArgValue(
70+
kernel, 5, sizeof(ur_mem_handle_t), nullptr, &buffers[2]));
71+
}
5172

5273
// Index 6 is Y buffer accessor
5374
ASSERT_SUCCESS(urKernelSetArgValue(kernel, 6, sizeof(accessor), nullptr,

test/conformance/exp_command_buffer/fixtures.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest {
5959
void SetUp() override {
6060
UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp());
6161

62+
ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND,
63+
sizeof(backend), &backend, nullptr));
64+
6265
size_t returned_size;
6366
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_EXTENSIONS, 0,
6467
nullptr, &returned_size));
@@ -97,6 +100,7 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest {
97100

98101
ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr;
99102
ur_bool_t updatable_command_buffer_support = false;
103+
ur_platform_backend_t backend{};
100104
};
101105

102106
struct urUpdatableCommandBufferExpExecutionTest

test/conformance/exp_command_buffer/invalid_update.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ struct InvalidUpdateTest
4141
}
4242

4343
void TearDown() override {
44+
// Work around an issue where the OpenCL adapter implements urUSMFree
45+
// using a blocking free, which hangs
46+
EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle));
47+
4448
if (shared_ptr) {
4549
EXPECT_SUCCESS(urUSMFree(context, shared_ptr));
4650
}

test/conformance/exp_command_buffer/ndrange_update.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,12 @@ TEST_P(NDRangeUpdateTest, Update3D) {
155155
// Update the kernel work dimensions to 2, and update global size, local size,
156156
// and global offset to new values.
157157
TEST_P(NDRangeUpdateTest, Update2D) {
158+
if (backend == UR_PLATFORM_BACKEND_OPENCL) {
159+
// OpenCL cl_khr_command_buffer_mutable_dispatch does not support
160+
// updating the work dimension.
161+
GTEST_SKIP();
162+
}
163+
158164
// Run command-buffer prior to update and verify output
159165
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
160166
nullptr, nullptr));
@@ -205,6 +211,12 @@ TEST_P(NDRangeUpdateTest, Update2D) {
205211
// Update the kernel work dimensions to 1, and check that previously
206212
// set global size, local size, and global offset update accordingly.
207213
TEST_P(NDRangeUpdateTest, Update1D) {
214+
if (backend == UR_PLATFORM_BACKEND_OPENCL) {
215+
// OpenCL cl_khr_command_buffer_mutable_dispatch does not support
216+
// updating the work dimension.
217+
GTEST_SKIP();
218+
}
219+
208220
// Run command-buffer prior to update and verify output
209221
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
210222
nullptr, nullptr));

test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ TEST_P(USMFillCommandTest, UpdateExecInfo) {
154154
ur_exp_command_buffer_update_exec_info_desc_t new_exec_info_descs[3];
155155

156156
// Update indirect access flag
157-
bool indirect_access = false;
157+
bool indirect_access = true;
158158
new_exec_info_descs[0] = {
159159
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
160160
nullptr, // pNext
@@ -179,14 +179,14 @@ TEST_P(USMFillCommandTest, UpdateExecInfo) {
179179
ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
180180
allocation_size, &new_shared_ptr));
181181
ASSERT_NE(new_shared_ptr, nullptr);
182-
void *pointers = {new_shared_ptr};
182+
void *pointers[1] = {new_shared_ptr};
183183
new_exec_info_descs[2] = {
184184
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_EXEC_INFO_DESC, // stype
185185
nullptr, // pNext
186186
UR_KERNEL_EXEC_INFO_USM_PTRS, // propName
187187
sizeof(pointers), // propSize
188-
nullptr, // pProperties
189-
&pointers, // pPropValue
188+
nullptr, // pProperties
189+
pointers, // pPropValue
190190
};
191191

192192
ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = {

0 commit comments

Comments
 (0)