Skip to content

Commit 60bc9b4

Browse files
committed
[SYCL][Graph][UR] In-order path for OpenCL command-buffers
After the [spec bump of cl_khr_command_buffer to 0.9.7](https://github.com/KhronosGroup/OpenCL-Docs/), in the OpenCL adapter we no longer need to worry about the in-order/out-of-order property of the internal queue used on command-buffer creation matching the queue used to enqueue the command-buffer. We can therefore take advantage of the in-order flag passed on UR command-buffer creation to use an in-order queue for command-buffer creation, and omit using sync points.
1 parent 0f7fca9 commit 60bc9b4

File tree

2 files changed

+42
-11
lines changed

2 files changed

+42
-11
lines changed

unified-runtime/source/adapters/opencl/command_buffer.cpp

+38-9
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
3434
ur_exp_command_buffer_handle_t *phCommandBuffer) {
3535

3636
ur_queue_handle_t Queue = nullptr;
37-
UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue));
37+
ur_queue_properties_t QueueProperties = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES,
38+
nullptr, 0};
39+
const bool IsInOrder =
40+
pCommandBufferDesc ? pCommandBufferDesc->isInOrder : false;
41+
if (!IsInOrder) {
42+
QueueProperties.flags = UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
43+
}
44+
UR_RETURN_ON_FAILURE(
45+
urQueueCreate(hContext, hDevice, &QueueProperties, &Queue));
3846

3947
cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
4048
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
@@ -67,7 +75,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
6775

6876
try {
6977
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
70-
Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable);
78+
Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable, IsInOrder);
7179
*phCommandBuffer = URCommandBuffer.release();
7280
} catch (...) {
7381
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
@@ -148,11 +156,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
148156

149157
cl_command_properties_khr *Properties =
150158
hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr;
159+
160+
const bool IsInOrder = hCommandBuffer->IsInOrder;
161+
cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
162+
const cl_sync_point_khr *SyncPointWaitList =
163+
IsInOrder ? nullptr : pSyncPointWaitList;
164+
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
151165
CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR(
152166
hCommandBuffer->CLCommandBuffer, nullptr, Properties,
153167
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
154-
pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList,
155-
pSyncPointWaitList, pSyncPoint, OutCommandHandle));
168+
pGlobalWorkSize, pLocalWorkSize, WaitListSize, SyncPointWaitList,
169+
RetSyncPoint, OutCommandHandle));
156170

157171
try {
158172
auto Handle = std::make_unique<ur_exp_command_buffer_command_handle_t_>(
@@ -219,11 +233,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
219233
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache,
220234
cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR));
221235

236+
const bool IsInOrder = hCommandBuffer->IsInOrder;
237+
cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
238+
const cl_sync_point_khr *SyncPointWaitList =
239+
IsInOrder ? nullptr : pSyncPointWaitList;
240+
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
222241
CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR(
223242
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
224243
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
225-
srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList,
226-
pSyncPoint, nullptr));
244+
srcOffset, dstOffset, size, WaitListSize, SyncPointWaitList, RetSyncPoint,
245+
nullptr));
227246

228247
return UR_RESULT_SUCCESS;
229248
}
@@ -257,12 +276,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
257276
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache,
258277
cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR));
259278

279+
const bool IsInOrder = hCommandBuffer->IsInOrder;
280+
cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
281+
const cl_sync_point_khr *SyncPointWaitList =
282+
IsInOrder ? nullptr : pSyncPointWaitList;
283+
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
260284
CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR(
261285
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
262286
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
263287
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
264-
dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList,
265-
pSyncPoint, nullptr));
288+
dstRowPitch, dstSlicePitch, WaitListSize, SyncPointWaitList, RetSyncPoint,
289+
nullptr));
266290

267291
return UR_RESULT_SUCCESS;
268292
}
@@ -361,10 +385,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
361385
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache,
362386
cl_ext::CommandFillBufferName, &clCommandFillBufferKHR));
363387

388+
const bool IsInOrder = hCommandBuffer->IsInOrder;
389+
cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
390+
const cl_sync_point_khr *SyncPointWaitList =
391+
IsInOrder ? nullptr : pSyncPointWaitList;
392+
uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
364393
CL_RETURN_ON_FAILURE(clCommandFillBufferKHR(
365394
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
366395
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
367-
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));
396+
WaitListSize, SyncPointWaitList, RetSyncPoint, nullptr));
368397

369398
return UR_RESULT_SUCCESS;
370399
}

unified-runtime/source/adapters/opencl/command_buffer.hpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ struct ur_exp_command_buffer_handle_t_ {
4646
/// Set to true if the kernel commands in the command-buffer can be updated,
4747
/// false otherwise
4848
bool IsUpdatable;
49+
/// Set to true if the command-buffer was created with an in-order queue.
50+
bool IsInOrder;
4951
/// Set to true if the command-buffer has been finalized, false otherwise
5052
bool IsFinalized;
5153
/// List of commands in the command-buffer.
@@ -58,10 +60,10 @@ struct ur_exp_command_buffer_handle_t_ {
5860
ur_context_handle_t hContext,
5961
ur_device_handle_t hDevice,
6062
cl_command_buffer_khr CLCommandBuffer,
61-
bool IsUpdatable)
63+
bool IsUpdatable, bool IsInOrder)
6264
: hInternalQueue(hQueue), hContext(hContext), hDevice(hDevice),
6365
CLCommandBuffer(CLCommandBuffer), IsUpdatable(IsUpdatable),
64-
IsFinalized(false), RefCount(0) {}
66+
IsInOrder(IsInOrder), IsFinalized(false), RefCount(0) {}
6567

6668
~ur_exp_command_buffer_handle_t_();
6769

0 commit comments

Comments
 (0)