Skip to content

Commit a74d610

Browse files
committed
In-order path for OpenCL command-buffers
After the [spec bump of cl_khr_command_buffer to 0.9.7](https://github.com/KhronosGroup/OpenCL-Docs/), in the OpenCL adapter we no longer need to worry about the in-order/out-of-order property of the internal queue used on command-buffer creation matching the queue used to enqueue the command-buffer. We can therefore take advantage of the in-order flag passed on UR command-buffer creation to use an in-order queue for command-buffer creation, and omit using sync points.
1 parent e55fe50 commit a74d610

File tree

2 files changed

+48
-11
lines changed

2 files changed

+48
-11
lines changed

source/adapters/opencl/command_buffer.cpp

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
3434
ur_exp_command_buffer_handle_t *phCommandBuffer) {
3535

3636
ur_queue_handle_t Queue = nullptr;
37-
UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue));
37+
ur_queue_properties_t QueueProperties = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES,
38+
nullptr, 0};
39+
40+
const bool IsInOrder =
41+
pCommandBufferDesc ? pCommandBufferDesc->isInOrder : false;
42+
if (!IsInOrder) { // Request out-of-order execution only when in-order was NOT asked for; flags stays 0 otherwise.
43+
QueueProperties.flags = UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
44+
}
45+
46+
UR_RETURN_ON_FAILURE(
47+
urQueueCreate(hContext, hDevice, &QueueProperties, &Queue));
3848

3949
cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
4050
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
@@ -67,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
6777

6878
try {
6979
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
70-
Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable);
80+
Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable, IsInOrder);
7181
*phCommandBuffer = URCommandBuffer.release();
7282
} catch (...) {
7383
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
@@ -148,11 +158,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
148158

149159
cl_command_properties_khr *Properties =
150160
hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr;
161+
162+
cl_sync_point_khr *RetSyncPoint =
163+
hCommandBuffer->IsInOrder ? nullptr : pSyncPoint;
164+
const cl_sync_point_khr *SyncPointWaitList =
165+
hCommandBuffer->IsInOrder ? nullptr : pSyncPointWaitList;
166+
uint32_t WaitListSize =
167+
hCommandBuffer->IsInOrder ? 0 : numSyncPointsInWaitList;
151168
CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR(
152169
hCommandBuffer->CLCommandBuffer, nullptr, Properties,
153170
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
154-
pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList,
155-
pSyncPointWaitList, pSyncPoint, OutCommandHandle));
171+
pGlobalWorkSize, pLocalWorkSize, WaitListSize, SyncPointWaitList,
172+
RetSyncPoint, OutCommandHandle));
156173

157174
try {
158175
auto Handle = std::make_unique<ur_exp_command_buffer_command_handle_t_>(
@@ -219,11 +236,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
219236
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache,
220237
cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR));
221238

239+
cl_sync_point_khr *RetSyncPoint =
240+
hCommandBuffer->IsInOrder ? nullptr : pSyncPoint;
241+
const cl_sync_point_khr *SyncPointWaitList =
242+
hCommandBuffer->IsInOrder ? nullptr : pSyncPointWaitList;
243+
uint32_t WaitListSize =
244+
hCommandBuffer->IsInOrder ? 0 : numSyncPointsInWaitList;
222245
CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR(
223246
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
224247
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
225-
srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList,
226-
pSyncPoint, nullptr));
248+
srcOffset, dstOffset, size, WaitListSize, SyncPointWaitList, RetSyncPoint,
249+
nullptr));
227250

228251
return UR_RESULT_SUCCESS;
229252
}
@@ -257,12 +280,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
257280
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache,
258281
cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR));
259282

283+
cl_sync_point_khr *RetSyncPoint =
284+
hCommandBuffer->IsInOrder ? nullptr : pSyncPoint;
285+
const cl_sync_point_khr *SyncPointWaitList =
286+
hCommandBuffer->IsInOrder ? nullptr : pSyncPointWaitList;
287+
uint32_t WaitListSize =
288+
hCommandBuffer->IsInOrder ? 0 : numSyncPointsInWaitList;
260289
CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR(
261290
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
262291
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
263292
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
264-
dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList,
265-
pSyncPoint, nullptr));
293+
dstRowPitch, dstSlicePitch, WaitListSize, SyncPointWaitList, RetSyncPoint,
294+
nullptr));
266295

267296
return UR_RESULT_SUCCESS;
268297
}
@@ -361,10 +390,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
361390
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache,
362391
cl_ext::CommandFillBufferName, &clCommandFillBufferKHR));
363392

393+
cl_sync_point_khr *RetSyncPoint =
394+
hCommandBuffer->IsInOrder ? nullptr : pSyncPoint;
395+
const cl_sync_point_khr *SyncPointWaitList =
396+
hCommandBuffer->IsInOrder ? nullptr : pSyncPointWaitList;
397+
uint32_t WaitListSize =
398+
hCommandBuffer->IsInOrder ? 0 : numSyncPointsInWaitList;
364399
CL_RETURN_ON_FAILURE(clCommandFillBufferKHR(
365400
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
366401
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
367-
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));
402+
WaitListSize, SyncPointWaitList, RetSyncPoint, nullptr));
368403

369404
return UR_RESULT_SUCCESS;
370405
}

source/adapters/opencl/command_buffer.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ struct ur_exp_command_buffer_handle_t_ {
4646
/// Set to true if the kernel commands in the command-buffer can be updated,
4747
/// false otherwise
4848
bool IsUpdatable;
49+
/// Set to true if the command-buffer was created from an in-order queue.
50+
bool IsInOrder;
4951
/// Set to true if the command-buffer has been finalized, false otherwise
5052
bool IsFinalized;
5153
/// List of commands in the command-buffer.
@@ -58,10 +60,10 @@ struct ur_exp_command_buffer_handle_t_ {
5860
ur_context_handle_t hContext,
5961
ur_device_handle_t hDevice,
6062
cl_command_buffer_khr CLCommandBuffer,
61-
bool IsUpdatable)
63+
bool IsUpdatable, bool IsInOrder)
6264
: hInternalQueue(hQueue), hContext(hContext), hDevice(hDevice),
6365
CLCommandBuffer(CLCommandBuffer), IsUpdatable(IsUpdatable),
64-
IsFinalized(false), RefCount(0) {}
66+
IsInOrder(IsInOrder), IsFinalized(false), RefCount(0) {}
6567

6668
~ur_exp_command_buffer_handle_t_();
6769

0 commit comments

Comments
 (0)