@@ -34,7 +34,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
34
34
ur_exp_command_buffer_handle_t *phCommandBuffer) {
35
35
36
36
ur_queue_handle_t Queue = nullptr ;
37
- UR_RETURN_ON_FAILURE (urQueueCreate (hContext, hDevice, nullptr , &Queue));
37
+ ur_queue_properties_t QueueProperties = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES,
38
+ nullptr , 0 };
39
+ const bool IsInOrder =
40
+ pCommandBufferDesc ? pCommandBufferDesc->isInOrder : false ;
41
+ if (!IsInOrder) {
42
+ QueueProperties.flags = UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
43
+ }
44
+ UR_RETURN_ON_FAILURE (
45
+ urQueueCreate (hContext, hDevice, &QueueProperties, &Queue));
38
46
39
47
cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
40
48
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr ;
@@ -67,7 +75,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
67
75
68
76
try {
69
77
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
70
- Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable);
78
+ Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable, IsInOrder );
71
79
*phCommandBuffer = URCommandBuffer.release ();
72
80
} catch (...) {
73
81
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
@@ -148,11 +156,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
148
156
149
157
cl_command_properties_khr *Properties =
150
158
hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr ;
159
+
160
+ const bool IsInOrder = hCommandBuffer->IsInOrder ;
161
+ cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
162
+ const cl_sync_point_khr *SyncPointWaitList =
163
+ IsInOrder ? nullptr : pSyncPointWaitList;
164
+ uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
151
165
CL_RETURN_ON_FAILURE (clCommandNDRangeKernelKHR (
152
166
hCommandBuffer->CLCommandBuffer , nullptr , Properties,
153
167
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
154
- pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList ,
155
- pSyncPointWaitList, pSyncPoint , OutCommandHandle));
168
+ pGlobalWorkSize, pLocalWorkSize, WaitListSize, SyncPointWaitList ,
169
+ RetSyncPoint , OutCommandHandle));
156
170
157
171
try {
158
172
auto Handle = std::make_unique<ur_exp_command_buffer_command_handle_t_>(
@@ -219,11 +233,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
219
233
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache ,
220
234
cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR));
221
235
236
+ const bool IsInOrder = hCommandBuffer->IsInOrder ;
237
+ cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
238
+ const cl_sync_point_khr *SyncPointWaitList =
239
+ IsInOrder ? nullptr : pSyncPointWaitList;
240
+ uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
222
241
CL_RETURN_ON_FAILURE (clCommandCopyBufferKHR (
223
242
hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
224
243
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
225
- srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList ,
226
- pSyncPoint, nullptr ));
244
+ srcOffset, dstOffset, size, WaitListSize, SyncPointWaitList, RetSyncPoint ,
245
+ nullptr ));
227
246
228
247
return UR_RESULT_SUCCESS;
229
248
}
@@ -257,12 +276,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
257
276
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache ,
258
277
cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR));
259
278
279
+ const bool IsInOrder = hCommandBuffer->IsInOrder ;
280
+ cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
281
+ const cl_sync_point_khr *SyncPointWaitList =
282
+ IsInOrder ? nullptr : pSyncPointWaitList;
283
+ uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
260
284
CL_RETURN_ON_FAILURE (clCommandCopyBufferRectKHR (
261
285
hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
262
286
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
263
287
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
264
- dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList ,
265
- pSyncPoint, nullptr ));
288
+ dstRowPitch, dstSlicePitch, WaitListSize, SyncPointWaitList, RetSyncPoint ,
289
+ nullptr ));
266
290
267
291
return UR_RESULT_SUCCESS;
268
292
}
@@ -361,10 +385,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
361
385
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache ,
362
386
cl_ext::CommandFillBufferName, &clCommandFillBufferKHR));
363
387
388
+ const bool IsInOrder = hCommandBuffer->IsInOrder ;
389
+ cl_sync_point_khr *RetSyncPoint = IsInOrder ? nullptr : pSyncPoint;
390
+ const cl_sync_point_khr *SyncPointWaitList =
391
+ IsInOrder ? nullptr : pSyncPointWaitList;
392
+ uint32_t WaitListSize = IsInOrder ? 0 : numSyncPointsInWaitList;
364
393
CL_RETURN_ON_FAILURE (clCommandFillBufferKHR (
365
394
hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
366
395
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
367
- numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint , nullptr ));
396
+ WaitListSize, SyncPointWaitList, RetSyncPoint , nullptr ));
368
397
369
398
return UR_RESULT_SUCCESS;
370
399
}
0 commit comments