@@ -34,7 +34,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
34
34
ur_exp_command_buffer_handle_t *phCommandBuffer) {
35
35
36
36
ur_queue_handle_t Queue = nullptr ;
37
- UR_RETURN_ON_FAILURE (urQueueCreate (hContext, hDevice, nullptr , &Queue));
37
+ ur_queue_properties_t QueueProperties = {UR_STRUCTURE_TYPE_QUEUE_PROPERTIES,
38
+ nullptr , 0 };
39
+
40
+ const bool IsInOrder =
41
+ pCommandBufferDesc ? pCommandBufferDesc->isInOrder : false ;
42
+ if (!IsInOrder) { // request out-of-order execution only when the buffer is NOT in-order
43
+ QueueProperties.flags = UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
44
+ }
45
+
46
+ UR_RETURN_ON_FAILURE (
47
+ urQueueCreate (hContext, hDevice, &QueueProperties, &Queue));
38
48
39
49
cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
40
50
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr ;
@@ -67,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
67
77
68
78
try {
69
79
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
70
- Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable);
80
+ Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable, IsInOrder );
71
81
*phCommandBuffer = URCommandBuffer.release ();
72
82
} catch (...) {
73
83
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
@@ -148,11 +158,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
148
158
149
159
cl_command_properties_khr *Properties =
150
160
hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr ;
161
+
162
+ cl_sync_point_khr *RetSyncPoint =
163
+ hCommandBuffer->IsInOrder ? nullptr : pSyncPoint;
164
+ const cl_sync_point_khr *SyncPointWaitList =
165
+ hCommandBuffer->IsInOrder ? nullptr : pSyncPointWaitList;
166
+ uint32_t WaitListSize =
167
+ hCommandBuffer->IsInOrder ? 0 : numSyncPointsInWaitList;
151
168
CL_RETURN_ON_FAILURE (clCommandNDRangeKernelKHR (
152
169
hCommandBuffer->CLCommandBuffer , nullptr , Properties,
153
170
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
154
- pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList ,
155
- pSyncPointWaitList, pSyncPoint , OutCommandHandle));
171
+ pGlobalWorkSize, pLocalWorkSize, WaitListSize, SyncPointWaitList ,
172
+ RetSyncPoint , OutCommandHandle));
156
173
157
174
try {
158
175
auto Handle = std::make_unique<ur_exp_command_buffer_command_handle_t_>(
@@ -219,11 +236,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
219
236
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache ,
220
237
cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR));
221
238
239
+ cl_sync_point_khr *RetSyncPoint =
240
+ hCommandBuffer->IsInOrder ? nullptr : pSyncPoint;
241
+ const cl_sync_point_khr *SyncPointWaitList =
242
+ hCommandBuffer->IsInOrder ? nullptr : pSyncPointWaitList;
243
+ uint32_t WaitListSize =
244
+ hCommandBuffer->IsInOrder ? 0 : numSyncPointsInWaitList;
222
245
CL_RETURN_ON_FAILURE (clCommandCopyBufferKHR (
223
246
hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
224
247
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
225
- srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList ,
226
- pSyncPoint, nullptr ));
248
+ srcOffset, dstOffset, size, WaitListSize, SyncPointWaitList, RetSyncPoint ,
249
+ nullptr ));
227
250
228
251
return UR_RESULT_SUCCESS;
229
252
}
@@ -257,12 +280,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
257
280
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache ,
258
281
cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR));
259
282
283
+ cl_sync_point_khr *RetSyncPoint =
284
+ hCommandBuffer->IsInOrder ? nullptr : pSyncPoint;
285
+ const cl_sync_point_khr *SyncPointWaitList =
286
+ hCommandBuffer->IsInOrder ? nullptr : pSyncPointWaitList;
287
+ uint32_t WaitListSize =
288
+ hCommandBuffer->IsInOrder ? 0 : numSyncPointsInWaitList;
260
289
CL_RETURN_ON_FAILURE (clCommandCopyBufferRectKHR (
261
290
hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
262
291
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
263
292
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
264
- dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList ,
265
- pSyncPoint, nullptr ));
293
+ dstRowPitch, dstSlicePitch, WaitListSize, SyncPointWaitList, RetSyncPoint ,
294
+ nullptr ));
266
295
267
296
return UR_RESULT_SUCCESS;
268
297
}
@@ -361,10 +390,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
361
390
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache ,
362
391
cl_ext::CommandFillBufferName, &clCommandFillBufferKHR));
363
392
393
+ cl_sync_point_khr *RetSyncPoint =
394
+ hCommandBuffer->IsInOrder ? nullptr : pSyncPoint;
395
+ const cl_sync_point_khr *SyncPointWaitList =
396
+ hCommandBuffer->IsInOrder ? nullptr : pSyncPointWaitList;
397
+ uint32_t WaitListSize =
398
+ hCommandBuffer->IsInOrder ? 0 : numSyncPointsInWaitList;
364
399
CL_RETURN_ON_FAILURE (clCommandFillBufferKHR (
365
400
hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
366
401
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
367
- numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint , nullptr ));
402
+ WaitListSize, SyncPointWaitList, RetSyncPoint , nullptr ));
368
403
369
404
return UR_RESULT_SUCCESS;
370
405
}
0 commit comments