13
13
14
14
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp (
15
15
ur_context_handle_t hContext, ur_device_handle_t hDevice,
16
- [[maybe_unused]] const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
16
+ const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
17
17
ur_exp_command_buffer_handle_t *phCommandBuffer) {
18
18
19
19
ur_queue_handle_t Queue = nullptr ;
@@ -29,13 +29,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
29
29
if (!clCreateCommandBufferKHR || Res != CL_SUCCESS)
30
30
return UR_RESULT_ERROR_INVALID_OPERATION;
31
31
32
+ bool IsUpdatable =
33
+ pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false ;
34
+
35
+ bool SupportsUpdate = false ;
36
+ cl_device_id CLDevice = cl_adapter::cast<cl_device_id >(hDevice);
37
+ CL_RETURN_ON_FAILURE (
38
+ deviceSupportsURCommandBufferKernelUpdate (CLDevice, SupportsUpdate));
39
+
40
+ bool Updatable = IsUpdatable && SupportsUpdate;
41
+
42
+ cl_command_buffer_properties_khr Properties[3 ] = {
43
+ CL_COMMAND_BUFFER_FLAGS_KHR,
44
+ Updatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u , 0 };
32
45
auto CLCommandBuffer = clCreateCommandBufferKHR (
33
- 1 , cl_adapter::cast<cl_command_queue *>(&Queue), nullptr , &Res);
46
+ 1 , cl_adapter::cast<cl_command_queue *>(&Queue), Properties , &Res);
34
47
CL_RETURN_ON_FAILURE_AND_SET_NULL (Res, phCommandBuffer);
35
48
36
49
try {
37
50
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
38
- Queue, hContext, CLCommandBuffer);
51
+ Queue, hContext, CLCommandBuffer, Updatable );
39
52
*phCommandBuffer = URCommandBuffer.release ();
40
53
} catch (...) {
41
54
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
@@ -95,6 +108,7 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
95
108
96
109
CL_RETURN_ON_FAILURE (
97
110
clFinalizeCommandBufferKHR (hCommandBuffer->CLCommandBuffer ));
111
+ hCommandBuffer->Finalized = true ;
98
112
return UR_RESULT_SUCCESS;
99
113
}
100
114
@@ -105,7 +119,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
105
119
uint32_t numSyncPointsInWaitList,
106
120
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
107
121
ur_exp_command_buffer_sync_point_t *pSyncPoint,
108
- ur_exp_command_buffer_command_handle_t *) {
122
+ ur_exp_command_buffer_command_handle_t *phCommandHandle ) {
109
123
110
124
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext );
111
125
cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr ;
@@ -117,11 +131,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
117
131
if (!clCommandNDRangeKernelKHR || Res != CL_SUCCESS)
118
132
return UR_RESULT_ERROR_INVALID_OPERATION;
119
133
134
+ cl_mutable_command_khr CommandHandle = nullptr ;
135
+ cl_mutable_command_khr *OutCommandHandle =
136
+ hCommandBuffer->Updatable ? &CommandHandle : nullptr ;
137
+
138
+ cl_ndrange_kernel_command_properties_khr UpdateProperties[] = {
139
+ CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
140
+ CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR |
141
+ CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR |
142
+ CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR |
143
+ CL_MUTABLE_DISPATCH_ARGUMENTS_KHR | CL_MUTABLE_DISPATCH_EXEC_INFO_KHR,
144
+ 0 };
145
+
146
+ cl_ndrange_kernel_command_properties_khr *Properties =
147
+ hCommandBuffer->Updatable ? UpdateProperties : nullptr ;
120
148
CL_RETURN_ON_FAILURE (clCommandNDRangeKernelKHR (
121
- hCommandBuffer->CLCommandBuffer , nullptr , nullptr ,
149
+ hCommandBuffer->CLCommandBuffer , nullptr , Properties ,
122
150
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
123
151
pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList,
124
- pSyncPointWaitList, pSyncPoint, nullptr ));
152
+ pSyncPointWaitList, pSyncPoint, OutCommandHandle));
153
+
154
+ try {
155
+ auto URCommandHandle =
156
+ std::make_unique<ur_exp_command_buffer_command_handle_t_>(
157
+ hCommandBuffer, CommandHandle, workDim);
158
+ *phCommandHandle = URCommandHandle.release ();
159
+ hCommandBuffer->CommandHandles .push_back (*phCommandHandle);
160
+ } catch (...) {
161
+ return UR_RESULT_ERROR_OUT_OF_RESOURCES;
162
+ }
125
163
126
164
return UR_RESULT_SUCCESS;
127
165
}
@@ -360,19 +398,180 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
360
398
361
399
// Takes an additional reference on a command-buffer command handle.
//
// Calls incrementReferenceCount() on the handle and unconditionally reports
// success. The handle is dereferenced without a null check.
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp(
    [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) {
  auto &Command = *hCommand;
  Command.incrementReferenceCount();
  return UR_RESULT_SUCCESS;
}
365
404
366
405
// Drops one reference from a command-buffer command handle.
//
// Calls decrementReferenceCount() on the handle and unconditionally reports
// success. Nothing is released here when the count reaches zero: that branch
// is still a placeholder.
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp(
    [[maybe_unused]] ur_exp_command_buffer_command_handle_t hCommand) {
  const auto RemainingRefs = hCommand->decrementReferenceCount();
  if (RemainingRefs == 0) {
    // TODO
  }
  return UR_RESULT_SUCCESS;
}
370
412
371
413
// Updates the configuration of a kernel command that was previously appended
// to a command-buffer, by translating the UR update descriptor into a
// cl_mutable_dispatch_config_khr and passing it to clUpdateMutableCommandsKHR.
//
// hCommand            - handle of the kernel command to update; its parent
//                       command-buffer, CL mutable command and work dimension
//                       are read from it.
// pUpdateKernelLaunch - descriptor listing the new exec infos, USM pointer
//                       arguments, mem-object arguments, value arguments and
//                       (optionally) new global offset / global size / local
//                       size arrays.
//
// Returns:
//   UR_RESULT_ERROR_INVALID_OPERATION   - the clUpdateMutableCommandsKHR entry
//                                         point could not be obtained, or the
//                                         command-buffer is not finalized or
//                                         not updatable.
//   UR_RESULT_ERROR_INVALID_ENUMERATION - an exec info entry has a propName
//                                         that is not handled here.
//   UR_RESULT_ERROR_UNSUPPORTED_FEATURE - newWorkDim is non-zero and differs
//                                         from the dimension the command was
//                                         created with.
//   UR_RESULT_SUCCESS                   - the update call succeeded.
//   Any other failure of clUpdateMutableCommandsKHR is returned through
//   CL_RETURN_ON_FAILURE (presumably mapped to a UR result by that macro).
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
    ur_exp_command_buffer_command_handle_t hCommand,
    const ur_exp_command_buffer_update_kernel_launch_desc_t
        *pUpdateKernelLaunch) {

  ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer;
  cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
  cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
  cl_int Res =
      cl_ext::getExtFuncFromContext<decltype(clUpdateMutableCommandsKHR)>(
          CLContext, cl_ext::ExtFuncPtrCache->clUpdateMutableCommandsKHRCache,
          cl_ext::UpdateMutableCommandsName, &clUpdateMutableCommandsKHR);

  if (!clUpdateMutableCommandsKHR || Res != CL_SUCCESS)
    return UR_RESULT_ERROR_INVALID_OPERATION;

  if (!hCommandBuffer->Finalized || !hCommandBuffer->Updatable)
    return UR_RESULT_ERROR_INVALID_OPERATION;

  // The exec info entries below store the *address* of this value, and that
  // address is only read when clUpdateMutableCommandsKHR runs at the end of
  // this function. It must therefore live at function scope rather than
  // inside the loop body.
  const cl_bool TrueVal = CL_TRUE;

  // Find the CL execution info to update
  const uint32_t NumExecInfos = pUpdateKernelLaunch->numNewExecInfos;
  const ur_exp_command_buffer_update_exec_info_desc_t *ExecInfoList =
      pUpdateKernelLaunch->pNewExecInfoList;
  std::vector<cl_mutable_dispatch_exec_info_khr> CLExecInfos;
  for (uint32_t i = 0; i < NumExecInfos; i++) {
    const ur_exp_command_buffer_update_exec_info_desc_t &URExecInfo =
        ExecInfoList[i];

    if (URExecInfo.propName == UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS) {
      // A single UR property expands to the three Intel indirect-access
      // properties (host, device and shared), all set to CL_TRUE.
      cl_mutable_dispatch_exec_info_khr CLExecInfo{};
      CLExecInfo.param_value_size = sizeof(cl_bool);
      CLExecInfo.param_value = &TrueVal;
      CLExecInfo.param_name = CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL;
      CLExecInfos.push_back(CLExecInfo);

      CLExecInfo.param_name = CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL;
      CLExecInfos.push_back(CLExecInfo);

      CLExecInfo.param_name = CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL;
      CLExecInfos.push_back(CLExecInfo);
    } else if (URExecInfo.propName == UR_KERNEL_EXEC_INFO_USM_PTRS) {
      cl_mutable_dispatch_exec_info_khr CLExecInfo{};
      CLExecInfo.param_value_size = URExecInfo.propSize;
      CLExecInfo.param_value = URExecInfo.pNewExecInfo;
      CLExecInfo.param_name = CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL;
      CLExecInfos.push_back(CLExecInfo);
    } else if (URExecInfo.propName != UR_KERNEL_EXEC_INFO_CACHE_CONFIG) {
      // UR_KERNEL_EXEC_INFO_CACHE_CONFIG is accepted and silently skipped;
      // anything else is rejected.
      return UR_RESULT_ERROR_INVALID_ENUMERATION;
    }
  }

  // Find the CL USM pointer arguments to the kernel
  // WARNING - This relies on USM and SVM using the same implementation,
  // which is not guaranteed.
  // See https://github.com/KhronosGroup/OpenCL-Docs/issues/843
  const uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs;
  const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList =
      pUpdateKernelLaunch->pNewPointerArgList;
  // Value-initialised elements, so fields not assigned below stay zero.
  std::vector<cl_mutable_dispatch_arg_khr> CLUSMArgs(NumPointerArgs);
  for (uint32_t i = 0; i < NumPointerArgs; i++) {
    const ur_exp_command_buffer_update_pointer_arg_desc_t &URPointerArg =
        ArgPointerList[i];
    cl_mutable_dispatch_arg_khr &USMArg = CLUSMArgs[i];
    USMArg.arg_index = URPointerArg.argIndex;
    // pNewPointerArg points at the USM pointer; pass the pointer value itself.
    USMArg.arg_value =
        *static_cast<void *const *>(URPointerArg.pNewPointerArg);
  }

  const uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs;
  const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList =
      pUpdateKernelLaunch->pNewMemObjArgList;
  const uint32_t NumValueArgs = pUpdateKernelLaunch->numNewValueArgs;
  const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList =
      pUpdateKernelLaunch->pNewValueArgList;

  // Mem-object arguments first, then value arguments, in one list.
  std::vector<cl_mutable_dispatch_arg_khr> CLArgs;
  CLArgs.reserve(static_cast<size_t>(NumMemobjArgs) + NumValueArgs);
  for (uint32_t i = 0; i < NumMemobjArgs; i++) {
    const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg =
        ArgMemobjList[i];
    cl_mutable_dispatch_arg_khr CLArg{
        URMemObjArg.argIndex, // arg_index
        sizeof(cl_mem),       // arg_size
        cl_adapter::cast<const cl_mem *>(
            &URMemObjArg.hNewMemObjArg) // arg_value
    };

    CLArgs.push_back(CLArg);
  }

  for (uint32_t i = 0; i < NumValueArgs; i++) {
    const ur_exp_command_buffer_update_value_arg_desc_t &URValueArg =
        ArgValueList[i];
    cl_mutable_dispatch_arg_khr CLArg{
        URValueArg.argIndex,    // arg_index
        URValueArg.argSize,     // arg_size
        URValueArg.pNewValueArg // arg_value
    };
    CLArgs.push_back(CLArg);
  }

  const cl_uint NewWorkDim = pUpdateKernelLaunch->newWorkDim;
  const cl_uint CLWorkDim = hCommand->WorkDim;
  if (NewWorkDim != 0 && NewWorkDim != CLWorkDim) {
    // Limitation of the cl_khr_command_buffer_mutable_dispatch specification
    // that it is an error to change the ND-Range size.
    // https://github.com/KhronosGroup/OpenCL-Docs/issues/1057
    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  }

  // Each work-size array is copied into a buffer that is zero-padded to three
  // elements. The buffer is allocated at its padded size *before* copying so
  // the padding never lands outside the allocation.
  const size_t CopySize = sizeof(size_t) * CLWorkDim;
  const size_t PaddedDim =
      CLWorkDim < 3u ? static_cast<size_t>(3) : static_cast<size_t>(CLWorkDim);
  const auto CopyZeroPadded = [CopySize, PaddedDim](std::vector<size_t> &Dst,
                                                    const size_t *Src) {
    Dst.assign(PaddedDim, 0);
    std::memcpy(Dst.data(), Src, CopySize);
  };
  // data() on an empty vector is not guaranteed to be null, so spell out
  // "no new value supplied" explicitly.
  const auto DataOrNull = [](std::vector<size_t> &Vec) -> size_t * {
    return Vec.empty() ? nullptr : Vec.data();
  };

  std::vector<size_t> CLGlobalWorkOffset, CLGlobalWorkSize, CLLocalWorkSize;

  if (auto GlobalWorkOffsetPtr = pUpdateKernelLaunch->pNewGlobalWorkOffset) {
    CopyZeroPadded(CLGlobalWorkOffset, GlobalWorkOffsetPtr);
  }

  if (auto GlobalWorkSizePtr = pUpdateKernelLaunch->pNewGlobalWorkSize) {
    CopyZeroPadded(CLGlobalWorkSize, GlobalWorkSizePtr);
  }

  if (auto LocalWorkSizePtr = pUpdateKernelLaunch->pNewLocalWorkSize) {
    CopyZeroPadded(CLLocalWorkSize, LocalWorkSizePtr);
  }

  cl_mutable_command_khr command =
      cl_adapter::cast<cl_mutable_command_khr>(hCommand->CLMutableCommand);
  cl_mutable_dispatch_config_khr dispatch_config = {
      CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
      nullptr,
      command,
      static_cast<cl_uint>(CLArgs.size()),      // num_args
      static_cast<cl_uint>(CLUSMArgs.size()),   // num_svm_args
      static_cast<cl_uint>(CLExecInfos.size()), // num_exec_infos
      CLWorkDim,                                // work_dim
      CLArgs.data(),                            // arg_list
      CLUSMArgs.data(),                         // arg_svm_list
      CLExecInfos.data(),                       // exec_info_list
      DataOrNull(CLGlobalWorkOffset),           // global_work_offset
      DataOrNull(CLGlobalWorkSize),             // global_work_size
      DataOrNull(CLLocalWorkSize),              // local_work_size
  };
  cl_mutable_base_config_khr config = {
      CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, &dispatch_config};
  CL_RETURN_ON_FAILURE(
      clUpdateMutableCommandsKHR(hCommandBuffer->CLCommandBuffer, &config));

  return UR_RESULT_SUCCESS;
}
377
576
378
577
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp (
@@ -415,9 +614,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp(
415
614
}
416
615
417
616
// Queries a property of a command-buffer command handle.
//
// Only UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT is handled: the
// value of hCommand->getReferenceCount() is handed to UrReturnHelper, which
// is constructed from propSize / pPropValue / pPropSizeRet. Any other
// propName trips an assert and then returns
// UR_RESULT_ERROR_INVALID_ENUMERATION.
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp(
    ur_exp_command_buffer_command_handle_t hCommand,
    ur_exp_command_buffer_command_info_t propName, size_t propSize,
    void *pPropValue, size_t *pPropSizeRet) {
  UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);

  if (propName == UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT) {
    return ReturnValue(hCommand->getReferenceCount());
  }

  // Every other query is unimplemented.
  assert(!" Command-buffer command info request not implemented");
  return UR_RESULT_ERROR_INVALID_ENUMERATION;
}
0 commit comments