Skip to content

Commit ca5c342

Browse files
authored
Merge pull request #1353 from againull/againull/l0_adapter_update_cmd_buffer
[L0] Support updating kernel commands in command buffers
2 parents 4d0183a + c9be1e2 commit ca5c342

File tree

11 files changed

+602
-80
lines changed

11 files changed

+602
-80
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 406 additions & 53 deletions
Large diffs are not rendered by default.

source/adapters/level_zero/command_buffer.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,21 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
7878
// Event which a command-buffer waits on until the main command-list event
7979
// has been reset.
8080
ur_event_handle_t AllResetEvent = nullptr;
81+
// Indicates if command-buffer commands can be updated after it is closed.
82+
bool IsUpdatable = false;
83+
// Indicates if command buffer was finalized.
84+
bool IsFinalized = false;
85+
};
86+
87+
struct ur_exp_command_buffer_command_handle_t_ : public _ur_object {
88+
ur_exp_command_buffer_command_handle_t_(ur_exp_command_buffer_handle_t,
89+
uint64_t, ur_kernel_handle_t);
90+
91+
~ur_exp_command_buffer_command_handle_t_();
92+
93+
// Command-buffer of this command.
94+
ur_exp_command_buffer_handle_t CommandBuffer;
95+
96+
uint64_t CommandId;
97+
ur_kernel_handle_t Kernel;
8198
};

source/adapters/level_zero/common.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ ur_result_t ze2urResult(ze_result_t ZeResult) {
5858
return UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
5959
case ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY:
6060
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
61+
case ZE_RESULT_ERROR_UNSUPPORTED_FEATURE:
62+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
6163
default:
6264
return UR_RESULT_ERROR_UNKNOWN;
6365
}
@@ -171,6 +173,40 @@ template <> ze_structure_type_t getZeStructureType<ze_fence_desc_t>() {
171173
template <> ze_structure_type_t getZeStructureType<ze_command_list_desc_t>() {
172174
return ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
173175
}
176+
template <>
177+
ze_structure_type_t
178+
getZeStructureType<ze_mutable_command_list_exp_properties_t>() {
179+
return ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_PROPERTIES;
180+
}
181+
template <>
182+
ze_structure_type_t getZeStructureType<ze_mutable_command_list_exp_desc_t>() {
183+
return ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_DESC;
184+
}
185+
template <>
186+
ze_structure_type_t getZeStructureType<ze_mutable_command_id_exp_desc_t>() {
187+
return ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_ID_EXP_DESC;
188+
}
189+
template <>
190+
ze_structure_type_t getZeStructureType<ze_mutable_group_count_exp_desc_t>() {
191+
return ZE_STRUCTURE_TYPE_MUTABLE_GROUP_COUNT_EXP_DESC;
192+
}
193+
template <>
194+
ze_structure_type_t getZeStructureType<ze_mutable_group_size_exp_desc_t>() {
195+
return ZE_STRUCTURE_TYPE_MUTABLE_GROUP_SIZE_EXP_DESC;
196+
}
197+
template <>
198+
ze_structure_type_t getZeStructureType<ze_mutable_global_offset_exp_desc_t>() {
199+
return ZE_STRUCTURE_TYPE_MUTABLE_GLOBAL_OFFSET_EXP_DESC;
200+
}
201+
template <>
202+
ze_structure_type_t
203+
getZeStructureType<ze_mutable_kernel_argument_exp_desc_t>() {
204+
return ZE_STRUCTURE_TYPE_MUTABLE_KERNEL_ARGUMENT_EXP_DESC;
205+
}
206+
template <>
207+
ze_structure_type_t getZeStructureType<ze_mutable_commands_exp_desc_t>() {
208+
return ZE_STRUCTURE_TYPE_MUTABLE_COMMANDS_EXP_DESC;
209+
}
174210
template <> ze_structure_type_t getZeStructureType<ze_context_desc_t>() {
175211
return ZE_STRUCTURE_TYPE_CONTEXT_DESC;
176212
}

source/adapters/level_zero/device.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -917,8 +917,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
917917
}
918918
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
919919
return ReturnValue(true);
920-
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:
921-
return ReturnValue(false);
920+
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
921+
// TODO: Level Zero API allows checking support for all sub-features:
922+
// ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS,
923+
// ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT,
924+
// ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE,
925+
// ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET,
926+
// ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT,
927+
// ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS
928+
// but UR has only one property to check the mutable command lists feature
929+
// support. For now return true if kernel arguments can be updated.
930+
auto KernelArgUpdateSupport =
931+
Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags &
932+
ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS;
933+
return ReturnValue(KernelArgUpdateSupport &&
934+
Device->Platform->ZeMutableCmdListExt.Supported);
935+
}
922936
case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP:
923937
return ReturnValue(true);
924938
case UR_DEVICE_INFO_BINDLESS_IMAGES_SHARED_USM_SUPPORT_EXP:
@@ -1142,6 +1156,15 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
11421156
(ZeDevice, &Count, &Properties));
11431157
};
11441158

1159+
ZeDeviceMutableCmdListsProperties.Compute =
1160+
[ZeDevice](
1161+
ZeStruct<ze_mutable_command_list_exp_properties_t> &Properties) {
1162+
ze_device_properties_t P;
1163+
P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
1164+
P.pNext = &Properties;
1165+
ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P));
1166+
};
1167+
11451168
ImmCommandListUsed = this->useImmediateCommandLists();
11461169

11471170
uint32_t numQueueGroups = 0;

source/adapters/level_zero/device.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,4 +195,6 @@ struct ur_device_handle_t_ : _ur_object {
195195
ZeCache<ZeStruct<ze_device_cache_properties_t>> ZeDeviceCacheProperties;
196196
ZeCache<ZeStruct<ze_device_ip_version_ext_t>> ZeDeviceIpVersionExt;
197197
ZeCache<struct ze_global_memsize> ZeGlobalMemSize;
198+
ZeCache<ZeStruct<ze_mutable_command_list_exp_properties_t>>
199+
ZeDeviceMutableCmdListsProperties;
198200
};

source/adapters/level_zero/platform.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,39 @@ ur_result_t ur_platform_handle_t_::initialize() {
206206
// If yes, then set up L0 API pointers if the platform supports it.
207207
ZeUSMImport.setZeUSMImport(this);
208208

209+
// Check if mutable command list extension is supported and initialize
210+
// function pointers.
211+
ZeMutableCmdListExt.Supported |=
212+
(ZE_CALL_NOCHECK(
213+
zeDriverGetExtensionFunctionAddress,
214+
(ZeDriver, "zeCommandListGetNextCommandIdExp",
215+
reinterpret_cast<void **>(
216+
&ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp))) == 0);
217+
218+
ZeMutableCmdListExt.Supported &=
219+
(ZE_CALL_NOCHECK(zeDriverGetExtensionFunctionAddress,
220+
(ZeDriver, "zeCommandListUpdateMutableCommandsExp",
221+
reinterpret_cast<void **>(
222+
&ZeMutableCmdListExt
223+
.zexCommandListUpdateMutableCommandsExp))) ==
224+
0);
225+
226+
ZeMutableCmdListExt.Supported &=
227+
(ZE_CALL_NOCHECK(
228+
zeDriverGetExtensionFunctionAddress,
229+
(ZeDriver, "zeCommandListUpdateMutableCommandSignalEventExp",
230+
reinterpret_cast<void **>(
231+
&ZeMutableCmdListExt
232+
.zexCommandListUpdateMutableCommandSignalEventExp))) == 0);
233+
234+
ZeMutableCmdListExt.Supported &=
235+
(ZE_CALL_NOCHECK(
236+
zeDriverGetExtensionFunctionAddress,
237+
(ZeDriver, "zeCommandListUpdateMutableCommandWaitEventsExp",
238+
reinterpret_cast<void **>(
239+
&ZeMutableCmdListExt
240+
.zexCommandListUpdateMutableCommandWaitEventsExp))) == 0);
241+
209242
return UR_RESULT_SUCCESS;
210243
}
211244

source/adapters/level_zero/platform.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,22 @@ struct ur_platform_handle_t_ : public _ur_platform {
5555
// in the driver.
5656
std::list<ur_context_handle_t> Contexts;
5757
ur_shared_mutex ContextsMutex;
58+
59+
// Structure with function pointers for mutable command list extension.
60+
// Not all drivers may support it, so considering that the platform object is
61+
// associated with a particular Level Zero driver, store this extension here.
62+
struct ZeMutableCmdListExtension {
63+
bool Supported = false;
64+
ze_result_t (*zexCommandListGetNextCommandIdExp)(
65+
ze_command_list_handle_t, const ze_mutable_command_id_exp_desc_t *,
66+
uint64_t *) = nullptr;
67+
ze_result_t (*zexCommandListUpdateMutableCommandsExp)(
68+
ze_command_list_handle_t,
69+
const ze_mutable_commands_exp_desc_t *) = nullptr;
70+
ze_result_t (*zexCommandListUpdateMutableCommandSignalEventExp)(
71+
ze_command_list_handle_t, uint64_t, ze_event_handle_t) = nullptr;
72+
ze_result_t (*zexCommandListUpdateMutableCommandWaitEventsExp)(
73+
ze_command_list_handle_t, uint64_t, uint32_t,
74+
ze_event_handle_t *) = nullptr;
75+
} ZeMutableCmdListExt;
5876
};

test/conformance/exp_command_buffer/buffer_fill_kernel_update.cpp

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,10 @@ TEST_P(BufferFillCommandTest, UpdateParameters) {
123123

124124
// Test updating the global size so that the fill outputs to a larger buffer
125125
TEST_P(BufferFillCommandTest, UpdateGlobalSize) {
126+
if (!updatable_execution_range_support) {
127+
GTEST_SKIP() << "Execution range update is not supported.";
128+
}
129+
126130
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
127131
nullptr, nullptr));
128132
ASSERT_SUCCESS(urQueueFinish(queue));
@@ -153,7 +157,7 @@ TEST_P(BufferFillCommandTest, UpdateGlobalSize) {
153157
0, // numNewPointerArgs
154158
0, // numNewValueArgs
155159
0, // numNewExecInfos
156-
0, // newWorkDim
160+
1, // newWorkDim
157161
&new_output_desc, // pNewMemObjArgList
158162
nullptr, // pNewPointerArgList
159163
nullptr, // pNewValueArgList
@@ -180,7 +184,8 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) {
180184
ASSERT_SUCCESS(urQueueFinish(queue));
181185
ValidateBuffer(buffer, sizeof(val) * global_size, val);
182186

183-
size_t new_global_size = 64;
187+
size_t new_global_size =
188+
updatable_execution_range_support ? 64 : global_size;
184189
const size_t new_buffer_size = sizeof(val) * new_global_size;
185190
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE,
186191
new_buffer_size, nullptr, &new_buffer));
@@ -247,25 +252,28 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) {
247252
ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle,
248253
&input_update_desc));
249254

250-
ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = {
251-
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
252-
nullptr, // pNext
253-
0, // numNewMemObjArgs
254-
0, // numNewPointerArgs
255-
0, // numNewValueArgs
256-
0, // numNewExecInfos
257-
0, // newWorkDim
258-
nullptr, // pNewMemObjArgList
259-
nullptr, // pNewPointerArgList
260-
nullptr, // pNewValueArgList
261-
nullptr, // pNewExecInfoList
262-
nullptr, // pNewGlobalWorkOffset
263-
&new_global_size, // pNewGlobalWorkSize
264-
nullptr, // pNewLocalWorkSize
265-
};
266-
267-
ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(
268-
command_handle, &global_size_update_desc));
255+
if (updatable_execution_range_support) {
256+
ur_exp_command_buffer_update_kernel_launch_desc_t
257+
global_size_update_desc = {
258+
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype
259+
nullptr, // pNext
260+
0, // numNewMemObjArgs
261+
0, // numNewPointerArgs
262+
0, // numNewValueArgs
263+
0, // numNewExecInfos
264+
0, // newWorkDim
265+
nullptr, // pNewMemObjArgList
266+
nullptr, // pNewPointerArgList
267+
nullptr, // pNewValueArgList
268+
nullptr, // pNewExecInfoList
269+
nullptr, // pNewGlobalWorkOffset
270+
&new_global_size, // pNewGlobalWorkSize
271+
nullptr, // pNewLocalWorkSize
272+
};
273+
274+
ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(
275+
command_handle, &global_size_update_desc));
276+
}
269277

270278
ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0,
271279
nullptr, nullptr));

test/conformance/exp_command_buffer/fixtures.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,24 +112,50 @@ struct urUpdatableCommandBufferExpExecutionTest
112112
GTEST_SKIP() << "Updating EXP command-buffers is not supported.";
113113
}
114114

115+
// Currently the Level Zero driver doesn't support updating the execution range.
116+
if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) {
117+
updatable_execution_range_support = false;
118+
}
119+
115120
// Create a command-buffer with update enabled.
116121
ur_exp_command_buffer_desc_t desc{
117122
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC, nullptr, true};
118123

119124
ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc,
120125
&updatable_cmd_buf_handle));
121126
ASSERT_NE(updatable_cmd_buf_handle, nullptr);
127+
128+
// Currently there are synchronization issues with immediate submission when used for command buffers.
129+
// So, create queue with batched submission for this test suite if the backend is Level Zero.
130+
if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) {
131+
ur_queue_flags_t flags = UR_QUEUE_FLAG_SUBMISSION_BATCHED;
132+
ur_queue_properties_t props = {
133+
/*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES,
134+
/*.pNext =*/nullptr,
135+
/*.flags =*/flags,
136+
};
137+
ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue));
138+
ASSERT_NE(queue, nullptr);
139+
} else {
140+
queue = urCommandBufferExpExecutionTest::queue;
141+
}
122142
}
123143

124144
void TearDown() override {
125145
if (updatable_cmd_buf_handle) {
126146
EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle));
127147
}
148+
if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO && queue) {
149+
ASSERT_SUCCESS(urQueueRelease(queue));
150+
}
151+
128152
UUR_RETURN_ON_FATAL_FAILURE(
129153
urCommandBufferExpExecutionTest::TearDown());
130154
}
131155

132156
ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr;
157+
ur_bool_t updatable_execution_range_support = true;
158+
ur_queue_handle_t queue = nullptr;
133159
};
134160

135161
struct urCommandBufferCommandExpTest

test/conformance/exp_command_buffer/ndrange_update.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ struct NDRangeUpdateTest
1515
UUR_RETURN_ON_FATAL_FAILURE(
1616
urUpdatableCommandBufferExpExecutionTest::SetUp());
1717

18+
if (!updatable_execution_range_support) {
19+
GTEST_SKIP() << "Execution range update is not supported.";
20+
}
21+
1822
ur_device_usm_access_capability_flags_t shared_usm_flags;
1923
ASSERT_SUCCESS(
2024
uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));

test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,9 @@ TEST_P(USMFillCommandTest, UpdateParameters) {
8787
ASSERT_SUCCESS(urQueueFinish(queue));
8888
Validate((uint32_t *)shared_ptr, global_size, val);
8989

90-
// Allocate a new USM pointer of larger size
91-
size_t new_global_size = 64;
90+
// Allocate a new USM pointer of larger size if feature is supported.
91+
size_t new_global_size =
92+
updatable_execution_range_support ? 64 : global_size;
9293
const size_t new_allocation_size = sizeof(val) * new_global_size;
9394
ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
9495
new_allocation_size, &new_shared_ptr));
@@ -128,8 +129,9 @@ TEST_P(USMFillCommandTest, UpdateParameters) {
128129
&new_input_desc, // pNewValueArgList
129130
nullptr, // pNewExecInfoList
130131
nullptr, // pNewGlobalWorkOffset
131-
&new_global_size, // pNewGlobalWorkSize
132-
nullptr, // pNewLocalWorkSize
132+
updatable_execution_range_support ? &new_global_size
133+
: nullptr, // pNewGlobalWorkSize
134+
nullptr, // pNewLocalWorkSize
133135
};
134136

135137
// Update kernel and enqueue command-buffer again

0 commit comments

Comments
 (0)