Skip to content

Commit 1a3ea20

Browse files
Xewar313KornevNikita
authored andcommitted
[UR] [V2] Add wait before enqueue in command buffer (#17709)
According to @MichalMrozek, zeCommandListImmediateAppendCommandListsExp has the same requirements as zeCommandQueueExecuteCommandLists, because of this, the command list must not be referenced by device when it is enqueued. This PR fixes this issue by adding event to synchronize append and execution
1 parent 12b7c2b commit 1a3ea20

File tree

8 files changed

+72
-23
lines changed

8 files changed

+72
-23
lines changed

unified-runtime/source/adapters/level_zero/v2/command_buffer.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,28 @@ ur_result_t ur_exp_command_buffer_handle_t_::finalizeCommandBuffer() {
4848
isFinalized = true;
4949
return UR_RESULT_SUCCESS;
5050
}
51+
ur_event_handle_t ur_exp_command_buffer_handle_t_::getExecutionEventUnlocked() {
52+
return currentExecution;
53+
}
54+
55+
ur_result_t ur_exp_command_buffer_handle_t_::registerExecutionEventUnlocked(
56+
ur_event_handle_t nextExecutionEvent) {
57+
if (currentExecution) {
58+
UR_CALL(currentExecution->release());
59+
currentExecution = nullptr;
60+
}
61+
if (nextExecutionEvent) {
62+
currentExecution = nextExecutionEvent;
63+
UR_CALL(nextExecutionEvent->retain());
64+
}
65+
return UR_RESULT_SUCCESS;
66+
}
5167

68+
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
69+
if (currentExecution) {
70+
currentExecution->release();
71+
}
72+
}
5273
namespace ur::level_zero {
5374

5475
ur_result_t

unified-runtime/source/adapters/level_zero/v2/command_buffer.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,11 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
2323
v2::raii::command_list_unique_handle &&commandList,
2424
const ur_exp_command_buffer_desc_t *desc);
2525

26-
~ur_exp_command_buffer_handle_t_() = default;
26+
~ur_exp_command_buffer_handle_t_();
27+
28+
ur_event_handle_t getExecutionEventUnlocked();
29+
ur_result_t
30+
registerExecutionEventUnlocked(ur_event_handle_t nextExecutionEvent);
2731

2832
lockable<ur_command_list_manager> commandListManager;
2933

@@ -36,6 +40,8 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
3640
private:
3741
// Indicates if command-buffer was finalized.
3842
bool isFinalized = false;
43+
44+
ur_event_handle_t currentExecution = nullptr;
3945
};
4046

4147
struct ur_exp_command_buffer_command_handle_t_ : public _ur_object {

unified-runtime/source/adapters/level_zero/v2/command_list_manager.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,16 +157,20 @@ ur_result_t ur_command_list_manager::appendRegionCopyUnlocked(
157157
return UR_RESULT_SUCCESS;
158158
}
159159

160-
wait_list_view
161-
ur_command_list_manager::getWaitListView(const ur_event_handle_t *phWaitEvents,
162-
uint32_t numWaitEvents) {
160+
wait_list_view ur_command_list_manager::getWaitListView(
161+
const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents,
162+
ur_event_handle_t additionalWaitEvent) {
163163

164-
waitList.resize(numWaitEvents);
164+
uint32_t totalNumWaitEvents =
165+
numWaitEvents + (additionalWaitEvent != nullptr ? 1 : 0);
166+
waitList.resize(totalNumWaitEvents);
165167
for (uint32_t i = 0; i < numWaitEvents; i++) {
166168
waitList[i] = phWaitEvents[i]->getZeEvent();
167169
}
168-
169-
return {waitList.data(), static_cast<uint32_t>(numWaitEvents)};
170+
if (additionalWaitEvent != nullptr) {
171+
waitList[totalNumWaitEvents - 1] = additionalWaitEvent->getZeEvent();
172+
}
173+
return {waitList.data(), static_cast<uint32_t>(totalNumWaitEvents)};
170174
}
171175

172176
ze_event_handle_t

unified-runtime/source/adapters/level_zero/v2/command_list_manager.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,9 @@ struct ur_command_list_manager {
133133

134134
ze_command_list_handle_t getZeCommandList();
135135

136-
wait_list_view getWaitListView(const ur_event_handle_t *phWaitEvents,
137-
uint32_t numWaitEvents);
136+
wait_list_view
137+
getWaitListView(const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents,
138+
ur_event_handle_t additionalWaitEvent = nullptr);
138139
ze_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent,
139140
ur_command_t commandType);
140141

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ namespace v2 {
2323

2424
wait_list_view ur_queue_immediate_in_order_t::getWaitListView(
2525
locked<ur_command_list_manager> &commandList,
26-
const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents) {
27-
return commandList->getWaitListView(phWaitEvents, numWaitEvents);
26+
const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents,
27+
ur_event_handle_t additionalWaitEvent) {
28+
return commandList->getWaitListView(phWaitEvents, numWaitEvents,
29+
additionalWaitEvent);
2830
}
2931

3032
static int32_t getZeOrdinal(ur_device_handle_t hDevice) {
@@ -896,7 +898,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp(
896898
ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp(
897899
uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists,
898900
ur_event_handle_t *phEvent, uint32_t numEventsInWaitList,
899-
const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand) {
901+
const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand,
902+
ur_event_handle_t additionalWaitEvent) {
900903
TRACK_SCOPE_LATENCY(
901904
"ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp");
902905

@@ -905,7 +908,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp(
905908
getSignalEvent(commandListLocked, phEvent, callerCommand);
906909

907910
auto [pWaitEvents, numWaitEvents] =
908-
getWaitListView(commandListLocked, phEventWaitList, numEventsInWaitList);
911+
getWaitListView(commandListLocked, phEventWaitList, numEventsInWaitList,
912+
additionalWaitEvent);
909913
// zeCommandListImmediateAppendCommandListsExp is not working with in-order
910914
// immediate lists what causes problems with synchronization
911915
// TODO: remove synchronization when it is not needed
@@ -926,9 +930,21 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCommandBufferExp(
926930
auto commandListLocked = hCommandBuffer->commandListManager.lock();
927931
ze_command_list_handle_t commandBufferCommandList =
928932
commandListLocked->getZeCommandList();
929-
return enqueueGenericCommandListsExp(1, &commandBufferCommandList, phEvent,
930-
numEventsInWaitList, phEventWaitList,
931-
UR_COMMAND_ENQUEUE_COMMAND_BUFFER_EXP);
933+
ur_event_handle_t internalEvent = nullptr;
934+
if (phEvent == nullptr) {
935+
phEvent = &internalEvent;
936+
}
937+
ur_event_handle_t executionEvent =
938+
hCommandBuffer->getExecutionEventUnlocked();
939+
940+
UR_CALL(enqueueGenericCommandListsExp(
941+
1, &commandBufferCommandList, phEvent, numEventsInWaitList,
942+
phEventWaitList, UR_COMMAND_ENQUEUE_COMMAND_BUFFER_EXP, executionEvent));
943+
UR_CALL(hCommandBuffer->registerExecutionEventUnlocked(*phEvent));
944+
if (internalEvent != nullptr) {
945+
internalEvent->release();
946+
}
947+
return UR_RESULT_SUCCESS;
932948
}
933949

934950
ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunchCustomExp(

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.hpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ {
3737
std::vector<ur_event_handle_t> deferredEvents;
3838
std::vector<ur_kernel_handle_t> submittedKernels;
3939

40-
wait_list_view getWaitListView(locked<ur_command_list_manager> &commandList,
41-
const ur_event_handle_t *phWaitEvents,
42-
uint32_t numWaitEvents);
40+
wait_list_view
41+
getWaitListView(locked<ur_command_list_manager> &commandList,
42+
const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents,
43+
ur_event_handle_t additionalWaitEvent = nullptr);
4344

4445
ze_event_handle_t getSignalEvent(locked<ur_command_list_manager> &commandList,
4546
ur_event_handle_t *hUserEvent,
@@ -56,7 +57,8 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ {
5657
ur_result_t enqueueGenericCommandListsExp(
5758
uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists,
5859
ur_event_handle_t *phEvent, uint32_t numEventsInWaitList,
59-
const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand);
60+
const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand,
61+
ur_event_handle_t additionalWaitEvent);
6062

6163
ur_result_t
6264
enqueueEventsWaitWithBarrierImpl(uint32_t numEventsInWaitList,

unified-runtime/test/conformance/exp_command_buffer/fixtures.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ static void checkCommandBufferUpdateSupport(
4545

4646
struct urCommandBufferExpTest : uur::urContextTest {
4747
void SetUp() override {
48-
UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{});
4948

5049
UUR_RETURN_ON_FATAL_FAILURE(uur::urContextTest::SetUp());
5150

@@ -72,7 +71,6 @@ struct urCommandBufferExpTest : uur::urContextTest {
7271
template <class T>
7372
struct urCommandBufferExpTestWithParam : urQueueTestWithParam<T> {
7473
void SetUp() override {
75-
UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{});
7674

7775
UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTestWithParam<T>::SetUp());
7876

@@ -97,7 +95,6 @@ struct urCommandBufferExpTestWithParam : urQueueTestWithParam<T> {
9795

9896
struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest {
9997
void SetUp() override {
100-
UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{});
10198

10299
UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp());
103100

unified-runtime/test/conformance/exp_command_buffer/update/invalid_update.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,8 @@ TEST_P(InvalidUpdateTest, CommandBufferMismatch) {
309309
// that isn't supported.
310310
struct InvalidUpdateCommandBufferExpExecutionTest : uur::urKernelExecutionTest {
311311
void SetUp() override {
312+
UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{});
313+
312314
program_name = "fill_usm";
313315
UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp());
314316

0 commit comments

Comments
 (0)