Skip to content

Commit b0d133d

Browse files
authored
Merge pull request #2391 from igchor/profiling_events_Wait
[L0 v2] fixes around eventsWait and eventsWaitWithBarrier
2 parents 0c74842 + fae0932 commit b0d133d

File tree

3 files changed

+91
-6
lines changed

3 files changed

+91
-6
lines changed

source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -283,14 +283,46 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWait(
283283
return UR_RESULT_SUCCESS;
284284
}
285285

286+
// Appends a Level Zero barrier to this queue's command list.
//
// The barrier waits on the events described by phEventWaitList /
// numEventsInWaitList and, when an output event is requested through phEvent,
// signals that event. The queue mutex is held for the whole call.
//
// Returns UR_RESULT_SUCCESS immediately when there is nothing to wait on and
// no event was requested. Otherwise returns UR_RESULT_SUCCESS once the barrier
// has been appended (ZE2UR_CALL presumably returns early with the translated
// error if the Level Zero call fails - confirm against the macro definition).
ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrierImpl(
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY(
      "ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier");

  std::scoped_lock<ur_shared_mutex> lock(this->Mutex);

  if (!numEventsInWaitList && !phEvent) {
    // nop
    return UR_RESULT_SUCCESS;
  }

  auto signalEvent =
      getSignalEvent(phEvent, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER);
  auto [pWaitEvents, numWaitEvents] =
      getWaitListView(phEventWaitList, numEventsInWaitList);

  // The early-out above still lets a null phEvent through when a wait list is
  // supplied. NOTE(review): getSignalEvent is defined elsewhere; assuming it
  // may yield a null event in that case, so do not dereference it blindly.
  // The signal event of zeCommandListAppendBarrier is optional, so a null
  // handle is a valid argument.
  auto zeSignalEvent = signalEvent ? signalEvent->getZeEvent() : nullptr;

  ZE2UR_CALL(zeCommandListAppendBarrier,
             (handler.commandList.get(), zeSignalEvent, numWaitEvents,
              pWaitEvents));

  return UR_RESULT_SUCCESS;
}
310+
286311
// Enqueues a wait on the given events followed by a "barrier" event signal.
//
// An in-order queue does not need a real barrier: it is already guaranteed
// that previous commands in this queue are completed when the signal is
// started, so waiting for the requested events (potentially from different
// queues) and signalling is enough. The exception is a queue created with
// profiling enabled, where a real barrier has to be used: see
// zeCommandListAppendWaitOnEvents.
ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier(
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  const bool profilingEnabled = (flags & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0;

  // Common case: no profiling, a plain events-wait is sufficient.
  if (!profilingEnabled) {
    return enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent);
  }

  return enqueueEventsWaitWithBarrierImpl(numEventsInWaitList, phEventWaitList,
                                          phEvent);
}
295327

296328
ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrierExt(
@@ -757,8 +789,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMPrefetch(
757789
getWaitListView(phEventWaitList, numEventsInWaitList);
758790

759791
if (pWaitEvents) {
760-
ZE2UR_CALL(zeCommandListAppendBarrier, (handler.commandList.get(), nullptr,
761-
numWaitEvents, pWaitEvents));
792+
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
793+
(handler.commandList.get(), numWaitEvents, pWaitEvents));
762794
}
763795
// TODO: figure out how to translate "flags"
764796
ZE2UR_CALL(zeCommandListAppendMemoryPrefetch,
@@ -789,8 +821,8 @@ ur_queue_immediate_in_order_t::enqueueUSMAdvise(const void *pMem, size_t size,
789821
auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0);
790822

791823
if (pWaitEvents) {
792-
ZE2UR_CALL(zeCommandListAppendBarrier, (handler.commandList.get(), nullptr,
793-
numWaitEvents, pWaitEvents));
824+
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
825+
(handler.commandList.get(), numWaitEvents, pWaitEvents));
794826
}
795827

796828
// TODO: figure out how to translate "flags"

source/adapters/level_zero/v2/queue_immediate_in_order.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ {
7777
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
7878
ur_command_t commandType);
7979

80+
ur_result_t
81+
enqueueEventsWaitWithBarrierImpl(uint32_t numEventsInWaitList,
82+
const ur_event_handle_t *phEventWaitList,
83+
ur_event_handle_t *phEvent);
84+
8085
public:
8186
ur_queue_immediate_in_order_t(ur_context_handle_t, ur_device_handle_t,
8287
const ur_queue_properties_t *);

test/conformance/event/urEventGetProfilingInfo.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,51 @@ TEST_P(urEventGetProfilingInfoNegativeTest, InvalidValue) {
121121
}
122122

123123
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEventGetProfilingInfoNegativeTest);
124+
125+
struct urEventGetProfilingInfoForWaitWithBarrier : uur::urProfilingQueueTest {
126+
void SetUp() override {
127+
UUR_RETURN_ON_FATAL_FAILURE(urProfilingQueueTest::SetUp());
128+
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_WRITE_ONLY, size,
129+
nullptr, &buffer));
130+
131+
input.assign(count, 42);
132+
ur_event_handle_t membuf_event = nullptr;
133+
ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue, buffer, false, 0, size,
134+
input.data(), 0, nullptr,
135+
&membuf_event));
136+
137+
ASSERT_SUCCESS(
138+
urEnqueueEventsWaitWithBarrier(queue, 1, &membuf_event, &event));
139+
ASSERT_SUCCESS(urQueueFinish(queue));
140+
}
141+
142+
void TearDown() override {
143+
UUR_RETURN_ON_FATAL_FAILURE(urProfilingQueueTest::TearDown());
144+
}
145+
146+
const size_t count = 1024;
147+
const size_t size = sizeof(uint32_t) * count;
148+
ur_mem_handle_t buffer = nullptr;
149+
ur_event_handle_t event = nullptr;
150+
std::vector<uint32_t> input;
151+
};
152+
153+
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEventGetProfilingInfoForWaitWithBarrier);
154+
155+
// Checks that the event returned by urEnqueueEventsWaitWithBarrier on a
// profiling queue reports non-zero start and end timestamps, with the end
// timestamp greater than the start timestamp.
TEST_P(urEventGetProfilingInfoForWaitWithBarrier, Success) {
    // Query straight into correctly typed, correctly aligned 64-bit storage
    // and pass its exact size, instead of reinterpreting an oversized byte
    // buffer as size_t.
    uint64_t start_timing = 0;
    ASSERT_SUCCESS(urEventGetProfilingInfo(
        event, UR_PROFILING_INFO_COMMAND_START, sizeof(start_timing),
        &start_timing, nullptr));
    ASSERT_NE(start_timing, 0u);

    uint64_t end_timing = 0;
    ASSERT_SUCCESS(urEventGetProfilingInfo(event, UR_PROFILING_INFO_COMMAND_END,
                                           sizeof(end_timing), &end_timing,
                                           nullptr));
    ASSERT_NE(end_timing, 0u);

    ASSERT_GT(end_timing, start_timing);
}

0 commit comments

Comments
 (0)