Skip to content

Commit de6f32e

Browse files
[UR] Draft for adding support for counter-based events
Draft implementation of counter-based events. As of now, only the creation of events, cmdlists, and cmdqueues/pools is implemented. Signed-off-by: Zhang, Winston <[email protected]>
1 parent 4814e71 commit de6f32e

File tree

8 files changed

+89
-33
lines changed

8 files changed

+89
-33
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -942,9 +942,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
942942
(SignalCommandList->first, CommandBuffer->WaitEvent->ZeEvent));
943943

944944
if (Event) {
945-
UR_CALL(createEventAndAssociateQueue(
946-
Queue, &RetEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
947-
SignalCommandList, false, false, true));
945+
UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent,
946+
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
947+
SignalCommandList, false, false, true,
948+
Queue->usingCounterBasedEvents()));
948949

949950
if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
950951
// Multiple submissions of a command buffer implies that we need to save

source/adapters/level_zero/context.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,8 @@ static const uint32_t MaxNumEventsPerPool = [] {
468468

469469
ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
470470
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
471-
bool ProfilingEnabled, ur_device_handle_t Device) {
471+
bool ProfilingEnabled, ur_device_handle_t Device,
472+
std::optional<bool> CounterBasedEventEnabled) {
472473
// Lock while updating event pool machinery.
473474
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
474475

@@ -510,6 +511,16 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
510511
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
511512
if (ProfilingEnabled)
512513
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
514+
if (CounterBasedEventEnabled.has_value() &&
515+
CounterBasedEventEnabled.value()) {
516+
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
517+
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
518+
ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC};
519+
counterBasedExt.flags |=
520+
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE |
521+
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE;
522+
ZeEventPoolDesc.pNext = &counterBasedExt;
523+
}
513524
urPrint("ze_event_pool_desc_t flags set to: %d\n", ZeEventPoolDesc.flags);
514525

515526
std::vector<ze_device_handle_t> ZeDevices;

source/adapters/level_zero/context.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,10 +192,10 @@ struct ur_context_handle_t_ : _ur_object {
192192
// pool then create new one. The HostVisible parameter tells if we need a
193193
// slot for a host-visible event. The ProfilingEnabled tells if we need a
194194
// slot for an event with profiling capabilities.
195-
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
196-
bool HostVisible,
197-
bool ProfilingEnabled,
198-
ur_device_handle_t Device);
195+
ur_result_t getFreeSlotInExistingOrNewPool(
196+
ze_event_pool_handle_t &, size_t &, bool HostVisible,
197+
bool ProfilingEnabled, ur_device_handle_t Device,
198+
std::optional<bool> CounterBasedEventEnabled = std::nullopt);
199199

200200
// Get ur_event_handle_t from cache.
201201
ur_event_handle_t getEventFromContextCache(bool HostVisible,

source/adapters/level_zero/event.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,7 @@ ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent(
606606
UR_CALL(createEventAndAssociateQueue(
607607
UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList,
608608
/* IsInternal */ false, /* IsMultiDevice */ false,
609-
/* HostVisible */ true));
609+
/* HostVisible */ true, UrQueue->usingCounterBasedEvents()));
610610

611611
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
612612
(CommandList->first, 1, &ZeEvent));
@@ -1049,7 +1049,8 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
10491049
//
10501050
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
10511051
bool IsMultiDevice, bool HostVisible,
1052-
ur_event_handle_t *RetEvent) {
1052+
ur_event_handle_t *RetEvent,
1053+
std::optional<bool> CounterBasedEventEnabled) {
10531054

10541055
bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled();
10551056

@@ -1071,14 +1072,18 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
10711072
size_t Index = 0;
10721073

10731074
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
1074-
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device))
1075+
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
1076+
CounterBasedEventEnabled.has_value()
1077+
? CounterBasedEventEnabled.value()
1078+
: false))
10751079
return Res;
10761080

10771081
ZeStruct<ze_event_desc_t> ZeEventDesc;
10781082
ZeEventDesc.index = Index;
10791083
ZeEventDesc.wait = 0;
10801084

1081-
if (HostVisible) {
1085+
if (HostVisible || (CounterBasedEventEnabled.has_value() &&
1086+
CounterBasedEventEnabled.value())) {
10821087
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
10831088
} else {
10841089
//
@@ -1287,7 +1292,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
12871292

12881293
UR_CALL(createEventAndAssociateQueue(
12891294
Queue, &MultiDeviceEvent, EventList[I]->CommandType, CommandList,
1290-
IsInternal, IsMultiDevice));
1295+
IsInternal, IsMultiDevice, Queue->usingCounterBasedEvents()));
12911296
MultiDeviceZeEvent = MultiDeviceEvent->ZeEvent;
12921297
const auto &ZeCommandList = CommandList->first;
12931298
EventList[I]->RefCount.increment();

source/adapters/level_zero/event.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@
2929

3030
extern "C" {
3131
ur_result_t urEventReleaseInternal(ur_event_handle_t Event);
32-
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
33-
bool IsMultiDevice, bool HostVisible,
34-
ur_event_handle_t *RetEvent);
32+
ur_result_t
33+
EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
34+
bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent,
35+
std::optional<bool> CounterBasedEventEnabled = std::nullopt);
3536
} // extern "C"
3637

3738
// This is an experimental option that allows to disable caching of events in

source/adapters/level_zero/kernel.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
206206
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
207207

208208
UR_CALL(createEventAndAssociateQueue(Queue, Event, UR_COMMAND_KERNEL_LAUNCH,
209-
CommandList, IsInternal, false));
209+
CommandList, IsInternal, false,
210+
Queue->usingCounterBasedEvents()));
210211
ZeEvent = (*Event)->ZeEvent;
211212
(*Event)->WaitList = TmpWaitList;
212213

source/adapters/level_zero/queue.cpp

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,7 +1127,9 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList,
11271127
reinterpret_cast<ur_queue_handle_t>(this), &HostVisibleEvent,
11281128
UR_EXT_COMMAND_TYPE_USER, CommandList,
11291129
/* IsInternal */ false, /* IsMultiDevice */ true,
1130-
/* HostVisible */ true);
1130+
/* HostVisible */ true,
1131+
(reinterpret_cast<ur_queue_handle_t>(this))
1132+
->usingCounterBasedEvents());
11311133
if (Res)
11321134
return Res;
11331135

@@ -1364,6 +1366,28 @@ bool ur_queue_handle_t_::isInOrderQueue() const {
13641366
0);
13651367
}
13661368

1369+
bool ur_queue_handle_t_::usingCounterBasedEvents() const {
1370+
if (!this->isInOrderQueue())
1371+
return false;
1372+
1373+
static const bool UseDriverCounterBasedEvents = [this] {
1374+
const char *UrRet = std::getenv("UR_L0_USE_DRIVER_COUNTER_BASED_EVENTS");
1375+
if (!UrRet)
1376+
return false;
1377+
return std::atoi(UrRet) != 0;
1378+
}();
1379+
1380+
bool usingInOrderList = true;
1381+
for (auto &&It = this->CommandListMap.begin();
1382+
It != this->CommandListMap.end(); ++It) {
1383+
if (It->second.ZeQueueDesc.flags != ZE_COMMAND_QUEUE_FLAG_IN_ORDER) {
1384+
usingInOrderList = false;
1385+
break;
1386+
}
1387+
}
1388+
return UseDriverCounterBasedEvents && usingInOrderList;
1389+
}
1390+
13671391
// Helper function to perform the necessary cleanup of the events from reset cmd
13681392
// list.
13691393
ur_result_t CleanupEventListFromResetCmdList(
@@ -1498,12 +1522,11 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice,
14981522
// visible pool.
14991523
// \param HostVisible tells if the event must be created in the
15001524
// host-visible pool. If not set then this function will decide.
1501-
ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
1502-
ur_event_handle_t *Event,
1503-
ur_command_t CommandType,
1504-
ur_command_list_ptr_t CommandList,
1505-
bool IsInternal, bool IsMultiDevice,
1506-
std::optional<bool> HostVisible) {
1525+
ur_result_t createEventAndAssociateQueue(
1526+
ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType,
1527+
ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice,
1528+
std::optional<bool> HostVisible,
1529+
std::optional<bool> usingCounterBasedEvents) {
15071530

15081531
if (!HostVisible.has_value()) {
15091532
// Internal/discarded events do not need host-scope visibility.
@@ -1516,8 +1539,10 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
15161539
: nullptr;
15171540

15181541
if (*Event == nullptr)
1519-
UR_CALL(EventCreate(Queue->Context, Queue, IsMultiDevice,
1520-
HostVisible.value(), Event));
1542+
UR_CALL(EventCreate(
1543+
Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event,
1544+
usingCounterBasedEvents.has_value() ? usingCounterBasedEvents.value()
1545+
: false));
15211546

15221547
(*Event)->UrQueue = Queue;
15231548
(*Event)->CommandType = CommandType;
@@ -1805,6 +1830,9 @@ ur_queue_handle_t_::ur_queue_group_t::getZeQueue(uint32_t *QueueGroupOrdinal) {
18051830
ZeCommandQueueDesc.ordinal = *QueueGroupOrdinal;
18061831
ZeCommandQueueDesc.index = QueueIndex;
18071832
ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
1833+
if (Queue->usingCounterBasedEvents()) {
1834+
ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
1835+
}
18081836
const char *Priority = "Normal";
18091837
if (Queue->isPriorityLow()) {
18101838
ZeCommandQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW;
@@ -1859,27 +1887,33 @@ ur_result_t ur_queue_handle_t_::createCommandList(
18591887
ze_command_list_handle_t ZeCommandList;
18601888

18611889
uint32_t QueueGroupOrdinal;
1890+
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
1891+
if (usingCounterBasedEvents()) {
1892+
ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER;
1893+
ZeCommandListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
1894+
}
18621895
auto &QGroup = getQueueGroup(UseCopyEngine);
18631896
auto &ZeCommandQueue =
18641897
ForcedCmdQueue ? *ForcedCmdQueue : QGroup.getZeQueue(&QueueGroupOrdinal);
18651898
if (ForcedCmdQueue)
18661899
QueueGroupOrdinal = QGroup.getCmdQueueOrdinal(ZeCommandQueue);
18671900

1868-
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
18691901
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;
1870-
18711902
ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
18721903
&ZeCommandListDesc, &ZeCommandList));
18731904

1874-
ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
1905+
if (!usingCounterBasedEvents()) {
1906+
ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
1907+
}
18751908
ZeStruct<ze_command_queue_desc_t> ZeQueueDesc;
18761909
ZeQueueDesc.ordinal = QueueGroupOrdinal;
18771910
std::tie(CommandList, std::ignore) = CommandListMap.insert(
18781911
std::pair<ze_command_list_handle_t, ur_command_list_info_t>(
18791912
ZeCommandList, {ZeFence, false, false, ZeCommandQueue, ZeQueueDesc}));
1880-
1881-
UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList));
1882-
UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine));
1913+
if (!usingCounterBasedEvents()) {
1914+
UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList));
1915+
UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine));
1916+
}
18831917
return UR_RESULT_SUCCESS;
18841918
}
18851919

source/adapters/level_zero/queue.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,8 @@ struct ur_queue_handle_t_ : _ur_object {
399399
// Returns true if the queue is an in-order queue.
400400
bool isInOrderQueue() const;
401401

402+
bool usingCounterBasedEvents() const;
403+
402404
// Returns true if the queue has discard events property.
403405
bool isDiscardEvents() const;
404406

@@ -543,7 +545,8 @@ struct ur_queue_handle_t_ : _ur_object {
543545
ur_result_t createEventAndAssociateQueue(
544546
ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType,
545547
ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice,
546-
std::optional<bool> HostVisible = std::nullopt);
548+
std::optional<bool> HostVisible = std::nullopt,
549+
std::optional<bool> usingCounterBasedEvents = std::nullopt);
547550

548551
// Helper function to perform the necessary cleanup of the events from reset cmd
549552
// list.

0 commit comments

Comments
 (0)