Skip to content

Commit e7901bc

Browse files
[UR] Draft for adding support for counter-based events
Draft implementation of counter-based events. For now, counter-based events are enabled by setting the environment variable UR_L0_USE_DRIVER_COUNTER_BASED_EVENTS to a non-zero value; they are only used for in-order queues. Signed-off-by: Zhang, Winston <[email protected]>
1 parent 4814e71 commit e7901bc

File tree

6 files changed

+71
-17
lines changed

6 files changed

+71
-17
lines changed

source/adapters/level_zero/context.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,8 @@ static const uint32_t MaxNumEventsPerPool = [] {
468468

469469
ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
470470
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
471-
bool ProfilingEnabled, ur_device_handle_t Device) {
471+
bool ProfilingEnabled, ur_device_handle_t Device,
472+
bool CounterBasedEventEnabled) {
472473
// Lock while updating event pool machinery.
473474
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
474475

@@ -510,6 +511,12 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
510511
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
511512
if (ProfilingEnabled)
512513
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
514+
if (CounterBasedEventEnabled) {
515+
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
516+
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
517+
ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC};
518+
ZeEventPoolDesc.pNext = &counterBasedExt;
519+
}
513520
urPrint("ze_event_pool_desc_t flags set to: %d\n", ZeEventPoolDesc.flags);
514521

515522
std::vector<ze_device_handle_t> ZeDevices;

source/adapters/level_zero/context.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,8 @@ struct ur_context_handle_t_ : _ur_object {
195195
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
196196
bool HostVisible,
197197
bool ProfilingEnabled,
198-
ur_device_handle_t Device);
198+
ur_device_handle_t Device,
199+
bool CounterBasedEventEnabled);
199200

200201
// Get ur_event_handle_t from cache.
201202
ur_event_handle_t getEventFromContextCache(bool HostVisible,

source/adapters/level_zero/event.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,7 +1049,8 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
10491049
//
10501050
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
10511051
bool IsMultiDevice, bool HostVisible,
1052-
ur_event_handle_t *RetEvent) {
1052+
ur_event_handle_t *RetEvent,
1053+
std::optional<bool> CounterBasedEventEnabled) {
10531054

10541055
bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled();
10551056

@@ -1069,16 +1070,20 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
10691070
ze_event_pool_handle_t ZeEventPool = {};
10701071

10711072
size_t Index = 0;
1073+
(*RetEvent)->CounterBasedEventsEnabled =
1074+
CounterBasedEventEnabled.has_value() ? CounterBasedEventEnabled.value()
1075+
: false;
10721076

10731077
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
1074-
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device))
1078+
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
1079+
(*RetEvent)->CounterBasedEventsEnabled))
10751080
return Res;
10761081

10771082
ZeStruct<ze_event_desc_t> ZeEventDesc;
10781083
ZeEventDesc.index = Index;
10791084
ZeEventDesc.wait = 0;
10801085

1081-
if (HostVisible) {
1086+
if (HostVisible || (*RetEvent)->CounterBasedEventsEnabled) {
10821087
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
10831088
} else {
10841089
//
@@ -1124,8 +1129,8 @@ ur_result_t ur_event_handle_t_::reset() {
11241129

11251130
if (!isHostVisible())
11261131
HostVisibleEvent = nullptr;
1127-
1128-
ZE2UR_CALL(zeEventHostReset, (ZeEvent));
1132+
if (!usingCounterBasedEvents())
1133+
ZE2UR_CALL(zeEventHostReset, (ZeEvent));
11291134
return UR_RESULT_SUCCESS;
11301135
}
11311136

source/adapters/level_zero/event.hpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@
2929

3030
extern "C" {
3131
ur_result_t urEventReleaseInternal(ur_event_handle_t Event);
32-
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
33-
bool IsMultiDevice, bool HostVisible,
34-
ur_event_handle_t *RetEvent);
32+
ur_result_t
33+
EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
34+
bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent,
35+
std::optional<bool> CounterBasedEventEnabled = std::nullopt);
3536
} // extern "C"
3637

3738
// This is an experimental option that allows to disable caching of events in
@@ -222,6 +223,11 @@ struct ur_event_handle_t_ : _ur_object {
222223

223224
// Get the host-visible event or create one and enqueue its signal.
224225
ur_result_t getOrCreateHostVisibleEvent(ze_event_handle_t &HostVisibleEvent);
226+
227+
// Keeps track of whether we are using Counter-based Events.
228+
bool CounterBasedEventsEnabled = false;
229+
230+
// Returns the CounterBasedEventsEnabled flag recorded for this event.
bool usingCounterBasedEvents() const {
  return CounterBasedEventsEnabled;
}
225231
};
226232

227233
// Helper function to implement zeHostSynchronize.

source/adapters/level_zero/queue.cpp

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,15 @@ ur_queue_handle_t_::ur_queue_handle_t_(
955955
ComputeCommandBatch.QueueBatchSize =
956956
ZeCommandListBatchComputeConfig.startSize();
957957
CopyCommandBatch.QueueBatchSize = ZeCommandListBatchCopyConfig.startSize();
958+
959+
static const bool useDriverCounterBasedEvents = [] {
960+
const char *UrRet = std::getenv("UR_L0_USE_DRIVER_COUNTER_BASED_EVENTS");
961+
if (!UrRet)
962+
return false;
963+
return std::atoi(UrRet) != 0;
964+
}();
965+
this->counterBasedEventsEnabled =
966+
isInOrderQueue() && useDriverCounterBasedEvents;
958967
}
959968

960969
void ur_queue_handle_t_::adjustBatchSizeForFullBatch(bool IsCopy) {
@@ -1236,7 +1245,8 @@ bool ur_queue_handle_t_::doReuseDiscardedEvents() {
12361245

12371246
ur_result_t
12381247
ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) {
1239-
if (LastCommandEvent && LastCommandEvent->IsDiscarded) {
1248+
if (!usingCounterBasedEvents() && LastCommandEvent &&
1249+
LastCommandEvent->IsDiscarded) {
12401250
ZE2UR_CALL(zeCommandListAppendBarrier,
12411251
(CommandList->first, nullptr, 1, &(LastCommandEvent->ZeEvent)));
12421252
ZE2UR_CALL(zeCommandListAppendEventReset,
@@ -1364,6 +1374,18 @@ bool ur_queue_handle_t_::isInOrderQueue() const {
13641374
0);
13651375
}
13661376

1377+
bool ur_queue_handle_t_::usingCounterBasedEvents() const {
1378+
bool usingInOrderList = true;
1379+
for (auto &&It = this->CommandListMap.begin();
1380+
It != this->CommandListMap.end(); ++It) {
1381+
if (It->second.ZeQueueDesc.flags != ZE_COMMAND_QUEUE_FLAG_IN_ORDER) {
1382+
usingInOrderList = false;
1383+
break;
1384+
}
1385+
}
1386+
return usingInOrderList && this->counterBasedEventsEnabled;
1387+
}
1388+
13671389
// Helper function to perform the necessary cleanup of the events from reset cmd
13681390
// list.
13691391
ur_result_t CleanupEventListFromResetCmdList(
@@ -1517,7 +1539,8 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
15171539

15181540
if (*Event == nullptr)
15191541
UR_CALL(EventCreate(Queue->Context, Queue, IsMultiDevice,
1520-
HostVisible.value(), Event));
1542+
HostVisible.value(), Event,
1543+
Queue->usingCounterBasedEvents()));
15211544

15221545
(*Event)->UrQueue = Queue;
15231546
(*Event)->CommandType = CommandType;
@@ -1818,6 +1841,9 @@ ur_queue_handle_t_::ur_queue_group_t::getZeQueue(uint32_t *QueueGroupOrdinal) {
18181841
if (QueueIndex != 0) {
18191842
ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY;
18201843
}
1844+
if (Queue->usingCounterBasedEvents()) {
1845+
ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_IN_ORDER;
1846+
}
18211847

18221848
urPrint("[getZeQueue]: create queue ordinal = %d, index = %d "
18231849
"(round robin in [%d, %d]) priority = %s\n",
@@ -1859,15 +1885,18 @@ ur_result_t ur_queue_handle_t_::createCommandList(
18591885
ze_command_list_handle_t ZeCommandList;
18601886

18611887
uint32_t QueueGroupOrdinal;
1888+
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
1889+
if (usingCounterBasedEvents()) {
1890+
ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER;
1891+
ZeCommandListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
1892+
}
18621893
auto &QGroup = getQueueGroup(UseCopyEngine);
18631894
auto &ZeCommandQueue =
18641895
ForcedCmdQueue ? *ForcedCmdQueue : QGroup.getZeQueue(&QueueGroupOrdinal);
18651896
if (ForcedCmdQueue)
18661897
QueueGroupOrdinal = QGroup.getCmdQueueOrdinal(ZeCommandQueue);
18671898

1868-
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
18691899
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;
1870-
18711900
ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
18721901
&ZeCommandListDesc, &ZeCommandList));
18731902

@@ -1877,9 +1906,10 @@ ur_result_t ur_queue_handle_t_::createCommandList(
18771906
std::tie(CommandList, std::ignore) = CommandListMap.insert(
18781907
std::pair<ze_command_list_handle_t, ur_command_list_info_t>(
18791908
ZeCommandList, {ZeFence, false, false, ZeCommandQueue, ZeQueueDesc}));
1880-
1881-
UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList));
1882-
UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine));
1909+
if (!usingCounterBasedEvents()) {
1910+
UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList));
1911+
UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine));
1912+
}
18831913
return UR_RESULT_SUCCESS;
18841914
}
18851915

source/adapters/level_zero/queue.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,9 @@ struct ur_queue_handle_t_ : _ur_object {
234234
// Keeps the properties of this queue.
235235
ur_queue_flags_t Properties;
236236

237+
// Keeps track of whether we are using Counter-based Events
238+
bool counterBasedEventsEnabled = false;
239+
237240
// Map of all command lists used in this queue.
238241
ur_command_list_map_t CommandListMap;
239242

@@ -399,6 +402,8 @@ struct ur_queue_handle_t_ : _ur_object {
399402
// Returns true if the queue is an in-order queue.
400403
bool isInOrderQueue() const;
401404

405+
// Returns true if counter-based events are enabled for this queue and every
// command list tracked in CommandListMap has an in-order queue descriptor.
bool usingCounterBasedEvents() const;
406+
402407
// Returns true if the queue has discard events property.
403408
bool isDiscardEvents() const;
404409

0 commit comments

Comments
 (0)