Skip to content

Commit 2c362a6

Browse files
[UR] Draft for adding support for counter-based events
Draft for counter-based events implementation. As of now, only the creation of events, command lists, and command queues/pools is implemented. Signed-off-by: Zhang, Winston <[email protected]>
1 parent 4814e71 commit 2c362a6

File tree

7 files changed

+72
-20
lines changed

7 files changed

+72
-20
lines changed

source/adapters/level_zero/context.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,8 @@ static const uint32_t MaxNumEventsPerPool = [] {
468468

469469
ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
470470
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
471-
bool ProfilingEnabled, ur_device_handle_t Device) {
471+
bool ProfilingEnabled, ur_device_handle_t Device,
472+
std::optional<bool> CounterBasedEventEnabled) {
472473
// Lock while updating event pool machinery.
473474
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
474475

@@ -510,6 +511,16 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
510511
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
511512
if (ProfilingEnabled)
512513
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
514+
if (CounterBasedEventEnabled.has_value() &&
515+
CounterBasedEventEnabled.value()) {
516+
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
517+
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
518+
ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC};
519+
counterBasedExt.flags |=
520+
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE |
521+
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE;
522+
ZeEventPoolDesc.pNext = &counterBasedExt;
523+
}
513524
urPrint("ze_event_pool_desc_t flags set to: %d\n", ZeEventPoolDesc.flags);
514525

515526
std::vector<ze_device_handle_t> ZeDevices;

source/adapters/level_zero/context.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -192,10 +192,10 @@ struct ur_context_handle_t_ : _ur_object {
192192
// pool then create new one. The HostVisible parameter tells if we need a
193193
// slot for a host-visible event. The ProfilingEnabled tells if we need a
194194
// slot for an event with profiling capabilities.
195-
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
196-
bool HostVisible,
197-
bool ProfilingEnabled,
198-
ur_device_handle_t Device);
195+
ur_result_t getFreeSlotInExistingOrNewPool(
196+
ze_event_pool_handle_t &, size_t &, bool HostVisible,
197+
bool ProfilingEnabled, ur_device_handle_t Device,
198+
std::optional<bool> CounterBasedEventEnabled = std::nullopt);
199199

200200
// Get ur_event_handle_t from cache.
201201
ur_event_handle_t getEventFromContextCache(bool HostVisible,

source/adapters/level_zero/event.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,7 +1049,8 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
10491049
//
10501050
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
10511051
bool IsMultiDevice, bool HostVisible,
1052-
ur_event_handle_t *RetEvent) {
1052+
ur_event_handle_t *RetEvent,
1053+
std::optional<bool> CounterBasedEventEnabled) {
10531054

10541055
bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled();
10551056

@@ -1071,14 +1072,18 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
10711072
size_t Index = 0;
10721073

10731074
if (auto Res = Context->getFreeSlotInExistingOrNewPool(
1074-
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device))
1075+
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
1076+
CounterBasedEventEnabled.has_value()
1077+
? CounterBasedEventEnabled.value()
1078+
: false))
10751079
return Res;
10761080

10771081
ZeStruct<ze_event_desc_t> ZeEventDesc;
10781082
ZeEventDesc.index = Index;
10791083
ZeEventDesc.wait = 0;
10801084

1081-
if (HostVisible) {
1085+
if (HostVisible || (CounterBasedEventEnabled.has_value() &&
1086+
CounterBasedEventEnabled.value())) {
10821087
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
10831088
} else {
10841089
//

source/adapters/level_zero/event.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@
2929

3030
extern "C" {
3131
ur_result_t urEventReleaseInternal(ur_event_handle_t Event);
32-
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
33-
bool IsMultiDevice, bool HostVisible,
34-
ur_event_handle_t *RetEvent);
32+
ur_result_t
33+
EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
34+
bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent,
35+
std::optional<bool> CounterBasedEventEnabled = std::nullopt);
3536
} // extern "C"
3637

3738
// This is an experimental option that allows to disable caching of events in

source/adapters/level_zero/kernel.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
206206
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
207207

208208
UR_CALL(createEventAndAssociateQueue(Queue, Event, UR_COMMAND_KERNEL_LAUNCH,
209-
CommandList, IsInternal, false));
209+
CommandList, IsInternal, false,
210+
Queue->usingCounterBasedEvents()));
210211
ZeEvent = (*Event)->ZeEvent;
211212
(*Event)->WaitList = TmpWaitList;
212213

source/adapters/level_zero/queue.cpp

Lines changed: 40 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,6 +1364,28 @@ bool ur_queue_handle_t_::isInOrderQueue() const {
13641364
0);
13651365
}
13661366

1367+
bool ur_queue_handle_t_::usingCounterBasedEvents() const {
1368+
if (!this->isInOrderQueue())
1369+
return false;
1370+
1371+
static const bool UseDriverCounterBasedEvents = [] {
1372+
const char *UrRet = std::getenv("UR_L0_USE_DRIVER_COUNTER_BASED_EVENTS");
1373+
if (!UrRet)
1374+
return false;
1375+
return std::atoi(UrRet) != 0;
1376+
}();
1377+
1378+
bool usingInOrderList = true;
1379+
for (auto &&It = this->CommandListMap.begin();
1380+
It != this->CommandListMap.end(); ++It) {
1381+
if (It->second.ZeQueueDesc.flags != ZE_COMMAND_QUEUE_FLAG_IN_ORDER) {
1382+
usingInOrderList = false;
1383+
break;
1384+
}
1385+
}
1386+
return UseDriverCounterBasedEvents && usingInOrderList;
1387+
}
1388+
13671389
// Helper function to perform the necessary cleanup of the events from reset cmd
13681390
// list.
13691391
ur_result_t CleanupEventListFromResetCmdList(
@@ -1517,7 +1539,8 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
15171539

15181540
if (*Event == nullptr)
15191541
UR_CALL(EventCreate(Queue->Context, Queue, IsMultiDevice,
1520-
HostVisible.value(), Event));
1542+
HostVisible.value(), Event,
1543+
Queue->usingCounterBasedEvents()));
15211544

15221545
(*Event)->UrQueue = Queue;
15231546
(*Event)->CommandType = CommandType;
@@ -1805,6 +1828,9 @@ ur_queue_handle_t_::ur_queue_group_t::getZeQueue(uint32_t *QueueGroupOrdinal) {
18051828
ZeCommandQueueDesc.ordinal = *QueueGroupOrdinal;
18061829
ZeCommandQueueDesc.index = QueueIndex;
18071830
ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
1831+
if (Queue->usingCounterBasedEvents()) {
1832+
ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
1833+
}
18081834
const char *Priority = "Normal";
18091835
if (Queue->isPriorityLow()) {
18101836
ZeCommandQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW;
@@ -1854,32 +1880,38 @@ ur_result_t ur_queue_handle_t_::createCommandList(
18541880
bool UseCopyEngine, ur_command_list_ptr_t &CommandList,
18551881
ze_command_queue_handle_t *ForcedCmdQueue) {
18561882

1857-
ze_fence_handle_t ZeFence;
1883+
ze_fence_handle_t ZeFence = nullptr;
18581884
ZeStruct<ze_fence_desc_t> ZeFenceDesc;
18591885
ze_command_list_handle_t ZeCommandList;
18601886

18611887
uint32_t QueueGroupOrdinal;
1888+
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
1889+
if (usingCounterBasedEvents()) {
1890+
ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER;
1891+
ZeCommandListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
1892+
}
18621893
auto &QGroup = getQueueGroup(UseCopyEngine);
18631894
auto &ZeCommandQueue =
18641895
ForcedCmdQueue ? *ForcedCmdQueue : QGroup.getZeQueue(&QueueGroupOrdinal);
18651896
if (ForcedCmdQueue)
18661897
QueueGroupOrdinal = QGroup.getCmdQueueOrdinal(ZeCommandQueue);
18671898

1868-
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
18691899
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;
1870-
18711900
ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
18721901
&ZeCommandListDesc, &ZeCommandList));
18731902

1874-
ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
1903+
if (!usingCounterBasedEvents()) {
1904+
ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
1905+
}
18751906
ZeStruct<ze_command_queue_desc_t> ZeQueueDesc;
18761907
ZeQueueDesc.ordinal = QueueGroupOrdinal;
18771908
std::tie(CommandList, std::ignore) = CommandListMap.insert(
18781909
std::pair<ze_command_list_handle_t, ur_command_list_info_t>(
18791910
ZeCommandList, {ZeFence, false, false, ZeCommandQueue, ZeQueueDesc}));
1880-
1881-
UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList));
1882-
UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine));
1911+
if (!usingCounterBasedEvents()) {
1912+
UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList));
1913+
UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine));
1914+
}
18831915
return UR_RESULT_SUCCESS;
18841916
}
18851917

source/adapters/level_zero/queue.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,8 @@ struct ur_queue_handle_t_ : _ur_object {
399399
// Returns true if the queue is an in-order queue.
400400
bool isInOrderQueue() const;
401401

402+
bool usingCounterBasedEvents() const;
403+
402404
// Returns true if the queue has discard events property.
403405
bool isDiscardEvents() const;
404406

0 commit comments

Comments
 (0)