Skip to content

[L0] Interrupt-based event implementation #2334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,8 @@ ur_result_t createSyncPointAndGetZeEvents(
UR_CALL(EventCreate(CommandBuffer->Context, nullptr /*Queue*/,
false /*IsMultiDevice*/, HostVisible, &LaunchEvent,
false /*CounterBasedEventEnabled*/,
!CommandBuffer->IsProfilingEnabled));
!CommandBuffer->IsProfilingEnabled,
false /*InterruptBasedEventEnabled*/));
LaunchEvent->CommandType = CommandType;
ZeLaunchEvent = LaunchEvent->ZeEvent;

Expand Down Expand Up @@ -680,13 +681,15 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
if (Device->hasMainCopyEngine()) {
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false, false,
&CopyFinishedEvent, UseCounterBasedEvents,
!EnableProfiling));
!EnableProfiling,
false /*InterruptBasedEventEnabled*/));
}

if (EnableProfiling) {
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
false /*HostVisible*/, &ComputeFinishedEvent,
UseCounterBasedEvents, !EnableProfiling));
UseCounterBasedEvents, !EnableProfiling,
false /*InterruptBasedEventEnabled*/));
}
}

Expand All @@ -695,7 +698,8 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
if (WaitEventPath) {
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
false /*HostVisible*/, &WaitEvent,
false /*CounterBasedEventEnabled*/, !EnableProfiling));
false /*CounterBasedEventEnabled*/, !EnableProfiling,
false /*InterruptBasedEventEnabled*/));
}

// Create ZeCommandListResetEvents only if counter-based events are not being
Expand All @@ -707,15 +711,17 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
if (!UseCounterBasedEvents) {
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
false /*HostVisible*/, &AllResetEvent,
false /*CounterBasedEventEnabled*/, !EnableProfiling));
false /*CounterBasedEventEnabled*/, !EnableProfiling,
false /*InterruptBasedEventEnabled*/));

UR_CALL(createMainCommandList(Context, Device, false, false, false,
ZeCommandListResetEvents));

// The ExecutionFinishedEvent is only waited on by ZeCommandListResetEvents.
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
false /*HostVisible*/, &ExecutionFinishedEvent,
false /*CounterBasedEventEnabled*/, !EnableProfiling));
false /*CounterBasedEventEnabled*/, !EnableProfiling,
false /*InterruptBased*/));
}

try {
Expand Down
44 changes: 29 additions & 15 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,8 @@ static const uint32_t MaxNumEventsPerPool = [] {
ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
bool ProfilingEnabled, ur_device_handle_t Device,
bool CounterBasedEventEnabled, bool UsingImmCmdList) {
bool CounterBasedEventEnabled, bool UsingImmCmdList,
bool InterruptBasedEventEnabled) {
// Lock while updating event pool machinery.
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);

Expand All @@ -487,9 +488,9 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
if (Device) {
ZeDevice = Device->ZeDevice;
}
std::list<ze_event_pool_handle_t> *ZePoolCache =
getZeEventPoolCache(HostVisible, ProfilingEnabled,
CounterBasedEventEnabled, UsingImmCmdList, ZeDevice);
std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache(
HostVisible, ProfilingEnabled, CounterBasedEventEnabled, UsingImmCmdList,
InterruptBasedEventEnabled, ZeDevice);

if (!ZePoolCache->empty()) {
if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) {
Expand Down Expand Up @@ -537,6 +538,14 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
counterBasedExt.flags);
ZeEventPoolDesc.pNext = &counterBasedExt;
}
if (InterruptBasedEventEnabled) {
ze_intel_event_sync_mode_exp_desc_t eventSyncMode = {
ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC, nullptr, 0};
eventSyncMode.syncModeFlags =
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT |
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT;
ZeEventPoolDesc.pNext = &eventSyncMode;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

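Hi! This stores in ZeEventPoolDesc.pNext a pointer to `eventSyncMode`, a local variable that goes out of scope at the closing brace of this `if` block, so the pointer dangles as soon as the block ends. It also overwrites the `&counterBasedExt` pointer stored just above when CounterBasedEventEnabled == true, instead of chaining the two descriptors together.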

}

std::vector<ze_device_handle_t> ZeDevices;
if (ZeDevice) {
Expand All @@ -563,27 +572,31 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(

// Takes a reusable event out of the context's event cache.
//
// The cache is selected via getEventCache() from the requested properties:
//   HostVisible                - host-visible events are requested.
//   WithProfiling              - events with profiling enabled are requested.
//   Device                     - device used to select the cache (forwarded to
//                                getEventCache()).
//   CounterBasedEventEnabled   - counter-based events are requested.
//   InterruptBasedEventEnabled - interrupt-based events are requested.
//
// Returns the first event of the selected cache after removing it from the
// cache and calling reset() on it, or nullptr when that cache is empty.
// EventCacheMutex is held for the whole call.
ur_event_handle_t ur_context_handle_t_::getEventFromContextCache(
    bool HostVisible, bool WithProfiling, ur_device_handle_t Device,
    bool CounterBasedEventEnabled, bool InterruptBasedEventEnabled) {
  std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
  auto Cache =
      getEventCache(HostVisible, WithProfiling, Device,
                    CounterBasedEventEnabled, InterruptBasedEventEnabled);
  if (Cache->empty()) {
    logger::info("Cache empty (Host Visible: {}, Profiling: {}, Counter: {}, "
                 "Interrupt: {}, Device: {})",
                 HostVisible, WithProfiling, CounterBasedEventEnabled,
                 InterruptBasedEventEnabled, Device);
    return nullptr;
  }

  auto It = Cache->begin();
  ur_event_handle_t Event = *It;

  Cache->erase(It);
  // We have to reset event before using it.
  Event->reset();

  // The format string has seven placeholders; Device must be passed between
  // the interrupt flag and Cache so that every placeholder receives the value
  // its label names.
  logger::info("Using {} event (Host Visible: {}, Profiling: {}, Counter: {}, "
               "Interrupt: {}, Device: {}) from cache {}",
               Event, Event->HostVisibleEvent, Event->isProfilingEnabled(),
               Event->CounterBasedEventsEnabled,
               Event->InterruptBasedEventsEnabled, Device, Cache);

  return Event;
}
Expand All @@ -596,9 +609,9 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) {
Device = Event->UrQueue->Device;
}

auto Cache =
getEventCache(Event->isHostVisible(), Event->isProfilingEnabled(), Device,
Event->CounterBasedEventsEnabled);
auto Cache = getEventCache(
Event->isHostVisible(), Event->isProfilingEnabled(), Device,
Event->CounterBasedEventsEnabled, Event->InterruptBasedEventsEnabled);
logger::info("Inserting {} event (Host Visible: {}, Profiling: {}, Counter: "
"{}, Device: {}) into cache {}",
Event, Event->HostVisibleEvent, Event->isProfilingEnabled(),
Expand Down Expand Up @@ -627,7 +640,8 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) {

std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache(
Event->isHostVisible(), Event->isProfilingEnabled(),
Event->CounterBasedEventsEnabled, UsingImmediateCommandlists, ZeDevice);
Event->CounterBasedEventsEnabled, UsingImmediateCommandlists,
Event->InterruptBasedEventsEnabled, ZeDevice);

// Put the empty pool to the cache of the pools.
if (NumEventsUnreleasedInEventPool[Event->ZeEventPool] == 0)
Expand Down
117 changes: 94 additions & 23 deletions source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,24 @@ struct l0_command_list_cache_info {
bool IsImmediate = false;
};

// Bitmask type used for the syncModeFlags field of
// ze_intel_event_sync_mode_exp_desc_t; holds an OR-combination of
// ze_intel_event_sync_mode_exp_flag_t values.
typedef uint32_t ze_intel_event_sync_mode_exp_flags_t;
// Individual event synchronization mode flags.
// NOTE(review): declared locally in this header; presumably mirrors an Intel
// Level Zero experimental extension - confirm names and values against the
// driver's own header.
typedef enum _ze_intel_event_sync_mode_exp_flag_t {
// Bit 0. Presumably requests a low-power host wait - confirm.
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT = ZE_BIT(0),
// Bit 1. Presumably requests an interrupt when the event is signalled -
// confirm.
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT = ZE_BIT(1),
// Sentinel value 0x7fffffff; presumably pins the enum's width - confirm.
// NOTE(review): the identifier contains "EXP_EXP", which looks like a typo,
// but renaming it would change the public name.
ZE_INTEL_EVENT_SYNC_MODE_EXP_EXP_FLAG_FORCE_UINT32 = 0x7fffffff

} ze_intel_event_sync_mode_exp_flag_t;

// Structure-type tag (0x00030016 cast to ze_structure_type_t) stored in the
// stype field of ze_intel_event_sync_mode_exp_desc_t.
#define ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC \
(ze_structure_type_t)0x00030016

// Extension descriptor carrying event synchronization mode flags. The adapter
// attaches it to an event pool descriptor through that descriptor's pNext
// pointer when interrupt-based events are requested.
typedef struct _ze_intel_event_sync_mode_exp_desc_t {
// Set to ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC by the adapter.
ze_structure_type_t stype;
// Pointer to a further extension structure; the adapter initializes it to
// nullptr.
const void *pNext;

// OR-combination of ze_intel_event_sync_mode_exp_flag_t values.
ze_intel_event_sync_mode_exp_flags_t syncModeFlags;
} ze_intel_event_sync_mode_exp_desc_t;

struct ur_context_handle_t_ : _ur_object {
ur_context_handle_t_(ze_context_handle_t ZeContext, uint32_t NumDevices,
const ur_device_handle_t *Devs, bool OwnZeContext)
Expand Down Expand Up @@ -150,9 +168,9 @@ struct ur_context_handle_t_ : _ur_object {
// head.
//
// Cache of event pools to which host-visible events are added to.
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{12};
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{30};
std::vector<std::unordered_map<ze_device_handle_t, size_t>>
ZeEventPoolCacheDeviceMap{12};
ZeEventPoolCacheDeviceMap{30};

// This map will be used to determine if a pool is full or not
// by storing number of empty slots available in the pool.
Expand Down Expand Up @@ -199,13 +217,15 @@ struct ur_context_handle_t_ : _ur_object {
bool ProfilingEnabled,
ur_device_handle_t Device,
bool CounterBasedEventEnabled,
bool UsingImmCmdList);
bool UsingImmCmdList,
bool InterruptBasedEventEnabled);

// Get ur_event_handle_t from cache.
ur_event_handle_t getEventFromContextCache(bool HostVisible,
bool WithProfiling,
ur_device_handle_t Device,
bool CounterBasedEventEnabled);
bool CounterBasedEventEnabled,
bool InterruptBasedEventEnabled);

// Add ur_event_handle_t to cache.
void addEventToContextCache(ur_event_handle_t);
Expand All @@ -216,17 +236,29 @@ struct ur_context_handle_t_ : _ur_object {
HostVisibleCounterBasedRegularCacheType,
HostInvisibleCounterBasedRegularCacheType,
HostVisibleCounterBasedImmediateCacheType,
HostInvisibleCounterBasedImmediateCacheType
HostInvisibleCounterBasedImmediateCacheType,

HostVisibleInterruptBasedRegularCacheType,
HostInvisibleInterruptBasedRegularCacheType,
HostVisibleInterruptBasedImmediateCacheType,
HostInvisibleInterruptBasedImmediateCacheType,

HostVisibleInterruptAndCounterBasedRegularCacheType,
HostInvisibleInterruptAndCounterBasedRegularCacheType,
HostVisibleInterruptAndCounterBasedImmediateCacheType,
HostInvisibleInterruptAndCounterBasedImmediateCacheType
};

std::list<ze_event_pool_handle_t> *
getZeEventPoolCache(bool HostVisible, bool WithProfiling,
bool CounterBasedEventEnabled, bool UsingImmediateCmdList,
bool InterruptBasedEventEnabled,
ze_device_handle_t ZeDevice) {
EventPoolCacheType CacheType;

calculateCacheIndex(HostVisible, CounterBasedEventEnabled,
UsingImmediateCmdList, CacheType);
UsingImmediateCmdList, InterruptBasedEventEnabled,
CacheType);
if (ZeDevice) {
auto ZeEventPoolCacheMap =
WithProfiling ? &ZeEventPoolCacheDeviceMap[CacheType * 2]
Expand All @@ -246,23 +278,57 @@ struct ur_context_handle_t_ : _ur_object {
// Chooses the EventPoolCacheType that corresponds to a combination of event
// properties and writes it to CacheType.
//
//   HostVisible                - selects the HostVisible* vs. HostInvisible*
//                                enumerators.
//   CounterBasedEventEnabled   - selects the *CounterBased* enumerators.
//   UsingImmediateCmdList      - selects *Immediate* vs. *Regular* enumerators;
//                                not consulted when neither counter-based nor
//                                interrupt-based events are requested.
//   InterruptBasedEventEnabled - selects the *InterruptBased* /
//                                *InterruptAndCounterBased* enumerators.
//   CacheType                  - output; receives the selected enumerator.
//
// Always returns UR_RESULT_SUCCESS.
//
// NOTE(review): in this view the counter-based if/else-if chain appears both
// before and inside the interrupt handling and the braces do not balance -
// presumably pre-change and post-change lines of a diff shown together.
// Confirm against the real file before relying on the exact statement order.
ur_result_t calculateCacheIndex(bool HostVisible,
bool CounterBasedEventEnabled,
bool UsingImmediateCmdList,
bool InterruptBasedEventEnabled,
EventPoolCacheType &CacheType) {
if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) {
CacheType = HostVisibleCounterBasedRegularCacheType;
} else if (CounterBasedEventEnabled && !HostVisible &&
!UsingImmediateCmdList) {
CacheType = HostInvisibleCounterBasedRegularCacheType;
} else if (CounterBasedEventEnabled && HostVisible &&
UsingImmediateCmdList) {
CacheType = HostVisibleCounterBasedImmediateCacheType;
} else if (CounterBasedEventEnabled && !HostVisible &&
UsingImmediateCmdList) {
CacheType = HostInvisibleCounterBasedImmediateCacheType;
} else if (!CounterBasedEventEnabled && HostVisible) {
CacheType = HostVisibleCacheType;
// Interrupt-based events: pick among the eight Interrupt* enumerators by
// counter-based mode, then host visibility, then command-list kind.
if (InterruptBasedEventEnabled) {
if (CounterBasedEventEnabled) {
if (HostVisible) {
if (UsingImmediateCmdList) {
CacheType = HostVisibleInterruptAndCounterBasedImmediateCacheType;
} else {
CacheType = HostVisibleInterruptAndCounterBasedRegularCacheType;
}
} else {
if (UsingImmediateCmdList) {
CacheType = HostInvisibleInterruptAndCounterBasedImmediateCacheType;
} else {
CacheType = HostInvisibleInterruptAndCounterBasedRegularCacheType;
}
}
} else {
if (HostVisible) {
if (UsingImmediateCmdList) {
CacheType = HostVisibleInterruptBasedImmediateCacheType;
} else {
CacheType = HostVisibleInterruptBasedRegularCacheType;
}
} else {
if (UsingImmediateCmdList) {
CacheType = HostInvisibleInterruptBasedImmediateCacheType;
} else {
CacheType = HostInvisibleInterruptBasedRegularCacheType;
}
}
}
} else {
CacheType = HostInvisibleCacheType;
// No interrupt-based events: counter-based events use the four
// *CounterBased* enumerators; otherwise only host visibility matters.
if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) {
CacheType = HostVisibleCounterBasedRegularCacheType;
} else if (CounterBasedEventEnabled && !HostVisible &&
!UsingImmediateCmdList) {
CacheType = HostInvisibleCounterBasedRegularCacheType;
} else if (CounterBasedEventEnabled && HostVisible &&
UsingImmediateCmdList) {
CacheType = HostVisibleCounterBasedImmediateCacheType;
} else if (CounterBasedEventEnabled && !HostVisible &&
UsingImmediateCmdList) {
CacheType = HostInvisibleCounterBasedImmediateCacheType;
} else if (!CounterBasedEventEnabled && HostVisible) {
CacheType = HostVisibleCacheType;
} else {
CacheType = HostInvisibleCacheType;
}
}

return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -308,9 +374,10 @@ struct ur_context_handle_t_ : _ur_object {
EVENT_FLAG_HOST_VISIBLE = UR_BIT(0),
EVENT_FLAG_WITH_PROFILING = UR_BIT(1),
EVENT_FLAG_COUNTER = UR_BIT(2),
EVENT_FLAG_DEVICE = UR_BIT(3), // if set, subsequent bits are device id
EVENT_FLAG_INTERRUPT = UR_BIT(3),
EVENT_FLAG_DEVICE = UR_BIT(5), // if set, subsequent bits are device id
MAX_EVENT_FLAG_BITS =
4, // this is used as an offset for embedding device id
6, // this is used as an offset for embedding device id
};

// Mutex to control operations on event caches.
Expand All @@ -322,7 +389,8 @@ struct ur_context_handle_t_ : _ur_object {

// Get the cache of events for a provided scope and profiling mode.
EventCache *getEventCache(bool HostVisible, bool WithProfiling,
ur_device_handle_t Device, bool Counter) {
ur_device_handle_t Device, bool Counter,
bool Interrupt) {

size_t index = 0;
if (HostVisible) {
Expand All @@ -334,6 +402,9 @@ struct ur_context_handle_t_ : _ur_object {
if (Counter) {
index |= EVENT_FLAG_COUNTER;
}
if (Interrupt) {
index |= EVENT_FLAG_INTERRUPT;
}
if (Device) {
index |= EVENT_FLAG_DEVICE | (*Device->Id << MAX_EVENT_FLAG_BITS);
}
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,8 @@ ur_result_t urDeviceGetInfo(
case UR_DEVICE_INFO_BUILT_IN_KERNELS:
// TODO: To find out correct value
return ReturnValue("");
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP:
return ReturnValue(static_cast<ur_bool_t>(true));
case UR_DEVICE_INFO_QUEUE_PROPERTIES:
return ReturnValue(
ur_queue_flag_t(UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE |
Expand Down Expand Up @@ -1155,8 +1157,6 @@ ur_result_t urDeviceGetInfo(
return ReturnValue(true);
case UR_DEVICE_INFO_USM_POOL_SUPPORT:
return ReturnValue(true);
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP:
return ReturnValue(false);
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: {
#ifdef ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME
const auto ZeDeviceBlockArrayFlags =
Expand Down
Loading
Loading