Skip to content

Commit ed1e677

Browse files
committed
[L0] track ze queue usage before destroy
Signed-off-by: Neil R. Spruit <[email protected]>
1 parent 0086401 commit ed1e677

File tree

2 files changed

+24
-9
lines changed

2 files changed

+24
-9
lines changed

source/adapters/level_zero/queue.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,10 +1314,13 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
13141314
for (auto &QueueGroup : QueueMap)
13151315
for (auto &ZeQueue : QueueGroup.second.ZeQueues)
13161316
if (ZeQueue) {
1317-
auto ZeResult = ZE_CALL_NOCHECK(zeCommandQueueDestroy, (ZeQueue));
1318-
// Gracefully handle the case that L0 was already unloaded.
1319-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
1320-
return ze2urResult(ZeResult);
1317+
{
1318+
std::scoped_lock<ur_shared_mutex> Lock(UrQueue->zeQueueInUseMutex);
1319+
auto ZeResult = ZE_CALL_NOCHECK(zeCommandQueueDestroy, (ZeQueue));
1320+
// Gracefully handle the case that L0 was already unloaded.
1321+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
1322+
return ze2urResult(ZeResult);
1323+
}
13211324
}
13221325
}
13231326

@@ -1392,7 +1395,6 @@ ur_result_t CleanupEventListFromResetCmdList(
13921395
// runtime. Need to investigate whether release can be done earlier, at sync
13931396
// points such as this, to reduce total number of active Events.
13941397
ur_result_t ur_queue_handle_t_::synchronize() {
1395-
// std::shared_lock<ur_shared_mutex> QueueLock(this->Mutex);
13961398
this->Mutex.lock();
13971399
if (!Healthy) {
13981400
this->Mutex.unlock();
@@ -1406,7 +1408,10 @@ ur_result_t ur_queue_handle_t_::synchronize() {
14061408

14071409
// wait for all commands previously submitted to this immediate command list
14081410
Queue->Mutex.unlock();
1409-
ZE2UR_CALL(zeCommandListHostSynchronize, (ImmCmdList->first, UINT64_MAX));\
1411+
{
1412+
std::scoped_lock<ur_shared_mutex> Lock(Queue->zeQueueInUseMutex);
1413+
ZE2UR_CALL(zeCommandListHostSynchronize, (ImmCmdList->first, UINT64_MAX));
1414+
}
14101415
Queue->Mutex.lock();
14111416

14121417
// Cleanup all events from the synced command list.
@@ -1421,7 +1426,12 @@ ur_result_t ur_queue_handle_t_::synchronize() {
14211426
// zero handle can have device scope, so we can't synchronize the last
14221427
// event.
14231428
if (isInOrderQueue() && !LastCommandEvent->IsDiscarded) {
1424-
ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent));
1429+
this->Mutex.unlock();
1430+
{
1431+
std::scoped_lock<ur_shared_mutex> Lock(this->zeQueueInUseMutex);
1432+
ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent));
1433+
}
1434+
this->Mutex.lock();
14251435

14261436
// clean up all events known to have been completed as well,
14271437
// so they can be reused later
@@ -1450,7 +1460,10 @@ ur_result_t ur_queue_handle_t_::synchronize() {
14501460
for (auto &ZeQueue : QueueGroup.second.ZeQueues)
14511461
if (ZeQueue) {
14521462
this->Mutex.unlock();
1453-
ZE2UR_CALL(zeHostSynchronize, (ZeQueue));
1463+
{
1464+
std::scoped_lock<ur_shared_mutex> Lock(this->zeQueueInUseMutex);
1465+
ZE2UR_CALL(zeHostSynchronize, (ZeQueue));
1466+
}
14541467
this->Mutex.lock();
14551468
}
14561469
}

source/adapters/level_zero/queue.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@ struct ur_queue_handle_t_ : _ur_object {
237237
// Map of all command lists used in this queue.
238238
ur_command_list_map_t CommandListMap;
239239

240+
ur_shared_mutex zeQueueInUseMutex;
241+
240242
// Helper data structure to hold all variables related to batching
241243
struct command_batch {
242244
// These two members are used to keep track of how often the
@@ -548,4 +550,4 @@ ur_result_t createEventAndAssociateQueue(
548550
// list.
549551
ur_result_t CleanupEventListFromResetCmdList(
550552
std::vector<ur_event_handle_t> &EventListToCleanup,
551-
bool QueueLocked = false);
553+
bool QueueLocked = false);

0 commit comments

Comments
 (0)