Skip to content

Commit b28525c

Browse files
committed
[L0] track ze queue usage before destroy
Signed-off-by: Neil R. Spruit <[email protected]>
1 parent 0086401 commit b28525c

File tree

2 files changed

+26
-9
lines changed

2 files changed

+26
-9
lines changed

source/adapters/level_zero/queue.cpp

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,10 +1314,14 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
13141314
for (auto &QueueGroup : QueueMap)
13151315
for (auto &ZeQueue : QueueGroup.second.ZeQueues)
13161316
if (ZeQueue) {
1317-
auto ZeResult = ZE_CALL_NOCHECK(zeCommandQueueDestroy, (ZeQueue));
1318-
// Gracefully handle the case that L0 was already unloaded.
1319-
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
1320-
return ze2urResult(ZeResult);
1317+
{
1318+
std::scoped_lock<ur_shared_mutex> Lock(
1319+
UrQueue->zeQueueInUseMutex);
1320+
auto ZeResult = ZE_CALL_NOCHECK(zeCommandQueueDestroy, (ZeQueue));
1321+
// Gracefully handle the case that L0 was already unloaded.
1322+
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
1323+
return ze2urResult(ZeResult);
1324+
}
13211325
}
13221326
}
13231327

@@ -1392,7 +1396,6 @@ ur_result_t CleanupEventListFromResetCmdList(
13921396
// runtime. Need to investigate whether release can be done earlier, at sync
13931397
// points such as this, to reduce total number of active Events.
13941398
ur_result_t ur_queue_handle_t_::synchronize() {
1395-
// std::shared_lock<ur_shared_mutex> QueueLock(this->Mutex);
13961399
this->Mutex.lock();
13971400
if (!Healthy) {
13981401
this->Mutex.unlock();
@@ -1406,7 +1409,10 @@ ur_result_t ur_queue_handle_t_::synchronize() {
14061409

14071410
// wait for all commands previously submitted to this immediate command list
14081411
Queue->Mutex.unlock();
1409-
ZE2UR_CALL(zeCommandListHostSynchronize, (ImmCmdList->first, UINT64_MAX));\
1412+
{
1413+
std::scoped_lock<ur_shared_mutex> Lock(Queue->zeQueueInUseMutex);
1414+
ZE2UR_CALL(zeCommandListHostSynchronize, (ImmCmdList->first, UINT64_MAX));
1415+
}
14101416
Queue->Mutex.lock();
14111417

14121418
// Cleanup all events from the synced command list.
@@ -1421,7 +1427,12 @@ ur_result_t ur_queue_handle_t_::synchronize() {
14211427
// zero handle can have device scope, so we can't synchronize the last
14221428
// event.
14231429
if (isInOrderQueue() && !LastCommandEvent->IsDiscarded) {
1424-
ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent));
1430+
this->Mutex.unlock();
1431+
{
1432+
std::scoped_lock<ur_shared_mutex> Lock(this->zeQueueInUseMutex);
1433+
ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent));
1434+
}
1435+
this->Mutex.lock();
14251436

14261437
// clean up all events known to have been completed as well,
14271438
// so they can be reused later
@@ -1450,7 +1461,11 @@ ur_result_t ur_queue_handle_t_::synchronize() {
14501461
for (auto &ZeQueue : QueueGroup.second.ZeQueues)
14511462
if (ZeQueue) {
14521463
this->Mutex.unlock();
1453-
ZE2UR_CALL(zeHostSynchronize, (ZeQueue));
1464+
{
1465+
std::scoped_lock<ur_shared_mutex> Lock(
1466+
this->zeQueueInUseMutex);
1467+
ZE2UR_CALL(zeHostSynchronize, (ZeQueue));
1468+
}
14541469
this->Mutex.lock();
14551470
}
14561471
}

source/adapters/level_zero/queue.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@ struct ur_queue_handle_t_ : _ur_object {
237237
// Map of all command lists used in this queue.
238238
ur_command_list_map_t CommandListMap;
239239

240+
ur_shared_mutex zeQueueInUseMutex;
241+
240242
// Helper data structure to hold all variables related to batching
241243
struct command_batch {
242244
// These two members are used to keep track of how often the
@@ -548,4 +550,4 @@ ur_result_t createEventAndAssociateQueue(
548550
// list.
549551
ur_result_t CleanupEventListFromResetCmdList(
550552
std::vector<ur_event_handle_t> &EventListToCleanup,
551-
bool QueueLocked = false);
553+
bool QueueLocked = false);

0 commit comments

Comments
 (0)