@@ -395,8 +395,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(
395
395
396
396
std::vector<ur_event_handle_t > EventListToCleanup;
397
397
{
398
- std::unique_lock<ur_shared_mutex> QueueLock (Queue->Mutex , std::defer_lock);
399
- QueueLock.lock ();
398
+ std::scoped_lock<ur_shared_mutex> Lock (Queue->Mutex );
400
399
401
400
if ((--Queue->RefCountExternal ) != 0 )
402
401
return UR_RESULT_SUCCESS;
@@ -411,11 +410,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(
411
410
auto Res = Queue->executeAllOpenCommandLists ();
412
411
413
412
// Make sure all commands get executed.
414
- if (Res == UR_RESULT_SUCCESS) {
415
- QueueLock.unlock ();
413
+ if (Res == UR_RESULT_SUCCESS)
416
414
UR_CALL (Queue->synchronize ());
417
- QueueLock.lock ();
418
- }
419
415
420
416
// Destroy all the fences created associated with this queue.
421
417
for (auto it = Queue->CommandListMap .begin ();
@@ -628,6 +624,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(
628
624
ur_queue_handle_t UrQueue ///< [in] handle of the queue to be finished.
629
625
) {
630
626
if (UrQueue->UsingImmCmdLists ) {
627
+ // Lock automatically releases when this goes out of scope.
628
+ std::scoped_lock<ur_shared_mutex> Lock (UrQueue->Mutex );
631
629
UR_CALL (UrQueue->synchronize ());
632
630
} else {
633
631
std::unique_lock<ur_shared_mutex> Lock (UrQueue->Mutex );
@@ -1314,9 +1312,17 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
1314
1312
for (auto &QueueGroup : QueueMap)
1315
1313
for (auto &ZeQueue : QueueGroup.second .ZeQueues )
1316
1314
if (ZeQueue) {
1317
- {
1318
- std::scoped_lock<ur_shared_mutex> Lock (
1319
- UrQueue->zeQueueInUseMutex );
1315
+ if (UrL0QueueSyncNonBlocking) {
1316
+ {
1317
+ std::scoped_lock<ur_shared_mutex> Lock (
1318
+ UrQueue->zeQueueInUseMutex );
1319
+ auto ZeResult =
1320
+ ZE_CALL_NOCHECK (zeCommandQueueDestroy, (ZeQueue));
1321
+ // Gracefully handle the case that L0 was already unloaded.
1322
+ if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
1323
+ return ze2urResult (ZeResult);
1324
+ }
1325
+ } else {
1320
1326
auto ZeResult = ZE_CALL_NOCHECK (zeCommandQueueDestroy, (ZeQueue));
1321
1327
// Gracefully handle the case that L0 was already unloaded.
1322
1328
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
@@ -1396,24 +1402,26 @@ ur_result_t CleanupEventListFromResetCmdList(
1396
1402
// runtime. Need to investigate whether release can be done earlier, at sync
1397
1403
// points such as this, to reduce total number of active Events.
1398
1404
ur_result_t ur_queue_handle_t_::synchronize () {
1399
- this ->Mutex .lock ();
1400
- if (!Healthy) {
1401
- this ->Mutex .unlock ();
1405
+ if (!Healthy)
1402
1406
return UR_RESULT_SUCCESS;
1403
- }
1404
1407
1405
1408
auto syncImmCmdList = [](ur_queue_handle_t_ *Queue,
1406
1409
ur_command_list_ptr_t ImmCmdList) {
1407
1410
if (ImmCmdList == Queue->CommandListMap .end ())
1408
1411
return UR_RESULT_SUCCESS;
1409
1412
1410
1413
// wait for all commands previously submitted to this immediate command list
1411
- Queue->Mutex .unlock ();
1412
- {
1413
- std::scoped_lock<ur_shared_mutex> Lock (Queue->zeQueueInUseMutex );
1414
+ if (UrL0QueueSyncNonBlocking) {
1415
+ Queue->Mutex .unlock ();
1416
+ {
1417
+ std::scoped_lock<ur_shared_mutex> Lock (Queue->zeQueueInUseMutex );
1418
+ ZE2UR_CALL (zeCommandListHostSynchronize,
1419
+ (ImmCmdList->first , UINT64_MAX));
1420
+ }
1421
+ Queue->Mutex .lock ();
1422
+ } else {
1414
1423
ZE2UR_CALL (zeCommandListHostSynchronize, (ImmCmdList->first , UINT64_MAX));
1415
1424
}
1416
- Queue->Mutex .lock ();
1417
1425
1418
1426
// Cleanup all events from the synced command list.
1419
1427
CleanupEventListFromResetCmdList (ImmCmdList->second .EventList , true );
@@ -1427,12 +1435,16 @@ ur_result_t ur_queue_handle_t_::synchronize() {
1427
1435
// zero handle can have device scope, so we can't synchronize the last
1428
1436
// event.
1429
1437
if (isInOrderQueue () && !LastCommandEvent->IsDiscarded ) {
1430
- this ->Mutex .unlock ();
1431
- {
1432
- std::scoped_lock<ur_shared_mutex> Lock (this ->zeQueueInUseMutex );
1438
+ if (UrL0QueueSyncNonBlocking) {
1439
+ this ->Mutex .unlock ();
1440
+ {
1441
+ std::scoped_lock<ur_shared_mutex> Lock (this ->zeQueueInUseMutex );
1442
+ ZE2UR_CALL (zeHostSynchronize, (LastCommandEvent->ZeEvent ));
1443
+ }
1444
+ this ->Mutex .lock ();
1445
+ } else {
1433
1446
ZE2UR_CALL (zeHostSynchronize, (LastCommandEvent->ZeEvent ));
1434
1447
}
1435
- this ->Mutex .lock ();
1436
1448
1437
1449
// clean up all events known to have been completed as well,
1438
1450
// so they can be reused later
@@ -1460,13 +1472,17 @@ ur_result_t ur_queue_handle_t_::synchronize() {
1460
1472
} else {
1461
1473
for (auto &ZeQueue : QueueGroup.second .ZeQueues )
1462
1474
if (ZeQueue) {
1463
- this ->Mutex .unlock ();
1464
- {
1465
- std::scoped_lock<ur_shared_mutex> Lock (
1466
- this ->zeQueueInUseMutex );
1475
+ if (UrL0QueueSyncNonBlocking) {
1476
+ this ->Mutex .unlock ();
1477
+ {
1478
+ std::scoped_lock<ur_shared_mutex> Lock (
1479
+ this ->zeQueueInUseMutex );
1480
+ ZE2UR_CALL (zeHostSynchronize, (ZeQueue));
1481
+ }
1482
+ this ->Mutex .lock ();
1483
+ } else {
1467
1484
ZE2UR_CALL (zeHostSynchronize, (ZeQueue));
1468
1485
}
1469
- this ->Mutex .lock ();
1470
1486
}
1471
1487
}
1472
1488
}
@@ -1477,12 +1493,9 @@ ur_result_t ur_queue_handle_t_::synchronize() {
1477
1493
1478
1494
// With the entire queue synchronized, the active barriers must be done so we
1479
1495
// can remove them.
1480
- if (auto Res = ActiveBarriers.clear ()) {
1481
- this ->Mutex .unlock ();
1496
+ if (auto Res = ActiveBarriers.clear ())
1482
1497
return Res;
1483
- }
1484
1498
1485
- this ->Mutex .unlock ();
1486
1499
return UR_RESULT_SUCCESS;
1487
1500
}
1488
1501
0 commit comments