@@ -395,8 +395,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(
395
395
396
396
std::vector<ur_event_handle_t > EventListToCleanup;
397
397
{
398
- std::unique_lock<ur_shared_mutex> QueueLock (Queue->Mutex , std::defer_lock);
399
- QueueLock.lock ();
398
+ std::scoped_lock<ur_shared_mutex> Lock (Queue->Mutex );
400
399
401
400
if ((--Queue->RefCountExternal ) != 0 )
402
401
return UR_RESULT_SUCCESS;
@@ -411,11 +410,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(
411
410
auto Res = Queue->executeAllOpenCommandLists ();
412
411
413
412
// Make sure all commands get executed.
414
- if (Res == UR_RESULT_SUCCESS) {
415
- QueueLock.unlock ();
413
+ if (Res == UR_RESULT_SUCCESS)
416
414
UR_CALL (Queue->synchronize ());
417
- QueueLock.lock ();
418
- }
419
415
420
416
// Destroy all the fences created associated with this queue.
421
417
for (auto it = Queue->CommandListMap .begin ();
@@ -628,6 +624,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(
628
624
ur_queue_handle_t UrQueue // /< [in] handle of the queue to be finished.
629
625
) {
630
626
if (UrQueue->UsingImmCmdLists ) {
627
+ // Lock automatically releases when this goes out of scope.
628
+ std::scoped_lock<ur_shared_mutex> Lock (UrQueue->Mutex );
631
629
UR_CALL (UrQueue->synchronize ());
632
630
} else {
633
631
std::unique_lock<ur_shared_mutex> Lock (UrQueue->Mutex );
@@ -1315,8 +1313,10 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
1315
1313
for (auto &ZeQueue : QueueGroup.second .ZeQueues )
1316
1314
if (ZeQueue) {
1317
1315
{
1318
- std::scoped_lock<ur_shared_mutex> Lock (
1319
- UrQueue->zeQueueInUseMutex );
1316
+ if (UrL0QueueSyncNonBlocking) {
1317
+ std::scoped_lock<ur_shared_mutex> Lock (
1318
+ UrQueue->zeQueueInUseMutex );
1319
+ }
1320
1320
auto ZeResult = ZE_CALL_NOCHECK (zeCommandQueueDestroy, (ZeQueue));
1321
1321
// Gracefully handle the case that L0 was already unloaded.
1322
1322
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
@@ -1396,24 +1396,26 @@ ur_result_t CleanupEventListFromResetCmdList(
1396
1396
// runtime. Need to investigate whether release can be done earlier, at sync
1397
1397
// points such as this, to reduce total number of active Events.
1398
1398
ur_result_t ur_queue_handle_t_::synchronize () {
1399
- this ->Mutex .lock ();
1400
- if (!Healthy) {
1401
- this ->Mutex .unlock ();
1399
+ if (!Healthy)
1402
1400
return UR_RESULT_SUCCESS;
1403
- }
1404
1401
1405
1402
auto syncImmCmdList = [](ur_queue_handle_t_ *Queue,
1406
1403
ur_command_list_ptr_t ImmCmdList) {
1407
1404
if (ImmCmdList == Queue->CommandListMap .end ())
1408
1405
return UR_RESULT_SUCCESS;
1409
1406
1410
1407
// wait for all commands previously submitted to this immediate command list
1411
- Queue->Mutex .unlock ();
1412
- {
1413
- std::scoped_lock<ur_shared_mutex> Lock (Queue->zeQueueInUseMutex );
1408
+ if (UrL0QueueSyncNonBlocking) {
1409
+ Queue->Mutex .unlock ();
1410
+ {
1411
+ std::scoped_lock<ur_shared_mutex> Lock (Queue->zeQueueInUseMutex );
1412
+ ZE2UR_CALL (zeCommandListHostSynchronize,
1413
+ (ImmCmdList->first , UINT64_MAX));
1414
+ }
1415
+ Queue->Mutex .lock ();
1416
+ } else {
1414
1417
ZE2UR_CALL (zeCommandListHostSynchronize, (ImmCmdList->first , UINT64_MAX));
1415
1418
}
1416
- Queue->Mutex .lock ();
1417
1419
1418
1420
// Cleanup all events from the synced command list.
1419
1421
CleanupEventListFromResetCmdList (ImmCmdList->second .EventList , true );
@@ -1427,12 +1429,16 @@ ur_result_t ur_queue_handle_t_::synchronize() {
1427
1429
// zero handle can have device scope, so we can't synchronize the last
1428
1430
// event.
1429
1431
if (isInOrderQueue () && !LastCommandEvent->IsDiscarded ) {
1430
- this ->Mutex .unlock ();
1431
- {
1432
- std::scoped_lock<ur_shared_mutex> Lock (this ->zeQueueInUseMutex );
1432
+ if (UrL0QueueSyncNonBlocking) {
1433
+ this ->Mutex .unlock ();
1434
+ {
1435
+ std::scoped_lock<ur_shared_mutex> Lock (this ->zeQueueInUseMutex );
1436
+ ZE2UR_CALL (zeHostSynchronize, (LastCommandEvent->ZeEvent ));
1437
+ }
1438
+ this ->Mutex .lock ();
1439
+ } else {
1433
1440
ZE2UR_CALL (zeHostSynchronize, (LastCommandEvent->ZeEvent ));
1434
1441
}
1435
- this ->Mutex .lock ();
1436
1442
1437
1443
// clean up all events known to have been completed as well,
1438
1444
// so they can be reused later
@@ -1460,13 +1466,17 @@ ur_result_t ur_queue_handle_t_::synchronize() {
1460
1466
} else {
1461
1467
for (auto &ZeQueue : QueueGroup.second .ZeQueues )
1462
1468
if (ZeQueue) {
1463
- this ->Mutex .unlock ();
1464
- {
1465
- std::scoped_lock<ur_shared_mutex> Lock (
1466
- this ->zeQueueInUseMutex );
1469
+ if (UrL0QueueSyncNonBlocking) {
1470
+ this ->Mutex .unlock ();
1471
+ {
1472
+ std::scoped_lock<ur_shared_mutex> Lock (
1473
+ this ->zeQueueInUseMutex );
1474
+ ZE2UR_CALL (zeHostSynchronize, (ZeQueue));
1475
+ }
1476
+ this ->Mutex .lock ();
1477
+ } else {
1467
1478
ZE2UR_CALL (zeHostSynchronize, (ZeQueue));
1468
1479
}
1469
- this ->Mutex .lock ();
1470
1480
}
1471
1481
}
1472
1482
}
@@ -1477,12 +1487,9 @@ ur_result_t ur_queue_handle_t_::synchronize() {
1477
1487
1478
1488
// With the entire queue synchronized, the active barriers must be done so we
1479
1489
// can remove them.
1480
- if (auto Res = ActiveBarriers.clear ()) {
1481
- this ->Mutex .unlock ();
1490
+ if (auto Res = ActiveBarriers.clear ())
1482
1491
return Res;
1483
- }
1484
1492
1485
- this ->Mutex .unlock ();
1486
1493
return UR_RESULT_SUCCESS;
1487
1494
}
1488
1495
0 commit comments