Skip to content

Commit edd5ae9

Browse files
committed
[L0] Simplify and allow disabling of queue sync non blocking
Signed-off-by: Neil R. Spruit <[email protected]>
1 parent b28525c commit edd5ae9

File tree

3 files changed

+48
-33
lines changed

3 files changed

+48
-33
lines changed

source/adapters/level_zero/common.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,16 @@ static const uint32_t UrL0Serialize = [] {
231231
return SerializeModeValue;
232232
}();
233233

234+
// Controls whether queue synchronization uses the non-blocking locking mode.
// The value is read once, at static-initialization time, from the
// UR_L0_QUEUE_SYNCHRONIZE_NON_BLOCKING environment variable:
//   - unset           -> 1 (mode enabled, the default)
//   - "0"             -> 0 (mode disabled)
//   - any other value -> the parsed integer; callers test it for non-zero.
// Text that does not start with a number parses as 0, i.e. disables the mode.
static const uint32_t UrL0QueueSyncNonBlocking = [] {
  // Default used when the environment variable is not set.
  uint32_t L0QueueSyncLockingModeValue = 1;
  // The local is deliberately named differently from the variable being
  // initialized so that it does not shadow it.
  if (const char *EnvValue =
          std::getenv("UR_L0_QUEUE_SYNCHRONIZE_NON_BLOCKING")) {
    // std::strtol is used instead of std::atoi because atoi has undefined
    // behavior when the converted value is out of range; strtol saturates.
    // The cast makes the signed -> unsigned conversion explicit.
    L0QueueSyncLockingModeValue =
        static_cast<uint32_t>(std::strtol(EnvValue, nullptr, 10));
  }
  return L0QueueSyncLockingModeValue;
}();
243+
234244
// This class encapsulates actions taken along with a call to Level Zero API.
235245
class ZeCall {
236246
private:

source/adapters/level_zero/event.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
113113
//
114114
// TODO: find a way to do that without blocking the host.
115115

116-
std::unique_lock<ur_shared_mutex> QueueLock(Queue->Mutex, std::defer_lock);
117-
QueueLock.lock();
116+
// Lock automatically releases when this goes out of scope.
117+
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
118118

119119
if (OutEvent) {
120120
UR_CALL(createEventAndAssociateQueue(Queue, OutEvent,
@@ -123,9 +123,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
123123
/* IsInternal */ false));
124124
}
125125

126-
QueueLock.unlock();
127126
UR_CALL(Queue->synchronize());
128-
QueueLock.lock();
129127

130128
if (OutEvent) {
131129
Queue->LastCommandEvent = reinterpret_cast<ur_event_handle_t>(*OutEvent);

source/adapters/level_zero/queue.cpp

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -395,8 +395,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(
395395

396396
std::vector<ur_event_handle_t> EventListToCleanup;
397397
{
398-
std::unique_lock<ur_shared_mutex> QueueLock(Queue->Mutex, std::defer_lock);
399-
QueueLock.lock();
398+
std::scoped_lock<ur_shared_mutex> Lock(Queue->Mutex);
400399

401400
if ((--Queue->RefCountExternal) != 0)
402401
return UR_RESULT_SUCCESS;
@@ -411,11 +410,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(
411410
auto Res = Queue->executeAllOpenCommandLists();
412411

413412
// Make sure all commands get executed.
414-
if (Res == UR_RESULT_SUCCESS) {
415-
QueueLock.unlock();
413+
if (Res == UR_RESULT_SUCCESS)
416414
UR_CALL(Queue->synchronize());
417-
QueueLock.lock();
418-
}
419415

420416
// Destroy all the fences created associated with this queue.
421417
for (auto it = Queue->CommandListMap.begin();
@@ -628,6 +624,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(
628624
ur_queue_handle_t UrQueue ///< [in] handle of the queue to be finished.
629625
) {
630626
if (UrQueue->UsingImmCmdLists) {
627+
// Lock automatically releases when this goes out of scope.
628+
std::scoped_lock<ur_shared_mutex> Lock(UrQueue->Mutex);
631629
UR_CALL(UrQueue->synchronize());
632630
} else {
633631
std::unique_lock<ur_shared_mutex> Lock(UrQueue->Mutex);
@@ -1315,8 +1313,10 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
13151313
for (auto &ZeQueue : QueueGroup.second.ZeQueues)
13161314
if (ZeQueue) {
13171315
{
1318-
std::scoped_lock<ur_shared_mutex> Lock(
1319-
UrQueue->zeQueueInUseMutex);
1316+
if (UrL0QueueSyncNonBlocking) {
1317+
std::scoped_lock<ur_shared_mutex> Lock(
1318+
UrQueue->zeQueueInUseMutex);
1319+
}
13201320
auto ZeResult = ZE_CALL_NOCHECK(zeCommandQueueDestroy, (ZeQueue));
13211321
// Gracefully handle the case that L0 was already unloaded.
13221322
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
@@ -1396,24 +1396,26 @@ ur_result_t CleanupEventListFromResetCmdList(
13961396
// runtime. Need to investigate whether release can be done earlier, at sync
13971397
// points such as this, to reduce total number of active Events.
13981398
ur_result_t ur_queue_handle_t_::synchronize() {
1399-
this->Mutex.lock();
1400-
if (!Healthy) {
1401-
this->Mutex.unlock();
1399+
if (!Healthy)
14021400
return UR_RESULT_SUCCESS;
1403-
}
14041401

14051402
auto syncImmCmdList = [](ur_queue_handle_t_ *Queue,
14061403
ur_command_list_ptr_t ImmCmdList) {
14071404
if (ImmCmdList == Queue->CommandListMap.end())
14081405
return UR_RESULT_SUCCESS;
14091406

14101407
// wait for all commands previously submitted to this immediate command list
1411-
Queue->Mutex.unlock();
1412-
{
1413-
std::scoped_lock<ur_shared_mutex> Lock(Queue->zeQueueInUseMutex);
1408+
if (UrL0QueueSyncNonBlocking) {
1409+
Queue->Mutex.unlock();
1410+
{
1411+
std::scoped_lock<ur_shared_mutex> Lock(Queue->zeQueueInUseMutex);
1412+
ZE2UR_CALL(zeCommandListHostSynchronize,
1413+
(ImmCmdList->first, UINT64_MAX));
1414+
}
1415+
Queue->Mutex.lock();
1416+
} else {
14141417
ZE2UR_CALL(zeCommandListHostSynchronize, (ImmCmdList->first, UINT64_MAX));
14151418
}
1416-
Queue->Mutex.lock();
14171419

14181420
// Cleanup all events from the synced command list.
14191421
CleanupEventListFromResetCmdList(ImmCmdList->second.EventList, true);
@@ -1427,12 +1429,16 @@ ur_result_t ur_queue_handle_t_::synchronize() {
14271429
// zero handle can have device scope, so we can't synchronize the last
14281430
// event.
14291431
if (isInOrderQueue() && !LastCommandEvent->IsDiscarded) {
1430-
this->Mutex.unlock();
1431-
{
1432-
std::scoped_lock<ur_shared_mutex> Lock(this->zeQueueInUseMutex);
1432+
if (UrL0QueueSyncNonBlocking) {
1433+
this->Mutex.unlock();
1434+
{
1435+
std::scoped_lock<ur_shared_mutex> Lock(this->zeQueueInUseMutex);
1436+
ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent));
1437+
}
1438+
this->Mutex.lock();
1439+
} else {
14331440
ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent));
14341441
}
1435-
this->Mutex.lock();
14361442

14371443
// clean up all events known to have been completed as well,
14381444
// so they can be reused later
@@ -1460,13 +1466,17 @@ ur_result_t ur_queue_handle_t_::synchronize() {
14601466
} else {
14611467
for (auto &ZeQueue : QueueGroup.second.ZeQueues)
14621468
if (ZeQueue) {
1463-
this->Mutex.unlock();
1464-
{
1465-
std::scoped_lock<ur_shared_mutex> Lock(
1466-
this->zeQueueInUseMutex);
1469+
if (UrL0QueueSyncNonBlocking) {
1470+
this->Mutex.unlock();
1471+
{
1472+
std::scoped_lock<ur_shared_mutex> Lock(
1473+
this->zeQueueInUseMutex);
1474+
ZE2UR_CALL(zeHostSynchronize, (ZeQueue));
1475+
}
1476+
this->Mutex.lock();
1477+
} else {
14671478
ZE2UR_CALL(zeHostSynchronize, (ZeQueue));
14681479
}
1469-
this->Mutex.lock();
14701480
}
14711481
}
14721482
}
@@ -1477,12 +1487,9 @@ ur_result_t ur_queue_handle_t_::synchronize() {
14771487

14781488
// With the entire queue synchronized, the active barriers must be done so we
14791489
// can remove them.
1480-
if (auto Res = ActiveBarriers.clear()) {
1481-
this->Mutex.unlock();
1490+
if (auto Res = ActiveBarriers.clear())
14821491
return Res;
1483-
}
14841492

1485-
this->Mutex.unlock();
14861493
return UR_RESULT_SUCCESS;
14871494
}
14881495

0 commit comments

Comments
 (0)