Skip to content

Commit 04d880d

Browse files
Xewar313 authored and kbenzie committed
Fix synchronization between command_list_manager usages (#17297)
Changes: Command_list_manager no longer synchronizes its calls; instead, the responsibility to ensure exclusivity belongs to the caller. To add synchronization, I implemented a mechanism similar to a Rust lock, as suggested in intel/llvm#17061 (comment).
1 parent 5a0c90c commit 04d880d

8 files changed

+231
-136
lines changed

source/adapters/level_zero/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
158158
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.hpp
159159
${CMAKE_CURRENT_SOURCE_DIR}/v2/kernel.hpp
160160
${CMAKE_CURRENT_SOURCE_DIR}/v2/memory.hpp
161+
${CMAKE_CURRENT_SOURCE_DIR}/v2/lockable.hpp
161162
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
162163
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
163164
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp

source/adapters/level_zero/v2/command_buffer.cpp

+41-20
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
4040

4141
ur_result_t ur_exp_command_buffer_handle_t_::finalizeCommandBuffer() {
  // Appending to a command list from multiple threads is not allowed, so take
  // the exclusive lock on the manager for the whole finalize step.
  auto lockedCommandList = commandListManager.lock();
  UR_ASSERT(!isFinalized, UR_RESULT_ERROR_INVALID_OPERATION);
  // Close the command list so it is ready for dispatch.
  ZE2UR_CALL(zeCommandListClose, (lockedCommandList->getZeCommandList()));
  isFinalized = true;
  return UR_RESULT_SUCCESS;
}
@@ -130,7 +130,8 @@ ur_result_t urCommandBufferAppendKernelLaunchExp(
130130
std::ignore = numKernelAlternatives;
131131
std::ignore = kernelAlternatives;
132132
std::ignore = command;
133-
UR_CALL(commandBuffer->commandListManager.appendKernelLaunch(
133+
auto commandListLocked = commandBuffer->commandListManager.lock();
134+
UR_CALL(commandListLocked->appendKernelLaunch(
134135
hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, 0,
135136
nullptr, nullptr));
136137
return UR_RESULT_SUCCESS;
@@ -157,8 +158,9 @@ ur_result_t urCommandBufferAppendUSMMemcpyExp(
157158

158159
std::ignore = phCommand;
159160
// Responsibility of UMD to offload to copy engine
160-
UR_CALL(hCommandBuffer->commandListManager.appendUSMMemcpy(
161-
false, pDst, pSrc, size, 0, nullptr, nullptr));
161+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
162+
UR_CALL(commandListLocked->appendUSMMemcpy(false, pDst, pSrc, size, 0,
163+
nullptr, nullptr));
162164

163165
return UR_RESULT_SUCCESS;
164166
} catch (...) {
@@ -185,7 +187,8 @@ ur_result_t urCommandBufferAppendMemBufferCopyExp(
185187

186188
std::ignore = phCommand;
187189
// Responsibility of UMD to offload to copy engine
188-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferCopy(
190+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
191+
UR_CALL(commandListLocked->appendMemBufferCopy(
189192
hSrcMem, hDstMem, srcOffset, dstOffset, size, 0, nullptr, nullptr));
190193

191194
return UR_RESULT_SUCCESS;
@@ -213,8 +216,9 @@ ur_result_t urCommandBufferAppendMemBufferWriteExp(
213216

214217
std::ignore = phCommand;
215218
// Responsibility of UMD to offload to copy engine
216-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferWrite(
217-
hBuffer, false, offset, size, pSrc, 0, nullptr, nullptr));
219+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
220+
UR_CALL(commandListLocked->appendMemBufferWrite(hBuffer, false, offset, size,
221+
pSrc, 0, nullptr, nullptr));
218222

219223
return UR_RESULT_SUCCESS;
220224
} catch (...) {
@@ -241,8 +245,9 @@ ur_result_t urCommandBufferAppendMemBufferReadExp(
241245
std::ignore = phCommand;
242246

243247
// Responsibility of UMD to offload to copy engine
244-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferRead(
245-
hBuffer, false, offset, size, pDst, 0, nullptr, nullptr));
248+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
249+
UR_CALL(commandListLocked->appendMemBufferRead(hBuffer, false, offset, size,
250+
pDst, 0, nullptr, nullptr));
246251

247252
return UR_RESULT_SUCCESS;
248253
} catch (...) {
@@ -271,7 +276,8 @@ ur_result_t urCommandBufferAppendMemBufferCopyRectExp(
271276

272277
std::ignore = phCommand;
273278
// Responsibility of UMD to offload to copy engine
274-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferCopyRect(
279+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
280+
UR_CALL(commandListLocked->appendMemBufferCopyRect(
275281
hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch,
276282
srcSlicePitch, dstRowPitch, dstSlicePitch, 0, nullptr, nullptr));
277283

@@ -303,7 +309,8 @@ ur_result_t urCommandBufferAppendMemBufferWriteRectExp(
303309
std::ignore = phCommand;
304310

305311
// Responsibility of UMD to offload to copy engine
306-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferWriteRect(
312+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
313+
UR_CALL(commandListLocked->appendMemBufferWriteRect(
307314
hBuffer, false, bufferOffset, hostOffset, region, bufferRowPitch,
308315
bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, 0, nullptr,
309316
nullptr));
@@ -336,7 +343,8 @@ ur_result_t urCommandBufferAppendMemBufferReadRectExp(
336343
std::ignore = phCommand;
337344

338345
// Responsibility of UMD to offload to copy engine
339-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferReadRect(
346+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
347+
UR_CALL(commandListLocked->appendMemBufferReadRect(
340348
hBuffer, false, bufferOffset, hostOffset, region, bufferRowPitch,
341349
bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, 0, nullptr,
342350
nullptr));
@@ -366,8 +374,9 @@ ur_result_t urCommandBufferAppendUSMFillExp(
366374

367375
std::ignore = phCommand;
368376

369-
UR_CALL(hCommandBuffer->commandListManager.appendUSMFill(
370-
pMemory, patternSize, pPattern, size, 0, nullptr, nullptr));
377+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
378+
UR_CALL(commandListLocked->appendUSMFill(pMemory, patternSize, pPattern, size,
379+
0, nullptr, nullptr));
371380
return UR_RESULT_SUCCESS;
372381
} catch (...) {
373382
return exceptionToResult(std::current_exception());
@@ -393,7 +402,8 @@ ur_result_t urCommandBufferAppendMemBufferFillExp(
393402

394403
std::ignore = phCommand;
395404

396-
UR_CALL(hCommandBuffer->commandListManager.appendMemBufferFill(
405+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
406+
UR_CALL(commandListLocked->appendMemBufferFill(
397407
hBuffer, pPattern, patternSize, offset, size, 0, nullptr, nullptr));
398408
return UR_RESULT_SUCCESS;
399409
} catch (...) {
@@ -420,8 +430,9 @@ ur_result_t urCommandBufferAppendUSMPrefetchExp(
420430

421431
std::ignore = phCommand;
422432

423-
UR_CALL(hCommandBuffer->commandListManager.appendUSMPrefetch(
424-
pMemory, size, flags, 0, nullptr, nullptr));
433+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
434+
UR_CALL(commandListLocked->appendUSMPrefetch(pMemory, size, flags, 0, nullptr,
435+
nullptr));
425436

426437
return UR_RESULT_SUCCESS;
427438
} catch (...) {
@@ -447,8 +458,8 @@ ur_result_t urCommandBufferAppendUSMAdviseExp(
447458

448459
std::ignore = phCommand;
449460

450-
UR_CALL(hCommandBuffer->commandListManager.appendUSMAdvise(pMemory, size,
451-
advice, nullptr));
461+
auto commandListLocked = hCommandBuffer->commandListManager.lock();
462+
UR_CALL(commandListLocked->appendUSMAdvise(pMemory, size, advice, nullptr));
452463

453464
return UR_RESULT_SUCCESS;
454465
} catch (...) {
@@ -483,4 +494,14 @@ urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
483494
return exceptionToResult(std::current_exception());
484495
}
485496

497+
/// Dispatches a finalized command buffer on the given queue.
/// Thin forwarder to the queue implementation's enqueueCommandBufferExp;
/// any thrown exception is converted to a ur_result_t error code.
ur_result_t urCommandBufferEnqueueExp(
    ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t UrQueue,
    uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
    ur_event_handle_t *Event) {
  try {
    auto &queueImpl = UrQueue->get();
    return queueImpl.enqueueCommandBufferExp(CommandBuffer, NumEventsInWaitList,
                                             EventWaitList, Event);
  } catch (...) {
    return exceptionToResult(std::current_exception());
  }
}
506+
486507
} // namespace ur::level_zero

source/adapters/level_zero/v2/command_buffer.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "common.hpp"
1414
#include "context.hpp"
1515
#include "kernel.hpp"
16+
#include "lockable.hpp"
1617
#include "queue_api.hpp"
1718
#include <ze_api.h>
1819

@@ -24,7 +25,7 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
2425

2526
~ur_exp_command_buffer_handle_t_() = default;
2627

27-
ur_command_list_manager commandListManager;
28+
lockable<ur_command_list_manager> commandListManager;
2829

2930
ur_result_t finalizeCommandBuffer();
3031
// Indicates if command-buffer commands can be updated after it is closed.

source/adapters/level_zero/v2/command_list_manager.cpp

+10-26
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,7 @@ ur_result_t ur_command_list_manager::appendKernelLaunch(
196196

197197
ze_kernel_handle_t hZeKernel = hKernel->getZeHandle(device);
198198

199-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> Lock(this->Mutex,
200-
hKernel->Mutex);
199+
std::scoped_lock<ur_shared_mutex> Lock(hKernel->Mutex);
201200

202201
ze_group_count_t zeThreadGroupDimensions{1, 1, 1};
203202
uint32_t WG[3]{};
@@ -235,8 +234,6 @@ ur_result_t ur_command_list_manager::appendUSMMemcpy(
235234
ur_event_handle_t *phEvent) {
236235
TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMMemcpy");
237236

238-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
239-
240237
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY);
241238

242239
auto [pWaitEvents, numWaitEvents] =
@@ -262,8 +259,7 @@ ur_result_t ur_command_list_manager::appendMemBufferFill(
262259
auto hBuffer = hMem->getBuffer();
263260
UR_ASSERT(offset + size <= hBuffer->getSize(), UR_RESULT_ERROR_INVALID_SIZE);
264261

265-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
266-
hBuffer->getMutex());
262+
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
267263

268264
return appendGenericFillUnlocked(hBuffer, offset, patternSize, pPattern, size,
269265
numEventsInWaitList, phEventWaitList,
@@ -276,8 +272,6 @@ ur_result_t ur_command_list_manager::appendUSMFill(
276272
ur_event_handle_t *phEvent) {
277273
TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMFill");
278274

279-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
280-
281275
ur_usm_handle_t dstHandle(context, size, pMem);
282276
return appendGenericFillUnlocked(&dstHandle, 0, patternSize, pPattern, size,
283277
numEventsInWaitList, phEventWaitList,
@@ -292,8 +286,6 @@ ur_result_t ur_command_list_manager::appendUSMPrefetch(
292286

293287
std::ignore = flags;
294288

295-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
296-
297289
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH);
298290

299291
auto [pWaitEvents, numWaitEvents] =
@@ -320,8 +312,6 @@ ur_command_list_manager::appendUSMAdvise(const void *pMem, size_t size,
320312
ur_event_handle_t *phEvent) {
321313
TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMAdvise");
322314

323-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
324-
325315
auto zeAdvice = ur_cast<ze_memory_advice_t>(advice);
326316

327317
auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE);
@@ -354,8 +344,7 @@ ur_result_t ur_command_list_manager::appendMemBufferRead(
354344

355345
ur_usm_handle_t dstHandle(context, size, pDst);
356346

357-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
358-
hBuffer->getMutex());
347+
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
359348

360349
return appendGenericCopyUnlocked(hBuffer, &dstHandle, blockingRead, offset, 0,
361350
size, numEventsInWaitList, phEventWaitList,
@@ -373,8 +362,7 @@ ur_result_t ur_command_list_manager::appendMemBufferWrite(
373362

374363
ur_usm_handle_t srcHandle(context, size, pSrc);
375364

376-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
377-
hBuffer->getMutex());
365+
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
378366

379367
return appendGenericCopyUnlocked(
380368
&srcHandle, hBuffer, blockingWrite, 0, offset, size, numEventsInWaitList,
@@ -395,8 +383,8 @@ ur_result_t ur_command_list_manager::appendMemBufferCopy(
395383
UR_ASSERT(dstOffset + size <= hBufferDst->getSize(),
396384
UR_RESULT_ERROR_INVALID_SIZE);
397385

398-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex, ur_shared_mutex> lock(
399-
this->Mutex, hBufferSrc->getMutex(), hBufferDst->getMutex());
386+
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(
387+
hBufferSrc->getMutex(), hBufferDst->getMutex());
400388

401389
return appendGenericCopyUnlocked(hBufferSrc, hBufferDst, false, srcOffset,
402390
dstOffset, size, numEventsInWaitList,
@@ -415,8 +403,7 @@ ur_result_t ur_command_list_manager::appendMemBufferReadRect(
415403
auto hBuffer = hMem->getBuffer();
416404
ur_usm_handle_t dstHandle(context, 0, pDst);
417405

418-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
419-
hBuffer->getMutex());
406+
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
420407

421408
return appendRegionCopyUnlocked(
422409
hBuffer, &dstHandle, blockingRead, bufferOrigin, hostOrigin, region,
@@ -436,8 +423,7 @@ ur_result_t ur_command_list_manager::appendMemBufferWriteRect(
436423
auto hBuffer = hMem->getBuffer();
437424
ur_usm_handle_t srcHandle(context, 0, pSrc);
438425

439-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
440-
hBuffer->getMutex());
426+
std::scoped_lock<ur_shared_mutex> lock(hBuffer->getMutex());
441427

442428
return appendRegionCopyUnlocked(
443429
&srcHandle, hBuffer, blockingWrite, hostOrigin, bufferOrigin, region,
@@ -457,8 +443,8 @@ ur_result_t ur_command_list_manager::appendMemBufferCopyRect(
457443
auto hBufferSrc = hSrc->getBuffer();
458444
auto hBufferDst = hDst->getBuffer();
459445

460-
std::scoped_lock<ur_shared_mutex, ur_shared_mutex, ur_shared_mutex> lock(
461-
this->Mutex, hBufferSrc->getMutex(), hBufferDst->getMutex());
446+
std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(
447+
hBufferSrc->getMutex(), hBufferDst->getMutex());
462448

463449
return appendRegionCopyUnlocked(
464450
hBufferSrc, hBufferDst, false, srcOrigin, dstOrigin, region, srcRowPitch,
@@ -475,8 +461,6 @@ ur_result_t ur_command_list_manager::appendUSMMemcpy2D(
475461
ur_rect_offset_t zeroOffset{0, 0, 0};
476462
ur_rect_region_t region{width, height, 0};
477463

478-
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
479-
480464
ur_usm_handle_t srcHandle(context, 0, pSrc);
481465
ur_usm_handle_t dstHandle(context, 0, pDst);
482466

source/adapters/level_zero/v2/command_list_manager.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,14 @@ struct wait_list_view {
3131
}
3232
};
3333

34-
struct ur_command_list_manager : public _ur_object {
34+
struct ur_command_list_manager {
3535

3636
ur_command_list_manager(ur_context_handle_t context,
3737
ur_device_handle_t device,
3838
v2::raii::command_list_unique_handle &&commandList,
3939
v2::event_flags_t flags = v2::EVENT_FLAGS_COUNTER,
4040
ur_queue_t_ *queue = nullptr);
41+
ur_command_list_manager(ur_command_list_manager &&src) = default;
4142
~ur_command_list_manager();
4243

4344
ur_result_t appendKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim,
+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
//===--------- lockable.hpp - Level Zero Adapter --------------------------===//
//
// Copyright (C) 2024 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once
#include <mutex>

/// RAII access proxy handed out by lockable<T>. While a locked<T> instance is
/// alive, the owned std::unique_lock keeps the guarding mutex held, so the
/// holder has exclusive access to the wrapped object through operator->.
template <typename T> struct locked {
public:
  /// Takes ownership of an already-acquired lock that guards *object.
  locked(T *object, std::unique_lock<std::mutex> &&lock)
      : lock_(std::move(lock)), object_(object) {}

  /// Access the guarded object; valid only for the lifetime of this proxy.
  T *operator->() { return object_; }

private:
  std::unique_lock<std::mutex> lock_; // keeps the mutex held
  T *object_;                         // guarded object, not owned
};

/*
lockable<T> wraps a T object in an exclusive-access lock, similar to the
Mutex<T> type used in Rust.

construction:
  lockable<X> l(arguments, to, construct, X);

access without synchronization (caller must guarantee exclusivity):
  X *obj_ptr = l.get_no_lock();
  obj_ptr->print_name();

exclusive access to the object kept in l:
  // as long as the lock object exists, the thread has exclusive access to
  // the underlying object
  locked<X> lock = l.lock();
  // the object is accessed through the -> operator on the lock object
  lock->print_name();
*/
template <typename T> struct lockable {
public:
  /// Constructs the guarded object in place from the given arguments.
  template <typename... Args>
  lockable(Args &&...args) : object_(std::forward<Args>(args)...) {}

  /// Blocks until the mutex is acquired, then returns a proxy that keeps it
  /// held and exposes the guarded object.
  locked<T> lock() {
    std::unique_lock lock{mut_};
    return locked<T>(&object_, std::move(lock));
  }

  /// Unsynchronized access; the caller is responsible for exclusivity.
  T *get_no_lock() noexcept { return &object_; }

private:
  T object_;
  std::mutex mut_;
};

0 commit comments

Comments
 (0)