Skip to content

Commit bf7a654

Browse files
authored
Merge pull request #2516 from igchor/sycl_e2e_fixes
[L0 v2][CTS] Fix problems reported by SYCL e2e tests
2 parents 75eb585 + 96a845d commit bf7a654

12 files changed

+235
-89
lines changed

source/adapters/level_zero/queue.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -2293,6 +2293,10 @@ ur_result_t ur_queue_handle_t_::createCommandList(
22932293
IsInOrderList = true;
22942294
}
22952295

2296+
logger::debug(
2297+
"create command list ordinal: {}, type: regular, device: {}, inOrder: {}",
2298+
QueueGroupOrdinal, Device->ZeDevice, IsInOrderList);
2299+
22962300
ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
22972301
&ZeCommandListDesc, &ZeCommandList));
22982302

@@ -2459,6 +2463,10 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() {
24592463
"(round robin in [{}, {}]) priority = {}",
24602464
ZeCommandQueueDesc.ordinal, ZeCommandQueueDesc.index,
24612465
LowerIndex, UpperIndex, Priority);
2466+
logger::debug("create command list ordinal: {}, type: immediate, device: "
2467+
"{}, inOrder: {}",
2468+
ZeCommandQueueDesc.ordinal, Queue->Device->ZeDevice,
2469+
isInOrderList);
24622470

24632471
ZE_CALL_NOCHECK(zeCommandListCreateImmediate,
24642472
(Queue->Context->ZeContext, Queue->Device->ZeDevice,

source/adapters/level_zero/v2/api.cpp

-11
Original file line numberDiff line numberDiff line change
@@ -474,15 +474,4 @@ ur_result_t urCommandBufferCommandGetInfoExp(
474474
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
475475
}
476476

477-
ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem,
478-
size_t size) {
479-
logger::error("{} function not implemented!", __FUNCTION__);
480-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
481-
}
482-
483-
ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem) {
484-
logger::error("{} function not implemented!", __FUNCTION__);
485-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
486-
}
487-
488477
} // namespace ur::level_zero

source/adapters/level_zero/v2/command_list_cache.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
6969
QueueDesc.index = ImmCmdDesc->Index.value();
7070
}
7171
QueueDesc.pNext = &offloadDesc;
72+
73+
logger::debug("create command list ordinal: {}, type: immediate, device: "
74+
"{}, inOrder: {}",
75+
ImmCmdDesc->Ordinal, ImmCmdDesc->ZeDevice,
76+
ImmCmdDesc->IsInOrder);
77+
7278
ZE2UR_CALL_THROWS(
7379
zeCommandListCreateImmediate,
7480
(ZeContext, ImmCmdDesc->ZeDevice, &QueueDesc, &ZeCommandList));
@@ -81,6 +87,11 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
8187
CmdListDesc.commandQueueGroupOrdinal = RegCmdDesc.Ordinal;
8288
CmdListDesc.pNext = &offloadDesc;
8389

90+
logger::debug("create command list ordinal: {}, type: regular, device: "
91+
"{}, inOrder: {}",
92+
RegCmdDesc.Ordinal, RegCmdDesc.ZeDevice,
93+
RegCmdDesc.IsInOrder); // regular (non-immediate) list: created via zeCommandListCreate below
94+
8495
ze_command_list_handle_t ZeCommandList;
8596
ZE2UR_CALL_THROWS(zeCommandListCreate, (ZeContext, RegCmdDesc.ZeDevice,
8697
&CmdListDesc, &ZeCommandList));

source/adapters/level_zero/v2/event_provider_normal.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ provider_pool::provider_pool(ur_context_handle_t context, queue_type queue,
5050
devices.push_back(d->ZeDevice);
5151
}
5252

53+
logger::debug("ze_event_pool_desc_t flags set to: {}", desc.flags);
54+
5355
ZE2UR_CALL_THROWS(zeEventPoolCreate,
5456
(context->getZeHandle(), &desc, devices.size(),
5557
devices.data(), pool.ptr()));

source/adapters/level_zero/v2/kernel.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -287,8 +287,12 @@ ur_result_t ur_kernel_handle_t_::prepareForSubmission(
287287
(hZeKernel, groupSizeX, groupSizeY, groupSizeZ));
288288

289289
for (auto &pending : pending_allocations) {
290-
auto zePtr = pending.hMem->getDevicePtr(hDevice, pending.mode, 0,
291-
pending.hMem->getSize(), migrate);
290+
void *zePtr = nullptr;
291+
if (pending.hMem) {
292+
// NULL is a valid value
293+
zePtr = pending.hMem->getDevicePtr(hDevice, pending.mode, 0,
294+
pending.hMem->getSize(), migrate);
295+
}
292296
UR_CALL(setArgPointer(pending.argIndex, nullptr, zePtr));
293297
}
294298
pending_allocations.clear();

source/adapters/level_zero/v2/memory.cpp

+54-52
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,7 @@ ur_integrated_mem_handle_t::ur_integrated_mem_handle_t(
115115
if (!ownHostPtr) {
116116
return;
117117
}
118-
auto ret = hContext->getDefaultUSMPool()->free(ptr);
119-
if (ret != UR_RESULT_SUCCESS) {
120-
logger::error("Failed to free host memory: {}", ret);
121-
}
118+
ZE_CALL_NOCHECK(zeMemFree, (hContext->getZeHandle(), ptr));
122119
});
123120
}
124121

@@ -209,7 +206,7 @@ ur_discrete_mem_handle_t::ur_discrete_mem_handle_t(
209206
device_access_mode_t accessMode)
210207
: ur_mem_handle_t_(hContext, size, accessMode),
211208
deviceAllocations(hContext->getPlatform()->getNumDevices()),
212-
activeAllocationDevice(nullptr), hostAllocations() {
209+
activeAllocationDevice(nullptr), mapToPtr(hostPtr), hostAllocations() {
213210
if (hostPtr) {
214211
auto initialDevice = hContext->getDevices()[0];
215212
UR_CALL_THROWS(migrateBufferTo(initialDevice, hostPtr, size));
@@ -234,10 +231,7 @@ ur_discrete_mem_handle_t::ur_discrete_mem_handle_t(
234231
if (!ownZePtr) {
235232
return;
236233
}
237-
auto ret = hContext->getDefaultUSMPool()->free(ptr);
238-
if (ret != UR_RESULT_SUCCESS) {
239-
logger::error("Failed to free device memory: {}", ret);
240-
}
234+
ZE_CALL_NOCHECK(zeMemFree, (hContext->getZeHandle(), ptr));
241235
});
242236
}
243237
}
@@ -246,12 +240,18 @@ ur_discrete_mem_handle_t::~ur_discrete_mem_handle_t() {
246240
if (!activeAllocationDevice || !writeBackPtr)
247241
return;
248242

249-
auto srcPtr = ur_cast<char *>(
250-
deviceAllocations[activeAllocationDevice->Id.value()].get());
243+
auto srcPtr = getActiveDeviceAlloc();
251244
synchronousZeCopy(hContext, activeAllocationDevice, writeBackPtr, srcPtr,
252245
getSize());
253246
}
254247

248+
void *ur_discrete_mem_handle_t::getActiveDeviceAlloc(size_t offset) {
249+
assert(activeAllocationDevice);
250+
return ur_cast<char *>(
251+
deviceAllocations[activeAllocationDevice->Id.value()].get()) +
252+
offset;
253+
}
254+
255255
void *ur_discrete_mem_handle_t::getDevicePtr(
256256
ur_device_handle_t hDevice, device_access_mode_t access, size_t offset,
257257
size_t size, std::function<void(void *src, void *dst, size_t)> migrate) {
@@ -272,10 +272,8 @@ void *ur_discrete_mem_handle_t::getDevicePtr(
272272
hDevice = activeAllocationDevice;
273273
}
274274

275-
char *ptr;
276275
if (activeAllocationDevice == hDevice) {
277-
ptr = ur_cast<char *>(deviceAllocations[hDevice->Id.value()].get());
278-
return ptr + offset;
276+
return getActiveDeviceAlloc(offset);
279277
}
280278

281279
auto &p2pDevices = hContext->getP2PDevices(hDevice);
@@ -288,9 +286,7 @@ void *ur_discrete_mem_handle_t::getDevicePtr(
288286
}
289287

290288
// TODO: see if it's better to migrate the memory to the specified device
291-
return ur_cast<char *>(
292-
deviceAllocations[activeAllocationDevice->Id.value()].get()) +
293-
offset;
289+
return getActiveDeviceAlloc(offset);
294290
}
295291

296292
void *ur_discrete_mem_handle_t::mapHostPtr(
@@ -299,55 +295,63 @@ void *ur_discrete_mem_handle_t::mapHostPtr(
299295
TRACK_SCOPE_LATENCY("ur_discrete_mem_handle_t::mapHostPtr");
300296
// TODO: use async alloc?
301297

302-
void *ptr;
303-
UR_CALL_THROWS(hContext->getDefaultUSMPool()->allocate(
304-
hContext, nullptr, nullptr, UR_USM_TYPE_HOST, size, &ptr));
298+
void *ptr = mapToPtr;
299+
if (!ptr) {
300+
UR_CALL_THROWS(hContext->getDefaultUSMPool()->allocate(
301+
hContext, nullptr, nullptr, UR_USM_TYPE_HOST, size, &ptr));
302+
}
305303

306-
hostAllocations.emplace_back(ptr, size, offset, flags);
304+
usm_unique_ptr_t mappedPtr =
305+
usm_unique_ptr_t(ptr, [ownsAlloc = (mapToPtr == nullptr), this](void *p) {
306+
if (ownsAlloc) { // only memory allocated from the USM pool above is ours to free; a non-null mapToPtr is the caller's host pointer
307+
auto ret = hContext->getDefaultUSMPool()->free(p);
308+
if (ret != UR_RESULT_SUCCESS) {
309+
logger::error("Failed to free mapped memory: {}", ret);
310+
}
311+
}
312+
});
313+
314+
hostAllocations.emplace_back(std::move(mappedPtr), size, offset, flags);
307315

308316
if (activeAllocationDevice && (flags & UR_MAP_FLAG_READ)) {
309-
auto srcPtr =
310-
ur_cast<char *>(
311-
deviceAllocations[activeAllocationDevice->Id.value()].get()) +
312-
offset;
313-
migrate(srcPtr, hostAllocations.back().ptr, size);
317+
auto srcPtr = getActiveDeviceAlloc(offset);
318+
migrate(srcPtr, hostAllocations.back().ptr.get(), size);
314319
}
315320

316-
return hostAllocations.back().ptr;
321+
return hostAllocations.back().ptr.get();
317322
}
318323

319324
void ur_discrete_mem_handle_t::unmapHostPtr(
320325
void *pMappedPtr,
321326
std::function<void(void *src, void *dst, size_t)> migrate) {
322327
TRACK_SCOPE_LATENCY("ur_discrete_mem_handle_t::unmapHostPtr");
323328

324-
for (auto &hostAllocation : hostAllocations) {
325-
if (hostAllocation.ptr == pMappedPtr) {
326-
void *devicePtr = nullptr;
327-
if (activeAllocationDevice) {
328-
devicePtr =
329-
ur_cast<char *>(
330-
deviceAllocations[activeAllocationDevice->Id.value()].get()) +
331-
hostAllocation.offset;
332-
} else if (!(hostAllocation.flags &
333-
UR_MAP_FLAG_WRITE_INVALIDATE_REGION)) {
334-
devicePtr = ur_cast<char *>(getDevicePtr(
335-
hContext->getDevices()[0], device_access_mode_t::read_only,
336-
hostAllocation.offset, hostAllocation.size, migrate));
337-
}
329+
auto hostAlloc =
330+
std::find_if(hostAllocations.begin(), hostAllocations.end(),
331+
[pMappedPtr](const host_allocation_desc_t &desc) {
332+
return desc.ptr.get() == pMappedPtr;
333+
});
338334

339-
if (devicePtr) {
340-
migrate(hostAllocation.ptr, devicePtr, hostAllocation.size);
341-
}
335+
if (hostAlloc == hostAllocations.end()) {
336+
throw UR_RESULT_ERROR_INVALID_ARGUMENT;
337+
}
342338

343-
// TODO: use async free here?
344-
UR_CALL_THROWS(hContext->getDefaultUSMPool()->free(hostAllocation.ptr));
345-
return;
346-
}
339+
bool shouldMigrateToDevice =
340+
!(hostAlloc->flags & UR_MAP_FLAG_WRITE_INVALIDATE_REGION);
341+
342+
if (!activeAllocationDevice && shouldMigrateToDevice) {
343+
allocateOnDevice(hContext->getDevices()[0], getSize());
344+
}
345+
346+
// TODO: tests require that memory is migrated even for
347+
// UR_MAP_FLAG_WRITE_INVALIDATE_REGION when there is an active device
348+
// allocation. is this correct?
349+
if (activeAllocationDevice) {
350+
migrate(hostAlloc->ptr.get(), getActiveDeviceAlloc(hostAlloc->offset),
351+
hostAlloc->size);
347352
}
348353

349-
// No mapping found
350-
throw UR_RESULT_ERROR_INVALID_ARGUMENT;
354+
hostAllocations.erase(hostAlloc);
351355
}
352356

353357
static bool useHostBuffer(ur_context_handle_t hContext) {
@@ -419,8 +423,6 @@ ur_result_t urMemBufferCreate(ur_context_handle_t hContext,
419423
auto accessMode = getDeviceAccessMode(flags);
420424

421425
if (useHostBuffer(hContext)) {
422-
// TODO: assert that if hostPtr is set, either UR_MEM_FLAG_USE_HOST_POINTER
423-
// or UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER is set?
424426
auto hostPtrAction =
425427
flags & UR_MEM_FLAG_USE_HOST_POINTER
426428
? ur_integrated_mem_handle_t::host_ptr_action_t::import

source/adapters/level_zero/v2/memory.hpp

+7-4
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,11 @@ struct ur_integrated_mem_handle_t : public ur_mem_handle_t_ {
9898
};
9999

100100
struct host_allocation_desc_t {
101-
host_allocation_desc_t(void *ptr, size_t size, size_t offset,
101+
host_allocation_desc_t(usm_unique_ptr_t ptr, size_t size, size_t offset,
102102
ur_map_flags_t flags)
103-
: ptr(ptr), size(size), offset(offset), flags(flags) {}
103+
: ptr(std::move(ptr)), size(size), offset(offset), flags(flags) {}
104104

105-
void *ptr;
105+
usm_unique_ptr_t ptr;
106106
size_t size;
107107
size_t offset;
108108
ur_map_flags_t flags;
@@ -146,10 +146,13 @@ struct ur_discrete_mem_handle_t : public ur_mem_handle_t_ {
146146
// If not null, copy the buffer content back to this memory on release.
147147
void *writeBackPtr = nullptr;
148148

149+
// If not null, mapHostPtr should map memory to this ptr
150+
void *mapToPtr = nullptr;
151+
149152
std::vector<host_allocation_desc_t> hostAllocations;
150153

154+
void *getActiveDeviceAlloc(size_t offset = 0);
151155
void *allocateOnDevice(ur_device_handle_t hDevice, size_t size);
152-
153156
ur_result_t migrateBufferTo(ur_device_handle_t hDevice, void *src,
154157
size_t size);
155158
};

source/adapters/level_zero/v2/queue_immediate_in_order.cpp

+28-17
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,15 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName,
132132
case UR_QUEUE_INFO_DEVICE_DEFAULT:
133133
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
134134
case UR_QUEUE_INFO_EMPTY: {
135-
// We can't tell if the queue is empty as we don't hold to any events
136-
return ReturnValue(false);
135+
auto status = ZE_CALL_NOCHECK(zeCommandListHostSynchronize,
136+
(handler.commandList.get(), 0));
137+
if (status == ZE_RESULT_SUCCESS) {
138+
return ReturnValue(true);
139+
} else if (status == ZE_RESULT_NOT_READY) {
140+
return ReturnValue(false);
141+
} else {
142+
return ze2urResult(status);
143+
}
137144
}
138145
default:
139146
logger::error("Unsupported ParamName in urQueueGetInfo: "
@@ -660,10 +667,11 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap(
660667
// If memory was not migrated, we need to wait on the events here.
661668
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
662669
(handler.commandList.get(), waitList.second, waitList.first));
663-
if (signalEvent) {
664-
ZE2UR_CALL(zeCommandListAppendSignalEvent,
665-
(handler.commandList.get(), signalEvent->getZeEvent()));
666-
}
670+
}
671+
672+
if (signalEvent) {
673+
ZE2UR_CALL(zeCommandListAppendSignalEvent,
674+
(handler.commandList.get(), signalEvent->getZeEvent()));
667675
}
668676

669677
if (blockingMap) {
@@ -872,17 +880,20 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy2D(
872880
bool blocking, void *pDst, size_t dstPitch, const void *pSrc,
873881
size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList,
874882
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
875-
std::ignore = blocking;
876-
std::ignore = pDst;
877-
std::ignore = dstPitch;
878-
std::ignore = pSrc;
879-
std::ignore = srcPitch;
880-
std::ignore = width;
881-
std::ignore = height;
882-
std::ignore = numEventsInWaitList;
883-
std::ignore = phEventWaitList;
884-
std::ignore = phEvent;
885-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
883+
TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueUSMMemcpy2D");
884+
885+
ur_rect_offset_t zeroOffset{0, 0, 0};
886+
ur_rect_region_t region{width, height, 0};
887+
888+
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
889+
890+
ur_usm_handle_t_ srcHandle(hContext, 0, pSrc);
891+
ur_usm_handle_t_ dstHandle(hContext, 0, pDst);
892+
893+
return enqueueRegionCopyUnlocked(&srcHandle, &dstHandle, blocking, zeroOffset,
894+
zeroOffset, region, srcPitch, 0, dstPitch, 0,
895+
numEventsInWaitList, phEventWaitList,
896+
phEvent, UR_COMMAND_MEM_BUFFER_COPY_RECT);
886897
}
887898

888899
static void *getGlobalPointerFromModule(ze_module_handle_t hModule,

0 commit comments

Comments
 (0)