Skip to content

Use UMF Proxy pool manager with UMF CUDA memory provider in UR #2659

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions source/adapters/cuda/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ typedef void (*ur_context_extended_deleter_t)(void *user_data);
///

static ur_result_t
CreateHostMemoryProvider(ur_device_handle_t_ *DeviceHandle,
umf_memory_provider_handle_t *MemoryProviderHost) {
CreateHostMemoryProviderPool(ur_device_handle_t_ *DeviceHandle,
umf_memory_provider_handle_t *MemoryProviderHost,
umf_memory_pool_handle_t *MemoryPoolHost) {
umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams = nullptr;

*MemoryProviderHost = nullptr;
Expand All @@ -91,10 +92,20 @@ CreateHostMemoryProvider(ur_device_handle_t_ *DeviceHandle,
umf::cuda_params_unique_handle_t CUMemoryProviderParamsUnique(
CUMemoryProviderParams, umfCUDAMemoryProviderParamsDestroy);

// create UMF CUDA memory provider for the host memory (UMF_MEMORY_TYPE_HOST)
UmfResult = umf::createMemoryProvider(
CUMemoryProviderParamsUnique.get(), 0 /* cuDevice */, context,
UMF_MEMORY_TYPE_HOST, MemoryProviderHost);
UmfResult = umf::setCUMemoryProviderParams(CUMemoryProviderParamsUnique.get(),
0 /* cuDevice */, context,
UMF_MEMORY_TYPE_HOST);
UMF_RETURN_UR_ERROR(UmfResult);

// create UMF CUDA memory provider and pool for the host memory
// (UMF_MEMORY_TYPE_HOST)
UmfResult = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(),
CUMemoryProviderParamsUnique.get(),
MemoryProviderHost);
UMF_RETURN_UR_ERROR(UmfResult);

UmfResult = umfPoolCreate(umfProxyPoolOps(), *MemoryProviderHost, nullptr, 0,
MemoryPoolHost);
UMF_RETURN_UR_ERROR(UmfResult);

return UR_RESULT_SUCCESS;
Expand All @@ -112,8 +123,10 @@ struct ur_context_handle_t_ {
std::vector<ur_device_handle_t> Devices;
std::atomic_uint32_t RefCount;

// UMF CUDA memory provider for the host memory (UMF_MEMORY_TYPE_HOST)
// UMF CUDA memory provider and pool for the host memory
// (UMF_MEMORY_TYPE_HOST)
umf_memory_provider_handle_t MemoryProviderHost = nullptr;
umf_memory_pool_handle_t MemoryPoolHost = nullptr;

ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices)
: Devices{Devs, Devs + NumDevices}, RefCount{1} {
Expand All @@ -124,10 +137,14 @@ struct ur_context_handle_t_ {
// Create UMF CUDA memory provider and pool for the host memory
// (UMF_MEMORY_TYPE_HOST) from any device (Devices[0] is used here, because
// it is guaranteed to exist).
UR_CHECK_ERROR(CreateHostMemoryProvider(Devices[0], &MemoryProviderHost));
UR_CHECK_ERROR(CreateHostMemoryProviderPool(Devices[0], &MemoryProviderHost,
&MemoryPoolHost));
};

~ur_context_handle_t_() {
if (MemoryPoolHost) {
umfPoolDestroy(MemoryPoolHost);
}
if (MemoryProviderHost) {
umfMemoryProviderDestroy(MemoryProviderHost);
}
Expand Down
17 changes: 15 additions & 2 deletions source/adapters/cuda/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include <ur/ur.hpp>

#include <umf/memory_pool.h>
#include <umf/memory_provider.h>

#include "common.hpp"
Expand Down Expand Up @@ -84,9 +85,17 @@ struct ur_device_handle_t_ {

MemoryProviderDevice = nullptr;
MemoryProviderShared = nullptr;
MemoryPoolDevice = nullptr;
MemoryPoolShared = nullptr;
}

~ur_device_handle_t_() {
if (MemoryPoolDevice) {
umfPoolDestroy(MemoryPoolDevice);
}
if (MemoryPoolShared) {
umfPoolDestroy(MemoryPoolShared);
}
if (MemoryProviderDevice) {
umfMemoryProviderDestroy(MemoryProviderDevice);
}
Expand Down Expand Up @@ -131,11 +140,15 @@ struct ur_device_handle_t_ {
// bookkeeping for mipmappedArray leaks in Mapping external Memory
std::map<CUarray, CUmipmappedArray> ChildCuarrayFromMipmapMap;

// UMF CUDA memory provider for the device memory (UMF_MEMORY_TYPE_DEVICE)
// UMF CUDA memory provider and pool for the device memory
// (UMF_MEMORY_TYPE_DEVICE)
umf_memory_provider_handle_t MemoryProviderDevice;
umf_memory_pool_handle_t MemoryPoolDevice;

// UMF CUDA memory provider for the shared memory (UMF_MEMORY_TYPE_SHARED)
// UMF CUDA memory provider and pool for the shared memory
// (UMF_MEMORY_TYPE_SHARED)
umf_memory_provider_handle_t MemoryProviderShared;
umf_memory_pool_handle_t MemoryPoolShared;
};

int getAttribute(ur_device_handle_t Device, CUdevice_attribute Attribute);
8 changes: 4 additions & 4 deletions source/adapters/cuda/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
cuMemHostRegister(HostPtr, size, CU_MEMHOSTREGISTER_DEVICEMAP));
AllocMode = BufferMem::AllocMode::UseHostPtr;
} else if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) {
UMF_CHECK_ERROR(umfMemoryProviderAlloc(hContext->MemoryProviderHost, size,
0, &HostPtr));
HostPtr = umfPoolMalloc(hContext->MemoryPoolHost, size);
UMF_CHECK_PTR(HostPtr);
AllocMode = BufferMem::AllocMode::AllocHostPtr;
} else if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) {
AllocMode = BufferMem::AllocMode::CopyIn;
Expand Down Expand Up @@ -442,8 +442,8 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
CU_MEMHOSTALLOC_DEVICEMAP));
UR_CHECK_ERROR(cuMemHostGetDevicePointer(&DevPtr, Buffer.HostPtr, 0));
} else {
UMF_CHECK_ERROR(umfMemoryProviderAlloc(hDevice->MemoryProviderDevice,
Buffer.Size, 0, (void **)&DevPtr));
*(void **)&DevPtr = umfPoolMalloc(hDevice->MemoryPoolDevice, Buffer.Size);
UMF_CHECK_PTR(*(void **)&DevPtr);
}
} else {
CUarray ImageArray{};
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,15 +158,15 @@ struct BufferMem {
case AllocMode::Classic:
for (auto &DevPtr : Ptrs) {
if (DevPtr != native_type{0}) {
UR_CHECK_ERROR(cuMemFree(DevPtr));
UMF_CHECK_ERROR(umfFree((void *)DevPtr));
}
}
break;
case AllocMode::UseHostPtr:
UR_CHECK_ERROR(cuMemHostUnregister(HostPtr));
break;
case AllocMode::AllocHostPtr:
UR_CHECK_ERROR(cuMemFreeHost(HostPtr));
UMF_CHECK_ERROR(umfFree((void *)HostPtr));
}
return UR_RESULT_SUCCESS;
}
Expand Down
40 changes: 32 additions & 8 deletions source/adapters/cuda/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include <sstream>

static ur_result_t
CreateDeviceMemoryProviders(ur_platform_handle_t_ *Platform) {
CreateDeviceMemoryProvidersPools(ur_platform_handle_t_ *Platform) {
umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams = nullptr;

umf_result_t UmfResult =
Expand All @@ -37,16 +37,40 @@ CreateDeviceMemoryProviders(ur_platform_handle_t_ *Platform) {

// create UMF CUDA memory provider for the device memory
// (UMF_MEMORY_TYPE_DEVICE)
UmfResult = umf::createMemoryProvider(
CUMemoryProviderParamsUnique.get(), device, context,
UMF_MEMORY_TYPE_DEVICE, &device_handle->MemoryProviderDevice);
UmfResult =
umf::setCUMemoryProviderParams(CUMemoryProviderParamsUnique.get(),
device, context, UMF_MEMORY_TYPE_DEVICE);
UMF_RETURN_UR_ERROR(UmfResult);

UmfResult = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(),
CUMemoryProviderParamsUnique.get(),
&device_handle->MemoryProviderDevice);
UMF_RETURN_UR_ERROR(UmfResult);

// create UMF CUDA memory provider for the shared memory
// (UMF_MEMORY_TYPE_SHARED)
UmfResult = umf::createMemoryProvider(
CUMemoryProviderParamsUnique.get(), device, context,
UMF_MEMORY_TYPE_SHARED, &device_handle->MemoryProviderShared);
UmfResult =
umf::setCUMemoryProviderParams(CUMemoryProviderParamsUnique.get(),
device, context, UMF_MEMORY_TYPE_SHARED);
UMF_RETURN_UR_ERROR(UmfResult);

UmfResult = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(),
CUMemoryProviderParamsUnique.get(),
&device_handle->MemoryProviderShared);
UMF_RETURN_UR_ERROR(UmfResult);

// create UMF CUDA memory pool for the device memory
// (UMF_MEMORY_TYPE_DEVICE)
UmfResult =
umfPoolCreate(umfProxyPoolOps(), device_handle->MemoryProviderDevice,
nullptr, 0, &device_handle->MemoryPoolDevice);
UMF_RETURN_UR_ERROR(UmfResult);

// create UMF CUDA memory pool for the shared memory
// (UMF_MEMORY_TYPE_SHARED)
UmfResult =
umfPoolCreate(umfProxyPoolOps(), device_handle->MemoryProviderShared,
nullptr, 0, &device_handle->MemoryPoolShared);
UMF_RETURN_UR_ERROR(UmfResult);
}

Expand Down Expand Up @@ -134,7 +158,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries,
static_cast<uint32_t>(i)});
}

UR_CHECK_ERROR(CreateDeviceMemoryProviders(&Platform));
UR_CHECK_ERROR(CreateDeviceMemoryProvidersPools(&Platform));
} catch (const std::bad_alloc &) {
// Signal out-of-memory situation
for (int i = 0; i < NumDevices; ++i) {
Expand Down
Loading