Commit da6f5a7

Use UMF Proxy pool manager with UMF CUDA memory provider in UR
The UMF Proxy pool manager is just a wrapper around the UMF memory provider (the CUDA memory provider in this case); in addition, it adds tracking of memory allocations.

Signed-off-by: Lukasz Dorau <[email protected]>
1 parent ebe65d1 commit da6f5a7
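For background, here is a minimal sketch of the pattern this commit adopts for each memory type: create a UMF CUDA memory provider, then wrap it in a proxy pool. It uses the public UMF C API directly instead of the adapter's internal umf::createMemoryProxyPool helper, and the CUDA-provider parameter setters shown are assumptions that may differ between UMF versions.

#include <cuda.h>
#include <umf/memory_pool.h>
#include <umf/memory_provider.h>
#include <umf/pools/pool_proxy.h>
#include <umf/providers/provider_cuda.h>

// Sketch only: create a CUDA memory provider for one memory type and wrap it
// in a proxy pool. The proxy pool forwards every allocation 1:1 to the
// provider and registers it with UMF's allocation tracker.
static umf_result_t
CreateCudaProviderAndProxyPool(CUcontext CuContext, CUdevice CuDevice,
                               umf_usm_memory_type_t MemType,
                               umf_memory_provider_handle_t *Provider,
                               umf_memory_pool_handle_t *Pool) {
  umf_cuda_memory_provider_params_handle_t Params = nullptr;
  umf_result_t Ret = umfCUDAMemoryProviderParamsCreate(&Params);
  if (Ret != UMF_RESULT_SUCCESS)
    return Ret;

  // Assumed setter names; check them against the UMF version in use.
  umfCUDAMemoryProviderParamsSetContext(Params, CuContext);
  umfCUDAMemoryProviderParamsSetDevice(Params, CuDevice);
  umfCUDAMemoryProviderParamsSetMemoryType(Params, MemType);

  Ret = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), Params, Provider);
  umfCUDAMemoryProviderParamsDestroy(Params);
  if (Ret != UMF_RESULT_SUCCESS)
    return Ret;

  // flags = 0: the pool does not own the provider; the adapter destroys
  // providers separately in the context/device destructors.
  return umfPoolCreate(umfProxyPoolOps(), *Provider, /*params=*/nullptr,
                       /*flags=*/0, Pool);
}

Allocations then go through umfPoolMalloc/umfPoolFree on the pool handle, which is exactly the switch the call sites below make.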

File tree: 8 files changed, +127 −186 lines

source/adapters/cuda/context.hpp

Lines changed: 22 additions & 1 deletion
@@ -100,6 +100,21 @@ CreateHostMemoryProvider(ur_device_handle_t_ *DeviceHandle,
   return UR_RESULT_SUCCESS;
 }

+static ur_result_t
+CreateHostMemoryPool(umf_memory_provider_handle_t MemoryProviderHost,
+                     umf_memory_pool_handle_t *MemoryPoolHost) {
+
+  umf_memory_pool_handle_t _MemoryPoolHost = nullptr;
+
+  umf_result_t UmfResult =
+      umf::createMemoryProxyPool(MemoryProviderHost, &_MemoryPoolHost);
+  UMF_RETURN_UR_ERROR(UmfResult);
+
+  *MemoryPoolHost = _MemoryPoolHost;
+
+  return UR_RESULT_SUCCESS;
+}
+
 struct ur_context_handle_t_ {

   struct deleter_data {
@@ -112,8 +127,10 @@ struct ur_context_handle_t_ {
   std::vector<ur_device_handle_t> Devices;
   std::atomic_uint32_t RefCount;

-  // UMF CUDA memory provider for the host memory (UMF_MEMORY_TYPE_HOST)
+  // UMF CUDA memory provider and pool for the host memory
+  // (UMF_MEMORY_TYPE_HOST)
   umf_memory_provider_handle_t MemoryProviderHost = nullptr;
+  umf_memory_pool_handle_t MemoryPoolHost = nullptr;

   ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices)
       : Devices{Devs, Devs + NumDevices}, RefCount{1} {
@@ -125,9 +142,13 @@ struct ur_context_handle_t_ {
     // (UMF_MEMORY_TYPE_HOST) from any device (Devices[0] is used here, because
     // it is guaranteed to exist).
     UR_CHECK_ERROR(CreateHostMemoryProvider(Devices[0], &MemoryProviderHost));
+    UR_CHECK_ERROR(CreateHostMemoryPool(MemoryProviderHost, &MemoryPoolHost));
   };

   ~ur_context_handle_t_() {
+    if (MemoryPoolHost) {
+      umfPoolDestroy(MemoryPoolHost);
+    }
     if (MemoryProviderHost) {
       umfMemoryProviderDestroy(MemoryProviderHost);
     }
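The destructor change above relies on an ordering rule: a pool must be destroyed before the provider it wraps. A hypothetical RAII sketch (not part of this commit) that encodes the same order:

// Hypothetical helper, for illustration only: owns a provider/proxy-pool pair
// and tears them down in the required order (pool first, then provider).
struct ProviderPoolPair {
  umf_memory_provider_handle_t Provider = nullptr;
  umf_memory_pool_handle_t Pool = nullptr;

  ~ProviderPoolPair() {
    if (Pool)
      umfPoolDestroy(Pool); // drop pool-side bookkeeping first
    if (Provider)
      umfMemoryProviderDestroy(Provider); // then the underlying provider
  }
};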

source/adapters/cuda/device.hpp

Lines changed: 15 additions & 2 deletions
@@ -11,6 +11,7 @@

 #include <ur/ur.hpp>

+#include <umf/memory_pool.h>
 #include <umf/memory_provider.h>

 #include "common.hpp"
@@ -84,9 +85,17 @@ struct ur_device_handle_t_ {

     MemoryProviderDevice = nullptr;
     MemoryProviderShared = nullptr;
+    MemoryPoolDevice = nullptr;
+    MemoryPoolShared = nullptr;
   }

   ~ur_device_handle_t_() {
+    if (MemoryPoolDevice) {
+      umfPoolDestroy(MemoryPoolDevice);
+    }
+    if (MemoryPoolShared) {
+      umfPoolDestroy(MemoryPoolShared);
+    }
     if (MemoryProviderDevice) {
       umfMemoryProviderDestroy(MemoryProviderDevice);
     }
@@ -131,11 +140,15 @@ struct ur_device_handle_t_ {
   // bookkeeping for mipmappedArray leaks in Mapping external Memory
   std::map<CUarray, CUmipmappedArray> ChildCuarrayFromMipmapMap;

-  // UMF CUDA memory provider for the device memory (UMF_MEMORY_TYPE_DEVICE)
+  // UMF CUDA memory provider and pool for the device memory
+  // (UMF_MEMORY_TYPE_DEVICE)
   umf_memory_provider_handle_t MemoryProviderDevice;
+  umf_memory_pool_handle_t MemoryPoolDevice;

-  // UMF CUDA memory provider for the shared memory (UMF_MEMORY_TYPE_SHARED)
+  // UMF CUDA memory provider and pool for the shared memory
+  // (UMF_MEMORY_TYPE_SHARED)
   umf_memory_provider_handle_t MemoryProviderShared;
+  umf_memory_pool_handle_t MemoryPoolShared;
 };

 int getAttribute(ur_device_handle_t Device, CUdevice_attribute Attribute);

source/adapters/cuda/memory.cpp

Lines changed: 4 additions & 4 deletions
@@ -50,8 +50,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate(
           cuMemHostRegister(HostPtr, size, CU_MEMHOSTREGISTER_DEVICEMAP));
       AllocMode = BufferMem::AllocMode::UseHostPtr;
     } else if (flags & UR_MEM_FLAG_ALLOC_HOST_POINTER) {
-      UMF_CHECK_ERROR(umfMemoryProviderAlloc(hContext->MemoryProviderHost, size,
-                                             0, &HostPtr));
+      HostPtr = umfPoolMalloc(hContext->MemoryPoolHost, size);
+      UMF_CHECK_PTR(HostPtr);
       AllocMode = BufferMem::AllocMode::AllocHostPtr;
     } else if (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER) {
       AllocMode = BufferMem::AllocMode::CopyIn;
@@ -442,8 +442,8 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
                                       CU_MEMHOSTALLOC_DEVICEMAP));
         UR_CHECK_ERROR(cuMemHostGetDevicePointer(&DevPtr, Buffer.HostPtr, 0));
       } else {
-        UMF_CHECK_ERROR(umfMemoryProviderAlloc(hDevice->MemoryProviderDevice,
-                                               Buffer.Size, 0, (void **)&DevPtr));
+        *(void **)&DevPtr = umfPoolMalloc(hDevice->MemoryPoolDevice, Buffer.Size);
+        UMF_CHECK_PTR(*(void **)&DevPtr);
       }
     } else {
       CUarray ImageArray{};

source/adapters/cuda/memory.hpp

Lines changed: 2 additions & 2 deletions
@@ -158,15 +158,15 @@ struct BufferMem {
     case AllocMode::Classic:
       for (auto &DevPtr : Ptrs) {
         if (DevPtr != native_type{0}) {
-          UR_CHECK_ERROR(cuMemFree(DevPtr));
+          UR_CHECK_ERROR(umf::umfCUDAFree((void *)DevPtr));
         }
       }
       break;
     case AllocMode::UseHostPtr:
       UR_CHECK_ERROR(cuMemHostUnregister(HostPtr));
       break;
     case AllocMode::AllocHostPtr:
-      UR_CHECK_ERROR(cuMemFreeHost(HostPtr));
+      UR_CHECK_ERROR(umf::umfCUDAFree((void *)HostPtr));
     }
     return UR_RESULT_SUCCESS;
   }
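Both free paths above now go through a single UR helper, umf::umfCUDAFree, whose implementation is not part of this excerpt. Because every allocation now comes from a tracking (proxy) pool, such a helper can in principle recover the owning pool from the raw pointer; a minimal sketch of that idea, assuming UMF's umfPoolByPtr lookup is available (an illustration only, not the actual body of umf::umfCUDAFree):

// Sketch only: free a pointer allocated through a tracking pool by looking up
// its owning pool first.
static ur_result_t FreeTrackedAllocation(void *Ptr) {
  umf_memory_pool_handle_t Pool = umfPoolByPtr(Ptr); // assumed UMF lookup API
  if (!Pool)
    return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
  return umfPoolFree(Pool, Ptr) == UMF_RESULT_SUCCESS
             ? UR_RESULT_SUCCESS
             : UR_RESULT_ERROR_UNKNOWN;
}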

source/adapters/cuda/platform.cpp

Lines changed: 14 additions & 2 deletions
@@ -20,7 +20,7 @@
 #include <sstream>

 static ur_result_t
-CreateDeviceMemoryProviders(ur_platform_handle_t_ *Platform) {
+CreateDeviceMemoryProvidersPools(ur_platform_handle_t_ *Platform) {
   umf_cuda_memory_provider_params_handle_t CUMemoryProviderParams = nullptr;

   umf_result_t UmfResult =
@@ -48,6 +48,18 @@ CreateDeviceMemoryProviders(ur_platform_handle_t_ *Platform) {
         CUMemoryProviderParamsUnique.get(), device, context,
         UMF_MEMORY_TYPE_SHARED, &device_handle->MemoryProviderShared);
     UMF_RETURN_UR_ERROR(UmfResult);
+
+    // create UMF CUDA memory pool for the device memory
+    // (UMF_MEMORY_TYPE_DEVICE)
+    UmfResult = umf::createMemoryProxyPool(device_handle->MemoryProviderDevice,
+                                           &device_handle->MemoryPoolDevice);
+    UMF_RETURN_UR_ERROR(UmfResult);
+
+    // create UMF CUDA memory pool for the shared memory
+    // (UMF_MEMORY_TYPE_SHARED)
+    UmfResult = umf::createMemoryProxyPool(device_handle->MemoryProviderShared,
+                                           &device_handle->MemoryPoolShared);
+    UMF_RETURN_UR_ERROR(UmfResult);
   }

   return UR_RESULT_SUCCESS;
@@ -134,7 +146,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries,
                                     static_cast<uint32_t>(i)});
     }

-    UR_CHECK_ERROR(CreateDeviceMemoryProviders(&Platform));
+    UR_CHECK_ERROR(CreateDeviceMemoryProvidersPools(&Platform));
   } catch (const std::bad_alloc &) {
     // Signal out-of-memory situation
     for (int i = 0; i < NumDevices; ++i) {

source/adapters/cuda/usm.cpp

Lines changed: 25 additions & 101 deletions
@@ -125,18 +125,17 @@ ur_result_t USMFreeImpl(ur_context_handle_t hContext, void *Pointer) {
      UR_ASSERT(DeviceOrdinal < NumDevices, UR_RESULT_ERROR_INVALID_DEVICE);

      ur_device_handle_t Device = Platform->Devices[DeviceOrdinal].get();
-      umf_memory_provider_handle_t MemoryProvider;
+      umf_memory_pool_handle_t MemoryPool;

      if (IsManaged) {
-        MemoryProvider = Device->MemoryProviderShared;
+        MemoryPool = Device->MemoryPoolShared;
      } else if (Type == CU_MEMORYTYPE_DEVICE) {
-        MemoryProvider = Device->MemoryProviderDevice;
+        MemoryPool = Device->MemoryPoolDevice;
      } else {
-        MemoryProvider = hContext->MemoryProviderHost;
+        MemoryPool = hContext->MemoryPoolHost;
      }

-      UMF_CHECK_ERROR(umfMemoryProviderFree(MemoryProvider, Pointer,
-                                            0 /* size is unknown */));
+      UMF_CHECK_ERROR(umfPoolFree(MemoryPool, Pointer));
  } catch (ur_result_t Err) {
    Result = Err;
  }
@@ -158,8 +157,8 @@ ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t,
                                uint32_t Alignment) {
   try {
     ScopedContext Active(Device);
-    UMF_CHECK_ERROR(umfMemoryProviderAlloc(Device->MemoryProviderDevice, Size,
-                                           Alignment, ResultPtr));
+    *ResultPtr = umfPoolMalloc(Device->MemoryPoolDevice, Size);
+    UMF_CHECK_PTR(*ResultPtr);
   } catch (ur_result_t Err) {
     return Err;
   }
@@ -180,8 +179,8 @@ ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t,
                                uint32_t Alignment) {
   try {
     ScopedContext Active(Device);
-    UMF_CHECK_ERROR(umfMemoryProviderAlloc(Device->MemoryProviderShared, Size,
-                                           Alignment, ResultPtr));
+    *ResultPtr = umfPoolMalloc(Device->MemoryPoolShared, Size);
+    UMF_CHECK_PTR(*ResultPtr);
   } catch (ur_result_t Err) {
     return Err;
   }
@@ -199,8 +198,8 @@ ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t hContext,
                              ur_usm_host_mem_flags_t, size_t Size,
                              uint32_t Alignment) {
   try {
-    UMF_CHECK_ERROR(umfMemoryProviderAlloc(hContext->MemoryProviderHost, Size,
-                                           Alignment, ResultPtr));
+    *ResultPtr = umfPoolMalloc(hContext->MemoryPoolHost, Size);
+    UMF_CHECK_PTR(*ResultPtr);
   } catch (ur_result_t Err) {
     return Err;
   }
@@ -326,73 +325,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context,
   return UR_RESULT_SUCCESS;
 }

-umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx,
-                                           ur_device_handle_t Dev) {
-  Context = Ctx;
-  Device = Dev;
-  // There isn't a way to query this in cuda, and there isn't much info on
-  // cuda's approach to alignment or transfer granularity between host and
-  // device. Within UMF this is only used to influence alignment, and since we
-  // discard that in our alloc implementations it seems we can safely ignore
-  // this as well, for now.
-  MinPageSize = 0;
-
-  return UMF_RESULT_SUCCESS;
-}
-
-enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align,
-                                           void **Ptr) {
-  auto Res = allocateImpl(Ptr, Size, Align);
-  if (Res != UR_RESULT_SUCCESS) {
-    getLastStatusRef() = Res;
-    return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
-  }
-
-  return UMF_RESULT_SUCCESS;
-}
-
-enum umf_result_t USMMemoryProvider::free(void *Ptr, size_t Size) {
-  (void)Size;
-
-  auto Res = USMFreeImpl(Context, Ptr);
-  if (Res != UR_RESULT_SUCCESS) {
-    getLastStatusRef() = Res;
-    return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
-  }
-
-  return UMF_RESULT_SUCCESS;
-}
-
-void USMMemoryProvider::get_last_native_error(const char **ErrMsg,
-                                              int32_t *ErrCode) {
-  (void)ErrMsg;
-  *ErrCode = static_cast<int32_t>(getLastStatusRef());
-}
-
-umf_result_t USMMemoryProvider::get_min_page_size(void *Ptr, size_t *PageSize) {
-  (void)Ptr;
-  *PageSize = MinPageSize;
-
-  return UMF_RESULT_SUCCESS;
-}
-
-ur_result_t USMSharedMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
-                                                  uint32_t Alignment) {
-  return USMSharedAllocImpl(ResultPtr, Context, Device, /*host flags*/ 0,
-                            /*device flags*/ 0, Size, Alignment);
-}
-
-ur_result_t USMDeviceMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
-                                                  uint32_t Alignment) {
-  return USMDeviceAllocImpl(ResultPtr, Context, Device, /* flags */ 0, Size,
-                            Alignment);
-}
-
-ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
-                                                uint32_t Alignment) {
-  return USMHostAllocImpl(ResultPtr, Context, /* flags */ 0, Size, Alignment);
-}
-
 ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,
                                              ur_usm_pool_desc_t *PoolDesc)
     : Context{Context} {
@@ -416,36 +348,28 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,
     pNext = BaseDesc->pNext;
   }

-  auto MemProvider =
-      umf::memoryProviderMakeUnique<USMHostMemoryProvider>(Context, nullptr)
-          .second;
-
   auto UmfHostParamsHandle = getUmfParamsHandle(
       DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Host]);
-  HostMemPool =
-      umf::poolMakeUniqueFromOps(umfDisjointPoolOps(), std::move(MemProvider),
-                                 UmfHostParamsHandle.get())
-          .second;
+  HostMemPool = umf::poolMakeUniqueFromOps_CudaProvider(
+                    umfDisjointPoolOps(), Context->MemoryProviderHost,
+                    UmfHostParamsHandle.get())
+                    .second;

   for (const auto &Device : Context->getDevices()) {
-    MemProvider =
-        umf::memoryProviderMakeUnique<USMDeviceMemoryProvider>(Context, Device)
-            .second;
     auto UmfDeviceParamsHandle = getUmfParamsHandle(
         DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Device]);
-    DeviceMemPool =
-        umf::poolMakeUniqueFromOps(umfDisjointPoolOps(), std::move(MemProvider),
-                                   UmfDeviceParamsHandle.get())
-            .second;
-    MemProvider =
-        umf::memoryProviderMakeUnique<USMSharedMemoryProvider>(Context, Device)
-            .second;
+    DeviceMemPool = umf::poolMakeUniqueFromOps_CudaProvider(
+                        umfDisjointPoolOps(), Device->MemoryProviderDevice,
+                        UmfDeviceParamsHandle.get())
+                        .second;
+
     auto UmfSharedParamsHandle = getUmfParamsHandle(
         DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Shared]);
-    SharedMemPool =
-        umf::poolMakeUniqueFromOps(umfDisjointPoolOps(), std::move(MemProvider),
-                                   UmfSharedParamsHandle.get())
-            .second;
+    SharedMemPool = umf::poolMakeUniqueFromOps_CudaProvider(
+                        umfDisjointPoolOps(), Device->MemoryProviderShared,
+                        UmfSharedParamsHandle.get())
+                        .second;
+
     Context->addPool(this);
   }
 }
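The USM pool constructor above now passes the raw provider handles to umf::poolMakeUniqueFromOps_CudaProvider instead of building the UR-side USMMemoryProvider shims deleted earlier in this file. In terms of the public UMF C API, the equivalent operation is layering a disjoint pool over an existing provider handle; a sketch, where DisjointParams stands in for the handle the adapter obtains from getUmfParamsHandle():

#include <umf/memory_pool.h>
#include <umf/pools/pool_disjoint.h>

// Sketch only: put a UMF disjoint pool on top of an already-created CUDA
// memory provider. flags = 0 means the pool does not take ownership of the
// provider, matching the adapter, which destroys providers in the
// context/device destructors.
static umf_result_t MakeDisjointPool(umf_memory_provider_handle_t Provider,
                                     void *DisjointParams,
                                     umf_memory_pool_handle_t *Pool) {
  return umfPoolCreate(umfDisjointPoolOps(), Provider, DisjointParams,
                       /*flags=*/0, Pool);
}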
