@@ -1651,8 +1651,15 @@ pi_result cuda_piMemBufferCreate(pi_context context, pi_mem_flags flags,
1651
1651
cuMemHostRegister (host_ptr, size, CU_MEMHOSTREGISTER_DEVICEMAP));
1652
1652
retErr = PI_CHECK_ERROR (cuMemHostGetDevicePointer (&ptr, host_ptr, 0 ));
1653
1653
allocMode = _pi_mem::mem_::buffer_mem_::alloc_mode::use_host_ptr;
1654
+ } else if (flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) {
1655
+ retErr = PI_CHECK_ERROR (cuMemAllocHost (&host_ptr, size));
1656
+ retErr = PI_CHECK_ERROR (cuMemHostGetDevicePointer (&ptr, host_ptr, 0 ));
1657
+ allocMode = _pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr;
1654
1658
} else {
1655
1659
retErr = PI_CHECK_ERROR (cuMemAlloc (&ptr, size));
1660
+ if (flags & PI_MEM_FLAGS_HOST_PTR_COPY) {
1661
+ allocMode = _pi_mem::mem_::buffer_mem_::alloc_mode::copy_in;
1662
+ }
1656
1663
}
1657
1664
1658
1665
if (retErr == PI_SUCCESS) {
@@ -1715,13 +1722,16 @@ pi_result cuda_piMemRelease(pi_mem memObj) {
1715
1722
1716
1723
if (memObj->mem_type_ == _pi_mem::mem_type::buffer) {
1717
1724
switch (uniqueMemObj->mem_ .buffer_mem_ .allocMode_ ) {
1725
+ case _pi_mem::mem_::buffer_mem_::alloc_mode::copy_in:
1718
1726
case _pi_mem::mem_::buffer_mem_::alloc_mode::classic:
1719
1727
ret = PI_CHECK_ERROR (cuMemFree (uniqueMemObj->mem_ .buffer_mem_ .ptr_ ));
1720
1728
break ;
1721
1729
case _pi_mem::mem_::buffer_mem_::alloc_mode::use_host_ptr:
1722
1730
ret = PI_CHECK_ERROR (
1723
1731
cuMemHostUnregister (uniqueMemObj->mem_ .buffer_mem_ .hostPtr_ ));
1724
1732
break ;
1733
+ case _pi_mem::mem_::buffer_mem_::alloc_mode::alloc_host_ptr:
1734
+ ret = PI_CHECK_ERROR (cuMemFreeHost (uniqueMemObj->mem_ .buffer_mem_ .hostPtr_ ));
1725
1735
};
1726
1736
} else if (memObj->mem_type_ == _pi_mem::mem_type::surface) {
1727
1737
ret = PI_CHECK_ERROR (
0 commit comments