From 56eaa9ef937bf4aa8d1eaaf0b0d3035f4860082f Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sun, 9 Feb 2025 16:47:32 +0800 Subject: [PATCH 1/6] add error handling Signed-off-by: youkaichao --- csrc/cumem_allocator.cpp | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/csrc/cumem_allocator.cpp b/csrc/cumem_allocator.cpp index e8555d853b7..c74472fb3f6 100644 --- a/csrc/cumem_allocator.cpp +++ b/csrc/cumem_allocator.cpp @@ -12,15 +12,20 @@ extern "C" { #include #include -#define CUDA_CHECK(condition) \ - do { \ - CUresult error = condition; \ - if (error != 0) { \ - char* error_string; \ - cuGetErrorString(error, (const char**)&error_string); \ - std::cerr << "CUDA Error: " << error_string << " at " << __FILE__ << ":" \ - << __LINE__ << std::endl; \ - } \ +char error_msg[10240]; // 10KB buffer to store error messages +CUresult error_code = 0; // store error code + +#define CUDA_CHECK(condition) \ + do { \ + CUresult error = condition; \ + if (error != 0) { \ + error_code = error; \ + char* error_string; \ + cuGetErrorString(error, (const char**)&error_string); \ + snprintf(error_msg, sizeof(error_msg), "CUDA Error: %s at %s:%d", \ + error_string, __FILE__, __LINE__); \ + std::cerr << error_msg << std::endl; \ + } \ } while (0) // Global references to Python callables @@ -258,6 +263,12 @@ static PyObject* python_unmap_and_release(PyObject* self, PyObject* args) { unmap_and_release(recv_device, recv_size, d_mem_ptr, p_memHandle); + if (error_code != 0) { + error_code = 0; + PyErr_SetString(PyExc_RuntimeError, error_msg); + return nullptr; + } + Py_RETURN_NONE; } @@ -282,6 +293,12 @@ static PyObject* python_create_and_map(PyObject* self, PyObject* args) { create_and_map(recv_device, recv_size, d_mem_ptr, p_memHandle); + if (error_code != 0) { + error_code = 0; + PyErr_SetString(PyExc_RuntimeError, error_msg); + return nullptr; + } + Py_RETURN_NONE; } From b3f2341bf32e63e52d5d741713b6c2cff328ca58 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sun, 9 Feb 2025 16:57:20 +0800 Subject: [PATCH 2/6] add tests Signed-off-by: youkaichao --- tests/basic_correctness/test_cumem.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/basic_correctness/test_cumem.py b/tests/basic_correctness/test_cumem.py index da9239b0940..4e9f1bf1cf8 100644 --- a/tests/basic_correctness/test_cumem.py +++ b/tests/basic_correctness/test_cumem.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 +import pytest import torch from vllm import LLM, SamplingParams @@ -9,6 +10,32 @@ from ..utils import fork_new_process_for_each_test +@fork_new_process_for_each_test +def test_python_error(): + """ + Test if Python error occurs when there's low-level + error happening from the C++ side. + """ + allocator = CuMemAllocator.get_instance() + total_bytes = torch.cuda.mem_get_info()[1] + alloc_bytes = int(total_bytes * 0.7) + tensors = [] + with allocator.use_memory_pool(): + # allocate 70% of the total memory + x = torch.empty(alloc_bytes, dtype=torch.uint8, device='cuda') + tensors.append(x) + # release the memory + allocator.sleep() + + # allocate more memory than the total memory + y = torch.empty(alloc_bytes, dtype=torch.uint8, device='cuda') + tensors.append(y) + with pytest.raises(RuntimeError): + # when the allocator is woken up, it should raise an error + # because we don't have enough memory + allocator.wake_up() + + @fork_new_process_for_each_test def test_basic_cumem(): # some tensors from default memory pool From de3b6ba269a6cd815f0dcb7b3aa7188783e2caf7 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sun, 9 Feb 2025 17:06:19 +0800 Subject: [PATCH 3/6] fix assignment Signed-off-by: youkaichao --- csrc/cumem_allocator.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/csrc/cumem_allocator.cpp b/csrc/cumem_allocator.cpp index c74472fb3f6..a9aaeb1ef82 100644 --- a/csrc/cumem_allocator.cpp +++ b/csrc/cumem_allocator.cpp @@ -12,8 +12,9 @@ extern "C" { #include #include -char error_msg[10240]; // 10KB buffer to store error messages -CUresult error_code = 0; // store error code +char error_msg[10240]; // 10KB buffer to store error messages +CUresult no_error = CUresult(0); +CUresult error_code = no_error; // store error code #define CUDA_CHECK(condition) \ do { \ @@ -264,7 +265,7 @@ static PyObject* python_unmap_and_release(PyObject* self, PyObject* args) { unmap_and_release(recv_device, recv_size, d_mem_ptr, p_memHandle); if (error_code != 0) { - error_code = 0; + error_code = no_error; PyErr_SetString(PyExc_RuntimeError, error_msg); return nullptr; } @@ -294,7 +295,7 @@ static PyObject* python_create_and_map(PyObject* self, PyObject* args) { create_and_map(recv_device, recv_size, d_mem_ptr, p_memHandle); if (error_code != 0) { - error_code = 0; + error_code = no_error; PyErr_SetString(PyExc_RuntimeError, error_msg); return nullptr; } From 585c98021bb0e9fdf45893f68676d4ea9e8ada97 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sun, 9 Feb 2025 17:25:08 +0800 Subject: [PATCH 4/6] early stop Signed-off-by: youkaichao --- csrc/cumem_allocator.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/csrc/cumem_allocator.cpp b/csrc/cumem_allocator.cpp index a9aaeb1ef82..3ac9fceb28a 100644 --- a/csrc/cumem_allocator.cpp +++ b/csrc/cumem_allocator.cpp @@ -26,6 +26,7 @@ CUresult error_code = no_error; // store error code snprintf(error_msg, sizeof(error_msg), "CUDA Error: %s at %s:%d", \ error_string, __FILE__, __LINE__); \ std::cerr << error_msg << std::endl; \ + return; \ } \ } while (0) From 23fb99c016279178b60d66f8692b8f05543c87bf Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sun, 9 Feb 2025 17:35:14 +0800 Subject: [PATCH 5/6] fix return type Signed-off-by: youkaichao --- csrc/cumem_allocator.cpp | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/csrc/cumem_allocator.cpp b/csrc/cumem_allocator.cpp index 3ac9fceb28a..f739ed92138 100644 --- a/csrc/cumem_allocator.cpp +++ b/csrc/cumem_allocator.cpp @@ -26,7 +26,6 @@ CUresult error_code = no_error; // store error code snprintf(error_msg, sizeof(error_msg), "CUDA Error: %s at %s:%d", \ error_string, __FILE__, __LINE__); \ std::cerr << error_msg << std::endl; \ - return; \ } \ } while (0) @@ -61,14 +60,22 @@ void create_and_map(unsigned long long device, ssize_t size, CUdeviceptr d_mem, // Allocate memory using cuMemCreate CUDA_CHECK(cuMemCreate(p_memHandle, size, &prop, 0)); + if (error_code != 0) { + return; + } CUDA_CHECK(cuMemMap(d_mem, size, 0, *p_memHandle, 0)); - + if (error_code != 0) { + return; + } CUmemAccessDesc accessDesc = {}; accessDesc.location.type = CU_MEM_LOCATION_TYPE_DEVICE; accessDesc.location.id = device; accessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; CUDA_CHECK(cuMemSetAccess(d_mem, size, &accessDesc, 1)); + if (error_code != 0) { + return; + } // std::cout << "create_and_map: device=" << device << ", size=" << size << ", // d_mem=" << d_mem << ", p_memHandle=" << p_memHandle << std::endl; } @@ -80,7 +87,13 @@ void unmap_and_release(unsigned long long device, ssize_t size, // ", d_mem=" << d_mem << ", p_memHandle=" << p_memHandle << std::endl; ensure_context(device); CUDA_CHECK(cuMemUnmap(d_mem, size)); + if (error_code != 0) { + return; + } CUDA_CHECK(cuMemRelease(*p_memHandle)); + if (error_code != 0) { + return; + } } PyObject* create_tuple_from_c_integers(unsigned long long a, @@ -128,12 +141,16 @@ void* my_malloc(ssize_t size, int device, CUstream stream) { size_t granularity; CUDA_CHECK(cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM)); - + if (error_code != 0) { + return nullptr; + } size_t alignedSize = ((size + granularity - 1) / granularity) * granularity; CUdeviceptr d_mem; CUDA_CHECK(cuMemAddressReserve(&d_mem, alignedSize, 0, 0, 0)); - + if (error_code != 0) { + return nullptr; + } // allocate the CUmemGenericAllocationHandle CUmemGenericAllocationHandle* p_memHandle = (CUmemGenericAllocationHandle*)malloc( @@ -215,6 +232,9 @@ void my_free(void* ptr, ssize_t size, int device, CUstream stream) { // free address and the handle CUDA_CHECK(cuMemAddressFree(d_mem, size)); + if (error_code != 0) { + return nullptr; + } free(p_memHandle); } From 3038c19631add1a976c87bb7f6e3d39176254aa9 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Sun, 9 Feb 2025 17:40:04 +0800 Subject: [PATCH 6/6] fix return type Signed-off-by: youkaichao --- csrc/cumem_allocator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/cumem_allocator.cpp b/csrc/cumem_allocator.cpp index f739ed92138..fab6ca36d42 100644 --- a/csrc/cumem_allocator.cpp +++ b/csrc/cumem_allocator.cpp @@ -233,7 +233,7 @@ void my_free(void* ptr, ssize_t size, int device, CUstream stream) { // free address and the handle CUDA_CHECK(cuMemAddressFree(d_mem, size)); if (error_code != 0) { - return nullptr; + return; } free(p_memHandle); }