@@ -12,15 +12,21 @@ extern "C" {
1212#include < cuda_runtime_api.h>
1313#include < cuda.h>
1414
15- #define CUDA_CHECK (condition ) \
16- do { \
17- CUresult error = condition; \
18- if (error != 0 ) { \
19- char * error_string; \
20- cuGetErrorString (error, (const char **)&error_string); \
21- std::cerr << " CUDA Error: " << error_string << " at " << __FILE__ << " :" \
22- << __LINE__ << std::endl; \
23- } \
15+ char error_msg[10240 ]; // 10KB buffer to store error messages
16+ CUresult no_error = CUresult(0 );
17+ CUresult error_code = no_error; // store error code
18+
19+ #define CUDA_CHECK (condition ) \
20+ do { \
21+ CUresult error = condition; \
22+ if (error != 0 ) { \
23+ error_code = error; \
24+ char * error_string; \
25+ cuGetErrorString (error, (const char **)&error_string); \
26+ snprintf (error_msg, sizeof (error_msg), " CUDA Error: %s at %s:%d" , \
27+ error_string, __FILE__, __LINE__); \
28+ std::cerr << error_msg << std::endl; \
29+ } \
2430 } while (0 )
2531
2632// Global references to Python callables
@@ -54,14 +60,22 @@ void create_and_map(unsigned long long device, ssize_t size, CUdeviceptr d_mem,
5460
5561 // Allocate memory using cuMemCreate
5662 CUDA_CHECK (cuMemCreate (p_memHandle, size, &prop, 0 ));
63+ if (error_code != 0 ) {
64+ return ;
65+ }
5766 CUDA_CHECK (cuMemMap (d_mem, size, 0 , *p_memHandle, 0 ));
58-
67+ if (error_code != 0 ) {
68+ return ;
69+ }
5970 CUmemAccessDesc accessDesc = {};
6071 accessDesc.location .type = CU_MEM_LOCATION_TYPE_DEVICE;
6172 accessDesc.location .id = device;
6273 accessDesc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
6374
6475 CUDA_CHECK (cuMemSetAccess (d_mem, size, &accessDesc, 1 ));
76+ if (error_code != 0 ) {
77+ return ;
78+ }
6579 // std::cout << "create_and_map: device=" << device << ", size=" << size << ",
6680 // d_mem=" << d_mem << ", p_memHandle=" << p_memHandle << std::endl;
6781}
@@ -73,7 +87,13 @@ void unmap_and_release(unsigned long long device, ssize_t size,
7387 // ", d_mem=" << d_mem << ", p_memHandle=" << p_memHandle << std::endl;
7488 ensure_context (device);
7589 CUDA_CHECK (cuMemUnmap (d_mem, size));
90+ if (error_code != 0 ) {
91+ return ;
92+ }
7693 CUDA_CHECK (cuMemRelease (*p_memHandle));
94+ if (error_code != 0 ) {
95+ return ;
96+ }
7797}
7898
7999PyObject* create_tuple_from_c_integers (unsigned long long a,
@@ -121,12 +141,16 @@ void* my_malloc(ssize_t size, int device, CUstream stream) {
121141 size_t granularity;
122142 CUDA_CHECK (cuMemGetAllocationGranularity (&granularity, &prop,
123143 CU_MEM_ALLOC_GRANULARITY_MINIMUM));
124-
144+ if (error_code != 0 ) {
145+ return nullptr ;
146+ }
125147 size_t alignedSize = ((size + granularity - 1 ) / granularity) * granularity;
126148
127149 CUdeviceptr d_mem;
128150 CUDA_CHECK (cuMemAddressReserve (&d_mem, alignedSize, 0 , 0 , 0 ));
129-
151+ if (error_code != 0 ) {
152+ return nullptr ;
153+ }
130154 // allocate the CUmemGenericAllocationHandle
131155 CUmemGenericAllocationHandle* p_memHandle =
132156 (CUmemGenericAllocationHandle*)malloc (
@@ -208,6 +232,9 @@ void my_free(void* ptr, ssize_t size, int device, CUstream stream) {
208232
209233 // free address and the handle
210234 CUDA_CHECK (cuMemAddressFree (d_mem, size));
235+ if (error_code != 0 ) {
236+ return ;
237+ }
211238 free (p_memHandle);
212239}
213240
@@ -258,6 +285,12 @@ static PyObject* python_unmap_and_release(PyObject* self, PyObject* args) {
258285
259286 unmap_and_release (recv_device, recv_size, d_mem_ptr, p_memHandle);
260287
288+ if (error_code != 0 ) {
289+ error_code = no_error;
290+ PyErr_SetString (PyExc_RuntimeError, error_msg);
291+ return nullptr ;
292+ }
293+
261294 Py_RETURN_NONE;
262295}
263296
@@ -282,6 +315,12 @@ static PyObject* python_create_and_map(PyObject* self, PyObject* args) {
282315
283316 create_and_map (recv_device, recv_size, d_mem_ptr, p_memHandle);
284317
318+ if (error_code != 0 ) {
319+ error_code = no_error;
320+ PyErr_SetString (PyExc_RuntimeError, error_msg);
321+ return nullptr ;
322+ }
323+
285324 Py_RETURN_NONE;
286325}
287326
0 commit comments