diff --git a/src/runtime/cuda/cuda_device_api.cc b/src/runtime/cuda/cuda_device_api.cc
index 1c80397125e4..ae63f9a4b32f 100644
--- a/src/runtime/cuda/cuda_device_api.cc
+++ b/src/runtime/cuda/cuda_device_api.cc
@@ -142,6 +142,24 @@ class CUDADeviceAPI final : public DeviceAPI {
   }
 
   void FreeDataSpace(Device dev, void* ptr) final {
+    if (std::uncaught_exceptions() && cudaPeekAtLastError() == cudaErrorIllegalAddress) {
+      // For most CUDA calls, an error from an API call will be
+      // immediately reported, and raised as an exception. However,
+      // errors raised from async kernel execution leave the CUDA
+      // driver in an inconsistent state. These errors are "sticky",
+      // and are never cleared. (See [0] for more details.)
+      //
+      // If we are currently unwinding the stack due to a thrown
+      // exception, and the CUDA driver is in an unrecoverable error,
+      // do not attempt to free the CUDA allocations. Performing any
+      // CUDA API call while in this state will throw an additional
+      // exception, causing a segfault. In this case, it is better to
+      // allow the original error to continue propagating.
+      //
+      // [0] https://forums.developer.nvidia.com/t/cuda-errors-determine-sticky-ness/271625
+      return;
+    }
+
     if (dev.device_type == kDLCUDAHost) {
       VLOG(1) << "freeing host memory";
       CUDA_CALL(cudaFreeHost(ptr));
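
For reference, here is a minimal standalone sketch of the same guard pattern outside of TVM. It is not the patched TVM code; the `DeviceBuffer` RAII wrapper is a hypothetical example whose destructor may run while an exception is propagating, and the only CUDA calls used are standard runtime API functions (`cudaMalloc`, `cudaFree`, `cudaPeekAtLastError`). It requires C++17 for `std::uncaught_exceptions()`.

```cpp
// Sketch only: a hypothetical RAII wrapper demonstrating the sticky-error
// guard, not the TVM implementation.
#include <cuda_runtime.h>

#include <exception>
#include <stdexcept>

class DeviceBuffer {
 public:
  explicit DeviceBuffer(size_t nbytes) {
    if (cudaMalloc(&ptr_, nbytes) != cudaSuccess) {
      throw std::runtime_error("cudaMalloc failed");
    }
  }

  ~DeviceBuffer() {
    // If the stack is unwinding because of a sticky CUDA error (e.g. an
    // illegal address raised by an async kernel), any further CUDA API call
    // would fail again. Skip the free and let the original error propagate.
    if (std::uncaught_exceptions() > 0 &&
        cudaPeekAtLastError() == cudaErrorIllegalAddress) {
      return;
    }
    cudaFree(ptr_);  // best-effort cleanup; destructor ignores errors
  }

 private:
  void* ptr_ = nullptr;
};
```

Note that `cudaPeekAtLastError()` is used rather than `cudaGetLastError()`: both return the last recorded error, but `cudaGetLastError()` also resets the error state for recoverable errors, whereas peeking leaves it untouched so the caller that actually raised the exception still sees it.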