[Bugfix] DeepGEMM is not enabled on B200 due to _lazy_init() (vllm-project#21472)

smarterclayton · mgoin · noamgat · commit 8d4fb9f7ddf2 · 2025-08-09T10:39:20.000+03:00
Signed-off-by: Clayton Coleman <smarterclayton@gmail.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
Signed-off-by: Noam Gat <noamgat@gmail.com>
diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py
@@ -13,20 +13,24 @@
 import torch
 
 import vllm.envs as envs
-from vllm.utils import cuda_get_device_properties, has_deep_gemm
+from vllm.platforms import current_platform
+from vllm.utils import has_deep_gemm
 
 
 @functools.cache
 def is_blackwell_deep_gemm_used() -> bool:
     """Return ``True`` if vLLM is configured to use DeepGEMM on a
     Blackwell-class GPU.
     """
+    if not (envs.VLLM_USE_DEEP_GEMM and has_deep_gemm()):
+        return False
 
-    if not (envs.VLLM_USE_DEEP_GEMM and has_deep_gemm()
-            and _per_block_cast_impl is not None):
+    _lazy_init()
+    if _per_block_cast_impl is None:
         return False
 
-    return cuda_get_device_properties(0, ("major", ))[0] == 10
+    return (current_platform.is_cuda()
+            and current_platform.is_device_capability(100))
 
 
 def _missing(*_: Any, **__: Any) -> NoReturn: