Skip to content

Commit 8d4fb9f

Browse files
smarterclayton and mgoin
authored and committed
[Bugfix] DeepGEMM is not enabled on B200 due to _lazy_init() (vllm-project#21472)
Signed-off-by: Clayton Coleman <[email protected]> Co-authored-by: mgoin <[email protected]> Signed-off-by: Noam Gat <[email protected]>
1 parent fba6cc8 commit 8d4fb9f

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

vllm/utils/deep_gemm.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,20 +13,24 @@
 import torch
 
 import vllm.envs as envs
-from vllm.utils import cuda_get_device_properties, has_deep_gemm
+from vllm.platforms import current_platform
+from vllm.utils import has_deep_gemm
 
 
 @functools.cache
 def is_blackwell_deep_gemm_used() -> bool:
     """Return ``True`` if vLLM is configured to use DeepGEMM on a
     Blackwell-class GPU.
     """
+    if not (envs.VLLM_USE_DEEP_GEMM and has_deep_gemm()):
+        return False
 
-    if not (envs.VLLM_USE_DEEP_GEMM and has_deep_gemm()
-            and _per_block_cast_impl is not None):
+    _lazy_init()
+    if _per_block_cast_impl is None:
         return False
 
-    return cuda_get_device_properties(0, ("major", ))[0] == 10
+    return (current_platform.is_cuda()
+            and current_platform.is_device_capability(100))
 
 
 def _missing(*_: Any, **__: Any) -> NoReturn:

0 commit comments

Comments (0)