1 file changed, +8 -1 lines changed
```diff
@@ -29,7 +29,7 @@
 from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
                                                      get_quantization_config)
 from vllm.model_executor.models import ModelRegistry
-from vllm.platforms import CpuArchEnum
+from vllm.platforms import CpuArchEnum, current_platform
 from vllm.sampling_params import GuidedDecodingParams
 from vllm.tracing import is_otel_available, otel_import_error_traceback
 from vllm.transformers_utils.config import (
@@ -684,6 +684,13 @@ def _verify_cuda_graph(self) -> None:
             self.max_seq_len_to_capture = self.max_model_len
         self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
                                           self.max_model_len)
+        ROCM_UNSUPPORTED_MODELS = ['mllama']
+        if (self.hf_config.model_type in ROCM_UNSUPPORTED_MODELS
+                and not self.enforce_eager and current_platform.is_rocm()):
+            logger.warning(
+                "CUDA graph is not supported for %s on ROCm yet, fallback "
+                "to the eager mode.", self.hf_config.model_type)
+            self.enforce_eager = True
 
     def _verify_bnb_config(self) -> None:
         """
```