Commit 858d643

[None][fix] Fix ModelConfig.from_pretrained get quant config file (#8647)
Signed-off-by: Tailing Yuan <[email protected]>
1 parent cc5b8b6 commit 858d643
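
Editorial note: judging from the diff, the previous code resolved a local snapshot directory by locating config.json in the Hugging Face cache, then probed sibling paths such as model_dir / 'hf_quant_config.json' with Path.exists(). For a remote repo id, a quant config file is not guaranteed to be present in that snapshot until it has been explicitly fetched, so the probe could miss a file the repo actually contains. The fix queries each file directly through transformers.utils.hub.cached_file, which downloads on demand for repo ids and resolves normally for local paths. A minimal sketch (not part of the commit) contrasting the two lookups; the repo id is hypothetical and Hub access is assumed:

    from pathlib import Path

    import transformers

    checkpoint_dir = "some-org/some-quantized-model"  # hypothetical repo id

    # Old approach: resolve the snapshot directory through config.json,
    # then join sibling paths. The probe fails if hf_quant_config.json
    # was never downloaded into the snapshot, even when the repo has it.
    model_dir = Path(
        transformers.utils.hub.cached_file(checkpoint_dir,
                                           'config.json')).parent
    print((model_dir / 'hf_quant_config.json').exists())  # can be False

    # New approach: ask the hub cache for the file itself. cached_file
    # fetches it on demand for repo ids and resolves it for local paths,
    # raising OSError only when the file is genuinely absent.
    try:
        quant_config_file = transformers.utils.hub.cached_file(
            checkpoint_dir, 'hf_quant_config.json')
    except OSError:
        quant_config_file = None
    print(quant_config_file)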

File tree

1 file changed: +16 -11 lines changed


tensorrt_llm/_torch/model_config.py

Lines changed: 16 additions & 11 deletions
@@ -268,7 +268,8 @@ def is_generation_model(model_architectures: Optional[List[str]],
         # once ModelType is used in pytorch flow.
 
     @staticmethod
-    def load_modelopt_quant_config(quant_config_file, model_dir, moe_backend):
+    def load_modelopt_quant_config(quant_config_file, checkpoint_dir,
+                                   moe_backend):
         quant_config = QuantConfig()
         layer_quant_config = None
 
@@ -288,7 +289,8 @@ def load_modelopt_quant_config(quant_config_file, model_dir, moe_backend):
             'exclude_modules', None)
 
         if quant_config.quant_algo == QuantAlgo.MIXED_PRECISION:
-            mixed_quant_config_file = model_dir / 'quant_cfg.json'
+            mixed_quant_config_file = transformers.utils.hub.cached_file(
+                checkpoint_dir, 'quant_cfg.json')
             with open(mixed_quant_config_file) as fm:
                 mixed_quant_configs = json.load(fm)
         # kv_cache_quant_algo is global regardless of MIXED_PRECISION
@@ -475,31 +477,34 @@ def from_pretrained(cls,
                 checkpoint_dir,
                 trust_remote_code=trust_remote_code,
             )
-
-            # Find the cache path by looking for the config.json file which should be in all
-            # huggingface models
-            model_dir = Path(
-                transformers.utils.hub.cached_file(checkpoint_dir,
-                                                   'config.json')).parent
         else:
             raise ValueError(
                 "checkpoint_dir is None. Cannot load model config without a valid checkpoint directory."
             )
 
+        # Get cached file from path or repo id, return None if not exists.
+        def cached_file(path_or_repo_id, file_name):
+            try:
+                return transformers.utils.hub.cached_file(
+                    path_or_repo_id, file_name)
+            except OSError:
+                return None
+
         quant_config = QuantConfig()
         layer_quant_config = None
         moe_backend = kwargs.get('moe_backend', 'CUTLASS')
 
         # quantized ckpt in modelopt format
-        if (quant_config_file := model_dir / 'hf_quant_config.json').exists():
+        if quant_config_file := cached_file(checkpoint_dir,
+                                            'hf_quant_config.json'):
             quant_config, layer_quant_config = cls.load_modelopt_quant_config(
-                quant_config_file, model_dir, moe_backend)
+                quant_config_file, checkpoint_dir, moe_backend)
         # quantized ckpt in other formats
         elif hasattr(pretrained_config, "quantization_config"):
             hf_quant_config = pretrained_config.quantization_config
             quant_config, layer_quant_config = cls.load_hf_quant_config(
                 hf_quant_config, moe_backend)
-        elif (quant_config_file := model_dir / 'dtypes.json').exists():
+        elif quant_config_file := cached_file(checkpoint_dir, 'dtypes.json'):
             quant_config, layer_quant_config = cls.load_quant_config_from_dtypes_json(
                 quant_config_file, moe_backend)
 
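
With the new cached_file helper, all three quant-config probes in from_pretrained share one code path that works uniformly for local checkpoint directories and Hub repo ids. A condensed, standalone sketch of the resulting lookup order (detect_quant_format and its return values are hypothetical stand-ins; only the file-resolution order mirrors the diff):

    import transformers

    def cached_file(path_or_repo_id, file_name):
        # Get cached file from path or repo id, return None if not exists.
        try:
            return transformers.utils.hub.cached_file(path_or_repo_id,
                                                      file_name)
        except OSError:
            return None

    def detect_quant_format(checkpoint_dir, pretrained_config):
        # 1. quantized ckpt in modelopt format
        if file := cached_file(checkpoint_dir, 'hf_quant_config.json'):
            return 'modelopt', file
        # 2. quantized ckpt in other formats, carried inside config.json
        if hasattr(pretrained_config, 'quantization_config'):
            return 'hf_quantization_config', None
        # 3. dtypes.json alongside the checkpoint
        if file := cached_file(checkpoint_dir, 'dtypes.json'):
            return 'dtypes_json', file
        return None, None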
