@@ -61,7 +61,16 @@
 # TODO[chunweiy]: move the following symbols back to utils scope, and remove the following import
 
 
-class CudaGraphConfig(BaseModel):
+class StrictBaseModel(BaseModel):
+    """
+    A base model that forbids arbitrary fields.
+    """
+
+    class Config:
+        extra = "forbid"  # globally forbid arbitrary fields
+
+
+class CudaGraphConfig(StrictBaseModel):
     """
     Configuration for CUDA graphs.
     """
@@ -88,8 +97,40 @@ def validate_cuda_graph_max_batch_size(cls, v):
                 "cuda_graph_config.max_batch_size must be non-negative")
         return v
 
+    @staticmethod
+    def _generate_cuda_graph_batch_sizes(max_batch_size: int,
+                                         enable_padding: bool) -> List[int]:
+        """Generate a list of batch sizes for CUDA graphs.
+
+        Args:
+            max_batch_size: Maximum batch size to generate up to
+            enable_padding: Whether padding is enabled, which affects the batch size distribution
+
+        Returns:
+            List of batch sizes to create CUDA graphs for
+        """
+        if enable_padding:
+            batch_sizes = [1, 2, 4] + [i * 8 for i in range(1, 17)]
+        else:
+            batch_sizes = list(range(1, 32)) + [32, 64, 128]
+
+        # Add powers of 2 up to max_batch_size
+        batch_sizes += [
+            2**i for i in range(8, math.floor(math.log(max_batch_size, 2)))
+        ]
+
+        # Filter and sort batch sizes
+        batch_sizes = sorted(
+            [size for size in batch_sizes if size <= max_batch_size])
+
+        # Add max_batch_size if not already included
+        if max_batch_size != batch_sizes[-1]:
+            batch_sizes.append(max_batch_size)
+
+        return batch_sizes
+
 
-class MoeConfig(BaseModel):
+class MoeConfig(StrictBaseModel):
     """
     Configuration for MoE.
     """
@@ -194,7 +235,7 @@ def to_mapping(self) -> Mapping:
                        auto_parallel=self.auto_parallel)
 
 
-class CalibConfig(BaseModel):
+class CalibConfig(StrictBaseModel):
     """
     Calibration configuration.
     """
@@ -246,7 +287,7 @@ class _ModelFormatKind(Enum):
     TLLM_ENGINE = 2
 
 
-class DecodingBaseConfig(BaseModel):
+class DecodingBaseConfig(StrictBaseModel):
     max_draft_len: Optional[int] = None
     speculative_model_dir: Optional[Union[str, Path]] = None
 
@@ -267,6 +308,7 @@ def from_dict(cls, data: dict):
         config_class = config_classes.get(decoding_type)
         if config_class is None:
             raise ValueError(f"Invalid decoding type: {decoding_type}")
+        data.pop("decoding_type")
 
         return config_class(**data)
 
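The added `data.pop("decoding_type")` is a direct consequence of the `StrictBaseModel` switch: `decoding_type` is only a dispatch key, not a declared field on the concrete config classes, so leaving it in `data` would now trip the extra-fields check when the selected class is constructed. A sketch of that dispatch pattern (the concrete config class and its field below are illustrative, not the exact ones in this file):

```python
from typing import Optional
from pydantic import BaseModel


class StrictBaseModel(BaseModel):
    class Config:
        extra = "forbid"


class DemoDecodingConfig(StrictBaseModel):  # illustrative concrete config
    max_draft_len: Optional[int] = None


config_classes = {"demo": DemoDecodingConfig}


def from_dict(data: dict):
    decoding_type = data.get("decoding_type")
    config_class = config_classes.get(decoding_type)
    if config_class is None:
        raise ValueError(f"Invalid decoding type: {decoding_type}")
    # Without this pop, config_class(**data) would raise a ValidationError:
    # "decoding_type" is not a field, and extra fields are now forbidden.
    data.pop("decoding_type")
    return config_class(**data)


print(from_dict({"decoding_type": "demo", "max_draft_len": 4}))
```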
@@ -465,7 +507,7 @@ def mirror_pybind_fields(pybind_class):
         """
 
         def decorator(cls):
-            assert issubclass(cls, BaseModel)
+            assert issubclass(cls, StrictBaseModel)
             # Get all non-private fields from the C++ class
             cpp_fields = PybindMirror.get_pybind_variable_fields(pybind_class)
             python_fields = set(cls.model_fields.keys())
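For readers new to the mirroring machinery: the decorator compares the field names declared on the pydantic class against the variable fields exposed by the C++ pybind class, and the tightened assert now also forces every mirror onto the strict base so unknown fields are rejected uniformly. A simplified standalone sketch of the idea, assuming a plain set of field names in place of the real pybind introspection and reusing the `StrictBaseModel` sketch above (the field names are hypothetical):

```python
def mirror_fields(cpp_fields: set):
    """Simplified stand-in for PybindMirror.mirror_pybind_fields."""

    def decorator(cls):
        # Mirrors must inherit the strict extra-field policy.
        assert issubclass(cls, StrictBaseModel)
        python_fields = set(cls.model_fields.keys())
        missing = cpp_fields - python_fields
        assert not missing, f"mirror class lacks fields: {missing}"
        return cls

    return decorator


@mirror_fields({"max_tokens", "enable_reuse"})  # hypothetical field names
class DemoMirror(StrictBaseModel):
    max_tokens: int = 0
    enable_reuse: bool = True
```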
@@ -566,7 +608,7 @@ def _to_pybind(self):
 
 
 @PybindMirror.mirror_pybind_fields(_DynamicBatchConfig)
-class DynamicBatchConfig(BaseModel, PybindMirror):
+class DynamicBatchConfig(StrictBaseModel, PybindMirror):
     """Dynamic batch configuration.
 
     Controls how batch size and token limits are dynamically adjusted at runtime.
@@ -592,7 +634,7 @@ def _to_pybind(self):
 
 
 @PybindMirror.mirror_pybind_fields(_SchedulerConfig)
-class SchedulerConfig(BaseModel, PybindMirror):
+class SchedulerConfig(StrictBaseModel, PybindMirror):
     capacity_scheduler_policy: CapacitySchedulerPolicy = Field(
         default=CapacitySchedulerPolicy.GUARANTEED_NO_EVICT,
         description="The capacity scheduler policy to use")
@@ -614,7 +656,7 @@ def _to_pybind(self):
 
 
 @PybindMirror.mirror_pybind_fields(_PeftCacheConfig)
-class PeftCacheConfig(BaseModel, PybindMirror):
+class PeftCacheConfig(StrictBaseModel, PybindMirror):
     """
     Configuration for the PEFT cache.
     """
@@ -742,7 +784,7 @@ def supports_backend(self, backend: str) -> bool:
 
 
 @PybindMirror.mirror_pybind_fields(_KvCacheConfig)
-class KvCacheConfig(BaseModel, PybindMirror):
+class KvCacheConfig(StrictBaseModel, PybindMirror):
     """
     Configuration for the KV cache.
     """
@@ -825,7 +867,7 @@ def _to_pybind(self):
 
 
 @PybindMirror.mirror_pybind_fields(_ExtendedRuntimePerfKnobConfig)
-class ExtendedRuntimePerfKnobConfig(BaseModel, PybindMirror):
+class ExtendedRuntimePerfKnobConfig(StrictBaseModel, PybindMirror):
     """
     Configuration for extended runtime performance knobs.
     """
@@ -856,7 +898,7 @@ def _to_pybind(self):
 
 
 @PybindMirror.mirror_pybind_fields(_CacheTransceiverConfig)
-class CacheTransceiverConfig(BaseModel, PybindMirror):
+class CacheTransceiverConfig(StrictBaseModel, PybindMirror):
     """
     Configuration for the cache transceiver.
     """
@@ -916,7 +958,7 @@ def model_name(self) -> Union[str, Path]:
         return self.model if isinstance(self.model, str) else None
 
 
-class BaseLlmArgs(BaseModel):
+class BaseLlmArgs(StrictBaseModel):
     """
     Base class for both TorchLlmArgs and TrtLlmArgs. It contains all the arguments that are common to both.
     """
@@ -1299,7 +1341,8 @@ def init_build_config(self):
         """
         Creating a default BuildConfig if none is provided
         """
-        if self.build_config is None:
+        build_config = getattr(self, "build_config", None)
+        if build_config is None:
             kwargs = {}
             if self.max_batch_size:
                 kwargs["max_batch_size"] = self.max_batch_size
@@ -1312,10 +1355,10 @@ def init_build_config(self):
             if self.max_input_len:
                 kwargs["max_input_len"] = self.max_input_len
             self.build_config = BuildConfig(**kwargs)
-
-        assert isinstance(
-            self.build_config, BuildConfig
-        ), f"build_config is not initialized: {self.build_config}"
+        else:
+            assert isinstance(
+                build_config,
+                BuildConfig), f"build_config is not initialized: {build_config}"
         return self
 
     @model_validator(mode="after")
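The `getattr` guard is presumably what lets this validator run on argument classes that don't declare a `build_config` field at all: on a pydantic model, reading an undeclared attribute raises `AttributeError`, whereas `getattr(..., None)` falls through to the default-construction branch. A tiny illustration of the access pattern (both classes here are hypothetical):

```python
class ArgsWithBuild:
    build_config = "existing-config"  # stand-in for a BuildConfig instance


class ArgsWithoutBuild:
    pass  # no build_config attribute at all


for args in (ArgsWithBuild(), ArgsWithoutBuild()):
    # Safe on both: returns None instead of raising AttributeError.
    build_config = getattr(args, "build_config", None)
    print(build_config)  # "existing-config", then None
```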
@@ -1752,7 +1795,7 @@ class LoadFormat(Enum):
     DUMMY = 1
 
 
-class TorchCompileConfig(BaseModel):
+class TorchCompileConfig(StrictBaseModel):
     """
     Configuration for torch.compile.
     """
@@ -1966,38 +2009,6 @@ def validate_checkpoint_format(self):
 
         return self
 
-    @staticmethod
-    def _generate_cuda_graph_batch_sizes(max_batch_size: int,
-                                         enable_padding: bool) -> List[int]:
-        """Generate a list of batch sizes for CUDA graphs.
-
-        Args:
-            max_batch_size: Maximum batch size to generate up to
-            enable_padding: Whether padding is enabled, which affects the batch size distribution
-
-        Returns:
-            List of batch sizes to create CUDA graphs for
-        """
-        if enable_padding:
-            batch_sizes = [1, 2, 4] + [i * 8 for i in range(1, 17)]
-        else:
-            batch_sizes = list(range(1, 32)) + [32, 64, 128]
-
-        # Add powers of 2 up to max_batch_size
-        batch_sizes += [
-            2**i for i in range(8, math.floor(math.log(max_batch_size, 2)))
-        ]
-
-        # Filter and sort batch sizes
-        batch_sizes = sorted(
-            [size for size in batch_sizes if size <= max_batch_size])
-
-        # Add max_batch_size if not already included
-        if max_batch_size != batch_sizes[-1]:
-            batch_sizes.append(max_batch_size)
-
-        return batch_sizes
-
     @model_validator(mode="after")
     def validate_load_balancer(self) -> 'TorchLlmArgs':
         from .._torch import MoeLoadBalancerConfig
@@ -2034,7 +2045,7 @@ def validate_cuda_graph_config(self) -> 'TorchLlmArgs':
         if config.batch_sizes:
             config.batch_sizes = sorted(config.batch_sizes)
             if config.max_batch_size != 0:
-                if config.batch_sizes != self._generate_cuda_graph_batch_sizes(
+                if config.batch_sizes != CudaGraphConfig._generate_cuda_graph_batch_sizes(
                         config.max_batch_size, config.enable_padding):
                     raise ValueError(
                         "Please don't set both cuda_graph_config.batch_sizes "
@@ -2046,7 +2057,7 @@ def validate_cuda_graph_config(self) -> 'TorchLlmArgs':
                 config.max_batch_size = max(config.batch_sizes)
         else:
             max_batch_size = config.max_batch_size or 128
-            generated_sizes = self._generate_cuda_graph_batch_sizes(
+            generated_sizes = CudaGraphConfig._generate_cuda_graph_batch_sizes(
                 max_batch_size, config.enable_padding)
             config.batch_sizes = generated_sizes
             config.max_batch_size = max_batch_size
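Taken together, the last two hunks define the contract between `cuda_graph_config.batch_sizes` and `cuda_graph_config.max_batch_size`. A sketch of the three cases, assuming the validator runs during `TorchLlmArgs` construction (the outcomes are read off the code above, not verbatim from tests):

```python
# Case 1: only batch_sizes given -> max_batch_size is inferred.
cfg = CudaGraphConfig(batch_sizes=[1, 2, 4])
# After validation: cfg.max_batch_size == 4 (max of the list).

# Case 2: only max_batch_size given -> batch_sizes is generated.
cfg = CudaGraphConfig(max_batch_size=64)
# After validation: cfg.batch_sizes comes from
# CudaGraphConfig._generate_cuda_graph_batch_sizes(64, cfg.enable_padding).

# Case 3: both given -> they must equal the generated list exactly,
# otherwise validate_cuda_graph_config raises ValueError
# ("Please don't set both cuda_graph_config.batch_sizes ...").
cfg = CudaGraphConfig(batch_sizes=[3, 5], max_batch_size=64)  # rejected later
```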