diff --git a/tensorrt_llm/_torch/model_config.py b/tensorrt_llm/_torch/model_config.py index 232d2ccecd6..2ab19e1e787 100644 --- a/tensorrt_llm/_torch/model_config.py +++ b/tensorrt_llm/_torch/model_config.py @@ -344,7 +344,8 @@ def get_bindings_model_config(self, architectures = self.pretrained_config.architectures if len(architectures ) == 1 and architectures[0] == "DeciLMForCausalLM": - mlp_hidden_size = self._infer_nemotron_ffn_mult() + mlp_hidden_size = self._infer_nemotron_ffn_mult( + ) // self.mapping.tp_size else: raise ValueError( f"Inferring mlp hidden size for model architecture: {architectures} isn't supported yet"