NVIDIA · yuxianq · May 21, 2025 · May 20, 2025 · May 21, 2025
@@ -128,7 +128,7 @@ trtllm-build --checkpoint_dir ./tllm_checkpoint_1gpu_fp16_wq \
             --output_dir ./tmp/llama/7B/trt_engines/weight_only/1-gpu/ \
             --gemm_plugin auto
 
-# Build LLaMA 7B using 2-way auto parallelism.
+# Build LLaMA 7B using 2-way auto parallelism (deprecated).
 python convert_checkpoint.py --model_dir ./tmp/llama/7B/ \
                             --output_dir ./tllm_checkpoint_1gpu_fp16 \
                             --dtype float16

@@ -149,6 +149,9 @@ def check_dtype(tensor):
 
 
 def auto_parallel(network: Network, config: AutoParallelConfig):
+    logger.warning(
+        "auto_parallel is deprecated, "
+        "please use explicit parallelism like tp_size/pp_size instead.")
     debug_mode = config.debug_mode
     memory_budget = config.get_cluster_info(
     ).memory_budget_per_device * 1024 * 1024 * 1024

@@ -771,11 +771,19 @@ class LlmArgs(BaseModel):
     cp_config: Optional[dict] = Field(default_factory=dict,
                                       description="Context parallel config.")
 
-    auto_parallel: bool = Field(default=False,
-                                description="Enable auto parallel mode.")
+    auto_parallel: bool = Field(
+        default=False,
+        description="Enable auto parallel mode.",
+        deprecated=
+        "Use tensor_parallel_size/pipeline_parallel_size/xxx_parallel_size instead.",
+    )
 
     auto_parallel_world_size: Optional[int] = Field(
-        default=None, description="The world size for auto parallel mode.")
+        default=None,
+        description="The world size for auto parallel mode.",
+        deprecated=
+        "Use tensor_parallel_size/pipeline_parallel_size/xxx_parallel_size instead.",
+    )
 
     load_format: Literal['auto', 'dummy'] = Field(
         default='auto',