From 1196bc0b1d1533530a1c412f879c51e704d370a5 Mon Sep 17 00:00:00 2001 From: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> Date: Tue, 20 May 2025 05:23:54 +0000 Subject: [PATCH 1/2] chore: Deprecate autopp. Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> --- tensorrt_llm/auto_parallel/auto_parallel.py | 3 +++ tensorrt_llm/llmapi/llm_args.py | 14 +++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tensorrt_llm/auto_parallel/auto_parallel.py b/tensorrt_llm/auto_parallel/auto_parallel.py index d7b79b8dd4f..adaea2a4f05 100644 --- a/tensorrt_llm/auto_parallel/auto_parallel.py +++ b/tensorrt_llm/auto_parallel/auto_parallel.py @@ -149,6 +149,9 @@ def check_dtype(tensor): def auto_parallel(network: Network, config: AutoParallelConfig): + logger.warning( + "auto_parallel is deprecated, " + "please use explicit parallelism like tp_size/pp_size instead.") debug_mode = config.debug_mode memory_budget = config.get_cluster_info( ).memory_budget_per_device * 1024 * 1024 * 1024 diff --git a/tensorrt_llm/llmapi/llm_args.py b/tensorrt_llm/llmapi/llm_args.py index a60766d7892..43b7c183c3b 100644 --- a/tensorrt_llm/llmapi/llm_args.py +++ b/tensorrt_llm/llmapi/llm_args.py @@ -771,11 +771,19 @@ class LlmArgs(BaseModel): cp_config: Optional[dict] = Field(default_factory=dict, description="Context parallel config.") - auto_parallel: bool = Field(default=False, - description="Enable auto parallel mode.") + auto_parallel: bool = Field( + default=False, + description="Enable auto parallel mode.", + deprecated= + "Use tensor_parallel_size/pipeline_parallel_size/xxx_parallel_size instead.", + ) auto_parallel_world_size: Optional[int] = Field( - default=None, description="The world size for auto parallel mode.") + default=None, + description="The world size for auto parallel mode.", + deprecated= + "Use tensor_parallel_size/pipeline_parallel_size/xxx_parallel_size instead.", + ) load_format: Literal['auto', 'dummy'] = Field( default='auto', From 4e6515ee56416ea2945a66f1c253f7e90aed7f08 Mon Sep 17 00:00:00 2001 From: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> Date: Wed, 21 May 2025 04:16:32 +0000 Subject: [PATCH 2/2] Address comment. Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com> --- examples/models/core/llama/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/models/core/llama/README.md b/examples/models/core/llama/README.md index cb5636ffb14..61d25158419 100644 --- a/examples/models/core/llama/README.md +++ b/examples/models/core/llama/README.md @@ -128,7 +128,7 @@ trtllm-build --checkpoint_dir ./tllm_checkpoint_1gpu_fp16_wq \ --output_dir ./tmp/llama/7B/trt_engines/weight_only/1-gpu/ \ --gemm_plugin auto -# Build LLaMA 7B using 2-way auto parallelism. +# Build LLaMA 7B using 2-way auto parallelism (deprecated). python convert_checkpoint.py --model_dir ./tmp/llama/7B/ \ --output_dir ./tllm_checkpoint_1gpu_fp16 \ --dtype float16