File tree Expand file tree Collapse file tree 2 files changed +2
-3
lines changed
tensorrt_llm/bench/benchmark
tests/integration/defs/accuracy Expand file tree Collapse file tree 2 files changed +2
-3
lines changed Original file line number Diff line number Diff line change @@ -450,6 +450,7 @@ def ignore_trt_only_args(kwargs: dict):
450450 elif runtime_config .backend == "_autodeploy" :
451451 ignore_trt_only_args (kwargs )
452452 kwargs ["world_size" ] = kwargs .pop ("tensor_parallel_size" , None )
453+
453454 llm = AutoDeployLLM (** kwargs )
454455 else :
455456 llm = LLM (** kwargs )
Original file line number Diff line number Diff line change 1717
1818from tensorrt_llm import LLM
1919from tensorrt_llm ._torch .auto_deploy import LLM as AutoDeployLLM
20- from tensorrt_llm .llmapi .llm_args import (CapacitySchedulerPolicy ,
21- ContextChunkingPolicy )
2220from tensorrt_llm .quantization import QuantAlgo
2321from tensorrt_llm .sampling_params import SamplingParams
2422
@@ -40,7 +38,7 @@ def get_default_kwargs(self):
4038 'max_batch_size' : 512 ,
4139 # 131072 is the max seq len for the model
4240 'max_seq_len' : 8192 ,
43- # max num tokens is derived in the build_config, which is not used by AutoDeploy llmargs.
41+ # max num tokens is derived in the build_config, which is not used by AutoDeploy llmargs.
4442 # Set it explicitly here to 8192 which is the default in build_config.
4543 'max_num_tokens' : 8192 ,
4644 'skip_loading_weights' : False ,
You can’t perform that action at this time.
0 commit comments