Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@
from tensorrt_llm._utils import get_sm_version
from tensorrt_llm.bindings.executor import (CapacitySchedulerPolicy,
ContextChunkingPolicy,
ExecutorConfig,
LogitsPostProcessorConfig,
ParallelConfig)
ExecutorConfig)
from tensorrt_llm.bindings.internal.batch_manager import ContextChunkingConfig
from tensorrt_llm.llmapi.llm_args import KvCacheConnectorConfig, TorchLlmArgs
from tensorrt_llm.llmapi.tokenizer import TokenizerBase
Expand Down Expand Up @@ -217,14 +215,9 @@ def create_py_executor(
tokenizer: Optional[TokenizerBase] = None,
lora_config: Optional[LoraConfig] = None,
kv_connector_config: Optional[KvCacheConnectorConfig] = None,
logits_post_processor_config: Optional[LogitsPostProcessorConfig] = None,
parallel_config: Optional[ParallelConfig] = None,
) -> PyExecutor:

executor_config = llm_args.get_executor_config(checkpoint_dir, tokenizer)
executor_config.logits_post_processor_config = logits_post_processor_config
executor_config.parallel_config = parallel_config

garbage_collection_gen0_threshold = llm_args.garbage_collection_gen0_threshold

_mangle_executor_config(executor_config)
Expand Down
8 changes: 1 addition & 7 deletions tensorrt_llm/executor/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def _create_py_executor():
assert hasattr(
self.llm_args, "backend"
), "llm_args should be with backend in _create_py_executor"
_ = _get_comm_ranks_device_id()
if self.llm_args.backend == "pytorch":
from tensorrt_llm._torch.pyexecutor.py_executor_creator import \
create_py_executor
Expand All @@ -122,13 +123,6 @@ def _create_py_executor():
args["tokenizer"] = tokenizer
args["lora_config"] = lora_config
args["kv_connector_config"] = kv_connector_config
args[
"logits_post_processor_config"] = tllm.LogitsPostProcessorConfig(
processor_batched=batched_logits_processor,
replicate=False)
comm_ranks, device_ids = _get_comm_ranks_device_id()
args["parallel_config"] = tllm.ParallelConfig(
participant_ids=comm_ranks, device_ids=device_ids)
elif self.llm_args.backend == "_autodeploy":
from tensorrt_llm._torch.auto_deploy.llm_args import \
LlmArgs as ADLlmArgs
Expand Down