NVIDIA · leslie-fang25 · Sep 3, 2025 · Sep 2, 2025 · Sep 2, 2025 · Sep 3, 2025
@@ -14,9 +14,7 @@
 from tensorrt_llm._utils import get_sm_version
 from tensorrt_llm.bindings.executor import (CapacitySchedulerPolicy,
                                             ContextChunkingPolicy,
-                                            ExecutorConfig,
-                                            LogitsPostProcessorConfig,
-                                            ParallelConfig)
+                                            ExecutorConfig)
 from tensorrt_llm.bindings.internal.batch_manager import ContextChunkingConfig
 from tensorrt_llm.llmapi.llm_args import KvCacheConnectorConfig, TorchLlmArgs
 from tensorrt_llm.llmapi.tokenizer import TokenizerBase
@@ -217,14 +215,9 @@ def create_py_executor(
     tokenizer: Optional[TokenizerBase] = None,
     lora_config: Optional[LoraConfig] = None,
     kv_connector_config: Optional[KvCacheConnectorConfig] = None,
-    logits_post_processor_config: Optional[LogitsPostProcessorConfig] = None,
-    parallel_config: Optional[ParallelConfig] = None,
 ) -> PyExecutor:
 
     executor_config = llm_args.get_executor_config(checkpoint_dir, tokenizer)
-    executor_config.logits_post_processor_config = logits_post_processor_config
-    executor_config.parallel_config = parallel_config
-
     garbage_collection_gen0_threshold = llm_args.garbage_collection_gen0_threshold
 
     _mangle_executor_config(executor_config)

@@ -113,6 +113,7 @@ def _create_py_executor():
             assert hasattr(
                 self.llm_args, "backend"
             ), "llm_args should be with backend in _create_py_executor"
+            _ = _get_comm_ranks_device_id()
             if self.llm_args.backend == "pytorch":
                 from tensorrt_llm._torch.pyexecutor.py_executor_creator import \
                     create_py_executor
@@ -122,13 +123,6 @@ def _create_py_executor():
                 args["tokenizer"] = tokenizer
                 args["lora_config"] = lora_config
                 args["kv_connector_config"] = kv_connector_config
-                args[
-                    "logits_post_processor_config"] = tllm.LogitsPostProcessorConfig(
-                        processor_batched=batched_logits_processor,
-                        replicate=False)
-                comm_ranks, device_ids = _get_comm_ranks_device_id()
-                args["parallel_config"] = tllm.ParallelConfig(
-                    participant_ids=comm_ranks, device_ids=device_ids)
             elif self.llm_args.backend == "_autodeploy":
                 from tensorrt_llm._torch.auto_deploy.llm_args import \
                     LlmArgs as ADLlmArgs