From b27a7b541636cec3556815335302c0cc538293e4 Mon Sep 17 00:00:00 2001 From: leslie-fang25 Date: Fri, 29 Aug 2025 03:42:43 -0700 Subject: [PATCH] [None][chore] rm executor config in kv cache connector Signed-off-by: leslie-fang25 --- examples/llm-api/llm_kv_cache_connector.py | 11 +++++------ tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py | 7 ++----- tensorrt_llm/_torch/pyexecutor/py_executor_creator.py | 5 ++--- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/examples/llm-api/llm_kv_cache_connector.py b/examples/llm-api/llm_kv_cache_connector.py index bd8bf7fcc7e..599fab6f9ac 100644 --- a/examples/llm-api/llm_kv_cache_connector.py +++ b/examples/llm-api/llm_kv_cache_connector.py @@ -14,7 +14,6 @@ from tensorrt_llm import LLM, SamplingParams, logger from tensorrt_llm._torch.pyexecutor.kv_cache_connector import ( KvCacheConnectorScheduler, KvCacheConnectorWorker, SchedulerOutput) -from tensorrt_llm.bindings.executor import ExecutorConfig from tensorrt_llm.bindings.internal.batch_manager import LlmRequest from tensorrt_llm.llmapi.llm_args import KvCacheConnectorConfig @@ -34,8 +33,8 @@ class PersistentKvCacheConnectorMetadata: class PersistentKvCacheConnectorWorker(KvCacheConnectorWorker): - def __init__(self, executor_config: ExecutorConfig): - super().__init__(executor_config) + def __init__(self): + super().__init__() self.kv_cache_tensor = None @@ -81,10 +80,10 @@ def get_finished( class PersistentKvCacheConnectorLeader(KvCacheConnectorScheduler): - def __init__(self, executor_config: ExecutorConfig): - super().__init__(executor_config) + def __init__(self, tokens_per_block): + super().__init__() - self.block_size = self._config.tokens_per_block + self.block_size = tokens_per_block self.pending_loads = {} self.cache_folder = os.environ.get(CONNECTOR_CACHE_FOLDER_KEY, diff --git a/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py b/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py index 5e8bf6dfaa3..9bec793a8c4 100644 --- a/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py +++ b/tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py @@ -44,7 +44,6 @@ from tensorrt_llm._utils import mpi_allgather, mpi_broadcast, mpi_rank from tensorrt_llm.bindings import LlmRequestState -from tensorrt_llm.bindings.executor import ExecutorConfig from tensorrt_llm.bindings.internal.batch_manager import \ KvCacheConnectorManager as KvCacheConnectorManagerCpp from tensorrt_llm.bindings.internal.batch_manager import LlmRequest @@ -81,8 +80,7 @@ class SchedulerOutput: class KvCacheConnectorWorker(ABC): - def __init__(self, config: ExecutorConfig): - self._config = config + def __init__(self): self._metadata = None super().__init__() @@ -162,8 +160,7 @@ def get_finished( class KvCacheConnectorScheduler(ABC): - def __init__(self, executor_config: ExecutorConfig): - self._config = executor_config + def __init__(self): super().__init__() @abstractmethod diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py index e824ee02d8d..803f94cae26 100644 --- a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py +++ b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py @@ -409,12 +409,11 @@ def create_py_executor( # In this case, the worker may be dependent on the scheduler, or vice-versa. # To deal with cases like this, we instantiate them both concurrently. with ThreadPoolExecutor(max_workers=2) as executor: - connector_worker_task = executor.submit(worker_cls, - executor_config) + connector_worker_task = executor.submit(worker_cls) if scheduler_cls is not None and rank == 0: connector_scheduler_task = executor.submit( - scheduler_cls, executor_config) + scheduler_cls, executor_config.tokens_per_block) connector_scheduler = connector_scheduler_task.result() else: connector_scheduler = None