Skip to content
2 changes: 0 additions & 2 deletions tensorrt_llm/_torch/pyexecutor/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,6 @@ def create_py_executor_instance(
executor_config,
ctx_chunk_config,
model_engine,
draft_model_engine,
start_worker,
sampler,
drafter,
Expand Down Expand Up @@ -551,7 +550,6 @@ def create_py_executor_instance(
max_draft_len=spec_config.max_draft_len
if spec_config is not None else 0,
kv_cache_transceiver=kv_cache_transceiver,
draft_model_engine=draft_model_engine,
guided_decoder=guided_decoder,
start_worker=start_worker,
garbage_collection_gen0_threshold=garbage_collection_gen0_threshold)
Expand Down
6 changes: 2 additions & 4 deletions tensorrt_llm/_torch/pyexecutor/py_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ def __init__(self,
max_beam_width: int = 1,
max_draft_len: int = 0,
kv_cache_transceiver: Optional[KvCacheTransceiver] = None,
draft_model_engine: Optional[ModelEngine] = None,
guided_decoder: Optional[GuidedDecoder] = None,
garbage_collection_gen0_threshold: Optional[int] = None,
start_worker: bool = True):
Expand All @@ -161,13 +160,12 @@ def __init__(self,
self.enable_attention_dp = model_engine.enable_attention_dp
self.sampler = sampler
self.drafter = drafter
self.draft_model_engine = getattr(self.drafter, "draft_model_engine",
None)
self.guided_decoder = guided_decoder
self.dist = dist
self.disable_overlap_scheduler = disable_overlap_scheduler

# Draft model for certain spec decode algorithms, e.g. EAGLE3
self.draft_model_engine = draft_model_engine

# State used by enqueue and _fetch_new_requests
self.next_req_id = max_batch_size # The first max_batch_size request IDs are reserved for dummy requests
self.max_beam_width = max_beam_width
Expand Down
2 changes: 0 additions & 2 deletions tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,6 @@ def create_py_executor(
executor_config=executor_config,
ctx_chunk_config=ctx_chunk_config,
model_engine=model_engine,
draft_model_engine=draft_model_engine,
start_worker=False,
sampler=sampler,
drafter=drafter,
Expand Down Expand Up @@ -425,7 +424,6 @@ def create_py_executor(
executor_config=executor_config,
ctx_chunk_config=ctx_chunk_config,
model_engine=model_engine,
draft_model_engine=draft_model_engine,
start_worker=False,
sampler=sampler,
drafter=drafter,
Expand Down