Skip to content

Commit 77af18b

Browse files
committed
[fix] Release slots with spec decode + disagg (#5975)
Signed-off-by: Iman Tabrizian <[email protected]>
Signed-off-by: Iman Tabrizian <[email protected]>
1 parent f225f5c commit 77af18b

File tree

1 file changed: +12 -4 lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -896,6 +896,10 @@ def _executor_loop_pp(self):
896896

897897
def _executor_loop(self):
898898
torch.cuda.set_device(self.device_id)
899+
is_ngram = hasattr(
900+
self.model_engine, "spec_config"
901+
) and self.model_engine.spec_config is not None and self.model_engine.spec_config.spec_dec_mode.is_ngram(
902+
)
899903
with self._profiler() as profile_step:
900904
sample_state = None
901905
iter_start_time = time.time()
@@ -918,8 +922,7 @@ def _executor_loop(self):
918922

919923
self._pad_attention_dp_dummy_request()
920924

921-
if self.draft_model_engine is not None or hasattr(
922-
self, 'drafter') and self.drafter is not None:
925+
if self.draft_model_engine is not None or is_ngram or self.drafter is not None:
923926
self._prepare_draft_requests(self.active_requests)
924927

925928
scheduled_batch, fitting_disagg_gen_init_requests, num_fitting_reqs = self._schedule(
@@ -1652,8 +1655,13 @@ def _send_disagg_ctx_cache(self, scheduled_ctx_requests):
16521655
if req.is_context_only_request and (req.is_context_finished or
16531656
req.is_finished_due_to_length):
16541657
self.kv_cache_transceiver.respond_and_send_async(req)
1655-
self.resource_manager.resource_managers[
1656-
ResourceManagerType.SEQ_SLOT_MANAGER].free_resources(req)
1658+
for resource_mgr_type in (
1659+
ResourceManagerType.SEQ_SLOT_MANAGER,
1660+
ResourceManagerType.SPEC_RESOURCE_MANAGER):
1661+
if resource_mgr_type in self.resource_manager.resource_managers and self.resource_manager.resource_managers[
1662+
resource_mgr_type] is not None:
1663+
self.resource_manager.resource_managers[
1664+
resource_mgr_type].free_resources(req)
16571665

16581666
self.kv_cache_transceiver.check_context_transfer_status(0)
16591667

0 commit comments

Comments
 (0)