
Commit 4fa9284

[nvbug/5302638][nvbugs/5310314] fix _handle_cancelled_requests (#5532)
Signed-off-by: junq <[email protected]>
1 parent 06f8327 commit 4fa9284

2 files changed: +7, -20 lines

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 7 additions & 18 deletions
@@ -1957,31 +1957,20 @@ def _handle_cancelled_requests(self):
         if len(self.canceled_req_ids) == 0:
             return

-        cancelled_responses = {}
-        left_requests = []
-        # Tracks canceled requests for proper handling in overlap mode during `sampler.update_requests`.
-        self.canceled_requests = []
         for request in self.active_requests:
             req_id = request.py_request_id
             if req_id in self.canceled_req_ids:
-                self._terminate_request(request)
+                # Mark requests as finished, then, we reuse all existing code
+                # to clean up the KV cache resources.
                 request.finish_by_reason(FinishReason.CANCELLED)
                 request.decoding_iter = request.py_decoding_iter
-                cancelled_responses[req_id] = request.create_response(
-                    False, self.dist.rank)
-                self.canceled_requests.append(request)
                 self.canceled_req_ids.erase(req_id)
-            else:
-                left_requests.append(request)
-        self.active_requests = left_requests

-        # When enable attention dp, each rank does not have full copy of requests
-        # so we need to remove the cancel requests not in the local rank
-        self.canceled_req_ids.clear()
-
-        # enqueue the cancelled requests' responses as they are not
-        # active_requests and be discarded in the sampler loop.
-        self._enqueue_responses(cancelled_responses)
+        if self.enable_attention_dp:
+            # TODO: revisit the cancel logic of attention dp
+            # When enable attention dp, each rank does not have full copy of requests
+            # so we need to remove the cancel requests not in the local rank
+            self.canceled_req_ids.clear()

     @nvtx_range("_enqueue_responses")
     def _enqueue_responses(self, responses: Dict[int, LlmResponse]):
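
For readability, below is a sketch of _handle_cancelled_requests as it reads after this commit, reconstructed from the diff above. The surrounding executor class, imports, and attributes (active_requests, canceled_req_ids, enable_attention_dp, FinishReason) are assumed from the rest of the module and not shown here.

# Sketch only: the method after this change, reconstructed from the diff above.
def _handle_cancelled_requests(self):
    if len(self.canceled_req_ids) == 0:
        return

    for request in self.active_requests:
        req_id = request.py_request_id
        if req_id in self.canceled_req_ids:
            # Mark the request as finished; the existing cleanup paths then
            # release its KV cache resources, so no explicit
            # _terminate_request call is needed here.
            request.finish_by_reason(FinishReason.CANCELLED)
            request.decoding_iter = request.py_decoding_iter
            self.canceled_req_ids.erase(req_id)

    if self.enable_attention_dp:
        # With attention DP, each rank holds only a subset of the requests,
        # so drop any cancel IDs that were not found on this rank.
        self.canceled_req_ids.clear()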

tests/unittest/llmapi/apps/_test_openai_misc.py

Lines changed: 0 additions & 2 deletions
@@ -83,8 +83,6 @@ async def test_request_cancellation(server: RemoteOpenAIServer,
                                     model_name: str):
     # clunky test: send an ungodly amount of load in with short timeouts
     # then ensure that it still responds quickly afterwards
-    pytest.skip("https://nvbugs/5310314")
-
     chat_input = [{"role": "user", "content": "Write a long story"}]
     client = server.get_async_client(timeout=0.5, max_retries=3)
     tasks = []
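
Removing the pytest.skip line re-enables test_request_cancellation now that the referenced bug (https://nvbugs/5310314) is fixed. A minimal sketch of running just this test locally, assuming pytest is installed and the repository root is the working directory (invoking it via pytest.main is only one option; the usual pytest CLI works the same way):

# Sketch only: re-run the re-enabled cancellation test. The file path comes
# from the diff above; the -k filter selects the single test by name.
import pytest

pytest.main([
    "tests/unittest/llmapi/apps/_test_openai_misc.py",
    "-k", "test_request_cancellation",
])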
