Commit b0f7624

Rename field and nullify empty lists
Signed-off-by: Qier Li <[email protected]>
1 parent e8a9759 commit b0f7624

3 files changed: +14 -13 lines


tests/v1/worker/test_gpu_model_runner.py

Lines changed: 1 addition & 1 deletion
@@ -249,7 +249,7 @@ def test_update_states_request_resumed(model_runner, dist_init):
         req_ids=[req_id],
         resumed_from_preemption=[False],
         new_token_ids=[[]],
-        token_ids=[[]],
+        resumed_req_token_ids=[[]],
         new_block_ids=([[0]], ),
         num_computed_tokens=[0],
     )

vllm/v1/core/sched/output.py

Lines changed: 2 additions & 2 deletions
@@ -89,7 +89,7 @@ class CachedRequestData:
     new_token_ids: list[list[int]]
     # If resumed_from_preemption is True, propogate the token ids to the
     # connector, otherwise will be empty.
-    token_ids: list[list[int]]
+    resumed_req_token_ids: list[Optional[list[int]]]
     new_block_ids: list[Optional[tuple[list[int], ...]]]
     num_computed_tokens: list[int]

@@ -103,7 +103,7 @@ def make_empty(cls) -> CachedRequestData:
             req_ids=[],
             resumed_from_preemption=[],
             new_token_ids=[],
-            token_ids=[],
+            resumed_req_token_ids=[],
             new_block_ids=[],
             num_computed_tokens=[],
         )
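
For context on the consumer side, here is a minimal sketch of how code receiving CachedRequestData might use the renamed field after this commit; the standalone helper and its name are illustrative only, not part of vLLM's KVConnector API:

from typing import Optional


def collect_resumed_tokens(
        req_ids: list[str],
        resumed_from_preemption: list[bool],
        resumed_req_token_ids: list[Optional[list[int]]],
) -> dict[str, list[int]]:
    # Map each resumed request id to the full token prefix that the
    # scheduler propagated for it. Entries are None (rather than []) for
    # requests that were not resumed from preemption.
    resumed: dict[str, list[int]] = {}
    for idx, req_id in enumerate(req_ids):
        token_ids = resumed_req_token_ids[idx]
        if resumed_from_preemption[idx] and token_ids is not None:
            resumed[req_id] = token_ids
    return resumed


# Only the second request was resumed, so only it appears in the result.
assert collect_resumed_tokens(
    ["a", "b"], [False, True], [None, [1, 2, 3]]) == {"b": [1, 2, 3]}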

vllm/v1/core/sched/scheduler.py

Lines changed: 11 additions & 10 deletions
@@ -669,7 +669,7 @@ def _make_cached_request_data(
         req_ids: list[str] = []
         new_token_ids: list[list[int]] = []
         new_block_ids: list[Optional[tuple[list[int], ...]]] = []
-        token_ids: list[list[int]] = []
+        resumed_req_token_ids: list[Optional[list[int]]] = []
         num_computed_tokens: list[int] = []

         use_connector = self.connector is not None
@@ -688,21 +688,22 @@ def _make_cached_request_data(
                 # stage worker and the last-stage worker. Otherwise, we don't
                 # need to send the sampled tokens back because the model runner
                 # will cache them.
-                tokens = req.all_token_ids[req.num_computed_tokens:req.
-                                           num_computed_tokens + num_tokens]
-                new_token_ids.append(tokens)
-                token_ids.append([])
+                token_ids = req.all_token_ids[req.num_computed_tokens:req.
+                                              num_computed_tokens + num_tokens]
+                new_token_ids.append(token_ids)
+                resumed_req_token_ids.append(None)
             elif use_connector:
                 # When using a KVConnector, we add a placeholder to avoid index
                 # out of bounds errors. TODO: Remove this once the KVConnector
                 # is updated to handle token IDs properly.
                 new_token_ids.append([])
                 if resumed_from_preemption[idx]:
-                    tokens = req.all_token_ids[:req.num_computed_tokens +
-                                               num_tokens]
-                    token_ids.append(tokens)
+                    resumed_token_ids = req.all_token_ids[:req.
+                                                          num_computed_tokens +
+                                                          num_tokens]
+                    resumed_req_token_ids.append(resumed_token_ids)
                 else:
-                    token_ids.append([])
+                    resumed_req_token_ids.append(None)
             new_block_ids.append(
                 req_to_new_blocks[req_id].get_block_ids(allow_none=True))
             num_computed_tokens.append(req.num_computed_tokens)
@@ -711,7 +712,7 @@ def _make_cached_request_data(
             req_ids=req_ids,
             resumed_from_preemption=resumed_from_preemption,
             new_token_ids=new_token_ids,
-            token_ids=token_ids,
+            resumed_req_token_ids=resumed_req_token_ids,
             new_block_ids=new_block_ids,
             num_computed_tokens=num_computed_tokens,
         )
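
As a rough, self-contained sketch of the connector branch above, with a simplified stand-in for the scheduler's request object (the pipeline-parallel branch and the real Scheduler state are elided, so FakeRequest and connector_resumed_entry are purely illustrative names):

from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeRequest:
    # Simplified stand-in for the scheduler's cached request state.
    all_token_ids: list[int]
    num_computed_tokens: int


def connector_resumed_entry(req: FakeRequest, num_tokens: int,
                            resumed_from_preemption: bool) -> Optional[list[int]]:
    # Compute a single resumed_req_token_ids entry for the KVConnector path:
    # a resumed request gets its full prefix up to and including the tokens
    # scheduled this step, while every other request now gets None instead
    # of the empty list used before this commit.
    if resumed_from_preemption:
        return req.all_token_ids[:req.num_computed_tokens + num_tokens]
    return None


req = FakeRequest(all_token_ids=[7, 8, 9, 10], num_computed_tokens=2)
# Resumed from preemption: the whole prefix is propagated to the connector.
assert connector_resumed_entry(req, 1, True) == [7, 8, 9]
# Not resumed: the entry is nullified rather than left as [].
assert connector_resumed_entry(req, 1, False) is None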
