@@ -669,7 +669,7 @@ def _make_cached_request_data(
         req_ids: list[str] = []
         new_token_ids: list[list[int]] = []
         new_block_ids: list[Optional[tuple[list[int], ...]]] = []
-        token_ids: list[list[int]] = []
+        resumed_req_token_ids: list[Optional[list[int]]] = []
         num_computed_tokens: list[int] = []
 
         use_connector = self.connector is not None
@@ -688,21 +688,22 @@ def _make_cached_request_data(
                 # stage worker and the last-stage worker. Otherwise, we don't
                 # need to send the sampled tokens back because the model runner
                 # will cache them.
-                tokens = req.all_token_ids[req.num_computed_tokens:req.
-                                           num_computed_tokens + num_tokens]
-                new_token_ids.append(tokens)
-                token_ids.append([])
+                token_ids = req.all_token_ids[req.num_computed_tokens:req.
+                                              num_computed_tokens + num_tokens]
+                new_token_ids.append(token_ids)
+                resumed_req_token_ids.append(None)
             elif use_connector:
                 # When using a KVConnector, we add a placeholder to avoid index
                 # out of bounds errors. TODO: Remove this once the KVConnector
                 # is updated to handle token IDs properly.
                 new_token_ids.append([])
                 if resumed_from_preemption[idx]:
-                    tokens = req.all_token_ids[:req.num_computed_tokens +
-                                               num_tokens]
-                    token_ids.append(tokens)
+                    resumed_token_ids = req.all_token_ids[:req.
+                                                          num_computed_tokens +
+                                                          num_tokens]
+                    resumed_req_token_ids.append(resumed_token_ids)
                 else:
-                    token_ids.append([])
+                    resumed_req_token_ids.append(None)
             new_block_ids.append(
                 req_to_new_blocks[req_id].get_block_ids(allow_none=True))
             num_computed_tokens.append(req.num_computed_tokens)
@@ -711,7 +712,7 @@ def _make_cached_request_data(
             req_ids=req_ids,
             resumed_from_preemption=resumed_from_preemption,
             new_token_ids=new_token_ids,
-            token_ids=token_ids,
+            resumed_req_token_ids=resumed_req_token_ids,
             new_block_ids=new_block_ids,
             num_computed_tokens=num_computed_tokens,
         )
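For context, here is a minimal standalone sketch of the parallel-list pattern this diff converges on: `resumed_req_token_ids` holds one entry per cached request, aligned with `req_ids`, where `None` marks a request that was not resumed from preemption and a list carries the full token-id prefix of a resumed one, so an empty list no longer doubles as both "not resumed" and "resumed with nothing to send". The `CachedReqs` dataclass and `tokens_for_connector` helper below are illustrative stand-ins, not vLLM's actual `CachedRequestData` or connector API.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class CachedReqs:
    # Illustrative stand-in for a CachedRequestData-like container.
    req_ids: list[str]
    resumed_from_preemption: list[bool]
    # One entry per request, aligned with req_ids. None means the request
    # kept running and no token history is needed; a list means the request
    # was resumed from preemption and carries its token-id prefix.
    resumed_req_token_ids: list[Optional[list[int]]]


def tokens_for_connector(data: CachedReqs) -> dict[str, list[int]]:
    """Collect token histories only for requests resumed from preemption."""
    out: dict[str, list[int]] = {}
    for req_id, resumed, token_ids in zip(data.req_ids,
                                          data.resumed_from_preemption,
                                          data.resumed_req_token_ids):
        if resumed:
            assert token_ids is not None
            out[req_id] = token_ids
    return out


if __name__ == "__main__":
    data = CachedReqs(
        req_ids=["a", "b"],
        resumed_from_preemption=[False, True],
        resumed_req_token_ids=[None, [101, 17, 42]],
    )
    print(tokens_for_connector(data))  # {'b': [101, 17, 42]}
```

Using `Optional[list[int]]` rather than an empty list also lets a type checker (and an `assert`) distinguish "no data expected" from "data present but empty", which is the intent behind renaming `token_ids` to `resumed_req_token_ids`.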