We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d71767b, commit f18cbdf (Copy full SHA for f18cbdf)
vllm/v1/core/sched/scheduler.py
@@ -476,7 +476,11 @@ def schedule(self) -> SchedulerOutput:
476
# Apply dynamic token budget constraints
477
effective_budget = self.get_dynamic_token_budget(request, token_budget)
478
num_new_tokens = min(num_new_tokens, effective_budget)
479
- assert num_new_tokens > 0
+ # assert num_new_tokens > 0
480
+ if num_new_tokens == 0:
481
+ self.waiting.pop_request()
482
+ skipped_waiting_requests.prepend_request(request)
483
+ continue
484
485
# Schedule encoder inputs.
486
if request.has_encoder_inputs:
0 commit comments