vllm-project · WoosukKwon · Feb 8, 2025 · Feb 8, 2025
@@ -205,8 +205,6 @@ def allocate_slots(
                 # Should not exceed the maximum number of blocks per request.
                 # This is especially because the block table has the shape
                 # [..., max_num_blocks_per_req].
-                # TODO(woosuk): Check and reject requests if
-                # num_prompt_tokens + max_tokens > max_model_len.
                 self.max_num_blocks_per_req - len(req_blocks),
             )
             assert num_new_blocks > 0