From a1777102f07fc9a28c3b3607b51a60c335941069 Mon Sep 17 00:00:00 2001 From: Juncheng Gu Date: Fri, 23 May 2025 04:59:07 +0000 Subject: [PATCH 1/2] fix error caused by skipping do_remote_prefill requests (promot < block_size) fix #18429 Signed-off-by: Juncheng Gu --- tests/v1/kv_connector/unit/test_nixl_connector.py | 13 +++++++++---- .../kv_transfer/kv_connector/v1/nixl_connector.py | 9 --------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py index 9b2a720c11c4..601868e1f641 100644 --- a/tests/v1/kv_connector/unit/test_nixl_connector.py +++ b/tests/v1/kv_connector/unit/test_nixl_connector.py @@ -45,8 +45,10 @@ def test_prompt_less_than_block_size(): Test that we can handle case where prompt is < block. In this case, the P worker will send empty remote_block_ids. - The D worker should not schedule an async read in this case, - since there is nothing to pull. + The D worker should not schedule the request but without + any async read (empty local_block_ids) since there is nothing to pull. + Keeping the request int connector metaata is for notifying the + prefill worker so that the remote blocks are freed. """ vllm_config = create_vllm_config() scheduler = create_scheduler(vllm_config) @@ -67,7 +69,10 @@ def test_prompt_less_than_block_size(): kv_connector_metadata = scheduler_output.kv_connector_metadata assert kv_connector_metadata is not None assert isinstance(kv_connector_metadata, NixlConnectorMetadata) - assert len(kv_connector_metadata.requests) == 0 + assert len(kv_connector_metadata.requests) == 1 + req_id, meta = next(iter(kv_connector_metadata.requests.items())) + # empty local_block_ids, so that async read will be skipped + assert not meta.local_block_ids # This request should be scheduled regularly. - assert len(scheduler_output.scheduled_new_reqs) == 1 + assert len(scheduler_output.scheduled_new_reqs) == 1 \ No newline at end of file diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 6303d77ad305..f034afbce091 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -259,15 +259,6 @@ def build_connector_meta( # Loop through scheduled reqs and convert to ReqMeta. for req_id, (req, block_ids) in self._reqs_need_recv.items(): assert req.kv_transfer_params is not None - # For the case where there are no remote blocks to pull - # (block_ids is empty), we don't need to schedule - # an async read on the worker side. - if not block_ids: - logger.debug( - "Skipping adding request %s to NixlConnectorMetadata, " - "as there are no remote blocks to pull", req_id) - continue - meta.add_new_req( request_id=req_id, local_block_ids=block_ids, From 7d5333aefc61715f7f9bd8f3fa69d9d538a7f5e0 Mon Sep 17 00:00:00 2001 From: Juncheng Gu Date: Fri, 23 May 2025 17:10:48 +0000 Subject: [PATCH 2/2] tweaks Signed-off-by: Juncheng Gu --- tests/v1/kv_connector/unit/test_nixl_connector.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py index 601868e1f641..86bbd094b86e 100644 --- a/tests/v1/kv_connector/unit/test_nixl_connector.py +++ b/tests/v1/kv_connector/unit/test_nixl_connector.py @@ -47,8 +47,8 @@ def test_prompt_less_than_block_size(): In this case, the P worker will send empty remote_block_ids. The D worker should not schedule the request but without any async read (empty local_block_ids) since there is nothing to pull. - Keeping the request int connector metaata is for notifying the - prefill worker so that the remote blocks are freed. + Keeping the request int connector metadata is for notifying the + prefill worker so that its remote blocks can be released. """ vllm_config = create_vllm_config() scheduler = create_scheduler(vllm_config) @@ -72,6 +72,7 @@ def test_prompt_less_than_block_size(): assert len(kv_connector_metadata.requests) == 1 req_id, meta = next(iter(kv_connector_metadata.requests.items())) # empty local_block_ids, so that async read will be skipped + assert hasattr(meta, "local_block_ids") assert not meta.local_block_ids # This request should be scheduled regularly.