From a1777102f07fc9a28c3b3607b51a60c335941069 Mon Sep 17 00:00:00 2001
From: Juncheng Gu <juncgu@gmail.com>
Date: Fri, 23 May 2025 04:59:07 +0000
Subject: [PATCH 1/2] fix error caused by skipping do_remote_prefill requests
 (promot < block_size) fix #18429

Signed-off-by: Juncheng Gu <juncgu@gmail.com>
---
 tests/v1/kv_connector/unit/test_nixl_connector.py   | 13 +++++++++----
 .../kv_transfer/kv_connector/v1/nixl_connector.py   |  9 ---------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py
index 9b2a720c11c4..601868e1f641 100644
--- a/tests/v1/kv_connector/unit/test_nixl_connector.py
+++ b/tests/v1/kv_connector/unit/test_nixl_connector.py
@@ -45,8 +45,10 @@ def test_prompt_less_than_block_size():
     Test that we can handle case where prompt is < block.
 
     In this case, the P worker will send empty remote_block_ids.
-    The D worker should not schedule an async read in this case,
-    since there is nothing to pull.
+    The D worker should not schedule the request but without
+    any async read (empty local_block_ids) since there is nothing to pull.
+    Keeping the request int connector metaata is for notifying the
+    prefill worker so that the remote blocks are freed.
     """
     vllm_config = create_vllm_config()
     scheduler = create_scheduler(vllm_config)
@@ -67,7 +69,10 @@ def test_prompt_less_than_block_size():
     kv_connector_metadata = scheduler_output.kv_connector_metadata
     assert kv_connector_metadata is not None
     assert isinstance(kv_connector_metadata, NixlConnectorMetadata)
-    assert len(kv_connector_metadata.requests) == 0
+    assert len(kv_connector_metadata.requests) == 1
+    req_id, meta = next(iter(kv_connector_metadata.requests.items()))
+    # empty local_block_ids, so that async read will be skipped
+    assert not meta.local_block_ids
 
     # This request should be scheduled regularly.
-    assert len(scheduler_output.scheduled_new_reqs) == 1
+    assert len(scheduler_output.scheduled_new_reqs) == 1
\ No newline at end of file
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
index 6303d77ad305..f034afbce091 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
@@ -259,15 +259,6 @@ def build_connector_meta(
         # Loop through scheduled reqs and convert to ReqMeta.
         for req_id, (req, block_ids) in self._reqs_need_recv.items():
             assert req.kv_transfer_params is not None
-            # For the case where there are no remote blocks to pull
-            # (block_ids is empty), we don't need to schedule
-            # an async read on the worker side.
-            if not block_ids:
-                logger.debug(
-                    "Skipping adding request %s to NixlConnectorMetadata, "
-                    "as there are no remote blocks to pull", req_id)
-                continue
-
             meta.add_new_req(
                 request_id=req_id,
                 local_block_ids=block_ids,

From 7d5333aefc61715f7f9bd8f3fa69d9d538a7f5e0 Mon Sep 17 00:00:00 2001
From: Juncheng Gu <juncgu@gmail.com>
Date: Fri, 23 May 2025 17:10:48 +0000
Subject: [PATCH 2/2] tweaks

Signed-off-by: Juncheng Gu <juncgu@gmail.com>
---
 tests/v1/kv_connector/unit/test_nixl_connector.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py
index 601868e1f641..86bbd094b86e 100644
--- a/tests/v1/kv_connector/unit/test_nixl_connector.py
+++ b/tests/v1/kv_connector/unit/test_nixl_connector.py
@@ -47,8 +47,8 @@ def test_prompt_less_than_block_size():
     In this case, the P worker will send empty remote_block_ids.
     The D worker should not schedule the request but without
     any async read (empty local_block_ids) since there is nothing to pull.
-    Keeping the request int connector metaata is for notifying the
-    prefill worker so that the remote blocks are freed.
+    Keeping the request int connector metadata is for notifying the
+    prefill worker so that its remote blocks can be released.
     """
     vllm_config = create_vllm_config()
     scheduler = create_scheduler(vllm_config)
@@ -72,6 +72,7 @@ def test_prompt_less_than_block_size():
     assert len(kv_connector_metadata.requests) == 1
     req_id, meta = next(iter(kv_connector_metadata.requests.items()))
     # empty local_block_ids, so that async read will be skipped
+    assert hasattr(meta, "local_block_ids")
     assert not meta.local_block_ids
 
     # This request should be scheduled regularly.