From afb09b02ea96fb28378934ec1514a43e7d2e34bc Mon Sep 17 00:00:00 2001
From: calvin chen
Date: Tue, 22 Jul 2025 22:05:28 +0800
Subject: [PATCH] fixed fusedmoe layer for start_load_kv

Signed-off-by: calvin chen
---
 .../kv_connector/v1/p2p/p2p_nccl_connector.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
index d47a75461d72..32d0e43d71af 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
@@ -192,8 +192,16 @@ def inject_kv_into_layer(
         # Load the KV for each request each layer
         for request in metadata.requests:
             for layer_name in forward_context.no_compile_layers:
-                attn_layer = forward_context.no_compile_layers[layer_name]
-                kv_cache_layer = attn_layer.kv_cache[ \
+                layer = forward_context.no_compile_layers[layer_name]
+
+                # Only process layers that have a kv_cache
+                # attribute (attention layers). Skip non-attention
+                # layers like FusedMoE.
+                kv_cache = getattr(layer, 'kv_cache', None)
+                if kv_cache is None:
+                    continue
+
+                kv_cache_layer = kv_cache[ \
                     forward_context.virtual_engine]
 
                 kv_cache = self.p2p_nccl_engine.recv_tensor(
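
Note (illustration only, not part of the applied patch): the fix relies on duck typing. forward_context.no_compile_layers can contain non-attention modules such as FusedMoE, and only attention layers carry a kv_cache list indexed by virtual engine, so the connector checks for the attribute instead of assuming every layer has it. The sketch below uses hypothetical DummyAttention/DummyMoE classes and a plain dict standing in for no_compile_layers to show the same getattr-and-skip pattern in isolation:

    # A minimal, runnable sketch (hypothetical classes, not vLLM code) of the
    # duck-typed check used above: only layers exposing a `kv_cache` attribute
    # are treated as attention layers; anything else (e.g. a MoE block) is
    # skipped instead of raising AttributeError.

    class DummyAttention:
        def __init__(self):
            # One KV-cache entry per virtual engine, mirroring attn_layer.kv_cache.
            self.kv_cache = [{"k": None, "v": None}]

    class DummyMoE:
        # No kv_cache attribute, like FusedMoE.
        pass

    layers = {
        "model.layers.0.self_attn": DummyAttention(),
        "model.layers.0.mlp": DummyMoE(),
    }
    virtual_engine = 0

    for layer_name, layer in layers.items():
        kv_cache = getattr(layer, "kv_cache", None)
        if kv_cache is None:
            continue  # non-attention layer: nothing to inject
        kv_cache_layer = kv_cache[virtual_engine]
        print(f"would inject KV into {layer_name}: {kv_cache_layer}")

Checking for the attribute rather than testing the layer's type keeps the connector decoupled from specific module classes, so any future layer without a KV cache is skipped the same way.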