Skip to content

Commit db6c264

Browse files
authored
[Bugfix] Fix value unpack error of simple connector for KVCache transfer. (#11058)
Signed-off-by: ShangmingCai <[email protected]>
1 parent 9f3974a commit db6c264

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

vllm/distributed/kv_transfer/kv_connector/simple_connector.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,12 @@ def send_kv_caches_and_hidden_states(
118118
start_layer = model_executable.model.start_layer
119119
end_layer = model_executable.model.end_layer
120120

121+
model_config = model_executable.model.config
122+
num_heads = model_config.num_key_value_heads
123+
hidden_size = model_config.hidden_size
124+
num_attention_heads = model_config.num_attention_heads
125+
head_size = int(hidden_size / num_attention_heads)
126+
121127
# query_lens contains the new KV caches that are added to vLLM,
122128
# so we will send them to the decode instance.
123129
# FIXME(Kuntai): This assumes that all requests are prefill.
@@ -131,8 +137,6 @@ def send_kv_caches_and_hidden_states(
131137
for layer_id in range(start_layer, end_layer):
132138
kv_cache = kv_caches[layer_id - start_layer]
133139

134-
_, _, num_heads, head_size = kv_cache[0].shape
135-
136140
key_cache = kv_cache[0].reshape(-1, num_heads, head_size)
137141
value_cache = kv_cache[1].reshape(-1, num_heads, head_size)
138142

0 commit comments

Comments
 (0)