From add652dfe3dcbcee965cf61e9a58f4f6eea0ec77 Mon Sep 17 00:00:00 2001
From: Tomas Ruiz
Date: Fri, 26 Sep 2025 10:49:24 +0200
Subject: [PATCH 1/2] Add assertions in prepare_inputs_padded()

Signed-off-by: Tomas Ruiz
---
 vllm/v1/spec_decode/eagle.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index 119f41d8580e..7e94d933a329 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -514,6 +514,24 @@ def prepare_inputs_padded(self,
             causal=True,
         )
 
+        # NOTE(Tomas Ruiz): The update has no effect?
+        # Before and after are always the same
+        # Is the update only a shallow object copy?
+        b = common_attn_metadata
+        a = spec_common_attn_metadata
+        assert b.query_start_loc.eq(a.query_start_loc).all()
+        assert b.query_start_loc_cpu.eq(a.query_start_loc_cpu).all()
+        assert b.seq_lens.eq(a.seq_lens).all()
+        assert b.seq_lens_cpu.eq(a.seq_lens_cpu).all()
+        assert b.num_computed_tokens_cpu.eq(a.num_computed_tokens_cpu).all()
+        assert b.num_reqs == a.num_reqs
+        assert b.num_actual_tokens == a.num_actual_tokens
+        assert b.max_query_len == a.max_query_len
+        assert b.max_seq_len == a.max_seq_len
+        assert b.block_table_tensor.eq(a.block_table_tensor).all()
+        assert b.slot_mapping.eq(a.slot_mapping).all()
+        assert b.causal == a.causal
+
         token_indices_to_sample = common_attn_metadata.query_start_loc[1:] - 1 \
             - num_rejected_tokens_gpu
 

From a9ab31350bc09bf6bfddfa0bc6d9242c6aba85e0 Mon Sep 17 00:00:00 2001
From: Tomas Ruiz
Date: Fri, 26 Sep 2025 11:21:30 +0200
Subject: [PATCH 2/2] Pass input back as output

Signed-off-by: Tomas Ruiz
---
 vllm/v1/spec_decode/eagle.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index 7e94d933a329..3a97be55b908 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -535,7 +535,7 @@ def prepare_inputs_padded(self,
         token_indices_to_sample = common_attn_metadata.query_start_loc[1:] - 1 \
             - num_rejected_tokens_gpu
 
-        return spec_common_attn_metadata, token_indices, token_indices_to_sample
+        return common_attn_metadata, token_indices, token_indices_to_sample
 
     def propose_tree(
         self,