diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index dc1674cd1ea2..551f54b68ce6 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -828,7 +828,7 @@ def build(self) -> ModelInputForGPU: cuda_graph_pad_size = self._get_cuda_graph_pad_size( num_seqs=len(seq_lens), - max_decode_seq_len=max_encoder_seq_len, + max_decode_seq_len=max_decode_seq_len, max_encoder_seq_len=max_encoder_seq_len) batch_size = len(input_tokens)