
Commit 948b8b9

[None][fix] Fix CUDA graph for Qwen2.5-VL (#8047)
Signed-off-by: yechank <[email protected]>
1 parent: 1dba9fa

File tree

1 file changed (+9, -0 lines)


tensorrt_llm/_torch/pyexecutor/_util.py

Lines changed: 9 additions & 0 deletions
@@ -173,6 +173,15 @@ def _create_dummy_context_requests(
                                          beam_width=max_beam_width, ),
                                      output_config=trtllm.OutputConfig(),
                                      end_id=-1)
+            if self._model_engine.use_mrope:
+                request.py_multimodal_data = {
+                    "mrope_config": {
+                        "mrope_position_ids":
+                        torch.zeros(3, 1, input_seq_len, dtype=torch.int32),
+                        "mrope_position_deltas":
+                        torch.zeros(1, 1, dtype=torch.int32)
+                    }
+                }
             requests.append(request)
             remaining_tokens -= input_seq_len
         if self._mapping.enable_attention_dp:
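
For context on the fix: the change attaches a zero-filled mrope payload to each dummy context request so that CUDA graph warmup exercises the same mrope code path Qwen2.5-VL uses at runtime; zeros are sufficient because warmup only needs representative tensor shapes, not meaningful positions. Below is a small standalone sketch of that payload, assuming only torch. build_dummy_mrope_data and _WarmupRequest are hypothetical names used for illustration and are not part of the TensorRT-LLM API.

import torch


def build_dummy_mrope_data(input_seq_len: int) -> dict:
    # Mirrors the shapes added in the diff above: mrope carries three position
    # components per token, hence the leading 3 in the position-id tensor, and
    # a single (1, 1) per-request position delta.
    return {
        "mrope_config": {
            "mrope_position_ids":
            torch.zeros(3, 1, input_seq_len, dtype=torch.int32),
            "mrope_position_deltas":
            torch.zeros(1, 1, dtype=torch.int32),
        }
    }


class _WarmupRequest:
    # Hypothetical stand-in for the executor request object in the diff.
    def __init__(self):
        self.py_multimodal_data = None


# Usage sketch: attach the payload only when the engine actually uses mrope,
# as the diff does via self._model_engine.use_mrope.
use_mrope = True  # stands in for self._model_engine.use_mrope
request = _WarmupRequest()
if use_mrope:
    request.py_multimodal_data = build_dummy_mrope_data(input_seq_len=32)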
