Skip to content

Commit 17b4c66

Browse files
authored
[Bugfix] Fix Qwen3-VL max_num_video_tokens calculation for video profiling (#25648)
Signed-off-by: Isotr0py <[email protected]>
1 parent 3c2b2cc commit 17b4c66

File tree

2 files changed

+13
-1
lines changed

2 files changed

+13
-1
lines changed

vllm/model_executor/models/qwen2_vl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@
8282
logger = init_logger(__name__)
8383

8484
# For profile run
85-
_MAX_FRAMES_PER_VIDEO = 600
85+
_MAX_FRAMES_PER_VIDEO = 32
8686

8787
# === Vision Inputs === #
8888

vllm/model_executor/models/qwen3_vl.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,18 @@ def _get_dummy_videos(
715715
video_items.append(video_item)
716716
return video_items
717717

718+
def get_dummy_processor_inputs(self, seq_len, mm_counts):
719+
processor_inputs = super().get_dummy_processor_inputs(
720+
seq_len, mm_counts)
721+
# HACK(Isotr0py): We set do_resize to False here to reuse Qwen2-VL's
722+
# profiling logic. This hack will be problematic for configurable mm
723+
# profiling.
724+
# TODO(Isotr0py): Switch to the implementation in
725+
# https://github.com/vllm-project/vllm/pull/25557
726+
# after supporting configurable mm profiling.
727+
processor_inputs.hf_processor_mm_kwargs = {"do_resize": False}
728+
return processor_inputs
729+
718730

719731
class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo]
720732
):

0 commit comments

Comments
 (0)