1 change: 0 additions & 1 deletion tests/models/multimodal/generation/test_qwen2_vl.py
@@ -209,7 +209,6 @@ def get_image_embeds(model):
return visual(pixel_values_on_device,
grid_thw=video_grid_thw_on_device).cpu()

-    # V1 Test: this calls a V0 internal.
Member Author:
I forgot to remove this in #18465

video_embeds = torch.concat(llm.apply_model(get_image_embeds))

# split into original batches
32 changes: 1 addition & 31 deletions vllm/multimodal/registry.py
@@ -12,8 +12,7 @@
cached_tokenizer_from_config)
from vllm.utils import ClassRegistry

-from .cache import (BaseMultiModalProcessorCache,
-                    processor_only_cache_from_config)
+from .cache import BaseMultiModalProcessorCache
from .processing import BaseMultiModalProcessor, BaseProcessingInfo
from .profiling import (BaseDummyInputsBuilder, DummyDecoderData,
DummyEncoderData, MultiModalProfiler)
@@ -176,35 +175,6 @@ def get_max_tokens_per_item_by_nonzero_modality(
if mm_limits[key] > 0
}

-    # TODO: Remove once V0 is gone
-    def get_max_tokens_by_modality(
-        self,
-        model_config: "ModelConfig",
-    ) -> Mapping[str, int]:
-        """
-        Get the maximum number of tokens from each modality
-        for profiling the memory usage of a model.
-        """
-        cache = processor_only_cache_from_config(model_config, self)
-        mm_limits = self.get_mm_limits_per_prompt(model_config, cache=cache)
-        max_tokens_per_item = self.get_max_tokens_per_item_by_modality(
-            model_config,
-            cache=cache,
-        )
-
-        return {
-            key: mm_limits[key] * max_tokens_per_mm_item
-            for key, max_tokens_per_mm_item in max_tokens_per_item.items()
-        }
-
-    # TODO: Remove once V0 is gone
-    def get_max_multimodal_tokens(self, model_config: "ModelConfig") -> int:
-        """
-        Get the maximum number of multi-modal tokens
-        for profiling the memory usage of a model.
-        """
-        return sum(self.get_max_tokens_by_modality(model_config).values())

def get_mm_limits_per_prompt(
self,
model_config: "ModelConfig",
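For anyone who still needed the numbers these helpers produced, the same aggregates can be recomputed on the caller side from the methods that remain in vllm/multimodal/registry.py. A minimal sketch mirroring the removed bodies, not code from this PR; `registry` and `model_config` are assumed to be a registry instance and a loaded model config already in scope:

# Sketch only (not part of this change): reproduce the removed helpers
# outside the registry, using the methods that remain after this PR.
# Assumes processor_only_cache_from_config is still available from
# vllm.multimodal.cache; only its import into registry.py was dropped here.
from vllm.multimodal.cache import processor_only_cache_from_config

cache = processor_only_cache_from_config(model_config, registry)
mm_limits = registry.get_mm_limits_per_prompt(model_config, cache=cache)
max_tokens_per_item = registry.get_max_tokens_per_item_by_modality(
    model_config, cache=cache)

# Equivalent of the removed get_max_tokens_by_modality()
max_tokens_by_modality = {
    key: mm_limits[key] * per_item
    for key, per_item in max_tokens_per_item.items()
}

# Equivalent of the removed get_max_multimodal_tokens()
total_mm_tokens = sum(max_tokens_by_modality.values())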