docs/configuration/optimization.md — 2 changes: 1 addition & 1 deletion

```diff
@@ -175,7 +175,7 @@ Regardless, you need to set `mm_encoder_tp_mode="data"` in engine arguments to u
 Known supported models:
 
 - Llama4 (<gh-pr:18368>)
-- MiniCPM-V-4 (<gh-pr:23327>)
+- MiniCPM-V-2.5 or above (<gh-pr:23327>, <gh-pr:23948>)
 - Qwen2.5-VL (<gh-pr:22742>)
 - Step3 (<gh-pr:22697>)
```
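For context, a minimal sketch of what the documented setting looks like when passed as an engine argument from Python. The checkpoint name and parallel size are illustrative assumptions, not part of this PR:

```python
from vllm import LLM

# Illustrative only: shard the language model across 2 GPUs (tensor
# parallel) while each rank keeps a full replica of the vision encoder
# and multimodal inputs are split across ranks instead.
llm = LLM(
    model="openbmb/MiniCPM-V-2_6",  # assumed checkpoint name
    tensor_parallel_size=2,
    mm_encoder_tp_mode="data",      # the setting documented above
)
```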
vllm/model_executor/models/minicpmv.py — 43 changes: 29 additions & 14 deletions

```diff
@@ -977,6 +977,8 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
     instantiated.
     """
 
+    supports_encoder_tp_data = True
+
     @classmethod
     def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:
         if modality.startswith("image"):
```
```diff
@@ -990,6 +992,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         config = vllm_config.model_config.hf_config
         multimodal_config = vllm_config.model_config.multimodal_config
         quant_config = vllm_config.quant_config
+        self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
         super().__init__()
         # All MiniCPM-V models disable `tie_word_embeddings` but
         # `PretrainedConfig.tie_word_embeddings` defaults to True; we cannot
```
```diff
@@ -1237,6 +1240,8 @@ def get_vision_hidden_states(
 
 class MiniCPMV2_0(MiniCPMVBaseModel):
 
+    supports_encoder_tp_data = False
+
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         assert self.version == (2, 0)
```
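The hunks above follow a simple pattern: the base class now advertises support via a class attribute and records a per-instance switch from the engine configuration, while MiniCPM-V 2.0 (whose vision tower is not the Idefics2 encoder used by later versions) opts back out. A self-contained illustration of that pattern, with every name here hypothetical rather than vLLM's actual classes:

```python
# Hypothetical illustration of the opt-in/opt-out pattern in this PR.
class VisionLMBase:
    # Class-level capability flag: subclasses inherit support by default.
    supports_encoder_tp_data = True

    def __init__(self, mm_encoder_tp_mode: str) -> None:
        # Per-instance switch derived from the engine configuration:
        # "data" replicates the encoder and splits inputs across ranks.
        self.use_data_parallel = mm_encoder_tp_mode == "data"


class LegacyVisionLM(VisionLMBase):
    # Opt out: this version's encoder has no data-parallel code path.
    supports_encoder_tp_data = False


assert VisionLMBase("data").use_data_parallel
assert not LegacyVisionLM("tp").use_data_parallel
```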
```diff
@@ -1351,9 +1356,12 @@ def init_vision_module(
         quant_config: Optional[QuantizationConfig],
         prefix: str = "",
     ) -> nn.Module:
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
```

Review comment on lines +1359 to +1364 (Contributor; severity: high):

> This `init_vision_module` implementation is nearly identical across MiniCPMV2_5, MiniCPMV2_6, MiniCPMV4_0, and MiniCPMV4_5. The only significant difference is the conditional logic for `quant_config` in the v4.x models. This duplication increases maintenance effort. Consider refactoring this into a shared method in a base class to improve code reuse and maintainability.
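A sketch of the reviewer's suggestion, which this PR does not implement: hoist the shared construction into `MiniCPMVBaseModel` behind a small hook so the v4.x classes override only the `quant_config` handling. The hook name `_adjust_quant_config` is invented for illustration:

```python
# Sketch only: assumes these methods would live on MiniCPMVBaseModel.
def _adjust_quant_config(
    self, quant_config: Optional[QuantizationConfig]
) -> Optional[QuantizationConfig]:
    # Base behavior: pass through unchanged. The v4.x subclasses would
    # override this to call their existing `_maybe_ignore_quant_config`.
    return quant_config

def init_vision_module(
    self,
    config: PretrainedConfig,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
) -> nn.Module:
    # One shared implementation replacing the four near-identical copies.
    quant_config = self._adjust_quant_config(quant_config)
    model = Idefics2VisionTransformer(
        config.vision_config,
        quant_config=quant_config,
        prefix=prefix,
        use_data_parallel=self.use_data_parallel,
    )
    if self.config.drop_vision_last_layer:
        model.encoder.layers = model.encoder.layers[:-1]
    return model
```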
```diff
@@ -1441,9 +1449,12 @@ def init_vision_module(
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ) -> nn.Module:
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
```
```diff
@@ -1521,8 +1532,6 @@ class MiniCPMV4_0(MiniCPMVBaseModel, SupportsLoRA):
         ],
     }
 
-    supports_encoder_tp_data = True
-
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         assert self.version == (4, 0)
```
```diff
@@ -1546,9 +1555,12 @@ def init_vision_module(
         prefix: str = "",
     ) -> nn.Module:
         quant_config = self._maybe_ignore_quant_config(quant_config)
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
```
```diff
@@ -1652,9 +1664,12 @@ def init_vision_module(
         prefix: str = "",
     ) -> nn.Module:
         quant_config = self._maybe_ignore_quant_config(quant_config)
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
```