From f2684a17130d55a326563ed00708c82990731b10 Mon Sep 17 00:00:00 2001 From: Vensenmu Date: Sat, 14 Jun 2025 12:15:53 +0800 Subject: [PATCH 1/3] Fix(siglip): Add remapping for quantized Gemma3 vision weights Signed-off-by: Vensenmu --- vllm/model_executor/models/siglip.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/vllm/model_executor/models/siglip.py b/vllm/model_executor/models/siglip.py index 3630f59f53e0..28835fee50fe 100644 --- a/vllm/model_executor/models/siglip.py +++ b/vllm/model_executor/models/siglip.py @@ -516,6 +516,18 @@ def load_weights(self, weights: Iterable[tuple[str, weight_loader(param, loaded_weight, shard_id) break else: + if name not in params_dict: + potential_name = f"vision_model.{name}" + if potential_name in params_dict: + # The following print statement is split into multiple + # lines to pass the E501 line-length check. + print(f"INFO: Remapping weight '{name}' to " + f"'{potential_name}'") + name = potential_name + else: + print(f"WARNING: Skipping weight '{name}', not found.") + loaded_params.add(name) + continue param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) From 15cc68ef49aaa52691cea243326092703d762420 Mon Sep 17 00:00:00 2001 From: Vensenmu Date: Sat, 14 Jun 2025 14:41:10 +0800 Subject: [PATCH 2/3] Fix(siglip): Add remapping for quantized Gemma3 vision weights Signed-off-by: Vensenmu --- vllm/model_executor/models/siglip.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/models/siglip.py b/vllm/model_executor/models/siglip.py index 28835fee50fe..b7a3b67e7a35 100644 --- a/vllm/model_executor/models/siglip.py +++ b/vllm/model_executor/models/siglip.py @@ -516,16 +516,14 @@ def load_weights(self, weights: Iterable[tuple[str, weight_loader(param, loaded_weight, shard_id) break else: + # Hotfix for quantized models to handle a weight name mismatch. + # The vLLM Siglip model expects a double `vision_model.` prefix. 
+ # This remaps the standard artifact name to the expected name. if name not in params_dict: potential_name = f"vision_model.{name}" if potential_name in params_dict: - # The following print statement is split into multiple - # lines to pass the E501 line-length check. - print(f"INFO: Remapping weight '{name}' to " - f"'{potential_name}'") name = potential_name else: - print(f"WARNING: Skipping weight '{name}', not found.") loaded_params.add(name) continue param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) From e74d6816b184d7e9663272ea3d129440d4ec1cae Mon Sep 17 00:00:00 2001 From: Vensenmu Date: Thu, 19 Jun 2025 20:02:03 +0800 Subject: [PATCH 3/3] Fix(gemma3): Remap quantized vision weights via hf_to_vllm_mapper and revert siglip load_weights hotfix Signed-off-by: Vensenmu --- vllm/model_executor/models/gemma3_mm.py | 1 + vllm/model_executor/models/siglip.py | 10 ---------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py index 3a1c14978b45..619d2aa67491 100644 --- a/vllm/model_executor/models/gemma3_mm.py +++ b/vllm/model_executor/models/gemma3_mm.py @@ -479,6 +479,7 @@ class Gemma3ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP, "model.vision_tower.": "vision_tower.", "model.multi_modal_projector.": "multi_modal_projector.", "lm_head.": "language_model.lm_head.", + "vision_tower.vision_model.": "vision_model.", }) def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): diff --git a/vllm/model_executor/models/siglip.py b/vllm/model_executor/models/siglip.py index b7a3b67e7a35..3630f59f53e0 100644 --- a/vllm/model_executor/models/siglip.py +++ b/vllm/model_executor/models/siglip.py @@ -516,16 +516,6 @@ def load_weights(self, weights: Iterable[tuple[str, weight_loader(param, loaded_weight, shard_id) break else: - # Hotfix for quantized models to handle a weight name mismatch. - # The vLLM Siglip model expects a double `vision_model.` prefix. - # This remaps the standard artifact name to the expected name. 
- if name not in params_dict: - potential_name = f"vision_model.{name}" - if potential_name in params_dict: - name = potential_name - else: - loaded_params.add(name) - continue param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader)