Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/transformers/models/auto/configuration_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
("deepseek_v2", "DeepseekV2Config"),
("deepseek_v3", "DeepseekV3Config"),
("deepseek_vl", "DeepseekVLConfig"),
("deepseek_vl_v2", "DeepseekVLV2Config"),
("deepseek_vl_hybrid", "DeepseekVLHybridConfig"),
("deformable_detr", "DeformableDetrConfig"),
("deit", "DeiTConfig"),
Expand Down Expand Up @@ -542,6 +543,7 @@
("deepseek_v2", "DeepSeek-V2"),
("deepseek_v3", "DeepSeek-V3"),
("deepseek_vl", "DeepseekVL"),
("deepseek_vl_v2", "DeepseekVLV2"),
("deepseek_vl_hybrid", "DeepseekVLHybrid"),
("deformable_detr", "Deformable DETR"),
("deit", "DeiT"),
Expand Down
2 changes: 2 additions & 0 deletions src/transformers/models/auto/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
("deepseek_v2", "DeepseekV2Model"),
("deepseek_v3", "DeepseekV3Model"),
("deepseek_vl", "DeepseekVLModel"),
("deepseek_vl_v2", "DeepseekVLV2Model"),
("deepseek_vl_hybrid", "DeepseekVLHybridModel"),
("deformable_detr", "DeformableDetrModel"),
("deit", "DeiTModel"),
Expand Down Expand Up @@ -651,6 +652,7 @@ class _BaseModelWithGenerate(PreTrainedModel, GenerationMixin):
("dbrx", "DbrxForCausalLM"),
("deepseek_v2", "DeepseekV2ForCausalLM"),
("deepseek_v3", "DeepseekV3ForCausalLM"),
("deepseek_vl_v2", "DeepseekVLV2ForCausalLM"),
("diffllama", "DiffLlamaForCausalLM"),
("doge", "DogeForCausalLM"),
("dots1", "Dots1ForCausalLM"),
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/auto/processing_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
("colpali", "ColPaliProcessor"),
("colqwen2", "ColQwen2Processor"),
("deepseek_vl", "DeepseekVLProcessor"),
("deepseek_vl_v2", "DeepseekVLV2Processor"),
("deepseek_vl_hybrid", "DeepseekVLHybridProcessor"),
("dia", "DiaProcessor"),
("edgetam", "Sam2Processor"),
Expand Down
13 changes: 13 additions & 0 deletions src/transformers/models/deepseek_v2/configuration_deepseek_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,25 @@ def __init__(
self.rope_scaling["rope_type"] = self.rope_scaling["type"]
rope_config_validation(self)
self.first_k_dense_replace = first_k_dense_replace

if kv_lora_rank is None:
kv_lora_rank = 512
self.kv_lora_rank = kv_lora_rank

if q_lora_rank is None:
q_lora_rank = 1536
self.q_lora_rank = q_lora_rank

self.n_group = n_group
self.n_routed_experts = n_routed_experts
self.n_shared_experts = n_shared_experts

if qk_nope_head_dim == 0:
qk_nope_head_dim = 128
self.qk_nope_head_dim = qk_nope_head_dim

if qk_rope_head_dim == 0:
qk_rope_head_dim = 64
self.qk_rope_head_dim = qk_rope_head_dim
self.routed_scaling_factor = routed_scaling_factor
self.topk_group = topk_group
Expand Down
40 changes: 40 additions & 0 deletions src/transformers/models/deepseek_vl_v2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Package entry point for the DeepseekVL-V2 model.
#
# Uses the standard transformers lazy-import pattern: static type checkers see
# the real imports in the TYPE_CHECKING branch, while at runtime the module is
# replaced by a _LazyModule proxy that defers importing each submodule until
# one of its attributes is first accessed.
from typing import TYPE_CHECKING

from ...utils import (
    _LazyModule,
)


if TYPE_CHECKING:
    # These imports exist only for type checkers / IDEs; they are never
    # executed at runtime.
    from .configuration_deepseek_vl_v2 import (
        DeepseekVLV2Config,
        MlpProjectorConfig,
    )
    from .modeling_deepseek_vl_v2 import (
        DeepseekVLV2ForCausalLM,
        DeepseekVLV2Model,
        DeepseekVLV2PreTrainedModel,
    )
    from .processing_deepseek_vl_v2 import DeepseekVLV2Processor


else:
    import sys

    # Swap this module object for a lazy proxy. The import-structure mapping
    # below must mirror the TYPE_CHECKING imports above, submodule by
    # submodule — keep the two in sync when adding or removing public names.
    sys.modules[__name__] = _LazyModule(
        __name__,
        globals()["__file__"],
        {
            "configuration_deepseek_vl_v2": [
                "DeepseekVLV2Config",
                "MlpProjectorConfig",
            ],
            "modeling_deepseek_vl_v2": [
                "DeepseekVLV2ForCausalLM",
                "DeepseekVLV2Model",
                "DeepseekVLV2PreTrainedModel",
            ],
            "processing_deepseek_vl_v2": ["DeepseekVLV2Processor"],
        },
        module_spec=__spec__,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# This file was automatically generated from src/transformers/models/deepseek_vl_v2/modular_deepseek_vl_v2.py.
# Do NOT edit this file manually as any edits will be overwritten by the generation of
# the file from the modular. If any change should be done, please apply the change to the
# modular_deepseek_vl_v2.py file directly. One of our CI enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨

from ...configuration_utils import PretrainedConfig
from ...utils import logging
from ..auto import CONFIG_MAPPING, AutoConfig
from ..deepseek_v2.configuration_deepseek_v2 import DeepseekV2Config


logger = logging.get_logger(__name__)


class MlpProjectorConfig(PretrainedConfig):
    """Configuration for the DeepseekVL-V2 MLP projector.

    Args:
        projector_type (`str`, defaults to `"downsample_mlp_gelu"`):
            Identifier of the projector architecture variant.
        input_dim (`int`, defaults to 1152):
            Dimensionality of the projector's input features.
        n_embed (`int`, defaults to 2048):
            Dimensionality of the projector's output embeddings.
        depth (`int`, defaults to 2):
            Number of layers in the projector MLP.
        mlp_ratio (`int`, defaults to 1):
            Ratio controlling the MLP hidden size.
        downsample_ratio (`int`, defaults to 2):
            Downsampling factor applied by the projector.
    """

    model_type = "mlp_projector"
    projector_type: str = "downsample_mlp_gelu"
    input_dim: int = 1152
    n_embed: int = 2048
    depth: int = 2
    mlp_ratio: int = 1
    downsample_ratio: int = 2
    # NOTE(review): `token_pooling` is declared only at class level and never
    # assigned in `__init__`, so it is not configurable per instance —
    # presumably intentional, but worth confirming against the modeling code.
    token_pooling: bool = False

    def __init__(
        self,
        projector_type: str = "downsample_mlp_gelu",
        input_dim: int = 1152,
        n_embed: int = 2048,
        depth: int = 2,
        mlp_ratio: int = 1,
        downsample_ratio: int = 2,
        **kwargs,
    ):
        # Record every projector hyperparameter on the instance before handing
        # the remaining keyword arguments to the base-class machinery.
        for attr_name, attr_value in (
            ("projector_type", projector_type),
            ("input_dim", input_dim),
            ("n_embed", n_embed),
            ("depth", depth),
            ("mlp_ratio", mlp_ratio),
            ("downsample_ratio", downsample_ratio),
        ):
            setattr(self, attr_name, attr_value)

        super().__init__(**kwargs)


class DeepseekVLV2Config(PretrainedConfig):
    """Configuration class for the DeepseekVL-V2 multimodal model.

    Composes three sub-configurations: a `DeepseekV2Config` for the language
    model, a vision config resolved through `CONFIG_MAPPING` (SigLIP by
    default), and an `MlpProjectorConfig` for the vision-to-language projector.

    Args:
        tile_tag (`str`, defaults to `"2D"`):
            Tag describing how image tiles are arranged.
        global_view_pos (`str`, defaults to `"head"`):
            Position of the global image view relative to the tiles.
        candidate_resolutions (`tuple[tuple[int, int]]`, defaults to `((384, 384),)`):
            Candidate input image resolutions.
        n_embed (`int`, defaults to 512):
            Embedding dimension used by the model.
        language_config (`dict`, *optional*):
            Configuration (or config dict) for the language model.
        vision_config (`dict`, *optional*):
            Configuration (or config dict) for the vision encoder.
        projector_config (`dict`, *optional*):
            Configuration (or config dict) for the MLP projector.
    """

    model_type = "deepseek_vl_v2"
    sub_configs = {
        "language_config": DeepseekV2Config,
        "vision_config": AutoConfig,
        "projector_config": MlpProjectorConfig,
    }

    tile_tag: str = "2D"
    global_view_pos: str = "head"
    candidate_resolutions: tuple[tuple[int, int]] = ((384, 384),)

    def __init__(
        self,
        # Fixed: the default used to be the literal string "tile_tag",
        # contradicting the class-level declaration of "2D".
        tile_tag: str = "2D",
        global_view_pos: str = "head",
        candidate_resolutions: tuple[tuple[int, int]] = ((384, 384),),
        n_embed: int = 512,
        language_config: dict = None,
        vision_config: dict = None,
        projector_config: dict = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.tile_tag = tile_tag
        self.global_view_pos = global_view_pos
        self.candidate_resolutions = candidate_resolutions
        self.n_embed = n_embed

        if language_config is None:
            language_config = {}
            logger.info("`language_config` is `None`. Initializing the `DeepseekV2Config` with default values.")

        if vision_config is None:
            vision_config = {}
            logger.info("`vision_config` is `None`. Initializing the `SiglipVisionConfig` with default values.")

        # Fixed: previously a `None` projector_config stayed `None`; now it is
        # defaulted like the other sub-configs.
        if projector_config is None:
            projector_config = {}
            logger.info("`projector_config` is `None`. Initializing the `MlpProjectorConfig` with default values.")

        if isinstance(language_config, dict):
            language_config["model_type"] = language_config.get("model_type", "deepseek_v2")
            language_config = CONFIG_MAPPING[language_config["model_type"]](**language_config)

        if isinstance(vision_config, dict):
            # Default to SigLIP but honor an explicitly provided model_type,
            # consistent with the language_config handling above (previously
            # any user-supplied model_type was silently overwritten).
            vision_config["model_type"] = vision_config.get("model_type", "siglip_vision_model")
            vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)

        if isinstance(projector_config, dict):
            projector_config = MlpProjectorConfig(**projector_config)

        self.language_config = language_config
        self.vision_config = vision_config
        self.projector_config = projector_config
Loading