diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py index de1493d8b4fa..5accfbd5d0c7 100755 --- a/src/transformers/configuration_utils.py +++ b/src/transformers/configuration_utils.py @@ -890,7 +890,6 @@ def to_diff_dict(self) -> dict[str, Any]: isinstance(getattr(self, key, None), PreTrainedConfig) and key in class_config_dict and isinstance(class_config_dict[key], dict) - or key in self.sub_configs ): # For nested configs we need to clean the diff recursively diff = recursive_diff_dict(value, default_config_dict, config_obj=getattr(self, key, None)) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index a9742d9bd704..ed167639d559 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -1187,13 +1187,13 @@ def _get_dtype( dtype = getattr(torch, dtype) config.dtype = dtype for sub_config_key in config.sub_configs: - sub_config = getattr(config, sub_config_key) - sub_config.dtype = dtype + if (sub_config := getattr(config, sub_config_key)) is not None: + sub_config.dtype = dtype elif isinstance(dtype, torch.dtype): config.dtype = dtype for sub_config_key in config.sub_configs: - sub_config = getattr(config, sub_config_key) - sub_config.dtype = dtype + if (sub_config := getattr(config, sub_config_key)) is not None: + sub_config.dtype = dtype elif isinstance(dtype, dict): for key, curr_dtype in dtype.items(): if hasattr(config, key): @@ -1218,8 +1218,8 @@ def _get_dtype( default_dtype = torch.get_default_dtype() config.dtype = default_dtype for key in config.sub_configs: - value = getattr(config, key) - value.dtype = default_dtype + if (sub_config := getattr(config, key)) is not None: + sub_config.dtype = default_dtype return config, dtype, dtype_orig @@ -2673,34 +2673,34 @@ def set_attn_implementation(self, attn_implementation: Union[str, dict]): # We need this as some old and badly designed models use subconfigs without declaring the corresponding modules as PreTrainedModel for subconfig_key in self.config.sub_configs: - subconfig = getattr(self.config, subconfig_key) - sub_implementation = ( - requested_implementation - if not isinstance(attn_implementation, dict) - else attn_implementation.get(subconfig_key, subconfig._attn_implementation) - ) - # This means we did not perform any check above for this particular subconfig -> set it in the dark if it is registered - if ( - not hasattr(subconfig, "_attn_was_changed") - # If it's already the same, then no need to enter here and raise warnings - and sub_implementation != subconfig._attn_implementation - ): - if sub_implementation not in ["eager"] + ALL_ATTENTION_FUNCTIONS.valid_keys(): - raise ValueError( - f'Specified `attn_implementation="{sub_implementation}"` is not supported for {subconfig_key}. ' - 'The only possible arguments are "eager" (manual attention implementation)' - f"or one of the following: {list(ALL_ATTENTION_FUNCTIONS.valid_keys())}" - ) - subconfig._attn_implementation_internal = sub_implementation - logger.warning( - f"We set the attention implementation for the sub-config `{subconfig_key}` to `{sub_implementation}` " - "without finding the associated sub-model. For this reason we could not check if the model supports it. " - "You may encounter undefined behavior." 
+ if (subconfig := getattr(self.config, subconfig_key)) is not None: + sub_implementation = ( + requested_implementation + if not isinstance(attn_implementation, dict) + else attn_implementation.get(subconfig_key, subconfig._attn_implementation) ) - # Unset the attribute in this case, to avoid issues in the future - else: - if hasattr(subconfig, "_attn_was_changed"): - del subconfig._attn_was_changed + # This means we did not perform any check above for this particular subconfig -> set it in the dark if it is registered + if ( + not hasattr(subconfig, "_attn_was_changed") + # If it's already the same, then no need to enter here and raise warnings + and sub_implementation != subconfig._attn_implementation + ): + if sub_implementation not in ["eager"] + ALL_ATTENTION_FUNCTIONS.valid_keys(): + raise ValueError( + f'Specified `attn_implementation="{sub_implementation}"` is not supported for {subconfig_key}. ' + 'The only possible arguments are "eager" (manual attention implementation)' + f"or one of the following: {list(ALL_ATTENTION_FUNCTIONS.valid_keys())}" + ) + subconfig._attn_implementation_internal = sub_implementation + logger.warning( + f"We set the attention implementation for the sub-config `{subconfig_key}` to `{sub_implementation}` " + "without finding the associated sub-model. For this reason we could not check if the model supports it. " + "You may encounter undefined behavior." + ) + # Unset the attribute in this case, to avoid issues in the future + else: + if hasattr(subconfig, "_attn_was_changed"): + del subconfig._attn_was_changed def enable_input_require_grads(self): """ diff --git a/src/transformers/models/conditional_detr/configuration_conditional_detr.py b/src/transformers/models/conditional_detr/configuration_conditional_detr.py index 3b794d73195f..36693772df7c 100644 --- a/src/transformers/models/conditional_detr/configuration_conditional_detr.py +++ b/src/transformers/models/conditional_detr/configuration_conditional_detr.py @@ -23,7 +23,7 @@ from ...onnx import OnnxConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -135,6 +135,7 @@ class ConditionalDetrConfig(PreTrainedConfig): ```""" model_type = "conditional_detr" + sub_configs = {"backbone_config": AutoConfig} keys_to_ignore_at_inference = ["past_key_values"] attribute_map = { "hidden_size": "d_model", @@ -245,22 +246,6 @@ def __init__( self.focal_alpha = focal_alpha super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - class ConditionalDetrOnnxConfig(OnnxConfig): torch_onnx_minimum_version = version.parse("1.11") diff --git a/src/transformers/models/d_fine/configuration_d_fine.py b/src/transformers/models/d_fine/configuration_d_fine.py index beb3cf6c0563..9a7464042dee 100644 --- a/src/transformers/models/d_fine/configuration_d_fine.py +++ b/src/transformers/models/d_fine/configuration_d_fine.py @@ -21,7 +21,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from 
..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -194,6 +194,7 @@ class DFineConfig(PreTrainedConfig): """ model_type = "d_fine" + sub_configs = {"backbone_config": AutoConfig} layer_types = ["basic", "bottleneck"] attribute_map = { "hidden_size": "d_model", @@ -396,22 +397,6 @@ def __init__( ) super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - @classmethod def from_backbone_configs(cls, backbone_config: PreTrainedConfig, **kwargs): """Instantiate a [`DFineConfig`] (or a derived class) from a pre-trained backbone model configuration and DETR model diff --git a/src/transformers/models/d_fine/modular_d_fine.py b/src/transformers/models/d_fine/modular_d_fine.py index a7d9f241e71e..93505d8deaaf 100644 --- a/src/transformers/models/d_fine/modular_d_fine.py +++ b/src/transformers/models/d_fine/modular_d_fine.py @@ -25,7 +25,7 @@ from ...image_transforms import corners_to_center_format from ...utils import is_torchdynamo_compiling, logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig from ..rt_detr.modeling_rt_detr import ( RTDetrConvNormLayer, RTDetrDecoder, @@ -213,6 +213,7 @@ class DFineConfig(PreTrainedConfig): """ model_type = "d_fine" + sub_configs = {"backbone_config": AutoConfig} layer_types = ["basic", "bottleneck"] attribute_map = { "hidden_size": "d_model", @@ -415,22 +416,6 @@ def __init__( ) super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - @classmethod def from_backbone_configs(cls, backbone_config: PreTrainedConfig, **kwargs): """Instantiate a [`DFineConfig`] (or a derived class) from a pre-trained backbone model configuration and DETR model diff --git a/src/transformers/models/dab_detr/configuration_dab_detr.py b/src/transformers/models/dab_detr/configuration_dab_detr.py index a5116765f91e..364128485c30 100644 --- a/src/transformers/models/dab_detr/configuration_dab_detr.py +++ b/src/transformers/models/dab_detr/configuration_dab_detr.py @@ -17,7 +17,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -136,6 +136,7 @@ class DabDetrConfig(PreTrainedConfig): ```""" model_type = "dab-detr" + sub_configs = {"backbone_config": AutoConfig} keys_to_ignore_at_inference = ["past_key_values"] attribute_map = { "num_attention_heads": "encoder_attention_heads", @@ -256,13 +257,5 @@ def __init__( self.initializer_bias_prior_prob = initializer_bias_prior_prob super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", 
None) is not None - else {} - ) - __all__ = ["DabDetrConfig"] diff --git a/src/transformers/models/deformable_detr/configuration_deformable_detr.py b/src/transformers/models/deformable_detr/configuration_deformable_detr.py index ccd546b979da..93cee9c53969 100644 --- a/src/transformers/models/deformable_detr/configuration_deformable_detr.py +++ b/src/transformers/models/deformable_detr/configuration_deformable_detr.py @@ -17,7 +17,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -144,6 +144,7 @@ class DeformableDetrConfig(PreTrainedConfig): ```""" model_type = "deformable_detr" + sub_configs = {"backbone_config": AutoConfig} attribute_map = { "hidden_size": "d_model", "num_attention_heads": "encoder_attention_heads", @@ -270,21 +271,5 @@ def __init__( self.disable_custom_kernels = disable_custom_kernels super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - __all__ = ["DeformableDetrConfig"] diff --git a/src/transformers/models/depth_anything/configuration_depth_anything.py b/src/transformers/models/depth_anything/configuration_depth_anything.py index bf8b70f03fda..9e263bb6406a 100644 --- a/src/transformers/models/depth_anything/configuration_depth_anything.py +++ b/src/transformers/models/depth_anything/configuration_depth_anything.py @@ -14,12 +14,10 @@ # limitations under the License. """DepthAnything model configuration""" -import copy - from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto.configuration_auto import CONFIG_MAPPING +from ..auto.configuration_auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -89,6 +87,7 @@ class DepthAnythingConfig(PreTrainedConfig): ```""" model_type = "depth_anything" + sub_configs = {"backbone_config": AutoConfig} def __init__( self, @@ -151,26 +150,5 @@ def __init__( self.depth_estimation_type = depth_estimation_type self.max_depth = max_depth if max_depth else 1 - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - - def to_dict(self): - """ - Serializes this instance to a Python dictionary. Override the default [`~PreTrainedConfig.to_dict`]. 
Returns: - `dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, - """ - output = copy.deepcopy(self.__dict__) - - if output["backbone_config"] is not None: - output["backbone_config"] = self.backbone_config.to_dict() - - output["model_type"] = self.__class__.model_type - return output - __all__ = ["DepthAnythingConfig"] diff --git a/src/transformers/models/detr/configuration_detr.py b/src/transformers/models/detr/configuration_detr.py index f7c80b704cec..7c69f06318a1 100644 --- a/src/transformers/models/detr/configuration_detr.py +++ b/src/transformers/models/detr/configuration_detr.py @@ -23,7 +23,7 @@ from ...onnx import OnnxConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -133,6 +133,7 @@ class DetrConfig(PreTrainedConfig): ```""" model_type = "detr" + sub_configs = {"backbone_config": AutoConfig} keys_to_ignore_at_inference = ["past_key_values"] attribute_map = { "hidden_size": "d_model", @@ -244,22 +245,6 @@ def __init__( self.eos_coefficient = eos_coefficient super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - @classmethod def from_backbone_config(cls, backbone_config: PreTrainedConfig, **kwargs): """Instantiate a [`DetrConfig`] (or a derived class) from a pre-trained backbone model configuration. diff --git a/src/transformers/models/dpt/configuration_dpt.py b/src/transformers/models/dpt/configuration_dpt.py index a217616a8b3b..99277ab87368 100644 --- a/src/transformers/models/dpt/configuration_dpt.py +++ b/src/transformers/models/dpt/configuration_dpt.py @@ -14,12 +14,10 @@ # limitations under the License. """DPT model configuration""" -import copy - from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto.configuration_auto import CONFIG_MAPPING +from ..auto.configuration_auto import CONFIG_MAPPING, AutoConfig from ..bit import BitConfig @@ -140,6 +138,7 @@ class DPTConfig(PreTrainedConfig): ```""" model_type = "dpt" + sub_configs = {"backbone_config": AutoConfig} def __init__( self, @@ -275,26 +274,5 @@ def __init__( self.pooler_output_size = pooler_output_size if pooler_output_size else hidden_size self.pooler_act = pooler_act - def to_dict(self): - """ - Serializes this instance to a Python dictionary. Override the default [`~PreTrainedConfig.to_dict`]. 
Returns: - `dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, - """ - output = copy.deepcopy(self.__dict__) - - if output["backbone_config"] is not None: - output["backbone_config"] = self.backbone_config.to_dict() - - output["model_type"] = self.__class__.model_type - return output - - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - __all__ = ["DPTConfig"] diff --git a/src/transformers/models/esm/configuration_esm.py b/src/transformers/models/esm/configuration_esm.py index 45e7383579f5..7a8577e80cd7 100644 --- a/src/transformers/models/esm/configuration_esm.py +++ b/src/transformers/models/esm/configuration_esm.py @@ -23,7 +23,159 @@ logger = logging.get_logger(__name__) -# TODO Update this + +@dataclass +class StructureModuleConfig: + """ + Args: + sequence_dim: + Single representation channel dimension + pairwise_dim: + Pair representation channel dimension + ipa_dim: + IPA hidden channel dimension + resnet_dim: + Angle resnet (Alg. 23 lines 11-14) hidden channel dimension + num_heads_ipa: + Number of IPA heads + num_qk_points: + Number of query/key points to generate during IPA + num_v_points: + Number of value points to generate during IPA + dropout_rate: + Dropout rate used throughout the layer + num_blocks: + Number of structure module blocks + num_transition_layers: + Number of layers in the single representation transition (Alg. 23 lines 8-9) + num_resnet_blocks: + Number of blocks in the angle resnet + num_angles: + Number of angles to generate in the angle resnet + trans_scale_factor: + Scale of single representation transition hidden dimension + epsilon: + Small number used in angle resnet normalization + inf: + Large number used for attention masking + """ + + sequence_dim: int = 384 + pairwise_dim: int = 128 + ipa_dim: int = 16 + resnet_dim: int = 128 + num_heads_ipa: int = 12 + num_qk_points: int = 4 + num_v_points: int = 8 + dropout_rate: float = 0.1 + num_blocks: int = 8 + num_transition_layers: int = 1 + num_resnet_blocks: int = 2 + num_angles: int = 7 + trans_scale_factor: int = 10 + epsilon: float = 1e-8 + inf: float = 1e5 + + def to_dict(self): + return asdict(self) + + +@dataclass +class TrunkConfig: + num_blocks: int = 48 + sequence_state_dim: int = 1024 + pairwise_state_dim: int = 128 + sequence_head_width: int = 32 + pairwise_head_width: int = 32 + position_bins: int = 32 + dropout: float = 0 + layer_drop: float = 0 + cpu_grad_checkpoint: bool = False + max_recycles: int = 4 + chunk_size: Optional[int] = 128 + structure_module: "StructureModuleConfig" = None + + def __post_init__(self): + if self.structure_module is None: + self.structure_module = StructureModuleConfig() + elif isinstance(self.structure_module, dict): + self.structure_module = StructureModuleConfig(**self.structure_module) + + if self.max_recycles <= 0: + raise ValueError(f"`max_recycles` should be positive, got {self.max_recycles}.") + if self.sequence_state_dim % self.sequence_state_dim != 0: + raise ValueError( + "`sequence_state_dim` should be a round multiple of `sequence_state_dim`, got" + f" {self.sequence_state_dim} and {self.sequence_state_dim}." + ) + if self.pairwise_state_dim % self.pairwise_state_dim != 0: + raise ValueError( + "`pairwise_state_dim` should be a round multiple of `pairwise_state_dim`, got" + f" {self.pairwise_state_dim} and {self.pairwise_state_dim}." 
+ ) + + sequence_num_heads = self.sequence_state_dim // self.sequence_head_width + pairwise_num_heads = self.pairwise_state_dim // self.pairwise_head_width + + if self.sequence_state_dim != sequence_num_heads * self.sequence_head_width: + raise ValueError( + "`sequence_state_dim` should be equal to `sequence_num_heads * sequence_head_width, got" + f" {self.sequence_state_dim} != {sequence_num_heads} * {self.sequence_head_width}." + ) + if self.pairwise_state_dim != pairwise_num_heads * self.pairwise_head_width: + raise ValueError( + "`pairwise_state_dim` should be equal to `pairwise_num_heads * pairwise_head_width, got" + f" {self.pairwise_state_dim} != {pairwise_num_heads} * {self.pairwise_head_width}." + ) + if self.pairwise_state_dim % 2 != 0: + raise ValueError(f"`pairwise_state_dim` should be even, got {self.pairwise_state_dim}.") + + if self.dropout >= 0.4: + raise ValueError(f"`dropout` should not be greater than 0.4, got {self.dropout}.") + + def to_dict(self): + """ + Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. + + Returns: + `dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, + """ + output = asdict(self) + output["structure_module"] = self.structure_module.to_dict() + return output + + +@dataclass +class EsmFoldConfig: + esm_type: Optional[str] = None + fp16_esm: bool = True + use_esm_attn_map: bool = False + esm_ablate_pairwise: bool = False + esm_ablate_sequence: bool = False + esm_input_dropout: float = 0 + + embed_aa: bool = True + bypass_lm: bool = False + + lddt_head_hid_dim: int = 128 + trunk: "TrunkConfig" = None + + def __post_init__(self): + if self.trunk is None: + self.trunk = TrunkConfig() + elif isinstance(self.trunk, dict): + self.trunk = TrunkConfig(**self.trunk) + + def to_dict(self): + """ + Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. + + Returns: + `dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, + """ + output = asdict(self) + output["trunk"] = self.trunk.to_dict() + return output class EsmConfig(PreTrainedConfig): @@ -94,6 +246,7 @@ class EsmConfig(PreTrainedConfig): ```""" model_type = "esm" + sub_configs = {"esmfold_config": EsmFoldConfig} def __init__( self, @@ -153,6 +306,7 @@ def __init__( if self.esmfold_config is not None and getattr(self.esmfold_config, "use_esm_attn_map", False): raise ValueError("The HuggingFace port of ESMFold does not support use_esm_attn_map at this time!") + # TODO: update ESM to inherit from PreTrainedConfig def to_dict(self): """ Serializes this instance to a Python dictionary. Override the default [`~PreTrainedConfig.to_dict`]. @@ -166,160 +320,6 @@ def to_dict(self): return output -@dataclass -class EsmFoldConfig: - esm_type: Optional[str] = None - fp16_esm: bool = True - use_esm_attn_map: bool = False - esm_ablate_pairwise: bool = False - esm_ablate_sequence: bool = False - esm_input_dropout: float = 0 - - embed_aa: bool = True - bypass_lm: bool = False - - lddt_head_hid_dim: int = 128 - trunk: "TrunkConfig" = None - - def __post_init__(self): - if self.trunk is None: - self.trunk = TrunkConfig() - elif isinstance(self.trunk, dict): - self.trunk = TrunkConfig(**self.trunk) - - def to_dict(self): - """ - Serializes this instance to a Python dictionary. Override the default [`~PreTrainedConfig.to_dict`]. 
- - Returns: - `dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, - """ - output = asdict(self) - output["trunk"] = self.trunk.to_dict() - return output - - -@dataclass -class TrunkConfig: - num_blocks: int = 48 - sequence_state_dim: int = 1024 - pairwise_state_dim: int = 128 - sequence_head_width: int = 32 - pairwise_head_width: int = 32 - position_bins: int = 32 - dropout: float = 0 - layer_drop: float = 0 - cpu_grad_checkpoint: bool = False - max_recycles: int = 4 - chunk_size: Optional[int] = 128 - structure_module: "StructureModuleConfig" = None - - def __post_init__(self): - if self.structure_module is None: - self.structure_module = StructureModuleConfig() - elif isinstance(self.structure_module, dict): - self.structure_module = StructureModuleConfig(**self.structure_module) - - if self.max_recycles <= 0: - raise ValueError(f"`max_recycles` should be positive, got {self.max_recycles}.") - if self.sequence_state_dim % self.sequence_state_dim != 0: - raise ValueError( - "`sequence_state_dim` should be a round multiple of `sequence_state_dim`, got" - f" {self.sequence_state_dim} and {self.sequence_state_dim}." - ) - if self.pairwise_state_dim % self.pairwise_state_dim != 0: - raise ValueError( - "`pairwise_state_dim` should be a round multiple of `pairwise_state_dim`, got" - f" {self.pairwise_state_dim} and {self.pairwise_state_dim}." - ) - - sequence_num_heads = self.sequence_state_dim // self.sequence_head_width - pairwise_num_heads = self.pairwise_state_dim // self.pairwise_head_width - - if self.sequence_state_dim != sequence_num_heads * self.sequence_head_width: - raise ValueError( - "`sequence_state_dim` should be equal to `sequence_num_heads * sequence_head_width, got" - f" {self.sequence_state_dim} != {sequence_num_heads} * {self.sequence_head_width}." - ) - if self.pairwise_state_dim != pairwise_num_heads * self.pairwise_head_width: - raise ValueError( - "`pairwise_state_dim` should be equal to `pairwise_num_heads * pairwise_head_width, got" - f" {self.pairwise_state_dim} != {pairwise_num_heads} * {self.pairwise_head_width}." - ) - if self.pairwise_state_dim % 2 != 0: - raise ValueError(f"`pairwise_state_dim` should be even, got {self.pairwise_state_dim}.") - - if self.dropout >= 0.4: - raise ValueError(f"`dropout` should not be greater than 0.4, got {self.dropout}.") - - def to_dict(self): - """ - Serializes this instance to a Python dictionary. Override the default [`~PreTrainedConfig.to_dict`]. - - Returns: - `dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, - """ - output = asdict(self) - output["structure_module"] = self.structure_module.to_dict() - return output - - -@dataclass -class StructureModuleConfig: - """ - Args: - sequence_dim: - Single representation channel dimension - pairwise_dim: - Pair representation channel dimension - ipa_dim: - IPA hidden channel dimension - resnet_dim: - Angle resnet (Alg. 23 lines 11-14) hidden channel dimension - num_heads_ipa: - Number of IPA heads - num_qk_points: - Number of query/key points to generate during IPA - num_v_points: - Number of value points to generate during IPA - dropout_rate: - Dropout rate used throughout the layer - num_blocks: - Number of structure module blocks - num_transition_layers: - Number of layers in the single representation transition (Alg. 
23 lines 8-9) - num_resnet_blocks: - Number of blocks in the angle resnet - num_angles: - Number of angles to generate in the angle resnet - trans_scale_factor: - Scale of single representation transition hidden dimension - epsilon: - Small number used in angle resnet normalization - inf: - Large number used for attention masking - """ - - sequence_dim: int = 384 - pairwise_dim: int = 128 - ipa_dim: int = 16 - resnet_dim: int = 128 - num_heads_ipa: int = 12 - num_qk_points: int = 4 - num_v_points: int = 8 - dropout_rate: float = 0.1 - num_blocks: int = 8 - num_transition_layers: int = 1 - num_resnet_blocks: int = 2 - num_angles: int = 7 - trans_scale_factor: int = 10 - epsilon: float = 1e-8 - inf: float = 1e5 - - def to_dict(self): - return asdict(self) - - def get_default_vocab_list(): return ( "", diff --git a/src/transformers/models/grounding_dino/configuration_grounding_dino.py b/src/transformers/models/grounding_dino/configuration_grounding_dino.py index 0944bfc015bc..5e8ed02ba972 100644 --- a/src/transformers/models/grounding_dino/configuration_grounding_dino.py +++ b/src/transformers/models/grounding_dino/configuration_grounding_dino.py @@ -17,7 +17,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -146,6 +146,7 @@ class GroundingDinoConfig(PreTrainedConfig): ```""" model_type = "grounding-dino" + sub_configs = {"backbone_config": AutoConfig, "text_config": AutoConfig} attribute_map = { "hidden_size": "d_model", "num_attention_heads": "encoder_attention_heads", @@ -286,24 +287,5 @@ def __init__( self.layer_norm_eps = layer_norm_eps super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - - @property - def sub_configs(self): - sub_configs = {} - backbone_config = getattr(self, "backbone_config", None) - text_config = getattr(self, "text_config", None) - if isinstance(backbone_config, PreTrainedConfig): - sub_configs["backbone_config"] = type(backbone_config) - if isinstance(text_config, PreTrainedConfig): - sub_configs["text_config"] = type(self.text_config) - return sub_configs - __all__ = ["GroundingDinoConfig"] diff --git a/src/transformers/models/mask2former/configuration_mask2former.py b/src/transformers/models/mask2former/configuration_mask2former.py index d6f735ce80c6..22f2f7034aa7 100644 --- a/src/transformers/models/mask2former/configuration_mask2former.py +++ b/src/transformers/models/mask2former/configuration_mask2former.py @@ -19,7 +19,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -128,6 +128,7 @@ class Mask2FormerConfig(PreTrainedConfig): """ model_type = "mask2former" + sub_configs = {"backbone_config": AutoConfig} backbones_supported = ["swin"] attribute_map = {"hidden_size": "hidden_dim"} @@ -236,14 +237,6 @@ def __init__( super().__init__(**kwargs) - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - @classmethod def 
from_backbone_config(cls, backbone_config: PreTrainedConfig, **kwargs): """Instantiate a [`Mask2FormerConfig`] (or a derived class) from a pre-trained backbone model configuration. diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index 49d5f19ba7aa..6d16780818c1 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -19,7 +19,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig from ..detr import DetrConfig from ..swin import SwinConfig @@ -103,6 +103,7 @@ class MaskFormerConfig(PreTrainedConfig): """ model_type = "maskformer" + sub_configs = {"backbone_config": AutoConfig, "decoder_config": AutoConfig} attribute_map = {"hidden_size": "mask_feature_size"} backbones_supported = ["resnet", "swin"] decoders_supported = ["detr"] @@ -200,15 +201,6 @@ def __init__( self.backbone_kwargs = backbone_kwargs super().__init__(**kwargs) - @property - def sub_configs(self): - sub_configs = {} - if self.backbone_config is not None and self.backbone_config != {}: - sub_configs["backbone_config"] = type(self.backbone_config) - if self.decoder_config is not None and self.decoder_config != {}: - sub_configs["decoder_config"] = type(self.decoder_config) - return sub_configs - @classmethod def from_backbone_and_decoder_configs( cls, backbone_config: PreTrainedConfig, decoder_config: PreTrainedConfig, **kwargs diff --git a/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py b/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py index 5401ee9d59d8..8ee2e1ce3c13 100644 --- a/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +++ b/src/transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py @@ -22,7 +22,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -146,6 +146,7 @@ class MMGroundingDinoConfig(PreTrainedConfig): ```""" model_type = "mm-grounding-dino" + sub_configs = {"backbone_config": AutoConfig, "text_config": AutoConfig} attribute_map = { "hidden_size": "d_model", "num_attention_heads": "encoder_attention_heads", @@ -280,24 +281,5 @@ def __init__( self.init_std = init_std self.layer_norm_eps = layer_norm_eps - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - - @property - def sub_configs(self): - sub_configs = {} - backbone_config = getattr(self, "backbone_config", None) - text_config = getattr(self, "text_config", None) - if isinstance(backbone_config, PreTrainedConfig): - sub_configs["backbone_config"] = type(backbone_config) - if isinstance(text_config, PreTrainedConfig): - sub_configs["text_config"] = type(self.text_config) - return sub_configs - __all__ = ["MMGroundingDinoConfig"] diff --git a/src/transformers/models/omdet_turbo/configuration_omdet_turbo.py b/src/transformers/models/omdet_turbo/configuration_omdet_turbo.py index 3c46c95cf1f1..90122c30f9de 100644 --- 
a/src/transformers/models/omdet_turbo/configuration_omdet_turbo.py +++ b/src/transformers/models/omdet_turbo/configuration_omdet_turbo.py @@ -17,7 +17,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -145,6 +145,7 @@ class OmDetTurboConfig(PreTrainedConfig): ```""" model_type = "omdet-turbo" + sub_configs = {"backbone_config": AutoConfig, "text_config": AutoConfig} attribute_map = { "encoder_hidden_dim": "d_model", "num_attention_heads": "encoder_attention_heads", @@ -289,16 +290,5 @@ def __init__( super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def sub_configs(self): - sub_configs = {} - backbone_config = getattr(self, "backbone_config", None) - text_config = getattr(self, "text_config", None) - if isinstance(backbone_config, PreTrainedConfig): - sub_configs["backbone_config"] = type(backbone_config) - if isinstance(text_config, PreTrainedConfig): - sub_configs["text_config"] = type(text_config) - return sub_configs - __all__ = ["OmDetTurboConfig"] diff --git a/src/transformers/models/oneformer/configuration_oneformer.py b/src/transformers/models/oneformer/configuration_oneformer.py index b94ec0b615a6..29c8d5dabb49 100644 --- a/src/transformers/models/oneformer/configuration_oneformer.py +++ b/src/transformers/models/oneformer/configuration_oneformer.py @@ -19,7 +19,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -146,6 +146,7 @@ class OneFormerConfig(PreTrainedConfig): """ model_type = "oneformer" + sub_configs = {"backbone_config": AutoConfig} attribute_map = {"hidden_size": "hidden_dim"} def __init__( @@ -273,13 +274,5 @@ def __init__( super().__init__(**kwargs) - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - __all__ = ["OneFormerConfig"] diff --git a/src/transformers/models/pegasus/configuration_pegasus.py b/src/transformers/models/pegasus/configuration_pegasus.py index b078bab0bf84..19f1e3c50dc0 100644 --- a/src/transformers/models/pegasus/configuration_pegasus.py +++ b/src/transformers/models/pegasus/configuration_pegasus.py @@ -152,13 +152,5 @@ def __init__( **kwargs, ) - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - __all__ = ["PegasusConfig"] diff --git a/src/transformers/models/pegasus_x/configuration_pegasus_x.py b/src/transformers/models/pegasus_x/configuration_pegasus_x.py index 43d174810622..c36f86442dbe 100644 --- a/src/transformers/models/pegasus_x/configuration_pegasus_x.py +++ b/src/transformers/models/pegasus_x/configuration_pegasus_x.py @@ -165,13 +165,5 @@ def __init__( **kwargs, ) - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - __all__ = ["PegasusXConfig"] diff --git a/src/transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py b/src/transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py index 
5c38572e0896..22983bcccd1f 100644 --- a/src/transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +++ b/src/transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py @@ -17,12 +17,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import copy - from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto.configuration_auto import CONFIG_MAPPING +from ..auto.configuration_auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -92,6 +90,7 @@ class PromptDepthAnythingConfig(PreTrainedConfig): ```""" model_type = "prompt_depth_anything" + sub_configs = {"backbone_config": AutoConfig} def __init__( self, @@ -154,26 +153,5 @@ def __init__( self.depth_estimation_type = depth_estimation_type self.max_depth = max_depth if max_depth else 1 - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - - def to_dict(self): - """ - Serializes this instance to a Python dictionary. Override the default [`~PreTrainedConfig.to_dict`]. Returns: - `dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, - """ - output = copy.deepcopy(self.__dict__) - - if output["backbone_config"] is not None: - output["backbone_config"] = self.backbone_config.to_dict() - - output["model_type"] = self.__class__.model_type - return output - __all__ = ["PromptDepthAnythingConfig"] diff --git a/src/transformers/models/rt_detr/configuration_rt_detr.py b/src/transformers/models/rt_detr/configuration_rt_detr.py index 007a21384979..f176390fd7b5 100644 --- a/src/transformers/models/rt_detr/configuration_rt_detr.py +++ b/src/transformers/models/rt_detr/configuration_rt_detr.py @@ -17,7 +17,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig from .configuration_rt_detr_resnet import RTDetrResNetConfig @@ -175,6 +175,7 @@ class RTDetrConfig(PreTrainedConfig): ```""" model_type = "rt_detr" + sub_configs = {"backbone_config": AutoConfig} layer_types = ["basic", "bottleneck"] attribute_map = { "hidden_size": "d_model", @@ -335,22 +336,6 @@ def __init__( self.eos_coefficient = eos_coefficient super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - @classmethod def from_backbone_configs(cls, backbone_config: PreTrainedConfig, **kwargs): """Instantiate a [`RTDetrConfig`] (or a derived class) from a pre-trained backbone model configuration and DETR model diff --git a/src/transformers/models/rt_detr_v2/configuration_rt_detr_v2.py b/src/transformers/models/rt_detr_v2/configuration_rt_detr_v2.py index 25a411005749..a711f6a4e6fe 100644 --- a/src/transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +++ b/src/transformers/models/rt_detr_v2/configuration_rt_detr_v2.py @@ -22,7 +22,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils 
import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -185,6 +185,7 @@ class RTDetrV2Config(PreTrainedConfig): """ model_type = "rt_detr_v2" + sub_configs = {"backbone_config": AutoConfig} layer_types = ["basic", "bottleneck"] attribute_map = { "hidden_size": "d_model", @@ -358,14 +359,6 @@ def __init__( self.decoder_offset_scale = decoder_offset_scale self.decoder_method = decoder_method - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - @classmethod def from_backbone_configs(cls, backbone_config: PreTrainedConfig, **kwargs): """Instantiate a [`RTDetrV2Config`] (or a derived class) from a pre-trained backbone model configuration and DETR model diff --git a/src/transformers/models/rt_detr_v2/modular_rt_detr_v2.py b/src/transformers/models/rt_detr_v2/modular_rt_detr_v2.py index 447320e38a51..b96b8b494d64 100644 --- a/src/transformers/models/rt_detr_v2/modular_rt_detr_v2.py +++ b/src/transformers/models/rt_detr_v2/modular_rt_detr_v2.py @@ -25,7 +25,7 @@ from ...utils.backbone_utils import ( verify_backbone_config_arguments, ) -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig from ..rt_detr.modeling_rt_detr import ( RTDetrDecoder, RTDetrDecoderLayer, @@ -196,6 +196,7 @@ class RTDetrV2Config(PreTrainedConfig): """ model_type = "rt_detr_v2" + sub_configs = {"backbone_config": AutoConfig} layer_types = ["basic", "bottleneck"] attribute_map = { "hidden_size": "d_model", @@ -369,14 +370,6 @@ def __init__( self.decoder_offset_scale = decoder_offset_scale self.decoder_method = decoder_method - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - @classmethod def from_backbone_configs(cls, backbone_config: PreTrainedConfig, **kwargs): """Instantiate a [`RTDetrV2Config`] (or a derived class) from a pre-trained backbone model configuration and DETR model diff --git a/src/transformers/models/superglue/configuration_superglue.py b/src/transformers/models/superglue/configuration_superglue.py index c8a13d10e7be..448229b3ae00 100644 --- a/src/transformers/models/superglue/configuration_superglue.py +++ b/src/transformers/models/superglue/configuration_superglue.py @@ -15,7 +15,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig if TYPE_CHECKING: @@ -68,6 +68,7 @@ class SuperGlueConfig(PreTrainedConfig): """ model_type = "superglue" + sub_configs = {"keypoint_detector_config": AutoConfig} def __init__( self, @@ -114,9 +115,5 @@ def __init__( super().__init__(**kwargs) - @property - def sub_configs(self): - return {"keypoint_detector_config": type(self.keypoint_detector_config)} - __all__ = ["SuperGlueConfig"] diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 28cb7bbcf7cd..8f8963c54a57 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -23,7 +23,7 @@ from ...onnx import OnnxConfig from ...utils import logging from ...utils.backbone_utils 
import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -133,6 +133,7 @@ class TableTransformerConfig(PreTrainedConfig): ```""" model_type = "table-transformer" + sub_configs = {"backbone_config": AutoConfig} keys_to_ignore_at_inference = ["past_key_values"] attribute_map = { "hidden_size": "d_model", @@ -245,22 +246,6 @@ def __init__( self.eos_coefficient = eos_coefficient super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs) - @property - def num_attention_heads(self) -> int: - return self.encoder_attention_heads - - @property - def hidden_size(self) -> int: - return self.d_model - - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - # Copied from transformers.models.detr.configuration_detr.DetrOnnxConfig class TableTransformerOnnxConfig(OnnxConfig): diff --git a/src/transformers/models/tvp/configuration_tvp.py b/src/transformers/models/tvp/configuration_tvp.py index eb719e042f38..7d4081b59c8b 100644 --- a/src/transformers/models/tvp/configuration_tvp.py +++ b/src/transformers/models/tvp/configuration_tvp.py @@ -14,12 +14,10 @@ # limitations under the License. """TVP model configuration""" -import copy - from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto import CONFIG_MAPPING +from ..auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -99,6 +97,7 @@ class TvpConfig(PreTrainedConfig): """ model_type = "tvp" + sub_configs = {"backbone_config": AutoConfig} def __init__( self, @@ -172,14 +171,6 @@ def __init__( self.initializer_range = initializer_range self.attention_probs_dropout_prob = attention_probs_dropout_prob - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - @classmethod def from_backbone_config(cls, backbone_config: PreTrainedConfig, **kwargs): """Instantiate a [`TvpConfig`] (or a derived class) from a pre-trained backbone model configuration. @@ -192,18 +183,5 @@ def from_backbone_config(cls, backbone_config: PreTrainedConfig, **kwargs): """ return cls(backbone_config=backbone_config, **kwargs) - def to_dict(self): - """ - Serializes this instance to a Python dictionary. Override the default [`~PreTrainedConfig.to_dict`]. 
- - Returns: - `dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, - """ - output = copy.deepcopy(self.__dict__) - if output["backbone_config"] is not None: - output["backbone_config"] = self.backbone_config.to_dict() - output["model_type"] = self.__class__.model_type - return output - __all__ = ["TvpConfig"] diff --git a/src/transformers/models/upernet/configuration_upernet.py b/src/transformers/models/upernet/configuration_upernet.py index ee43928d7ee3..27b1c38bc52f 100644 --- a/src/transformers/models/upernet/configuration_upernet.py +++ b/src/transformers/models/upernet/configuration_upernet.py @@ -17,7 +17,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto.configuration_auto import CONFIG_MAPPING +from ..auto.configuration_auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -83,6 +83,7 @@ class UperNetConfig(PreTrainedConfig): ```""" model_type = "upernet" + sub_configs = {"backbone_config": AutoConfig} def __init__( self, @@ -136,13 +137,5 @@ def __init__( self.auxiliary_concat_input = auxiliary_concat_input self.loss_ignore_index = loss_ignore_index - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - __all__ = ["UperNetConfig"] diff --git a/src/transformers/models/vitmatte/configuration_vitmatte.py b/src/transformers/models/vitmatte/configuration_vitmatte.py index 922c5136f6a5..1fdeb03af759 100644 --- a/src/transformers/models/vitmatte/configuration_vitmatte.py +++ b/src/transformers/models/vitmatte/configuration_vitmatte.py @@ -14,13 +14,12 @@ # limitations under the License. """VitMatte model configuration""" -import copy from typing import Optional from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto.configuration_auto import CONFIG_MAPPING +from ..auto.configuration_auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -78,6 +77,7 @@ class VitMatteConfig(PreTrainedConfig): ```""" model_type = "vitmatte" + sub_configs = {"backbone_config": AutoConfig} def __init__( self, @@ -122,23 +122,5 @@ def __init__( self.convstream_hidden_sizes = convstream_hidden_sizes self.fusion_hidden_sizes = fusion_hidden_sizes - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - - def to_dict(self): - """ - Serializes this instance to a Python dictionary. Override the default [`~PreTrainedConfig.to_dict`]. 
Returns: - `dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, - """ - output = copy.deepcopy(self.__dict__) - output["backbone_config"] = self.backbone_config.to_dict() - output["model_type"] = self.__class__.model_type - return output - __all__ = ["VitMatteConfig"] diff --git a/src/transformers/models/vitpose/configuration_vitpose.py b/src/transformers/models/vitpose/configuration_vitpose.py index de8c073f125b..e9ae2813f9d8 100644 --- a/src/transformers/models/vitpose/configuration_vitpose.py +++ b/src/transformers/models/vitpose/configuration_vitpose.py @@ -19,7 +19,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging from ...utils.backbone_utils import verify_backbone_config_arguments -from ..auto.configuration_auto import CONFIG_MAPPING +from ..auto.configuration_auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -74,6 +74,7 @@ class VitPoseConfig(PreTrainedConfig): ```""" model_type = "vitpose" + sub_configs = {"backbone_config": AutoConfig} def __init__( self, @@ -122,13 +123,5 @@ def __init__( self.scale_factor = scale_factor self.use_simple_decoder = use_simple_decoder - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - __all__ = ["VitPoseConfig"] diff --git a/src/transformers/models/zoedepth/configuration_zoedepth.py b/src/transformers/models/zoedepth/configuration_zoedepth.py index bdcfcdde3da6..9f82523d5caa 100644 --- a/src/transformers/models/zoedepth/configuration_zoedepth.py +++ b/src/transformers/models/zoedepth/configuration_zoedepth.py @@ -16,7 +16,7 @@ from ...configuration_utils import PreTrainedConfig from ...utils import logging -from ..auto.configuration_auto import CONFIG_MAPPING +from ..auto.configuration_auto import CONFIG_MAPPING, AutoConfig logger = logging.get_logger(__name__) @@ -133,6 +133,7 @@ class ZoeDepthConfig(PreTrainedConfig): ```""" model_type = "zoedepth" + sub_configs = {"backbone_config": AutoConfig} def __init__( self, @@ -233,13 +234,5 @@ def __init__( self.patch_transformer_intermediate_size = patch_transformer_intermediate_size self.patch_transformer_num_attention_heads = patch_transformer_num_attention_heads - @property - def sub_configs(self): - return ( - {"backbone_config": type(self.backbone_config)} - if getattr(self, "backbone_config", None) is not None - else {} - ) - __all__ = ["ZOEDEPTH_PRETRAINED_CONFIG_ARCHIVE_MAP", "ZoeDepthConfig"] diff --git a/tests/test_configuration_common.py b/tests/test_configuration_common.py index 4bf85697c4cc..90c00f6f8c63 100644 --- a/tests/test_configuration_common.py +++ b/tests/test_configuration_common.py @@ -131,28 +131,29 @@ def create_and_test_config_from_and_save_pretrained_composite(self): # Iterate over all sub_configs if there are any and load them with their own classes sub_configs = general_config_loaded.sub_configs for sub_config_key, sub_class in sub_configs.items(): - if sub_class.__name__ == "AutoConfig": - sub_class = sub_class.for_model(**general_config_dict[sub_config_key]).__class__ - sub_config_loaded = sub_class.from_pretrained(tmpdirname) - else: - sub_config_loaded = sub_class.from_pretrained(tmpdirname) - - # Pop `transformers_version`, it never exists when a config is part of a general composite config - # Verify that loading with subconfig class results in same dict as if we loaded with general composite config class - sub_config_loaded_dict = 
sub_config_loaded.to_dict() - sub_config_loaded_dict.pop("transformers_version", None) - general_config_dict[sub_config_key].pop("transformers_version", None) - self.parent.assertEqual(sub_config_loaded_dict, general_config_dict[sub_config_key]) - - # Verify that the loaded config type is same as in the general config - type_from_general_config = type(getattr(general_config_loaded, sub_config_key)) - self.parent.assertTrue(isinstance(sub_config_loaded, type_from_general_config)) - - # Now save only the sub-config and load it back to make sure the whole load-save-load pipeline works - with tempfile.TemporaryDirectory() as tmpdirname2: - sub_config_loaded.save_pretrained(tmpdirname2) - sub_config_loaded_2 = sub_class.from_pretrained(tmpdirname2) - self.parent.assertEqual(sub_config_loaded.to_dict(), sub_config_loaded_2.to_dict()) + if general_config_dict[sub_config_key] is not None: + if sub_class.__name__ == "AutoConfig": + sub_class = sub_class.for_model(**general_config_dict[sub_config_key]).__class__ + sub_config_loaded = sub_class.from_pretrained(tmpdirname) + else: + sub_config_loaded = sub_class.from_pretrained(tmpdirname) + + # Pop `transformers_version`, it never exists when a config is part of a general composite config + # Verify that loading with subconfig class results in same dict as if we loaded with general composite config class + sub_config_loaded_dict = sub_config_loaded.to_dict() + sub_config_loaded_dict.pop("transformers_version", None) + general_config_dict[sub_config_key].pop("transformers_version", None) + self.parent.assertEqual(sub_config_loaded_dict, general_config_dict[sub_config_key]) + + # Verify that the loaded config type is same as in the general config + type_from_general_config = type(getattr(general_config_loaded, sub_config_key)) + self.parent.assertTrue(isinstance(sub_config_loaded, type_from_general_config)) + + # Now save only the sub-config and load it back to make sure the whole load-save-load pipeline works + with tempfile.TemporaryDirectory() as tmpdirname2: + sub_config_loaded.save_pretrained(tmpdirname2) + sub_config_loaded_2 = sub_class.from_pretrained(tmpdirname2) + self.parent.assertEqual(sub_config_loaded.to_dict(), sub_config_loaded_2.to_dict()) def create_and_test_config_from_pretrained_custom_kwargs(self): """ diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index e247d1ccc8f4..085b2473a878 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -1262,7 +1262,8 @@ def test_attention_outputs(self): del inputs_dict["output_attentions"] config.output_attentions = True for k in config.sub_configs: - getattr(config, k).output_attentions = True + if getattr(config, k) is not None: + getattr(config, k).output_attentions = True model = model_class(config) model.to(torch_device) @@ -1896,20 +1897,23 @@ def check_hidden_states_output(inputs_dict, config, model_class): del inputs_dict["output_hidden_states"] config.output_hidden_states = True for k in config.sub_configs: - getattr(config, k).output_hidden_states = True + if getattr(config, k) is not None: + getattr(config, k).output_hidden_states = True check_hidden_states_output(inputs_dict, config, model_class) def test_retain_grad_hidden_states_attentions(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for k in config.sub_configs: - getattr(config, k).output_hidden_states = True + if getattr(config, k) is not None: + getattr(config, k).output_hidden_states = True config.output_hidden_states = True 
config.output_attentions = self.has_attentions for k in config.sub_configs: - getattr(config, k).output_attentions = self.has_attentions + if getattr(config, k) is not None: + getattr(config, k).output_attentions = self.has_attentions # force eager attention to support output attentions if self.has_attentions: @@ -3348,13 +3352,15 @@ def test_attn_implementation_composite_models(self): # we just need to test if passing 'attn_implementation' as a dict fails or not attn_implementation_per_subconfig = {"": "eager"} for key in config.sub_configs: - attn_implementation_per_subconfig[key] = "eager" + if getattr(config, key) is not None: + attn_implementation_per_subconfig[key] = "eager" config._attn_implementation = attn_implementation_per_subconfig model = model_class(config) for key in config.sub_configs: - sub_config = getattr(model.config, key) - self.assertTrue(sub_config._attn_implementation == "eager") + if getattr(config, key) is not None: + sub_config = getattr(model.config, key) + self.assertTrue(sub_config._attn_implementation == "eager") for name, submodule in model.named_modules(): class_name = submodule.__class__.__name__ @@ -4094,8 +4100,9 @@ def update_config_headdim(config, requested_dim): # Update config values update_config_headdim(config, requested_dim) for key in config.sub_configs: - sub_config = getattr(config, key) - update_config_headdim(sub_config, requested_dim) + if getattr(config, key) is not None: + sub_config = getattr(config, key) + update_config_headdim(sub_config, requested_dim) return config @@ -4279,7 +4286,10 @@ def test_internal_model_config_and_subconfig_are_same(self): for subconfig_key in subconfig_keys: # Get the subconfig from the model config subconfig_from_model_config = getattr(model.config, subconfig_key) - if subconfig_from_model_config.__class__ == subconfig_from_model_internal.__class__: + if ( + subconfig_from_model_config is not None + and subconfig_from_model_config.__class__ == subconfig_from_model_internal.__class__ + ): # Since some composite models have different submodels parameterized by 2 of the same config # class instances, we need to check against a list of matching classes, and check that at least # 1 is the exact object (instead of checking immediately for similar object) @@ -4310,7 +4320,8 @@ def test_can_set_attention_dynamically(self): # sanity check to make sure everything is correctly eager self.assertTrue(model.config._attn_implementation == "eager") for subconfig_key in model.config.sub_configs: - self.assertTrue(getattr(model.config, subconfig_key)._attn_implementation == "eager") + if getattr(config, subconfig_key) is not None: + self.assertTrue(getattr(model.config, subconfig_key)._attn_implementation == "eager") if not all( submodule._can_set_attn_implementation() @@ -4330,7 +4341,8 @@ def test_can_set_attention_dynamically(self): # Check everything was correctly changed self.assertTrue(model.config._attn_implementation == "sdpa") for subconfig_key in model.config.sub_configs: - self.assertTrue(getattr(model.config, subconfig_key)._attn_implementation == "sdpa") + if getattr(config, subconfig_key) is not None: + self.assertTrue(getattr(model.config, subconfig_key)._attn_implementation == "sdpa") # Check we cannot set it to random values, and it raises an error with self.assertRaisesRegex(ValueError, 'Specified `attn_implementation="foo"` is not supported'): @@ -4339,7 +4351,8 @@ def test_can_set_attention_dynamically(self): # Should still be sdpa everywhere self.assertTrue(model.config._attn_implementation == "sdpa") for 
subconfig_key in model.config.sub_configs: - self.assertTrue(getattr(model.config, subconfig_key)._attn_implementation == "sdpa") + if getattr(config, subconfig_key) is not None: + self.assertTrue(getattr(model.config, subconfig_key)._attn_implementation == "sdpa") def test_can_set_attention_dynamically_composite_model(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() @@ -4358,7 +4371,8 @@ def test_can_set_attention_dynamically_composite_model(self): # sanity check to make sure everything is correctly eager self.assertTrue(model.config._attn_implementation == "eager") for subconfig_key in model.config.sub_configs: - self.assertTrue(getattr(model.config, subconfig_key)._attn_implementation == "eager") + if getattr(config, subconfig_key) is not None: + self.assertTrue(getattr(model.config, subconfig_key)._attn_implementation == "eager") if not all( submodule._can_set_attn_implementation() @@ -4373,7 +4387,8 @@ def test_can_set_attention_dynamically_composite_model(self): # Check only top-most was correctly changed self.assertTrue(model.config._attn_implementation == "sdpa") for subconfig_key in model.config.sub_configs: - self.assertTrue(getattr(model.config, subconfig_key)._attn_implementation == "eager") + if getattr(config, subconfig_key) is not None: + self.assertTrue(getattr(model.config, subconfig_key)._attn_implementation == "eager") @require_torch def test_bc_torch_dtype(self):
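
A minimal sketch (not taken from the diff) of the configuration pattern this patch converges on: `sub_configs` moves from a per-instance property to a class attribute mapping each sub-config name to `AutoConfig`, so the key is always declared even when the attribute itself is left as `None`. The class name and constructor arguments below are hypothetical; the imports mirror the names used on this branch.

    from transformers.configuration_utils import PreTrainedConfig
    from transformers.models.auto import CONFIG_MAPPING, AutoConfig

    class MyDetrConfig(PreTrainedConfig):
        """Hypothetical backbone-based config using the class-level declaration."""

        model_type = "my-detr"
        # Declared on the class: the key exists even when backbone_config is None.
        sub_configs = {"backbone_config": AutoConfig}

        def __init__(self, backbone_config=None, **kwargs):
            if isinstance(backbone_config, dict):
                backbone_model_type = backbone_config.get("model_type", "resnet")
                backbone_config = CONFIG_MAPPING[backbone_model_type](**backbone_config)
            self.backbone_config = backbone_config
            super().__init__(**kwargs)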
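
A minimal sketch of the matching guard on the consumer side, as applied in `_get_dtype`, `set_attn_implementation`, and the common tests: iterate the declared keys and only touch sub-configs that are actually populated. The helper name `propagate_dtype` is hypothetical.

    import torch

    def propagate_dtype(config, dtype: torch.dtype) -> None:
        # Set the dtype on the top-level config and on every populated sub-config.
        config.dtype = dtype
        for sub_config_key in config.sub_configs:
            # The key is always declared now, but the attribute may be None.
            if (sub_config := getattr(config, sub_config_key)) is not None:
                sub_config.dtype = dtype

The same `is not None` check guards the toggling of `output_attentions` and `output_hidden_states` on sub-configs in tests/test_modeling_common.py.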