4 changes: 4 additions & 0 deletions src/transformers/models/resnet/configuration_resnet.py
@@ -59,6 +59,8 @@ class ResNetConfig(BackboneConfigMixin, PretrainedConfig):
             are supported.
         downsample_in_first_stage (`bool`, *optional*, defaults to `False`):
             If `True`, the first stage will downsample the inputs using a `stride` of 2.
+        downsample_in_bottleneck (`bool`, *optional*, defaults to `False`):
+            If `True`, the first `1x1` convolution in `ResNetBottleNeckLayer` will downsample the inputs using a `stride` of 2.
         out_features (`List[str]`, *optional*):
             If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc.
             (depending on how many stages the model has). If unset and `out_indices` is set, will default to the
@@ -94,6 +96,7 @@ def __init__(
layer_type="bottleneck",
hidden_act="relu",
downsample_in_first_stage=False,
downsample_in_bottleneck=False,
out_features=None,
out_indices=None,
**kwargs,
@@ -108,6 +111,7 @@
         self.layer_type = layer_type
         self.hidden_act = hidden_act
         self.downsample_in_first_stage = downsample_in_first_stage
+        self.downsample_in_bottleneck = downsample_in_bottleneck
         self.stage_names = ["stem"] + [f"stage{idx}" for idx in range(1, len(depths) + 1)]
         self._out_features, self._out_indices = get_aligned_output_features_output_indices(
             out_features=out_features, out_indices=out_indices, stage_names=self.stage_names
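With the configuration change above, the new behavior can be requested per model instance. A minimal usage sketch, assuming a `transformers` build that includes this diff:

```python
from transformers import ResNetConfig

# Move the stride-2 downsampling into the first 1x1 convolution of each
# bottleneck instead of the 3x3 convolution.
config = ResNetConfig(
    layer_type="bottleneck",
    downsample_in_first_stage=False,
    downsample_in_bottleneck=True,  # flag added by this diff
)
print(config.downsample_in_bottleneck)  # True
```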
31 changes: 24 additions & 7 deletions src/transformers/models/resnet/modeling_resnet.py
@@ -149,11 +149,18 @@ class ResNetBottleNeckLayer(nn.Module):
     A classic ResNet's bottleneck layer composed of three convolutions: a `1x1`, a `3x3`, and a `1x1`.
 
     The first `1x1` convolution reduces the input by a factor of `reduction` in order to make the second `3x3`
-    convolution faster. The last `1x1` convolution remaps the reduced features to `out_channels`.
+    convolution faster. The last `1x1` convolution remaps the reduced features to `out_channels`. If
+    `downsample_in_bottleneck` is `True`, the downsampling stride is applied in the first `1x1` convolution instead of the second `3x3` convolution.
     """

     def __init__(
-        self, in_channels: int, out_channels: int, stride: int = 1, activation: str = "relu", reduction: int = 4
+        self,
+        in_channels: int,
+        out_channels: int,
+        stride: int = 1,
+        activation: str = "relu",
+        reduction: int = 4,
+        downsample_in_bottleneck: bool = False,
     ):
         super().__init__()
         should_apply_shortcut = in_channels != out_channels or stride != 1
@@ -162,8 +169,10 @@ def __init__(
             ResNetShortCut(in_channels, out_channels, stride=stride) if should_apply_shortcut else nn.Identity()
         )
         self.layer = nn.Sequential(
-            ResNetConvLayer(in_channels, reduces_channels, kernel_size=1),
-            ResNetConvLayer(reduces_channels, reduces_channels, stride=stride),
+            ResNetConvLayer(
+                in_channels, reduces_channels, kernel_size=1, stride=stride if downsample_in_bottleneck else 1
+            ),
+            ResNetConvLayer(reduces_channels, reduces_channels, stride=stride if not downsample_in_bottleneck else 1),
             ResNetConvLayer(reduces_channels, out_channels, kernel_size=1, activation=None),
         )
         self.activation = ACT2FN[activation]
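The stride placement above is easier to see in isolation. Below is a standalone `torch.nn` sketch (not the `transformers` classes themselves) showing that both settings reduce the spatial size by the same factor; only the convolution that carries the stride changes. This matches the distinction between the original ResNet stride placement (in the first `1x1`) and the common v1.5-style placement (in the `3x3`):

```python
import torch
from torch import nn


# Standalone sketch of the logic above: with downsample_in_bottleneck=True the
# stride-2 step moves from the 3x3 convolution to the first 1x1 convolution.
def bottleneck_convs(in_ch, out_ch, stride=2, reduction=4, downsample_in_bottleneck=False):
    reduced = out_ch // reduction
    return nn.Sequential(
        nn.Conv2d(in_ch, reduced, kernel_size=1, stride=stride if downsample_in_bottleneck else 1),
        nn.Conv2d(reduced, reduced, kernel_size=3, stride=stride if not downsample_in_bottleneck else 1, padding=1),
        nn.Conv2d(reduced, out_ch, kernel_size=1),
    )


x = torch.randn(1, 64, 56, 56)
for flag in (False, True):
    print(flag, bottleneck_convs(64, 256, downsample_in_bottleneck=flag)(x).shape)
# False torch.Size([1, 256, 28, 28])
# True torch.Size([1, 256, 28, 28])
```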
@@ -194,10 +203,18 @@ def __init__(

         layer = ResNetBottleNeckLayer if config.layer_type == "bottleneck" else ResNetBasicLayer
 
+        if config.layer_type == "bottleneck":
+            first_layer = layer(
+                in_channels,
+                out_channels,
+                stride=stride,
+                activation=config.hidden_act,
+                downsample_in_bottleneck=config.downsample_in_bottleneck,
+            )
+        else:
+            first_layer = layer(in_channels, out_channels, stride=stride, activation=config.hidden_act)
         self.layers = nn.Sequential(
             # downsampling is done in the first layer with stride of 2
-            layer(in_channels, out_channels, stride=stride, activation=config.hidden_act),
-            *[layer(out_channels, out_channels, activation=config.hidden_act) for _ in range(depth - 1)],
+            first_layer, *[layer(out_channels, out_channels, activation=config.hidden_act) for _ in range(depth - 1)]
         )
 
     def forward(self, input: Tensor) -> Tensor:
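Putting the two files together, flipping the flag should leave every output shape unchanged; only where each stage's first bottleneck performs its spatial reduction moves. A hedged smoke test with random weights, again assuming this diff is installed:

```python
import torch
from transformers import ResNetConfig, ResNetModel

config = ResNetConfig(layer_type="bottleneck", downsample_in_bottleneck=True)
model = ResNetModel(config)

pixel_values = torch.randn(1, config.num_channels, 224, 224)
with torch.no_grad():
    outputs = model(pixel_values)

# Same final feature map as with the default stride placement.
print(outputs.last_hidden_state.shape)  # torch.Size([1, 2048, 7, 7])
```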