Update metaformers.py

pull/1647/head
Fredo Guan 2023-02-10 00:24:58 -08:00
parent 10bde717e5
commit f938beb81b
1 changed file with 82 additions and 3 deletions


@@ -24,8 +24,11 @@ Adapted from https://github.com/sail-sg/metaformer, original copyright below
from collections import OrderedDict
from functools import partial

import torch
import torch.nn as nn
from torch import Tensor

from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from timm.layers import trunc_normal_, DropPath, SelectAdaptivePool2d, GroupNorm1
from timm.layers.helpers import to_2tuple
@@ -415,6 +418,61 @@ class MetaFormerBlock(nn.Module):
        return x


class MetaFormerStage(nn.Module):

    def __init__(
            self,
            in_chs,
            out_chs,
            depth=2,
            downsample_norm=partial(LayerNormGeneral, bias=False, eps=1e-6),
            token_mixer=nn.Identity,
            mlp=Mlp,
            mlp_fn=nn.Linear,
            mlp_act=StarReLU,
            mlp_bias=False,
            norm_layer=partial(LayerNormGeneral, eps=1e-6, bias=False),
            dp_rates=[0.] * 2,
            layer_scale_init_value=None,
            res_scale_init_value=None,
    ):
        super().__init__()

        self.grad_checkpointing = False

        # downsample between stages unless the channel count is unchanged
        self.downsample = nn.Identity() if in_chs == out_chs else Downsampling(
            in_chs,
            out_chs,
            kernel_size=3,
            stride=2,
            padding=1,
            norm_layer=downsample_norm,
        )

        # stack of identically configured metaformer blocks, one drop-path rate per block
        self.blocks = nn.Sequential(*[MetaFormerBlock(
            dim=out_chs,
            token_mixer=token_mixer,
            mlp=mlp,
            mlp_fn=mlp_fn,
            mlp_act=mlp_act,
            mlp_bias=mlp_bias,
            norm_layer=norm_layer,
            drop_path=dp_rates[i],
            layer_scale_init_value=layer_scale_init_value,
            res_scale_init_value=res_scale_init_value,
        ) for i in range(depth)])

    @torch.jit.ignore
    def set_grad_checkpointing(self, enable=True):
        self.grad_checkpointing = enable

    def forward(self, x: Tensor):
        x = self.downsample(x)
        if self.grad_checkpointing and not torch.jit.is_scripting():
            x = checkpoint_seq(self.blocks, x)
        else:
            x = self.blocks(x)
        return x
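A minimal sketch (not part of the diff) of how the new MetaFormerStage might be exercised standalone. It assumes the surrounding metaformers.py definitions (Downsampling, MetaFormerBlock, StarReLU, LayerNormGeneral, checkpoint_seq) are in scope, and that stages see channels-last input, as the final `.permute(0, 3, 1, 2)` in forward_features later in this diff suggests; the channel sizes here are illustrative only.

    # Hedged usage sketch -- illustrative sizes, not from the commit.
    import torch

    stage = MetaFormerStage(
        64,                     # in_chs
        128,                    # out_chs != in_chs, so Downsampling is used
        depth=2,
        dp_rates=[0.0, 0.1],    # one drop-path rate per block
    )
    x = torch.randn(2, 56, 56, 64)   # (B, H, W, C), assuming channels-last between stages
    out = stage(x)
    print(out.shape)                 # spatial dims halved by the stride-2 downsample

    # gradient checkpointing can be toggled per stage
    stage.set_grad_checkpointing(True)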
class MetaFormer(nn.Module):
    r""" MetaFormer
        A PyTorch impl of : `MetaFormer Baselines for Vision` -
@@ -447,7 +505,7 @@ class MetaFormer(nn.Module):
            token_mixers=nn.Identity,
            mlps=Mlp,
            mlp_fn=nn.Linear,
-           mlp_act = StarReLU,
+           mlp_act=StarReLU,
            mlp_bias=False,
            norm_layers=partial(LayerNormGeneral, eps=1e-6, bias=False),
            drop_path_rate=0.,
@@ -491,7 +549,7 @@ class MetaFormer(nn.Module):
        self.grad_checkpointing = False
        self.feature_info = []
-       dp_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
+       dp_rates = [x.tolist() for x in torch.linspace(0, drop_path_rate, sum(depths)).split(depths)]
        self.stem = Stem(
@@ -502,7 +560,9 @@ class MetaFormer(nn.Module):
        stages = nn.ModuleList()  # each stage consists of multiple metaformer blocks
        cur = 0
        last_dim = dims[0]
        for i in range(self.num_stages):
            '''
            stage = nn.Sequential(OrderedDict([
                ('downsample', nn.Identity() if i == 0 else Downsampling(
                    dims[i-1],
@@ -526,8 +586,27 @@ class MetaFormer(nn.Module):
                ) for j in range(depths[i])])
                )])
            )
            '''
            stage = MetaFormerStage(
                last_dim,
                dims[i],
                depth=depths[i],
                downsample_norm=downsample_norm,
                token_mixer=token_mixers[i],
                mlp=mlps[i],
                mlp_fn=mlp_fn,
                mlp_act=mlp_act,
                mlp_bias=mlp_bias,
                norm_layer=norm_layers[i],
                dp_rates=dp_rates[i],
                layer_scale_init_value=layer_scale_init_values[i],
                res_scale_init_value=res_scale_init_values[i],
            )
            stages.append(stage)
            cur += depths[i]
            last_dim = dims[i]
            self.feature_info += [dict(num_chs=dims[i], reduction=2, module=f'stages.{i}')]

        self.stages = nn.Sequential(*stages)
@@ -596,7 +675,7 @@ class MetaFormer(nn.Module):
        x = self.norm_pre(x).permute(0, 3, 1, 2)
        return x

-   def forward(self, x):
+   def forward(self, x: Tensor):
        x = self.forward_features(x)
        x = self.forward_head(x)
        return x
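A small sketch (not part of the commit) of what the dp_rates change above does: the old expression built one flat list of drop-path rates indexed with a running offset, while the new `.split(depths)` groups the same linspace into one sub-list per stage, so each MetaFormerStage can take `dp_rates[i]` directly. The `depths` and `drop_path_rate` values below are illustrative.

    # Hedged sketch of the dp_rates change -- depths/drop_path_rate are illustrative.
    import torch

    depths = [2, 2, 6, 2]
    drop_path_rate = 0.3

    # old: one flat list of rates across all blocks
    flat = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]

    # new: one sub-list per stage, matching each stage's block count
    per_stage = [x.tolist() for x in torch.linspace(0, drop_path_rate, sum(depths)).split(depths)]

    print(len(flat))                    # 12 rates in one list
    print([len(s) for s in per_stage])  # [2, 2, 6, 2] -- one sub-list per stage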