fixed intermediate output indices

2025-06-03 15:01:08 +08:00 · 2023-11-22 12:31:31 -08:00 · 2023-11-22 12:31:31 -08:00 · 63ee54853c
commit 63ee54853c
parent 4d0737d5fa
1 changed files with 5 additions and 3 deletions
--- a/timm/models/fastvit.py
+++ b/timm/models/fastvit.py
@@ -1164,8 +1164,10 @@ class FastVit(nn.Module):

        # For segmentation and detection, extract intermediate output
        if self.fork_feat:
-            # add a norm layer for each output
-            self.out_indices = [0, 2, 4, 6]
+            # Add a norm layer for each output. self.stages differs slightly from self.network in
+            # the original code: here the PatchEmbed layer is part of self.stages, whereas in the
+            # original it was part of self.network. So the out indices do not need to skip any entries.
+            self.out_indices = [0, 1, 2, 3]
            for i_emb, i_layer in enumerate(self.out_indices):
                if i_emb == 0 and os.environ.get("FORK_LAST3", None):
                    """For RetinaNet, `start_level=1`. The first norm layer will not used.
@@ -1416,4 +1418,4 @@ def fastvit_ma36(pretrained=False, **kwargs):
        pos_embs=(None, None, None, partial(RepConditionalPosEnc, spatial_shape=(7, 7))),
        token_mixers=("repmixer", "repmixer", "repmixer", "attention")
    )
-    return _create_fastvit('fastvit_ma36', pretrained=pretrained, **dict(model_args, **kwargs))
+    return _create_fastvit('fastvit_ma36', pretrained=pretrained, **dict(model_args, **kwargs))