Update README.md and a few more comments

2025-06-03 15:01:08 +08:00 · 2021-02-12 22:07:18 -08:00 · 2021-02-12 22:07:18 -08:00 · d86dbe45c2
commit d86dbe45c2
parent 0d253e2c5e
2 changed files with 13 additions and 7 deletions
--- a/README.md
+++ b/README.md
@@ -2,8 +2,11 @@

 ## What's New

+### Feb 12, 2021
+* Update Normalization-Free nets to include new NFNet-F (https://arxiv.org/abs/2102.06171) model defs
+
 ### Feb 10, 2021
-* First Normalizer-Free model training experiments done,
+* First Normalization-Free model training experiments done,
  * nf_resnet50 - 80.68 top-1 @ 288x288, 80.31 @ 256x256
  * nf_regnet_b1 - 79.30 @ 288x288, 78.75 @ 256x256
 * More model archs, incl a flexible ByobNet backbone ('Bring-your-own-blocks')
@@ -164,6 +167,7 @@ A full version of the list below with source links can be found in the [document
 * Inception-ResNet-V2 and Inception-V4 - https://arxiv.org/abs/1602.07261
 * MobileNet-V3 (MBConvNet w/ Efficient Head) - https://arxiv.org/abs/1905.02244
 * NASNet-A - https://arxiv.org/abs/1707.07012
+* NFNet-F - https://arxiv.org/abs/2102.06171
 * NF-RegNet / NF-ResNet - https://arxiv.org/abs/2101.08692
 * PNasNet - https://arxiv.org/abs/1712.00559
 * RegNet - https://arxiv.org/abs/2003.13678
--- a/timm/models/nfnet.py
+++ b/timm/models/nfnet.py
@@ -236,7 +236,7 @@ class DownsampleAvg(nn.Module):


 class NormFreeBlock(nn.Module):
-    """Normalization-free pre-activation block.
+    """Normalization-Free pre-activation block.
    """

    def __init__(
@@ -351,6 +351,7 @@ def create_stem(in_chs, out_chs, stem_type='', conv_layer=None, act_layer=None):
    return nn.Sequential(stem), stem_stride, stem_feature


+# from https://github.com/deepmind/deepmind-research/tree/master/nfnets
 _nonlin_gamma = dict(
    identity=1.0,
    celu=1.270926833152771,
@@ -371,10 +372,13 @@ _nonlin_gamma = dict(


 class NormFreeNet(nn.Module):
-    """ Normalization-free ResNets and RegNets
+    """ Normalization-Free Network

-    As described in `Characterizing signal propagation to close the performance gap in unnormalized ResNets`
+    As described in:
+    `Characterizing signal propagation to close the performance gap in unnormalized ResNets`
        - https://arxiv.org/abs/2101.08692
+    and
+    `High-Performance Large-Scale Image Recognition Without Normalization` - https://arxiv.org/abs/2102.06171

    This model aims to cover both the NFRegNet-Bx models as detailed in the paper's code snippets and
    the (preact) ResNet models described earlier in the paper.
@@ -432,7 +436,7 @@ class NormFreeNet(nn.Module):
                blocks += [NormFreeBlock(
                    in_chs=prev_chs, out_chs=out_chs,
                    alpha=cfg.alpha,
-                    beta=1. / expected_var ** 0.5,  # NOTE: beta used as multiplier in block
+                    beta=1. / expected_var ** 0.5,
                    stride=stride if block_idx == 0 else 1,
                    dilation=dilation,
                    first_dilation=first_dilation,
@@ -477,8 +481,6 @@ class NormFreeNet(nn.Module):
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Conv2d):
-                # as per discussion with paper authors, original in haiku is
-                # hk.initializers.VarianceScaling(1.0, 'fan_in', 'normal')' w/ zero'd bias
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='linear')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)