diff --git a/README.md b/README.md
index d3bc1573..0a6c1d7c 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,15 @@
 ## What's New
 
+### Dec 28, 2019
+* Add new model weights and training hparams (see Training Hparams section)
+  * `seresnext26d_32x4d` - 77.6 top-1, 93.6 top-5
+    * deep stem (32, 32, 64), avgpool downsample
+    * stem/downsample from bag-of-tricks paper
+  * `seresnext26t_32x4d` - 78.0 top-1, 93.7 top-5
+    * deep tiered stem (24, 48, 64), avgpool downsample (a modified 'D' variant)
+    * stem sizing mods from Jeremy Howard and fastai devs discussing ResNet architecture experiments
+
 ### Dec 23, 2019
 * Add RandAugment trained MixNet-XL weights with 80.48 top-1.
 * `--dist-bn` argument added to train.py, will distribute BN stats between nodes after each train epoch, before eval
@@ -114,6 +123,8 @@ I've leveraged the training scripts in this repository to train a few of the mod
 | efficientnet_b1 | 78.692 (21.308) | 94.086 (5.914) | 7.79M | bicubic | 240 |
 | resnext50_32x4d | 78.512 (21.488) | 94.042 (5.958) | 25M | bicubic | 224 |
 | resnet50 | 78.470 (21.530) | 94.266 (5.734) | 25.6M | bicubic | 224 |
+| seresnext26t_32x4d | 77.998 (22.002) | 93.708 (6.292) | 16.8M | bicubic | 224 |
+| seresnext26d_32x4d | 77.602 (22.398) | 93.608 (6.392) | 16.8M | bicubic | 224 |
 | mixnet_m | 77.256 (22.744) | 93.418 (6.582) | 5.01M | bicubic | 224 |
 | seresnext26_32x4d | 77.104 (22.896) | 93.316 (6.684) | 16.8M | bicubic | 224 |
 | efficientnet_b0 | 76.912 (23.088) | 93.210 (6.790) | 5.29M | bicubic | 224 |
@@ -237,11 +248,20 @@ Sources for original weights:
 ## Training Hyperparameters
 
 ### EfficientNet-B2 with RandAugment - 80.4 top-1, 95.1 top-5
+These params are for dual Titan RTX cards with NVIDIA Apex installed:
+
 `./distributed_train.sh 2 /imagenet/ --model efficientnet_b2 -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.3 --drop-connect 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .016`
 
 ### MixNet-XL with RandAugment - 80.5 top-1, 94.9 top-5
+These params are for dual Titan RTX cards with NVIDIA Apex installed:
+
 `./distributed_train.sh 2 /imagenet/ --model mixnet_xl -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .969 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.3 --drop-connect 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.3 --amp --lr .016 --dist-bn reduce`
+### SE-ResNeXt-26-D and SE-ResNeXt-26-T
+These hparams (or similar) work well for a wide range of ResNet architectures. It's generally a good idea to increase the epoch count as the model size increases, i.e. approx 180-200 for ResNe(X)t50 and 220+ for larger models. Increase batch size and LR proportionally for better GPUs or with AMP enabled. These params were for 2 1080Ti cards:
+
+`./distributed_train.sh 2 /imagenet/ --model seresnext26t_32x4d --lr 0.1 --warmup-epochs 5 --epochs 160 --weight-decay 1e-4 --sched cosine --reprob 0.4 --remode pixel -b 112`
+
 **TODO dig up some more**
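Aside (not part of the diff): once the model registrations later in this PR are in place, the new weights should be loadable through timm's `create_model` factory. A minimal usage sketch, assuming this branch is installed and using the 224x224 bicubic settings from the results table above:

```python
# Minimal usage sketch; assumes timm is installed from this branch so the
# 'seresnext26t_32x4d' registration added below is available.
import torch
import timm

model = timm.create_model('seresnext26t_32x4d', pretrained=True)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # default_cfg: 224x224, bicubic
print(logits.shape)  # torch.Size([1, 1000])
```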
diff --git a/timm/models/gluon_resnet.py b/timm/models/gluon_resnet.py
index 3d0f926f..f835a485 100644
--- a/timm/models/gluon_resnet.py
+++ b/timm/models/gluon_resnet.py
@@ -121,7 +121,7 @@ def gluon_resnet50_v1c(pretrained=False, num_classes=1000, in_chans=3, **kwargs)
     """
     default_cfg = default_cfgs['gluon_resnet50_v1c']
     model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, **kwargs)
+                   stem_width=32, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -134,7 +134,7 @@ def gluon_resnet101_v1c(pretrained=False, num_classes=1000, in_chans=3, **kwargs
     """
     default_cfg = default_cfgs['gluon_resnet101_v1c']
     model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, **kwargs)
+                   stem_width=32, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -147,7 +147,7 @@ def gluon_resnet152_v1c(pretrained=False, num_classes=1000, in_chans=3, **kwargs
     """
     default_cfg = default_cfgs['gluon_resnet152_v1c']
     model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, **kwargs)
+                   stem_width=32, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -160,7 +160,7 @@ def gluon_resnet50_v1d(pretrained=False, num_classes=1000, in_chans=3, **kwargs)
     """
     default_cfg = default_cfgs['gluon_resnet50_v1d']
     model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=32, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -173,7 +173,7 @@ def gluon_resnet101_v1d(pretrained=False, num_classes=1000, in_chans=3, **kwargs
     """
     default_cfg = default_cfgs['gluon_resnet101_v1d']
     model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=32, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -186,7 +186,7 @@ def gluon_resnet152_v1d(pretrained=False, num_classes=1000, in_chans=3, **kwargs
     """
     default_cfg = default_cfgs['gluon_resnet152_v1d']
     model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=32, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=32, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -199,7 +199,7 @@ def gluon_resnet50_v1e(pretrained=False, num_classes=1000, in_chans=3, **kwargs)
     """
     default_cfg = default_cfgs['gluon_resnet50_v1e']
     model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=64, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     #if pretrained:
     #    load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -212,7 +212,7 @@ def gluon_resnet101_v1e(pretrained=False, num_classes=1000, in_chans=3, **kwargs
     """
     default_cfg = default_cfgs['gluon_resnet101_v1e']
     model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=64, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -225,7 +225,7 @@ def gluon_resnet152_v1e(pretrained=False, num_classes=1000, in_chans=3, **kwargs
     """
     default_cfg = default_cfgs['gluon_resnet152_v1e']
     model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, avg_down=True, **kwargs)
+                   stem_width=64, stem_type='deep', avg_down=True, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -238,7 +238,7 @@ def gluon_resnet50_v1s(pretrained=False, num_classes=1000, in_chans=3, **kwargs)
     """
     default_cfg = default_cfgs['gluon_resnet50_v1s']
     model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, **kwargs)
+                   stem_width=64, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -251,7 +251,7 @@ def gluon_resnet101_v1s(pretrained=False, num_classes=1000, in_chans=3, **kwargs
     """
     default_cfg = default_cfgs['gluon_resnet101_v1s']
     model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, **kwargs)
+                   stem_width=64, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -264,7 +264,7 @@ def gluon_resnet152_v1s(pretrained=False, num_classes=1000, in_chans=3, **kwargs
     """
     default_cfg = default_cfgs['gluon_resnet152_v1s']
     model = ResNet(Bottleneck, [3, 8, 36, 3], num_classes=num_classes, in_chans=in_chans,
-                   stem_width=64, deep_stem=True, **kwargs)
+                   stem_width=64, stem_type='deep', **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
         load_pretrained(model, default_cfg, num_classes, in_chans)
@@ -362,7 +362,7 @@ def gluon_senet154(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     default_cfg = default_cfgs['gluon_senet154']
     model = ResNet(
         Bottleneck, [3, 8, 36, 3], cardinality=64, base_width=4, use_se=True,
-        deep_stem=True, down_kernel_size=3, block_reduce_first=2,
+        stem_type='deep', down_kernel_size=3, block_reduce_first=2,
         num_classes=num_classes, in_chans=in_chans, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
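The gluon_resnet.py changes above are mechanical: every `deep_stem=True` call site becomes `stem_type='deep'`. For anyone porting their own call sites, a hypothetical mapping helper (illustration only, not part of this diff):

```python
# Hypothetical helper showing how the old boolean kwarg maps onto the new
# stem_type string; for illustration, not part of this PR.
def legacy_to_stem_type(deep_stem: bool, tiered: bool = False) -> str:
    if not deep_stem:
        return ''  # default: single 7x7 conv stem
    return 'deep_tiered' if tiered else 'deep'

assert legacy_to_stem_type(True) == 'deep'  # matches all gluon call sites above
assert legacy_to_stem_type(False) == ''
```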
diff --git a/timm/models/resnet.py b/timm/models/resnet.py
index b90bb9d5..9196cc0c 100644
--- a/timm/models/resnet.py
+++ b/timm/models/resnet.py
@@ -91,6 +91,12 @@
         url='https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x8-b4712904.pth'),
     'swsl_resnext101_32x16d': _cfg(
         url='https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_weakly_supervised_resnext101_32x16-f3559a9c.pth'),
+    'seresnext26d_32x4d': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/seresnext26d_32x4d-80fa48a3.pth',
+        interpolation='bicubic'),
+    'seresnext26t_32x4d': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/seresnext26t_32x4d-361bc1c4.pth',
+        interpolation='bicubic'),
 }
 
 
@@ -231,10 +237,11 @@ class ResNet(nn.Module):
 
     ResNet variants:
       * normal, b - 7x7 stem, stem_width = 64, same as torchvision ResNet, NVIDIA ResNet 'v1.5', Gluon v1b
-      * c - 3 layer deep 3x3 stem, stem_width = 32
-      * d - 3 layer deep 3x3 stem, stem_width = 32, average pool in downsample
-      * e - 3 layer deep 3x3 stem, stem_width = 64, average pool in downsample
-      * s - 3 layer deep 3x3 stem, stem_width = 64
+      * c - 3 layer deep 3x3 stem, stem_width = 32 (32, 32, 64)
+      * d - 3 layer deep 3x3 stem, stem_width = 32 (32, 32, 64), average pool in downsample
+      * e - 3 layer deep 3x3 stem, stem_width = 64 (64, 64, 128), average pool in downsample
+      * s - 3 layer deep 3x3 stem, stem_width = 64 (64, 64, 128)
+      * t - 3 layer deep 3x3 stem, stem_width = 32 (24, 48, 64), average pool in downsample
 
     ResNeXt
       * normal - 7x7 stem, stem_width = 64, standard cardinality and base widths
@@ -263,10 +270,13 @@ class ResNet(nn.Module):
         Number of convolution groups for 3x3 conv in Bottleneck.
     base_width : int, default 64
        Factor determining bottleneck channels. `planes * base_width / 64 * cardinality`
-    deep_stem : bool, default False
-        Whether to replace the 7x7 conv1 with 3 3x3 convolution layers.
     stem_width : int, default 64
         Number of channels in stem convolutions
+    stem_type : str, default ''
+        The type of stem:
+          * '', default - a single 7x7 conv with a width of stem_width
+          * 'deep' - three 3x3 convolution layers of widths stem_width, stem_width, stem_width * 2
+          * 'deep_tiered' - three 3x3 conv layers of widths stem_width//4 * 3, stem_width//4 * 6, stem_width * 2
     block_reduce_first: int, default 1
         Reduction factor for first convolution output width of residual blocks, 1 for all archs except senets, where 2
@@ -283,12 +293,13 @@ class ResNet(nn.Module):
         Global pooling type. One of 'avg', 'max', 'avgmax', 'catavgmax'
     """
     def __init__(self, block, layers, num_classes=1000, in_chans=3, use_se=False,
-                 cardinality=1, base_width=64, stem_width=64, deep_stem=False,
+                 cardinality=1, base_width=64, stem_width=64, stem_type='',
                  block_reduce_first=1, down_kernel_size=1, avg_down=False, dilated=False,
                  norm_layer=nn.BatchNorm2d, drop_rate=0.0, global_pool='avg',
                  zero_init_last_bn=True, block_args=None):
         block_args = block_args or dict()
         self.num_classes = num_classes
+        deep_stem = 'deep' in stem_type
         self.inplanes = stem_width * 2 if deep_stem else 64
         self.cardinality = cardinality
         self.base_width = base_width
@@ -298,16 +309,20 @@ class ResNet(nn.Module):
         super(ResNet, self).__init__()
         if deep_stem:
+            stem_chs_1 = stem_chs_2 = stem_width
+            if 'tiered' in stem_type:
+                stem_chs_1 = 3 * (stem_width // 4)
+                stem_chs_2 = 6 * (stem_width // 4)
             self.conv1 = nn.Sequential(*[
-                nn.Conv2d(in_chans, stem_width, 3, stride=2, padding=1, bias=False),
-                norm_layer(stem_width),
+                nn.Conv2d(in_chans, stem_chs_1, 3, stride=2, padding=1, bias=False),
+                norm_layer(stem_chs_1),
                 nn.ReLU(inplace=True),
-                nn.Conv2d(stem_width, stem_width, 3, stride=1, padding=1, bias=False),
-                norm_layer(stem_width),
+                nn.Conv2d(stem_chs_1, stem_chs_2, 3, stride=1, padding=1, bias=False),
+                norm_layer(stem_chs_2),
                 nn.ReLU(inplace=True),
-                nn.Conv2d(stem_width, self.inplanes, 3, stride=1, padding=1, bias=False)])
+                nn.Conv2d(stem_chs_2, self.inplanes, 3, stride=1, padding=1, bias=False)])
         else:
-            self.conv1 = nn.Conv2d(in_chans, stem_width, kernel_size=7, stride=2, padding=3, bias=False)
+            self.conv1 = nn.Conv2d(in_chans, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
         self.bn1 = norm_layer(self.inplanes)
         self.relu = nn.ReLU(inplace=True)
         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
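To make the stem sizing concrete, here is the channel progression the hunk above produces, pulled out as a standalone sketch (only the arithmetic comes from the diff; the helper name is ours):

```python
# Standalone sketch of the stem sizing logic above; the function name is
# illustrative, the arithmetic mirrors the diff.
def deep_stem_channels(stem_width, stem_type):
    stem_chs_1 = stem_chs_2 = stem_width
    if 'tiered' in stem_type:
        stem_chs_1 = 3 * (stem_width // 4)
        stem_chs_2 = 6 * (stem_width // 4)
    return stem_chs_1, stem_chs_2, stem_width * 2  # last conv outputs inplanes

print(deep_stem_channels(32, 'deep'))         # (32, 32, 64)  -> 'c'/'d' variants
print(deep_stem_channels(32, 'deep_tiered'))  # (24, 48, 64)  -> 't' variant
print(deep_stem_channels(64, 'deep'))         # (64, 64, 128) -> 'e'/'s' variants
```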
@@ -324,7 +339,7 @@ class ResNet(nn.Module):
         self.num_features = 512 * block.expansion
         self.fc = nn.Linear(self.num_features * self.global_pool.feat_mult(), num_classes)
 
-        last_bn_name = 'bn3' if 'Bottleneck' in block.__name__ else 'bn2'
+        last_bn_name = 'bn3' if 'Bottle' in block.__name__ else 'bn2'
         for n, m in self.named_modules():
             if isinstance(m, nn.Conv2d):
                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
@@ -440,7 +455,7 @@ def resnet26d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['resnet26d']
     model = ResNet(
-        Bottleneck, [2, 2, 2, 2], stem_width=32, deep_stem=True, avg_down=True,
+        Bottleneck, [2, 2, 2, 2], stem_width=32, stem_type='deep', avg_down=True,
         num_classes=num_classes, in_chans=in_chans, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
@@ -466,7 +481,7 @@ def resnet50d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     """
     default_cfg = default_cfgs['resnet50d']
     model = ResNet(
-        Bottleneck, [3, 4, 6, 3], stem_width=32, deep_stem=True, avg_down=True,
+        Bottleneck, [3, 4, 6, 3], stem_width=32, stem_type='deep', avg_down=True,
         num_classes=num_classes, in_chans=in_chans, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
@@ -574,7 +589,7 @@ def resnext50d_32x4d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
     default_cfg = default_cfgs['resnext50d_32x4d']
     model = ResNet(
         Bottleneck, [3, 4, 6, 3], cardinality=32, base_width=4,
-        stem_width=32, deep_stem=True, avg_down=True,
+        stem_width=32, stem_type='deep', avg_down=True,
         num_classes=num_classes, in_chans=in_chans, **kwargs)
     model.default_cfg = default_cfg
     if pretrained:
@@ -854,3 +869,34 @@ def swsl_resnext101_32x16d(pretrained=True, **kwargs):
     if pretrained:
         load_pretrained(model, num_classes=kwargs.get('num_classes', 0), in_chans=kwargs.get('in_chans', 3))
     return model
+
+
+@register_model
+def seresnext26d_32x4d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
+    """Constructs a SE-ResNeXt-26-D model.
+    This is technically a 28 layer ResNet, sticking with the 'd' modifier from Gluon for now.
+    """
+    default_cfg = default_cfgs['seresnext26d_32x4d']
+    model = ResNet(
+        Bottleneck, [2, 2, 2, 2], cardinality=32, base_width=4,
+        stem_width=32, stem_type='deep', avg_down=True, use_se=True,
+        num_classes=num_classes, in_chans=in_chans, **kwargs)
+    model.default_cfg = default_cfg
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes, in_chans)
+    return model
+
+
+@register_model
+def seresnext26t_32x4d(pretrained=False, num_classes=1000, in_chans=3, **kwargs):
+    """Constructs a SE-ResNeXt-26-T model, a ResNet-26 with a deep tiered stem ('t' variant).
+    """
+    default_cfg = default_cfgs['seresnext26t_32x4d']
+    model = ResNet(
+        Bottleneck, [2, 2, 2, 2], cardinality=32, base_width=4,
+        stem_width=32, stem_type='deep_tiered', avg_down=True, use_se=True,
+        num_classes=num_classes, in_chans=in_chans, **kwargs)
+    model.default_cfg = default_cfg
+    if pretrained:
+        load_pretrained(model, default_cfg, num_classes, in_chans)
+    return model
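A quick sanity-check sketch for the two new entry points (assumes this branch is importable; the ~16.8M parameter figure comes from the README results table above):

```python
# Sanity-check sketch; assumes timm is installed from this branch.
import torch
from timm.models.resnet import seresnext26d_32x4d, seresnext26t_32x4d

for fn in (seresnext26d_32x4d, seresnext26t_32x4d):
    model = fn(pretrained=False).eval()
    n_params = sum(p.numel() for p in model.parameters())
    print(f'{fn.__name__}: {n_params / 1e6:.1f}M params')  # ~16.8M each per README
    with torch.no_grad():
        assert model(torch.randn(2, 3, 224, 224)).shape == (2, 1000)
```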