mirror of
https://github.com/huggingface/pytorch-image-models.git
synced 2025-06-03 15:01:08 +08:00
Update README.md and few more comments
This commit is contained in:
parent
0d253e2c5e
commit
d86dbe45c2
@ -2,8 +2,11 @@
|
|||||||
|
|
||||||
## What's New
|
## What's New
|
||||||
|
|
||||||
|
### Feb 12, 2021
|
||||||
|
* Update Normalization-Free nets to include new NFNet-F (https://arxiv.org/abs/2102.06171) model defs
|
||||||
|
|
||||||
### Feb 10, 2021
|
### Feb 10, 2021
|
||||||
* First Normalizer-Free model training experiments done,
|
* First Normalization-Free model training experiments done,
|
||||||
* nf_resnet50 - 80.68 top-1 @ 288x288, 80.31 @ 256x256
|
* nf_resnet50 - 80.68 top-1 @ 288x288, 80.31 @ 256x256
|
||||||
* nf_regnet_b1 - 79.30 @ 288x288, 78.75 @ 256x256
|
* nf_regnet_b1 - 79.30 @ 288x288, 78.75 @ 256x256
|
||||||
* More model archs, incl a flexible ByobNet backbone ('Bring-your-own-blocks')
|
* More model archs, incl a flexible ByobNet backbone ('Bring-your-own-blocks')
|
||||||
@ -164,6 +167,7 @@ A full version of the list below with source links can be found in the [document
|
|||||||
* Inception-ResNet-V2 and Inception-V4 - https://arxiv.org/abs/1602.07261
|
* Inception-ResNet-V2 and Inception-V4 - https://arxiv.org/abs/1602.07261
|
||||||
* MobileNet-V3 (MBConvNet w/ Efficient Head) - https://arxiv.org/abs/1905.02244
|
* MobileNet-V3 (MBConvNet w/ Efficient Head) - https://arxiv.org/abs/1905.02244
|
||||||
* NASNet-A - https://arxiv.org/abs/1707.07012
|
* NASNet-A - https://arxiv.org/abs/1707.07012
|
||||||
|
* NFNet-F - https://arxiv.org/abs/2102.06171
|
||||||
* NF-RegNet / NF-ResNet - https://arxiv.org/abs/2101.08692
|
* NF-RegNet / NF-ResNet - https://arxiv.org/abs/2101.08692
|
||||||
* PNasNet - https://arxiv.org/abs/1712.00559
|
* PNasNet - https://arxiv.org/abs/1712.00559
|
||||||
* RegNet - https://arxiv.org/abs/2003.13678
|
* RegNet - https://arxiv.org/abs/2003.13678
|
||||||
|
@ -236,7 +236,7 @@ class DownsampleAvg(nn.Module):
|
|||||||
|
|
||||||
|
|
||||||
class NormFreeBlock(nn.Module):
|
class NormFreeBlock(nn.Module):
|
||||||
"""Normalization-free pre-activation block.
|
"""Normalization-Free pre-activation block.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -351,6 +351,7 @@ def create_stem(in_chs, out_chs, stem_type='', conv_layer=None, act_layer=None):
|
|||||||
return nn.Sequential(stem), stem_stride, stem_feature
|
return nn.Sequential(stem), stem_stride, stem_feature
|
||||||
|
|
||||||
|
|
||||||
|
# from https://github.com/deepmind/deepmind-research/tree/master/nfnets
|
||||||
_nonlin_gamma = dict(
|
_nonlin_gamma = dict(
|
||||||
identity=1.0,
|
identity=1.0,
|
||||||
celu=1.270926833152771,
|
celu=1.270926833152771,
|
||||||
@ -371,10 +372,13 @@ _nonlin_gamma = dict(
|
|||||||
|
|
||||||
|
|
||||||
class NormFreeNet(nn.Module):
|
class NormFreeNet(nn.Module):
|
||||||
""" Normalization-free ResNets and RegNets
|
""" Normalization-Free Network
|
||||||
|
|
||||||
As described in `Characterizing signal propagation to close the performance gap in unnormalized ResNets`
|
As described in:
|
||||||
|
`Characterizing signal propagation to close the performance gap in unnormalized ResNets`
|
||||||
- https://arxiv.org/abs/2101.08692
|
- https://arxiv.org/abs/2101.08692
|
||||||
|
and
|
||||||
|
`High-Performance Large-Scale Image Recognition Without Normalization` - https://arxiv.org/abs/2102.06171
|
||||||
|
|
||||||
This model aims to cover both the NFRegNet-Bx models as detailed in the paper's code snippets and
|
This model aims to cover both the NFRegNet-Bx models as detailed in the paper's code snippets and
|
||||||
the (preact) ResNet models described earlier in the paper.
|
the (preact) ResNet models described earlier in the paper.
|
||||||
@ -432,7 +436,7 @@ class NormFreeNet(nn.Module):
|
|||||||
blocks += [NormFreeBlock(
|
blocks += [NormFreeBlock(
|
||||||
in_chs=prev_chs, out_chs=out_chs,
|
in_chs=prev_chs, out_chs=out_chs,
|
||||||
alpha=cfg.alpha,
|
alpha=cfg.alpha,
|
||||||
beta=1. / expected_var ** 0.5, # NOTE: beta used as multiplier in block
|
beta=1. / expected_var ** 0.5,
|
||||||
stride=stride if block_idx == 0 else 1,
|
stride=stride if block_idx == 0 else 1,
|
||||||
dilation=dilation,
|
dilation=dilation,
|
||||||
first_dilation=first_dilation,
|
first_dilation=first_dilation,
|
||||||
@ -477,8 +481,6 @@ class NormFreeNet(nn.Module):
|
|||||||
if m.bias is not None:
|
if m.bias is not None:
|
||||||
nn.init.zeros_(m.bias)
|
nn.init.zeros_(m.bias)
|
||||||
elif isinstance(m, nn.Conv2d):
|
elif isinstance(m, nn.Conv2d):
|
||||||
# as per discussion with paper authors, original in haiku is
|
|
||||||
# hk.initializers.VarianceScaling(1.0, 'fan_in', 'normal') w/ zero'd bias
|
|
||||||
nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='linear')
|
nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='linear')
|
||||||
if m.bias is not None:
|
if m.bias is not None:
|
||||||
nn.init.zeros_(m.bias)
|
nn.init.zeros_(m.bias)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user