From 5c4991a0885895f9de35920e17978ac61ec9ea6a Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Thu, 6 Feb 2020 12:51:58 -0800
Subject: [PATCH 1/2] Add PyTorch trained EfficientNet-ES weights from Andrew Lavin

---
 README.md                   | 11 ++++++++++-
 timm/models/efficientnet.py |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index afa5665e..7291c8ce 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,9 @@
 
 ## What's New
 
+### Feb 6, 2020
+* Add RandAugment trained EfficientNet-ES (EdgeTPU-Small) weights with 78.1 top-1. Trained by [Andrew Lavin](https://github.com/andravin) (see Training section for hparams)
+
 ### Feb 1/2, 2020
 * Port new EfficientNet-B8 (RandAugment) weights; these differ from the B8 AdvProp weights and use different input normalization.
 * Update results csv files on all models for ImageNet validation and three other test sets
@@ -151,6 +154,7 @@ I've leveraged the training scripts in this repository to train a few of the mod
 | mixnet_l | 78.976 (21.024) | 94.184 (5.816) | 7.33M | bicubic | 224 |
 | efficientnet_b1 | 78.692 (21.308) | 94.086 (5.914) | 7.79M | bicubic | 240 |
 | resnext50_32x4d | 78.512 (21.488) | 94.042 (5.958) | 25M | bicubic | 224 |
+| efficientnet_es | 78.066 (21.934) | 93.926 (6.074) | 5.44M | bicubic | 224 |
 | seresnext26t_32x4d | 77.998 (22.002) | 93.708 (6.292) | 16.8M | bicubic | 224 |
 | seresnext26tn_32x4d | 77.986 (22.014) | 93.746 (6.254) | 16.8M | bicubic | 224 |
 | efficientnet_b0 | 77.698 (22.302) | 93.532 (6.468) | 5.29M | bicubic | 224 |
@@ -297,7 +301,7 @@ These hparams (or similar) work well for a wide range of ResNet architecture, ge
 The training of this model started with the same command line as EfficientNet-B2 w/ RA above. After almost three weeks of training, the process crashed. The results weren't looking amazing, so I resumed the training several times with tweaks to a few params (increase RE prob, decrease rand-aug, increase ema-decay). Nothing looked great. I ended up averaging the best checkpoints from all restarts. The result is mediocre at default res/crop but oddly performs much better with a full image test crop of 1.0.
 
 ### EfficientNet-B0 with RandAugment - 77.7 top-1, 95.3 top-5
-Michael Klachko achieved these results with the command line for B2 adapted for larger batch size, with the recommended B0 dropout rate of 0.2.
+[Michael Klachko](https://github.com/michaelklachko) achieved these results with the command line for B2 adapted for a larger batch size, with the recommended B0 dropout rate of 0.2.
 
 `./distributed_train.sh 2 /imagenet/ --model efficientnet_b0 -b 384 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-connect 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .048`
 
@@ -307,6 +311,11 @@ Trained on two older 1080Ti cards, this took a while. Only slightly, non statist
 
 `./distributed_train.sh 2 /imagenet -b 64 --model resnet50 --sched cosine --epochs 200 --lr 0.05 --amp --remode pixel --reprob 0.6 --aug-splits 3 --aa rand-m9-mstd0.5-inc1 --resplit --split-bn --jsd --dist-bn reduce`
 
+### EfficientNet-ES (EdgeTPU-Small) with RandAugment - 78.066 top-1, 93.926 top-5
+Trained by [Andrew Lavin](https://github.com/andravin) with 8 V100 cards. Model EMA was not used; the final checkpoint is the average of the 8 best checkpoints during training.
+
+`./distributed_train.sh 8 /imagenet --model efficientnet_es --output /data/trained-models -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-connect 0.2 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .064`
+
 **TODO dig up some more**
 
 
diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py
index ae100b69..8d07a2ca 100644
--- a/timm/models/efficientnet.py
+++ b/timm/models/efficientnet.py
@@ -92,7 +92,7 @@ default_cfgs = {
     'efficientnet_b8': _cfg(
         url='', input_size=(3, 672, 672), pool_size=(21, 21), crop_pct=0.954),
     'efficientnet_es': _cfg(
-        url=''),
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_es_ra-f111e99c.pth'),
     'efficientnet_em': _cfg(
         url='', input_size=(3, 240, 240), pool_size=(8, 8), crop_pct=0.882),
     'efficientnet_el': _cfg(

From 5eb0e363a63e823f27810ea6bf5b6b8e136c4176 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Thu, 6 Feb 2020 12:56:46 -0800
Subject: [PATCH 2/2] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7291c8ce..f2bc9b43 100644
--- a/README.md
+++ b/README.md
@@ -314,7 +314,7 @@ Trained on two older 1080Ti cards, this took a while. Only slightly, non statist
 ### EfficientNet-ES (EdgeTPU-Small) with RandAugment - 78.066 top-1, 93.926 top-5
 Trained by [Andrew Lavin](https://github.com/andravin) with 8 V100 cards. Model EMA was not used; the final checkpoint is the average of the 8 best checkpoints during training.
 
-`./distributed_train.sh 8 /imagenet --model efficientnet_es --output /data/trained-models -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-connect 0.2 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .064`
+`./distributed_train.sh 8 /imagenet --model efficientnet_es -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-connect 0.2 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .064`
 
 **TODO dig up some more**
 
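Once the patch is applied and the release asset above is published, the new weights can be smoke-tested with timm's standard `create_model` factory — a minimal sketch, not part of the patch itself (the 224x224 input size and 1000-class head are the `efficientnet_es` defaults, assumed here rather than taken from the diff):

```python
import torch
import timm

# Build EfficientNet-ES; pretrained=True fetches the checkpoint from the url
# registered in default_cfgs ('efficientnet_es' -> efficientnet_es_ra-f111e99c.pth),
# so an unpublished or wrong URL fails here rather than at inference time.
model = timm.create_model('efficientnet_es', pretrained=True)
model.eval()

# The efficientnet_es default config uses a 224x224 input and a 1000-class head.
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    logits = model(x)
print(logits.shape)  # expected: torch.Size([1, 1000])
```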