From 5c4991a0885895f9de35920e17978ac61ec9ea6a Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Thu, 6 Feb 2020 12:51:58 -0800
Subject: [PATCH 1/2] Add PyTorch trained EfficientNet-ES weights from Andrew Lavin

---
 README.md                   | 11 ++++++++++-
 timm/models/efficientnet.py |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index afa5665e..7291c8ce 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,9 @@
 
 ## What's New
 
+### Feb 6, 2020
+* Add RandAugment trained EfficientNet-ES (EdgeTPU-Small) weights with 78.1 top-1. Trained by [Andrew Lavin](https://github.com/andravin) (see Training section for hparams)
+
 ### Feb 1/2, 2020
 * Port new EfficientNet-B8 (RandAugment) weights; these differ from the B8 AdvProp weights and use different input normalization.
 * Update results csv files on all models for ImageNet validation and three other test sets
@@ -151,6 +154,7 @@ I've leveraged the training scripts in this repository to train a few of the mod
 | mixnet_l | 78.976 (21.024) | 94.184 (5.816) | 7.33M | bicubic | 224 |
 | efficientnet_b1 | 78.692 (21.308) | 94.086 (5.914) | 7.79M | bicubic | 240 |
 | resnext50_32x4d | 78.512 (21.488) | 94.042 (5.958) | 25M | bicubic | 224 |
+| efficientnet_es | 78.066 (21.934) | 93.926 (6.074) | 5.44M | bicubic | 224 |
 | seresnext26t_32x4d | 77.998 (22.002) | 93.708 (6.292) | 16.8M | bicubic | 224 |
 | seresnext26tn_32x4d | 77.986 (22.014) | 93.746 (6.254) | 16.8M | bicubic | 224 |
 | efficientnet_b0 | 77.698 (22.302) | 93.532 (6.468) | 5.29M | bicubic | 224 |
@@ -297,7 +301,7 @@ These hparams (or similar) work well for a wide range of ResNet architecture, ge
 The training of this model started with the same command line as EfficientNet-B2 w/ RA above. After almost three weeks of training, the process crashed. The results weren't looking amazing, so I resumed the training several times with tweaks to a few params (increase RE prob, decrease rand-aug, increase ema-decay). Nothing looked great. I ended up averaging the best checkpoints from all restarts. The result is mediocre at default res/crop but oddly performs much better with a full image test crop of 1.0.
 
 ### EfficientNet-B0 with RandAugment - 77.7 top-1, 95.3 top-5
-Michael Klachko achieved these results with the command line for B2 adapted for larger batch size, with the recommended B0 dropout rate of 0.2.
+[Michael Klachko](https://github.com/michaelklachko) achieved these results with the command line for B2 adapted for a larger batch size, with the recommended B0 dropout rate of 0.2.
 
 `./distributed_train.sh 2 /imagenet/ --model efficientnet_b0 -b 384 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-connect 0.2 --model-ema --model-ema-decay 0.9999 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .048`
 
@@ -307,6 +311,11 @@ Trained on two older 1080Ti cards, this took a while. Only slightly, non statist
 
 `./distributed_train.sh 2 /imagenet -b 64 --model resnet50 --sched cosine --epochs 200 --lr 0.05 --amp --remode pixel --reprob 0.6 --aug-splits 3 --aa rand-m9-mstd0.5-inc1 --resplit --split-bn --jsd --dist-bn reduce`
 
+### EfficientNet-ES (EdgeTPU-Small) with RandAugment - 78.066 top-1, 93.926 top-5
+Trained by [Andrew Lavin](https://github.com/andravin) with 8 V100 cards. Model EMA was not used; the final checkpoint is the average of the 8 best checkpoints during training.
+
+`./distributed_train.sh 8 /imagenet --model efficientnet_es --output /data/trained-models -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-connect 0.2 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .064`
+
 **TODO dig up some more**
 
 
diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py
index ae100b69..8d07a2ca 100644
--- a/timm/models/efficientnet.py
+++ b/timm/models/efficientnet.py
@@ -92,7 +92,7 @@ default_cfgs = {
     'efficientnet_b8': _cfg(
         url='', input_size=(3, 672, 672), pool_size=(21, 21), crop_pct=0.954),
     'efficientnet_es': _cfg(
-        url=''),
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_es_ra-f111e99c.pth'),
     'efficientnet_em': _cfg(
         url='', input_size=(3, 240, 240), pool_size=(8, 8), crop_pct=0.882),
     'efficientnet_el': _cfg(

From 5eb0e363a63e823f27810ea6bf5b6b8e136c4176 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Thu, 6 Feb 2020 12:56:46 -0800
Subject: [PATCH 2/2] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7291c8ce..f2bc9b43 100644
--- a/README.md
+++ b/README.md
@@ -314,7 +314,7 @@ Trained on two older 1080Ti cards, this took a while. Only slightly, non statist
 ### EfficientNet-ES (EdgeTPU-Small) with RandAugment - 78.066 top-1, 93.926 top-5
 Trained by [Andrew Lavin](https://github.com/andravin) with 8 V100 cards. Model EMA was not used; the final checkpoint is the average of the 8 best checkpoints during training.
 
-`./distributed_train.sh 8 /imagenet --model efficientnet_es --output /data/trained-models -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-connect 0.2 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .064`
+`./distributed_train.sh 8 /imagenet --model efficientnet_es -b 128 --sched step --epochs 450 --decay-epochs 2.4 --decay-rate .97 --opt rmsproptf --opt-eps .001 -j 8 --warmup-lr 1e-6 --weight-decay 1e-5 --drop 0.2 --drop-connect 0.2 --aa rand-m9-mstd0.5 --remode pixel --reprob 0.2 --amp --lr .064`
 
 **TODO dig up some more**
 
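Once the patch is applied and the release asset above is published, the new weights can be smoke-tested with timm's standard `create_model` factory — a minimal sketch, not part of the patch itself (the 224x224 input size and 1000-class head are the `efficientnet_es` defaults, assumed here rather than taken from the diff):

```python
import torch
import timm

# Build EfficientNet-ES; pretrained=True fetches the checkpoint from the url
# registered in default_cfgs ('efficientnet_es' -> efficientnet_es_ra-f111e99c.pth),
# so an unpublished or wrong URL fails here rather than at inference time.
model = timm.create_model('efficientnet_es', pretrained=True)
model.eval()

# The efficientnet_es default config uses a 224x224 input and a 1000-class head.
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    logits = model(x)
print(logits.shape)  # expected: torch.Size([1, 1000])
```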