From b9f020a50918f1da10f8c7fb236e1f47b6975837 Mon Sep 17 00:00:00 2001
From: Ross Wightman <rwightman@gmail.com>
Date: Wed, 21 Aug 2024 16:51:38 -0700
Subject: [PATCH 1/2] Allow group_size override for more efficientnet and
 mobilenetv3 based models

---
 timm/models/efficientnet.py | 44 +++++++++++++++++++++++++------------
 timm/models/mobilenetv3.py  | 12 ++++++----
 2 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py
index 2cf4130d..f87d44c4 100644
--- a/timm/models/efficientnet.py
+++ b/timm/models/efficientnet.py
@@ -488,7 +488,7 @@ def _gen_mnasnet_small(variant, channel_multiplier=1.0, pretrained=False, **kwar
 
 def _gen_mobilenet_v1(
         variant, channel_multiplier=1.0, depth_multiplier=1.0,
-        fix_stem_head=False, head_conv=False, pretrained=False, **kwargs):
+        group_size=None, fix_stem_head=False, head_conv=False, pretrained=False, **kwargs):
     """
     Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v2.py
     Paper: https://arxiv.org/abs/1801.04381
@@ -503,7 +503,12 @@ def _gen_mobilenet_v1(
     round_chs_fn = partial(round_channels, multiplier=channel_multiplier)
     head_features = (1024 if fix_stem_head else max(1024, round_chs_fn(1024))) if head_conv else 0
     model_kwargs = dict(
-        block_args=decode_arch_def(arch_def, depth_multiplier=depth_multiplier, fix_first_last=fix_stem_head),
+        block_args=decode_arch_def(
+            arch_def,
+            depth_multiplier=depth_multiplier,
+            fix_first_last=fix_stem_head,
+            group_size=group_size,
+        ),
         num_features=head_features,
         stem_size=32,
         fix_stem=fix_stem_head,
@@ -517,7 +522,9 @@ def _gen_mobilenet_v1(
 
 
 def _gen_mobilenet_v2(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, fix_stem_head=False, pretrained=False, **kwargs):
+        variant, channel_multiplier=1.0, depth_multiplier=1.0,
+        group_size=None, fix_stem_head=False, pretrained=False, **kwargs
+):
     """ Generate MobileNet-V2 network
     Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v2.py
     Paper: https://arxiv.org/abs/1801.04381
@@ -533,7 +540,12 @@ def _gen_mobilenet_v2(
     ]
     round_chs_fn = partial(round_channels, multiplier=channel_multiplier)
     model_kwargs = dict(
-        block_args=decode_arch_def(arch_def, depth_multiplier=depth_multiplier, fix_first_last=fix_stem_head),
+        block_args=decode_arch_def(
+            arch_def,
+            depth_multiplier=depth_multiplier,
+            fix_first_last=fix_stem_head,
+            group_size=group_size,
+        ),
         num_features=1280 if fix_stem_head else max(1280, round_chs_fn(1280)),
         stem_size=32,
         fix_stem=fix_stem_head,
@@ -764,7 +776,7 @@ def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multiplier=1.0
 
 
 def _gen_efficientnetv2_base(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
     """ Creates an EfficientNet-V2 base model
 
     Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
@@ -780,7 +792,7 @@ def _gen_efficientnetv2_base(
     ]
     round_chs_fn = partial(round_channels, multiplier=channel_multiplier, round_limit=0.)
     model_kwargs = dict(
-        block_args=decode_arch_def(arch_def, depth_multiplier),
+        block_args=decode_arch_def(arch_def, depth_multiplier, group_size=group_size),
         num_features=round_chs_fn(1280),
         stem_size=32,
         round_chs_fn=round_chs_fn,
@@ -831,7 +843,8 @@ def _gen_efficientnetv2_s(
     return model
 
 
-def _gen_efficientnetv2_m(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+def _gen_efficientnetv2_m(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
     """ Creates an EfficientNet-V2 Medium model
 
     Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
@@ -849,7 +862,7 @@ def _gen_efficientnetv2_m(variant, channel_multiplier=1.0, depth_multiplier=1.0,
     ]
 
     model_kwargs = dict(
-        block_args=decode_arch_def(arch_def, depth_multiplier),
+        block_args=decode_arch_def(arch_def, depth_multiplier, group_size=group_size),
         num_features=1280,
         stem_size=24,
         round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
@@ -861,7 +874,8 @@ def _gen_efficientnetv2_m(variant, channel_multiplier=1.0, depth_multiplier=1.0,
     return model
 
 
-def _gen_efficientnetv2_l(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+def _gen_efficientnetv2_l(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
     """ Creates an EfficientNet-V2 Large model
 
     Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
@@ -879,7 +893,7 @@ def _gen_efficientnetv2_l(variant, channel_multiplier=1.0, depth_multiplier=1.0,
     ]
 
     model_kwargs = dict(
-        block_args=decode_arch_def(arch_def, depth_multiplier),
+        block_args=decode_arch_def(arch_def, depth_multiplier, group_size=group_size),
         num_features=1280,
         stem_size=32,
         round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
@@ -891,7 +905,8 @@ def _gen_efficientnetv2_l(variant, channel_multiplier=1.0, depth_multiplier=1.0,
     return model
 
 
-def _gen_efficientnetv2_xl(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+def _gen_efficientnetv2_xl(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
     """ Creates an EfficientNet-V2 Xtra-Large model
 
     Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
@@ -909,7 +924,7 @@ def _gen_efficientnetv2_xl(variant, channel_multiplier=1.0, depth_multiplier=1.0
     ]
 
     model_kwargs = dict(
-        block_args=decode_arch_def(arch_def, depth_multiplier),
+        block_args=decode_arch_def(arch_def, depth_multiplier, group_size=group_size),
         num_features=1280,
         stem_size=32,
         round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
@@ -1094,7 +1109,8 @@ def _gen_tinynet(
     return model
 
 
-def _gen_mobilenet_edgetpu(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+def _gen_mobilenet_edgetpu(
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
     """
     Based on definitions in: https://github.com/tensorflow/models/tree/d2427a562f401c9af118e47af2f030a0a5599f55/official/projects/edgetpu/vision
     """
@@ -1170,7 +1186,7 @@ def _gen_mobilenet_edgetpu(variant, channel_multiplier=1.0, depth_multiplier=1.0
         ]
 
     model_kwargs = dict(
-        block_args=decode_arch_def(arch_def, depth_multiplier),
+        block_args=decode_arch_def(arch_def, depth_multiplier, group_size=group_size),
         num_features=num_features,
         stem_size=stem_size,
         stem_kernel_size=stem_kernel_size,
diff --git a/timm/models/mobilenetv3.py b/timm/models/mobilenetv3.py
index 2641fd08..44ec0764 100644
--- a/timm/models/mobilenetv3.py
+++ b/timm/models/mobilenetv3.py
@@ -450,7 +450,9 @@ def _gen_mobilenet_v3_rw(variant: str, channel_multiplier: float = 1.0, pretrain
     return model
 
 
-def _gen_mobilenet_v3(variant: str, channel_multiplier: float = 1.0, pretrained: bool = False, **kwargs) -> MobileNetV3:
+def _gen_mobilenet_v3(
+        variant: str, channel_multiplier: float = 1.0, group_size=None, pretrained: bool = False, **kwargs
+) -> MobileNetV3:
     """Creates a MobileNet-V3 model.
 
     Ref impl: ?
@@ -533,7 +535,7 @@ def _gen_mobilenet_v3(variant: str, channel_multiplier: float = 1.0, pretrained:
             ]
     se_layer = partial(SqueezeExcite, gate_layer='hard_sigmoid', force_act_layer=nn.ReLU, rd_round_fn=round_channels)
     model_kwargs = dict(
-        block_args=decode_arch_def(arch_def),
+        block_args=decode_arch_def(arch_def, group_size=group_size),
         num_features=num_features,
         stem_size=16,
         fix_stem=channel_multiplier < 0.75,
@@ -646,7 +648,9 @@ def _gen_lcnet(variant: str, channel_multiplier: float = 1.0, pretrained: bool =
     return model
 
 
-def _gen_mobilenet_v4(variant: str, channel_multiplier: float = 1.0, pretrained: bool = False, **kwargs) -> MobileNetV3:
+def _gen_mobilenet_v4(
+        variant: str, channel_multiplier: float = 1.0, group_size=None, pretrained: bool = False, **kwargs,
+) -> MobileNetV3:
     """Creates a MobileNet-V4 model.
 
     Ref impl: ?
@@ -877,7 +881,7 @@ def _gen_mobilenet_v4(variant: str, channel_multiplier: float = 1.0, pretrained:
             assert False, f'Unknown variant {variant}.'
 
     model_kwargs = dict(
-        block_args=decode_arch_def(arch_def),
+        block_args=decode_arch_def(arch_def, group_size=group_size),
         head_bias=False,
         head_norm=True,
         num_features=num_features,

From 39e92f0c0dbcfaa17b7985b79bbe81a8dbd6902c Mon Sep 17 00:00:00 2001
From: Ross Wightman <rwightman@gmail.com>
Date: Thu, 22 Aug 2024 11:44:02 -0700
Subject: [PATCH 2/2] mobilenet_edgetpu can't use group_size override, more
 consistency in arg wrap/sadface w/ extra group_size arg

---
 timm/models/efficientnet.py | 42 +++++++++++++++++++++----------------
 timm/models/mobilenetv3.py  |  4 +++-
 2 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py
index f87d44c4..e097d822 100644
--- a/timm/models/efficientnet.py
+++ b/timm/models/efficientnet.py
@@ -488,7 +488,8 @@ def _gen_mnasnet_small(variant, channel_multiplier=1.0, pretrained=False, **kwar
 
 def _gen_mobilenet_v1(
         variant, channel_multiplier=1.0, depth_multiplier=1.0,
-        group_size=None, fix_stem_head=False, head_conv=False, pretrained=False, **kwargs):
+        group_size=None, fix_stem_head=False, head_conv=False, pretrained=False, **kwargs
+):
     """
     Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v2.py
     Paper: https://arxiv.org/abs/1801.04381
@@ -625,7 +626,8 @@ def _gen_spnasnet(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
 
 def _gen_efficientnet(
         variant, channel_multiplier=1.0, depth_multiplier=1.0, channel_divisor=8,
-        group_size=None, pretrained=False, **kwargs):
+        group_size=None, pretrained=False, **kwargs
+):
     """Creates an EfficientNet model.
 
     Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py
@@ -673,7 +675,8 @@ def _gen_efficientnet(
 
 
 def _gen_efficientnet_edge(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs
+):
     """ Creates an EfficientNet-EdgeTPU model
 
     Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/edgetpu
@@ -704,7 +707,8 @@ def _gen_efficientnet_edge(
 
 
 def _gen_efficientnet_condconv(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=1, pretrained=False, **kwargs):
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=1, pretrained=False, **kwargs
+):
     """Creates an EfficientNet-CondConv model.
 
     Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/condconv
@@ -776,7 +780,8 @@ def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multiplier=1.0
 
 
 def _gen_efficientnetv2_base(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs
+):
     """ Creates an EfficientNet-V2 base model
 
     Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
@@ -805,7 +810,8 @@ def _gen_efficientnetv2_base(
 
 
 def _gen_efficientnetv2_s(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, rw=False, pretrained=False, **kwargs):
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, rw=False, pretrained=False, **kwargs
+):
     """ Creates an EfficientNet-V2 Small model
 
     Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
@@ -844,7 +850,8 @@ def _gen_efficientnetv2_s(
 
 
 def _gen_efficientnetv2_m(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs
+):
     """ Creates an EfficientNet-V2 Medium model
 
     Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
@@ -875,7 +882,8 @@ def _gen_efficientnetv2_m(
 
 
 def _gen_efficientnetv2_l(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs
+):
     """ Creates an EfficientNet-V2 Large model
 
     Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
@@ -906,7 +914,8 @@ def _gen_efficientnetv2_l(
 
 
 def _gen_efficientnetv2_xl(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
+        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs
+):
     """ Creates an EfficientNet-V2 Xtra-Large model
 
     Ref impl: https://github.com/google/automl/tree/master/efficientnetv2
@@ -938,7 +947,8 @@ def _gen_efficientnetv2_xl(
 
 def _gen_efficientnet_x(
         variant, channel_multiplier=1.0, depth_multiplier=1.0, channel_divisor=8,
-        group_size=None, version=1, pretrained=False, **kwargs):
+        group_size=None, version=1, pretrained=False, **kwargs
+):
     """Creates an EfficientNet model.
 
     Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py
@@ -1084,9 +1094,7 @@ def _gen_mixnet_m(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrai
     return model
 
 
-def _gen_tinynet(
-    variant, model_width=1.0, depth_multiplier=1.0, pretrained=False, **kwargs
-):
+def _gen_tinynet(variant, model_width=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
     """Creates a TinyNet model.
     """
     arch_def = [
@@ -1109,8 +1117,7 @@ def _gen_tinynet(
     return model
 
 
-def _gen_mobilenet_edgetpu(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, group_size=None, pretrained=False, **kwargs):
+def _gen_mobilenet_edgetpu(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
     """
     Based on definitions in: https://github.com/tensorflow/models/tree/d2427a562f401c9af118e47af2f030a0a5599f55/official/projects/edgetpu/vision
     """
@@ -1186,7 +1193,7 @@ def _gen_mobilenet_edgetpu(
         ]
 
     model_kwargs = dict(
-        block_args=decode_arch_def(arch_def, depth_multiplier, group_size=group_size),
+        block_args=decode_arch_def(arch_def, depth_multiplier),
         num_features=num_features,
         stem_size=stem_size,
         stem_kernel_size=stem_kernel_size,
@@ -1199,8 +1206,7 @@ def _gen_mobilenet_edgetpu(
     return model
 
 
-def _gen_test_efficientnet(
-        variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
+def _gen_test_efficientnet(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
     """ Minimal test EfficientNet generator.
     """
     arch_def = [
diff --git a/timm/models/mobilenetv3.py b/timm/models/mobilenetv3.py
index 44ec0764..668341d6 100644
--- a/timm/models/mobilenetv3.py
+++ b/timm/models/mobilenetv3.py
@@ -412,7 +412,9 @@ def _create_mnv3(variant: str, pretrained: bool = False, **kwargs) -> MobileNetV
     return model
 
 
-def _gen_mobilenet_v3_rw(variant: str, channel_multiplier: float = 1.0, pretrained: bool = False, **kwargs) -> MobileNetV3:
+def _gen_mobilenet_v3_rw(
+        variant: str, channel_multiplier: float = 1.0, pretrained: bool = False, **kwargs
+) -> MobileNetV3:
     """Creates a MobileNet-V3 model.
 
     Ref impl: ?