[Enhance] Update fsdp vit-huge and vit-large config (#1675)

* Update fsdp vit-huge and vit-large config
* Update fsdp vit-huge and vit-large config
* rename
2023-06-30 11:15:18 +08:00 · 2023-06-30 11:15:18 +08:00 · 8afad77a35
parent 658db80089
commit 8afad77a35
4 changed files with 28 additions and 2 deletions
--- a/configs/mae/benchmarks/vit-huge-p14_8xb128-ds-zero3-coslr-50e_in1k.py
+++ b/configs/mae/benchmarks/vit-huge-p14_8xb128-ds-zero3-coslr-50e_in1k.py
@@ -18,7 +18,7 @@ strategy = dict(
    ),
    inputs_to_half=['inputs'],
    zero_optimization=dict(
-        stage=3,
+        stage=1,
        allgather_partitions=True,
        reduce_scatter=True,
        allgather_bucket_size=50000000,
--- a/configs/mae/benchmarks/vit-huge-p14_8xb128-fsdp-coslr-50e_in1k.py
+++ b/configs/mae/benchmarks/vit-huge-p14_8xb128-fsdp-coslr-50e_in1k.py
@@ -0,0 +1,13 @@
+_base_ = ['./vit-huge-p14_8xb128-coslr-50e_in1k.py']
+
+strategy = dict(
+    type='FSDPStrategy',
+    model_wrapper=dict(
+        auto_wrap_policy=dict(
+            type='torch.distributed.fsdp.wrap.size_based_auto_wrap_policy',
+            min_num_params=1e7)))
+
+optim_wrapper = dict(type='AmpOptimWrapper')
+
+# runner which supports strategies
+runner_type = 'FlexibleRunner'
--- a/configs/mae/benchmarks/vit-large-p16_8xb128-ds-zero3-coslr-50e_in1k.py
+++ b/configs/mae/benchmarks/vit-large-p16_8xb128-ds-zero3-coslr-50e_in1k.py
@@ -18,7 +18,7 @@ strategy = dict(
    ),
    inputs_to_half=['inputs'],
    zero_optimization=dict(
-        stage=3,
+        stage=1,
        allgather_partitions=True,
        reduce_scatter=True,
        allgather_bucket_size=50000000,
--- a/configs/mae/benchmarks/vit-large-p16_8xb128-fsdp-coslr-50e_in1k.py
+++ b/configs/mae/benchmarks/vit-large-p16_8xb128-fsdp-coslr-50e_in1k.py
@@ -0,0 +1,13 @@
+_base_ = ['./vit-large-p16_8xb128-coslr-50e_in1k.py']
+
+strategy = dict(
+    type='FSDPStrategy',
+    model_wrapper=dict(
+        auto_wrap_policy=dict(
+            type='torch.distributed.fsdp.wrap.size_based_auto_wrap_policy',
+            min_num_params=1e7)))
+
+optim_wrapper = dict(type='AmpOptimWrapper')
+
+# runner which supports strategies
+runner_type = 'FlexibleRunner'