251 lines
9.7 KiB
YAML
251 lines
9.7 KiB
YAML
Collections:
|
|
- Name: MAE
|
|
Metadata:
|
|
Training Data: ImageNet-1k
|
|
Training Techniques:
|
|
- AdamW
|
|
Training Resources: 8x A100-80G GPUs
|
|
Architecture:
|
|
- ViT
|
|
Paper:
|
|
URL: https://arxiv.org/abs/2111.06377
|
|
Title: "Masked Autoencoders Are Scalable Vision Learners"
|
|
README: configs/selfsup/mae/README.md
|
|
|
|
Models:
|
|
- Name: mae_vit-base-p16_8xb512-amp-coslr-300e_in1k
|
|
In Collection: MAE
|
|
Metadata:
|
|
Epochs: 300
|
|
Batch Size: 4096
|
|
Results: null
|
|
Config: configs/selfsup/mae/mae_vit-base-p16_8xb512-amp-coslr-300e_in1k.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-base-p16_8xb512-fp16-coslr-300e_in1k/mae_vit-base-p16_8xb512-coslr-300e-fp16_in1k_20220829-c2cf66ba.pth
|
|
Downstream:
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 90
|
|
Batch Size: 16384
|
|
Results:
|
|
- Task: Linear Evaluation
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 60.8
|
|
Config: configs/benchmarks/classification/imagenet/vit-base-p16_linear-8xb2048-coslr-90e_in1k.py
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 100
|
|
Batch Size: 1024
|
|
Results:
|
|
- Task: Fine-tuning
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 83.1
|
|
Config: configs/benchmarks/classification/imagenet/vit-base-p16_ft-8xb128-coslr-100e_in1k.py
|
|
- Name: mae_vit-base-p16_8xb512-amp-coslr-400e_in1k
|
|
In Collection: MAE
|
|
Metadata:
|
|
Epochs: 400
|
|
Batch Size: 4096
|
|
Results: null
|
|
Config: configs/selfsup/mae/mae_vit-base-p16_8xb512-amp-coslr-400e_in1k.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-base-p16_8xb512-fp16-coslr-400e_in1k/mae_vit-base-p16_8xb512-coslr-400e-fp16_in1k_20220825-bc79e40b.pth
|
|
Downstream:
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 90
|
|
Batch Size: 16384
|
|
Results:
|
|
- Task: Linear Evaluation
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 62.5
|
|
Config: configs/benchmarks/classification/imagenet/vit-base-p16_linear-8xb2048-coslr-90e_in1k.py
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 100
|
|
Batch Size: 1024
|
|
Results:
|
|
- Task: Fine-tuning
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 83.3
|
|
Config: configs/benchmarks/classification/imagenet/vit-base-p16_ft-8xb128-coslr-100e_in1k.py
|
|
- Name: mae_vit-base-p16_8xb512-amp-coslr-800e_in1k
|
|
In Collection: MAE
|
|
Metadata:
|
|
Epochs: 800
|
|
Batch Size: 4096
|
|
Results: null
|
|
Config: configs/selfsup/mae/mae_vit-base-p16_8xb512-amp-coslr-800e_in1k.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-base-p16_8xb512-fp16-coslr-800e_in1k/mae_vit-base-p16_8xb512-coslr-800e-fp16_in1k_20220825-5d81fbc4.pth
|
|
Downstream:
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 90
|
|
Batch Size: 16384
|
|
Results:
|
|
- Task: Linear Evaluation
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 65.1
|
|
Config: configs/benchmarks/classification/imagenet/vit-base-p16_linear-8xb2048-coslr-90e_in1k.py
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 100
|
|
Batch Size: 1024
|
|
Results:
|
|
- Task: Fine-tuning
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 83.3
|
|
Config: configs/benchmarks/classification/imagenet/vit-base-p16_ft-8xb128-coslr-100e_in1k.py
|
|
- Name: mae_vit-base-p16_8xb512-amp-coslr-1600e_in1k
|
|
In Collection: MAE
|
|
Metadata:
|
|
Epochs: 1600
|
|
Batch Size: 4096
|
|
Results: null
|
|
Config: configs/selfsup/mae/mae_vit-base-p16_8xb512-amp-coslr-1600e_in1k.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-base-p16_8xb512-fp16-coslr-1600e_in1k/mae_vit-base-p16_8xb512-fp16-coslr-1600e_in1k_20220825-f7569ca2.pth
|
|
Downstream:
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 90
|
|
Batch Size: 16384
|
|
Results:
|
|
- Task: Linear Evaluation
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 67.1
|
|
Config: configs/benchmarks/classification/imagenet/vit-base-p16_linear-8xb2048-coslr-90e_in1k.py
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 100
|
|
Batch Size: 1024
|
|
Results:
|
|
- Task: Fine-tuning
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 83.5
|
|
Config: configs/benchmarks/classification/imagenet/vit-base-p16_ft-8xb128-coslr-100e_in1k.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-base-p16_8xb512-fp16-coslr-1600e_in1k/vit-base-p16_ft-8xb128-coslr-100e_in1k/vit-base-p16_ft-8xb128-coslr-100e_in1k_20220825-cf70aa21.pth
|
|
- Name: mae_vit-large-p16_8xb512-amp-coslr-400e_in1k
|
|
In Collection: MAE
|
|
Metadata:
|
|
Epochs: 400
|
|
Batch Size: 4096
|
|
Results: null
|
|
Config: configs/selfsup/mae/mae_vit-large-p16_8xb512-amp-coslr-400e_in1k.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-large-p16_8xb512-fp16-coslr-400e_in1k/mae_vit-large-p16_8xb512-fp16-coslr-400e_in1k_20220825-b11d0425.pth
|
|
Downstream:
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 90
|
|
Batch Size: 16384
|
|
Results:
|
|
- Task: Linear Evaluation
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 70.7
|
|
Config: configs/benchmarks/classification/imagenet/vit-large-p16_linear-8xb2048-coslr-90e_in1k.py
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 50
|
|
Batch Size: 1024
|
|
Results:
|
|
- Task: Fine-tuning
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 85.2
|
|
Config: configs/benchmarks/classification/imagenet/vit-large-p16_ft-8xb128-coslr-50e_in1k.py
|
|
- Name: mae_vit-large-p16_8xb512-amp-coslr-800e_in1k
|
|
In Collection: MAE
|
|
Metadata:
|
|
Epochs: 800
|
|
Batch Size: 4096
|
|
Results: null
|
|
Config: configs/selfsup/mae/mae_vit-large-p16_8xb512-amp-coslr-800e_in1k.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-large-p16_8xb512-fp16-coslr-800e_in1k/mae_vit-large-p16_8xb512-fp16-coslr-800e_in1k_20220825-df72726a.pth
|
|
Downstream:
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 90
|
|
Batch Size: 16384
|
|
Results:
|
|
- Task: Linear Evaluation
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 73.7
|
|
Config: configs/benchmarks/classification/imagenet/vit-large-p16_linear-8xb2048-coslr-90e_in1k.py
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 50
|
|
Batch Size: 1024
|
|
Results:
|
|
- Task: Fine-tuning
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 85.4
|
|
Config: configs/benchmarks/classification/imagenet/vit-large-p16_ft-8xb128-coslr-50e_in1k.py
|
|
- Name: mae_vit-large-p16_8xb512-amp-coslr-1600e_in1k
|
|
In Collection: MAE
|
|
Metadata:
|
|
Epochs: 1600
|
|
Batch Size: 4096
|
|
Results: null
|
|
Config: configs/selfsup/mae/mae_vit-large-p16_8xb512-amp-coslr-1600e_in1k.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-large-p16_8xb512-fp16-coslr-1600e_in1k/mae_vit-large-p16_8xb512-fp16-coslr-1600e_in1k_20220825-cc7e98c9.pth
|
|
Downstream:
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 90
|
|
Batch Size: 16384
|
|
Results:
|
|
- Task: Linear Evaluation
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 75.5
|
|
Config: configs/benchmarks/classification/imagenet/vit-large-p16_linear-8xb2048-coslr-90e_in1k.py
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 50
|
|
Batch Size: 1024
|
|
Results:
|
|
- Task: Fine-tuning
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 85.7
|
|
Config: configs/benchmarks/classification/imagenet/vit-large-p16_ft-8xb128-coslr-50e_in1k.py
|
|
- Name: mae_vit-huge-p16_8xb512-amp-coslr-1600e_in1k
|
|
In Collection: MAE
|
|
Metadata:
|
|
Epochs: 1600
|
|
Batch Size: 4096
|
|
Results: null
|
|
Config: configs/selfsup/mae/mae_vit-huge-p16_8xb512-amp-coslr-1600e_in1k.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-huge-p16_8xb512-fp16-coslr-1600e_in1k/mae_vit-huge-p16_8xb512-fp16-coslr-1600e_in1k_20220916-ff848775.pth
|
|
Downstream:
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 50
|
|
Batch Size: 1024
|
|
Results:
|
|
- Task: Fine-tuning
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 86.9
|
|
Config: configs/benchmarks/classification/imagenet/vit-huge-p16_ft-8xb128-coslr-50e_in1k.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-huge-p16_8xb512-fp16-coslr-1600e_in1k/vit-huge-p16_ft-8xb128-coslr-50e_in1k/vit-huge-p16_ft-8xb128-coslr-50e_in1k_20220916-0bfc9bfd.pth
|
|
- Type: Image Classification
|
|
Metadata:
|
|
Epochs: 50
|
|
Batch Size: 256
|
|
Results:
|
|
- Task: Fine-tuning
|
|
Dataset: ImageNet-1k
|
|
Metrics:
|
|
Top 1 Accuracy: 87.3
|
|
Config: configs/benchmarks/classification/imagenet/vit-huge-p16_ft-32xb8-coslr-50e_in1k-448.py
|
|
Weights: https://download.openmmlab.com/mmselfsup/1.x/mae/mae_vit-huge-p16_8xb512-fp16-coslr-1600e_in1k/vit-huge-p16_ft-32xb8-coslr-50e_in1k-448/vit-huge-p16_ft-32xb8-coslr-50e_in1k-448_20220916-95b6a0ce.pth
|