Merge remote-tracking branch 'origin/dev-1.x' into 1.x

pull/1083/head v1.0.0rc1
mzr1996 2022-09-30 17:39:00 +08:00
commit 38bea383c1
193 changed files with 8132 additions and 1289 deletions


@ -31,7 +31,7 @@ jobs:
name: Check docstring coverage
command: |
pip install interrogate
interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 60 mmcls
interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 60 mmcls
build_cpu:
parameters:
# The python version must match available image tags in
@ -42,8 +42,6 @@ jobs:
type: string
torchvision:
type: string
mmcv:
type: string
docker:
- image: cimg/python:<< parameters.python >>
resource_class: large
@ -57,31 +55,32 @@ jobs:
- run:
name: Configure Python & pip
command: |
python -m pip install --upgrade pip
python -m pip install wheel
pip install --upgrade pip
pip install wheel
- run:
name: Install PyTorch
command: |
python -V
python -m pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
- run:
name: Install mmcls dependencies
command: |
python -m pip install git+ssh://git@github.com/open-mmlab/mmengine.git@main
python -m pip install << parameters.mmcv >>
python -m pip install timm
python -m pip install -r requirements.txt
pip install git+https://github.com/open-mmlab/mmengine.git@main
pip install -U openmim
mim install 'mmcv >= 2.0.0rc1'
pip install timm
pip install -r requirements.txt
python -c 'import mmcv; print(mmcv.__version__)'
- run:
name: Build and install
command: |
python -m pip install -e .
pip install -e .
- run:
name: Run unittests
command: |
python -m coverage run --branch --source mmcls -m pytest tests/
python -m coverage xml
python -m coverage report -m
coverage run --branch --source mmcls -m pytest tests/
coverage xml
coverage report -m
build_cuda:
machine:
@ -96,15 +95,13 @@ jobs:
cudnn:
type: integer
default: 7
mmcv:
type: string
steps:
- checkout
- run:
# Cloning repos in VM since Docker doesn't have access to the private key
name: Clone Repos
command: |
git clone -b main --depth 1 ssh://git@github.com/open-mmlab/mmengine.git /home/circleci/mmengine
git clone -b main --depth 1 https://github.com/open-mmlab/mmengine.git /home/circleci/mmengine
- run:
name: Build Docker image
command: |
@ -114,7 +111,8 @@ jobs:
name: Install mmcls dependencies
command: |
docker exec mmcls pip install -e /mmengine
docker exec mmcls pip install << parameters.mmcv >>
docker exec mmcls pip install -U openmim
docker exec mmcls mim install 'mmcv >= 2.0.0rc1'
docker exec mmcls pip install -r requirements.txt
docker exec mmcls python -c 'import mmcv; print(mmcv.__version__)'
- run:
@ -124,7 +122,7 @@ jobs:
- run:
name: Run unittests
command: |
docker exec mmcls python -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py
docker exec mmcls python -m pytest tests/ -k 'not timm'
# Invoke jobs via workflows
# See: https://circleci.com/docs/2.0/configuration-reference/#workflows
@ -138,6 +136,7 @@ workflows:
branches:
ignore:
- dev-1.x
- 1.x
pr_stage_test:
when:
not:
@ -154,15 +153,13 @@ workflows:
torch: 1.6.0
torchvision: 0.7.0
python: 3.6.9 # The lowest python 3.6.x version available on CircleCI images
mmcv: https://download.openmmlab.com/mmcv/dev-2.x/cpu/torch1.6.0/mmcv_full-2.0.0rc0-cp36-cp36m-manylinux1_x86_64.whl
requires:
- lint
- build_cpu:
name: maximum_version_cpu
torch: 1.9.0 # TODO: Update the version after mmcv provides more pre-compiled packages.
torchvision: 0.10.0
torch: 1.12.1
torchvision: 0.13.1
python: 3.9.0
mmcv: https://download.openmmlab.com/mmcv/dev-2.x/cpu/torch1.9.0/mmcv_full-2.0.0rc0-cp39-cp39-manylinux1_x86_64.whl
requires:
- minimum_version_cpu
- hold:
@ -175,7 +172,6 @@ workflows:
# Use double quotation mark to explicitly specify its type
# as string instead of number
cuda: "10.2"
mmcv: https://download.openmmlab.com/mmcv/dev-2.x/cu102/torch1.8.0/mmcv_full-2.0.0rc0-cp37-cp37m-manylinux1_x86_64.whl
requires:
- hold
merge_stage_test:
@ -188,7 +184,6 @@ workflows:
torch: 1.6.0
# Use double quotation mark to explicitly specify its type
# as string instead of number
mmcv: https://download.openmmlab.com/mmcv/dev-2.x/cu101/torch1.6.0/mmcv_full-2.0.0rc0-cp37-cp37m-manylinux1_x86_64.whl
cuda: "10.1"
filters:
branches:


@ -2,15 +2,17 @@ import logging
import re
import tempfile
from argparse import ArgumentParser
from collections import OrderedDict
from pathlib import Path
from time import time
from typing import OrderedDict
import mmcv
import numpy as np
import torch
from mmengine import Config, MMLogger, Runner
from mmengine.dataset import Compose
from mmengine import Config, DictAction, MMLogger
from mmengine.dataset import Compose, default_collate
from mmengine.fileio import FileClient
from mmengine.runner import Runner
from modelindex.load_model_index import load
from rich.console import Console
from rich.table import Table
@ -52,6 +54,16 @@ def parse_args():
'--flops-str',
action='store_true',
help='Output FLOPs and params counts in a string form.')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='Override some settings in the used config; the key-value pair '
'in xxx=yyy format will be merged into the config file. If the value '
'to be overwritten is a list, it should be like key="[a,b]" or '
'key=a,b. It also allows nested list/tuple values, e.g. '
'key="[(a,b),(c,d)]". Note that the quotation marks are necessary '
'and that no white space is allowed.')
args = parser.parse_args()
return args
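The `--cfg-options` flag added here uses mmengine's `DictAction`, which turns each `key=value` token into a dict entry whose dotted keys address nested config fields on merge. A minimal sketch of that behavior, using only imports this script already has (`Config` and `DictAction` from mmengine); the override value below is a made-up example, not a setting from this repo:

from argparse import ArgumentParser

from mmengine import Config, DictAction

parser = ArgumentParser()
parser.add_argument('--cfg-options', nargs='+', action=DictAction)
# DictAction stores 'key=value' tokens as dict entries; dotted keys
# address nested config fields when merged into the config.
args = parser.parse_args(['--cfg-options', 'test_dataloader.batch_size=8'])
cfg = Config(dict(test_dataloader=dict(batch_size=64)))
cfg.merge_from_dict(args.cfg_options)
assert cfg.test_dataloader.batch_size == 8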
@ -62,6 +74,8 @@ def inference(config_file, checkpoint, work_dir, args, exp_name):
cfg.load_from = checkpoint
cfg.log_level = 'WARN'
cfg.experiment_name = exp_name
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# build the data pipeline
test_dataset = cfg.test_dataloader.dataset
@ -72,7 +86,8 @@ def inference(config_file, checkpoint, work_dir, args, exp_name):
test_dataset.pipeline.insert(1, dict(type='Resize', scale=32))
data = Compose(test_dataset.pipeline)({'img_path': args.img})
resolution = tuple(data['inputs'].shape[1:])
data = default_collate([data])
resolution = tuple(data['inputs'].shape[-2:])
runner: Runner = Runner.from_cfg(cfg)
model = runner.model
@ -83,26 +98,30 @@ def inference(config_file, checkpoint, work_dir, args, exp_name):
if args.inference_time:
time_record = []
for _ in range(10):
model.val_step(data) # warmup before profiling
torch.cuda.synchronize()
start = time()
model.val_step([data])
model.val_step(data)
torch.cuda.synchronize()
time_record.append((time() - start) * 1000)
result['time_mean'] = np.mean(time_record[1:-1])
result['time_std'] = np.std(time_record[1:-1])
else:
model.val_step([data])
model.val_step(data)
result['model'] = config_file.stem
if args.flops:
from mmcv.cnn.utils import get_model_complexity_info
from fvcore.nn import FlopCountAnalysis, parameter_count
from fvcore.nn.print_model_statistics import _format_size
_format_size = _format_size if args.flops_str else lambda x: x
with torch.no_grad():
if hasattr(model, 'extract_feat'):
model.forward = model.extract_feat
flops, params = get_model_complexity_info(
model,
input_shape=(3, ) + resolution,
print_per_layer_stat=False,
as_strings=args.flops_str)
model.to('cpu')
inputs = (torch.randn((1, 3, *resolution)), )
flops = _format_size(FlopCountAnalysis(model, inputs).total())
params = _format_size(parameter_count(model)[''])
result['flops'] = flops if args.flops_str else int(flops)
result['params'] = params if args.flops_str else int(params)
else:
@ -184,7 +203,6 @@ def main(args):
if args.checkpoint_root is not None:
root = args.checkpoint_root
if 's3://' in args.checkpoint_root:
from mmcv.fileio import FileClient
from petrel_client.common.exception import AccessDeniedError
file_client = FileClient.infer_client(uri=root)
checkpoint = file_client.join_path(


@ -62,6 +62,12 @@ def parse_args():
action='store_true',
help='Summarize benchmark test results.')
parser.add_argument('--save', action='store_true', help='Save the summary')
parser.add_argument(
'--cfg-options',
nargs='+',
type=str,
default=[],
help='Config options for all config files.')
args = parser.parse_args()
return args
@ -76,7 +82,7 @@ def create_test_job_batch(commands, model_info, args, port, script_name):
http_prefix = 'https://download.openmmlab.com/mmclassification/'
if 's3://' in args.checkpoint_root:
from mmcv.fileio import FileClient
from mmengine.fileio import FileClient
from petrel_client.common.exception import AccessDeniedError
file_client = FileClient.infer_client(uri=args.checkpoint_root)
checkpoint = file_client.join_path(
@ -125,6 +131,7 @@ def create_test_job_batch(commands, model_info, args, port, script_name):
f'--work-dir={work_dir} '
f'--out={result_file} '
f'--cfg-option dist_params.port={port} '
f'{" ".join(args.cfg_options)} '
f'--launcher={launcher}\n')
with open(work_dir / 'job.sh', 'w') as f:


@ -3,22 +3,48 @@ import json
import os
import os.path as osp
import re
from collections import OrderedDict
from datetime import datetime
from pathlib import Path
from zipfile import ZipFile
import yaml
from modelindex.load_model_index import load
from rich.console import Console
from rich.syntax import Syntax
from rich.table import Table
console = Console()
MMCLS_ROOT = Path(__file__).absolute().parents[2]
CYCLE_LEVELS = ['month', 'quarter', 'half-year', 'no-training']
METRICS_MAP = {
'Top 1 Accuracy': 'accuracy/top1',
'Top 5 Accuracy': 'accuracy/top5'
}
class RangeAction(argparse.Action):
def __call__(self, parser, namespace, values: str, option_string):
matches = re.match(r'([><=]*)([-\w]+)', values)
if matches is None:
raise ValueError(f'Unavailable range option {values}')
symbol, range_str = matches.groups()
assert range_str in CYCLE_LEVELS, \
f'{range_str} is not in {CYCLE_LEVELS}.'
level = CYCLE_LEVELS.index(range_str)
symbol = symbol or '<='
ranges = set()
if '=' in symbol:
ranges.add(level)
if '>' in symbol:
ranges.update(range(level + 1, len(CYCLE_LEVELS)))
if '<' in symbol:
ranges.update(range(level))
assert len(ranges) > 0, 'No range is selected.'
setattr(namespace, self.dest, ranges)
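RangeAction maps an option string onto a set of CYCLE_LEVELS indices: an optional comparison prefix ('<', '<=', '>', '>=', '=', defaulting to '<=') followed by a level name. A small usage sketch under that reading, assuming the module-level imports of this script:

# With CYCLE_LEVELS = ['month', 'quarter', 'half-year', 'no-training']:
# 'quarter'   -> {0, 1}     (default '<=': quarter and anything more frequent)
# '>=quarter' -> {1, 2, 3}  (quarter and anything less frequent)
parser = argparse.ArgumentParser()
parser.add_argument('--range', type=str, default={0}, action=RangeAction)
assert parser.parse_args(['--range', 'quarter']).range == {0, 1}
assert parser.parse_args(['--range', '>=quarter']).range == {1, 2, 3}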
def parse_args():
parser = argparse.ArgumentParser(
description='Train models (in bench_train.yml) and compare accuracy.')
@ -32,6 +58,14 @@ def parse_args():
parser.add_argument('--port', type=int, default=29666, help='dist port')
parser.add_argument(
'--models', nargs='+', type=str, help='Specify model names to run.')
parser.add_argument(
'--range',
type=str,
default={0},
action=RangeAction,
metavar='{month,quarter,half-year,no-training}',
help='The training benchmark range; "no-training" means all models, '
"including those we haven't trained.")
parser.add_argument(
'--work-dir',
default='work_dirs/benchmark_train',
@ -63,18 +97,33 @@ def parse_args():
'--save',
action='store_true',
help='Save the summary and archive log files.')
parser.add_argument(
'--cfg-options',
nargs='+',
type=str,
default=[],
help='Config options for all config files.')
args = parser.parse_args()
return args
def get_gpu_number(model_info):
config = osp.basename(model_info.config)
matches = re.match(r'.*[-_](\d+)xb(\d+).*', config)
if matches is None:
raise ValueError(
f'Cannot get the GPU number from the config name {config}')
gpus = int(matches.groups()[0])
return gpus
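get_gpu_number relies on the `{N}xb{B}` convention in config file names, where N is the GPU count and B the per-GPU batch size. A quick sanity check (a sketch; SimpleNamespace stands in for a real model-index entry):

from types import SimpleNamespace

# 'resnet50_8xb32_in1k.py' encodes 8 GPUs with a batch size of 32 per GPU.
info = SimpleNamespace(config='configs/resnet/resnet50_8xb32_in1k.py')
assert get_gpu_number(info) == 8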
def create_train_job_batch(commands, model_info, args, port, script_name):
fname = model_info.name
assert 'Gpus' in model_info.data, \
f"Haven't specify gpu numbers for {fname}"
gpus = model_info.data['Gpus']
gpus = get_gpu_number(model_info)
gpus_per_node = min(gpus, 8)
config = Path(model_info.config)
assert config.exists(), f'"{fname}": {config} not found.'
@ -101,15 +150,17 @@ def create_train_job_batch(commands, model_info, args, port, script_name):
f'#SBATCH --output {work_dir}/job.%j.out\n'
f'#SBATCH --partition={args.partition}\n'
f'#SBATCH --job-name {job_name}\n'
f'#SBATCH --gres=gpu:8\n'
f'#SBATCH --gres=gpu:{gpus_per_node}\n'
f'{mail_cfg}{quota_cfg}'
f'#SBATCH --ntasks-per-node=8\n'
f'#SBATCH --ntasks-per-node={gpus_per_node}\n'
f'#SBATCH --ntasks={gpus}\n'
f'#SBATCH --cpus-per-task=5\n\n'
f'{runner} -u {script_name} {config} '
f'--work-dir={work_dir} --cfg-option '
f'dist_params.port={port} '
f'checkpoint_config.max_keep_ckpts=10 '
f'env_cfg.dist_cfg.port={port} '
f'{" ".join(args.cfg_options)} '
f'default_hooks.checkpoint.max_keep_ckpts=2 '
f'default_hooks.checkpoint.save_best="auto" '
f'--launcher={launcher}\n')
with open(work_dir / 'job.sh', 'w') as f:
@ -124,33 +175,16 @@ def create_train_job_batch(commands, model_info, args, port, script_name):
return work_dir / 'job.sh'
def train(args):
models_cfg = load(str(Path(__file__).parent / 'bench_train.yml'))
models_cfg.build_models_with_collections()
models = {model.name: model for model in models_cfg.models}
def train(models, args):
script_name = osp.join('tools', 'train.py')
port = args.port
commands = []
if args.models:
patterns = [re.compile(pattern) for pattern in args.models]
filter_models = {}
for k, v in models.items():
if any([re.match(pattern, k) for pattern in patterns]):
filter_models[k] = v
if len(filter_models) == 0:
print('No model found, please specify models in:')
print('\n'.join(models.keys()))
return
models = filter_models
for model_info in models.values():
months = model_info.data.get('Months', range(1, 13))
if datetime.now().month in months:
script_path = create_train_job_batch(commands, model_info, args,
port, script_name)
port += 1
script_path = create_train_job_batch(commands, model_info, args, port,
script_name)
port += 1
command_str = '\n'.join(commands)
@ -245,12 +279,14 @@ def show_summary(summary_data):
metric = summary[metric_key]
expect = metric['expect']
last = metric['last']
last_epoch = metric['last_epoch']
last_color = set_color(last, expect)
best = metric['best']
best_color = set_color(best, expect)
best_epoch = metric['best_epoch']
row.append(f'{expect:.2f}')
row.append(f'[{last_color}]{last:.2f}[/{last_color}]')
row.append(
f'[{last_color}]{last:.2f}[/{last_color}] ({last_epoch})')
row.append(
f'[{best_color}]{best:.2f}[/{best_color}] ({best_epoch})')
table.add_row(*row)
@ -258,25 +294,11 @@ def show_summary(summary_data):
console.print(table)
def summary(args):
models_cfg = load(str(Path(__file__).parent / 'bench_train.yml'))
models = {model.name: model for model in models_cfg.models}
def summary(models, args):
work_dir = Path(args.work_dir)
dir_map = {p.name: p for p in work_dir.iterdir() if p.is_dir()}
if args.models:
patterns = [re.compile(pattern) for pattern in args.models]
filter_models = {}
for k, v in models.items():
if any([re.match(pattern, k) for pattern in patterns]):
filter_models[k] = v
if len(filter_models) == 0:
print('No model found, please specify models in:')
print('\n'.join(models.keys()))
return
models = filter_models
summary_data = {}
for model_name, model_info in models.items():
@ -287,17 +309,19 @@ def summary(args):
# Skip if not found any vis_data folder.
sub_dir = dir_map[model_name]
vis_folders = [d for d in sub_dir.iterdir() if d.is_dir()]
if len(vis_folders) == 0:
continue
log_file = sorted(vis_folders)[-1] / 'vis_data' / 'scalars.json'
if not log_file.exists():
log_files = [f for f in sub_dir.glob('*/vis_data/scalars.json')]
if len(log_files) == 0:
continue
log_file = sorted(log_files)[-1]
# parse train log
with open(log_file) as f:
json_logs = [json.loads(s) for s in f.readlines()]
val_logs = [log for log in json_logs if 'loss' not in log]
val_logs = [
log for log in json_logs
# TODO: need a better method to extract validate log
if 'loss' not in log and 'accuracy/top1' in log
]
if len(val_logs) == 0:
continue
@ -320,9 +344,10 @@ def summary(args):
summary[key_yml] = dict(
expect=expect_result,
last=last,
last_epoch=len(val_logs),
best=best,
best_epoch=best_epoch)
summary_data[model_name] = summary
best_epoch=best_epoch + 1)
summary_data[model_name].update(summary)
show_summary(summary_data)
if args.save:
@ -332,10 +357,39 @@ def summary(args):
def main():
args = parse_args()
model_index_file = MMCLS_ROOT / 'model-index.yml'
model_index = load(str(model_index_file))
model_index.build_models_with_collections()
all_models = {model.name: model for model in model_index.models}
with open(Path(__file__).parent / 'bench_train.yml', 'r') as f:
train_items = yaml.safe_load(f)
models = OrderedDict()
for item in train_items:
name = item['Name']
model_info = all_models[name]
model_info.cycle = item.get('Cycle', None)
cycle = getattr(model_info, 'cycle', 'month')
cycle_level = CYCLE_LEVELS.index(cycle)
if cycle_level in args.range:
models[name] = model_info
if args.models:
patterns = [re.compile(pattern) for pattern in args.models]
filter_models = {}
for k, v in models.items():
if any([re.match(pattern, k) for pattern in patterns]):
filter_models[k] = v
if len(filter_models) == 0:
print('No model found, please specify models in:')
print('\n'.join(models.keys()))
return
models = filter_models
if args.summary:
summary(args)
summary(models, args)
else:
train(args)
train(models, args)
if __name__ == '__main__':


@ -1,88 +1,86 @@
Models:
- Name: resnet50
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 76.55
Top 5 Accuracy: 93.06
Config: configs/resnet/resnet50_8xb32_in1k.py
Gpus: 8
- Name: mobilenet-v2_8xb32_in1k
Cycle: month
- Name: seresnet50
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 77.74
Top 5 Accuracy: 93.84
Config: configs/seresnet/seresnet50_8xb32_in1k.py
Gpus: 8
- Name: resnet50_8xb32_in1k
Cycle: month
- Name: vit-base
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 82.37
Top 5 Accuracy: 96.15
Config: configs/vision_transformer/vit-base-p16_pt-32xb128-mae_in1k-224.py
Gpus: 32
- Name: seresnet50_8xb32_in1k
Cycle: month
- Name: mobilenetv2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 71.86
Top 5 Accuracy: 90.42
Config: configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py
Gpus: 8
- Name: swin-small_16xb64_in1k
Cycle: month
- Name: swin_tiny
Results:
- Dataset: ImageNet
Metrics:
Top 1 Accuracy: 81.18
Top 5 Accuracy: 95.61
Weights: https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925-66df6be6.pth
Config: configs/swin_transformer/swin-tiny_16xb64_in1k.py
Gpus: 16
- Name: vit-base-p16_pt-32xb128-mae_in1k
Cycle: month
- Name: vgg16
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 71.62
Top 5 Accuracy: 90.49
Config: configs/vgg/vgg16_8xb32_in1k.py
Gpus: 8
Months:
- 1
- 4
- 7
- 10
- Name: resnet50_8xb256-rsb-a1-600e_in1k
Cycle: quarter
- Name: shufflenet_v2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 69.55
Top 5 Accuracy: 88.92
Config: configs/shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py
Gpus: 16
Months:
- 2
- 5
- 8
- 11
- Name: resnext50-32x4d_8xb32_in1k
Cycle: quarter
- Name: resnet-rsb
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 80.12
Top 5 Accuracy: 94.78
Config: configs/resnet/resnet50_8xb256-rsb-a1-600e_in1k.py
Gpus: 8
Months:
- 3
- 6
- 9
- 12
- Name: shufflenet-v2-1x_16xb64_in1k
Cycle: quarter
- Name: vgg16_8xb32_in1k
Cycle: quarter
- Name: shufflenet-v1-1x_16xb64_in1k
Cycle: half-year
- Name: t2t-vit-t-14_8xb64_in1k
Cycle: half-year
- Name: regnetx-1.6gf_8xb128_in1k
Cycle: half-year
- Name: van-small_8xb128_in1k
Cycle: no-training
- Name: res2net50-w14-s8_3rdparty_8xb32_in1k
Cycle: no-training
- Name: repvgg-A2_3rdparty_4xb64-coslr-120e_in1k
Cycle: no-training
- Name: tnt-small-p16_3rdparty_in1k
Cycle: no-training
- Name: mlp-mixer-base-p16_3rdparty_64xb64_in1k
Cycle: no-training
- Name: conformer-small-p16_3rdparty_8xb128_in1k
Cycle: no-training
- Name: twins-pcpvt-base_3rdparty_8xb128_in1k
Cycle: no-training
- Name: efficientnet-b0_3rdparty_8xb32_in1k
Cycle: no-training
- Name: convnext-small_3rdparty_32xb128_in1k
Cycle: no-training
- Name: hrnet-w18_3rdparty_8xb32_in1k
Cycle: no-training
- Name: repmlp-base_3rdparty_8xb64_in1k
Cycle: no-training
- Name: wide-resnet50_3rdparty_8xb32_in1k
Cycle: no-training
- Name: cspresnet50_3rdparty_8xb32_in1k
Cycle: no-training
- Name: convmixer-768-32_10xb64_in1k
Cycle: no-training
- Name: densenet169_4xb256_in1k
Cycle: no-training
- Name: poolformer-s24_3rdparty_32xb128_in1k
Cycle: no-training
- Name: inception-v3_3rdparty_8xb32_in1k
Cycle: no-training

.github/workflows/lint.yml

@ -0,0 +1,27 @@
name: lint
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Install pre-commit hook
run: |
pip install pre-commit
pre-commit install
- name: Linting
run: pre-commit run --all-files
- name: Check docstring coverage
run: |
pip install interrogate
interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 60 mmcls


@ -0,0 +1,87 @@
name: pr_stage_test
on:
pull_request:
paths-ignore:
- 'README.md'
- 'README_zh-CN.md'
- 'docs/**'
- 'demo/**'
- 'tools/**'
- 'configs/**'
- '.dev_scripts/**'
- '.circleci/**'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
runs-on: ubuntu-18.04
strategy:
matrix:
python-version: [3.7]
include:
- torch: 1.8.1
torchvision: 0.9.1
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Upgrade pip
run: pip install pip --upgrade
- name: Install PyTorch
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
- name: Install mmcls dependencies
run: |
pip install git+https://github.com/open-mmlab/mmengine.git@main
pip install -U openmim
mim install 'mmcv >= 2.0.0rc1'
pip install -r requirements.txt
- name: Build and install
run: pip install -e .
- name: Run unittests and generate coverage report
run: |
coverage run --branch --source mmcls -m pytest tests/ -k 'not timm'
coverage xml
coverage report -m
# Upload coverage report for python3.7 && pytorch1.8.1 cpu
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1.0.14
with:
file: ./coverage.xml
flags: unittests
env_vars: OS,PYTHON
name: codecov-umbrella
fail_ci_if_error: false
build_windows:
runs-on: windows-2022
strategy:
matrix:
python: [3.7]
platform: [cu111]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Upgrade pip
run: pip install pip --upgrade
- name: Install PyTorch
run: pip install torch==1.8.2+${{matrix.platform}} torchvision==0.9.2+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
- name: Install mmcls dependencies
run: |
pip install git+https://github.com/open-mmlab/mmengine.git@main
pip install -U openmim
mim install 'mmcv >= 2.0.0rc1'
pip install -r requirements.txt
- name: Build and install
run: pip install -e .
- name: Run unittests
run: |
pytest tests/ -k 'not timm' --ignore tests/test_models/test_backbones


@ -0,0 +1,22 @@
name: deploy
on: push
jobs:
build-n-publish:
runs-on: ubuntu-latest
if: startsWith(github.event.ref, 'refs/tags')
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Build MMClassification
run: |
pip install wheel
python setup.py sdist bdist_wheel
- name: Publish distribution to PyPI
run: |
pip install twine
twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }}

.github/workflows/test_mim.yml

@ -0,0 +1,44 @@
name: test-mim
on:
push:
paths:
- 'model-index.yml'
- 'configs/**'
pull_request:
paths:
- 'model-index.yml'
- 'configs/**'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build_cpu:
runs-on: ubuntu-18.04
strategy:
matrix:
python-version: [3.7]
torch: [1.8.0]
include:
- torch: 1.8.0
torch_version: torch1.8
torchvision: 0.9.0
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Upgrade pip
run: pip install pip --upgrade
- name: Install PyTorch
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
- name: Install openmim
run: pip install openmim
- name: Build and install
run: mim install -e .
- name: test commands of mim
run: mim search mmcls


@ -44,10 +44,10 @@ repos:
- id: docformatter
args: ["--in-place", "--wrap-descriptions", "79"]
- repo: https://github.com/open-mmlab/pre-commit-hooks
rev: v0.2.0
rev: v0.4.0
hooks:
- id: check-copyright
args: ["mmcls", "tests", "demo", "tools"]
args: ["mmcls", "tests", "demo", "tools", "--excludes", "mmcls/.mim/", "--ignore-file-not-found-error"]
# - repo: local
# hooks:
# - id: clang-format


@ -58,6 +58,11 @@ The `1.x` branch works with **PyTorch 1.6+**.
## What's new
v1.0.0rc1 was released on 30/9/2022.
- Support MViT, EdgeNeXt, Swin-Transformer V2, EfficientFormer and MobileOne.
- Support BEiT type transformer layers.
v1.0.0rc0 was released on 31/8/2022.
This release introduced a brand new and flexible training & test engine, but it's still in progress. Welcome
@ -115,6 +120,7 @@ Results and models are available in the [model zoo](https://mmclassification.rea
- [x] [MobileNetV2](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mobilenet_v2)
- [x] [MobileNetV3](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mobilenet_v3)
- [x] [Swin-Transformer](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/swin_transformer)
- [x] [Swin-Transformer V2](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/swin_transformer_v2)
- [x] [RepVGG](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/repvgg)
- [x] [Vision-Transformer](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/vision_transformer)
- [x] [Transformer-in-Transformer](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/tnt)
@ -125,6 +131,7 @@ Results and models are available in the [model zoo](https://mmclassification.rea
- [x] [T2T-ViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/t2t_vit)
- [x] [Twins](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/twins)
- [x] [EfficientNet](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/efficientnet)
- [x] [EdgeNeXt](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/edgenext)
- [x] [ConvNeXt](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/convnext)
- [x] [HRNet](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/hrnet)
- [x] [VAN](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/van)
@ -132,6 +139,9 @@ Results and models are available in the [model zoo](https://mmclassification.rea
- [x] [CSPNet](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/cspnet)
- [x] [PoolFormer](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/poolformer)
- [x] [Inception V3](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/inception_v3)
- [x] [MobileOne](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mobileone)
- [x] [EfficientFormer](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/efficientformer)
- [x] [MViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mvit)
</details>


@ -57,6 +57,11 @@ MMClassification is an open-source image classification toolbox based on PyTorch, and a part of the [O
## Changelog
v1.0.0rc1 was released on 2022/9/30.
- Support backbones including MViT, EdgeNeXt, Swin-Transformer V2, EfficientFormer and MobileOne.
- Support BEiT type transformer layers.
v1.0.0rc0 was released on 2022/8/31.
This release introduced a brand new and flexible training & test engine, which is still in progress. You are welcome to try it according to [the documentation](https://mmclassification.readthedocs.io/zh_CN/1.x/).
@ -114,6 +119,7 @@ mim install -e .
- [x] [MobileNetV2](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mobilenet_v2)
- [x] [MobileNetV3](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mobilenet_v3)
- [x] [Swin-Transformer](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/swin_transformer)
- [x] [Swin-Transformer V2](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/swin_transformer_v2)
- [x] [RepVGG](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/repvgg)
- [x] [Vision-Transformer](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/vision_transformer)
- [x] [Transformer-in-Transformer](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/tnt)
@ -124,6 +130,7 @@ mim install -e .
- [x] [T2T-ViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/t2t_vit)
- [x] [Twins](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/twins)
- [x] [EfficientNet](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/efficientnet)
- [x] [EdgeNeXt](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/edgenext)
- [x] [ConvNeXt](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/convnext)
- [x] [HRNet](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/hrnet)
- [x] [VAN](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/van)
@ -131,6 +138,9 @@ mim install -e .
- [x] [CSPNet](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/cspnet)
- [x] [PoolFormer](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/poolformer)
- [x] [Inception V3](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/inception_v3)
- [x] [MobileOne](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mobileone)
- [x] [EfficientFormer](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/efficientformer)
- [x] [MViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mvit)
</details>


@ -0,0 +1,83 @@
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=256,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='RandAugment',
policies='timm_increasing',
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
dict(
type='RandomErasing',
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=bgr_mean,
fill_std=bgr_std),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=292,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=256),
dict(type='PackClsInputs')
]
train_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator
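As the comment above notes, this config reuses the validation split for testing; a standard test needs an explicitly configured test dataset. A minimal sketch of what that override could look like, reusing `dataset_type` and `test_pipeline` from this file (the `meta/test.txt` annotation file and `test` prefix are assumptions about the local data layout, not files shipped with this repo):

test_dataloader = dict(
    batch_size=64,
    num_workers=5,
    dataset=dict(
        type=dataset_type,
        data_root='data/imagenet',
        ann_file='meta/test.txt',  # assumed annotation file
        data_prefix='test',  # assumed directory layout
        pipeline=test_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=False),
    persistent_workers=True,
)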


@ -0,0 +1,83 @@
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=256,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='RandAugment',
policies='timm_increasing',
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
dict(
type='RandomErasing',
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=bgr_mean,
fill_std=bgr_std),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=292, # ( 256 / 224 * 256 )
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=256),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=64,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator


@ -0,0 +1,23 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='EdgeNeXt',
arch='base',
out_indices=(3, ),
drop_path_rate=0.1,
gap_before_final_norm=True,
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(type='Constant', layer=['LayerNorm'], val=1., bias=0.),
]),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=584,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
))


@ -0,0 +1,23 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='EdgeNeXt',
arch='small',
out_indices=(3, ),
drop_path_rate=0.1,
gap_before_final_norm=True,
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(type='Constant', layer=['LayerNorm'], val=1., bias=0.),
]),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=304,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
))


@ -0,0 +1,23 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='EdgeNeXt',
arch='xsmall',
out_indices=(3, ),
drop_path_rate=0.1,
gap_before_final_norm=True,
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(type='Constant', layer=['LayerNorm'], val=1., bias=0.),
]),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=192,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
))


@ -0,0 +1,23 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='EdgeNeXt',
arch='xxsmall',
out_indices=(3, ),
drop_path_rate=0.1,
gap_before_final_norm=True,
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(type='Constant', layer=['LayerNorm'], val=1., bias=0.),
]),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=168,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
))


@ -0,0 +1,18 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='EfficientFormer',
arch='l1',
drop_path_rate=0,
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(type='Constant', layer=['GroupNorm'], val=1., bias=0.),
dict(type='Constant', layer=['LayerScale'], val=1e-5)
]),
neck=dict(type='GlobalAveragePooling', dim=1),
head=dict(
type='EfficientFormerClsHead', in_channels=448, num_classes=1000))


@ -0,0 +1,19 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='MobileOne',
arch='s0',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(
type='LabelSmoothLoss',
label_smooth_val=0.1,
mode='original',
),
topk=(1, 5),
))


@ -0,0 +1,19 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='MobileOne',
arch='s1',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(
type='LabelSmoothLoss',
label_smooth_val=0.1,
mode='original',
),
topk=(1, 5),
))


@ -0,0 +1,19 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='MobileOne',
arch='s2',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=2048,
loss=dict(
type='LabelSmoothLoss',
label_smooth_val=0.1,
mode='original',
),
topk=(1, 5),
))


@ -0,0 +1,19 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='MobileOne',
arch='s3',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=2048,
loss=dict(
type='LabelSmoothLoss',
label_smooth_val=0.1,
mode='original',
),
topk=(1, 5),
))


@ -0,0 +1,19 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='MobileOne',
arch='s4',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=2048,
loss=dict(
type='LabelSmoothLoss',
label_smooth_val=0.1,
mode='original',
),
topk=(1, 5),
))


@ -0,0 +1,19 @@
model = dict(
type='ImageClassifier',
backbone=dict(type='MViT', arch='base', drop_path_rate=0.3),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
in_channels=768,
num_classes=1000,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8, num_classes=1000),
dict(type='CutMix', alpha=1.0, num_classes=1000)
]))


@ -0,0 +1,23 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='MViT',
arch='large',
drop_path_rate=0.5,
dim_mul_in_attention=False),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
in_channels=1152,
num_classes=1000,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8, num_classes=1000),
dict(type='CutMix', alpha=1.0, num_classes=1000)
]))


@ -0,0 +1,19 @@
model = dict(
type='ImageClassifier',
backbone=dict(type='MViT', arch='small', drop_path_rate=0.1),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
in_channels=768,
num_classes=1000,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8, num_classes=1000),
dict(type='CutMix', alpha=1.0, num_classes=1000)
]))


@ -0,0 +1,19 @@
model = dict(
type='ImageClassifier',
backbone=dict(type='MViT', arch='tiny', drop_path_rate=0.1),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
in_channels=768,
num_classes=1000,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8, num_classes=1000),
dict(type='CutMix', alpha=1.0, num_classes=1000)
]))


@ -0,0 +1,26 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='SwinTransformerV2',
arch='base',
img_size=256,
drop_path_rate=0.5),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8, num_classes=1000),
dict(type='CutMix', alpha=1.0, num_classes=1000)
]),
)


@ -0,0 +1,17 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='SwinTransformerV2',
arch='base',
img_size=384,
drop_path_rate=0.2),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False))


@ -0,0 +1,16 @@
# model settings
# Only for evaluation
model = dict(
type='ImageClassifier',
backbone=dict(
type='SwinTransformerV2',
arch='large',
img_size=256,
drop_path_rate=0.2),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1536,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5)))


@ -0,0 +1,16 @@
# model settings
# Only for evaluation
model = dict(
type='ImageClassifier',
backbone=dict(
type='SwinTransformerV2',
arch='large',
img_size=384,
drop_path_rate=0.2),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1536,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5)))


@ -0,0 +1,26 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='SwinTransformerV2',
arch='small',
img_size=256,
drop_path_rate=0.3),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=768,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8, num_classes=1000),
dict(type='CutMix', alpha=1.0, num_classes=1000)
]),
)


@ -0,0 +1,26 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='SwinTransformerV2',
arch='tiny',
img_size=256,
drop_path_rate=0.2),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=768,
init_cfg=None, # suppress the default init_cfg of LinearClsHead.
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
cal_acc=False),
init_cfg=[
dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.),
dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
],
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8, num_classes=1000),
dict(type='CutMix', alpha=1.0, num_classes=1000)
]),
)


@ -23,18 +23,11 @@ param_scheduler = [
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
begin=0,
end=20,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(
type='CosineAnnealingLR',
T_max=280,
eta_min=1e-5,
by_epoch=True,
begin=20,
end=300)
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=20)
]
# train, val, test setting


@ -6,14 +6,8 @@ optim_wrapper = dict(
# learning policy
param_scheduler = [
dict(
type='ConstantLR',
factor=0.1,
by_epoch=True,
begin=0,
end=5,
convert_to_iter_based=True),
dict(type='PolyLR', eta_min=0, by_epoch=True, begin=5, end=300)
dict(type='ConstantLR', factor=0.1, by_epoch=False, begin=0, end=5000),
dict(type='PolyLR', eta_min=0, by_epoch=False, begin=5000)
]
# train, val, test setting


@ -11,25 +11,22 @@ optim_wrapper = dict(
)
# learning policy
warmup_epochs = 15 # about 10000 iterations for ImageNet-1k
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
begin=0,
# about 10000 iterations for ImageNet-1k
end=15,
end=warmup_epochs,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(
type='CosineAnnealingLR',
T_max=285,
eta_min=1e-5,
by_epoch=True,
begin=15,
end=300)
begin=warmup_epochs)
]
# train, val, test setting


@ -16,12 +16,12 @@ Within Convolutional Neural Network (CNN), the convolution operations are good a
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-------------------: | :-------: | :------: | :-------: | :-------: | :---------------------------------------------------------------------: | :-----------------------------------------------------------------------: |
| Conformer-tiny-p16\* | 23.52 | 4.90 | 81.31 | 95.60 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/conformer/conformer-tiny-p16_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/conformer/conformer-tiny-p16_3rdparty_8xb128_in1k_20211206-f6860372.pth) |
| Conformer-small-p32\* | 38.85 | 7.09 | 81.96 | 96.02 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/conformer/conformer-small-p32_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/conformer/conformer-small-p32_8xb128_in1k_20211206-947a0816.pth) |
| Conformer-small-p16\* | 37.67 | 10.31 | 83.32 | 96.46 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/conformer/conformer-small-p16_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/conformer/conformer-small-p16_3rdparty_8xb128_in1k_20211206-3065dcf5.pth) |
| Conformer-base-p16\* | 83.29 | 22.89 | 83.82 | 96.59 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/conformer/conformer-base-p16_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/conformer/conformer-base-p16_3rdparty_8xb128_in1k_20211206-bfdf8637.pth) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-------------------: | :-------: | :------: | :-------: | :-------: | :--------------------------------------------: | :------------------------------------------------------------------------------------------------: |
| Conformer-tiny-p16\* | 23.52 | 4.90 | 81.31 | 95.60 | [config](./conformer-tiny-p16_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/conformer/conformer-tiny-p16_3rdparty_8xb128_in1k_20211206-f6860372.pth) |
| Conformer-small-p32\* | 38.85 | 7.09 | 81.96 | 96.02 | [config](./conformer-small-p32_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/conformer/conformer-small-p32_8xb128_in1k_20211206-947a0816.pth) |
| Conformer-small-p16\* | 37.67 | 10.31 | 83.32 | 96.46 | [config](./conformer-small-p16_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/conformer/conformer-small-p16_3rdparty_8xb128_in1k_20211206-3065dcf5.pth) |
| Conformer-base-p16\* | 83.29 | 22.89 | 83.82 | 96.59 | [config](./conformer-base-p16_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/conformer/conformer-base-p16_3rdparty_8xb128_in1k_20211206-bfdf8637.pth) |
*Models with * are converted from the [official repo](https://github.com/pengzhiliang/Conformer). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*


@ -20,11 +20,11 @@ Although convolutional networks have been the dominant architecture for vision t
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------------: | :------------------------------------------------------------------------: |
| ConvMixer-768/32\* | 21.11 | 19.62 | 80.16 | 95.08 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/convmixer/convmixer-768-32_10xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convmixer/convmixer-768-32_3rdparty_10xb64_in1k_20220323-bca1f7b8.pth) |
| ConvMixer-1024/20\* | 24.38 | 5.55 | 76.94 | 93.36 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/convmixer/convmixer-1024-20_10xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convmixer/convmixer-1024-20_3rdparty_10xb64_in1k_20220323-48f8aeba.pth) |
| ConvMixer-1536/20\* | 51.63 | 48.71 | 81.37 | 95.61 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/convmixer/convmixer-1536-20_10xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convmixer/convmixer-1536_20_3rdparty_10xb64_in1k_20220323-ea5786f3.pth) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------------: | :-------: | :------: | :-------: | :-------: | :------------------------------------------: | :----------------------------------------------------------------------------------------------------: |
| ConvMixer-768/32\* | 21.11 | 19.62 | 80.16 | 95.08 | [config](./convmixer-768-32_10xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convmixer/convmixer-768-32_3rdparty_10xb64_in1k_20220323-bca1f7b8.pth) |
| ConvMixer-1024/20\* | 24.38 | 5.55 | 76.94 | 93.36 | [config](./convmixer-1024-20_10xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convmixer/convmixer-1024-20_3rdparty_10xb64_in1k_20220323-48f8aeba.pth) |
| ConvMixer-1536/20\* | 51.63 | 48.71 | 81.37 | 95.61 | [config](./convmixer-1536-20_10xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convmixer/convmixer-1536_20_3rdparty_10xb64_in1k_20220323-ea5786f3.pth) |
*Models with * are converted from the [official repo](https://github.com/locuslab/convmixer). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*


@ -15,7 +15,7 @@ Models:
Metadata:
FLOPs: 19623051264
Parameters: 21110248
In Collections: ConvMixer
In Collection: ConvMixer
Results:
- Dataset: ImageNet-1k
Metrics:
@ -31,7 +31,7 @@ Models:
Metadata:
FLOPs: 5550112768
Parameters: 24383464
In Collections: ConvMixer
In Collection: ConvMixer
Results:
- Dataset: ImageNet-1k
Metrics:
@ -47,7 +47,7 @@ Models:
Metadata:
FLOPs: 48713170944
Parameters: 51625960
In Collections: ConvMixer
In Collection: ConvMixer
Results:
- Dataset: ImageNet-1k
Metrics:


@ -20,15 +20,15 @@ The "Roaring 20s" of visual recognition began with the introduction of Vision Tr
### ImageNet-1k
| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------: | :----------: | :-------: | :------: | :-------: | :-------: | :-------------------------------------------------------------------: | :---------------------------------------------------------------------: |
| ConvNeXt-T\* | From scratch | 28.59 | 4.46 | 82.05 | 95.86 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/convnext/convnext-tiny_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128_in1k_20220124-18abde00.pth) |
| ConvNeXt-S\* | From scratch | 50.22 | 8.69 | 83.13 | 96.44 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/convnext/convnext-small_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_3rdparty_32xb128_in1k_20220124-d39b5192.pth) |
| ConvNeXt-B\* | From scratch | 88.59 | 15.36 | 83.85 | 96.74 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/convnext/convnext-base_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128_in1k_20220124-d0915162.pth) |
| ConvNeXt-B\* | ImageNet-21k | 88.59 | 15.36 | 85.81 | 97.86 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/convnext/convnext-base_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_in21k-pre-3rdparty_32xb128_in1k_20220124-eb2d6ada.pth) |
| ConvNeXt-L\* | From scratch | 197.77 | 34.37 | 84.30 | 96.89 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/convnext/convnext-large_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_64xb64_in1k_20220124-f8a0ded0.pth) |
| ConvNeXt-L\* | ImageNet-21k | 197.77 | 34.37 | 86.61 | 98.04 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/convnext/convnext-large_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_in21k-pre-3rdparty_64xb64_in1k_20220124-2412403d.pth) |
| ConvNeXt-XL\* | ImageNet-21k | 350.20 | 60.93 | 86.97 | 98.20 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/convnext/convnext-xlarge_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_in21k-pre-3rdparty_64xb64_in1k_20220124-76b6863d.pth) |
| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------: | :----------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------: |
| ConvNeXt-T\* | From scratch | 28.59 | 4.46 | 82.05 | 95.86 | [config](./convnext-tiny_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-tiny_3rdparty_32xb128_in1k_20220124-18abde00.pth) |
| ConvNeXt-S\* | From scratch | 50.22 | 8.69 | 83.13 | 96.44 | [config](./convnext-small_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-small_3rdparty_32xb128_in1k_20220124-d39b5192.pth) |
| ConvNeXt-B\* | From scratch | 88.59 | 15.36 | 83.85 | 96.74 | [config](./convnext-base_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_32xb128_in1k_20220124-d0915162.pth) |
| ConvNeXt-B\* | ImageNet-21k | 88.59 | 15.36 | 85.81 | 97.86 | [config](./convnext-base_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_in21k-pre-3rdparty_32xb128_in1k_20220124-eb2d6ada.pth) |
| ConvNeXt-L\* | From scratch | 197.77 | 34.37 | 84.30 | 96.89 | [config](./convnext-large_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_64xb64_in1k_20220124-f8a0ded0.pth) |
| ConvNeXt-L\* | ImageNet-21k | 197.77 | 34.37 | 86.61 | 98.04 | [config](./convnext-large_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_in21k-pre-3rdparty_64xb64_in1k_20220124-2412403d.pth) |
| ConvNeXt-XL\* | ImageNet-21k | 350.20 | 60.93 | 86.97 | 98.20 | [config](./convnext-xlarge_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_in21k-pre-3rdparty_64xb64_in1k_20220124-76b6863d.pth) |
*Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*


@ -18,7 +18,7 @@ Models:
Metadata:
FLOPs: 4457472768
Parameters: 28589128
In Collections: ConvNeXt
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
@ -35,7 +35,7 @@ Models:
Training Data: ImageNet-1k
FLOPs: 4457472768
Parameters: 28589128
In Collections: ConvNeXt
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
@ -49,9 +49,10 @@ Models:
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-small_3rdparty_32xb128_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 8687008512
Parameters: 50223688
In Collections: ConvNeXt
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
@ -68,7 +69,7 @@ Models:
Training Data: ImageNet-1k
FLOPs: 8687008512
Parameters: 50223688
In Collections: ConvNeXt
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
@ -82,9 +83,10 @@ Models:
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-base_3rdparty_32xb128_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 15359124480
Parameters: 88591464
In Collections: ConvNeXt
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
@ -101,7 +103,7 @@ Models:
Training Data: ImageNet-1k
FLOPs: 15359124480
Parameters: 88591464
In Collections: ConvNeXt
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
@ -118,7 +120,7 @@ Models:
Training Data: ImageNet-21k
FLOPs: 15359124480
Parameters: 88591464
In Collections: ConvNeXt
In Collection: ConvNeXt
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-base_3rdparty_in21k_20220124-13b83eec.pth
Converted From:
@ -131,7 +133,7 @@ Models:
- ImageNet-1k
FLOPs: 15359124480
Parameters: 88591464
In Collections: ConvNeXt
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
@ -145,9 +147,10 @@ Models:
Code: https://github.com/facebookresearch/ConvNeXt
- Name: convnext-large_3rdparty_64xb64_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 34368026112
Parameters: 197767336
In Collections: ConvNeXt
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
@ -164,7 +167,7 @@ Models:
Training Data: ImageNet-21k
FLOPs: 34368026112
Parameters: 197767336
In Collections: ConvNeXt
In Collection: ConvNeXt
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-large_3rdparty_in21k_20220124-41b5a79f.pth
Converted From:
@ -177,7 +180,7 @@ Models:
- ImageNet-1k
FLOPs: 34368026112
Parameters: 197767336
In Collections: ConvNeXt
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
@ -194,7 +197,7 @@ Models:
Training Data: ImageNet-21k
FLOPs: 60929820672
Parameters: 350196968
In Collections: ConvNeXt
In Collection: ConvNeXt
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext/convnext-xlarge_3rdparty_in21k_20220124-f909bad7.pth
Converted From:
@ -207,7 +210,7 @@ Models:
- ImageNet-1k
FLOPs: 60929820672
Parameters: 350196968
In Collections: ConvNeXt
In Collection: ConvNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
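The recurring `In Collections` to `In Collection` fix throughout these metafiles is purely mechanical. As a rough sketch of how such a bulk rename could be scripted (the path is one example file; assumes PyYAML is installed):

```python
# One-off sketch: rename the 'In Collections' key to 'In Collection' in
# every model entry of a metafile. The path is just an example.
import yaml

with open('configs/convnext/metafile.yml') as f:
    meta = yaml.safe_load(f)

for model in meta.get('Models', []):
    if 'In Collections' in model:
        model['In Collection'] = model.pop('In Collections')

with open('configs/convnext/metafile.yml', 'w') as f:
    yaml.safe_dump(meta, f, sort_keys=False)
```

A plain textual search-and-replace would preserve comments and key order more faithfully; the YAML round trip above is only the clearest illustration of the schema change.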

View File

@ -20,11 +20,11 @@ Neural networks have enabled state-of-the-art approaches to achieve incredible r
### ImageNet-1k
| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------: | :----------: | :-------: | :------: | :-------: | :-------: | :------------------------------------------------------------------: | :---------------------------------------------------------------------: |
| CSPDarkNet50\* | From scratch | 27.64 | 5.04 | 80.05 | 95.07 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/cspnet/cspdarknet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspdarknet50_3rdparty_8xb32_in1k_20220329-bd275287.pth) |
| CSPResNet50\* | From scratch | 21.62 | 3.48 | 79.55 | 94.68 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/cspnet/cspresnet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth) |
| CSPResNeXt50\* | From scratch | 20.57 | 3.11 | 79.96 | 94.96 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/cspnet/cspresnext50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnext50_3rdparty_8xb32_in1k_20220329-2cc84d21.pth) |
| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------: | :----------: | :-------: | :------: | :-------: | :-------: | :------------------------------------: | :---------------------------------------------------------------------------------------------------: |
| CSPDarkNet50\* | From scratch | 27.64 | 5.04 | 80.05 | 95.07 | [config](./cspdarknet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspdarknet50_3rdparty_8xb32_in1k_20220329-bd275287.pth) |
| CSPResNet50\* | From scratch | 21.62 | 3.48 | 79.55 | 94.68 | [config](./cspresnet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnet50_3rdparty_8xb32_in1k_20220329-dd6dddfb.pth) |
| CSPResNeXt50\* | From scratch | 20.57 | 3.11 | 79.96 | 94.96 | [config](./cspresnext50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/cspnet/cspresnext50_3rdparty_8xb32_in1k_20220329-2cc84d21.pth) |
*Models with * are converted from the [timm repo](https://github.com/rwightman/pytorch-image-models). The config files of these models are only for inference. We cannot guarantee their training accuracy, and we welcome you to contribute your reproduction results.*

View File

@ -17,7 +17,7 @@ Models:
Metadata:
FLOPs: 5040000000
Parameters: 27640000
In Collections: CSPNet
In Collection: CSPNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -34,7 +34,7 @@ Models:
Training Data: ImageNet-1k
FLOPs: 3480000000
Parameters: 21620000
In Collections: CSPNet
In Collection: CSPNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -50,7 +50,7 @@ Models:
Metadata:
FLOPs: 3110000000
Parameters: 20570000
In Collections: CSPNet
In Collection: CSPNet
Results:
- Dataset: ImageNet-1k
Metrics:

View File

@ -18,17 +18,17 @@ Recently, neural networks purely based on attention were shown to address image
The teacher of the distilled DeiT versions is RegNetY-16GF.
| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-------------------------: | :----------: | :-------: | :------: | :-------: | :-------: | :------------------------------------------------------------: | :--------------------------------------------------------------: |
| DeiT-tiny | From scratch | 5.72 | 1.08 | 74.50 | 92.24 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/deit/deit-tiny_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.log.json) |
| DeiT-tiny distilled\* | From scratch | 5.72 | 1.08 | 74.51 | 91.90 | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/deit/deit-tiny-distilled_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny-distilled_3rdparty_pt-4xb256_in1k_20211216-c429839a.pth) |
| DeiT-small | From scratch | 22.05 | 4.24 | 80.69 | 95.06 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/deit/deit-small_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-small_pt-4xb256_in1k_20220218-9425b9bb.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/deit/deit-small_pt-4xb256_in1k_20220218-9425b9bb.log.json) |
| DeiT-small distilled\* | From scratch | 22.05 | 4.24 | 81.17 | 95.40 | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/deit/deit-small-distilled_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-small-distilled_3rdparty_pt-4xb256_in1k_20211216-4de1d725.pth) |
| DeiT-base | From scratch | 86.57 | 16.86 | 81.76 | 95.81 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/deit/deit-base_pt-16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_pt-16xb64_in1k_20220216-db63c16c.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_pt-16xb64_in1k_20220216-db63c16c.log.json) |
| DeiT-base\* | From scratch | 86.57 | 16.86 | 81.79 | 95.59 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/deit/deit-base_pt-16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_3rdparty_pt-16xb64_in1k_20211124-6f40c188.pth) |
| DeiT-base distilled\* | From scratch | 86.57 | 16.86 | 83.33 | 96.49 | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/deit/deit-base-distilled_pt-16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base-distilled_3rdparty_pt-16xb64_in1k_20211216-42891296.pth) |
| DeiT-base 384px\* | ImageNet-1k | 86.86 | 49.37 | 83.04 | 96.31 | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/deit/deit-base_ft-16xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_3rdparty_ft-16xb32_in1k-384px_20211124-822d02f2.pth) |
| DeiT-base distilled 384px\* | ImageNet-1k | 86.86 | 49.37 | 85.55 | 97.35 | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/deit/deit-base-distilled_ft-16xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base-distilled_3rdparty_ft-16xb32_in1k-384px_20211216-e48d6000.pth) |
| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-------------------------: | :----------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------------------: | :---------------------------------------------------------------------: |
| DeiT-tiny | From scratch | 5.72 | 1.08 | 74.50 | 92.24 | [config](./deit-tiny_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.log.json) |
| DeiT-tiny distilled\* | From scratch | 5.72 | 1.08 | 74.51 | 91.90 | [config](./deit-tiny-distilled_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny-distilled_3rdparty_pt-4xb256_in1k_20211216-c429839a.pth) |
| DeiT-small | From scratch | 22.05 | 4.24 | 80.69 | 95.06 | [config](./deit-small_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-small_pt-4xb256_in1k_20220218-9425b9bb.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/deit/deit-small_pt-4xb256_in1k_20220218-9425b9bb.log.json) |
| DeiT-small distilled\* | From scratch | 22.05 | 4.24 | 81.17 | 95.40 | [config](./deit-small-distilled_pt-4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-small-distilled_3rdparty_pt-4xb256_in1k_20211216-4de1d725.pth) |
| DeiT-base | From scratch | 86.57 | 16.86 | 81.76 | 95.81 | [config](./deit-base_pt-16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_pt-16xb64_in1k_20220216-db63c16c.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_pt-16xb64_in1k_20220216-db63c16c.log.json) |
| DeiT-base\* | From scratch | 86.57 | 16.86 | 81.79 | 95.59 | [config](./deit-base_pt-16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_3rdparty_pt-16xb64_in1k_20211124-6f40c188.pth) |
| DeiT-base distilled\* | From scratch | 86.57 | 16.86 | 83.33 | 96.49 | [config](./deit-base-distilled_pt-16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base-distilled_3rdparty_pt-16xb64_in1k_20211216-42891296.pth) |
| DeiT-base 384px\* | ImageNet-1k | 86.86 | 49.37 | 83.04 | 96.31 | [config](./deit-base_ft-16xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base_3rdparty_ft-16xb32_in1k-384px_20211124-822d02f2.pth) |
| DeiT-base distilled 384px\* | ImageNet-1k | 86.86 | 49.37 | 85.55 | 97.35 | [config](./deit-base-distilled_ft-16xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/deit/deit-base-distilled_3rdparty_ft-16xb32_in1k-384px_20211216-e48d6000.pth) |
*Models with * are converted from the [official repo](https://github.com/facebookresearch/deit). The config files of these models are only for validation. We cannot guarantee their training accuracy, and we welcome you to contribute your reproduction results.*

View File

@ -16,12 +16,12 @@ Recent work has shown that convolutional networks can be substantially deeper, m
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------: | :-------: | :------: | :-------: | :-------: | :-------------------------------------------------------------------------: | :---------------------------------------------------------------------------: |
| DenseNet121\* | 7.98 | 2.88 | 74.96 | 92.21 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/densenet/densenet121_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/densenet/densenet121_4xb256_in1k_20220426-07450f99.pth) |
| DenseNet169\* | 14.15 | 3.42 | 76.08 | 93.11 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/densenet/densenet169_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/densenet/densenet169_4xb256_in1k_20220426-a2889902.pth) |
| DenseNet201\* | 20.01 | 4.37 | 77.32 | 93.64 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/densenet/densenet201_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/densenet/densenet201_4xb256_in1k_20220426-05cae4ef.pth) |
| DenseNet161\* | 28.68 | 7.82 | 77.61 | 93.83 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/densenet/densenet161_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/densenet/densenet161_4xb256_in1k_20220426-ee6a80a9.pth) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------: | :-------: | :------: | :-------: | :-------: | :------------------------------------: | :----------------------------------------------------------------------------------------------------------------: |
| DenseNet121\* | 7.98 | 2.88 | 74.96 | 92.21 | [config](./densenet121_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/densenet/densenet121_4xb256_in1k_20220426-07450f99.pth) |
| DenseNet169\* | 14.15 | 3.42 | 76.08 | 93.11 | [config](./densenet169_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/densenet/densenet169_4xb256_in1k_20220426-a2889902.pth) |
| DenseNet201\* | 20.01 | 4.37 | 77.32 | 93.64 | [config](./densenet201_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/densenet/densenet201_4xb256_in1k_20220426-05cae4ef.pth) |
| DenseNet161\* | 28.68 | 7.82 | 77.61 | 93.83 | [config](./densenet161_4xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/densenet/densenet161_4xb256_in1k_20220426-ee6a80a9.pth) |
*Models with * are converted from [pytorch](https://pytorch.org/vision/stable/models.html), guided by the [original repo](https://github.com/liuzhuang13/DenseNet). The config files of these models are only for inference. We cannot guarantee their training accuracy, and we welcome you to contribute your reproduction results.*

View File

@ -14,7 +14,7 @@ Models:
Metadata:
FLOPs: 2881695488
Parameters: 7978856
In Collections: DenseNet
In Collection: DenseNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -30,7 +30,7 @@ Models:
Metadata:
FLOPs: 3416860160
Parameters: 14149480
In Collections: DenseNet
In Collection: DenseNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -46,7 +46,7 @@ Models:
Metadata:
FLOPs: 4365236736
Parameters: 20013928
In Collections: DenseNet
In Collection: DenseNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -62,7 +62,7 @@ Models:
Metadata:
FLOPs: 7816363968
Parameters: 28681000
In Collections: DenseNet
In Collection: DenseNet
Results:
- Dataset: ImageNet-1k
Metrics:

View File

@ -0,0 +1,43 @@
# EdgeNeXt
> [EdgeNeXt: Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision Applications](https://arxiv.org/abs/2206.10589)
<!-- [ALGORITHM] -->
## Abstract
<!-- [ABSTRACT] -->
In the pursuit of achieving ever-increasing accuracy, large and complex neural networks are usually developed. Such models demand high computational resources and therefore cannot be deployed on edge devices. It is of great interest to build resource-efficient general purpose networks due to their usefulness in several application areas. In this work, we strive to effectively combine the strengths of both CNN and Transformer models and propose a new efficient hybrid architecture, EdgeNeXt. Specifically, in EdgeNeXt we introduce a split depth-wise transpose attention (SDTA) encoder that splits input tensors into multiple channel groups and utilizes depth-wise convolution along with self-attention across channel dimensions to implicitly increase the receptive field and encode multi-scale features. Our extensive experiments on classification, detection and segmentation tasks reveal the merits of the proposed approach, outperforming state-of-the-art methods with comparatively lower compute requirements. Our EdgeNeXt model with 1.3M parameters achieves 71.2% top-1 accuracy on ImageNet-1K, outperforming MobileViT with an absolute gain of 2.2% and a 28% reduction in FLOPs. Further, our EdgeNeXt model with 5.6M parameters achieves 79.4% top-1 accuracy on ImageNet-1K.
<!-- [IMAGE] -->
<div align=center>
<img src="https://github.com/mmaaz60/EdgeNeXt/raw/main/images/EdgeNext.png" width="100%"/>
</div>
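The SDTA encoder described above combines two ideas: a channel split processed by depth-wise convolutions, and self-attention computed across the channel dimension rather than the spatial one. A minimal PyTorch sketch of these two ingredients (an illustration with made-up hyper-parameters, not the official implementation):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class SDTASketch(nn.Module):
    """Illustrative SDTA-style block; not the official EdgeNeXt code."""

    def __init__(self, dim, groups=4, num_heads=4):
        super().__init__()
        group_dim = dim // groups
        # One depth-wise 3x3 convolution per channel group.
        self.dwconvs = nn.ModuleList(
            nn.Conv2d(group_dim, group_dim, 3, padding=1, groups=group_dim)
            for _ in range(groups))
        self.num_heads = num_heads
        self.qkv = nn.Linear(dim, dim * 3, bias=False)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x):  # x: (B, C, H, W)
        # 1) Split channels into groups; each group gets its own depth-wise conv.
        outs = [conv(part) for conv, part in
                zip(self.dwconvs, x.chunk(len(self.dwconvs), dim=1))]
        x = torch.cat(outs, dim=1)
        b, c, h, w = x.shape
        tokens = x.flatten(2).transpose(1, 2)  # (B, N, C), N = H * W
        # 2) "Transpose" attention: a C x C attention map mixes channels,
        #    so the cost grows with C rather than with the number of pixels.
        qkv = self.qkv(tokens).reshape(b, -1, 3, self.num_heads,
                                       c // self.num_heads)
        q, k, v = qkv.permute(2, 0, 3, 4, 1)  # each: (B, heads, C/heads, N)
        q, k = F.normalize(q, dim=-1), F.normalize(k, dim=-1)
        attn = (q @ k.transpose(-2, -1)).softmax(dim=-1)
        out = (attn @ v).permute(0, 3, 1, 2).reshape(b, -1, c)
        return self.proj(out).transpose(1, 2).reshape(b, c, h, w)


x = torch.randn(2, 64, 14, 14)
print(SDTASketch(64)(x).shape)  # torch.Size([2, 64, 14, 14])
```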
## Results and models
### ImageNet-1k
| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------------: | :----------: | :-------: | :------: | :-------: | :-------: | :-------------------------------------------: | :--------------------------------------------------------------------------------------: |
| EdgeNeXt-Base-usi\* | From scratch | 18.51 | 3.84 | 83.67 | 96.7 | [config](./edgenext-base_8xb256-usi_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-base_3rdparty-usi_in1k_20220801-909e8939.pth) |
| EdgeNeXt-Base\* | From scratch | 18.51 | 3.84 | 82.48 | 96.2 | [config](./edgenext-base_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-base_3rdparty_in1k_20220801-9ade408b.pth) |
| EdgeNeXt-Small-usi\* | From scratch | 5.59 | 1.26 | 81.06 | 95.34 | [config](./edgenext-small_8xb256-usi_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-small_3rdparty-usi_in1k_20220801-ae6d8dd3.pth) |
| EdgeNeXt-Small\* | From scratch | 5.59 | 1.26 | 79.41 | 94.53 | [config](./edgenext-small_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-small_3rdparty_in1k_20220801-d00db5f8.pth) |
| EdgeNeXt-X-Small\* | From scratch | 2.34 | 0.538 | 74.86 | 92.31 | [config](./edgenext-xsmall_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-xsmall_3rdparty_in1k_20220801-974f9fe7.pth) |
| EdgeNeXt-XX-Small\* | From scratch | 1.33 | 0.261 | 71.2 | 89.91 | [config](./edgenext-xxsmall_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-xxsmall_3rdparty_in1k_20220801-7ca8a81d.pth) |
*Models with * are converted from the [official repo](https://github.com/mmaaz60/EdgeNeXt). The config files of these models are only for inference. We cannot guarantee their training accuracy, and we welcome you to contribute your reproduction results.*
## Citation
```bibtex
@article{Maaz2022EdgeNeXt,
title={EdgeNeXt: Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision Applications},
author={Muhammad Maaz and Abdelrahman Shaker and Hisham Cholakkal and Salman Khan and Syed Waqas Zamir and Rao Muhammad Anwer and Fahad Shahbaz Khan},
journal={arXiv preprint arXiv:2206.10589},
year={2022}
}
```

View File

@ -0,0 +1,19 @@
_base_ = ['./edgenext-base_8xb256_in1k.py']
# dataset setting
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=269,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=256),
dict(type='PackClsInputs')
]
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
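The test pipeline above resizes the short image edge to 269 pixels with bicubic interpolation and then center-crops to 256x256 (a crop ratio of roughly 0.95). For intuition, the same preprocessing expressed with plain torchvision, as a sketch of the semantics rather than the transforms mmcls actually dispatches to:

```python
# Rough torchvision equivalent of ResizeEdge(scale=269, edge='short',
# interpolation='bicubic') followed by CenterCrop(crop_size=256).
from torchvision import transforms
from torchvision.transforms import InterpolationMode

preprocess = transforms.Compose([
    # Resize with an int scales the *shorter* edge and keeps aspect ratio.
    transforms.Resize(269, interpolation=InterpolationMode.BICUBIC),
    transforms.CenterCrop(256),
])
```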

View File

@ -0,0 +1,20 @@
_base_ = [
'../_base_/models/edgenext/edgenext-base.py',
'../_base_/datasets/imagenet_bs64_edgenext_256.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=6e-3),
clip_grad=dict(max_norm=5.0),
)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (32 GPUs) x (128 samples per GPU)
auto_scale_lr = dict(base_batch_size=4096)
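The `auto_scale_lr` note encodes the usual linear scaling rule: when enabled, the learning rate is multiplied by the ratio of the actual total batch size to `base_batch_size`. A quick worked example of what this config implies (the 8-GPU launch is just an assumed scenario):

```python
# Linear LR scaling implied by auto_scale_lr (illustrative arithmetic).
base_lr = 6e-3          # optimizer lr above
base_batch_size = 4096  # (32 GPUs) x (128 samples per GPU)

# Assumed scenario: 8 GPUs x 256 samples per GPU, as in the config name.
actual_batch_size = 8 * 256                               # 2048
scaled_lr = base_lr * actual_batch_size / base_batch_size
print(scaled_lr)  # 0.003, i.e. half of the base lr
```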

View File

@ -0,0 +1,19 @@
_base_ = ['./edgenext-small_8xb256_in1k.py']
# dataset setting
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=269,
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=256),
dict(type='PackClsInputs')
]
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader

View File

@ -0,0 +1,20 @@
_base_ = [
'../_base_/models/edgenext/edgenext-small.py',
'../_base_/datasets/imagenet_bs64_edgenext_256.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=6e-3),
clip_grad=dict(max_norm=5.0),
)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (32 GPUs) x (128 samples per GPU)
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,20 @@
_base_ = [
'../_base_/models/edgenext/edgenext-xsmall.py',
'../_base_/datasets/imagenet_bs64_edgenext_256.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=6e-3),
clip_grad=dict(max_norm=5.0),
)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (32 GPUs) x (128 samples per GPU)
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,20 @@
_base_ = [
'../_base_/models/edgenext/edgenext-xxsmall.py',
'../_base_/datasets/imagenet_bs64_edgenext_256.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=6e-3),
clip_grad=dict(max_norm=5.0),
)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')]
# NOTE: `auto_scale_lr` is for automatically scaling LR
# based on the actual training batch size.
# base_batch_size = (32 GPUs) x (128 samples per GPU)
auto_scale_lr = dict(base_batch_size=4096)

View File

@ -0,0 +1,118 @@
Collections:
- Name: EdgeNeXt
Metadata:
Training Data: ImageNet-1k
Architecture:
- SDTA
- 1x1 Convolution
- Channel Self-attention
Paper:
URL: https://arxiv.org/abs/2206.10589
Title: 'EdgeNeXt: Efficiently Amalgamated CNN-Transformer Architecture for Mobile Vision Applications'
README: configs/edgenext/README.md
Code:
Version: v1.0.0rc1
URL: https://github.com/open-mmlab/mmclassification/blob/v1.0.0rc1/mmcls/models/backbones/edgenext.py
Models:
- Name: edgenext-xxsmall_3rdparty_in1k
Metadata:
FLOPs: 255640144
Parameters: 1327216
In Collection: EdgeNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 71.20
Top 5 Accuracy: 89.91
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-xxsmall_3rdparty_in1k_20220801-7ca8a81d.pth
Config: configs/edgenext/edgenext-xxsmall_8xb256_in1k.py
Converted From:
Weights: https://github.com/mmaaz60/EdgeNeXt/releases/download/v1.0/edgenext_xxsmall.pth
Code: https://github.com/mmaaz60/EdgeNeXt
- Name: edgenext-xsmall_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 529970560
Parameters: 2336804
In Collection: EdgeNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 74.86
Top 5 Accuracy: 92.31
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-xsmall_3rdparty_in1k_20220801-974f9fe7.pth
Config: configs/edgenext/edgenext-xsmall_8xb256_in1k.py
Converted From:
Weights: https://github.com/mmaaz60/EdgeNeXt/releases/download/v1.0/edgenext_xsmall.pth
Code: https://github.com/mmaaz60/EdgeNeXt
- Name: edgenext-small_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 1249788000
Parameters: 5586832
In Collection: EdgeNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 79.41
Top 5 Accuracy: 94.53
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-small_3rdparty_in1k_20220801-d00db5f8.pth
Config: configs/edgenext/edgenext-small_8xb256_in1k.py
Converted From:
Weights: https://github.com/mmaaz60/EdgeNeXt/releases/download/v1.0/edgenext_small.pth
Code: https://github.com/mmaaz60/EdgeNeXt
- Name: edgenext-small-usi_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 1249788000
Parameters: 5586832
In Collection: EdgeNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 81.06
Top 5 Accuracy: 95.34
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-small_3rdparty-usi_in1k_20220801-ae6d8dd3.pth
Config: configs/edgenext/edgenext-small_8xb256-usi_in1k.py
Converted From:
Weights: https://github.com/mmaaz60/EdgeNeXt/releases/download/v1.1/edgenext_small_usi.pth
Code: https://github.com/mmaaz60/EdgeNeXt
- Name: edgenext-base_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 3814395280
Parameters: 18511292
In Collection: EdgeNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 82.48
Top 5 Accuracy: 96.2
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-base_3rdparty_in1k_20220801-9ade408b.pth
Config: configs/edgenext/edgenext-base_8xb256_in1k.py
Converted From:
Weights: https://github.com/mmaaz60/EdgeNeXt/releases/download/v1.2/edgenext_base.pth
Code: https://github.com/mmaaz60/EdgeNeXt
- Name: edgenext-base_3rdparty-usi_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 3814395280
Parameters: 18511292
In Collection: EdgeNeXt
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.67
Top 5 Accuracy: 96.7
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/edgenext/edgenext-base_3rdparty-usi_in1k_20220801-909e8939.pth
Config: configs/edgenext/edgenext-base_8xb256-usi_in1k.py
Converted From:
Weights: https://github.com/mmaaz60/EdgeNeXt/releases/download/v1.2/edgenext_base_usi.pth
Code: https://github.com/mmaaz60/EdgeNeXt
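Because the metafile is plain YAML, it is easy to consume programmatically. A small sketch (assumes PyYAML and a checkout of this repo):

```python
# List every EdgeNeXt model in the metafile with its top-1 accuracy
# and checkpoint URL.
import yaml

with open('configs/edgenext/metafile.yml') as f:
    meta = yaml.safe_load(f)

for model in meta['Models']:
    top1 = model['Results'][0]['Metrics']['Top 1 Accuracy']
    print(f"{model['Name']}: top-1 {top1} -> {model['Weights']}")
```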

View File

@ -0,0 +1,47 @@
# EfficientFormer
> [EfficientFormer: Vision Transformers at MobileNet Speed](https://arxiv.org/abs/2206.01191)
<!-- [ALGORITHM] -->
## Abstract
Vision Transformers (ViT) have shown rapid progress in computer vision tasks, achieving promising results on various benchmarks. However, due to the massive number of parameters and model design, e.g., attention mechanism, ViT-based models are generally several times slower than lightweight convolutional networks. Therefore, the deployment of ViT for real-time applications is particularly challenging, especially on resource-constrained hardware such as mobile devices. Recent efforts try to reduce the computation complexity of ViT through network architecture search or hybrid design with MobileNet block, yet the inference speed is still unsatisfactory. This leads to an important question: can transformers run as fast as MobileNet while obtaining high performance? To answer this, we first revisit the network architecture and operators used in ViT-based models and identify inefficient designs. Then we introduce a dimension-consistent pure transformer (without MobileNet blocks) as a design paradigm. Finally, we perform latency-driven slimming to get a series of final models dubbed EfficientFormer. Extensive experiments show the superiority of EfficientFormer in performance and speed on mobile devices. Our fastest model, EfficientFormer-L1, achieves 79.2% top-1 accuracy on ImageNet-1K with only 1.6 ms inference latency on iPhone 12 (compiled with CoreML), which runs as fast as MobileNetV2×1.4 (1.6 ms, 74.7% top-1), and our largest model, EfficientFormer-L7, obtains 83.3% accuracy with only 7.0 ms latency. Our work proves that properly designed transformers can reach extremely low latency on mobile devices while maintaining high performance.
<div align=center>
<img src="https://user-images.githubusercontent.com/18586273/180713426-9d3d77e3-3584-42d8-9098-625b4170d796.png" width="100%"/>
</div>
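Part of the speed story above is the MetaFormer-style choice of cheap token mixers: the early, convolution-friendly stages can mix tokens with plain pooling instead of attention. A rough PyTorch sketch of such a pooling mixer (an illustration of the idea, not the paper's exact block):

```python
import torch
import torch.nn as nn


class PoolMixer(nn.Module):
    """Average pooling as a token mixer: spatial mixing with no attention."""

    def __init__(self, pool_size=3):
        super().__init__()
        self.pool = nn.AvgPool2d(pool_size, stride=1, padding=pool_size // 2,
                                 count_include_pad=False)

    def forward(self, x):  # x: (B, C, H, W)
        # Subtracting the input leaves only the neighborhood signal that was
        # mixed in, which pairs naturally with a residual connection.
        return self.pool(x) - x


x = torch.randn(1, 48, 56, 56)
print(PoolMixer()(x).shape)  # torch.Size([1, 48, 56, 56])
```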
## Results and models
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------------: | :-------: | :------: | :-------: | :-------: | :-------------------------------------------: | :--------------------------------------------------------------------------------------------------: |
| EfficientFormer-l1\* | 12.19 | 1.30 | 80.46 | 94.99 | [config](./efficientformer-l1_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientformer/efficientformer-l1_3rdparty_in1k_20220915-cc3e1ac6.pth) |
| EfficientFormer-l3\* | 31.41 | 3.93 | 82.45 | 96.18 | [config](./efficientformer-l3_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientformer/efficientformer-l3_3rdparty_in1k_20220915-466793d6.pth) |
| EfficientFormer-l7\* | 82.23 | 10.16 | 83.40 | 96.60 | [config](./efficientformer-l7_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientformer/efficientformer-l7_3rdparty_in1k_20220915-185e30af.pth) |
*Models with * are converted from the [official repo](https://github.com/snap-research/EfficientFormer). The config files of these models are only for inference. We cannot guarantee their training accuracy, and we welcome you to contribute your reproduction results.*
## Citation
```bibtex
@misc{https://doi.org/10.48550/arxiv.2206.01191,
doi = {10.48550/ARXIV.2206.01191},
url = {https://arxiv.org/abs/2206.01191},
author = {Li, Yanyu and Yuan, Geng and Wen, Yang and Hu, Eric and Evangelidis, Georgios and Tulyakov, Sergey and Wang, Yanzhi and Ren, Jian},
keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {EfficientFormer: Vision Transformers at MobileNet Speed},
publisher = {arXiv},
year = {2022},
copyright = {Creative Commons Attribution 4.0 International}
}
```

View File

@ -0,0 +1,6 @@
_base_ = [
'../_base_/models/efficientformer-l1.py',
'../_base_/datasets/imagenet_bs128_poolformer_small_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]

View File

@ -0,0 +1,3 @@
_base_ = './efficientformer-l1_8xb128_in1k.py'
model = dict(backbone=dict(arch='l3'), head=dict(in_channels=512))

View File

@ -0,0 +1,3 @@
_base_ = './efficientformer-l1_8xb128_in1k.py'
model = dict(backbone=dict(arch='l7'), head=dict(in_channels=768))
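These variant configs override only the backbone `arch` and the head's `in_channels` on top of the L1 base. With MMEngine's config system, the merged result can be inspected directly (sketch assumes a checkout of this repo):

```python
# Inspect how the l7 config merges with its _base_ chain.
from mmengine.config import Config

cfg = Config.fromfile('configs/efficientformer/efficientformer-l7_8xb128_in1k.py')
print(cfg.model.backbone.arch)     # 'l7'
print(cfg.model.head.in_channels)  # 768
```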

View File

@ -0,0 +1,67 @@
Collections:
- Name: EfficientFormer
Metadata:
Training Data: ImageNet-1k
Architecture:
- Pooling
- 1x1 Convolution
- LayerScale
- MetaFormer
Paper:
URL: https://arxiv.org/pdf/2206.01191.pdf
Title: "EfficientFormer: Vision Transformers at MobileNet Speed"
README: configs/efficientformer/README.md
Code:
Version: v1.0.0rc1
URL: https://github.com/open-mmlab/mmclassification/blob/v1.0.0rc1/mmcls/models/backbones/efficientformer.py
Models:
- Name: efficientformer-l1_3rdparty_8xb128_in1k
Metadata:
FLOPs: 1304601088 # 1.3G
Parameters: 12278696 # 12M
In Collection: EfficientFormer
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 80.46
Top 5 Accuracy: 94.99
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientformer/efficientformer-l1_3rdparty_in1k_20220915-cc3e1ac6.pth
Config: configs/efficientformer/efficientformer-l1_8xb128_in1k.py
Converted From:
Weights: https://drive.google.com/file/d/11SbX-3cfqTOc247xKYubrAjBiUmr818y/view?usp=sharing
Code: https://github.com/snap-research/EfficientFormer
- Name: efficientformer-l3_3rdparty_8xb128_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 3737045760 # 3.7G
Parameters: 31406000 # 31M
In Collection: EfficientFormer
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 82.45
Top 5 Accuracy: 96.18
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientformer/efficientformer-l3_3rdparty_in1k_20220915-466793d6.pth
Config: configs/efficientformer/efficientformer-l3_8xb128_in1k.py
Converted From:
Weights: https://drive.google.com/file/d/1OyyjKKxDyMj-BcfInp4GlDdwLu3hc30m/view?usp=sharing
Code: https://github.com/snap-research/EfficientFormer
- Name: efficientformer-l7_3rdparty_8xb128_in1k
Metadata:
FLOPs: 10163951616 # 10.2G
Parameters: 82229328 # 82M
In Collection: EfficientFormer
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.40
Top 5 Accuracy: 96.60
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientformer/efficientformer-l7_3rdparty_in1k_20220915-185e30af.pth
Config: configs/efficientformer/efficientformer-l7_8xb128_in1k.py
Converted From:
Weights: https://drive.google.com/file/d/1cVw-pctJwgvGafeouynqWWCwgkcoFMM5/view?usp=sharing
Code: https://github.com/snap-research/EfficientFormer

View File

@ -20,31 +20,31 @@ In the result table, AA means trained with AutoAugment pre-processing, more deta
Note: MMClassification supports training with AutoAugment, but does not support AdvProp yet.
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------------------------: | :-------: | :------: | :-------: | :-------: | :---------------------------------------------------------------: | :------------------------------------------------------------------: |
| EfficientNet-B0\* | 5.29 | 0.02 | 76.74 | 93.17 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b0_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b0_3rdparty_8xb32_in1k_20220119-a7e2a0b1.pth) |
| EfficientNet-B0 (AA)\* | 5.29 | 0.02 | 77.26 | 93.41 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b0_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b0_3rdparty_8xb32-aa_in1k_20220119-8d939117.pth) |
| EfficientNet-B0 (AA + AdvProp)\* | 5.29 | 0.02 | 77.53 | 93.61 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b0_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b0_3rdparty_8xb32-aa-advprop_in1k_20220119-26434485.pth) |
| EfficientNet-B1\* | 7.79 | 0.03 | 78.68 | 94.28 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b1_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b1_3rdparty_8xb32_in1k_20220119-002556d9.pth) |
| EfficientNet-B1 (AA)\* | 7.79 | 0.03 | 79.20 | 94.42 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b1_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b1_3rdparty_8xb32-aa_in1k_20220119-619d8ae3.pth) |
| EfficientNet-B1 (AA + AdvProp)\* | 7.79 | 0.03 | 79.52 | 94.43 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b1_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b1_3rdparty_8xb32-aa-advprop_in1k_20220119-5715267d.pth) |
| EfficientNet-B2\* | 9.11 | 0.03 | 79.64 | 94.80 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b2_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b2_3rdparty_8xb32_in1k_20220119-ea374a30.pth) |
| EfficientNet-B2 (AA)\* | 9.11 | 0.03 | 80.21 | 94.96 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b2_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b2_3rdparty_8xb32-aa_in1k_20220119-dd61e80b.pth) |
| EfficientNet-B2 (AA + AdvProp)\* | 9.11 | 0.03 | 80.45 | 95.07 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b2_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b2_3rdparty_8xb32-aa-advprop_in1k_20220119-1655338a.pth) |
| EfficientNet-B3\* | 12.23 | 0.06 | 81.01 | 95.34 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b3_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32_in1k_20220119-4b4d7487.pth) |
| EfficientNet-B3 (AA)\* | 12.23 | 0.06 | 81.58 | 95.67 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b3_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32-aa_in1k_20220119-5b4887a0.pth) |
| EfficientNet-B3 (AA + AdvProp)\* | 12.23 | 0.06 | 81.81 | 95.69 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b3_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32-aa-advprop_in1k_20220119-53b41118.pth) |
| EfficientNet-B4\* | 19.34 | 0.12 | 82.57 | 96.09 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b4_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b4_3rdparty_8xb32_in1k_20220119-81fd4077.pth) |
| EfficientNet-B4 (AA)\* | 19.34 | 0.12 | 82.95 | 96.26 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b4_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b4_3rdparty_8xb32-aa_in1k_20220119-45b8bd2b.pth) |
| EfficientNet-B4 (AA + AdvProp)\* | 19.34 | 0.12 | 83.25 | 96.44 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b4_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b4_3rdparty_8xb32-aa-advprop_in1k_20220119-38c2238c.pth) |
| EfficientNet-B5\* | 30.39 | 0.24 | 83.18 | 96.47 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b5_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b5_3rdparty_8xb32_in1k_20220119-e9814430.pth) |
| EfficientNet-B5 (AA)\* | 30.39 | 0.24 | 83.82 | 96.76 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b5_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b5_3rdparty_8xb32-aa_in1k_20220119-2cab8b78.pth) |
| EfficientNet-B5 (AA + AdvProp)\* | 30.39 | 0.24 | 84.21 | 96.98 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b5_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b5_3rdparty_8xb32-aa-advprop_in1k_20220119-f57a895a.pth) |
| EfficientNet-B6 (AA)\* | 43.04 | 0.41 | 84.05 | 96.82 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b6_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b6_3rdparty_8xb32-aa_in1k_20220119-45b03310.pth) |
| EfficientNet-B6 (AA + AdvProp)\* | 43.04 | 0.41 | 84.74 | 97.14 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b6_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b6_3rdparty_8xb32-aa-advprop_in1k_20220119-bfe3485e.pth) |
| EfficientNet-B7 (AA)\* | 66.35 | 0.72 | 84.38 | 96.88 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b7_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b7_3rdparty_8xb32-aa_in1k_20220119-bf03951c.pth) |
| EfficientNet-B7 (AA + AdvProp)\* | 66.35 | 0.72 | 85.14 | 97.23 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b7_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b7_3rdparty_8xb32-aa-advprop_in1k_20220119-c6dbff10.pth) |
| EfficientNet-B8 (AA + AdvProp)\* | 87.41 | 1.09 | 85.38 | 97.28 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/efficientnet/efficientnet-b8_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b8_3rdparty_8xb32-aa-advprop_in1k_20220119-297ce1b7.pth) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------------------------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------: | :-----------------------------------------------------------------------------------: |
| EfficientNet-B0\* | 5.29 | 0.02 | 76.74 | 93.17 | [config](./efficientnet-b0_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b0_3rdparty_8xb32_in1k_20220119-a7e2a0b1.pth) |
| EfficientNet-B0 (AA)\* | 5.29 | 0.02 | 77.26 | 93.41 | [config](./efficientnet-b0_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b0_3rdparty_8xb32-aa_in1k_20220119-8d939117.pth) |
| EfficientNet-B0 (AA + AdvProp)\* | 5.29 | 0.02 | 77.53 | 93.61 | [config](./efficientnet-b0_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b0_3rdparty_8xb32-aa-advprop_in1k_20220119-26434485.pth) |
| EfficientNet-B1\* | 7.79 | 0.03 | 78.68 | 94.28 | [config](./efficientnet-b1_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b1_3rdparty_8xb32_in1k_20220119-002556d9.pth) |
| EfficientNet-B1 (AA)\* | 7.79 | 0.03 | 79.20 | 94.42 | [config](./efficientnet-b1_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b1_3rdparty_8xb32-aa_in1k_20220119-619d8ae3.pth) |
| EfficientNet-B1 (AA + AdvProp)\* | 7.79 | 0.03 | 79.52 | 94.43 | [config](./efficientnet-b1_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b1_3rdparty_8xb32-aa-advprop_in1k_20220119-5715267d.pth) |
| EfficientNet-B2\* | 9.11 | 0.03 | 79.64 | 94.80 | [config](./efficientnet-b2_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b2_3rdparty_8xb32_in1k_20220119-ea374a30.pth) |
| EfficientNet-B2 (AA)\* | 9.11 | 0.03 | 80.21 | 94.96 | [config](./efficientnet-b2_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b2_3rdparty_8xb32-aa_in1k_20220119-dd61e80b.pth) |
| EfficientNet-B2 (AA + AdvProp)\* | 9.11 | 0.03 | 80.45 | 95.07 | [config](./efficientnet-b2_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b2_3rdparty_8xb32-aa-advprop_in1k_20220119-1655338a.pth) |
| EfficientNet-B3\* | 12.23 | 0.06 | 81.01 | 95.34 | [config](./efficientnet-b3_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32_in1k_20220119-4b4d7487.pth) |
| EfficientNet-B3 (AA)\* | 12.23 | 0.06 | 81.58 | 95.67 | [config](./efficientnet-b3_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32-aa_in1k_20220119-5b4887a0.pth) |
| EfficientNet-B3 (AA + AdvProp)\* | 12.23 | 0.06 | 81.81 | 95.69 | [config](./efficientnet-b3_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32-aa-advprop_in1k_20220119-53b41118.pth) |
| EfficientNet-B4\* | 19.34 | 0.12 | 82.57 | 96.09 | [config](./efficientnet-b4_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b4_3rdparty_8xb32_in1k_20220119-81fd4077.pth) |
| EfficientNet-B4 (AA)\* | 19.34 | 0.12 | 82.95 | 96.26 | [config](./efficientnet-b4_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b4_3rdparty_8xb32-aa_in1k_20220119-45b8bd2b.pth) |
| EfficientNet-B4 (AA + AdvProp)\* | 19.34 | 0.12 | 83.25 | 96.44 | [config](./efficientnet-b4_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b4_3rdparty_8xb32-aa-advprop_in1k_20220119-38c2238c.pth) |
| EfficientNet-B5\* | 30.39 | 0.24 | 83.18 | 96.47 | [config](./efficientnet-b5_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b5_3rdparty_8xb32_in1k_20220119-e9814430.pth) |
| EfficientNet-B5 (AA)\* | 30.39 | 0.24 | 83.82 | 96.76 | [config](./efficientnet-b5_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b5_3rdparty_8xb32-aa_in1k_20220119-2cab8b78.pth) |
| EfficientNet-B5 (AA + AdvProp)\* | 30.39 | 0.24 | 84.21 | 96.98 | [config](./efficientnet-b5_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b5_3rdparty_8xb32-aa-advprop_in1k_20220119-f57a895a.pth) |
| EfficientNet-B6 (AA)\* | 43.04 | 0.41 | 84.05 | 96.82 | [config](./efficientnet-b6_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b6_3rdparty_8xb32-aa_in1k_20220119-45b03310.pth) |
| EfficientNet-B6 (AA + AdvProp)\* | 43.04 | 0.41 | 84.74 | 97.14 | [config](./efficientnet-b6_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b6_3rdparty_8xb32-aa-advprop_in1k_20220119-bfe3485e.pth) |
| EfficientNet-B7 (AA)\* | 66.35 | 0.72 | 84.38 | 96.88 | [config](./efficientnet-b7_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b7_3rdparty_8xb32-aa_in1k_20220119-bf03951c.pth) |
| EfficientNet-B7 (AA + AdvProp)\* | 66.35 | 0.72 | 85.14 | 97.23 | [config](./efficientnet-b7_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b7_3rdparty_8xb32-aa-advprop_in1k_20220119-c6dbff10.pth) |
| EfficientNet-B8 (AA + AdvProp)\* | 87.41 | 1.09 | 85.38 | 97.28 | [config](./efficientnet-b8_8xb32-01norm_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b8_3rdparty_8xb32-aa-advprop_in1k_20220119-297ce1b7.pth) |
*Models with * are converted from the [official repo](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet). The config files of these models are only for inference. We cannot guarantee their training accuracy, and we welcome you to contribute your reproduction results.*

View File

@ -25,7 +25,7 @@ Models:
Metadata:
FLOPs: 16481180
Parameters: 5288548
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -41,7 +41,7 @@ Models:
Metadata:
FLOPs: 16481180
Parameters: 5288548
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -57,7 +57,7 @@ Models:
Metadata:
FLOPs: 16481180
Parameters: 5288548
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -73,7 +73,7 @@ Models:
Metadata:
FLOPs: 27052224
Parameters: 7794184
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -89,7 +89,7 @@ Models:
Metadata:
FLOPs: 27052224
Parameters: 7794184
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -105,7 +105,7 @@ Models:
Metadata:
FLOPs: 27052224
Parameters: 7794184
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -121,7 +121,7 @@ Models:
Metadata:
FLOPs: 34346386
Parameters: 9109994
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -137,7 +137,7 @@ Models:
Metadata:
FLOPs: 34346386
Parameters: 9109994
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -153,7 +153,7 @@ Models:
Metadata:
FLOPs: 34346386
Parameters: 9109994
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -169,7 +169,7 @@ Models:
Metadata:
FLOPs: 58641904
Parameters: 12233232
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -185,7 +185,7 @@ Models:
Metadata:
FLOPs: 58641904
Parameters: 12233232
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -201,7 +201,7 @@ Models:
Metadata:
FLOPs: 58641904
Parameters: 12233232
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -217,7 +217,7 @@ Models:
Metadata:
FLOPs: 121870624
Parameters: 19341616
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -233,7 +233,7 @@ Models:
Metadata:
FLOPs: 121870624
Parameters: 19341616
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -249,7 +249,7 @@ Models:
Metadata:
FLOPs: 121870624
Parameters: 19341616
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -265,7 +265,7 @@ Models:
Metadata:
FLOPs: 243879440
Parameters: 30389784
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -281,7 +281,7 @@ Models:
Metadata:
FLOPs: 243879440
Parameters: 30389784
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -297,7 +297,7 @@ Models:
Metadata:
FLOPs: 243879440
Parameters: 30389784
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -313,7 +313,7 @@ Models:
Metadata:
FLOPs: 412002408
Parameters: 43040704
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -329,7 +329,7 @@ Models:
Metadata:
FLOPs: 412002408
Parameters: 43040704
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -345,7 +345,7 @@ Models:
Metadata:
FLOPs: 715526512
Parameters: 66347960
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -361,7 +361,7 @@ Models:
Metadata:
FLOPs: 715526512
Parameters: 66347960
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:
@ -377,7 +377,7 @@ Models:
Metadata:
FLOPs: 1092755326
Parameters: 87413142
In Collections: EfficientNet
In Collection: EfficientNet
Results:
- Dataset: ImageNet-1k
Metrics:

View File

@ -16,17 +16,17 @@ High-resolution representations are essential for position-sensitive vision prob
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :----------------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------------: | :-------------------------------------------------------------------------: |
| HRNet-W18\* | 21.30 | 4.33 | 76.75 | 93.44 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hrnet/hrnet-w18_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth) |
| HRNet-W30\* | 37.71 | 8.17 | 78.19 | 94.22 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hrnet/hrnet-w30_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w30_3rdparty_8xb32_in1k_20220120-8aa3832f.pth) |
| HRNet-W32\* | 41.23 | 8.99 | 78.44 | 94.19 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hrnet/hrnet-w32_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w32_3rdparty_8xb32_in1k_20220120-c394f1ab.pth) |
| HRNet-W40\* | 57.55 | 12.77 | 78.94 | 94.47 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hrnet/hrnet-w40_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w40_3rdparty_8xb32_in1k_20220120-9a2dbfc5.pth) |
| HRNet-W44\* | 67.06 | 14.96 | 78.88 | 94.37 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hrnet/hrnet-w44_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w44_3rdparty_8xb32_in1k_20220120-35d07f73.pth) |
| HRNet-W48\* | 77.47 | 17.36 | 79.32 | 94.52 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hrnet/hrnet-w48_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w48_3rdparty_8xb32_in1k_20220120-e555ef50.pth) |
| HRNet-W64\* | 128.06 | 29.00 | 79.46 | 94.65 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hrnet/hrnet-w64_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w64_3rdparty_8xb32_in1k_20220120-19126642.pth) |
| HRNet-W18 (ssld)\* | 21.30 | 4.33 | 81.06 | 95.70 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hrnet/hrnet-w18_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w18_3rdparty_8xb32-ssld_in1k_20220120-455f69ea.pth) |
| HRNet-W48 (ssld)\* | 77.47 | 17.36 | 83.63 | 96.79 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/hrnet/hrnet-w48_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w48_3rdparty_8xb32-ssld_in1k_20220120-d0459c38.pth) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :----------------: | :-------: | :------: | :-------: | :-------: | :---------------------------------: | :--------------------------------------------------------------------------------------------------------------: |
| HRNet-W18\* | 21.30 | 4.33 | 76.75 | 93.44 | [config](./hrnet-w18_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w18_3rdparty_8xb32_in1k_20220120-0c10b180.pth) |
| HRNet-W30\* | 37.71 | 8.17 | 78.19 | 94.22 | [config](./hrnet-w30_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w30_3rdparty_8xb32_in1k_20220120-8aa3832f.pth) |
| HRNet-W32\* | 41.23 | 8.99 | 78.44 | 94.19 | [config](./hrnet-w32_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w32_3rdparty_8xb32_in1k_20220120-c394f1ab.pth) |
| HRNet-W40\* | 57.55 | 12.77 | 78.94 | 94.47 | [config](./hrnet-w40_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w40_3rdparty_8xb32_in1k_20220120-9a2dbfc5.pth) |
| HRNet-W44\* | 67.06 | 14.96 | 78.88 | 94.37 | [config](./hrnet-w44_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w44_3rdparty_8xb32_in1k_20220120-35d07f73.pth) |
| HRNet-W48\* | 77.47 | 17.36 | 79.32 | 94.52 | [config](./hrnet-w48_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w48_3rdparty_8xb32_in1k_20220120-e555ef50.pth) |
| HRNet-W64\* | 128.06 | 29.00 | 79.46 | 94.65 | [config](./hrnet-w64_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w64_3rdparty_8xb32_in1k_20220120-19126642.pth) |
| HRNet-W18 (ssld)\* | 21.30 | 4.33 | 81.06 | 95.70 | [config](./hrnet-w18_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w18_3rdparty_8xb32-ssld_in1k_20220120-455f69ea.pth) |
| HRNet-W48 (ssld)\* | 77.47 | 17.36 | 83.63 | 96.79 | [config](./hrnet-w48_4xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/hrnet/hrnet-w48_3rdparty_8xb32-ssld_in1k_20220120-d0459c38.pth) |
*Models with * are converted from the [official repo](https://github.com/HRNet/HRNet-Image-Classification). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*

View File

@ -16,9 +16,9 @@ Convolutional networks are at the core of most state-of-the-art computer vision
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------: | :-------: | :------: | :-------: | :-------: | :------------------------------------------------------------------------: | :---------------------------------------------------------------------------: |
| Inception V3\* | 23.83 | 5.75 | 77.57 | 93.58 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/inception_v3/inception-v3_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/inception-v3/inception-v3_3rdparty_8xb32_in1k_20220615-dcd4d910.pth) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------: | :-------: | :------: | :-------: | :-------: | :------------------------------------: | :---------------------------------------------------------------------------------------------------------------: |
| Inception V3\* | 23.83 | 5.75 | 77.57 | 93.58 | [config](./inception-v3_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/inception-v3/inception-v3_3rdparty_8xb32_in1k_20220615-dcd4d910.pth) |
*Models with * are converted from the [official repo](https://github.com/pytorch/vision/blob/main/torchvision/models/inception.py#L28). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*

View File

@ -15,8 +15,8 @@ Collections:
Title: "Rethinking the Inception Architecture for Computer Vision"
README: configs/inception_v3/README.md
Code:
URL: TODO
Version: TODO
URL: https://github.com/open-mmlab/mmclassification/blob/v1.0.0rc1/configs/inception_v3/metafile.yml
Version: v1.0.0rc1
Models:
- Name: inception-v3_3rdparty_8xb32_in1k

View File

@ -16,10 +16,10 @@ Convolutional Neural Networks (CNNs) are the go-to model for computer vision. Re
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :----------: | :-------: | :------: | :-------: | :-------: | :-------------------------------------------------------------------------: | :----------------------------------------------------------------------------: |
| Mixer-B/16\* | 59.88 | 12.61 | 76.68 | 92.25 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/mlp_mixer/mlp-mixer-base-p16_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mlp-mixer/mixer-base-p16_3rdparty_64xb64_in1k_20211124-1377e3e0.pth) |
| Mixer-L/16\* | 208.2 | 44.57 | 72.34 | 88.02 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/mlp_mixer/mlp-mixer-large-p16_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mlp-mixer/mixer-large-p16_3rdparty_64xb64_in1k_20211124-5a2519d2.pth) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :----------: | :-------: | :------: | :-------: | :-------: | :--------------------------------------------: | :---------------------------------------------------------------------------------------------------------: |
| Mixer-B/16\* | 59.88 | 12.61 | 76.68 | 92.25 | [config](./mlp-mixer-base-p16_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mlp-mixer/mixer-base-p16_3rdparty_64xb64_in1k_20211124-1377e3e0.pth) |
| Mixer-L/16\* | 208.2 | 44.57 | 72.34 | 88.02 | [config](./mlp-mixer-large-p16_64xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mlp-mixer/mixer-large-p16_3rdparty_64xb64_in1k_20211124-5a2519d2.pth) |
*Models with * are converted from [timm](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/mlp_mixer.py). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*

View File

@ -18,9 +18,9 @@ The MobileNetV2 architecture is based on an inverted residual structure where th
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :----------: | :-------: | :------: | :-------: | :-------: | :-------------------------------------------------------------------------: | :----------------------------------------------------------------------------: |
| MobileNet V2 | 3.5 | 0.319 | 71.86 | 90.42 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.log.json) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :----------: | :-------: | :------: | :-------: | :-------: | :------------------------------------: | :-----------------------------------------------------------------------------------------------------------------: |
| MobileNet V2 | 3.5 | 0.319 | 71.86 | 90.42 | [config](./mobilenet-v2_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.log.json) |
## Citation

View File

@ -16,10 +16,10 @@ We present the next generation of MobileNets based on a combination of complemen
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------------: | :------------------------------------------------------------------------: |
| MobileNetV3-Small\* | 2.54 | 0.06 | 67.66 | 87.41 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/mobilenet_v3/mobilenet-v3-small_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth) |
| MobileNetV3-Large\* | 5.48 | 0.23 | 74.04 | 91.34 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/mobilenet_v3/mobilenet-v3-large_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_large-3ea3c186.pth) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------------: | :-------: | :------: | :-------: | :-------: | :-------------------------------------------: | :---------------------------------------------------------------------------------------------------: |
| MobileNetV3-Small\* | 2.54 | 0.06 | 67.66 | 87.41 | [config](./mobilenet-v3-small_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth) |
| MobileNetV3-Large\* | 5.48 | 0.23 | 74.04 | 91.34 | [config](./mobilenet-v3-large_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_large-3ea3c186.pth) |
*Models with * are converted from [torchvision](https://pytorch.org/vision/stable/_modules/torchvision/models/mobilenetv3.html). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*

View File

@ -0,0 +1,132 @@
# MobileOne
> [An Improved One millisecond Mobile Backbone](https://arxiv.org/abs/2206.04040)
<!-- [ALGORITHM] -->
## Abstract
Efficient neural network backbones for mobile devices are often optimized for metrics such as FLOPs or parameter count. However, these metrics may not correlate well with latency of the network when deployed on a mobile device. Therefore, we perform extensive analysis of different metrics by deploying several mobile-friendly networks on a mobile device. We identify and analyze architectural and optimization bottlenecks in recent efficient neural networks and provide ways to mitigate these bottlenecks. To this end, we design an efficient backbone MobileOne, with variants achieving an inference time under 1 ms on an iPhone12 with 75.9% top-1 accuracy on ImageNet. We show that MobileOne achieves state-of-the-art performance within the efficient architectures while being many times faster on mobile. Our best model obtains similar performance on ImageNet as MobileFormer while being 38x faster. Our model obtains 2.3% better top-1 accuracy on ImageNet than EfficientNet at similar latency. Furthermore, we show that our model generalizes to multiple tasks - image classification, object detection, and semantic segmentation with significant improvements in latency and accuracy as compared to existing efficient architectures when deployed on a mobile device.
<div align=center>
<img src="https://user-images.githubusercontent.com/18586273/183552452-74657532-f461-48f7-9aa7-c23f006cdb07.png" width="40%"/>
</div>
## Results and models
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------: | :-----------------------------: | :----------------------------: | :-------: | :-------: | :--------------------------------------------------: | :-----------------------------------------------------: |
| MobileOne-s0\* | 5.29 (train) \| 2.08 (deploy) | 1.09 (train) \| 0.28 (deploy) | 71.36 | 89.87 | [config (train)](./mobileone-s0_8xb128_in1k.py) \| [config (deploy)](./deploy/mobileone-s0_deploy_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s0_3rdparty_in1k_20220915-007ae971.pth) |
| MobileOne-s1\* | 4.83 (train) \| 4.76 (deploy) | 0.86 (train) \| 0.84 (deploy) | 75.76 | 92.77 | [config (train)](./mobileone-s1_8xb128_in1k.py) \| [config (deploy)](./deploy/mobileone-s1_deploy_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s1_3rdparty_in1k_20220915-473c8469.pth) |
| MobileOne-s2\* | 7.88 (train) \| 7.88 (deploy) | 1.34 (train) \| 1.31 (deploy) | 77.39 | 93.63 | [config (train)](./mobileone-s2_8xb128_in1k.py) \|[config (deploy)](./deploy/mobileone-s2_deploy_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s2_3rdparty_in1k_20220915-ed2e4c30.pth) |
| MobileOne-s3\* | 10.17 (train) \| 10.08 (deploy) | 1.95 (train) \| 1.91 (deploy) | 77.93 | 93.89 | [config (train)](./mobileone-s3_8xb128_in1k.py) \|[config (deploy)](./deploy/mobileone-s3_deploy_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s3_3rdparty_in1k_20220915-84d6a02c.pth) |
| MobileOne-s4\* | 14.95 (train) \| 14.84 (deploy) | 3.05 (train) \| 3.00 (deploy) | 79.30 | 94.37 | [config (train)](./mobileone-s4_8xb128_in1k.py) \|[config (deploy)](./deploy/mobileone-s4_deploy_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s4_3rdparty_in1k_20220915-ce9509ee.pth) |
*Models with * are converted from the [official repo](https://github.com/apple/ml-mobileone). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*
*Because the [official repo](https://github.com/apple/ml-mobileone) does not provide its training and testing strategy, the test data pipeline of [RepVGG](https://github.com/open-mmlab/mmclassification/tree/master/configs/repvgg) is used here, and the results are about 0.1% lower than those in the paper. Refer to [this issue](https://github.com/apple/ml-mobileone/issues/2).*
## How to use
The checkpoints provided are all `training-time` models. Use the reparameterize tool to switch them to the more efficient `inference-time` architecture, which has both fewer parameters and fewer computations.
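For intuition, re-parameterization folds the train-time branches into plain convolutions, so the deploy model computes exactly the same function with fewer layers and parameters. Below is a minimal sketch of the simplest case, fusing a convolution with its following BatchNorm. The helper name `fuse_conv_bn` is only for illustration; the actual tool also merges MobileOne's parallel branches.

```python
import torch
import torch.nn as nn

def fuse_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    """Fold an eval-mode BatchNorm layer into the preceding convolution."""
    fused = nn.Conv2d(conv.in_channels, conv.out_channels, conv.kernel_size,
                      stride=conv.stride, padding=conv.padding,
                      dilation=conv.dilation, groups=conv.groups, bias=True)
    # BN(x) = gamma * (x - mean) / sqrt(var + eps) + beta, so the conv weight
    # is scaled per output channel and the bias absorbs the shift.
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    fused.weight.data = conv.weight * scale.reshape(-1, 1, 1, 1)
    conv_bias = conv.bias if conv.bias is not None else torch.zeros_like(bn.running_mean)
    fused.bias.data = (conv_bias - bn.running_mean) * scale + bn.bias
    return fused
```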
### Use tool
Use the provided tool to reparameterize the given model and save the checkpoint:
```bash
python tools/convert_models/reparameterize_model.py ${CFG_PATH} ${SRC_CKPT_PATH} ${TARGET_CKPT_PATH}
```
`${CFG_PATH}` is the config file path, `${SRC_CKPT_PATH}` is the source checkpoint file path, and `${TARGET_CKPT_PATH}` is the path of the target deploy weight file.
For example:
```shell
python ./tools/convert_models/reparameterize_model.py ./configs/mobileone/mobileone-s0_8xb128_in1k.py https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s0_3rdparty_in1k_20220811-db5ce29b.pth ./mobileone_s0_deploy.pth
```
To use the reparameterized weights, switch to **the deploy config files**:
```bash
python tools/test.py ${Deploy_CFG} ${Deploy_Checkpoint} --metrics accuracy
```
For example, to use the reparameterized weights above:
```shell
python ./tools/test.py ./configs/mobileone/deploy/mobileone-s0_deploy_8xb128_in1k.py mobileone_s0_deploy.pth --metrics accuracy
```
### In the code
Use the `switch_to_deploy` API of the `MobileOne` backbone to switch to the deploy mode. It is usually called as `backbone.switch_to_deploy()` or `classifier.backbone.switch_to_deploy()`.
For Backbones:
```python
from mmcls.models import build_backbone
import torch

# A dummy input with the standard ImageNet shape.
x = torch.randn(1, 3, 224, 224)

backbone_cfg = dict(type='MobileOne', arch='s0')
backbone = build_backbone(backbone_cfg)
backbone.init_weights()
backbone.eval()
outs_ori = backbone(x)

# Fuse the re-parameterizable branches in place.
backbone.switch_to_deploy()
outs_dep = backbone(x)

# The deploy-mode outputs match the training-time outputs.
for out1, out2 in zip(outs_ori, outs_dep):
    assert torch.allclose(out1, out2)
```
For ImageClassifiers:
```python
from mmcls.models import build_classifier
import torch
import numpy as np

cfg = dict(
    type='ImageClassifier',
    backbone=dict(
        type='MobileOne',
        arch='s0',
        out_indices=(3, ),
    ),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        in_channels=1024,
        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
        topk=(1, 5),
    ))

x = torch.randn(1, 3, 224, 224)
classifier = build_classifier(cfg)
classifier.init_weights()
classifier.eval()
y_ori = classifier(x, return_loss=False)

# Only the backbone needs to be switched; the neck and head are unchanged.
classifier.backbone.switch_to_deploy()
y_dep = classifier(x, return_loss=False)

# The predictions are identical up to numerical precision.
for y1, y2 in zip(y_ori, y_dep):
    assert np.allclose(y1, y2)
```
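Note that `switch_to_deploy()` modifies the model in place, so load the training-time checkpoint before switching and switch only once. If you want to skip the conversion next time, you can save the fused weights afterwards, e.g. with `torch.save(classifier.state_dict(), 'mobileone_s0_deploy.pth')`.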
## Citation
```bibtex
@article{mobileone2022,
title={An Improved One millisecond Mobile Backbone},
author={Vasu, Pavan Kumar Anasosalu and Gabriel, James and Zhu, Jeff and Tuzel, Oncel and Ranjan, Anurag},
journal={arXiv preprint arXiv:2206.04040},
year={2022}
}
```

View File

@ -0,0 +1,3 @@
_base_ = ['../mobileone-s0_8xb128_in1k.py']
model = dict(backbone=dict(deploy=True))
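(This deploy config, and the four that follow, simply inherit the training config and set `deploy=True`, which, per the mmcls convention for re-parameterizable backbones, builds the backbone directly in its fused inference-time form so it can load the reparameterized weights produced above.)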

View File

@ -0,0 +1,3 @@
_base_ = ['../mobileone-s1_8xb128_in1k.py']
model = dict(backbone=dict(deploy=True))

View File

@ -0,0 +1,3 @@
_base_ = ['../mobileone-s2_8xb128_in1k.py']
model = dict(backbone=dict(deploy=True))

View File

@ -0,0 +1,3 @@
_base_ = ['../mobileone-s3_8xb128_in1k.py']
model = dict(backbone=dict(deploy=True))

View File

@ -0,0 +1,3 @@
_base_ = ['../mobileone-s4_8xb128_in1k.py']
model = dict(backbone=dict(deploy=True))

View File

@ -0,0 +1,98 @@
Collections:
- Name: MobileOne
Metadata:
Training Data: ImageNet-1k
Architecture:
- Re-parameterization Convolution
- VGG-style Neural Network
- Depthwise Convolution
- Pointwise Convolution
Paper:
URL: https://arxiv.org/abs/2206.04040
Title: 'An Improved One millisecond Mobile Backbone'
README: configs/mobileone/README.md
Code:
URL: https://github.com/open-mmlab/mmclassification/blob/v1.0.0rc1/configs/mobileone/metafile.yml
Version: v1.0.0rc1
Models:
- Name: mobileone-s0_3rdparty_8xb128_in1k
In Collection: MobileOne
Config: configs/mobileone/mobileone-s0_8xb128_in1k.py
Metadata:
FLOPs: 1091227648 # 1.09G
Parameters: 5293272 # 5.29M
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 71.36
Top 5 Accuracy: 89.87
Weights: https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s0_3rdparty_in1k_20220915-007ae971.pth
Converted From:
Weights: https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s0_unfused.pth.tar
Code: https://github.com/apple/ml-mobileone
- Name: mobileone-s1_3rdparty_8xb128_in1k
In Collection: MobileOne
Config: configs/mobileone/mobileone-s1_8xb128_in1k.py
Metadata:
FLOPs: 863491328 # 0.86G
Parameters: 4825192 # 4.83M
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 75.76
Top 5 Accuracy: 92.77
Weights: https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s1_3rdparty_in1k_20220915-473c8469.pth
Converted From:
Weights: https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s1_unfused.pth.tar
Code: https://github.com/apple/ml-mobileone
- Name: mobileone-s2_3rdparty_8xb128_in1k
In Collection: MobileOne
Config: configs/mobileone/mobileone-s2_8xb128_in1k.py
Metadata:
FLOPs: 1344083328
Parameters: 7884648
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 77.39
Top 5 Accuracy: 93.63
Weights: https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s2_3rdparty_in1k_20220915-ed2e4c30.pth
Converted From:
Weights: https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s2_unfused.pth.tar
Code: https://github.com/apple/ml-mobileone
- Name: mobileone-s3_3rdparty_8xb128_in1k
In Collection: MobileOne
Config: configs/mobileone/mobileone-s3_8xb128_in1k.py
Metadata:
FLOPs: 1951043584
Parameters: 10170600
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 77.93
Top 5 Accuracy: 93.89
Weights: https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s3_3rdparty_in1k_20220915-84d6a02c.pth
Converted From:
Weights: https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s3_unfused.pth.tar
Code: https://github.com/apple/ml-mobileone
- Name: mobileone-s4_3rdparty_8xb128_in1k
In Collection: MobileOne
Config: configs/mobileone/mobileone-s4_8xb128_in1k.py
Metadata:
FLOPs: 3052580688
Parameters: 14951248
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 79.30
Top 5 Accuracy: 94.37
Weights: https://download.openmmlab.com/mmclassification/v0/mobileone/mobileone-s4_3rdparty_in1k_20220915-ce9509ee.pth
Converted From:
Weights: https://docs-assets.developer.apple.com/ml-research/datasets/mobileone/mobileone_s4_unfused.pth.tar
Code: https://github.com/apple/ml-mobileone

View File

@ -0,0 +1,56 @@
_base_ = [
'../_base_/models/mobileone/mobileone_s0.py',
'../_base_/datasets/imagenet_bs32_pil_resize.py',
'../_base_/default_runtime.py'
]
# dataset settings
train_dataloader = dict(batch_size=128)
val_dataloader = dict(batch_size=128)
test_dataloader = dict(batch_size=128)
# schedule settings
optim_wrapper = dict(
optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001),
paramwise_cfg=dict(bias_decay_mult=0., norm_decay_mult=0.),
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=0.001,
by_epoch=True,
begin=0,
end=5,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(
type='CosineAnnealingLR',
T_max=295,
eta_min=1.0e-6,
by_epoch=True,
begin=5,
end=300),
dict(
type='CosineAnnealingParamScheduler',
param_name='weight_decay',
eta_min=0.00001,
by_epoch=True,
begin=0,
end=300)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=1024)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=5e-4, priority='ABOVE_NORMAL')]
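As a side note, the `auto_scale_lr` field above refers to the linear scaling rule. A minimal sketch of the arithmetic, assuming the default linear policy (the helper `scale_lr` is hypothetical):

```python
def scale_lr(base_lr: float, actual_batch_size: int,
             base_batch_size: int = 1024) -> float:
    """Linearly scale the learning rate with the total training batch size."""
    return base_lr * actual_batch_size / base_batch_size

# 8 GPUs x 128 images per GPU equals the base batch size of 1024,
# so the SGD lr of 0.1 configured above is left unchanged.
assert scale_lr(0.1, 8 * 128) == 0.1
```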

View File

@ -0,0 +1,15 @@
_base_ = [
'../_base_/models/mobileone/mobileone_s1.py',
'../_base_/datasets/imagenet_bs32_pil_resize.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# dataset settings
train_dataloader = dict(batch_size=128)
val_dataloader = dict(batch_size=128)
test_dataloader = dict(batch_size=128)
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=1024)

View File

@ -0,0 +1,15 @@
_base_ = [
'../_base_/models/mobileone/mobileone_s2.py',
'../_base_/datasets/imagenet_bs32_pil_resize.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# dataset settings
train_dataloader = dict(batch_size=128)
val_dataloader = dict(batch_size=128)
test_dataloader = dict(batch_size=128)
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=1024)

View File

@ -0,0 +1,15 @@
_base_ = [
'../_base_/models/mobileone/mobileone_s3.py',
'../_base_/datasets/imagenet_bs64_pil_resize.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# dataset settings
train_dataloader = dict(batch_size=128)
val_dataloader = dict(batch_size=128)
test_dataloader = dict(batch_size=128)
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=1024)

View File

@ -0,0 +1,15 @@
_base_ = [
'../_base_/models/mobileone/mobileone_s4.py',
'../_base_/datasets/imagenet_bs64_pil_resize.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# dataset settings
train_dataloader = dict(batch_size=128)
val_dataloader = dict(batch_size=128)
test_dataloader = dict(batch_size=128)
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=1024)

View File

@ -0,0 +1,44 @@
# MViT V2
> [MViTv2: Improved Multiscale Vision Transformers for Classification and Detection](http://openaccess.thecvf.com//content/CVPR2022/papers/Li_MViTv2_Improved_Multiscale_Vision_Transformers_for_Classification_and_Detection_CVPR_2022_paper.pdf)
<!-- [ALGORITHM] -->
## Abstract
In this paper, we study Multiscale Vision Transformers (MViTv2) as a unified architecture for image and video
classification, as well as object detection. We present an improved version of MViT that incorporates
decomposed relative positional embeddings and residual pooling connections. We instantiate this architecture
in five sizes and evaluate it for ImageNet classification, COCO detection and Kinetics video recognition where
it outperforms prior work. We further compare MViTv2s' pooling attention to window attention mechanisms where
it outperforms the latter in accuracy/compute. Without bells-and-whistles, MViTv2 has state-of-the-art
performance in 3 domains: 88.8% accuracy on ImageNet classification, 58.7 boxAP on COCO object detection as
well as 86.1% on Kinetics-400 video classification.
<div align=center>
<img src="https://user-images.githubusercontent.com/26739999/180376227-755243fa-158e-4068-940a-416036519665.png" width="50%"/>
</div>
## Results and models
### ImageNet-1k
| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------: | :----------: | :-------: | :------: | :-------: | :-------: | :-------------------------------------: | :--------------------------------------------------------------------------------------------------: |
| MViTv2-tiny\* | From scratch | 24.17 | 4.70 | 82.33 | 96.15 | [config](./mvitv2-tiny_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mvit/mvitv2-tiny_3rdparty_in1k_20220722-db7beeef.pth) |
| MViTv2-small\* | From scratch | 34.87 | 7.00 | 83.63 | 96.51 | [config](./mvitv2-small_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mvit/mvitv2-small_3rdparty_in1k_20220722-986bd741.pth) |
| MViTv2-base\* | From scratch | 51.47 | 10.20 | 84.34 | 96.86 | [config](./mvitv2-base_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mvit/mvitv2-base_3rdparty_in1k_20220722-9c4f0a17.pth) |
| MViTv2-large\* | From scratch | 217.99 | 42.10 | 85.25 | 97.14 | [config](./mvitv2-large_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/mvit/mvitv2-large_3rdparty_in1k_20220722-2b57b983.pth) |
*Models with * are converted from the [official repo](https://github.com/facebookresearch/mvit). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*
## Citation
```bibtex
@inproceedings{li2021improved,
title={MViTv2: Improved multiscale vision transformers for classification and detection},
author={Li, Yanghao and Wu, Chao-Yuan and Fan, Haoqi and Mangalam, Karttikeya and Xiong, Bo and Malik, Jitendra and Feichtenhofer, Christoph},
booktitle={CVPR},
year={2022}
}
```

View File

@ -0,0 +1,95 @@
Collections:
- Name: MViT V2
Metadata:
Architecture:
- Attention Dropout
- Convolution
- Dense Connections
- GELU
- Layer Normalization
- Scaled Dot-Product Attention
- Attention Pooling
Paper:
URL: http://openaccess.thecvf.com//content/CVPR2022/papers/Li_MViTv2_Improved_Multiscale_Vision_Transformers_for_Classification_and_Detection_CVPR_2022_paper.pdf
Title: 'MViTv2: Improved Multiscale Vision Transformers for Classification and Detection'
README: configs/mvit/README.md
Code:
URL: https://github.com/open-mmlab/mmclassification/blob/v0.24.0/mmcls/models/backbones/mvit.py
Version: v0.24.0
Models:
- Name: mvitv2-tiny_3rdparty_in1k
In Collection: MViT V2
Metadata:
FLOPs: 4703510768
Parameters: 24173320
Training Data:
- ImageNet-1k
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 82.33
Top 5 Accuracy: 96.15
Weights: https://download.openmmlab.com/mmclassification/v0/mvit/mvitv2-tiny_3rdparty_in1k_20220722-db7beeef.pth
Converted From:
Weights: https://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_T_in1k.pyth
Code: https://github.com/facebookresearch/mvit
Config: configs/mvit/mvitv2-tiny_8xb256_in1k.py
- Name: mvitv2-small_3rdparty_in1k
In Collection: MViT V2
Metadata:
FLOPs: 6997555136
Parameters: 34870216
Training Data:
- ImageNet-1k
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 83.63
Top 5 Accuracy: 96.51
Weights: https://download.openmmlab.com/mmclassification/v0/mvit/mvitv2-small_3rdparty_in1k_20220722-986bd741.pth
Converted From:
Weights: https://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_S_in1k.pyth
Code: https://github.com/facebookresearch/mvit
Config: configs/mvit/mvitv2-small_8xb256_in1k.py
- Name: mvitv2-base_3rdparty_in1k
In Collection: MViT V2
Metadata:
FLOPs: 10157964400
Parameters: 51472744
Training Data:
- ImageNet-1k
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 84.34
Top 5 Accuracy: 96.86
Weights: https://download.openmmlab.com/mmclassification/v0/mvit/mvitv2-base_3rdparty_in1k_20220722-9c4f0a17.pth
Converted From:
Weights: https://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_B_in1k.pyth
Code: https://github.com/facebookresearch/mvit
Config: configs/mvit/mvitv2-base_8xb256_in1k.py
- Name: mvitv2-large_3rdparty_in1k
In Collection: MViT V2
Metadata:
FLOPs: 43868151412
Parameters: 217992952
Training Data:
- ImageNet-1k
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 85.25
Top 5 Accuracy: 97.14
Weights: https://download.openmmlab.com/mmclassification/v0/mvit/mvitv2-large_3rdparty_in1k_20220722-2b57b983.pth
Converted From:
Weights: https://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_L_in1k.pyth
Code: https://github.com/facebookresearch/mvit
Config: configs/mvit/mvitv2-large_8xb256_in1k.py

View File

@ -0,0 +1,43 @@
_base_ = [
'../_base_/models/mvit/mvitv2-base.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py'
]
# dataset settings
train_dataloader = dict(batch_size=256)
val_dataloader = dict(batch_size=256)
test_dataloader = dict(batch_size=256)
# schedule settings
optim_wrapper = dict(
optimizer=dict(lr=2.5e-4),
paramwise_cfg=dict(
norm_decay_mult=0.0,
bias_decay_mult=0.0,
custom_keys={
'.pos_embed': dict(decay_mult=0.0),
'.rel_pos_h': dict(decay_mult=0.0),
'.rel_pos_w': dict(decay_mult=0.0)
}),
clip_grad=dict(max_norm=1.0),
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=70,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=70)
]
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)

View File

@ -0,0 +1,43 @@
_base_ = [
'../_base_/models/mvit/mvitv2-large.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs2048_AdamW.py',
'../_base_/default_runtime.py'
]
# dataset settings
train_dataloader = dict(batch_size=256)
val_dataloader = dict(batch_size=256)
test_dataloader = dict(batch_size=256)
# schedule settings
optim_wrapper = dict(
optimizer=dict(lr=2.5e-4),
paramwise_cfg=dict(
norm_decay_mult=0.0,
bias_decay_mult=0.0,
custom_keys={
'.pos_embed': dict(decay_mult=0.0),
'.rel_pos_h': dict(decay_mult=0.0),
'.rel_pos_w': dict(decay_mult=0.0)
}),
clip_grad=dict(max_norm=1.0),
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=70,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=70)
]
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)

View File

@ -0,0 +1,43 @@
_base_ = [
'../_base_/models/mvit/mvitv2-small.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs2048_AdamW.py',
'../_base_/default_runtime.py'
]
# dataset settings
train_dataloader = dict(batch_size=256)
val_dataloader = dict(batch_size=256)
test_dataloader = dict(batch_size=256)
# schedule settings
optim_wrapper = dict(
optimizer=dict(lr=2.5e-4),
paramwise_cfg=dict(
norm_decay_mult=0.0,
bias_decay_mult=0.0,
custom_keys={
'.pos_embed': dict(decay_mult=0.0),
'.rel_pos_h': dict(decay_mult=0.0),
'.rel_pos_w': dict(decay_mult=0.0)
}),
clip_grad=dict(max_norm=1.0),
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=70,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=70)
]
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)

View File

@ -0,0 +1,43 @@
_base_ = [
'../_base_/models/mvit/mvitv2-tiny.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs2048_AdamW.py',
'../_base_/default_runtime.py'
]
# dataset settings
train_dataloader = dict(batch_size=256)
val_dataloader = dict(batch_size=256)
test_dataloader = dict(batch_size=256)
# schedule settings
optim_wrapper = dict(
optimizer=dict(lr=2.5e-4),
paramwise_cfg=dict(
norm_decay_mult=0.0,
bias_decay_mult=0.0,
custom_keys={
'.pos_embed': dict(decay_mult=0.0),
'.rel_pos_h': dict(decay_mult=0.0),
'.rel_pos_w': dict(decay_mult=0.0)
}),
clip_grad=dict(max_norm=1.0),
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=70,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=70)
]
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# based on the actual training batch size.
auto_scale_lr = dict(base_batch_size=2048)

View File

@ -16,13 +16,13 @@ Transformers have shown great potential in computer vision tasks. A common belie
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :--------------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------------------------------------: | :--------------------------------------------------------------------------: |
| PoolFormer-S12\* | 11.92 | 1.87 | 77.24 | 93.51 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/poolformer/poolformer-s12_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth) |
| PoolFormer-S24\* | 21.39 | 3.51 | 80.33 | 95.05 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/poolformer/poolformer-s24_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s24_3rdparty_32xb128_in1k_20220414-d7055904.pth) |
| PoolFormer-S36\* | 30.86 | 5.15 | 81.43 | 95.45 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/poolformer/poolformer-s36_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s36_3rdparty_32xb128_in1k_20220414-d78ff3e8.pth) |
| PoolFormer-M36\* | 56.17 | 8.96 | 82.14 | 95.71 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/poolformer/poolformer-m36_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-m36_3rdparty_32xb128_in1k_20220414-c55e0949.pth) |
| PoolFormer-M48\* | 73.47 | 11.80 | 82.51 | 95.95 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/poolformer/poolformer-m48_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-m48_3rdparty_32xb128_in1k_20220414-9378f3eb.pth) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :--------------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------: | :---------------------------------------------------------------------------------------------------------: |
| PoolFormer-S12\* | 11.92 | 1.87 | 77.24 | 93.51 | [config](./poolformer-s12_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth) |
| PoolFormer-S24\* | 21.39 | 3.51 | 80.33 | 95.05 | [config](./poolformer-s24_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s24_3rdparty_32xb128_in1k_20220414-d7055904.pth) |
| PoolFormer-S36\* | 30.86 | 5.15 | 81.43 | 95.45 | [config](./poolformer-s36_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s36_3rdparty_32xb128_in1k_20220414-d78ff3e8.pth) |
| PoolFormer-M36\* | 56.17 | 8.96 | 82.14 | 95.71 | [config](./poolformer-m36_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-m36_3rdparty_32xb128_in1k_20220414-c55e0949.pth) |
| PoolFormer-M48\* | 73.47 | 11.80 | 82.51 | 95.95 | [config](./poolformer-m48_32xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-m48_3rdparty_32xb128_in1k_20220414-9378f3eb.pth) |
*Models with * are converted from the [official repo](https://github.com/sail-sg/poolformer). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*

View File

@ -19,7 +19,7 @@ Models:
Metadata:
FLOPs: 1871399424
Parameters: 11915176
In Collections: PoolFormer
In Collection: PoolFormer
Results:
- Dataset: ImageNet-1k
Metrics:
@ -36,7 +36,7 @@ Models:
Training Data: ImageNet-1k
FLOPs: 3510411008
Parameters: 21388968
In Collections: PoolFormer
In Collection: PoolFormer
Results:
- Dataset: ImageNet-1k
Metrics:
@ -52,7 +52,7 @@ Models:
Metadata:
FLOPs: 5149422592
Parameters: 30862760
In Collections: PoolFormer
In Collection: PoolFormer
Results:
- Dataset: ImageNet-1k
Metrics:
@ -69,7 +69,7 @@ Models:
Training Data: ImageNet-1k
FLOPs: 8960175744
Parameters: 56172520
In Collections: PoolFormer
In Collection: PoolFormer
Results:
- Dataset: ImageNet-1k
Metrics:
@ -85,7 +85,7 @@ Models:
Metadata:
FLOPs: 11801805696
Parameters: 73473448
In Collections: PoolFormer
In Collection: PoolFormer
Results:
- Dataset: ImageNet-1k
Metrics:

View File

@ -16,24 +16,24 @@ In this work, we present a new network design paradigm. Our goal is to help adva
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-------------: | :-------: | :------: | :-------: | :-------: | :------------------------------------------------------------------------: | :--------------------------------------------------------------------------: |
| RegNetX-400MF | 5.16 | 0.41 | 72.56 | 90.78 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-400mf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-400mf_8xb128_in1k_20211213-89bfc226.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-400mf_8xb128_in1k_20211208_143316.log.json) |
| RegNetX-800MF | 7.26 | 0.81 | 74.76 | 92.32 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-800mf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-800mf_8xb128_in1k_20211213-222b0f11.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-800mf_8xb128_in1k_20211207_143037.log.json) |
| RegNetX-1.6GF | 9.19 | 1.63 | 76.84 | 93.31 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-1.6gf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-1.6gf_8xb128_in1k_20211213-d1b89758.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-1.6gf_8xb128_in1k_20211208_143018.log.json) |
| RegNetX-3.2GF | 15.3 | 3.21 | 78.09 | 94.08 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-3.2gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-3.2gf_8xb64_in1k_20211213-1fdd82ae.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-3.2gf_8xb64_in1k_20211208_142720.log.json) |
| RegNetX-4.0GF | 22.12 | 4.0 | 78.60 | 94.17 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-4.0gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-4.0gf_8xb64_in1k_20211213-efed675c.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-4.0gf_8xb64_in1k_20211207_150431.log.json) |
| RegNetX-6.4GF | 26.21 | 6.51 | 79.38 | 94.65 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-6.4gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-6.4gf_8xb64_in1k_20211215-5c6089da.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-6.4gf_8xb64_in1k_20211213_172748.log.json) |
| RegNetX-8.0GF | 39.57 | 8.03 | 79.12 | 94.51 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-8.0gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-8.0gf_8xb64_in1k_20211213-9a9fcc76.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-8.0gf_8xb64_in1k_20211208_103250.log.json) |
| RegNetX-12GF | 46.11 | 12.15 | 79.67 | 95.03 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-12gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-12gf_8xb64_in1k_20211213-5df8c2f8.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-12gf_8xb64_in1k_20211208_143713.log.json) |
| RegNetX-400MF\* | 5.16 | 0.41 | 72.55 | 90.91 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-400mf_8xb128_in1k) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-400MF-0db9f35c.pth) |
| RegNetX-800MF\* | 7.26 | 0.81 | 75.21 | 92.37 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-800mf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-800MF-4f9d1e8a.pth) |
| RegNetX-1.6GF\* | 9.19 | 1.63 | 77.04 | 93.51 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-1.6gf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-1.6GF-cfb32375.pth) |
| RegNetX-3.2GF\* | 15.3 | 3.21 | 78.26 | 94.20 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-3.2gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-3.2GF-82c43fd5.pth) |
| RegNetX-4.0GF\* | 22.12 | 4.0 | 78.72 | 94.22 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-4.0gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-4.0GF-ef8bb32c.pth) |
| RegNetX-6.4GF\* | 26.21 | 6.51 | 79.22 | 94.61 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-6.4gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-6.4GF-6888c0ea.pth) |
| RegNetX-8.0GF\* | 39.57 | 8.03 | 79.31 | 94.57 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-8.0gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-8.0GF-cb4c77ec.pth) |
| RegNetX-12GF\* | 46.11 | 12.15 | 79.91 | 94.78 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/regnet/regnetx-12gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-12GF-0574538f.pth) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-------------: | :-------: | :------: | :-------: | :-------: | :--------------------------------------: | :------------------------------------------------------------------------------------------------------------: |
| RegNetX-400MF | 5.16 | 0.41 | 72.56 | 90.78 | [config](./regnetx-400mf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-400mf_8xb128_in1k_20211213-89bfc226.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-400mf_8xb128_in1k_20211208_143316.log.json) |
| RegNetX-800MF | 7.26 | 0.81 | 74.76 | 92.32 | [config](./regnetx-800mf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-800mf_8xb128_in1k_20211213-222b0f11.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-800mf_8xb128_in1k_20211207_143037.log.json) |
| RegNetX-1.6GF | 9.19 | 1.63 | 76.84 | 93.31 | [config](./regnetx-1.6gf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-1.6gf_8xb128_in1k_20211213-d1b89758.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-1.6gf_8xb128_in1k_20211208_143018.log.json) |
| RegNetX-3.2GF | 15.3 | 3.21 | 78.09 | 94.08 | [config](./regnetx-3.2gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-3.2gf_8xb64_in1k_20211213-1fdd82ae.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-3.2gf_8xb64_in1k_20211208_142720.log.json) |
| RegNetX-4.0GF | 22.12 | 4.0 | 78.60 | 94.17 | [config](./regnetx-4.0gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-4.0gf_8xb64_in1k_20211213-efed675c.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-4.0gf_8xb64_in1k_20211207_150431.log.json) |
| RegNetX-6.4GF | 26.21 | 6.51 | 79.38 | 94.65 | [config](./regnetx-6.4gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-6.4gf_8xb64_in1k_20211215-5c6089da.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-6.4gf_8xb64_in1k_20211213_172748.log.json) |
| RegNetX-8.0GF | 39.57 | 8.03 | 79.12 | 94.51 | [config](./regnetx-8.0gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-8.0gf_8xb64_in1k_20211213-9a9fcc76.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-8.0gf_8xb64_in1k_20211208_103250.log.json) |
| RegNetX-12GF | 46.11 | 12.15 | 79.67 | 95.03 | [config](./regnetx-12gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-12gf_8xb64_in1k_20211213-5df8c2f8.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/regnet/regnetx-12gf_8xb64_in1k_20211208_143713.log.json) |
| RegNetX-400MF\* | 5.16 | 0.41 | 72.55 | 90.91 | [config](./regnetx-400mf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-400MF-0db9f35c.pth) |
| RegNetX-800MF\* | 7.26 | 0.81 | 75.21 | 92.37 | [config](./regnetx-800mf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-800MF-4f9d1e8a.pth) |
| RegNetX-1.6GF\* | 9.19 | 1.63 | 77.04 | 93.51 | [config](./regnetx-1.6gf_8xb128_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-1.6GF-cfb32375.pth) |
| RegNetX-3.2GF\* | 15.3 | 3.21 | 78.26 | 94.20 | [config](./regnetx-3.2gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-3.2GF-82c43fd5.pth) |
| RegNetX-4.0GF\* | 22.12 | 4.0 | 78.72 | 94.22 | [config](./regnetx-4.0gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-4.0GF-ef8bb32c.pth) |
| RegNetX-6.4GF\* | 26.21 | 6.51 | 79.22 | 94.61 | [config](./regnetx-6.4gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-6.4GF-6888c0ea.pth) |
| RegNetX-8.0GF\* | 39.57 | 8.03 | 79.31 | 94.57 | [config](./regnetx-8.0gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-8.0GF-cb4c77ec.pth) |
| RegNetX-12GF\* | 46.11 | 12.15 | 79.91 | 94.78 | [config](./regnetx-12gf_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/regnet/convert/RegNetX-12GF-0574538f.pth) |
*Models with * are converted from [pycls](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md). The config files of these models are only for validation.*

View File

@ -18,8 +18,8 @@ We propose RepMLP, a multi-layer-perceptron-style neural network building block
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------: | :-------: | :------: | :-------: | :-------: | :-------------------------------------------------------------------------: | :---------------------------------------------------------------------------: |
| RepMLP-B224\* | 68.24 | 6.71 | 80.41 | 95.12 | [train_cfg](https://github.com/open-mmlab/mmclassification/blob/master/configs/repmlp/repmlp-base_8xb64_in1k.py) \| [deploy_cfg](https://github.com/open-mmlab/mmclassification/blob/master/configs/repmlp/repmlp-base_delopy_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repmlp/repmlp-base_3rdparty_8xb64_in1k_20220330-1cb1f11b.pth) |
| RepMLP-B256\* | 96.45 | 9.69 | 81.11 | 95.5 | [train_cfg](https://github.com/open-mmlab/mmclassification/blob/master/configs/repmlp/repmlp-base_8xb64_in1k-256px.py) \| [deploy_cfg](https://github.com/open-mmlab/mmclassification/blob/master/configs/repmlp/repmlp-b256_deploy_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repmlp/repmlp-base_3rdparty_8xb64_in1k-256px_20220330-7c5a91ce.pth) |
| RepMLP-B224\* | 68.24 | 6.71 | 80.41 | 95.12 | [train_cfg](./repmlp-base_8xb64_in1k.py) \| [deploy_cfg](./repmlp-base_delopy_8xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repmlp/repmlp-base_3rdparty_8xb64_in1k_20220330-1cb1f11b.pth) |
| RepMLP-B256\* | 96.45 | 9.69 | 81.11 | 95.5 | [train_cfg](./repmlp-base_8xb64_in1k-256px.py) \| [deploy_cfg](./repmlp-base_deploy_8xb64_in1k-256px.py) | [model](https://download.openmmlab.com/mmclassification/v0/repmlp/repmlp-base_3rdparty_8xb64_in1k-256px_20220330-7c5a91ce.pth) |
*Models with * are converted from [the official repo](https://github.com/DingXiaoH/RepMLP). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*

View File

@ -18,18 +18,18 @@ We present a simple but powerful architecture of convolutional neural network, w
| Model | Epochs | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------: | :----: | :-------------------------------: | :-----------------------------: | :-------: | :-------: | :----------------------------------------------: | :-------------------------------------------------: |
| RepVGG-A0\* | 120 | 9.11 (train) \| 8.31 (deploy) | 1.52 (train) \| 1.36 (deploy) | 72.41 | 90.50 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-A0_4xb64-coslr-120e_in1k.py) \| [config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-A0_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth) |
| RepVGG-A1\* | 120 | 14.09 (train) \| 12.79 (deploy) | 2.64 (train) \| 2.37 (deploy) | 74.47 | 91.85 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-A1_4xb64-coslr-120e_in1k.py) \| [config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-A1_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A1_3rdparty_4xb64-coslr-120e_in1k_20210909-24003a24.pth) |
| RepVGG-A2\* | 120 | 28.21 (train) \| 25.5 (deploy) | 5.7 (train) \| 5.12 (deploy) | 76.48 | 93.01 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-A2_4xb64-coslr-120e_in1k.py) \|[config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-A2_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A2_3rdparty_4xb64-coslr-120e_in1k_20210909-97d7695a.pth) |
| RepVGG-B0\* | 120 | 15.82 (train) \| 14.34 (deploy) | 3.42 (train) \| 3.06 (deploy) | 75.14 | 92.42 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-B0_4xb64-coslr-120e_in1k.py) \|[config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-B0_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B0_3rdparty_4xb64-coslr-120e_in1k_20210909-446375f4.pth) |
| RepVGG-B1\* | 120 | 57.42 (train) \| 51.83 (deploy) | 13.16 (train) \| 11.82 (deploy) | 78.37 | 94.11 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-B1_4xb64-coslr-120e_in1k.py) \|[config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-B1_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1_3rdparty_4xb64-coslr-120e_in1k_20210909-750cdf67.pth) |
| RepVGG-B1g2\* | 120 | 45.78 (train) \| 41.36 (deploy) | 9.82 (train) \| 8.82 (deploy) | 77.79 | 93.88 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-B1g2_4xb64-coslr-120e_in1k.py) \|[config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-B1g2_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1g2_3rdparty_4xb64-coslr-120e_in1k_20210909-344f6422.pth) |
| RepVGG-B1g4\* | 120 | 39.97 (train) \| 36.13 (deploy) | 8.15 (train) \| 7.32 (deploy) | 77.58 | 93.84 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-B1g4_4xb64-coslr-120e_in1k.py) \|[config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-B1g4_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1g4_3rdparty_4xb64-coslr-120e_in1k_20210909-d4c1a642.pth) |
| RepVGG-B2\* | 120 | 89.02 (train) \| 80.32 (deploy) | 20.46 (train) \| 18.39 (deploy) | 78.78 | 94.42 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-B2_4xb64-coslr-120e_in1k.py) \|[config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-B2_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B2_3rdparty_4xb64-coslr-120e_in1k_20210909-bd6b937c.pth) |
| RepVGG-B2g4\* | 200 | 61.76 (train) \| 55.78 (deploy) | 12.63 (train) \| 11.34 (deploy) | 79.38 | 94.68 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-B2g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) \|[config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-B2g4_deploy_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B2g4_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-7b7955f0.pth) |
| RepVGG-B3\* | 200 | 123.09 (train) \| 110.96 (deploy) | 29.17 (train) \| 26.22 (deploy) | 80.52 | 95.26 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) \|[config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-B3_deploy_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B3_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-dda968bf.pth) |
| RepVGG-B3g4\* | 200 | 83.83 (train) \| 75.63 (deploy) | 17.9 (train) \| 16.08 (deploy) | 80.22 | 95.10 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-B3g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) \|[config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-B3g4_deploy_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B3g4_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-4e54846a.pth) |
| RepVGG-D2se\* | 200 | 133.33 (train) \| 120.39 (deploy) | 36.56 (train) \| 32.85 (deploy) | 81.81 | 95.94 | [config (train)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/repvgg-D2se_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) \|[config (deploy)](https://github.com/open-mmlab/mmclassification/blob/master/configs/repvgg/deploy/repvgg-D2se_deploy_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-D2se_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-cf3139b7.pth) |
| RepVGG-A0\* | 120 | 9.11 (train) \| 8.31 (deploy) | 1.52 (train) \| 1.36 (deploy) | 72.41 | 90.50 | [config (train)](./repvgg-A0_4xb64-coslr-120e_in1k.py) \| [config (deploy)](./deploy/repvgg-A0_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth) |
| RepVGG-A1\* | 120 | 14.09 (train) \| 12.79 (deploy) | 2.64 (train) \| 2.37 (deploy) | 74.47 | 91.85 | [config (train)](./repvgg-A1_4xb64-coslr-120e_in1k.py) \| [config (deploy)](./deploy/repvgg-A1_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A1_3rdparty_4xb64-coslr-120e_in1k_20210909-24003a24.pth) |
| RepVGG-A2\* | 120 | 28.21 (train) \| 25.5 (deploy) | 5.7 (train) \| 5.12 (deploy) | 76.48 | 93.01 | [config (train)](./repvgg-A2_4xb64-coslr-120e_in1k.py) \| [config (deploy)](./deploy/repvgg-A2_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A2_3rdparty_4xb64-coslr-120e_in1k_20210909-97d7695a.pth) |
| RepVGG-B0\* | 120 | 15.82 (train) \| 14.34 (deploy) | 3.42 (train) \| 3.06 (deploy) | 75.14 | 92.42 | [config (train)](./repvgg-B0_4xb64-coslr-120e_in1k.py) \| [config (deploy)](./deploy/repvgg-B0_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B0_3rdparty_4xb64-coslr-120e_in1k_20210909-446375f4.pth) |
| RepVGG-B1\* | 120 | 57.42 (train) \| 51.83 (deploy) | 13.16 (train) \| 11.82 (deploy) | 78.37 | 94.11 | [config (train)](./repvgg-B1_4xb64-coslr-120e_in1k.py) \| [config (deploy)](./deploy/repvgg-B1_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1_3rdparty_4xb64-coslr-120e_in1k_20210909-750cdf67.pth) |
| RepVGG-B1g2\* | 120 | 45.78 (train) \| 41.36 (deploy) | 9.82 (train) \| 8.82 (deploy) | 77.79 | 93.88 | [config (train)](./repvgg-B1g2_4xb64-coslr-120e_in1k.py) \| [config (deploy)](./deploy/repvgg-B1g2_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1g2_3rdparty_4xb64-coslr-120e_in1k_20210909-344f6422.pth) |
| RepVGG-B1g4\* | 120 | 39.97 (train) \| 36.13 (deploy) | 8.15 (train) \| 7.32 (deploy) | 77.58 | 93.84 | [config (train)](./repvgg-B1g4_4xb64-coslr-120e_in1k.py) \| [config (deploy)](./deploy/repvgg-B1g4_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1g4_3rdparty_4xb64-coslr-120e_in1k_20210909-d4c1a642.pth) |
| RepVGG-B2\* | 120 | 89.02 (train) \| 80.32 (deploy) | 20.46 (train) \| 18.39 (deploy) | 78.78 | 94.42 | [config (train)](./repvgg-B2_4xb64-coslr-120e_in1k.py) \| [config (deploy)](./deploy/repvgg-B2_deploy_4xb64-coslr-120e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B2_3rdparty_4xb64-coslr-120e_in1k_20210909-bd6b937c.pth) |
| RepVGG-B2g4\* | 200 | 61.76 (train) \| 55.78 (deploy) | 12.63 (train) \| 11.34 (deploy) | 79.38 | 94.68 | [config (train)](./repvgg-B2g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) \| [config (deploy)](./deploy/repvgg-B2g4_deploy_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B2g4_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-7b7955f0.pth) |
| RepVGG-B3\* | 200 | 123.09 (train) \| 110.96 (deploy) | 29.17 (train) \| 26.22 (deploy) | 80.52 | 95.26 | [config (train)](./repvgg-B3_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) \| [config (deploy)](./deploy/repvgg-B3_deploy_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B3_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-dda968bf.pth) |
| RepVGG-B3g4\* | 200 | 83.83 (train) \| 75.63 (deploy) | 17.9 (train) \| 16.08 (deploy) | 80.22 | 95.10 | [config (train)](./repvgg-B3g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) \| [config (deploy)](./deploy/repvgg-B3g4_deploy_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B3g4_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-4e54846a.pth) |
| RepVGG-D2se\* | 200 | 133.33 (train) \| 120.39 (deploy) | 36.56 (train) \| 32.85 (deploy) | 81.81 | 95.94 | [config (train)](./repvgg-D2se_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) \| [config (deploy)](./deploy/repvgg-D2se_deploy_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-D2se_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-cf3139b7.pth) |
*Models with * are converted from the [official repo](https://github.com/DingXiaoH/RepVGG). The config files of these models are only for validation purposes. We do not guarantee the training accuracy of these configs and welcome you to contribute your reproduction results.*
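The "train" and "deploy" columns above come from RepVGG's structural re-parameterization: during training each block holds parallel 3x3, 1x1 and identity branches (each followed by its own BN), and for deployment these are algebraically fused into a single 3x3 convolution, which is why the deploy checkpoints have fewer parameters and FLOPs at identical accuracy. Below is a minimal sketch of the branch fusion, assuming plain `nn.Conv2d`/`nn.BatchNorm2d` pairs; it mirrors the conversion logic of the official repo rather than the mmcls API, and the identity branch (handled analogously) is omitted for brevity.

```python
import torch.nn.functional as F

def fuse_conv_bn(weight, bn):
    """Fold a BatchNorm into the preceding bias-free conv's weight and bias."""
    std = (bn.running_var + bn.eps).sqrt()
    scale = (bn.weight / std).reshape(-1, 1, 1, 1)
    return weight * scale, bn.bias - bn.running_mean * bn.weight / std

def reparameterize(conv3x3, bn3, conv1x1, bn1):
    """Fuse the 3x3 and 1x1 branches of a RepVGG-style block into one 3x3 conv."""
    w3, b3 = fuse_conv_bn(conv3x3.weight, bn3)
    w1, b1 = fuse_conv_bn(conv1x1.weight, bn1)
    # Zero-pad the 1x1 kernel to 3x3 so the two branches can be summed.
    return w3 + F.pad(w1, [1, 1, 1, 1]), b3 + b1
```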


@@ -16,11 +16,11 @@ Representing features at multiple scales is of great importance for numerous vis
### ImageNet-1k
| Model | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------------: | :--------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------: | :-------------------------------------------------------------------: |
| Res2Net-50-14w-8s\* | 224x224 | 25.06 | 4.22 | 78.14 | 93.85 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/res2net/res2net50-w14-s8_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/res2net/res2net50-w14-s8_3rdparty_8xb32_in1k_20210927-bc967bf1.pth) |
| Res2Net-50-26w-8s\* | 224x224 | 48.40 | 8.39 | 79.20 | 94.36 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/res2net/res2net50-w26-s8_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/res2net/res2net50-w26-s8_3rdparty_8xb32_in1k_20210927-f547a94b.pth) |
| Res2Net-101-26w-4s\* | 224x224 | 45.21 | 8.12 | 79.19 | 94.44 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/res2net/res2net101-w26-s4_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/res2net/res2net101-w26-s4_3rdparty_8xb32_in1k_20210927-870b6c36.pth) |
| Model | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------------: | :--------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------: | :------------------------------------------------------------------------------------------: |
| Res2Net-50-14w-8s\* | 224x224 | 25.06 | 4.22 | 78.14 | 93.85 | [config](./res2net50-w14-s8_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/res2net/res2net50-w14-s8_3rdparty_8xb32_in1k_20210927-bc967bf1.pth) |
| Res2Net-50-26w-8s\* | 224x224 | 48.40 | 8.39 | 79.20 | 94.36 | [config](./res2net50-w26-s8_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/res2net/res2net50-w26-s8_3rdparty_8xb32_in1k_20210927-f547a94b.pth) |
| Res2Net-101-26w-4s\* | 224x224 | 45.21 | 8.12 | 79.19 | 94.44 | [config](./res2net101-w26-s4_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/res2net/res2net101-w26-s4_3rdparty_8xb32_in1k_20210927-870b6c36.pth) |
*Models with * are converted from the [official repo](https://github.com/Res2Net/Res2Net-PretrainedModels). The config files of these models are only for validation purposes. We do not guarantee the training accuracy of these configs and welcome you to contribute your reproduction results.*
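In the model names, `w` and `s` give the width of each feature split and the number of scales per bottleneck: after the entry 1x1 conv, the channels are split into `s` groups, and every group after the first is convolved together with the output of the previous group, so a single block mixes receptive fields of several sizes. A minimal sketch of this hierarchical split, for illustration only (not the mmcls implementation):

```python
import torch
import torch.nn as nn

class Res2NetSplit(nn.Module):
    """Hierarchical multi-scale 3x3 convs inside one bottleneck (sketch)."""

    def __init__(self, width, scales):
        super().__init__()
        self.scales = scales
        self.convs = nn.ModuleList(
            nn.Conv2d(width, width, 3, padding=1, bias=False)
            for _ in range(scales - 1))

    def forward(self, x):
        # x has scales * width channels, e.g. 8 splits of width 14 for "14w-8s".
        splits = torch.chunk(x, self.scales, dim=1)
        outs, prev = [splits[0]], None
        for conv, split in zip(self.convs, splits[1:]):
            prev = conv(split if prev is None else split + prev)
            outs.append(prev)
        return torch.cat(outs, dim=1)
```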


@@ -26,41 +26,41 @@ The pre-trained models on ImageNet-21k are used to fine-tune, and therefore don'
### Cifar10
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :--------: | :-------: | :------: | :-------: | :-------: | :--------------------------------------------------------------------------: | :-----------------------------------------------------------------------------: |
| ResNet-18 | 11.17 | 0.56 | 94.82 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.log.json) |
| ResNet-34 | 21.28 | 1.16 | 95.34 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.log.json) |
| ResNet-50 | 23.52 | 1.31 | 95.55 | 99.91 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.log.json) |
| ResNet-101 | 42.51 | 2.52 | 95.58 | 99.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet101_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.log.json) |
| ResNet-152 | 58.16 | 3.74 | 95.76 | 99.89 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet152_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.log.json) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :--------: | :-------: | :------: | :-------: | :-------: | :------------------------------------: | :-------------------------------------------------------------------------------------------------------------------: |
| ResNet-18 | 11.17 | 0.56 | 94.82 | 99.87 | [config](./resnet18_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.log.json) |
| ResNet-34 | 21.28 | 1.16 | 95.34 | 99.87 | [config](./resnet34_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_b16x8_cifar10_20210528-a8aa36a6.log.json) |
| ResNet-50 | 23.52 | 1.31 | 95.55 | 99.91 | [config](./resnet50_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar10_20210528-f54bfad9.log.json) |
| ResNet-101 | 42.51 | 2.52 | 95.58 | 99.87 | [config](./resnet101_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_b16x8_cifar10_20210528-2d29e936.log.json) |
| ResNet-152 | 58.16 | 3.74 | 95.76 | 99.89 | [config](./resnet152_8xb16_cifar10.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_b16x8_cifar10_20210528-3e8e9178.log.json) |
### Cifar100
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-------: | :-------: | :------: | :-------: | :-------: | :---------------------------------------------------------------------------: | :-----------------------------------------------------------------------------: |
| ResNet-50 | 23.71 | 1.31 | 79.90 | 95.19 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb16_cifar100.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.log.json) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-------: | :-------: | :------: | :-------: | :-------: | :------------------------------------: | :--------------------------------------------------------------------------------------------------------------------: |
| ResNet-50 | 23.71 | 1.31 | 79.90 | 95.19 | [config](./resnet50_8xb16_cifar100.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_b16x8_cifar100_20210528-67b58a1b.log.json) |
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :----------------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------------: | :-------------------------------------------------------------------------: |
| ResNet-18 | 11.69 | 1.82 | 69.90 | 89.43 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet18_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.log.json) |
| ResNet-34 | 21.8 | 3.68 | 73.62 | 91.59 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet34_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.log.json) |
| ResNet-50 | 25.56 | 4.12 | 76.55 | 93.06 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.log.json) |
| ResNet-101 | 44.55 | 7.85 | 77.97 | 94.06 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet101_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_8xb32_in1k_20210831-539c63f8.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_8xb32_in1k_20210831-539c63f8.log.json) |
| ResNet-152 | 60.19 | 11.58 | 78.48 | 94.13 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet152_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_8xb32_in1k_20210901-4d7582fa.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_8xb32_in1k_20210901-4d7582fa.log.json) |
| ResNetV1C-50 | 25.58 | 4.36 | 77.01 | 93.58 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnetv1c50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c50_8xb32_in1k_20220214-3343eccd.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c50_8xb32_in1k_20220214-3343eccd.log.json) |
| ResNetV1C-101 | 44.57 | 8.09 | 78.30 | 94.27 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnetv1c101_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c101_8xb32_in1k_20220214-434fe45f.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c101_8xb32_in1k_20220214-434fe45f.log.json) |
| ResNetV1C-152 | 60.21 | 11.82 | 78.76 | 94.41 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnetv1c152_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c152_8xb32_in1k_20220214-c013291f.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c152_8xb32_in1k_20220214-c013291f.log.json) |
| ResNetV1D-50 | 25.58 | 4.36 | 77.54 | 93.57 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnetv1d50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.log.json) |
| ResNetV1D-101 | 44.57 | 8.09 | 78.93 | 94.48 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnetv1d101_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_b32x8_imagenet_20210531-6e13bcd3.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_b32x8_imagenet_20210531-6e13bcd3.log.json) |
| ResNetV1D-152 | 60.21 | 11.82 | 79.41 | 94.70 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnetv1d152_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_b32x8_imagenet_20210531-278cf22a.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_b32x8_imagenet_20210531-278cf22a.log.json) |
| ResNet-50 (fp16) | 25.56 | 4.12 | 76.30 | 93.07 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb32-fp16_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/fp16/resnet50_batch256_fp16_imagenet_20210320-b3964210.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/fp16/resnet50_batch256_fp16_imagenet_20210320-b3964210.log.json) |
| Wide-ResNet-50\* | 68.88 | 11.44 | 78.48 | 94.08 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/wide-resnet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/wide-resnet50_3rdparty_8xb32_in1k_20220304-66678344.pth) |
| Wide-ResNet-101\* | 126.89 | 22.81 | 78.84 | 94.28 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet101_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/wide-resnet101_3rdparty_8xb32_in1k_20220304-8d5f9d61.pth) |
| ResNet-50 (rsb-a1) | 25.56 | 4.12 | 80.12 | 94.78 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb256-rsb-a1-600e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.log.json) |
| ResNet-50 (rsb-a2) | 25.56 | 4.12 | 79.55 | 94.37 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb256-rsb-a2-300e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a2-300e_in1k_20211228-0fd8be6e.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a2-300e_in1k_20211228-0fd8be6e.log.json) |
| ResNet-50 (rsb-a3) | 25.56 | 4.12 | 78.30 | 93.80 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb256-rsb-a3-100e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a3-100e_in1k_20211228-3493673c.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a3-100e_in1k_20211228-3493673c.log.json) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :----------------: | :-------: | :------: | :-------: | :-------: | :---------------------------------------------: | :--------------------------------------------------------------------------------------------------: |
| ResNet-18 | 11.69 | 1.82 | 69.90 | 89.43 | [config](./resnet18_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.log.json) |
| ResNet-34 | 21.8 | 3.68 | 73.62 | 91.59 | [config](./resnet34_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.log.json) |
| ResNet-50 | 25.56 | 4.12 | 76.55 | 93.06 | [config](./resnet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.log.json) |
| ResNet-101 | 44.55 | 7.85 | 77.97 | 94.06 | [config](./resnet101_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_8xb32_in1k_20210831-539c63f8.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_8xb32_in1k_20210831-539c63f8.log.json) |
| ResNet-152 | 60.19 | 11.58 | 78.48 | 94.13 | [config](./resnet152_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_8xb32_in1k_20210901-4d7582fa.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_8xb32_in1k_20210901-4d7582fa.log.json) |
| ResNetV1C-50 | 25.58 | 4.36 | 77.01 | 93.58 | [config](./resnetv1c50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c50_8xb32_in1k_20220214-3343eccd.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c50_8xb32_in1k_20220214-3343eccd.log.json) |
| ResNetV1C-101 | 44.57 | 8.09 | 78.30 | 94.27 | [config](./resnetv1c101_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c101_8xb32_in1k_20220214-434fe45f.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c101_8xb32_in1k_20220214-434fe45f.log.json) |
| ResNetV1C-152 | 60.21 | 11.82 | 78.76 | 94.41 | [config](./resnetv1c152_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c152_8xb32_in1k_20220214-c013291f.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1c152_8xb32_in1k_20220214-c013291f.log.json) |
| ResNetV1D-50 | 25.58 | 4.36 | 77.54 | 93.57 | [config](./resnetv1d50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.log.json) |
| ResNetV1D-101 | 44.57 | 8.09 | 78.93 | 94.48 | [config](./resnetv1d101_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_b32x8_imagenet_20210531-6e13bcd3.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_b32x8_imagenet_20210531-6e13bcd3.log.json) |
| ResNetV1D-152 | 60.21 | 11.82 | 79.41 | 94.70 | [config](./resnetv1d152_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_b32x8_imagenet_20210531-278cf22a.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_b32x8_imagenet_20210531-278cf22a.log.json) |
| ResNet-50 (fp16) | 25.56 | 4.12 | 76.30 | 93.07 | [config](./resnet50_8xb32-fp16_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/fp16/resnet50_batch256_fp16_imagenet_20210320-b3964210.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/fp16/resnet50_batch256_fp16_imagenet_20210320-b3964210.log.json) |
| Wide-ResNet-50\* | 68.88 | 11.44 | 78.48 | 94.08 | [config](../wrn/wide-resnet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/wide-resnet50_3rdparty_8xb32_in1k_20220304-66678344.pth) |
| Wide-ResNet-101\* | 126.89 | 22.81 | 78.84 | 94.28 | [config](../wrn/wide-resnet101_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/wide-resnet101_3rdparty_8xb32_in1k_20220304-8d5f9d61.pth) |
| ResNet-50 (rsb-a1) | 25.56 | 4.12 | 80.12 | 94.78 | [config](./resnet50_8xb256-rsb-a1-600e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.log.json) |
| ResNet-50 (rsb-a2) | 25.56 | 4.12 | 79.55 | 94.37 | [config](./resnet50_8xb256-rsb-a2-300e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a2-300e_in1k_20211228-0fd8be6e.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a2-300e_in1k_20211228-0fd8be6e.log.json) |
| ResNet-50 (rsb-a3) | 25.56 | 4.12 | 78.30 | 93.80 | [config](./resnet50_8xb256-rsb-a3-100e_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a3-100e_in1k_20211228-3493673c.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a3-100e_in1k_20211228-3493673c.log.json) |
*The "rsb" means using the training settings from [ResNet strikes back: An improved training procedure in timm](https://arxiv.org/abs/2110.00476).*
@@ -68,9 +68,9 @@ The pre-trained models on ImageNet-21k are used to fine-tune, and therefore don'
### CUB-200-2011
| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Config | Download |
| :-------: | :--------------------------------------------------: | :--------: | :-------: | :------: | :-------: | :------------------------------------------------: | :---------------------------------------------------: |
| ResNet-50 | [ImageNet-21k-mill](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_3rdparty-mill_in21k_20220331-faac000b.pth) | 448x448 | 23.92 | 16.48 | 88.45 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_8xb8_cub.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb8_cub_20220307-57840e60.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb8_cub_20220307-57840e60.log.json) |
| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Config | Download |
| :-------: | :-----------------------------------------------------------: | :--------: | :-------: | :------: | :-------: | :------------------------------: | :------------------------------------------------------------: |
| ResNet-50 | [ImageNet-21k-mill](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_3rdparty-mill_in21k_20220331-faac000b.pth) | 448x448 | 23.92 | 16.48 | 88.45 | [config](./resnet50_8xb8_cub.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb8_cub_20220307-57840e60.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb8_cub_20220307-57840e60.log.json) |
## Citation


@@ -16,6 +16,7 @@ model = dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
            use_sigmoid=True,
        )),
    train_cfg=dict(augments=[
        dict(type='Mixup', alpha=0.2, num_classes=1000),
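For context, `LabelSmoothLoss` in `'original'` mode blends the one-hot target with a uniform distribution over all classes, and `use_sigmoid=True` scores the smoothed target with a binary cross-entropy rather than a softmax cross-entropy, a common choice when Mixup produces soft targets. A minimal sketch of the combination (illustrative only, not the mmcls implementation):

```python
import torch
import torch.nn.functional as F

def label_smooth_bce(logits, target, num_classes, eps=0.1):
    """Label smoothing ('original' mode) evaluated with a sigmoid/BCE criterion."""
    one_hot = F.one_hot(target, num_classes).float()
    smooth = one_hot * (1 - eps) + eps / num_classes  # eps == label_smooth_val
    # use_sigmoid=True: treat each class as an independent binary problem.
    return F.binary_cross_entropy_with_logits(logits, smooth)

loss = label_smooth_bce(torch.randn(4, 1000), torch.randint(0, 1000, (4,)), 1000)
```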


@@ -16,12 +16,12 @@ We present a simple, highly modularized network architecture for image classific
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :---------------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------------------------------------: | :-------------------------------------------------------------------------: |
| ResNeXt-32x4d-50 | 25.03 | 4.27 | 77.90 | 93.66 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnext/resnext50-32x4d_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.log.json) |
| ResNeXt-32x4d-101 | 44.18 | 8.03 | 78.61 | 94.17 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnext/resnext101-32x4d_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.log.json) |
| ResNeXt-32x8d-101 | 88.79 | 16.5 | 79.27 | 94.58 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnext/resnext101-32x8d_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.log.json) |
| ResNeXt-32x4d-152 | 59.95 | 11.8 | 78.88 | 94.33 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnext/resnext152-32x4d_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.log.json) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :---------------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------: | :--------------------------------------------------------------------------------------------------------: |
| ResNeXt-32x4d-50 | 25.03 | 4.27 | 77.90 | 93.66 | [config](./resnext50-32x4d_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.log.json) |
| ResNeXt-32x4d-101 | 44.18 | 8.03 | 78.61 | 94.17 | [config](./resnext101-32x4d_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.log.json) |
| ResNeXt-32x8d-101 | 88.79 | 16.5 | 79.27 | 94.58 | [config](./resnext101-32x8d_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.log.json) |
| ResNeXt-32x4d-152 | 59.95 | 11.8 | 78.88 | 94.33 | [config](./resnext152-32x4d_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.log.json) |
## Citation


@@ -16,10 +16,10 @@ The central building block of convolutional neural networks (CNNs) is the convol
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------: | :-------: | :------: | :-------: | :-------: | :-------------------------------------------------------------------------: | :---------------------------------------------------------------------------: |
| SE-ResNet-50 | 28.09 | 4.13 | 77.74 | 93.84 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/seresnet/seresnet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200708-657b3c36.log.json) |
| SE-ResNet-101 | 49.33 | 7.86 | 78.26 | 94.07 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/seresnet/seresnet101_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200708-038a4d04.log.json) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-----------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------: | :-----------------------------------------------------------------------------------------------------------------: |
| SE-ResNet-50 | 28.09 | 4.13 | 77.74 | 93.84 | [config](./seresnet50_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200708-657b3c36.log.json) |
| SE-ResNet-101 | 49.33 | 7.86 | 78.26 | 94.07 | [config](./seresnet101_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200708-038a4d04.log.json) |
## Citation


@@ -16,9 +16,9 @@ We introduce an extremely computation-efficient CNN architecture named ShuffleNe
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-------------------------: | :-------: | :------: | :-------: | :-------: | :------------------------------------------------------------------: | :--------------------------------------------------------------------: |
| ShuffleNetV1 1.0x (group=3) | 1.87 | 0.146 | 68.13 | 87.81 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/shufflenet_v1/shufflenet-v1-1x_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.log.json) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :-------------------------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------: | :---------------------------------------------------------------------------------------------: |
| ShuffleNetV1 1.0x (group=3) | 1.87 | 0.146 | 68.13 | 87.81 | [config](./shufflenet-v1-1x_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.log.json) |
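The `group=3` in the model name refers to the grouped 1x1 convolutions; ShuffleNetV1's channel shuffle then interleaves channels across groups so that information can flow between them. The operation is just a reshape-transpose-reshape, sketched below (illustrative, not the mmcls implementation):

```python
import torch

def channel_shuffle(x, groups):
    """Interleave channels across groups: split into (groups, c // groups),
    transpose, and flatten back to the original channel count."""
    n, c, h, w = x.shape
    return (x.reshape(n, groups, c // groups, h, w)
             .transpose(1, 2)
             .reshape(n, c, h, w))
```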
## Citation


@@ -16,9 +16,9 @@ Currently, the neural network architecture design is mostly guided by the *indir
### ImageNet-1k
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :---------------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------------------------------------: | :-------------------------------------------------------------------------: |
| ShuffleNetV2 1.0x | 2.28 | 0.149 | 69.55 | 88.92 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/shufflenet_v2/shufflenet-v2-1x_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200804-8860eec9.log.json) |
| Model | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :---------------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------: | :-------------------------------------------------------------------------------------------------------: |
| ShuffleNetV2 1.0x | 2.28 | 0.149 | 69.55 | 88.92 | [config](./shufflenet-v2-1x_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200804-8860eec9.log.json) |
## Citation


@@ -27,26 +27,26 @@ The pre-trained models on ImageNet-21k are used to fine-tune, and therefore don'
### ImageNet-1k
| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------: | :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------------------------: | :-------------------------------------------------------------------: |
| Swin-T | From scratch | 224x224 | 28.29 | 4.36 | 81.18 | 95.61 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-tiny_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925-66df6be6.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925.log.json) |
| Swin-S | From scratch | 224x224 | 49.61 | 8.52 | 83.02 | 96.29 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-small_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219-7f9d988b.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219.log.json) |
| Swin-B | From scratch | 224x224 | 87.77 | 15.14 | 83.36 | 96.44 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_base_224_b16x64_300e_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_b16x64_300e_imagenet_20210616_190742-93230b0d.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_b16x64_300e_imagenet_20210616_190742.log.json) |
| Swin-S\* | From scratch | 224x224 | 49.61 | 8.52 | 83.21 | 96.25 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-small_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_small_patch4_window7_224-cc7a01c9.pth) |
| Swin-B\* | From scratch | 224x224 | 87.77 | 15.14 | 83.42 | 96.44 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-base_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window7_224-4670dd19.pth) |
| Swin-B\* | From scratch | 384x384 | 87.90 | 44.49 | 84.49 | 96.95 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-base_16xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window12_384-02c598a4.pth) |
| Swin-B\* | ImageNet-21k | 224x224 | 87.77 | 15.14 | 85.16 | 97.50 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-base_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window7_224_22kto1k-f967f799.pth) |
| Swin-B\* | ImageNet-21k | 384x384 | 87.90 | 44.49 | 86.44 | 98.05 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-base_16xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window12_384_22kto1k-d59b0d1d.pth) |
| Swin-L\* | ImageNet-21k | 224x224 | 196.53 | 34.04 | 86.24 | 97.88 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-large_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_large_patch4_window7_224_22kto1k-5f0996db.pth) |
| Swin-L\* | ImageNet-21k | 384x384 | 196.74 | 100.04 | 87.25 | 98.25 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-large_16xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_large_patch4_window12_384_22kto1k-0a40944b.pth) |
| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------: | :----------: | :--------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------: | :------------------------------------------------------------------------------------------: |
| Swin-T | From scratch | 224x224 | 28.29 | 4.36 | 81.18 | 95.61 | [config](./swin-tiny_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925-66df6be6.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925.log.json) |
| Swin-S | From scratch | 224x224 | 49.61 | 8.52 | 83.02 | 96.29 | [config](./swin-small_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219-7f9d988b.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219.log.json) |
| Swin-B | From scratch | 224x224 | 87.77 | 15.14 | 83.36 | 96.44 | [config](./swin-base_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_b16x64_300e_imagenet_20210616_190742-93230b0d.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_b16x64_300e_imagenet_20210616_190742.log.json) |
| Swin-S\* | From scratch | 224x224 | 49.61 | 8.52 | 83.21 | 96.25 | [config](./swin-small_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_small_patch4_window7_224-cc7a01c9.pth) |
| Swin-B\* | From scratch | 224x224 | 87.77 | 15.14 | 83.42 | 96.44 | [config](./swin-base_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window7_224-4670dd19.pth) |
| Swin-B\* | From scratch | 384x384 | 87.90 | 44.49 | 84.49 | 96.95 | [config](./swin-base_16xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window12_384-02c598a4.pth) |
| Swin-B\* | ImageNet-21k | 224x224 | 87.77 | 15.14 | 85.16 | 97.50 | [config](./swin-base_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window7_224_22kto1k-f967f799.pth) |
| Swin-B\* | ImageNet-21k | 384x384 | 87.90 | 44.49 | 86.44 | 98.05 | [config](./swin-base_16xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window12_384_22kto1k-d59b0d1d.pth) |
| Swin-L\* | ImageNet-21k | 224x224 | 196.53 | 34.04 | 86.24 | 97.88 | [config](./swin-large_16xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_large_patch4_window7_224_22kto1k-5f0996db.pth) |
| Swin-L\* | ImageNet-21k | 384x384 | 196.74 | 100.04 | 87.25 | 98.25 | [config](./swin-large_16xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_large_patch4_window12_384_22kto1k-0a40944b.pth) |
*Models with * are converted from the [official repo](https://github.com/microsoft/Swin-Transformer#main-results-on-imagenet-with-pretrained-models). The config files of these models are only for validation purposes. We do not guarantee the training accuracy of these configs and welcome you to contribute your reproduction results.*
### CUB-200-2011
| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Config | Download |
| :----: | :---------------------------------------------------: | :--------: | :-------: | :------: | :-------: | :-------------------------------------------------: | :----------------------------------------------------: |
| Swin-L | [ImageNet-21k](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin-base_3rdparty_in21k-384px.pth) | 384x384 | 195.51 | 100.04 | 91.87 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin-large_8xb8_cub_384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin-large_8xb8_cub_384px_20220307-1bbaee6a.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin-large_8xb8_cub_384px_20220307-1bbaee6a.log.json) |
| Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Config | Download |
| :----: | :---------------------------------------------------------: | :--------: | :-------: | :------: | :-------: | :--------------------------------------: | :---------------------------------------------------------: |
| Swin-L | [ImageNet-21k](https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin-base_3rdparty_in21k-384px.pth) | 384x384 | 195.51 | 100.04 | 91.87 | [config](./swin-large_8xb8_cub_384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin-large_8xb8_cub_384px_20220307-1bbaee6a.pth) \| [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin-large_8xb8_cub_384px_20220307-1bbaee6a.log.json) |
## Citation


@@ -0,0 +1,58 @@
# Swin Transformer V2
> [Swin Transformer V2: Scaling Up Capacity and Resolution](https://arxiv.org/abs/2111.09883)
<!-- [ALGORITHM] -->
## Abstract
Large-scale NLP models have been shown to significantly improve the performance on language tasks with no signs of saturation. They also demonstrate amazing few-shot capabilities like that of human beings. This paper aims to explore large-scale models in computer vision. We tackle three major issues in training and application of large vision models, including training instability, resolution gaps between pre-training and fine-tuning, and hunger on labelled data. Three main techniques are proposed: 1) a residual-post-norm method combined with cosine attention to improve training stability; 2) A log-spaced continuous position bias method to effectively transfer models pre-trained using low-resolution images to downstream tasks with high-resolution inputs; 3) A self-supervised pre-training method, SimMIM, to reduce the needs of vast labeled images. Through these techniques, this paper successfully trained a 3 billion-parameter Swin Transformer V2 model, which is the largest dense vision model to date, and makes it capable of training with images of up to 1,536×1,536 resolution. It set new performance records on 4 representative vision tasks, including ImageNet-V2 image classification, COCO object detection, ADE20K semantic segmentation, and Kinetics-400 video action classification. Also note our training is much more efficient than that in Google's billion-level visual models, which consumes 40 times less labelled data and 40 times less training time.
<div align=center>
<img src="https://user-images.githubusercontent.com/42952108/180748696-ee7ed23d-7fee-4ccf-9eb5-f117db228a42.png" width="100%"/>
</div>
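Two of the three techniques have compact closed forms. As given in the paper, the attention logits use a scaled cosine similarity with a learnable temperature, and the offsets fed to the continuous-position-bias MLP are log-spaced:

```
\mathrm{Sim}(\mathbf{q}_i, \mathbf{k}_j) = \frac{\cos(\mathbf{q}_i, \mathbf{k}_j)}{\tau} + B_{ij}

\widehat{\Delta x} = \operatorname{sign}(\Delta x) \cdot \log\bigl(1 + |\Delta x|\bigr)
```

Here `B_{ij}` is the relative position bias produced by a small MLP from the log-spaced offsets, and the learnable scalar τ is not shared across heads or layers.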
## Results and models
### ImageNet-21k
The models pre-trained on ImageNet-21k are used for fine-tuning only, and therefore don't have evaluation results.
| Model | resolution | Params(M) | Flops(G) | Download |
| :------: | :--------: | :-------: | :------: | :--------------------------------------------------------------------------------------------------------------------------------------: |
| Swin-B\* | 192x192 | 87.92 | 8.51 | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/pretrain/swinv2-base-w12_3rdparty_in21k-192px_20220803-f7dc9763.pth) |
| Swin-L\* | 192x192 | 196.74 | 19.04 | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/pretrain/swinv2-large-w12_3rdparty_in21k-192px_20220803-d9073fee.pth) |
### ImageNet-1k
| Model | Pretrain | resolution | window | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------: | :----------: | :--------: | :----: | :-------: | :------: | :-------: | :-------: | :---------------------------------------------------------: | :--------------------------------------------------------------------: |
| Swin-T\* | From scratch | 256x256 | 8x8 | 28.35 | 4.35 | 81.76 | 95.87 | [config](./swinv2-tiny-w8_16xb64_in1k-256px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/swinv2-tiny-w8_3rdparty_in1k-256px_20220803-e318968f.pth) |
| Swin-T\* | From scratch | 256x256 | 16x16 | 28.35 | 4.4 | 82.81 | 96.23 | [config](./swinv2-tiny-w16_16xb64_in1k-256px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/swinv2-tiny-w16_3rdparty_in1k-256px_20220803-9651cdd7.pth) |
| Swin-S\* | From scratch | 256x256 | 8x8 | 49.73 | 8.45 | 83.74 | 96.6 | [config](./swinv2-small-w8_16xb64_in1k-256px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/swinv2-small-w8_3rdparty_in1k-256px_20220803-b01a4332.pth) |
| Swin-S\* | From scratch | 256x256 | 16x16 | 49.73 | 8.57 | 84.13 | 96.83 | [config](./swinv2-small-w16_16xb64_in1k-256px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/swinv2-small-w16_3rdparty_in1k-256px_20220803-b707d206.pth) |
| Swin-B\* | From scratch | 256x256 | 8x8 | 87.92 | 14.99 | 84.2 | 96.86 | [config](./swinv2-base-w8_16xb64_in1k-256px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/swinv2-base-w8_3rdparty_in1k-256px_20220803-8ff28f2b.pth) |
| Swin-B\* | From scratch | 256x256 | 16x16 | 87.92 | 15.14 | 84.6 | 97.05 | [config](./swinv2-base-w16_16xb64_in1k-256px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/swinv2-base-w16_3rdparty_in1k-256px_20220803-5a1886b7.pth) |
| Swin-B\* | ImageNet-21k | 256x256 | 16x16 | 87.92 | 15.14 | 86.17 | 97.88 | [config](./swinv2-base-w16_in21k-pre_16xb64_in1k-256px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/swinv2-base-w16_in21k-pre_3rdparty_in1k-256px_20220803-8d7aa8ad.pth) |
| Swin-B\* | ImageNet-21k | 384x384 | 24x24 | 87.92 | 34.07 | 87.14 | 98.23 | [config](./swinv2-base-w24_in21k-pre_16xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/swinv2-base-w24_in21k-pre_3rdparty_in1k-384px_20220803-44eb70f8.pth) |
| Swin-L\* | ImageNet-21k | 256x256 | 16x16 | 196.75 | 33.86 | 86.93 | 98.06 | [config](./swinv2-large-w16_in21k-pre_16xb64_in1k-256px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/swinv2-large-w16_in21k-pre_3rdparty_in1k-256px_20220803-c40cbed7.pth) |
| Swin-L\* | ImageNet-21k | 384x384 | 24x24 | 196.75 | 76.2 | 87.59 | 98.27 | [config](./swinv2-large-w24_in21k-pre_16xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-v2/swinv2-large-w24_in21k-pre_3rdparty_in1k-384px_20220803-3b36c165.pth) |
*Models with * are converted from the [official repo](https://github.com/microsoft/Swin-Transformer#main-results-on-imagenet-with-pretrained-models). The config files of these models are only for validation purposes. We do not guarantee the training accuracy of these configs and welcome you to contribute your reproduction results.*
*The ImageNet-21k pre-trained models with input resolutions of 256x256 and 384x384 are both fine-tuned from the same pre-training checkpoint, which uses a smaller input resolution of 192x192.*
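To fine-tune from one of the ImageNet-21k checkpoints above, the usual OpenMMLab pattern is to inherit an existing config and initialize the backbone from the pre-trained weights. The snippet below is a hypothetical sketch, not a config shipped with the repo; it assumes the 256px base config from the table and the standard `init_cfg=dict(type='Pretrained', ...)` mechanism:

```python
# Hypothetical fine-tuning config: inherit an existing SwinV2 ImageNet-1k
# config and load backbone weights from the ImageNet-21k checkpoint above.
_base_ = './swinv2-base-w16_16xb64_in1k-256px.py'

checkpoint = ('https://download.openmmlab.com/mmclassification/v0/swin-v2/'
              'pretrain/swinv2-base-w12_3rdparty_in21k-192px_20220803-f7dc9763.pth')

model = dict(
    backbone=dict(
        init_cfg=dict(type='Pretrained', checkpoint=checkpoint, prefix='backbone.')))
```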
## Citation
```
@article{https://doi.org/10.48550/arxiv.2111.09883,
doi = {10.48550/ARXIV.2111.09883},
url = {https://arxiv.org/abs/2111.09883},
author = {Liu, Ze and Hu, Han and Lin, Yutong and Yao, Zhuliang and Xie, Zhenda and Wei, Yixuan and Ning, Jia and Cao, Yue and Zhang, Zheng and Dong, Li and Wei, Furu and Guo, Baining},
keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {Swin Transformer V2: Scaling Up Capacity and Resolution},
publisher = {arXiv},
year = {2021},
copyright = {Creative Commons Attribution 4.0 International}
}
```

Some files were not shown because too many files have changed in this diff.