[Feature] Add speed benchmark script.

pull/913/head
mzr1996 2022-05-23 09:08:50 +00:00
parent 2f2aa3037c
commit 1f2f0dae5e
1 changed file with 273 additions and 0 deletions
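
The script benchmarks the inference speed of every model listed in
model-index.yml: it feeds randomly generated images through each model's test
pipeline, measures FPS and per-image latency after a few warm-up iterations,
prints a summary table, and writes a dated CSV to --work-dir. Models can be
filtered with --models (regular expressions), and checkpoints are optionally
loaded from --checkpoint-root. A typical invocation (the script path here is
illustrative):

    python tools/analysis_tools/benchmark_speed.py --models 'resnet.*' \
        --batch-size 32 --max-iter 512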

@@ -0,0 +1,273 @@
import logging
import re
import time
from argparse import ArgumentParser
from collections import OrderedDict
from copy import deepcopy
from datetime import datetime
from pathlib import Path

import numpy as np
import torch
from mmcv import Config
from mmcv.parallel.data_parallel import MMDataParallel
from mmcv.parallel.distributed import MMDistributedDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
from mmengine.logging.logger import MMLogger
from modelindex.load_model_index import load
from rich.console import Console
from rich.table import Table

from mmcls.datasets.builder import build_dataloader
from mmcls.datasets.pipelines import Compose
from mmcls.models.builder import build_classifier

console = Console()
MMCLS_ROOT = Path(__file__).absolute().parents[2]
logger = MMLogger(
    name='benchmark',
    logger_name='benchmark',
    log_file='benchmark_speed.log',
    log_level=logging.INFO)


def parse_args():
    parser = ArgumentParser(
        description='Get FPS of all models in model-index.yml')
    parser.add_argument(
        '--checkpoint-root',
        help='Checkpoint file root path. If set, load the checkpoint '
        'before testing.')
    parser.add_argument(
        '--models',
        nargs='+',
        help='Regular expressions of the model names to benchmark.')
    parser.add_argument(
        '--work-dir',
        type=Path,
        default='work_dirs/benchmark_speed',
        help='The directory to save speed test results.')
    parser.add_argument(
        '--max-iter',
        type=int,
        default=2048,
        help='The number of images to test each model with.')
    parser.add_argument(
        '--batch-size',
        type=int,
        default=64,
        help='The batch size for inference.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument(
        '--device', default='cuda', help='Device used for inference')
    parser.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='ID of the GPU to use '
        '(only applicable to non-distributed testing).')
    args = parser.parse_args()
    return args


class ToyDataset:
    """A dummy dataset that provides randomly generated images for the
    benchmark.

    ``num`` is the number of images and ``hw`` is the image size, either a
    single int (square) or a ``(width, height)`` tuple. The ``pipeline``
    attribute must be assigned (``measure_fps`` does this) before the
    dataset is indexed.
    """

    def __init__(self, num, hw) -> None:
        data = []
        for _ in range(num):
            if isinstance(hw, int):
                w = h = hw
            else:
                w, h = hw
            img = np.random.randint(0, 256, size=(h, w, 3), dtype=np.uint8)
            data.append({'img': img})
        self.data = data
        self.pipeline = None

    def __getitem__(self, idx):
        return self.pipeline(deepcopy(self.data[idx]))

    def __len__(self):
        return len(self.data)
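

# For reference, each raw item of ToyDataset is a dict {'img': HxWx3 uint8
# array}; measure_fps() below assigns the real test pipeline of each config.
# A quick, illustrative check with an identity pipeline:
#
#     ds = ToyDataset(num=2, hw=224)
#     ds.pipeline = lambda x: x
#     assert ds[0]['img'].shape == (224, 224, 3)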


def measure_fps(config_file, checkpoint, dataset, args, distributed=False):
    cfg = Config.fromfile(config_file)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # build the data pipeline (the images are generated in memory, so drop
    # the loading step)
    if cfg.data.test.pipeline[0]['type'] == 'LoadImageFromFile':
        cfg.data.test.pipeline.pop(0)
    dataset.pipeline = Compose(cfg.data.test.pipeline)
    resolution = tuple(dataset[0]['img'].shape[1:])

    # build the dataloader
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=args.batch_size,
        # Extra worker processes compete for CPU resources and make the FPS
        # statistics less stable, so keep workers_per_gpu at 0.
        workers_per_gpu=0,
        dist=args.launcher != 'none',
        shuffle=False,
        drop_last=True,
        persistent_workers=False)

    # build the model and load checkpoint
    model = build_classifier(cfg.model)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if checkpoint is not None:
        load_checkpoint(model, checkpoint, map_location='cpu')

    if not distributed:
        if args.device == 'cpu':
            model = model.cpu()
        else:
            model = MMDataParallel(model, device_ids=[args.gpu_id])
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
    model.eval()

    # the first several iterations may be very slow, so skip them
    num_warmup = 5
    infer_time = []
    fps = 0

    # forward the model
    result = {'model': config_file.stem, 'resolution': resolution}
    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, **data)

        torch.cuda.synchronize()
        # per-image time of this batch
        elapsed = (time.perf_counter() - start_time) / args.batch_size

        if i >= num_warmup:
            infer_time.append(elapsed)
            if (i + 1) % 8 == 0:
                fps = (i + 1 - num_warmup) / sum(infer_time)
                print(
                    f'Done image [{(i + 1)*args.batch_size:<4}/'
                    f'{args.max_iter}], fps: {fps:.1f} img / s, '
                    f'time per image: {1000 / fps:.1f} ms / img',
                    flush=True)

    result['fps'] = (len(data_loader) - num_warmup) / sum(infer_time)
    result['time_mean'] = np.mean(infer_time) * 1000
    result['time_std'] = np.std(infer_time) * 1000
    return result


def show_summary(summary_data, args):
    table = Table(title='Speed Benchmark Regression Summary')
    table.add_column('Model')
    table.add_column('Resolution (h, w)')
    table.add_column('FPS (img/s)')
    table.add_column('Inference Time (std) (ms/img)')

    for model_name, summary in summary_data.items():
        row = [model_name]
        row.append(str(summary['resolution']))
        row.append(f"{summary['fps']:.2f}")
        time_mean = f"{summary['time_mean']:.2f}"
        time_std = f"{summary['time_std']:.2f}"
        row.append(f'{time_mean}\t({time_std})'.expandtabs(8))
        table.add_row(*row)

    console.print(table)


def main(args):
    """Benchmark the inference speed of all selected models."""
    model_index_file = MMCLS_ROOT / 'model-index.yml'
    model_index = load(str(model_index_file))
    model_index.build_models_with_collections()
    models = OrderedDict({model.name: model for model in model_index.models})

    if args.models:
        patterns = [re.compile(pattern) for pattern in args.models]
        filter_models = {}
        for k, v in models.items():
            if any(re.match(pattern, k) for pattern in patterns):
                filter_models[k] = v
        if len(filter_models) == 0:
            print('No model found, please specify models in:')
            print('\n'.join(models.keys()))
            return
        models = filter_models

    dataset_map = {
        # The size comes from the average image size of ImageNet.
        'ImageNet-1k': ToyDataset(args.max_iter, (442, 522)),
        'CIFAR-10': ToyDataset(args.max_iter, 32),
        'CIFAR-100': ToyDataset(args.max_iter, 32),
    }

    summary_data = {}
    for model_name, model_info in models.items():
        if model_info.config is None:
            continue
        config = Path(model_info.config)
        assert config.exists(), f'{model_name}: {config} not found.'
        logger.info(f'Processing: {model_name}')

        http_prefix = 'https://download.openmmlab.com/mmclassification/'
        dataset = model_info.results[0].dataset
        if dataset not in dataset_map:
            continue

        if args.checkpoint_root is not None:
            root = args.checkpoint_root
            if 's3://' in args.checkpoint_root:
                from mmcv.fileio import FileClient
                from petrel_client.common.exception import AccessDeniedError
                file_client = FileClient.infer_client(uri=root)
                checkpoint = file_client.join_path(
                    root, model_info.weights[len(http_prefix):])
                try:
                    exists = file_client.exists(checkpoint)
                except AccessDeniedError:
                    exists = False
            else:
                checkpoint = Path(root) / model_info.weights[len(http_prefix):]
                exists = checkpoint.exists()
            if exists:
                checkpoint = str(checkpoint)
            else:
                print(f'WARNING: {model_name}: {checkpoint} not found.')
                checkpoint = None
        else:
            checkpoint = None

        # Build the model from the config (and optional checkpoint) inside
        # measure_fps and record its speed.
        result = measure_fps(MMCLS_ROOT / config, checkpoint,
                             dataset_map[dataset], args)
        summary_data[model_name] = result

    show_summary(summary_data, args)

    args.work_dir.mkdir(parents=True, exist_ok=True)
    out_path = args.work_dir / datetime.now().strftime('%Y-%m-%d.csv')
    with open(out_path, 'w') as f:
        f.write('MODEL,SHAPE,FPS\n')
        for model, summary in summary_data.items():
            f.write(
                f'{model},"{summary["resolution"]}",{summary["fps"]:.2f}\n')


if __name__ == '__main__':
    args = parse_args()
    main(args)
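

# A minimal sketch of benchmarking a single config without going through the
# model index (the config path and argument values are illustrative):
#
#     from argparse import Namespace
#     args = Namespace(batch_size=32, max_iter=512, launcher='none',
#                      device='cuda', gpu_id=0)
#     dataset = ToyDataset(args.max_iter, (442, 522))
#     result = measure_fps(
#         MMCLS_ROOT / 'configs/resnet/resnet50_8xb32_in1k.py',
#         checkpoint=None, dataset=dataset, args=args)
#     print(result['fps'], result['time_mean'], result['time_std'])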