[Feature] Add speed benchmark script.
parent 2f2aa3037c
commit 1f2f0dae5e
@ -0,0 +1,273 @@
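"""Benchmark the inference speed (FPS) of the models in model-index.yml.

Random images are generated in memory, so no dataset download is needed.
Results are printed as a rich table and written to a dated CSV file under
``--work-dir``.

Example invocation (the exact script path is an assumption; the file lives
two directories below the repo root, per MMCLS_ROOT below):

    python .dev_scripts/benchmark_regression/benchmark_speed.py \
        --models 'resnet.*' --batch-size 32
"""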
import logging
import re
import time
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from pathlib import Path
from typing import OrderedDict

import numpy as np
import torch
from mmcv import Config
from mmcv.parallel.data_parallel import MMDataParallel
from mmcv.parallel.distributed import MMDistributedDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
from mmengine.logging.logger import MMLogger
from modelindex.load_model_index import load
from rich.console import Console
from rich.table import Table

from mmcls.datasets.builder import build_dataloader
from mmcls.datasets.pipelines import Compose
from mmcls.models.builder import build_classifier

console = Console()
MMCLS_ROOT = Path(__file__).absolute().parents[2]
logger = MMLogger(
    name='benchmark',
    logger_name='benchmark',
    log_file='benchmark_speed.log',
    log_level=logging.INFO)


def parse_args():
    parser = ArgumentParser(
        description='Get FPS of all models in model-index.yml')
    parser.add_argument(
        '--checkpoint-root',
        help='Checkpoint file root path. If set, load the checkpoint '
        'before testing.')
    parser.add_argument(
        '--models',
        nargs='+',
        help='Names of the models to benchmark (regular expressions are '
        'matched against model names).')
    parser.add_argument(
        '--work-dir',
        type=Path,
        default='work_dirs/benchmark_speed',
        help='The directory to save speed test results.')
    parser.add_argument(
        '--max-iter',
        type=int,
        default=2048,
        help='The maximum number of images to test on.')
    parser.add_argument(
        '--batch-size',
        type=int,
        default=64,
        help='The batch size used during inference.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument(
        '--device', default='cuda', help='Device used for inference.')
    parser.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='ID of the GPU to use '
        '(only applicable to non-distributed testing).')
    args = parser.parse_args()
    return args


class ToyDataset:
    """A dummy dataset used to provide images for benchmark."""

    def __init__(self, num, hw) -> None:
        data = []
        for _ in range(num):
            if isinstance(hw, int):
                w = h = hw
            else:
                w, h = hw
            img = np.random.randint(0, 256, size=(h, w, 3), dtype=np.uint8)
            data.append({'img': img})
        self.data = data
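        # The transform pipeline is attached later, in `measure_fps`, once
        # the model config (and therefore its test pipeline) is known.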
        self.pipeline = None

    def __getitem__(self, idx):
        return self.pipeline(deepcopy(self.data[idx]))

    def __len__(self):
        return len(self.data)


def measure_fps(config_file, checkpoint, dataset, args, distributed=False):
    cfg = Config.fromfile(config_file)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # build the data pipeline
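    # ToyDataset already provides decoded in-memory images, so the image
    # loading transform from the config's test pipeline is dropped here.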
    if cfg.data.test.pipeline[0]['type'] == 'LoadImageFromFile':
        cfg.data.test.pipeline.pop(0)

    dataset.pipeline = Compose(cfg.data.test.pipeline)
    # After the test pipeline, `img` is a CHW tensor, so shape[1:] is (h, w).
    resolution = tuple(dataset[0]['img'].shape[1:])

    # build the dataloader
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=args.batch_size,
        # Because multiple processes will occupy additional CPU resources,
        # FPS statistics will be more unstable when workers_per_gpu is not 0.
        # It is reasonable to set workers_per_gpu to 0.
        workers_per_gpu=0,
        dist=False if args.launcher == 'none' else True,
        shuffle=False,
        drop_last=True,
        persistent_workers=False)

    # build the model and load checkpoint
    model = build_classifier(cfg.model)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if checkpoint is not None:
        load_checkpoint(model, checkpoint, map_location='cpu')

    if not distributed:
        if args.device == 'cpu':
            model = model.cpu()
        else:
            model = MMDataParallel(model, device_ids=[args.gpu_id])
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
    model.eval()

    # the first several iterations may be very slow, so skip them
    num_warmup = 5
    infer_time = []
    fps = 0

    # forward the model
    result = {'model': config_file.stem, 'resolution': resolution}
    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, **data)

        torch.cuda.synchronize()
        elapsed = (time.perf_counter() - start_time) / args.batch_size

        if i >= num_warmup:
            infer_time.append(elapsed)
            if (i + 1) % 8 == 0:
                fps = (i + 1 - num_warmup) / sum(infer_time)
                print(
                    f'Done image [{(i + 1)*args.batch_size:<4}/'
                    f'{args.max_iter}], fps: {fps:.1f} img / s, '
                    f'times per image: {1000 / fps:.1f} ms / img',
                    flush=True)
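
    # `elapsed` above is already a per-image latency (batch time divided by
    # batch size), so throughput in img/s equals the number of measured
    # iterations divided by the summed per-image latencies.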
    result['fps'] = (len(data_loader) - num_warmup) / sum(infer_time)
    result['time_mean'] = np.mean(infer_time) * 1000
    result['time_std'] = np.std(infer_time) * 1000

    return result


def show_summary(summary_data, args):
    table = Table(title='Speed Benchmark Regression Summary')
    table.add_column('Model')
    table.add_column('Resolution (h, w)')
    table.add_column('FPS (img/s)')
    table.add_column('Inference Time (std) (ms/img)')

    for model_name, summary in summary_data.items():
        row = [model_name]
        row.append(str(summary['resolution']))
        row.append(f"{summary['fps']:.2f}")
        time_mean = f"{summary['time_mean']:.2f}"
        time_std = f"{summary['time_std']:.2f}"
        row.append(f'{time_mean}\t({time_std})'.expandtabs(8))
        table.add_row(*row)

    console.print(table)


def main(args):
    model_index_file = MMCLS_ROOT / 'model-index.yml'
    model_index = load(str(model_index_file))
    model_index.build_models_with_collections()
    models = OrderedDict({model.name: model for model in model_index.models})

    if args.models:
        patterns = [re.compile(pattern) for pattern in args.models]
        filter_models = {}
        for k, v in models.items():
            if any(re.match(pattern, k) for pattern in patterns):
                filter_models[k] = v
        if len(filter_models) == 0:
            print('No model found, please specify models in:')
            print('\n'.join(models.keys()))
            return
        models = filter_models

    dataset_map = {
        # The shape comes from the average size of ImageNet images.
        'ImageNet-1k': ToyDataset(args.max_iter, (442, 522)),
        'CIFAR-10': ToyDataset(args.max_iter, 32),
        'CIFAR-100': ToyDataset(args.max_iter, 32),
    }

    summary_data = {}
    for model_name, model_info in models.items():
        if model_info.config is None:
            continue

        config = Path(model_info.config)
        assert config.exists(), f'{model_name}: {config} not found.'

        logger.info(f'Processing: {model_name}')

        http_prefix = 'https://download.openmmlab.com/mmclassification/'
        dataset = model_info.results[0].dataset
        if dataset not in dataset_map.keys():
            continue
        if args.checkpoint_root is not None:
            root = args.checkpoint_root
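            # Checkpoints under an s3:// root are resolved through mmcv's
            # FileClient; `petrel_client` may not be installed everywhere,
            # which is presumably why both imports happen inside this branch.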
            if 's3://' in args.checkpoint_root:
                from mmcv.fileio import FileClient
                from petrel_client.common.exception import AccessDeniedError
                file_client = FileClient.infer_client(uri=root)
                checkpoint = file_client.join_path(
                    root, model_info.weights[len(http_prefix):])
                try:
                    exists = file_client.exists(checkpoint)
                except AccessDeniedError:
                    exists = False
            else:
                checkpoint = Path(root) / model_info.weights[len(http_prefix):]
                exists = checkpoint.exists()
            if exists:
                checkpoint = str(checkpoint)
            else:
                print(f'WARNING: {model_name}: {checkpoint} not found.')
                checkpoint = None
        else:
            checkpoint = None

        # build the model from a config file and a checkpoint file
        result = measure_fps(MMCLS_ROOT / config, checkpoint,
                             dataset_map[dataset], args)

        summary_data[model_name] = result

    show_summary(summary_data, args)
    args.work_dir.mkdir(parents=True, exist_ok=True)
    out_path = args.work_dir / datetime.now().strftime('%Y-%m-%d.csv')
    with open(out_path, 'w') as f:
        f.write('MODEL,SHAPE,FPS\n')
        for model, summary in summary_data.items():
            f.write(
                f'{model},"{summary["resolution"]}",{summary["fps"]:.2f}\n')


if __name__ == '__main__':
    args = parse_args()
    main(args)