[Feature] Implement offline evaluation.

pull/913/head
mzr1996 2022-06-24 09:33:24 +00:00
parent a9057e88c4
commit 5a3e736b4e
2 changed files with 30 additions and 33 deletions
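
The change enables a two-step workflow: run the test script once with --dump to pickle every prediction, then recompute metrics from that pickle without touching the model or the dataset again. Below is a minimal sketch of the second step that mirrors the rewritten evaluation script in the diff; the config path, 'results.pkl', and the tools/test.py entry point are placeholder assumptions, not part of the commit.

# Offline re-evaluation at a glance (placeholder paths; mirrors the diff below).
#   Step 1: python tools/test.py <config> <checkpoint> --dump results.pkl
#   Step 2:
import itertools
from unittest.mock import MagicMock

import mmengine
from mmengine import Config
from mmengine.evaluator import Evaluator

from mmcls.utils import register_all_modules

register_all_modules()                       # register mmcls metrics by name
cfg = Config.fromfile('configs/example.py')  # placeholder config path
predictions = mmengine.load('results.pkl')   # the pickle written by --dump

evaluator = Evaluator(cfg.test_evaluator)    # metrics are taken from the config
fake_data = itertools.repeat({'data_sample': MagicMock()})  # metrics ignore the data side
print(evaluator.offline_evaluate(fake_data, predictions))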

@@ -1,10 +1,15 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import argparse
+import itertools
+import json
+from unittest.mock import MagicMock
-import mmcv
-from mmcv import Config, DictAction
+import mmengine
+import rich
+from mmengine import Config, DictAction
+from mmengine.evaluator import Evaluator
-from mmcls.datasets import build_dataset
+from mmcls.utils import register_all_modules
 def parse_args():
@@ -12,12 +17,6 @@ def parse_args():
         'results saved in pkl format')
     parser.add_argument('config', help='Config of the model')
     parser.add_argument('pkl_results', help='Results in pickle format')
-    parser.add_argument(
-        '--metrics',
-        type=str,
-        nargs='+',
-        help='Evaluation metrics, which depends on the dataset, e.g., '
-        '"accuracy", "precision", "recall" and "support".')
     parser.add_argument(
         '--cfg-options',
         nargs='+',
@@ -28,12 +27,6 @@ def parse_args():
         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
         'Note that the quotation marks are necessary and that no white space '
         'is allowed.')
-    parser.add_argument(
-        '--metric-options',
-        nargs='+',
-        action=DictAction,
-        help='custom options for evaluation, the key-value pair in xxx=yyy '
-        'format will be kwargs for dataset.evaluate() function')
     args = parser.parse_args()
     return args
@@ -41,30 +34,20 @@ def parse_args():
 def main():
     args = parse_args()
-    outputs = mmcv.load(args.pkl_results)
-    assert 'class_scores' in outputs, \
-        'No "class_scores" in result file, please set "--out-items" in test.py'
+    register_all_modules()
     # load config
     cfg = Config.fromfile(args.config)
-    assert args.metrics, (
-        'Please specify at least one metric the argument "--metrics".')
     if args.cfg_options is not None:
         cfg.merge_from_dict(args.cfg_options)
-    cfg.data.test.test_mode = True
-    dataset = build_dataset(cfg.data.test)
-    pred_score = outputs['class_scores']
+    predictions = mmengine.load(args.pkl_results)
-    eval_kwargs = cfg.get('evaluation', {}).copy()
-    # hard-code way to remove EvalHook args
-    for key in [
-            'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 'rule'
-    ]:
-        eval_kwargs.pop(key, None)
-    eval_kwargs.update(
-        dict(metric=args.metrics, metric_options=args.metric_options))
-    print(dataset.evaluate(pred_score, **eval_kwargs))
+    evaluator = Evaluator(cfg.test_evaluator)
+    # dataset is not needed, use an endless iterator to mock it.
+    fake_dataset = itertools.repeat({'data_sample': MagicMock()})
+    eval_results = evaluator.offline_evaluate(fake_dataset, predictions)
+    rich.print_json(json.dumps(eval_results))
 if __name__ == '__main__':
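
The itertools.repeat(...) line above deserves a note: offline_evaluate pairs each saved prediction with one item of input data, but the classification metrics only inspect the prediction side, so an endless stream of dummy items stands in for the dataset and nothing has to be built or loaded. A toy illustration of that pairing idea (an assumption about how the pairing behaves, not mmengine's actual implementation):

import itertools
from unittest.mock import MagicMock

predictions = [{'pred_label': 0}, {'pred_label': 1}, {'pred_label': 2}]  # toy stand-ins
fake_dataset = itertools.repeat({'data_sample': MagicMock()})            # never exhausted

# zip() stops when the shorter iterable (the predictions) runs out, so the
# endless fake dataset simply supplies a throwaway partner for every prediction.
pairs = list(zip(fake_dataset, predictions))
print(len(pairs))  # 3 -- one (data, prediction) pair per saved prediction

The remaining hunks belong to the second changed file, the test script, which gains the --dump option that produces the pickle consumed above.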


@@ -17,6 +17,10 @@ def parse_args():
     parser.add_argument(
         '--work-dir',
         help='the directory to save the file containing evaluation metrics')
+    parser.add_argument(
+        '--dump',
+        type=str,
+        help='dump predictions to a pickle file for offline evaluation')
     parser.add_argument(
         '--cfg-options',
         nargs='+',
@@ -70,6 +74,16 @@ def merge_args(cfg, args):
         cfg.default_hooks.visualization.out_dir = args.show_dir
         cfg.default_hooks.visualization.interval = args.interval
+    # -------------------- Dump predictions --------------------
+    if args.dump is not None:
+        assert args.dump.endswith(('.pkl', '.pickle')), \
+            'The dump file must be a pkl file.'
+        dump_metric = dict(type='DumpResults', out_file_path=args.dump)
+        if isinstance(cfg.test_evaluator, (list, tuple)):
+            cfg.test_evaluator = [*cfg.test_evaluator, dump_metric]
+        else:
+            cfg.test_evaluator = [cfg.test_evaluator, dump_metric]
     return cfg
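
Because test_evaluator may be either a single metric config or a list of them, the dump metric has to be appended differently in the two cases. A self-contained check of that merging logic, using plain dicts in place of real metric configs (the metric names are illustrative only):

dump_metric = dict(type='DumpResults', out_file_path='results.pkl')

single = dict(type='Accuracy', topk=(1, 5))                        # one metric config
several = [dict(type='Accuracy'), dict(type='SingleLabelMetric')]  # or a list of them

print([single, dump_metric])    # single config -> two-element list
print([*several, dump_metric])  # list of configs -> DumpResults appended last

Either way DumpResults runs as one more metric of the test evaluator, so a single test pass with --dump yields both the usual metric report and the pickle used for offline evaluation.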