import os.path as osp

import torch.distributed as dist
from mmcv.runner import DistEvalHook as _DistEvalHook
from mmcv.runner import EvalHook as _EvalHook
from torch.nn.modules.batchnorm import _BatchNorm


class EvalHook(_EvalHook):
    """Single GPU EvalHook, with efficient test support.

    Args:
        by_epoch (bool): Whether to perform evaluation by epoch or by
            iteration. If True, it will be performed by epoch; otherwise,
            by iteration. Default: False.
        efficient_test (bool): Whether to save intermediate results as local
            numpy files to reduce CPU memory usage during evaluation.
            Default: False.
    """

    greater_keys = ['mIoU', 'mAcc', 'aAcc']

    def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs):
        super().__init__(*args, by_epoch=by_epoch, **kwargs)
        self.efficient_test = efficient_test

    def _do_evaluate(self, runner):
        """Perform evaluation and save the checkpoint."""
        if not self._should_evaluate(runner):
            return

        # Imported locally to avoid a circular import between mmseg.apis
        # and mmseg.core.
        from mmseg.apis import single_gpu_test
        results = single_gpu_test(
            runner.model,
            self.dataloader,
            show=False,
            efficient_test=self.efficient_test)
        runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
        key_score = self.evaluate(runner, results)
        if self.save_best:
            self._save_ckpt(runner, key_score)


class DistEvalHook(_DistEvalHook):
    """Distributed EvalHook, with efficient test support.

    Args:
        by_epoch (bool): Whether to perform evaluation by epoch or by
            iteration. If True, it will be performed by epoch; otherwise,
            by iteration. Default: False.
        efficient_test (bool): Whether to save intermediate results as local
            numpy files to reduce CPU memory usage during evaluation.
            Default: False.
    """

    greater_keys = ['mIoU', 'mAcc', 'aAcc']

    def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs):
        super().__init__(*args, by_epoch=by_epoch, **kwargs)
        self.efficient_test = efficient_test

    def _do_evaluate(self, runner):
        """Perform evaluation and save the checkpoint."""
        # PyTorch's DDP does not synchronize BatchNorm buffers
        # (running_mean and running_var), which may lead to inconsistent
        # model performance across ranks, so broadcast rank 0's BatchNorm
        # buffers to all other ranks before evaluating.
        if self.broadcast_bn_buffer:
            model = runner.model
            for name, module in model.named_modules():
                if isinstance(module,
                              _BatchNorm) and module.track_running_stats:
                    dist.broadcast(module.running_var, 0)
                    dist.broadcast(module.running_mean, 0)

        if not self._should_evaluate(runner):
            return

        tmpdir = self.tmpdir
        if tmpdir is None:
            tmpdir = osp.join(runner.work_dir, '.eval_hook')

        # Imported locally to avoid a circular import between mmseg.apis
        # and mmseg.core.
        from mmseg.apis import multi_gpu_test
        results = multi_gpu_test(
            runner.model,
            self.dataloader,
            tmpdir=tmpdir,
            gpu_collect=self.gpu_collect,
            efficient_test=self.efficient_test)
        # Only rank 0 receives the collected results; it logs, evaluates and
        # (optionally) saves the best checkpoint.
        if runner.rank == 0:
            print('\n')
            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
            key_score = self.evaluate(runner, results)
            if self.save_best:
                self._save_ckpt(runner, key_score)
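

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the hook implementation).
# It assumes a validation `val_dataloader`, an mmcv `runner`, and a
# `distributed` flag that are all built elsewhere (e.g. in a training entry
# point such as mmseg.apis.train_segmentor):
#
#     hook_cls = DistEvalHook if distributed else EvalHook
#     eval_hook = hook_cls(
#         val_dataloader,
#         interval=4000,          # evaluate every 4000 iterations
#         by_epoch=False,
#         efficient_test=True)    # spill per-image results to disk
#     runner.register_hook(eval_hook, priority='LOW')
# ---------------------------------------------------------------------------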