# mmpretrain/mmcls/apis/test.py

import os.path as osp
import pickle
import shutil
import tempfile
import time

import mmcv
import numpy as np
import torch
import torch.distributed as dist
from mmcv.image import tensor2imgs
from mmcv.runner import get_dist_info


def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    **show_kwargs):
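    """Test a model on a single gpu and optionally visualize the results.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        show (bool): Whether to show the prediction results on screen.
        out_dir (str, optional): Directory to save the visualizations to.

    Returns:
        list: The prediction results.
    """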
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, **data)
        results.extend(result)

        if show or out_dir:
            scores = np.vstack(result)
            pred_score = np.max(scores, axis=1)
            pred_label = np.argmax(scores, axis=1)
            pred_class = [model.CLASSES[lb] for lb in pred_label]

            img_metas = data['img_metas'].data[0]
            imgs = tensor2imgs(data['img'], **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                result_show = {
                    'pred_score': pred_score[i],
                    'pred_label': pred_label[i],
                    'pred_class': pred_class[i]
                }
                model.module.show_result(
                    img_show,
                    result_show,
                    show=show,
                    out_file=out_file,
                    **show_kwargs)

        batch_size = data['img'].size(0)
        for _ in range(batch_size):
            prog_bar.update()
    return results
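
# A minimal usage sketch for `single_gpu_test`. It assumes a classifier and
# data loader have already been built with the usual mmcls helpers (exact
# helper names may differ across versions); `show_result` is called through
# `model.module`, so the model must be wrapped first:
#
#   from mmcv.parallel import MMDataParallel
#
#   model = MMDataParallel(model, device_ids=[0])
#   outputs = single_gpu_test(model, data_loader, show=False,
#                             out_dir='work_dirs/vis')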

def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
    """Test model with multiple gpus.

    This method tests model with multiple gpus and collects the results
    under two different modes: gpu and cpu modes. By setting
    'gpu_collect=True', it encodes results to gpu tensors and uses gpu
    communication for results collection. In cpu mode it saves the results
    on different gpus to 'tmpdir' and collects them by the rank 0 worker.

    Args:
        model (nn.Module): Model to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect
            results.

    Returns:
        list: The prediction results.
    """
    model.eval()
    results = []
    dataset = data_loader.dataset
    rank, world_size = get_dist_info()
    if rank == 0:
        prog_bar = mmcv.ProgressBar(len(dataset))
    time.sleep(2)  # This line can prevent deadlock problem in some cases.
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, **data)
        if isinstance(result, list):
            results.extend(result)
        else:
            results.append(result)

        if rank == 0:
            batch_size = data['img'].size(0)
            for _ in range(batch_size * world_size):
                prog_bar.update()

    # collect results from all ranks
    if gpu_collect:
        results = collect_results_gpu(results, len(dataset))
    else:
        results = collect_results_cpu(results, len(dataset), tmpdir)
    return results
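
# A usage sketch for `multi_gpu_test`, assuming the distributed environment
# has already been initialized (e.g. via mmcv's init_dist) and the model has
# been wrapped for distributed testing; details may differ across versions:
#
#   from mmcv.parallel import MMDistributedDataParallel
#
#   model = MMDistributedDataParallel(
#       model.cuda(), device_ids=[torch.cuda.current_device()])
#   outputs = multi_gpu_test(model, data_loader, gpu_collect=True)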
def collect_results_cpu(result_part, size, tmpdir=None):
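    """Collect results from all ranks via a shared temporary directory.

    Each rank dumps its partial results to `tmpdir` (created on rank 0 and
    broadcast to the other ranks when not specified), then rank 0 loads and
    reorders all the parts.

    Args:
        result_part (list): Partial results held by the current rank.
        size (int): Size of the whole dataset, used to trim padded samples.
        tmpdir (str, optional): Directory to store the partial results.

    Returns:
        list | None: The ordered results on rank 0, None on other ranks.
    """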
    rank, world_size = get_dist_info()
    # create a tmp dir if it is not specified
    if tmpdir is None:
        MAX_LEN = 512
        # 32 is whitespace
        dir_tensor = torch.full((MAX_LEN, ),
                                32,
                                dtype=torch.uint8,
                                device='cuda')
        if rank == 0:
            mmcv.mkdir_or_exist('.dist_test')
            tmpdir = tempfile.mkdtemp(dir='.dist_test')
            tmpdir = torch.tensor(
                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
            dir_tensor[:len(tmpdir)] = tmpdir
        dist.broadcast(dir_tensor, 0)
        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
    else:
        if osp.exists(tmpdir):
            raise OSError(f'The tmpdir {tmpdir} already exists.'
                          ' Since tmpdir will be deleted after testing,'
                          ' please make sure you specify an empty one.')
        mmcv.mkdir_or_exist(tmpdir)
    # dump the part result to the dir
    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
    dist.barrier()
    # collect all parts
    if rank != 0:
        return None
    else:
        # load results of all parts from tmp dir
        part_list = []
        for i in range(world_size):
            part_file = osp.join(tmpdir, f'part_{i}.pkl')
            part_result = mmcv.load(part_file)
            # When data is severely insufficient, an empty part_result
            # on a certain gpu could make the overall outputs empty.
            if part_result:
                part_list.append(part_result)
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        # remove tmp dir
        shutil.rmtree(tmpdir)
        return ordered_results
def collect_results_gpu(result_part, size):
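    """Collect results from all ranks with gpu communication.

    Each rank pickles its partial results into a byte tensor, the tensors
    are padded to a common length and all-gathered, then rank 0 unpickles
    and reorders all the parts.

    Args:
        result_part (list): Partial results held by the current rank.
        size (int): Size of the whole dataset, used to trim padded samples.

    Returns:
        list | None: The ordered results on rank 0, None on other ranks.
    """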
    rank, world_size = get_dist_info()
    # dump result part to tensor with pickle
    part_tensor = torch.tensor(
        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
    # gather all result part tensor shape
    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
    shape_list = [shape_tensor.clone() for _ in range(world_size)]
    dist.all_gather(shape_list, shape_tensor)
    # padding result part tensor to max length
    shape_max = torch.tensor(shape_list).max()
    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
    part_send[:shape_tensor[0]] = part_tensor
    part_recv_list = [
        part_tensor.new_zeros(shape_max) for _ in range(world_size)
    ]
    # gather all result part
    dist.all_gather(part_recv_list, part_send)
    if rank == 0:
        part_list = []
        for recv, shape in zip(part_recv_list, shape_list):
            part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())
            # When data is severely insufficient, an empty part_result
            # on a certain gpu could make the overall outputs empty.
            if part_result:
                part_list.append(part_result)
        # sort the results
        ordered_results = []
        for res in zip(*part_list):
            ordered_results.extend(list(res))
        # the dataloader may pad some samples
        ordered_results = ordered_results[:size]
        return ordered_results