[Bug fix] Fix efficient test for multi-node (#707)

* [Bug fix] Fix efficient test for multi-node

* fixed CI

* add efficient test dir

* remove unused args
Jerry Jiarui XU 2021-07-15 12:13:03 -07:00 committed by GitHub
parent 55085a85c3
commit 5184c6a8db
1 changed file with 14 additions and 87 deletions
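
In short: with `efficient_test` enabled, every rank dumps its per-image predictions as `.npy` files, but the old code let `tempfile` pick the directory, i.e. each node's local tempdir. A sketch of the failure mode this patch presumably targets (paths are illustrative):

    import os
    import tempfile

    # With dir=None the file lands in the node-local tempdir (e.g. /tmp),
    # so rank 0 on another node cannot read it back during evaluation:
    print(tempfile.NamedTemporaryFile(suffix='.npy', delete=False).name)

    # With dir pointing into the (typically shared) working directory,
    # every rank writes where rank 0 can read:
    os.makedirs('.efficient_test', exist_ok=True)
    print(tempfile.NamedTemporaryFile(
        suffix='.npy', delete=False, dir='.efficient_test').name)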

@@ -1,17 +1,15 @@
 import os.path as osp
-import pickle
-import shutil
 import tempfile
 
 import mmcv
 import numpy as np
 import torch
-import torch.distributed as dist
+from mmcv.engine import collect_results_cpu, collect_results_gpu
 from mmcv.image import tensor2imgs
 from mmcv.runner import get_dist_info
 
 
-def np2tmp(array, temp_file_name=None):
+def np2tmp(array, temp_file_name=None, tmpdir=None):
     """Save ndarray to local numpy file.
 
     Args:
@@ -19,6 +17,7 @@ def np2tmp(array, temp_file_name=None):
         temp_file_name (str): Numpy file name. If 'temp_file_name=None', this
             function will generate a file name with tempfile.NamedTemporaryFile
             to save ndarray. Default: None.
+        tmpdir (str): Temporary directory to save Ndarray files. Default: None.
 
     Returns:
         str: The numpy file name.
@@ -26,7 +25,7 @@ def np2tmp(array, temp_file_name=None):
 
     if temp_file_name is None:
         temp_file_name = tempfile.NamedTemporaryFile(
-            suffix='.npy', delete=False).name
+            suffix='.npy', delete=False, dir=tmpdir).name
     np.save(temp_file_name, array)
     return temp_file_name
 
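
A minimal usage sketch of the patched helper (assumes `np2tmp` as defined above is in scope; the generated file name is random):

    import mmcv
    import numpy as np

    mmcv.mkdir_or_exist('.efficient_test')   # the test loops below do this
    path = np2tmp(np.zeros((4, 4)), tmpdir='.efficient_test')
    assert np.load(path).shape == (4, 4)     # round-trips via the shared dir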
@@ -58,6 +57,8 @@ def single_gpu_test(model,
     results = []
     dataset = data_loader.dataset
     prog_bar = mmcv.ProgressBar(len(dataset))
+    if efficient_test:
+        mmcv.mkdir_or_exist('.efficient_test')
     for i, data in enumerate(data_loader):
         with torch.no_grad():
             result = model(return_loss=False, **data)
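
Each process now creates the directory before writing into it. `mmcv.mkdir_or_exist` behaves roughly like the stdlib sketch below (my paraphrase, not mmcv's exact source), so calling it concurrently from every rank is safe:

    import os
    import os.path as osp

    def mkdir_or_exist_sketch(dir_name, mode=0o777):
        # idempotent recursive mkdir; no-op on an empty name
        if dir_name == '':
            return
        os.makedirs(osp.expanduser(dir_name), mode=mode, exist_ok=True)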
@@ -90,11 +91,11 @@ def single_gpu_test(model,
 
         if isinstance(result, list):
             if efficient_test:
-                result = [np2tmp(_) for _ in result]
+                result = [np2tmp(_, tmpdir='.efficient_test') for _ in result]
             results.extend(result)
         else:
             if efficient_test:
-                result = np2tmp(result)
+                result = np2tmp(result, tmpdir='.efficient_test')
             results.append(result)
 
         batch_size = len(result)
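
On the evaluation side, a string result is treated as a saved prediction and loaded back on demand; a hedged sketch of that consumer logic (mmseg's metric code does something equivalent, the exact location varies by version):

    import numpy as np

    def as_array(result):
        # efficient test stores paths instead of arrays to cap CPU memory;
        # rehydrate one prediction at a time
        return np.load(result) if isinstance(result, str) else result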
@@ -120,7 +121,8 @@ def multi_gpu_test(model,
         model (nn.Module): Model to be tested.
         data_loader (utils.data.Dataloader): Pytorch data loader.
         tmpdir (str): Path of directory to save the temporary results from
-            different gpus under cpu mode.
+            different gpus under cpu mode. The same path is used for efficient
+            test.
         gpu_collect (bool): Option to use either gpu or cpu to collect results.
         efficient_test (bool): Whether save the results as local numpy files to
             save CPU memory during evaluation. Default: False.
@@ -135,17 +137,19 @@ def multi_gpu_test(model,
     rank, world_size = get_dist_info()
     if rank == 0:
         prog_bar = mmcv.ProgressBar(len(dataset))
+    if efficient_test:
+        mmcv.mkdir_or_exist('.efficient_test')
     for i, data in enumerate(data_loader):
         with torch.no_grad():
             result = model(return_loss=False, rescale=True, **data)
 
         if isinstance(result, list):
             if efficient_test:
-                result = [np2tmp(_) for _ in result]
+                result = [np2tmp(_, tmpdir='.efficient_test') for _ in result]
             results.extend(result)
         else:
             if efficient_test:
-                result = np2tmp(result)
+                result = np2tmp(result, tmpdir='.efficient_test')
             results.append(result)
 
         if rank == 0:
@@ -159,80 +163,3 @@ def multi_gpu_test(model,
     else:
         results = collect_results_cpu(results, len(dataset), tmpdir)
     return results
-
-
-def collect_results_cpu(result_part, size, tmpdir=None):
-    """Collect results with CPU."""
-    rank, world_size = get_dist_info()
-    # create a tmp dir if it is not specified
-    if tmpdir is None:
-        MAX_LEN = 512
-        # 32 is whitespace
-        dir_tensor = torch.full((MAX_LEN, ),
-                                32,
-                                dtype=torch.uint8,
-                                device='cuda')
-        if rank == 0:
-            tmpdir = tempfile.mkdtemp()
-            tmpdir = torch.tensor(
-                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
-            dir_tensor[:len(tmpdir)] = tmpdir
-        dist.broadcast(dir_tensor, 0)
-        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
-    else:
-        mmcv.mkdir_or_exist(tmpdir)
-    # dump the part result to the dir
-    mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
-    dist.barrier()
-    # collect all parts
-    if rank != 0:
-        return None
-    else:
-        # load results of all parts from tmp dir
-        part_list = []
-        for i in range(world_size):
-            part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
-            part_list.append(mmcv.load(part_file))
-        # sort the results
-        ordered_results = []
-        for res in zip(*part_list):
-            ordered_results.extend(list(res))
-        # the dataloader may pad some samples
-        ordered_results = ordered_results[:size]
-        # remove tmp dir
-        shutil.rmtree(tmpdir)
-        return ordered_results
-
-
-def collect_results_gpu(result_part, size):
-    """Collect results with GPU."""
-    rank, world_size = get_dist_info()
-    # dump result part to tensor with pickle
-    part_tensor = torch.tensor(
-        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
-    # gather all result part tensor shape
-    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
-    shape_list = [shape_tensor.clone() for _ in range(world_size)]
-    dist.all_gather(shape_list, shape_tensor)
-    # padding result part tensor to max length
-    shape_max = torch.tensor(shape_list).max()
-    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
-    part_send[:shape_tensor[0]] = part_tensor
-    part_recv_list = [
-        part_tensor.new_zeros(shape_max) for _ in range(world_size)
-    ]
-    # gather all result part
-    dist.all_gather(part_recv_list, part_send)
-
-    if rank == 0:
-        part_list = []
-        for recv, shape in zip(part_recv_list, shape_list):
-            part_list.append(
-                pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
-        # sort the results
-        ordered_results = []
-        for res in zip(*part_list):
-            ordered_results.extend(list(res))
-        # the dataloader may pad some samples
-        ordered_results = ordered_results[:size]
-        return ordered_results
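
The deleted helpers live on in `mmcv.engine` with the same behavior. Two of their tricks are worth a standalone toy (pure illustration, no `torch.distributed` required): `zip(*part_list)` restores dataset order because the distributed sampler deals indices round-robin, and the GPU path pads pickled byte tensors to a common length before `all_gather`, then slices each back to its true length:

    import pickle
    import torch

    # (1) reordering: rank r holds samples r, r + W, r + 2W, ... (W = world size)
    part_list = [[0, 2, 4], [1, 3, 5]]        # W = 2, padded to equal length
    ordered = [x for res in zip(*part_list) for x in res]
    assert ordered == [0, 1, 2, 3, 4, 5]      # then truncated to the real size

    # (2) pad-and-unpickle: pad to the longest rank's byte length, recover by
    # slicing with the sender's true length before unpickling
    part = {'rank': 1, 'preds': [3, 1, 4]}
    buf = torch.tensor(bytearray(pickle.dumps(part)), dtype=torch.uint8)
    padded = torch.zeros(buf.numel() + 7, dtype=torch.uint8)  # a longer peer
    padded[:buf.numel()] = buf
    assert pickle.loads(padded[:buf.numel()].numpy().tobytes()) == part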