[Bug fix] Fix efficient test for multi-node (#707)

* [Bug fix] Fix efficient test for multi-node

* Fix CI

* Add efficient test dir

* Remove unused args
Jerry Jiarui XU 2021-07-15 12:13:03 -07:00 committed by GitHub
parent 55085a85c3
commit 5184c6a8db
1 changed file with 14 additions and 87 deletions
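Why the fix works: np2tmp() previously let tempfile.NamedTemporaryFile pick the system default temporary directory, which is node-local (typically /tmp), so under multi-node testing the .npy result files written by ranks on one node were unreachable from other nodes during collection. The patch routes them into a .efficient_test directory under the working directory instead. A minimal sketch of the difference, assuming the working directory lives on a filesystem shared by all nodes:

    import os
    import tempfile

    # Default behaviour: the file lands in the node-local temp dir
    # (e.g. /tmp), so ranks on other nodes cannot read it back.
    local_only = tempfile.NamedTemporaryFile(suffix='.npy', delete=False).name

    # Patched behaviour: dir= redirects the file into a directory relative
    # to the current working directory, assumed here to be shared storage.
    os.makedirs('.efficient_test', exist_ok=True)
    shared = tempfile.NamedTemporaryFile(
        suffix='.npy', delete=False, dir='.efficient_test').name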

@@ -1,17 +1,15 @@
 import os.path as osp
-import pickle
-import shutil
 import tempfile
 
 import mmcv
 import numpy as np
 import torch
-import torch.distributed as dist
+from mmcv.engine import collect_results_cpu, collect_results_gpu
 from mmcv.image import tensor2imgs
 from mmcv.runner import get_dist_info
 
 
-def np2tmp(array, temp_file_name=None):
+def np2tmp(array, temp_file_name=None, tmpdir=None):
     """Save ndarray to local numpy file.
 
     Args:
@@ -19,6 +17,7 @@ def np2tmp(array, temp_file_name=None):
         temp_file_name (str): Numpy file name. If 'temp_file_name=None', this
             function will generate a file name with tempfile.NamedTemporaryFile
             to save ndarray. Default: None.
+        tmpdir (str): Temporary directory to save Ndarray files. Default: None.
 
     Returns:
         str: The numpy file name.
@@ -26,7 +25,7 @@ def np2tmp(array, temp_file_name=None):
     if temp_file_name is None:
         temp_file_name = tempfile.NamedTemporaryFile(
-            suffix='.npy', delete=False).name
+            suffix='.npy', delete=False, dir=tmpdir).name
     np.save(temp_file_name, array)
     return temp_file_name
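A quick usage sketch of the patched helper (the array shape and label count below are made up for illustration):

    import mmcv
    import numpy as np

    # The caller is responsible for creating the shared directory first,
    # as the test loops below now do.
    mmcv.mkdir_or_exist('.efficient_test')
    seg_pred = np.random.randint(0, 19, size=(512, 512), dtype=np.uint8)
    fname = np2tmp(seg_pred, tmpdir='.efficient_test')
    assert np.array_equal(np.load(fname), seg_pred)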
@@ -58,6 +57,8 @@ def single_gpu_test(model,
     results = []
     dataset = data_loader.dataset
     prog_bar = mmcv.ProgressBar(len(dataset))
+    if efficient_test:
+        mmcv.mkdir_or_exist('.efficient_test')
     for i, data in enumerate(data_loader):
         with torch.no_grad():
             result = model(return_loss=False, **data)
@@ -90,11 +91,11 @@
         if isinstance(result, list):
             if efficient_test:
-                result = [np2tmp(_) for _ in result]
+                result = [np2tmp(_, tmpdir='.efficient_test') for _ in result]
             results.extend(result)
         else:
             if efficient_test:
-                result = np2tmp(result)
+                result = np2tmp(result, tmpdir='.efficient_test')
             results.append(result)
 
         batch_size = len(result)
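With efficient_test=True, results holds file names rather than arrays. The consumer side is not shown in this diff, but the implied convention is that evaluation code loads string entries back on demand; an assumed sketch, not code from this patch:

    import numpy as np

    def maybe_load(result):
        # String entries are paths produced by np2tmp; anything else is
        # already an in-memory array.
        return np.load(result) if isinstance(result, str) else result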
@@ -120,7 +121,8 @@ def multi_gpu_test(model,
         model (nn.Module): Model to be tested.
         data_loader (utils.data.Dataloader): Pytorch data loader.
         tmpdir (str): Path of directory to save the temporary results from
-            different gpus under cpu mode.
+            different gpus under cpu mode. The same path is used for efficient
+            test.
         gpu_collect (bool): Option to use either gpu or cpu to collect results.
         efficient_test (bool): Whether save the results as local numpy files to
             save CPU memory during evaluation. Default: False.
@@ -135,17 +137,19 @@ def multi_gpu_test(model,
     rank, world_size = get_dist_info()
     if rank == 0:
         prog_bar = mmcv.ProgressBar(len(dataset))
+    if efficient_test:
+        mmcv.mkdir_or_exist('.efficient_test')
     for i, data in enumerate(data_loader):
         with torch.no_grad():
             result = model(return_loss=False, rescale=True, **data)
 
         if isinstance(result, list):
             if efficient_test:
-                result = [np2tmp(_) for _ in result]
+                result = [np2tmp(_, tmpdir='.efficient_test') for _ in result]
             results.extend(result)
         else:
             if efficient_test:
-                result = np2tmp(result)
+                result = np2tmp(result, tmpdir='.efficient_test')
             results.append(result)
 
         if rank == 0:
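For CPU collection across nodes, tmpdir must likewise point at shared storage. A hypothetical invocation on each rank (the path and flag values are illustrative, not from this diff):

    results = multi_gpu_test(
        model,
        data_loader,
        tmpdir='./work_dirs/eval_tmp',  # assumed to sit on a shared filesystem
        gpu_collect=False,
        efficient_test=True)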
@ -159,80 +163,3 @@ def multi_gpu_test(model,
else: else:
results = collect_results_cpu(results, len(dataset), tmpdir) results = collect_results_cpu(results, len(dataset), tmpdir)
return results return results
def collect_results_cpu(result_part, size, tmpdir=None):
"""Collect results with CPU."""
rank, world_size = get_dist_info()
# create a tmp dir if it is not specified
if tmpdir is None:
MAX_LEN = 512
# 32 is whitespace
dir_tensor = torch.full((MAX_LEN, ),
32,
dtype=torch.uint8,
device='cuda')
if rank == 0:
tmpdir = tempfile.mkdtemp()
tmpdir = torch.tensor(
bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
dir_tensor[:len(tmpdir)] = tmpdir
dist.broadcast(dir_tensor, 0)
tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
else:
mmcv.mkdir_or_exist(tmpdir)
# dump the part result to the dir
mmcv.dump(result_part, osp.join(tmpdir, 'part_{}.pkl'.format(rank)))
dist.barrier()
# collect all parts
if rank != 0:
return None
else:
# load results of all parts from tmp dir
part_list = []
for i in range(world_size):
part_file = osp.join(tmpdir, 'part_{}.pkl'.format(i))
part_list.append(mmcv.load(part_file))
# sort the results
ordered_results = []
for res in zip(*part_list):
ordered_results.extend(list(res))
# the dataloader may pad some samples
ordered_results = ordered_results[:size]
# remove tmp dir
shutil.rmtree(tmpdir)
return ordered_results
def collect_results_gpu(result_part, size):
"""Collect results with GPU."""
rank, world_size = get_dist_info()
# dump result part to tensor with pickle
part_tensor = torch.tensor(
bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
# gather all result part tensor shape
shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
shape_list = [shape_tensor.clone() for _ in range(world_size)]
dist.all_gather(shape_list, shape_tensor)
# padding result part tensor to max length
shape_max = torch.tensor(shape_list).max()
part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
part_send[:shape_tensor[0]] = part_tensor
part_recv_list = [
part_tensor.new_zeros(shape_max) for _ in range(world_size)
]
# gather all result part
dist.all_gather(part_recv_list, part_send)
if rank == 0:
part_list = []
for recv, shape in zip(part_recv_list, shape_list):
part_list.append(
pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))
# sort the results
ordered_results = []
for res in zip(*part_list):
ordered_results.extend(list(res))
# the dataloader may pad some samples
ordered_results = ordered_results[:size]
return ordered_results
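The two deleted helpers are not gone: they are now imported from mmcv.engine (see the import hunk at the top), which lets the local copies and their pickle, shutil, and torch.distributed dependencies be dropped. Usage is unchanged, assuming an mmcv version that ships mmcv.engine:

    from mmcv.engine import collect_results_cpu, collect_results_gpu

    # Gather each rank's partial results; rank 0 receives the full ordered
    # list, other ranks receive None.
    ordered = collect_results_cpu(result_part, len(dataset), tmpdir)
    # or, when gpu_collect=True:
    ordered = collect_results_gpu(result_part, len(dataset))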