# Copyright (c) Alibaba, Inc. and its affiliates.
"""Contains functions which are convenient for unit testing."""
import logging
import os
import shutil
import socket
import subprocess
import uuid
from multiprocessing import Process

import numpy as np
import torch

TEST_DIR = '/tmp/ev_pytorch_test'


def get_tmp_dir():
    """Create and return a unique, empty temporary directory for a test."""
    global TEST_DIR
    if os.environ.get('TEST_DIR', '') != '':
        TEST_DIR = os.environ['TEST_DIR']
    dir_name = os.path.join(TEST_DIR, uuid.uuid4().hex)
    if os.path.exists(dir_name):
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)
    return dir_name


def clear_all_tmp_dirs():
    """Remove the whole temporary test directory tree."""
    shutil.rmtree(TEST_DIR)
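

# A minimal usage sketch for the tmp-dir helpers (assumes the standard
# `unittest` framework; the test class below is hypothetical, not part of
# this module):
#
#     class MyTest(unittest.TestCase):
#
#         def setUp(self):
#             self.tmp_dir = get_tmp_dir()   # unique dir under TEST_DIR
#
#         def tearDown(self):
#             clean_up(self.tmp_dir)         # or clear_all_tmp_dirs()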


def replace_data_for_test(cfg):
    """Replace real data with test data.

    Args:
        cfg: Config object
    """
    pass


# Decorator that runs the wrapped function in a subprocess. Useful when the
# function starts a TensorFlow session, because TensorFlow GPU memory is not
# released until the owning process exits.
def RunAsSubprocess(f):

    def wrapped_f(*args, **kw):
        p = Process(target=f, args=args, kwargs=kw)
        p.start()
        p.join(timeout=600)
        assert p.exitcode == 0, 'subprocess run failed: %s' % f.__name__

    return wrapped_f
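

# A minimal usage sketch for RunAsSubprocess (`_leaky_gpu_job` is a
# hypothetical test helper, not part of this module):
#
#     @RunAsSubprocess
#     def _leaky_gpu_job():
#         ...  # allocate GPU memory / start a TF session
#
#     _leaky_gpu_job()  # runs in a child process, waits up to 600s,
#                       # and asserts that it exited with code 0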


def clean_up(test_dir):
    if test_dir is not None:
        shutil.rmtree(test_dir)


def run_in_subprocess(cmd):
    """Run ``cmd`` in a subprocess and stream its output to logging."""
    with subprocess.Popen(
            cmd, shell=True, stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT) as return_info:
        while True:
            next_line = return_info.stdout.readline()
            return_line = next_line.decode('utf-8', 'ignore').strip()
            if return_line == '' and return_info.poll() is not None:
                break
            if return_line != '':
                logging.info(return_line)

        return_code = return_info.wait()
        if return_code:
            raise subprocess.CalledProcessError(return_code, cmd)
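

# Usage sketch (assumes logging has been configured so INFO lines are
# visible; the command string is arbitrary):
#
#     logging.basicConfig(level=logging.INFO)
#     run_in_subprocess('python -c "print(123)"')
#     # raises subprocess.CalledProcessError if the command exits non-zero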


def dist_exec_wrapper(cmd,
                      nproc_per_node,
                      node_rank=0,
                      nnodes=1,
                      port='29527',
                      addr='127.0.0.1',
                      python_path=None):
    """Launch ``cmd`` once per local process with torch.distributed env vars set.

    Do not forget to init dist in the function or script run by ``cmd``:

    ```python
    from mmcv.runner import init_dist
    init_dist(launcher='pytorch')
    ```
    """
    dist_world_size = nproc_per_node * nnodes
    cur_env = os.environ.copy()
    cur_env['WORLD_SIZE'] = str(dist_world_size)
    cur_env['MASTER_ADDR'] = addr

    if is_port_used(port):
        port = str(get_random_port())
        logging.warning('Given port is used, change to port %s' % port)

    cur_env['MASTER_PORT'] = port
    processes = []

    for local_rank in range(0, nproc_per_node):
        # rank of each process
        dist_rank = nproc_per_node * node_rank + local_rank
        cur_env['RANK'] = str(dist_rank)
        cur_env['LOCAL_RANK'] = str(local_rank)
        if python_path:
            cur_env['PYTHONPATH'] = ':'.join(
                (cur_env.get('PYTHONPATH', ''), python_path))

        process = subprocess.Popen(cmd, env=cur_env, shell=True)
        processes.append(process)

    for process in processes:
        process.wait()
        if process.returncode != 0:
            raise subprocess.CalledProcessError(
                returncode=process.returncode, cmd=cmd)
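

# A minimal usage sketch for dist_exec_wrapper (the script path and config
# below are placeholders): launches two local workers, each of which must
# call init_dist(launcher='pytorch') as noted in the docstring above.
#
#     dist_exec_wrapper(
#         'python tools/train.py configs/some_config.py',
#         nproc_per_node=2)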


def is_port_used(port, host='127.0.0.1'):
    """Return True if ``host:port`` accepts a TCP connection."""
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((host, int(port)))
        return True
    except Exception:
        return False
    finally:
        s.close()


def get_random_port():
    while True:
        port = np.random.randint(low=5000, high=10000)
        if is_port_used(port):
            continue
        else:
            break

    logging.info('Random port: %s' % port)

    return port


def pseudo_dist_init():
    """Initialize a single-process torch.distributed group for tests."""
    os.environ['RANK'] = '0'
    os.environ['WORLD_SIZE'] = '1'
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = str(get_random_port())
    torch.cuda.set_device(0)
    from torch import distributed as dist
    dist.init_process_group(backend='nccl')
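

# Usage sketch (requires a CUDA-capable GPU, since the process group uses the
# NCCL backend):
#
#     pseudo_dist_init()
#     # torch.distributed.get_world_size() == 1, get_rank() == 0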