import asyncio
import os
import shutil
import urllib.request

import mmcv
import torch

from mmdet.apis import (async_inference_detector, inference_detector,
                        init_detector)
from mmdet.utils.contextmanagers import concurrent
from mmdet.utils.profiling import profile_time


async def main():
    """Benchmark between async and synchronous inference interfaces.

    Sample runs for 20 demo images on a K80 GPU, model mask_rcnn_r50_fpn_1x:

    async        sync
    7981.79 ms   9660.82 ms
    8074.52 ms   9660.94 ms
    7976.44 ms   9406.83 ms

    The async variant takes about 0.83-0.85x the time of the synchronous
    interface.
    """
    project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))

    config_file = os.path.join(
        project_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py')
    checkpoint_file = os.path.join(
        project_dir,
        'checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth')

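    # download the pretrained checkpoint on first run and cache it locally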
    if not os.path.exists(checkpoint_file):
        url = ('http://download.openmmlab.com/mmdetection/v2.0'
               '/mask_rcnn/mask_rcnn_r50_fpn_1x_coco'
               '/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth')
        print(f'Downloading {url} ...')
        local_filename, _ = urllib.request.urlretrieve(url)
        os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True)
        shutil.move(local_filename, checkpoint_file)
        print(f'Saved as {checkpoint_file}')
    else:
        print(f'Using existing checkpoint {checkpoint_file}')

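    # build the detector from the config and load the checkpoint onto the GPU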
    device = 'cuda:0'
    model = init_detector(
        config_file, checkpoint=checkpoint_file, device=device)

    # queue is used for concurrent inference of multiple images
    streamqueue = asyncio.Queue()
    # queue size defines concurrency level
    streamqueue_size = 4

    for _ in range(streamqueue_size):
        streamqueue.put_nowait(torch.cuda.Stream(device=device))

    # read the demo image reused by every benchmark request; results are
    # rendered at the end
    img = mmcv.imread(os.path.join(project_dir, 'demo/demo.jpg'))

    # warmup
    await async_inference_detector(model, img)

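    # detect() borrows a CUDA stream from streamqueue for each call,
    # so at most streamqueue_size requests run concurrently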
    async def detect(img):
        async with concurrent(streamqueue):
            return await async_inference_detector(model, img)

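    # benchmark the async interface: submit all requests at once, then await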
    num_of_images = 20
    with profile_time('benchmark', 'async'):
        tasks = [
            asyncio.create_task(detect(img)) for _ in range(num_of_images)
        ]
        async_results = await asyncio.gather(*tasks)

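    # benchmark the synchronous interface on the default CUDA stream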
    with torch.cuda.stream(torch.cuda.default_stream()):
        with profile_time('benchmark', 'sync'):
            sync_results = [
                inference_detector(model, img) for _ in range(num_of_images)
            ]

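    # render detections from the first request of each run for comparison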
    result_dir = os.path.join(project_dir, 'demo')
    model.show_result(
        img,
        async_results[0],
        score_thr=0.5,
        show=False,
        out_file=os.path.join(result_dir, 'result_async.jpg'))
    model.show_result(
        img,
        sync_results[0],
        score_thr=0.5,
        show=False,
        out_file=os.path.join(result_dir, 'result_sync.jpg'))


if __name__ == '__main__':
    asyncio.run(main())