# deep-person-reid/torchreid/metrics/rank.py

from __future__ import division, print_function, absolute_import
import numpy as np
import warnings
from collections import defaultdict

# Try to load the compiled Cython evaluator. When the extension cannot be
# imported, record that in IS_CYTHON_AVAI and warn once at import time so the
# pure-python evaluation defined in this module is used instead.
try:
    from torchreid.metrics.rank_cylib.rank_cy import evaluate_cy
    IS_CYTHON_AVAI = True
except ImportError:
    # evaluate_cy is left undefined on this path; IS_CYTHON_AVAI is the flag
    # to check before referencing it.
    IS_CYTHON_AVAI = False
    warnings.warn(
        'Cython evaluation (very fast so highly recommended) is '
        'unavailable, now use python evaluation.'
    )


def eval_cuhk03(distmat, q_pids, g_pids, q_camids, g_camids, max_rank):
    """Evaluation with cuhk03 metric
    Key: one image for each gallery identity is randomly sampled for each query identity.
    Random sampling is performed num_repeats times.

    Args:
        distmat (numpy.ndarray): distance matrix of shape (num_query, num_gallery).
        q_pids (numpy.ndarray): 1-D array of query person identities.
        g_pids (numpy.ndarray): 1-D array of gallery person identities.
        q_camids (numpy.ndarray): 1-D array of query camera ids.
        g_camids (numpy.ndarray): 1-D array of gallery camera ids.
        max_rank (int): maximum CMC rank; clamped to the number of gallery
            samples when the gallery is smaller.

    Returns:
        tuple: ``(all_cmc, mAP)`` where ``all_cmc`` is the float32 CMC curve
        averaged over valid queries and ``mAP`` is the mean of the per-query
        average precisions.

    Raises:
        AssertionError: if no query identity appears in the gallery.
    """
    num_repeats = 10
    num_q, num_g = distmat.shape

    if num_g < max_rank:
        max_rank = num_g
        print(
            'Note: number of gallery samples is quite small, got {}'.
            format(num_g)
        )

    # gallery indices sorted by increasing distance, one row per query
    indices = np.argsort(distmat, axis=1)
    matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)

    # compute cmc curve for each query
    all_cmc = []
    all_AP = []
    num_valid_q = 0. # number of valid query

    for q_idx in range(num_q):
        # get query pid and camid
        q_pid = q_pids[q_idx]
        q_camid = q_camids[q_idx]

        # remove gallery samples that have the same pid and camid with query
        order = indices[q_idx]
        remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
        keep = np.invert(remove)

        # binary vector, positions with value 1 are correct matches
        raw_cmc = matches[q_idx][keep]
        if not np.any(raw_cmc):
            # this condition is true when query identity does not appear in gallery
            continue

        # group the ranked positions of the kept gallery samples by identity
        kept_g_pids = g_pids[order][keep]
        g_pids_dict = defaultdict(list)
        for idx, pid in enumerate(kept_g_pids):
            g_pids_dict[pid].append(idx)

        cmc = 0.
        for _ in range(num_repeats):
            # builtin bool: the np.bool alias was removed in NumPy 1.24
            mask = np.zeros(len(raw_cmc), dtype=bool)
            for idxs in g_pids_dict.values():
                # randomly sample one image for each gallery person
                rnd_idx = np.random.choice(idxs)
                mask[rnd_idx] = True
            # one entry per sampled identity, still in ranked order
            _cmc = raw_cmc[mask].cumsum()
            _cmc[_cmc > 1] = 1
            cmc += _cmc[:max_rank].astype(np.float32)

        cmc /= num_repeats
        all_cmc.append(cmc)

        # compute AP on the full (multi-shot) ranking: precision at each rank,
        # kept only at the positions of correct matches
        num_rel = raw_cmc.sum()
        precision = raw_cmc.cumsum() / np.arange(1., len(raw_cmc) + 1.)
        AP = (precision * raw_cmc).sum() / num_rel
        all_AP.append(AP)
        num_valid_q += 1.

    assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'

    all_cmc = np.asarray(all_cmc).astype(np.float32)
    all_cmc = all_cmc.sum(0) / num_valid_q
    mAP = np.mean(all_AP)

    return all_cmc, mAP


def eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank):
    """Evaluation with market1501 metric
    Key: for each query identity, its gallery images from the same camera view are discarded.

    Returns the CMC curve averaged over valid queries (float32, truncated to
    max_rank) together with the mean average precision.
    """
    num_q, num_g = distmat.shape

    if max_rank > num_g:
        print(
            'Note: number of gallery samples is quite small, got {}'.
            format(num_g)
        )
        max_rank = num_g

    # per-query gallery ranking (closest first) and the hit/miss flags along it
    ranking = np.argsort(distmat, axis=1)
    is_match = (g_pids[ranking] == q_pids[:, np.newaxis]).astype(np.int32)

    cmc_rows = []
    ap_values = []

    for qi in range(num_q):
        ranked = ranking[qi]

        # gallery entries sharing both identity and camera with the query are dropped
        same_pid_and_cam = (g_pids[ranked] == q_pids[qi]) & (
            g_camids[ranked] == q_camids[qi]
        )
        hits = is_match[qi][~same_pid_and_cam]

        if not hits.any():
            # the query identity is absent from the remaining gallery
            continue

        # running hit count; clamped to 1 it becomes the 0/1 CMC step curve
        running_hits = hits.cumsum()
        cmc_rows.append(np.minimum(running_hits, 1)[:max_rank])

        # average precision
        # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
        precision_at_k = running_hits / np.arange(1., hits.size + 1.)
        ap_values.append((precision_at_k * hits).sum() / hits.sum())

    # one AP value is stored per valid query
    num_valid_q = float(len(ap_values))
    assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'

    mean_cmc = np.asarray(cmc_rows).astype(np.float32).sum(0) / num_valid_q

    return mean_cmc, np.mean(ap_values)


def evaluate_py(
    distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03
):
    """Run the pure-python evaluation with the requested metric.

    Dispatches to ``eval_cuhk03`` when ``use_metric_cuhk03`` is truthy and to
    ``eval_market1501`` otherwise, forwarding all other arguments unchanged
    and returning whatever the chosen function returns.
    """
    metric_fn = eval_cuhk03 if use_metric_cuhk03 else eval_market1501
    return metric_fn(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)


def evaluate_rank(
    distmat,
    q_pids,
    g_pids,
    q_camids,
    g_camids,
    max_rank=50,
    use_metric_cuhk03=False,
    use_cython=True
):
    """Evaluates CMC rank.

    Picks the evaluation backend and forwards every argument to it: the
    cython implementation is used only when it is both requested and was
    successfully imported, otherwise the python implementation runs.

    Args:
        distmat (numpy.ndarray): distance matrix of shape (num_query, num_gallery).
        q_pids (numpy.ndarray): 1-D array with the person identity of each
            query instance.
        g_pids (numpy.ndarray): 1-D array with the person identity of each
            gallery instance.
        q_camids (numpy.ndarray): 1-D array with the camera view of each
            query instance.
        g_camids (numpy.ndarray): 1-D array with the camera view of each
            gallery instance.
        max_rank (int, optional): maximum CMC rank to be computed. Default is 50.
        use_metric_cuhk03 (bool, optional): use single-gallery-shot setting for cuhk03.
            Default is False. This should be enabled when using cuhk03 classic split.
        use_cython (bool, optional): use cython code for evaluation. Default is True.
            This is highly recommended as the cython code can speed up the cmc computation
            by more than 10x. This requires Cython to be installed.

    Returns:
        Whatever the selected backend returns. The python backend yields a
        ``(cmc, mAP)`` pair; the cython backend presumably matches it
        (NOTE(review): confirm against rank_cy).
    """
    # fall back to python when cython is not wanted or could not be imported
    if not (use_cython and IS_CYTHON_AVAI):
        return evaluate_py(
            distmat, q_pids, g_pids, q_camids, g_camids, max_rank,
            use_metric_cuhk03
        )

    return evaluate_cy(
        distmat, q_pids, g_pids, q_camids, g_camids, max_rank,
        use_metric_cuhk03
    )
linting and formatting code 2019-12-01 02:35:44 +00:00			`from __future__ import division, print_function, absolute_import`
first commit 2018-03-11 21:17:48 +00:00			`import numpy as np`
add warning to non-cython evaluation 2018-11-05 19:24:20 +00:00			`import warnings`
linting and formatting code 2019-12-01 02:35:44 +00:00			`from collections import defaultdict`
update eval 2018-06-04 10:27:07 +01:00
			`try:`
create engine SDK 2019-03-19 17:26:08 +00:00			`from torchreid.metrics.rank_cylib.rank_cy import evaluate_cy`
update cython code: add cuhk03-metric 2018-11-10 21:09:13 +00:00			`IS_CYTHON_AVAI = True`
update eval 2018-06-04 10:27:07 +01:00			`except ImportError:`
update cython code: add cuhk03-metric 2018-11-10 21:09:13 +00:00			`IS_CYTHON_AVAI = False`
create engine SDK 2019-03-19 17:26:08 +00:00			`warnings.warn(`
update dataset and docs 2019-03-21 12:53:21 +00:00			`'Cython evaluation (very fast so highly recommended) is '`
			`'unavailable, now use python evaluation.'`
create engine SDK 2019-03-19 17:26:08 +00:00			`)`
first commit 2018-03-11 21:17:48 +00:00
update model & script 2018-07-02 10:17:14 +01:00
add evaluate_py 2018-11-10 21:25:40 +00:00			`def eval_cuhk03(distmat, q_pids, g_pids, q_camids, g_camids, max_rank):`
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`"""Evaluation with cuhk03 metric`
			`Key: one image for each gallery identity is randomly sampled for each query identity.`
add evaluate_py 2018-11-10 21:25:40 +00:00			`Random sampling is performed num_repeats times.`
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`"""`
add evaluate_py 2018-11-10 21:25:40 +00:00			`num_repeats = 10`
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`num_q, num_g = distmat.shape`
linting and formatting code 2019-12-01 02:35:44 +00:00
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`if num_g < max_rank:`
			`max_rank = num_g`
linting and formatting code 2019-12-01 02:35:44 +00:00			`print(`
			`'Note: number of gallery samples is quite small, got {}'.`
			`format(num_g)`
			`)`

add cuhk03-metric 2018-04-23 12:55:47 +01:00			`indices = np.argsort(distmat, axis=1)`
			`matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)`

			`# compute cmc curve for each query`
			`all_cmc = []`
			`all_AP = []`
			`num_valid_q = 0. # number of valid query`
linting and formatting code 2019-12-01 02:35:44 +00:00
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`for q_idx in range(num_q):`
			`# get query pid and camid`
			`q_pid = q_pids[q_idx]`
			`q_camid = q_camids[q_idx]`

			`# remove gallery samples that have the same pid and camid with query`
			`order = indices[q_idx]`
			`remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)`
			`keep = np.invert(remove)`

			`# compute cmc curve`
linting and formatting code 2019-12-01 02:35:44 +00:00			`raw_cmc = matches[q_idx][`
			`keep] # binary vector, positions with value 1 are correct matches`
update cython code: add cuhk03-metric 2018-11-10 21:09:13 +00:00			`if not np.any(raw_cmc):`
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`# this condition is true when query identity does not appear in gallery`
			`continue`

			`kept_g_pids = g_pids[order][keep]`
			`g_pids_dict = defaultdict(list)`
			`for idx, pid in enumerate(kept_g_pids):`
			`g_pids_dict[pid].append(idx)`

compute mAP for cuhk03 old split with multi-gallery-shot 2019-03-07 09:42:06 +00:00			`cmc = 0.`
add evaluate_py 2018-11-10 21:25:40 +00:00			`for repeat_idx in range(num_repeats):`
update cython code: add cuhk03-metric 2018-11-10 21:09:13 +00:00			`mask = np.zeros(len(raw_cmc), dtype=np.bool)`
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`for _, idxs in g_pids_dict.items():`
			`# randomly sample one image for each gallery person`
			`rnd_idx = np.random.choice(idxs)`
			`mask[rnd_idx] = True`
update cython code: add cuhk03-metric 2018-11-10 21:09:13 +00:00			`masked_raw_cmc = raw_cmc[mask]`
			`_cmc = masked_raw_cmc.cumsum()`
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`_cmc[_cmc > 1] = 1`
			`cmc += _cmc[:max_rank].astype(np.float32)`
linting and formatting code 2019-12-01 02:35:44 +00:00
add evaluate_py 2018-11-10 21:25:40 +00:00			`cmc /= num_repeats`
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`all_cmc.append(cmc)`
compute mAP for cuhk03 old split with multi-gallery-shot 2019-03-07 09:42:06 +00:00			`# compute AP`
			`num_rel = raw_cmc.sum()`
			`tmp_cmc = raw_cmc.cumsum()`
			`tmp_cmc = [x / (i+1.) for i, x in enumerate(tmp_cmc)]`
			`tmp_cmc = np.asarray(tmp_cmc) * raw_cmc`
			`AP = tmp_cmc.sum() / num_rel`
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`all_AP.append(AP)`
			`num_valid_q += 1.`

change double quotes to single quote in print 2019-02-20 21:50:47 +00:00			`assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'`
add cuhk03-metric 2018-04-23 12:55:47 +01:00
			`all_cmc = np.asarray(all_cmc).astype(np.float32)`
			`all_cmc = all_cmc.sum(0) / num_valid_q`
			`mAP = np.mean(all_AP)`

			`return all_cmc, mAP`
add cuhk03 metric (incomplete) 2018-04-22 21:18:01 +01:00
update model & script 2018-07-02 10:17:14 +01:00
add cuhk03-metric 2018-04-23 12:55:47 +01:00			`def eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank):`
update comments 2018-04-23 10:27:51 +01:00			`"""Evaluation with market1501 metric`
			`Key: for each query identity, its gallery images from the same camera view are discarded.`
			`"""`
update 2018-03-11 21:47:45 +00:00			`num_q, num_g = distmat.shape`
linting and formatting code 2019-12-01 02:35:44 +00:00
update 2018-03-11 21:47:45 +00:00			`if num_g < max_rank:`
			`max_rank = num_g`
linting and formatting code 2019-12-01 02:35:44 +00:00			`print(`
			`'Note: number of gallery samples is quite small, got {}'.`
			`format(num_g)`
			`)`

first commit 2018-03-11 21:17:48 +00:00			`indices = np.argsort(distmat, axis=1)`
			`matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)`

update 2018-03-11 21:47:45 +00:00			`# compute cmc curve for each query`
first commit 2018-03-11 21:17:48 +00:00			`all_cmc = []`
			`all_AP = []`
add cuhk03 metric (incomplete) 2018-04-22 21:18:01 +01:00			`num_valid_q = 0. # number of valid query`
linting and formatting code 2019-12-01 02:35:44 +00:00
first commit 2018-03-11 21:17:48 +00:00			`for q_idx in range(num_q):`
			`# get query pid and camid`
			`q_pid = q_pids[q_idx]`
			`q_camid = q_camids[q_idx]`

			`# remove gallery samples that have the same pid and camid with query`
			`order = indices[q_idx]`
update 2018-03-11 21:35:49 +00:00			`remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)`
first commit 2018-03-11 21:17:48 +00:00			`keep = np.invert(remove)`

			`# compute cmc curve`
linting and formatting code 2019-12-01 02:35:44 +00:00			`raw_cmc = matches[q_idx][`
			`keep] # binary vector, positions with value 1 are correct matches`
update cython code: add cuhk03-metric 2018-11-10 21:09:13 +00:00			`if not np.any(raw_cmc):`
first commit 2018-03-11 21:17:48 +00:00			`# this condition is true when query identity does not appear in gallery`
			`continue`

update cython code: add cuhk03-metric 2018-11-10 21:09:13 +00:00			`cmc = raw_cmc.cumsum()`
first commit 2018-03-11 21:17:48 +00:00			`cmc[cmc > 1] = 1`

update 2018-03-11 21:47:45 +00:00			`all_cmc.append(cmc[:max_rank])`
first commit 2018-03-11 21:17:48 +00:00			`num_valid_q += 1.`

debug mAP 2018-03-11 22:08:09 +00:00			`# compute average precision`
			`# reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision`
update cython code: add cuhk03-metric 2018-11-10 21:09:13 +00:00			`num_rel = raw_cmc.sum()`
			`tmp_cmc = raw_cmc.cumsum()`
fixed typo 2018-03-11 21:22:14 +00:00			`tmp_cmc = [x / (i+1.) for i, x in enumerate(tmp_cmc)]`
update cython code: add cuhk03-metric 2018-11-10 21:09:13 +00:00			`tmp_cmc = np.asarray(tmp_cmc) * raw_cmc`
Test results between cython and non-cython code 2018-06-02 18:09:34 +08:00			`AP = tmp_cmc.sum() / num_rel`
first commit 2018-03-11 21:17:48 +00:00			`all_AP.append(AP)`

change double quotes to single quote in print 2019-02-20 21:50:47 +00:00			`assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'`
first commit 2018-03-11 21:17:48 +00:00
			`all_cmc = np.asarray(all_cmc).astype(np.float32)`
			`all_cmc = all_cmc.sum(0) / num_valid_q`
			`mAP = np.mean(all_AP)`

			`return all_cmc, mAP`

update model & script 2018-07-02 10:17:14 +01:00
linting and formatting code 2019-12-01 02:35:44 +00:00			`def evaluate_py(`
			`distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03`
			`):`
add evaluate_py 2018-11-10 21:25:40 +00:00			`if use_metric_cuhk03:`
linting and formatting code 2019-12-01 02:35:44 +00:00			`return eval_cuhk03(`
			`distmat, q_pids, g_pids, q_camids, g_camids, max_rank`
			`)`
add evaluate_py 2018-11-10 21:25:40 +00:00			`else:`
linting and formatting code 2019-12-01 02:35:44 +00:00			`return eval_market1501(`
			`distmat, q_pids, g_pids, q_camids, g_camids, max_rank`
			`)`


			`def evaluate_rank(`
			`distmat,`
			`q_pids,`
			`g_pids,`
			`q_camids,`
			`g_camids,`
			`max_rank=50,`
			`use_metric_cuhk03=False,`
			`use_cython=True`
			`):`
add docstrings to losses, metrics, models, utils, optim 2019-03-22 00:14:41 +00:00			`"""Evaluates CMC rank.`

			`Args:`
			`distmat (numpy.ndarray): distance matrix of shape (num_query, num_gallery).`
			`q_pids (numpy.ndarray): 1-D array containing person identities`
			`of each query instance.`
			`g_pids (numpy.ndarray): 1-D array containing person identities`
			`of each gallery instance.`
			`q_camids (numpy.ndarray): 1-D array containing camera views under`
			`which each query instance is captured.`
			`g_camids (numpy.ndarray): 1-D array containing camera views under`
			`which each gallery instance is captured.`
			`max_rank (int, optional): maximum CMC rank to be computed. Default is 50.`
			`use_metric_cuhk03 (bool, optional): use single-gallery-shot setting for cuhk03.`
			`Default is False. This should be enabled when using cuhk03 classic split.`
			`use_cython (bool, optional): use cython code for evaluation. Default is True.`
			`This is highly recommended as the cython code can speed up the cmc computation`
			`by more than 10x. This requires Cython to be installed.`
			`"""`
update cython code: add cuhk03-metric 2018-11-10 21:09:13 +00:00			`if use_cython and IS_CYTHON_AVAI:`
linting and formatting code 2019-12-01 02:35:44 +00:00			`return evaluate_cy(`
			`distmat, q_pids, g_pids, q_camids, g_camids, max_rank,`
			`use_metric_cuhk03`
			`)`
add cuhk03 metric (incomplete) 2018-04-22 21:18:01 +01:00			`else:`
linting and formatting code 2019-12-01 02:35:44 +00:00			`return evaluate_py(`
			`distmat, q_pids, g_pids, q_camids, g_camids, max_rank,`
			`use_metric_cuhk03`
			`)`