新增coco-wholebody-hand数据集，新增pck auc epe nme评价指标

Link: https://code.alibaba-inc.com/pai-vision/EasyCV/codereview/9790242
2025-06-03 14:49:00 +08:00 · 2022-08-24 19:19:33 +08:00 · 2022-08-24 19:19:33 +08:00 · 2bf7c9f6ff
commit 2bf7c9f6ff
parent 0f74adb848
18 changed files with 3312 additions and 5 deletions
--- a/configs/pose/hand/litehrnet_30_coco_wholebody_hand_256x256.py
+++ b/configs/pose/hand/litehrnet_30_coco_wholebody_hand_256x256.py
@ -0,0 +1,176 @@
 # oss_io_config = dict(
 #     ak_id='your oss ak id',
 #     ak_secret='your oss ak secret',
 #     hosts='oss-cn-zhangjiakou.aliyuncs.com',  # your oss hosts
 #     buckets=['your_bucket'])  # your oss buckets
 oss_sync_config = dict(other_file_list=['**/events.out.tfevents*', '**/*log*'])
 log_level = 'INFO'
 load_from = None
 resume_from = None
 dist_params = dict(backend='nccl')
 workflow = [('train', 1)]
 checkpoint_config = dict(interval=10)
 optimizer = dict(type='Adam', lr=5e-4)
 optimizer_config = dict(grad_clip=None)
 # learning policy
 lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
 total_epochs = 210
 log_config = dict(
    interval=50,
    hooks=[dict(type='TextLoggerHook'),
           dict(type='TensorboardLoggerHook')])
 channel_cfg = dict(
    num_output_channels=21,
    dataset_joints=21,
    dataset_channel=[
        [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
            19, 20
        ],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20
    ])
 # model settings
 model = dict(
    type='TopDown',
    pretrained=False,
    backbone=dict(
        type='LiteHRNet',
        in_channels=3,
        extra=dict(
            stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
            num_stages=3,
            stages_spec=dict(
                num_modules=(3, 8, 3),
                num_branches=(2, 3, 4),
                num_blocks=(2, 2, 2),
                module_type=('LITE', 'LITE', 'LITE'),
                with_fuse=(True, True, True),
                reduce_ratios=(8, 8, 8),
                num_channels=(
                    (40, 80),
                    (40, 80, 160),
                    (40, 80, 160, 320),
                )),
            with_head=True,
        )),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=40,
        out_channels=channel_cfg['num_output_channels'],
        num_deconv_layers=0,
        extra=dict(final_conv_kernel=1, ),
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))
 data_root = 'data/coco'
 data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
 )
 train_pipeline = [
    # dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=30,
        scale_factor=0.25),
    dict(type='TopDownAffine'),
    dict(type='MMToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=3),
    dict(
        type='PoseCollect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'image_id', 'joints_3d', 'joints_3d_visible',
            'center', 'scale', 'rotation', 'flip_pairs'
        ])
 ]
 val_pipeline = [
    dict(type='TopDownAffine'),
    dict(type='MMToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='PoseCollect',
        keys=['img'],
        meta_keys=[
            'image_file', 'image_id', 'center', 'scale', 'rotation',
            'flip_pairs'
        ])
 ]
 test_pipeline = val_pipeline
 data_source_cfg = dict(type='HandCocoPoseTopDownSource', data_cfg=data_cfg)
 data = dict(
    imgs_per_gpu=32,  # for train
    workers_per_gpu=2,  # for train
    # imgs_per_gpu=1,  # for test
    # workers_per_gpu=1,  # for test
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='HandCocoWholeBodyDataset',
        data_source=dict(
            ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json',
            img_prefix=f'{data_root}/train2017/',
            **data_source_cfg),
        pipeline=train_pipeline),
    val=dict(
        type='HandCocoWholeBodyDataset',
        data_source=dict(
            ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
            img_prefix=f'{data_root}/val2017/',
            test_mode=True,
            **data_source_cfg),
        pipeline=val_pipeline),
    test=dict(
        type='HandCocoWholeBodyDataset',
        data_source=dict(
            ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json',
            img_prefix=f'{data_root}/val2017/',
            test_mode=True,
            **data_source_cfg),
        pipeline=val_pipeline),
 )
 eval_config = dict(interval=10, metric='PCK', save_best='PCK')
 evaluator_args = dict(
    metric_names=['PCK', 'AUC', 'EPE', 'NME'], pck_thr=0.2, auc_nor=30)
 eval_pipelines = [
    dict(
        mode='test',
        data=dict(**data['val'], imgs_per_gpu=1),
        evaluators=[dict(type='KeyPointEvaluator', **evaluator_args)])
 ]
 export = dict(use_jit=False)
 checkpoint_sync_export = True
--- a/data/test/pose/hand/small_whole_body_hand_coco/annotations/small_whole_body_hand_coco.json
+++ b/data/test/pose/hand/small_whole_body_hand_coco/annotations/small_whole_body_hand_coco.json
--- a/data/test/pose/hand/small_whole_body_hand_coco/train2017/000000292456.jpg
+++ b/data/test/pose/hand/small_whole_body_hand_coco/train2017/000000292456.jpg
@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:6c8207a06044306b0d271488a22e1a174af5a22e951a710e25a556cf5d212d5c
 size 160632
--- a/data/test/pose/hand/small_whole_body_hand_coco/train2017/000000425226.jpg
+++ b/data/test/pose/hand/small_whole_body_hand_coco/train2017/000000425226.jpg
@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:feadc69a8190787088fda0ac12971d91badc93dbe06057645050fdbec1ce6911
 size 204232
--- a/easycv/core/evaluation/init.py
+++ b/easycv/core/evaluation/init.py
@ -4,8 +4,10 @@ from .base_evaluator import Evaluator
 from .classification_eval import ClsEvaluator
 from .coco_evaluation import CocoDetectionEvaluator, CoCoPoseTopDownEvaluator
 from .faceid_pair_eval import FaceIDPairEvaluator
 from .keypoint_eval import KeyPointEvaluator
 from .mse_eval import MSEEvaluator
 from .retrival_topk_eval import RetrivalTopKEvaluator
 from .segmentation_eval import SegmentationEvaluator
-from .top_down_eval import (keypoint_pck_accuracy, keypoints_from_heatmaps,
+from .top_down_eval import (keypoint_auc, keypoint_epe, keypoint_nme,
                            keypoint_pck_accuracy, keypoints_from_heatmaps,
                            pose_pck_accuracy)
--- a/easycv/core/evaluation/keypoint_eval.py
+++ b/easycv/core/evaluation/keypoint_eval.py
@ -0,0 +1,123 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 # Adapt from
 # https://github.com/open-mmlab/mmpose/blob/master/mmpose/datasets/datasets/base/kpt_2d_sview_rgb_img_top_down_dataset.py
 import numpy as np
 from .base_evaluator import Evaluator
 from .builder import EVALUATORS
 from .metric_registry import METRICS
 from .top_down_eval import (keypoint_auc, keypoint_epe, keypoint_nme,
                            keypoint_pck_accuracy)
@EVALUATORS.register_module
 class KeyPointEvaluator(Evaluator):
    """ KeyPoint evaluator.
    """
    def __init__(self,
                 dataset_name=None,
                 metric_names=['PCK', 'PCKh', 'AUC', 'EPE', 'NME'],
                 pck_thr=0.2,
                 pckh_thr=0.7,
                 auc_nor=30):
        """
        Args:
            dataset_name: eval dataset name
            metric_names: eval metrics name
            pck_thr (float): PCK threshold, default as 0.2.
            pckh_thr (float): PCKh threshold, default as 0.7.
            auc_nor (float): AUC normalization factor, default as 30 pixel.
        """
        super(KeyPointEvaluator, self).__init__(dataset_name, metric_names)
        self._pck_thr = pck_thr
        self._pckh_thr = pckh_thr
        self._auc_nor = auc_nor
        self.dataset_name = dataset_name
        allowed_metrics = ['PCK', 'PCKh', 'AUC', 'EPE', 'NME']
        for metric in metric_names:
            if metric not in allowed_metrics:
                raise KeyError(f'metric {metric} is not supported')
    def _evaluate_impl(self, preds, coco_db, **kwargs):
        ''' keypoint evaluation code which will be run after
        all test batched data are predicted
        Args:
            preds: dict with key ``keypoints`` whose shape is Nx3
            coco_db: the db of wholebody coco datasource, sorted by 'bbox_id'
        Return:
            a dict,  each key is metric_name, value is metric value
        '''
        assert len(preds) == len(coco_db)
        eval_res = {}
        outputs = []
        gts = []
        masks = []
        box_sizes = []
        threshold_bbox = []
        threshold_head_box = []
        for pred, item in zip(preds, coco_db):
            outputs.append(np.array(pred['keypoints'])[:, :-1])
            gts.append(np.array(item['joints_3d'])[:, :-1])
            masks.append((np.array(item['joints_3d_visible'])[:, 0]) > 0)
            if 'PCK' in self.metric_names:
                bbox = np.array(item['bbox'])
                bbox_thr = np.max(bbox[2:])
                threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
            if 'PCKh' in self.metric_names:
                head_box_thr = item['head_size']
                threshold_head_box.append(
                    np.array([head_box_thr, head_box_thr]))
            box_sizes.append(item.get('box_size', 1))
        outputs = np.array(outputs)
        gts = np.array(gts)
        masks = np.array(masks)
        threshold_bbox = np.array(threshold_bbox)
        threshold_head_box = np.array(threshold_head_box)
        box_sizes = np.array(box_sizes).reshape([-1, 1])
        if 'PCK' in self.metric_names:
            _, pck, _ = keypoint_pck_accuracy(outputs, gts, masks,
                                              self._pck_thr, threshold_bbox)
            eval_res['PCK'] = pck
        if 'PCKh' in self.metric_names:
            _, pckh, _ = keypoint_pck_accuracy(outputs, gts, masks,
                                               self._pckh_thr,
                                               threshold_head_box)
            eval_res['PCKh'] = pckh
        if 'AUC' in self.metric_names:
            eval_res['AUC'] = keypoint_auc(outputs, gts, masks, self._auc_nor)
        if 'EPE' in self.metric_names:
            eval_res['EPE'] = keypoint_epe(outputs, gts, masks)
        if 'NME' in self.metric_names:
            normalize_factor = self._get_normalize_factor(
                gts=gts, box_sizes=box_sizes)
            eval_res['NME'] = keypoint_nme(outputs, gts, masks,
                                           normalize_factor)
        return eval_res
    def _get_normalize_factor(self, gts, *args, **kwargs):
        """Get the normalize factor. generally inter-ocular distance measured
        as the Euclidean distance between the outer corners of the eyes is
        used. This function should be overrode, to measure NME.
        Args:
            gts (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        Returns:
            np.ndarray[N, 2]: normalized factor
        """
        return np.ones([gts.shape[0], 2], dtype=np.float32)
 METRICS.register_default_best_metric(KeyPointEvaluator, 'PCK', 'max')
--- a/easycv/core/evaluation/top_down_eval.py
+++ b/easycv/core/evaluation/top_down_eval.py
@ -178,6 +178,86 @@ def keypoint_pck_accuracy(pred, gt, mask, thr, normalize):
    return acc, avg_acc, cnt
 def keypoint_auc(pred, gt, mask, normalize, num_step=20):
    """Calculate the pose accuracy of PCK for each individual keypoint and the
    averaged accuracy across all keypoints for coordinates.
    Note:
        - batch_size: N
        - num_keypoints: K
    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        normalize (float): Normalization factor.
    Returns:
        float: Area under curve.
    """
    nor = np.tile(np.array([[normalize, normalize]]), (pred.shape[0], 1))
    x = [1.0 * i / num_step for i in range(num_step)]
    y = []
    for thr in x:
        _, avg_acc, _ = keypoint_pck_accuracy(pred, gt, mask, thr, nor)
        y.append(avg_acc)
    auc = 0
    for i in range(num_step):
        auc += 1.0 / num_step * y[i]
    return auc
 def keypoint_nme(pred, gt, mask, normalize_factor):
    """Calculate the normalized mean error (NME).
    Note:
        - batch_size: N
        - num_keypoints: K
    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        normalize_factor (np.ndarray[N, 2]): Normalization factor.
    Returns:
        float: normalized mean error
    """
    distances = _calc_distances(pred, gt, mask, normalize_factor)
    distance_valid = distances[distances != -1]
    return distance_valid.sum() / max(1, len(distance_valid))
 def keypoint_epe(pred, gt, mask):
    """Calculate the end-point error.
    Note:
        - batch_size: N
        - num_keypoints: K
    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
    Returns:
        float: Average end-point error.
    """
    distances = _calc_distances(
        pred, gt, mask,
        np.ones((pred.shape[0], pred.shape[2]), dtype=np.float32))
    distance_valid = distances[distances != -1]
    return distance_valid.sum() / max(1, len(distance_valid))
 def _taylor(heatmap, coord):
    """Distribution aware coordinate decoding method.
--- a/easycv/core/post_processing/pose_transforms.py
+++ b/easycv/core/post_processing/pose_transforms.py
@ -83,7 +83,7 @@ def fliplr_regression(regression,
    allowed_center_mode = {'static', 'root'}
    assert center_mode in allowed_center_mode, 'Get invalid center_mode ' \
-        f'{center_mode}, allowed choices are {allowed_center_mode}'
+                                               f'{center_mode}, allowed choices are {allowed_center_mode}'
    if center_mode == 'static':
        x_c = center_x
--- a/easycv/datasets/pose/init.py
+++ b/easycv/datasets/pose/init.py
@ -1,6 +1,7 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 from . import data_sources  # pylint: disable=unused-import
 from . import pipelines  # pylint: disable=unused-import
 from .hand_coco_wholebody_dataset import HandCocoWholeBodyDataset
 from .top_down import PoseTopDownDataset
-__all__ = ['PoseTopDownDataset']
+__all__ = ['PoseTopDownDataset', 'HandCocoWholeBodyDataset']
--- a/easycv/datasets/pose/data_sources/init.py
+++ b/easycv/datasets/pose/data_sources/init.py
@ -1,5 +1,8 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 from .coco import PoseTopDownSourceCoco
 from .hand import HandCocoPoseTopDownSource
 from .top_down import PoseTopDownSource
-__all__ = ['PoseTopDownSourceCoco', 'PoseTopDownSource']
+__all__ = [
    'PoseTopDownSourceCoco', 'PoseTopDownSource', 'HandCocoPoseTopDownSource'
 ]
--- a/easycv/datasets/pose/data_sources/hand/init.py
+++ b/easycv/datasets/pose/data_sources/hand/init.py
@ -0,0 +1,3 @@
 # !/usr/bin/env python
 # -*- encoding: utf-8 -*-
 from .coco_hand import HandCocoPoseTopDownSource
--- a/easycv/datasets/pose/data_sources/hand/coco_hand.py
+++ b/easycv/datasets/pose/data_sources/hand/coco_hand.py
@ -0,0 +1,276 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 # Adapt from
 # https://github.com/open-mmlab/mmpose/blob/master/mmpose/datasets/datasets/hand/hand_coco_wholebody_dataset.py
 import logging
 import os.path as osp
 import numpy as np
 from easycv.datasets.registry import DATASOURCES
 from ..top_down import PoseTopDownSource
 COCO_WHOLEBODY_HAND_DATASET_INFO = dict(
    dataset_name='coco_wholebody_hand',
    paper_info=dict(
        author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
        'Wang, Can and Liu, Wentao and '
        'Qian, Chen and Ouyang, Wanli and Luo, Ping',
        title='Whole-Body Human Pose Estimation in the Wild',
        container='Proceedings of the European '
        'Conference on Computer Vision (ECCV)',
        year='2020',
        homepage='https://github.com/jin-s13/COCO-WholeBody/',
    ),
    keypoint_info={
        0:
        dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
        1:
        dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
        2:
        dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
        3:
        dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
        4:
        dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
        5:
        dict(
            name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
        6:
        dict(
            name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
        7:
        dict(
            name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
        8:
        dict(
            name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
        9:
        dict(
            name='middle_finger1',
            id=9,
            color=[102, 178, 255],
            type='',
            swap=''),
        10:
        dict(
            name='middle_finger2',
            id=10,
            color=[102, 178, 255],
            type='',
            swap=''),
        11:
        dict(
            name='middle_finger3',
            id=11,
            color=[102, 178, 255],
            type='',
            swap=''),
        12:
        dict(
            name='middle_finger4',
            id=12,
            color=[102, 178, 255],
            type='',
            swap=''),
        13:
        dict(
            name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
        14:
        dict(
            name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
        15:
        dict(
            name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
        16:
        dict(
            name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
        17:
        dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
        18:
        dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
        19:
        dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
        20:
        dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
    },
    skeleton_info={
        0:
        dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
        1:
        dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
        2:
        dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
        3:
        dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
        4:
        dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
        5:
        dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
        6:
        dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
        7:
        dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
        8:
        dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
        9:
        dict(
            link=('middle_finger1', 'middle_finger2'),
            id=9,
            color=[102, 178, 255]),
        10:
        dict(
            link=('middle_finger2', 'middle_finger3'),
            id=10,
            color=[102, 178, 255]),
        11:
        dict(
            link=('middle_finger3', 'middle_finger4'),
            id=11,
            color=[102, 178, 255]),
        12:
        dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
        13:
        dict(
            link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
        14:
        dict(
            link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
        15:
        dict(
            link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
        16:
        dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
        17:
        dict(
            link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
        18:
        dict(
            link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
        19:
        dict(
            link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
    },
    joint_weights=[1.] * 21,
    sigmas=[
        0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035, 0.018,
        0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019, 0.022,
        0.031
    ])
@DATASOURCES.register_module()
 class HandCocoPoseTopDownSource(PoseTopDownSource):
    """Coco Whole-Body-Hand Source for top-down hand pose estimation.
        "Whole-Body Human Pose Estimation in the Wild", ECCV'2020.
        More details can be found in the `paper
        <https://arxiv.org/abs/2007.11858>`__ .
        The dataset loads raw features and apply specified transforms
        to return a dict containing the image tensors and other information.
        COCO-WholeBody Hand keypoint indexes::
            0: 'wrist',
            1: 'thumb1',
            2: 'thumb2',
            3: 'thumb3',
            4: 'thumb4',
            5: 'forefinger1',
            6: 'forefinger2',
            7: 'forefinger3',
            8: 'forefinger4',
            9: 'middle_finger1',
            10: 'middle_finger2',
            11: 'middle_finger3',
            12: 'middle_finger4',
            13: 'ring_finger1',
            14: 'ring_finger2',
            15: 'ring_finger3',
            16: 'ring_finger4',
            17: 'pinky_finger1',
            18: 'pinky_finger2',
            19: 'pinky_finger3',
            20: 'pinky_finger4'
        Args:
            ann_file (str): Path to the annotation file.
            img_prefix (str): Path to a directory where images are held.
                Default: None.
            data_cfg (dict): config
            dataset_info (DatasetInfo): A class containing all dataset info.
            test_mode (bool): Store True when building test or
                validation dataset. Default: False.
        """
    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 dataset_info=None,
                 test_mode=False):
        if dataset_info is None:
            logging.info(
                'dataset_info is missing, use default coco wholebody hand dataset info'
            )
            dataset_info = COCO_WHOLEBODY_HAND_DATASET_INFO
        super().__init__(
            ann_file,
            img_prefix,
            data_cfg,
            dataset_info=dataset_info,
            test_mode=test_mode)
        self.ann_info['use_different_joint_weights'] = False
        self.db = self._get_db()
        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')
    def _get_db(self):
        """Load dataset."""
        gt_db = []
        bbox_id = 0
        num_joints = self.ann_info['num_joints']
        for img_id in self.img_ids:
            ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
            objs = self.coco.loadAnns(ann_ids)
            for obj in objs:
                for type in ['left', 'right']:
                    if obj[f'{type}hand_valid'] and max(
                            obj[f'{type}hand_kpts']) > 0:
                        joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
                        joints_3d_visible = np.zeros((num_joints, 3),
                                                     dtype=np.float32)
                        keypoints = np.array(obj[f'{type}hand_kpts']).reshape(
                            -1, 3)
                        joints_3d[:, :2] = keypoints[:, :2]
                        joints_3d_visible[:, :2] = np.minimum(
                            1, keypoints[:, 2:3])
                        image_file = osp.join(self.img_prefix,
                                              self.id2name[img_id])
                        center, scale = self._xywh2cs(
                            *obj[f'{type}hand_box'][:4])
                        gt_db.append({
                            'image_file': image_file,
                            'image_id': img_id,
                            'rotation': 0,
                            'center': center,
                            'scale': scale,
                            'joints_3d': joints_3d,
                            'joints_3d_visible': joints_3d_visible,
                            'dataset': self.dataset_name,
                            'bbox': obj[f'{type}hand_box'],
                            'bbox_score': 1,
                            'bbox_id': bbox_id
                        })
                        bbox_id = bbox_id + 1
        gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])
        return gt_db
--- a/easycv/datasets/pose/hand_coco_wholebody_dataset.py
+++ b/easycv/datasets/pose/hand_coco_wholebody_dataset.py
@ -0,0 +1,70 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 # Adapt from
 # https://github.com/open-mmlab/mmpose/blob/master/mmpose/datasets/datasets/hand/hand_coco_wholebody_dataset.py
 from easycv.core.evaluation.keypoint_eval import KeyPointEvaluator
 from easycv.datasets.pose.data_sources.coco import PoseTopDownSource
 from easycv.datasets.registry import DATASETS
 from easycv.datasets.shared.base import BaseDataset
@DATASETS.register_module()
 class HandCocoWholeBodyDataset(BaseDataset):
    """CocoWholeBodyDataset for top-down hand pose estimation.
    Args:
        data_source: Data_source config dict
        pipeline: Pipeline config list
        profiling: If set True, will print pipeline time
    """
    def __init__(self, data_source, pipeline, profiling=False):
        super(HandCocoWholeBodyDataset, self).__init__(data_source, pipeline,
                                                       profiling)
        if not isinstance(self.data_source, PoseTopDownSource):
            raise ValueError('Only support `PoseTopDownSource`, but get %s' %
                             self.data_source)
    def evaluate(self, outputs, evaluators, **kwargs):
        if len(evaluators) > 1 or not isinstance(evaluators[0],
                                                 KeyPointEvaluator):
            raise ValueError(
                'HandCocoWholeBodyDataset only support one `KeyPointEvaluator` now, '
                'but get %s' % evaluators)
        evaluator = evaluators[0]
        image_ids = outputs['image_ids']
        preds = outputs['preds']
        boxes = outputs['boxes']
        bbox_ids = outputs['bbox_ids']
        kpts = []
        for i, image_id in enumerate(image_ids):
            kpts.append({
                'keypoints': preds[i],
                'center': boxes[i][0:2],
                'scale': boxes[i][2:4],
                'area': boxes[i][4],
                'score': boxes[i][5],
                'image_id': image_id,
                'bbox_id': bbox_ids[i]
            })
        kpts = self._sort_and_unique_bboxes(kpts)
        eval_res = evaluator.evaluate(kpts, self.data_source.db)
        return eval_res
    def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
        """sort kpts and remove the repeated ones."""
        kpts = sorted(kpts, key=lambda x: x[key])
        num = len(kpts)
        for i in range(num - 1, 0, -1):
            if kpts[i][key] == kpts[i - 1][key]:
                del kpts[i]
        return kpts
    def __getitem__(self, idx):
        """Get the sample given index."""
        results = self.data_source.get_sample(idx)
        return self.pipeline(results)
--- a/easycv/datasets/pose/pipelines/transforms.py
+++ b/easycv/datasets/pose/pipelines/transforms.py
@ -1,9 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 # Adapt from https://github.com/open-mmlab/mmpose/blob/master/mmpose/datasets/pipelines/top_down_transform.py
 import cv2
 import numpy as np
 from mmcv.parallel import DataContainer as DC
 from torchvision.transforms import functional as F
 from easycv.core.post_processing import (affine_transform, fliplr_joints,
                                         get_affine_transform, get_warp_matrix,
--- a/tests/core/evaluation/test_keypoint_eval.py
+++ b/tests/core/evaluation/test_keypoint_eval.py
@ -0,0 +1,51 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import unittest
 import numpy as np
 from easycv.core.evaluation import KeyPointEvaluator
 class KeyPointEvaluatorTest(unittest.TestCase):
    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
    def test_keypoint_evaluator_pck(self):
        evaluator = KeyPointEvaluator(pck_thr=0.5, pckh_thr=0.5, auc_nor=30)
        output = np.zeros((5, 3))
        target = np.zeros((5, 3))
        mask = np.zeros((5, 3))
        mask[:, :2] = 1
        # first channel
        output[0] = [10, 0, 0]
        target[0] = [10, 0, 0]
        # second channel
        output[1] = [20, 20, 0]
        target[1] = [10, 10, 0]
        # third channel
        output[2] = [0, 0, 0]
        target[2] = [-1, 0, 0]
        # fourth channel
        output[3] = [30, 30, 0]
        target[3] = [30, 30, 0]
        # fifth channel
        output[4] = [0, 10, 0]
        target[4] = [0, 10, 0]
        preds = {'keypoints': output}
        db = {
            'joints_3d': target,
            'joints_3d_visible': mask,
            'bbox': [10, 10, 10, 10],
            'head_size': 10
        }
        eval_res = evaluator.evaluate([preds, preds], [db, db])
        self.assertAlmostEqual(eval_res['PCK'], 0.8)
        self.assertAlmostEqual(eval_res['PCKh'], 0.8)
        self.assertAlmostEqual(eval_res['EPE'], 3.0284271240234375)
        self.assertAlmostEqual(eval_res['AUC'], 0.86)
        self.assertAlmostEqual(eval_res['NME'], 3.0284271240234375)
 if __name__ == '__main__':
    unittest.main()
--- a/tests/datasets/pose/data_sources/test_coco_hand.py
+++ b/tests/datasets/pose/data_sources/test_coco_hand.py
@ -0,0 +1,59 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import random
 import unittest
 import numpy as np
 from tests.ut_config import SMALL_COCO_WHOLE_BODY_HAND_ROOT
 from easycv.datasets.pose.data_sources import HandCocoPoseTopDownSource
 _DATA_CFG = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=21,
    num_joints=21,
    dataset_channel=[list(range(21))],
    inference_channel=list(range(21)),
 )
 class HandCocoPoseSourceCocoTest(unittest.TestCase):
    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
    def test_top_down_source_coco(self):
        data_source = HandCocoPoseTopDownSource(
            data_cfg=_DATA_CFG,
            ann_file=
            f'{SMALL_COCO_WHOLE_BODY_HAND_ROOT}/annotations/small_whole_body_hand_coco.json',
            img_prefix=f'{SMALL_COCO_WHOLE_BODY_HAND_ROOT}/train2017/')
        index_list = random.choices(list(range(4)), k=3)
        for idx in index_list:
            data = data_source.get_sample(idx)
            self.assertIn('image_file', data)
            self.assertIn('image_id', data)
            self.assertIn('bbox_score', data)
            self.assertIn('bbox_id', data)
            self.assertIn('image_id', data)
            self.assertEqual(data['center'].shape, (2, ))
            self.assertEqual(data['scale'].shape, (2, ))
            self.assertEqual(len(data['bbox']), 4)
            self.assertEqual(data['joints_3d'].shape, (21, 3))
            self.assertEqual(data['joints_3d_visible'].shape, (21, 3))
            self.assertEqual(data['img'].shape[-1], 3)
            ann_info = data['ann_info']
            self.assertEqual(ann_info['image_size'].all(),
                             np.array([256, 256]).all())
            self.assertEqual(ann_info['heatmap_size'].all(),
                             np.array([64, 64]).all())
            self.assertEqual(ann_info['num_joints'], 21)
            self.assertEqual(len(ann_info['inference_channel']), 21)
            self.assertEqual(ann_info['num_output_channels'], 21)
            break
        self.assertEqual(len(data_source), 4)
 if __name__ == '__main__':
    unittest.main()
--- a/tests/datasets/pose/test_coco_whole_body_hand_dataset.py
+++ b/tests/datasets/pose/test_coco_whole_body_hand_dataset.py
@ -0,0 +1,75 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import unittest
 import torch
 from tests.ut_config import SMALL_COCO_WHOLE_BODY_HAND_ROOT
 from easycv.datasets.pose import HandCocoWholeBodyDataset
 _DATA_CFG = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=21,
    num_joints=21,
    dataset_channel=[list(range(21))],
    inference_channel=list(range(21)))
 _DATASET_ARGS = [{
    'data_source':
    dict(
        type='HandCocoPoseTopDownSource',
        data_cfg=_DATA_CFG,
        ann_file=
        f'{SMALL_COCO_WHOLE_BODY_HAND_ROOT}/annotations/small_whole_body_hand_coco.json',
        img_prefix=f'{SMALL_COCO_WHOLE_BODY_HAND_ROOT}/train2017/'),
    'pipeline': [
        dict(type='TopDownRandomFlip', flip_prob=0.5),
        dict(type='TopDownAffine'),
        dict(type='MMToTensor'),
        dict(type='TopDownGenerateTarget', sigma=3),
        dict(
            type='PoseCollect',
            keys=['img', 'target', 'target_weight'],
            meta_keys=[
                'image_file', 'joints_3d', 'flip_pairs', 'joints_3d_visible',
                'center', 'scale', 'rotation', 'bbox_score'
            ])
    ]
 }, {}]
 class PoseTopDownDatasetTest(unittest.TestCase):
    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
    @staticmethod
    def build_dataset(index):
        dataset = HandCocoWholeBodyDataset(
            data_source=_DATASET_ARGS[index].get('data_source', None),
            pipeline=_DATASET_ARGS[index].get('pipeline', None))
        return dataset
    def test_0(self, index=0):
        dataset = self.build_dataset(index)
        ann_info = dataset.data_source.ann_info
        self.assertEqual(len(dataset), 4)
        for i, batch in enumerate(dataset):
            self.assertEqual(
                batch['img'].shape,
                torch.Size([3] + list(ann_info['image_size'][::-1])))
            self.assertEqual(batch['target'].shape,
                             (ann_info['num_joints'], ) +
                             tuple(ann_info['heatmap_size'][::-1]))
            self.assertEqual(batch['img_metas'].data['joints_3d'].shape,
                             (ann_info['num_joints'], 3))
            self.assertIn('center', batch['img_metas'].data)
            self.assertIn('scale', batch['img_metas'].data)
            break
 if __name__ == '__main__':
    unittest.main()
--- a/tests/ut_config.py
+++ b/tests/ut_config.py
@ -118,3 +118,4 @@ PRETRAINED_MODEL_SEGFORMER = os.path.join(
 )
 MODEL_CONFIG_SEGFORMER = (
    './configs/segmentation/segformer/segformer_b0_coco.py')
 SMALL_COCO_WHOLE_BODY_HAND_ROOT = 'data/test/pose/hand/small_whole_body_hand_coco'