feat: add hand keypoints predictor

Link: https://code.alibaba-inc.com/pai-vision/EasyCV/codereview/9935447

    * feat: add hand keypoints predictor
pull/191/head
liangting.zl 2022-08-31 10:25:05 +08:00 committed by jiangnana.jnn
parent 2bf3b55655
commit a5988732cc
9 changed files with 404 additions and 4 deletions


@@ -187,3 +187,4 @@ eval_pipelines = [
 ]
 export = dict(use_jit=False)
 checkpoint_sync_export = True
+predict = dict(type='HandKeypointsPredictor')
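
For reference, the new `predict` entry names the predictor type that the
predictor builder resolves for this pose config. A minimal sketch of building
the same pipeline directly (the paths and the extra keys are borrowed from the
unit test at the end of this change, and are assumptions outside this config):

    from easycv.predictors.builder import build_predictor

    predictor = build_predictor(
        dict(
            type='HandKeypointsPredictor',
            model_path='data/test/pose/hand/hrnet_w18_256x256.pth',
            config_file='configs/pose/hand/hrnet_w18_coco_wholebody_hand_256x256_dark.py',
            detection_predictor_config=dict(
                type='DetectionPredictor',
                model_path='https://download.openmmlab.com/mmpose/mmdet_pretrained/'
                'ssdlite_mobilenetv2_scratch_600e_onehand-4f9f8686_20220523.pth',
                config_file='data/test/pose/hand/configs/hand_keypoints_predictor.py',
                score_threshold=0.5)))
    output = predictor('data/test/pose/hand/data/hand.jpg')[0]
    # output['keypoints']: (N, 21, 3), output['boxes']: (N, 4)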


@@ -0,0 +1,86 @@
model = dict(
    type='SingleStageDetector',
    backbone=dict(
        type='MobileNetV2',
        out_indices=(4, 7),
        norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
        init_cfg=dict(type='TruncNormal', layer='Conv2d', std=0.03)),
    neck=dict(
        type='SSDNeck',
        in_channels=(96, 1280),
        out_channels=(96, 1280, 512, 256, 256, 128),
        level_strides=(2, 2, 2, 2),
        level_paddings=(1, 1, 1, 1),
        l2_norm_scale=None,
        use_depthwise=True,
        norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
        act_cfg=dict(type='ReLU6'),
        init_cfg=dict(type='TruncNormal', layer='Conv2d', std=0.03)),
    bbox_head=dict(
        type='SSDHead',
        in_channels=(96, 1280, 512, 256, 256, 128),
        num_classes=1,
        use_depthwise=True,
        norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),
        act_cfg=dict(type='ReLU6'),
        init_cfg=dict(type='Normal', layer='Conv2d', std=0.001),
        # set anchor size manually instead of using the predefined
        # SSD300 setting.
        anchor_generator=dict(
            type='SSDAnchorGenerator',
            scale_major=False,
            strides=[16, 32, 64, 107, 160, 320],
            ratios=[[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]],
            min_sizes=[48, 100, 150, 202, 253, 304],
            max_sizes=[100, 150, 202, 253, 304, 320]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2])),
    # model training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.,
            ignore_iof_thr=-1,
            gt_max_assign_all=False),
        smoothl1_beta=1.,
        allowed_border=-1,
        pos_weight=-1,
        neg_pos_ratio=3,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        nms=dict(type='nms', iou_threshold=0.45),
        min_bbox_size=0,
        score_thr=0.02,
        max_per_img=200))

classes = ('hand', )

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

test_pipeline = [
    dict(
        type='MMMultiScaleFlipAug',
        img_scale=(320, 320),
        flip=False,
        transforms=[
            dict(type='MMResize', keep_ratio=False),
            dict(type='MMNormalize', **img_norm_cfg),
            dict(type='MMPad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

load_from = 'https://download.openmmlab.com/mmpose/mmdet_pretrained/' \
    'ssdlite_mobilenetv2_scratch_600e_onehand-4f9f8686_20220523.pth'

mmlab_modules = [
    dict(type='mmdet', name='SingleStageDetector', module='model'),
    dict(type='mmdet', name='MobileNetV2', module='backbone'),
    dict(type='mmdet', name='SSDNeck', module='neck'),
    dict(type='mmdet', name='SSDHead', module='head'),
]

predictor = dict(type='DetectionPredictor', score_threshold=0.5)
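
The trailing `predictor` entry lets this detection config run standalone. A
hedged sketch, reusing the `load_from` checkpoint URL above (the local config
path is an assumption, matching the test asset added by this commit):

    from easycv.predictors.builder import build_predictor

    det = build_predictor(
        dict(
            type='DetectionPredictor',
            model_path='https://download.openmmlab.com/mmpose/mmdet_pretrained/'
            'ssdlite_mobilenetv2_scratch_600e_onehand-4f9f8686_20220523.pth',
            config_file='data/test/pose/hand/configs/hand_keypoints_predictor.py',
            score_threshold=0.5))
    # keep_inputs=True also returns the source image paths alongside the boxes
    det_results = det(['data/test/pose/hand/data/hand.jpg'], keep_inputs=True)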


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c05d58edee7398de37b8e479410676d6b97cfde69cc003e8356a348067e71988
size 7750


@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8570f45c7e642288b23a1c8722ba2b9b40939f1d55c962d13c789157b16edf01
size 117072344


@@ -1,11 +1,12 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 from .classifier import TorchClassifier
-from .detector import (TorchFaceDetector, TorchYoloXClassifierPredictor,
-                       TorchYoloXPredictor)
+from .detector import (DetectionPredictor, TorchFaceDetector,
+                       TorchYoloXClassifierPredictor, TorchYoloXPredictor)
 from .face_keypoints_predictor import FaceKeypointsPredictor
 from .feature_extractor import (TorchFaceAttrExtractor,
                                 TorchFaceFeatureExtractor,
                                 TorchFeatureExtractor)
+from .hand_keypoints_predictor import HandKeypointsPredictor
 from .pose_predictor import (TorchPoseTopDownPredictor,
                              TorchPoseTopDownPredictorWithDetector)
 from .segmentation import (Mask2formerPredictor, SegFormerPredictor,


@@ -14,6 +14,7 @@ from easycv.models.builder import build_model
 from easycv.utils.checkpoint import load_checkpoint
 from easycv.utils.config_tools import mmcv_config_fromfile
 from easycv.utils.constant import CACHE_DIR
+from easycv.utils.mmlab_utils import dynamic_adapt_for_mmlab
 from easycv.utils.registry import build_from_cfg

@@ -151,7 +152,8 @@ class PredictorV2(object):
     def _build_model(self):
         if self.cfg is None:
             raise ValueError('Please provide "config_file"!')
+        # Use mmdet model
+        dynamic_adapt_for_mmlab(self.cfg)
         model = build_model(self.cfg.model)
         return model
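
With this hook in place, any config that declares `mmlab_modules` (like the
SSDLite config above) can be materialized into a model directly. A minimal
sketch using only utilities already imported in this file (the config path is
the test asset added by this commit):

    from easycv.models.builder import build_model
    from easycv.utils.config_tools import mmcv_config_fromfile
    from easycv.utils.mmlab_utils import dynamic_adapt_for_mmlab

    cfg = mmcv_config_fromfile('data/test/pose/hand/configs/hand_keypoints_predictor.py')
    dynamic_adapt_for_mmlab(cfg)  # registers the mmdet classes listed in cfg.mmlab_modules
    model = build_model(cfg.model)  # builds the mmdet SingleStageDetector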


@@ -3,7 +3,6 @@ import json
 import os
 from glob import glob

-import cv2
 import numpy as np
 import torch
 from mmcv.ops import RoIPool

@@ -22,6 +21,7 @@ from easycv.utils.config_tools import mmcv_config_fromfile
 from easycv.utils.constant import CACHE_DIR
 from easycv.utils.mmlab_utils import dynamic_adapt_for_mmlab
 from easycv.utils.registry import build_from_cfg

+from .base import PredictorV2
 from .builder import PREDICTORS
 from .classifier import TorchClassifier

@@ -36,6 +36,45 @@ except Exception:
     from easycv.thirdparty.mtcnn import FaceDetector

+
+@PREDICTORS.register_module()
+class DetectionPredictor(PredictorV2):
+    """Generic detection predictor that filters bbox results by ``score_threshold``."""
+
+    def __init__(self,
+                 model_path=None,
+                 config_file=None,
+                 batch_size=1,
+                 device=None,
+                 save_results=False,
+                 save_path=None,
+                 mode='rgb',
+                 score_threshold=0.5):
+        super(DetectionPredictor, self).__init__(
+            model_path,
+            config_file=config_file,
+            batch_size=batch_size,
+            device=device,
+            save_results=save_results,
+            save_path=save_path,
+            mode=mode,
+        )
+        self.score_thresh = score_threshold
+
+    def postprocess(self, inputs, *args, **kwargs):
+        # keep only detections whose score exceeds the threshold, per image
+        for batch_index in range(self.batch_size):
+            this_detection_scores = inputs['detection_scores'][batch_index]
+            sel_ids = this_detection_scores > self.score_thresh
+            inputs['detection_scores'][batch_index] = inputs[
+                'detection_scores'][batch_index][sel_ids]
+            inputs['detection_boxes'][batch_index] = inputs['detection_boxes'][
+                batch_index][sel_ids]
+            inputs['detection_classes'][batch_index] = inputs[
+                'detection_classes'][batch_index][sel_ids]
+            # TODO: class label remapping
+        return inputs
+
+
 @PREDICTORS.register_module()
 class TorchYoloXPredictor(PredictorInterface):
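
`DetectionPredictor.postprocess` above relies on numpy boolean-mask indexing
to drop low-score detections per image. A standalone sketch of the same
filtering, with made-up values:

    import numpy as np

    detection_scores = np.array([0.91, 0.30, 0.75])
    detection_boxes = np.array([[10, 10, 50, 50], [5, 5, 20, 20], [30, 40, 90, 100]])
    detection_classes = np.array([0, 0, 0])

    sel_ids = detection_scores > 0.5   # same mask semantics as postprocess()
    print(detection_scores[sel_ids])   # [0.91 0.75]
    print(detection_boxes[sel_ids])    # the two surviving boxes
    print(detection_classes[sel_ids])  # [0 0]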


@@ -0,0 +1,221 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import cv2
import mmcv
import numpy as np

from easycv.predictors.builder import PREDICTORS, build_predictor
from ..datasets.pose.data_sources.hand.coco_hand import \
    COCO_WHOLEBODY_HAND_DATASET_INFO
from ..datasets.pose.data_sources.top_down import DatasetInfo
from .base import PredictorV2
from .pose_predictor import _box2cs

HAND_SKELETON = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7],
                 [7, 8], [9, 10], [10, 11], [11, 12], [13, 14], [14, 15],
                 [15, 16], [0, 17], [17, 18], [18, 19], [19, 20], [5, 9],
                 [9, 13], [13, 17]]


@PREDICTORS.register_module()
class HandKeypointsPredictor(PredictorV2):
    """HandKeypointsPredictor.

    Args:
        model_path: path of the keypoint model
        config_file: path or ``Config`` of the config file
        detection_predictor_config: dict of the hand detection predictor config,
            e.g. ``dict(type="DetectionPredictor", model_path="", config_file="", ...)``
        batch_size: batch size for inference
        save_results: whether to save results
        save_path: path of the result image
    """

    def __init__(self,
                 model_path,
                 config_file=None,
                 detection_predictor_config=None,
                 batch_size=1,
                 device=None,
                 save_results=False,
                 save_path=None,
                 mode='rgb',
                 *args,
                 **kwargs):
        super(HandKeypointsPredictor, self).__init__(
            model_path,
            config_file=config_file,
            batch_size=batch_size,
            device=device,
            save_results=save_results,
            save_path=save_path,
            mode=mode,
            *args,
            **kwargs)
        self.dataset_info = DatasetInfo(COCO_WHOLEBODY_HAND_DATASET_INFO)
        assert detection_predictor_config is not None, \
            f"{self.__class__.__name__} needs 'detection_predictor_config' " \
            'to build the hand detection model'
        self.detection_predictor = build_predictor(detection_predictor_config)

    def _load_input(self, input):
        """Load images and convert detection results to top-down style.

        Args:
            input (dict):
                {
                    "inputs": batch of image paths,
                    "results": {
                        "detection_boxes": B * ndarray(N, 4)
                        "detection_scores": B * ndarray(N,)
                        "detection_classes": B * ndarray(N,)
                    }
                }
        """
        image_paths = input['inputs']
        batch_data = []
        box_id = 0
        for batch_index, image_path in enumerate(image_paths):
            det_bbox_result = input['results']['detection_boxes'][batch_index]
            det_bbox_scores = input['results']['detection_scores'][batch_index]
            img = mmcv.imread(image_path, 'color', self.mode)
            for bbox, score in zip(det_bbox_result, det_bbox_scores):
                center, scale = _box2cs(self.cfg.data_cfg['image_size'], bbox)
                # prepare data
                data = {
                    'image_file': image_path,
                    'img': img,
                    'image_id': batch_index,
                    'center': center,
                    'scale': scale,
                    'bbox_score': score,
                    'bbox_id': box_id,  # needs to be assigned if batch_size > 1
                    'dataset': 'coco_wholebody_hand',
                    'joints_3d': np.zeros((self.cfg.data_cfg.num_joints, 3),
                                          dtype=np.float32),
                    'joints_3d_visible': np.zeros(
                        (self.cfg.data_cfg.num_joints, 3), dtype=np.float32),
                    'rotation': 0,
                    'flip_pairs': self.dataset_info.flip_pairs,
                    'ann_info': {
                        'image_size': np.array(self.cfg.data_cfg['image_size']),
                        'num_joints': self.cfg.data_cfg['num_joints']
                    }
                }
                batch_data.append(data)
                box_id += 1
        return batch_data

    def preprocess_single(self, input, *args, **kwargs):
        results = []
        outputs = self._load_input(input)
        for output in outputs:
            results.append(self.processor(output))
        return results

    def preprocess(self, inputs, *args, **kwargs):
        """Process the input list, collate the samples into a batch and move
        the batch to the target device. Reimplement this if you need custom
        ops to load or process a batch of samples.
        """
        batch_outputs = []
        for i in inputs:
            for res in self.preprocess_single(i, *args, **kwargs):
                batch_outputs.append(res)
        batch_outputs = self._collate_fn(batch_outputs)
        batch_outputs = self._to_device(batch_outputs)
        return batch_outputs

    def postprocess(self, inputs, *args, **kwargs):
        output = {}
        output['keypoints'] = inputs['preds']
        output['boxes'] = inputs['boxes']
        for i, bbox in enumerate(output['boxes']):
            center, scale = bbox[:2], bbox[2:4]
            output['boxes'][i][:4] = bbox_cs2xyxy(center, scale)
        output['boxes'] = output['boxes'][:, :4]
        return output

    def __call__(self, inputs, keep_inputs=False):
        if isinstance(inputs, str):
            inputs = [inputs]

        results_list = []
        for i in range(0, len(inputs), self.batch_size):
            batch = inputs[i:min(len(inputs), i + self.batch_size)]
            # detect hands and return the source images
            det_results = self.detection_predictor(batch, keep_inputs=True)
            # hand keypoints
            batch_outputs = self.preprocess(det_results)
            batch_outputs = self.forward(batch_outputs)
            results = self.postprocess(batch_outputs)
            if keep_inputs:
                results = {'inputs': batch, 'results': results}
            # if results are dumped to file, they are not added to the return
            # value to prevent taking up too much memory
            if self.save_results:
                self.dump([results], self.save_path, mode='ab+')
            else:
                results_list.append(results)
        return results_list

    def show_result(self,
                    image_path,
                    keypoints,
                    boxes=None,
                    scale=4,
                    save_path=None):
        """Draw keypoints (and optionally boxes) over the source image.

        Args:
            image_path (str): filepath of the image
            keypoints (ndarray): N*21*3
            boxes (ndarray): N*4 xyxy boxes, drawn if provided
            scale (int): radius of the keypoint circles
            save_path (str): if set, the rendered image is written here
        """
        point_color = [120, 225, 240]
        sk_color = [0, 255, 0]
        img = mmcv.imread(image_path)
        img = img.copy()
        img_h, img_w = img.shape[:2]

        for kpts in keypoints:
            # points
            for x, y, _ in kpts:
                cv2.circle(img, (int(x), int(y)), scale, point_color, -1)
            # skeleton
            for sk in HAND_SKELETON:
                pos1 = (int(kpts[sk[0], 0]), int(kpts[sk[0], 1]))
                pos2 = (int(kpts[sk[1], 0]), int(kpts[sk[1], 1]))
                if (pos1[0] <= 0 or pos1[0] >= img_w or pos1[1] <= 0
                        or pos1[1] >= img_h or pos2[0] <= 0 or pos2[0] >= img_w
                        or pos2[1] <= 0 or pos2[1] >= img_h):
                    # skip links that fall outside the image
                    continue
                cv2.line(img, pos1, pos2, sk_color, thickness=1)

        if boxes is not None:
            bboxes = np.vstack(boxes)
            mmcv.imshow_bboxes(
                img, bboxes, colors='green', top_k=-1, thickness=2, show=False)
        if save_path is not None:
            mmcv.imwrite(img, save_path)
        return img


def bbox_cs2xyxy(center, scale, padding=1., pixel_std=200.):
    wh = scale * 0.8 / padding * pixel_std
    xy = center - 0.5 * wh
    x1, y1 = xy
    w, h = wh
    return np.r_[x1, y1, x1 + w, y1 + h]
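
As a quick numeric check of `bbox_cs2xyxy` (the 0.8 factor plausibly undoes a
1.25 padding applied by `_box2cs` when a box is encoded as center/scale, and
scale is expressed in units of `pixel_std`):

    import numpy as np

    center = np.array([100., 100.])
    scale = np.array([0.5, 0.5])  # 0.5 * 0.8 * 200 = 80 px per side
    print(bbox_cs2xyxy(center, scale))  # [ 60.  60. 140. 140.]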


@@ -0,0 +1,44 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import unittest

from easycv.predictors.hand_keypoints_predictor import HandKeypointsPredictor
from easycv.utils.config_tools import mmcv_config_fromfile

MM_DEFAULT_HAND_DETECTION_SSDLITE_MODEL_PATH = 'https://download.openmmlab.com/mmpose/mmdet_pretrained/' \
    'ssdlite_mobilenetv2_scratch_600e_onehand-4f9f8686_20220523.pth'
MM_DEFAULT_HAND_DETECTION_SSDLITE_CONFIG_FILE = 'data/test/pose/hand/configs/hand_keypoints_predictor.py'


class HandKeypointsPredictorTest(unittest.TestCase):

    def setUp(self):
        print('Testing %s.%s' % (type(self).__name__, self._testMethodName))
        self.image_path = 'data/test/pose/hand/data/hand.jpg'
        self.save_image_path = 'data/test/pose/hand/data/hand_result.jpg'
        self.model_path = 'data/test/pose/hand/hrnet_w18_256x256.pth'
        self.model_config_path = 'configs/pose/hand/hrnet_w18_coco_wholebody_hand_256x256_dark.py'

    def test_single(self):
        config = mmcv_config_fromfile(self.model_config_path)
        predict_pipeline = HandKeypointsPredictor(
            model_path=self.model_path,
            config_file=config,
            detection_predictor_config=dict(
                type='DetectionPredictor',
                model_path=MM_DEFAULT_HAND_DETECTION_SSDLITE_MODEL_PATH,
                config_file=MM_DEFAULT_HAND_DETECTION_SSDLITE_CONFIG_FILE,
                score_threshold=0.5))
        output = predict_pipeline(self.image_path)[0]
        keypoints = output['keypoints']
        boxes = output['boxes']
        image_show = predict_pipeline.show_result(
            self.image_path, keypoints, boxes, save_path=self.save_image_path)
        self.assertEqual(keypoints.shape[0], 1)
        self.assertEqual(keypoints.shape[1], 21)
        self.assertEqual(keypoints.shape[2], 3)


if __name__ == '__main__':
    unittest.main()