# EasyCV/easycv/toolkit/modelscope/pipelines/face_2d_keypoints_pipeline.py

# Copyright (c) Alibaba, Inc. and its affiliates.
import copy
import math
from typing import Any

import cv2
import numpy as np
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import LoadImage
from modelscope.utils.constant import ModelFile, Tasks
from modelscope.utils.logger import get_logger

from easycv.toolkit.modelscope.metainfo import EasyCVPipelines as Pipelines

from .base import EasyCVPipeline

logger = get_logger()


@PIPELINES.register_module(
    Tasks.face_2d_keypoints, module_name=Pipelines.face_2d_keypoints)
class Face2DKeypointsPipeline(EasyCVPipeline):
    """Pipeline for face 2d keypoints detection."""

    def __init__(self,
                 model: str,
                 model_file_pattern=ModelFile.TORCH_MODEL_FILE,
                 *args,
                 **kwargs):
        """
        Args:
            model (str): model id on the modelscope hub or a local model path.
            model_file_pattern (str): model file pattern.
        """
        super(Face2DKeypointsPipeline, self).__init__(
            model=model,
            model_file_pattern=model_file_pattern,
            *args,
            **kwargs)

        # auxiliary face detection pipeline, used to locate faces before
        # the keypoint model runs
        det_model_id = 'damo/cv_resnet_facedetection_scrfd10gkps'
        self.face_detection = pipeline(
            Tasks.face_detection, model=det_model_id)

    def show_result(self, img, points, scale=2, save_path=None):
        return self.predict_op.show_result(img, points, scale, save_path)

    def _choose_face(self, det_result, min_face=10):
        """Filter detected faces, keeping those whose width and height are
        at least `min_face` pixels.

        Args:
            det_result: output of the face detection pipeline.
            min_face: minimum width/height of a valid face.

        Returns:
            The filtered (bboxes, landmarks), or None if no valid face is
            found.
        """
        bboxes = np.array(det_result[OutputKeys.BOXES])
        landmarks = np.array(det_result[OutputKeys.KEYPOINTS])
        if bboxes.shape[0] == 0:
            logger.warning('No face detected!')
            return None

        # indices of faces that are large enough
        face_idx = []
        for i in range(bboxes.shape[0]):
            box = bboxes[i]
            if (box[2] - box[0]) >= min_face and (box[3] - box[1]) >= min_face:
                face_idx += [i]
        if len(face_idx) == 0:
            logger.warning(
                f'Face size not enough, less than {min_face}x{min_face}!')
            return None

        bboxes = bboxes[face_idx]
        landmarks = landmarks[face_idx]
        return bboxes, landmarks

    def expend_box(self, box, w, h, scalex=0.3, scaley=0.5):
        """Expand `box` by `scalex` of its width on each side, `scaley` of
        its height above and `scalex` of its height below, clipped to the
        image size (w, h)."""
        x1 = box[0]
        y1 = box[1]
        wb = box[2] - x1
        hb = box[3] - y1
        deltax = int(wb * scalex)
        deltay1 = int(hb * scaley)
        deltay2 = int(hb * scalex)
        x1 = x1 - deltax
        y1 = y1 - deltay1
        # shrink the expansion where the box would cross the image border
        if x1 < 0:
            deltax = deltax + x1
            x1 = 0
        if y1 < 0:
            deltay1 = deltay1 + y1
            y1 = 0
        x2 = x1 + wb + 2 * deltax
        y2 = y1 + hb + deltay1 + deltay2
        x2 = np.clip(x2, 0, w - 1)
        y2 = np.clip(y2, 0, h - 1)
        return [x1, y1, x2, y2]
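
    # Worked example: with the defaults scalex=0.3 and scaley=0.5, a 100x100
    # box grows by 30 px on each side horizontally, 50 px above and 30 px
    # below (before clipping to the image borders).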

    def rotate_point(self, angle, center, landmark):
        """Rotate `landmark` by `angle` degrees around `center`.

        Returns the 2x3 affine matrix (equivalent to
        cv2.getRotationMatrix2D(center, angle, 1.0)) and the rotated points.
        """
        rad = angle * np.pi / 180.0
        alpha = np.cos(rad)
        beta = np.sin(rad)
        M = np.zeros((2, 3), dtype=np.float32)
        M[0, 0] = alpha
        M[0, 1] = beta
        M[0, 2] = (1 - alpha) * center[0] - beta * center[1]
        M[1, 0] = -beta
        M[1, 1] = alpha
        M[1, 2] = beta * center[0] + (1 - alpha) * center[1]
        landmark_ = np.asarray([(M[0, 0] * x + M[0, 1] * y + M[0, 2],
                                 M[1, 0] * x + M[1, 1] * y + M[1, 2])
                                for (x, y) in landmark])
        return M, landmark_
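
    # Sanity check (illustrative only, not executed by the pipeline): the
    # matrix built in rotate_point should match OpenCV's equivalent helper
    # for the same center and angle:
    #
    #   M_ref = cv2.getRotationMatrix2D(tuple(center), angle, 1.0)
    #   np.testing.assert_allclose(M, M_ref, atol=1e-4)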

    def rotate_crop_img(self, img, pts, M):
        """Warp `img` with the affine matrix `M`, then crop the axis-aligned
        bounding box of the rotated points `pts`."""
        imgT = cv2.warpAffine(img, M, (int(img.shape[1]), int(img.shape[0])))

        # bounding box of all nine points, seeded from pts[5]
        x1 = pts[5][0]
        x2 = pts[5][0]
        y1 = pts[5][1]
        y2 = pts[5][1]
        for i in range(0, 9):
            x1 = min(x1, pts[i][0])
            x2 = max(x2, pts[i][0])
            y1 = min(y1, pts[i][1])
            y2 = max(y2, pts[i][1])

        height, width, _ = imgT.shape
        x1 = min(max(0, int(x1)), width)
        y1 = min(max(0, int(y1)), height)
        x2 = min(max(0, int(x2)), width)
        y2 = min(max(0, int(y2)), height)
        sub_imgT = imgT[y1:y2, x1:x2]

        return sub_imgT, imgT, [x1, y1, x2, y2]

    def crop_img(self, imgT, pts):
        """Crop the region around the keypoints `pts`, enlarged by
        `enlarge_ratio` and clipped to the image borders."""
        enlarge_ratio = 1.1

        x1 = np.min(pts[:, 0])
        x2 = np.max(pts[:, 0])
        y1 = np.min(pts[:, 1])
        y2 = np.max(pts[:, 1])
        w = x2 - x1 + 1
        h = y2 - y1 + 1
        x1 = int(x1 - (enlarge_ratio - 1.0) / 2.0 * w)
        y1 = int(y1 - (enlarge_ratio - 1.0) / 2.0 * h)
        x1 = max(0, x1)
        y1 = max(0, y1)

        new_w = int(enlarge_ratio * w)
        new_h = int(enlarge_ratio * h)
        new_x1 = x1
        new_y1 = y1
        new_x2 = new_x1 + new_w
        new_y2 = new_y1 + new_h

        height, width, _ = imgT.shape
        new_x1 = min(max(0, new_x1), width)
        new_y1 = min(max(0, new_y1), height)
        new_x2 = max(min(width, new_x2), 0)
        new_y2 = max(min(height, new_y2), 0)
        sub_imgT = imgT[new_y1:new_y2, new_x1:new_x2]

        return sub_imgT, [new_x1, new_y1, new_x2, new_y2]

    def __call__(self, inputs) -> Any:
        img = LoadImage.convert_to_ndarray(inputs)
        h, w, c = img.shape
        img_rgb = copy.deepcopy(img)
        img_rgb = img_rgb[:, :, ::-1]  # reverse channel order for the detector
        det_result = self.face_detection(img_rgb)

        empty_results = {
            OutputKeys.KEYPOINTS: [],
            OutputKeys.POSES: [],
            OutputKeys.BOXES: []
        }

        bboxes = np.array(det_result[OutputKeys.BOXES])
        if bboxes.shape[0] == 0:
            logger.warning('No face detected!')
            return empty_results

        # guard against _choose_face returning None (all faces too small)
        face_result = self._choose_face(det_result)
        if face_result is None:
            return empty_results
        boxes, keypoints = face_result

        output_boxes = []
        output_keypoints = []
        output_poses = []
        for index, box_ori in enumerate(boxes):
            box = self.expend_box(box_ori, w, h, scalex=0.1, scaley=0.1)
            y0 = int(box[1])
            y1 = int(box[3])
            x0 = int(box[0])
            x1 = int(box[2])
            sub_img = img[y0:y1, x0:x1]

            # five detector keypoints plus the four expanded-box corners
            keypoint = keypoints[index]
            pts = [[keypoint[0], keypoint[1]], [keypoint[2], keypoint[3]],
                   [keypoint[4], keypoint[5]], [keypoint[6], keypoint[7]],
                   [keypoint[8], keypoint[9]], [box[0], box[1]],
                   [box[2], box[1]], [box[0], box[3]], [box[2], box[3]]]

            # roll angle in radians, estimated from the two eye keypoints
            angle = math.atan2((pts[1][1] - pts[0][1]),
                               (pts[1][0] - pts[0][0]))
            # roll angle in degrees
            theta = angle * (180 / np.pi)

            center = [w // 2, h // 2]
            cx, cy = center

            # rotate the image so the face is upright, then crop around it
            M, landmark_ = self.rotate_point(theta, (cx, cy), pts)
            sub_imgT, imgT, bbox = self.rotate_crop_img(img, landmark_, M)

            outputs = self.predict_op([sub_imgT])[0]
            tmp_keypoints = outputs['point']

            # map keypoints from crop coordinates back to the rotated image
            for idx in range(0, len(tmp_keypoints)):
                tmp_keypoints[idx][0] += bbox[0]
                tmp_keypoints[idx][1] += bbox[1]

            # refine iteratively: re-crop around the current keypoints and
            # predict again
            for _ in range(0, 6):
                sub_img, bbox = self.crop_img(imgT, tmp_keypoints)
                outputs = self.predict_op([sub_img])[0]
                tmp_keypoints = outputs['point']
                for idx in range(0, len(tmp_keypoints)):
                    tmp_keypoints[idx][0] += bbox[0]
                    tmp_keypoints[idx][1] += bbox[1]

            # rotate the keypoints back into the original image frame
            M2, tmp_keypoints = self.rotate_point(-theta, (cx, cy),
                                                  tmp_keypoints)

            output_keypoints.append(np.array(tmp_keypoints))
            output_poses.append(np.array(outputs['pose']))
            output_boxes.append(np.array(box_ori))

        results = {
            OutputKeys.KEYPOINTS: output_keypoints,
            OutputKeys.POSES: output_poses,
            OutputKeys.BOXES: output_boxes
        }
        return results
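

# Minimal usage sketch (not part of the pipeline file). The keypoints model
# id below is an assumption; substitute whichever face-2d-keypoints model
# you actually use:
#
#   from modelscope.outputs import OutputKeys
#   from modelscope.pipelines import pipeline
#   from modelscope.utils.constant import Tasks
#
#   face_kps = pipeline(
#       Tasks.face_2d_keypoints,
#       model='damo/cv_mobilenet_face-2d-keypoints_alignment')  # assumed id
#   result = face_kps('path/to/face.jpg')
#   print(result[OutputKeys.KEYPOINTS], result[OutputKeys.POSES])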