add face-2d-keypoints

Link: https://code.alibaba-inc.com/pai-vision/EasyCV/codereview/9809249

    * add face 2d keypoint config
shouzhou.bx 2022-08-25 16:57:37 +08:00 committed by jiangnana.jnn
parent bc64851614
commit 2e8fc44dc1
30 changed files with 1787 additions and 2 deletions

View File

@ -0,0 +1,236 @@
# model settings
POINT_NUMBER = 106
MEAN_FACE = [
0.05486667535113006, 0.24441904048908245, 0.05469932714062696,
0.30396829196709935, 0.05520653400164321, 0.3643191463607746,
0.05865501342257397, 0.42453849020500306, 0.0661603899137523,
0.48531377442945767, 0.07807677169271177, 0.5452126843738523,
0.09333319368757653, 0.6047840615432064, 0.11331425394034209,
0.6631144309665994, 0.13897813867699352, 0.7172296230155276,
0.17125811033538194, 0.767968859462583, 0.20831698519371536,
0.8146603379935117, 0.24944621000897876, 0.857321261721953,
0.2932993820558674, 0.8973900596678597, 0.33843820185594653,
0.9350576242126986, 0.38647802623495553, 0.966902971122812,
0.4411974776504609, 0.9878629960611088, 0.5000390697219397,
0.9934886214875595, 0.5588590024515473, 0.9878510782414189,
0.6135829360035883, 0.9668655595323074, 0.6616294188166414,
0.9350065330378543, 0.7067734980023662, 0.8973410411573094,
0.7506167730772516, 0.8572957679511382, 0.7917579157122047,
0.8146281598803492, 0.8288026446367324, 0.7679019642224981,
0.8610918526053805, 0.7171624168757985, 0.8867491048162915,
0.6630344261248556, 0.9067293813428708, 0.6047095492618413,
0.9219649147678989, 0.5451295187190602, 0.9338619041815587,
0.4852292097262674, 0.9413455695142587, 0.424454780475834,
0.9447753107545577, 0.3642347111991026, 0.9452649776939869,
0.30388458223793025, 0.9450854849661369, 0.24432737691068557,
0.1594802473020129, 0.17495177946520288, 0.2082918411850002,
0.12758378330875153, 0.27675902873293057, 0.11712230823088154,
0.34660582049732336, 0.12782553369032904, 0.4137234315527489,
0.14788458441422778, 0.4123890243720449, 0.18814226684806626,
0.3498927810760776, 0.17640650480816664, 0.28590212091591866,
0.16895271174960227, 0.22193967489846017, 0.16985862149585013,
0.5861805004572298, 0.147863456192582, 0.6532904167464643,
0.12780412047734288, 0.723142364263288, 0.11709102395419578,
0.7916076475508984, 0.12753867695205595, 0.8404440227263494,
0.17488715120168932, 0.7779848023963316, 0.1698261195288917,
0.7140264757991571, 0.1689377237959271, 0.650024882334848,
0.17640581823811927, 0.5875270068157493, 0.18815421057605972,
0.4999687027691624, 0.2770570778583906, 0.49996466107378934,
0.35408433007759227, 0.49996725190415664, 0.43227025345368053,
0.49997367716346774, 0.5099309118810921, 0.443147025685285,
0.2837021691260901, 0.4079306716593004, 0.4729519900478952,
0.3786223176615041, 0.5388017782630576, 0.4166237366074797,
0.5822229552544941, 0.4556754522760756, 0.5887956328134262,
0.49998730493119997, 0.5951855531982454, 0.5443300921009105,
0.5887796732983633, 0.5833722476054509, 0.582200985012979,
0.6213509190608012, 0.5387760772258134, 0.5920137550293199,
0.4729325070035326, 0.5567854054587345, 0.28368589871138317,
0.23395988420439123, 0.275313734012504, 0.27156519109550253,
0.2558735678926061, 0.31487949633428597, 0.2523033259214858,
0.356919009399118, 0.2627342680634766, 0.3866625969903256,
0.2913618036573405, 0.3482919069920915, 0.3009936818974329,
0.3064437008415846, 0.3037349617842158, 0.26724000706363993,
0.2961896087804692, 0.3135744691699477, 0.27611103614975246,
0.6132904312551143, 0.29135144033587107, 0.6430396927648264,
0.2627079452269443, 0.6850713556136455, 0.2522730391144915,
0.728377707003201, 0.25583118190779625, 0.7660035591791254,
0.27526375689471777, 0.7327054300488236, 0.2961495286346863,
0.6935171517115648, 0.3036951925380769, 0.6516533228539426,
0.3009921014909089, 0.6863983789278025, 0.2760904908649394,
0.35811903020866753, 0.7233174007629063, 0.4051199834269763,
0.6931800846807724, 0.4629631471997891, 0.6718031951363689,
0.5000016063148277, 0.6799150331999366, 0.5370506360177653,
0.6717809139952097, 0.5948714927411151, 0.6931581144392573,
0.6418878095835022, 0.7232890570786875, 0.6088129582142587,
0.7713407215524752, 0.5601450388292929, 0.8052499757498277,
0.5000181358125715, 0.8160749831906926, 0.4398905591799545,
0.8052697696938342, 0.39120318265892984, 0.771375905028864,
0.36888771299734613, 0.7241751210643214, 0.4331097084010058,
0.7194543690519717, 0.5000188612450743, 0.7216823277180712,
0.566895861884284, 0.7194302225129479, 0.631122598507516,
0.7241462073974219, 0.5678462302796355, 0.7386355816766528,
0.5000082906571756, 0.7479600838019628, 0.43217532542902076,
0.7386538729390463, 0.31371761254774383, 0.2753328284323114,
0.6862487843823917, 0.2752940437017121
]
IMAGE_SIZE = 96
loss_config = dict(
num_points=POINT_NUMBER,
left_eye_left_corner_index=66,
right_eye_right_corner_index=79,
points_weight=1.0,
contour_weight=1.5,
eyebrow_weight=1.5,
eye_weight=1.7,
nose_weight=1.3,
lip_weight=1.7,
omega=10,
epsilon=2)
model = dict(
type='FaceKeypoint',
backbone=dict(
type='FaceKeypointBackbone',
in_channels=3,
out_channels=48,
residual_activation='relu',
inverted_activation='half_v2',
inverted_expand_ratio=2,
),
keypoint_head=dict(
type='FaceKeypointHead',
in_channels=48,
out_channels=POINT_NUMBER * 2,
input_size=IMAGE_SIZE,
inverted_expand_ratio=2,
inverted_activation='half_v2',
mean_face=MEAN_FACE,
loss_keypoint=dict(type='WingLossWithPose', **loss_config),
),
pose_head=dict(
type='FacePoseHead',
in_channels=48,
out_channels=3,
inverted_expand_ratio=2,
inverted_activation='half_v2',
loss_pose=dict(type='FacePoseLoss', pose_weight=0.01),
),
)
train_pipeline = [
dict(type='FaceKeypointRandomAugmentation', input_size=IMAGE_SIZE),
dict(type='FaceKeypointNorm', input_size=IMAGE_SIZE),
dict(type='MMToTensor'),
dict(
type='NormalizeTensor',
mean=[0.4076, 0.458, 0.485],
std=[1.0, 1.0, 1.0]),
dict(
type='Collect',
keys=[
'img', 'target_point', 'target_point_mask', 'target_pose',
'target_pose_mask'
])
]
val_pipeline = [
dict(type='FaceKeypointNorm', input_size=IMAGE_SIZE),
dict(type='MMToTensor'),
dict(
type='NormalizeTensor',
mean=[0.4076, 0.458, 0.485],
std=[1.0, 1.0, 1.0]),
dict(
type='Collect',
keys=[
'img', 'target_point', 'target_point_mask', 'target_pose',
'target_pose_mask'
])
]
test_pipeline = val_pipeline
data_root = 'path/to/face_landmark_data/'
data_cfg = dict(
data_root=data_root,
input_size=IMAGE_SIZE,
)
data = dict(
imgs_per_gpu=512,
workers_per_gpu=2,
train=dict(
type='FaceKeypointDataset',
data_source=dict(
type='FaceKeypintSource',
train=True,
data_range=[0, 30000], # [0,30000] [0,478857]
data_cfg=data_cfg,
),
pipeline=train_pipeline),
val=dict(
type='FaceKeypointDataset',
data_source=dict(
type='FaceKeypintSource',
train=False,
data_range=[478857, 488857],
# data_range=[478857, 478999], #[478857, 478999] [478857, 488857]
data_cfg=data_cfg,
),
pipeline=val_pipeline),
test=dict(
type='FaceKeypointDataset',
data_source=dict(
type='FaceKeypintSource',
train=False,
data_range=[478857, 488857],
# data_range=[478857, 478999], #[478857, 478999] [478857, 488857]
data_cfg=data_cfg,
),
pipeline=test_pipeline),
)
# runtime setting
optimizer = dict(
type='Adam',
lr=0.005,
)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
policy='CosineAnnealing',
min_lr=0.00001,
warmup='linear',
warmup_iters=10,
warmup_ratio=0.001,
warmup_by_epoch=True,
by_epoch=True)
total_epochs = 1000
checkpoint_config = dict(interval=10)
log_config = dict(
interval=5, hooks=[
dict(type='TextLoggerHook'),
])
log_level = 'INFO'
load_from = None
resume_from = None
dist_params = dict(backend='nccl')
workflow = [('train', 1)]
# disable opencv multithreading to avoid system being overloaded
opencv_num_threads = 0
# set multi-process start method as `fork` to speed up the training
mp_start_method = 'fork'
evaluation = dict(interval=1, metric=['NME'], save_best='NME')
eval_config = dict(interval=1)
evaluator_args = dict(metric_names='ave_nme')
eval_pipelines = [
dict(
mode='test',
data=dict(**data['val'], imgs_per_gpu=1),
evaluators=[dict(type='FaceKeypointEvaluator', **evaluator_args)])
]
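For reference, this config can be loaded and the model built with EasyCV's config utilities. A minimal sketch (illustration only, not part of this commit; the config path is the one referenced by the unit test below, and building the model needs a CUDA device because WingLossWithPose moves its part weights to the GPU at construction time):

```python
# Minimal sketch: load the config above and build the FaceKeypoint model from it.
# Assumes easycv is installed and a CUDA device is available.
from easycv.models import build_model
from easycv.utils.config_tools import mmcv_config_fromfile

cfg = mmcv_config_fromfile('configs/face/face_96x96_wingloss.py')
model = build_model(cfg.model)  # FaceKeypoint with keypoint and pose heads
print(type(model).__name__)     # 'FaceKeypoint'
```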

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1a45cc56977e709361659d4123739d3647d122a0d80bf7249d0ccdef018f068e
size 112042

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8d516f30a8c1583b45e54d737d2a712ed1c63ef387d579517e1e23e416339ac2
size 94367

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5105c5aa83c59d2a1fdf8dc9ff83a8d84c19a70c7faabcf7f8bce8a913afe4f1
size 3421031

View File

@ -3,6 +3,7 @@ from .auc_eval import AucEvaluator
from .base_evaluator import Evaluator
from .classification_eval import ClsEvaluator
from .coco_evaluation import CocoDetectionEvaluator, CoCoPoseTopDownEvaluator
from .face_eval import FaceKeypointEvaluator
from .faceid_pair_eval import FaceIDPairEvaluator
from .keypoint_eval import KeyPointEvaluator
from .mse_eval import MSEEvaluator

View File

@ -0,0 +1,59 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import torch
from .base_evaluator import Evaluator
from .builder import EVALUATORS
from .metric_registry import METRICS
@EVALUATORS.register_module
class FaceKeypointEvaluator(Evaluator):
def __init__(self, dataset_name=None, metric_names=['ave_nme']):
super(FaceKeypointEvaluator, self).__init__(dataset_name, metric_names)
self.metric = metric_names
self.dataset_name = dataset_name
def _evaluate_impl(self, prediction_dict, groundtruth_dict, **kwargs):
"""
Args:
prediction_dict: model forward output dict, ['point', 'pose']
groundtruth_dict: groundtruth dict, ['target_point', 'target_point_mask', 'target_pose', 'target_pose_mask'] used for compute accuracy
kwargs: other parameters
"""
def evaluate(predicts, gts, **kwargs):
from easycv.models.utils.face_keypoint_utils import get_keypoint_accuracy, get_pose_accuracy
ave_pose_acc = 0
ave_nme = 0
idx = 0
for (predict_point, predict_pose,
gt) in zip(predicts['point'], predicts['pose'], gts):
target_point = gt['target_point']
target_point_mask = gt['target_point_mask']
target_pose = gt['target_pose']
target_pose_mask = gt['target_pose_mask']
target_point = target_point * target_point_mask
target_pose = target_pose * target_pose_mask
keypoint_accuracy = get_keypoint_accuracy(
predict_point, target_point)
pose_accuracy = get_pose_accuracy(predict_pose, target_pose)
ave_pose_acc += pose_accuracy['pose_acc']
ave_nme += keypoint_accuracy['nme']
idx += 1
eval_result = {}
idx += 0.000001  # avoid division by zero when there are no samples
eval_result['ave_pose_acc'] = ave_pose_acc / idx
eval_result['ave_nme'] = ave_nme / idx
return eval_result
return evaluate(prediction_dict, groundtruth_dict)
METRICS.register_default_best_metric(FaceKeypointEvaluator, 'ave_nme', 'min')
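To illustrate the expected input format, a minimal sketch with synthetic tensors (assumes easycv and its dependencies are installed): predictions arrive as batched tensors under 'point' and 'pose', while the ground truth is a list of per-sample dicts like the ones FaceKeypintSource stores in its db:

```python
# Minimal sketch: feeding FaceKeypointEvaluator with synthetic data.
import torch
from easycv.core.evaluation import FaceKeypointEvaluator

num_points, batch = 106, 2
prediction_dict = {
    'point': torch.rand(batch, num_points * 2) * 96,  # flattened (x, y) keypoints
    'pose': torch.rand(batch, 3) * 30,                # pitch / roll / yaw in degrees
}
groundtruth_dict = [{
    'target_point': torch.rand(num_points * 2) * 96,
    'target_point_mask': torch.ones(num_points * 2),
    'target_pose': torch.rand(3) * 30,
    'target_pose_mask': torch.ones(3),
} for _ in range(batch)]

evaluator = FaceKeypointEvaluator(metric_names=['ave_nme'])
result = evaluator.evaluate(
    prediction_dict=prediction_dict, groundtruth_dict=groundtruth_dict)
print(result)  # e.g. {'ave_pose_acc': ..., 'ave_nme': ...}
```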

View File

@ -1,5 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from . import classification, detection, pose, segmentation, selfsup, shared
from . import (classification, detection, face, pose, segmentation, selfsup,
shared)
from .builder import build_dali_dataset, build_dataset
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
from .registry import DATASETS

View File

@ -0,0 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .data_sources import *
from .face_keypoint_dataset import FaceKeypointDataset
from .pipelines import *

View File

@ -0,0 +1,2 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .face_keypoint_source import FaceKeypintSource

View File

@ -0,0 +1,171 @@
import copy
import json
import logging
import os
import cv2
import numpy as np
import torch
from easycv.datasets.face.pipelines.face_keypoint_transform import (
FaceKeypointNorm, FaceKeypointRandomAugmentation, normal)
from easycv.datasets.registry import DATASOURCES
from easycv.datasets.shared.base import BaseDataset
FACE_KEYPOINT_DATASET_INFO = dict(
real_list_file_dir='real_face_list.txt',
data_info_dir='infos/merge/',
data_image_dir='images/merge/',
data_overlay_dir='images/overlay/',
)
@DATASOURCES.register_module()
class FaceKeypintSource():
"""
load dataset for face key points
"""
def __init__(self,
data_cfg,
data_range,
real_list_path=None,
info_path=None,
image_path=None,
data_overlay_path=None,
dataset_info=None,
**kwargs):
super(FaceKeypintSource, self).__init__()
"""
Args:
data_cfg: Data config dict
data_range: index range of the dataset used for training or validation
real_list_path: path to the file that lists image ids
info_path: annotation directory
image_path: image directory
data_overlay_path: overlay background image directory
dataset_info: A dict containing all dataset info
"""
if dataset_info is None:
logging.info(
'dataset_info is missing, use default face keypoint dataset info'
)
dataset_info = FACE_KEYPOINT_DATASET_INFO
data_root = data_cfg['data_root']
real_list_file_path = os.path.join(data_root,
dataset_info['real_list_file_dir'])
data_info_dir = os.path.join(data_root, dataset_info['data_info_dir'])
data_img_dir = os.path.join(data_root, dataset_info['data_image_dir'])
data_overlay_dir = os.path.join(data_root,
dataset_info['data_overlay_dir'])
self.input_size = data_cfg['input_size']
data_range = data_range
if real_list_path is not None:
real_list_file_path = real_list_path
if info_path is not None:
data_info_dir = info_path
if image_path is not None:
data_img_dir = image_path
if data_overlay_path is not None:
data_overlay_dir = data_overlay_path
# overlay
self.overlay_image_path = []
for overlay_img_file in sorted(os.listdir(data_overlay_dir)):
overlay_img_filepath = os.path.join(data_overlay_dir,
overlay_img_file)
self.overlay_image_path.append(overlay_img_filepath)
self.points_and_pose_datas = []
with open(real_list_file_path, 'r') as real_list_file:
real_list_lines = real_list_file.readlines()
for index in range(data_range[0], data_range[1]):
idx = int(real_list_lines[index])
img_path = os.path.join(data_img_dir, '{:06d}.png'.format(idx))
if not os.path.exists(img_path):
logging.warning('image %s does not exist' % img_path)
continue
info_path = os.path.join(data_info_dir, '{:06d}.json'.format(idx))
if not os.path.exists(info_path):
logging.warning('annotation %s does not exist' % info_path)
continue
with open(info_path, 'r') as info_file:
info_json = json.load(info_file)
assert info_json['face_count'] == 1
base_info = info_json['face_infos'][0]['base_info']
# points
assert base_info['points_array'] is not None
points = np.asarray(base_info['points_array']).astype(
np.float32)
points_mask = np.abs(points - (-999)) > 0.0001
# pose
pose = {'pitch': -999, 'yaw': -999, 'roll': -999}
if base_info['pitch'] is not None and base_info[
'yaw'] is not None and base_info['roll'] is not None:
pose['pitch'] = base_info['pitch']
pose['yaw'] = base_info['yaw']
# pose["roll"] = base_info["roll"]
# datasets have been preprocessed, roll=0
# add noise to pose
pose['roll'] = normal() * 10.0
pose_mask = np.asarray([
np.abs(pose['pitch'] - (-999)) > 0.0001,
np.abs(pose['roll'] - (-999)) > 0.0001,
np.abs(pose['yaw'] - (-999)) > 0.0001
])
self.points_and_pose_datas.append(
(img_path, points, points_mask, pose, pose_mask))
self.db = []
for img_path, points, points_mask, pose, pose_mask in copy.deepcopy(
self.points_and_pose_datas):
image = cv2.imread(img_path)
points[:,
0] = points[:, 0] / image.shape[1] * float(self.input_size)
points[:,
1] = points[:, 1] / image.shape[0] * float(self.input_size)
target_point = np.reshape(points,
(points.shape[0] * points.shape[1]))
points_mask = points_mask.astype(np.float32)
points_mask = np.reshape(
points_mask, (points_mask.shape[0] * points_mask.shape[1]))
pose = np.asarray([pose['pitch'], pose['roll'], pose['yaw']])
self.db.append({
'img_path':
img_path,
'target_point':
torch.tensor(np.array(target_point, np.float32)),
'target_point_mask':
torch.tensor(points_mask),
'target_pose':
torch.tensor(np.array(pose, np.float32)),
'target_pose_mask':
torch.tensor(pose_mask.astype(np.float32))
})
def __getitem__(self, index):
img_path, points, points_mask, pose, pose_mask = copy.deepcopy(
self.points_and_pose_datas[index])
image = cv2.imread(img_path)
return {
'img': image,
'target_point': points,
'target_point_mask': points_mask,
'target_pose': pose,
'target_pose_mask': pose_mask,
'overlay_image_path': self.overlay_image_path
}
def __len__(self):
return len(self.points_and_pose_datas)

View File

@ -0,0 +1,45 @@
import copy
import json
import logging
import os
import cv2
import numpy as np
import torch
import torch.utils.data as data
from easycv.datasets.face.pipelines.face_keypoint_transform import (
FaceKeypointNorm, FaceKeypointRandomAugmentation, normal)
from easycv.datasets.registry import DATASETS
from easycv.datasets.shared.base import BaseDataset
@DATASETS.register_module()
class FaceKeypointDataset(BaseDataset):
"""
dataset for face key points
"""
def __init__(self, data_source, pipeline, profiling=False):
super(FaceKeypointDataset, self).__init__(data_source, pipeline,
profiling)
"""
Args:
data_source: Data_source config dict
pipeline: Pipeline config list
profiling: If set True, will print pipeline time
"""
def evaluate(self, outputs, evaluators, **kwargs):
eval_result = {}
for evaluator in evaluators:
eval_result.update(
evaluator.evaluate(
prediction_dict=outputs,
groundtruth_dict=self.data_source.db))
return eval_result
def __getitem__(self, idx):
results = self.data_source[idx]
return self.pipeline(results)

View File

@ -0,0 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .face_keypoint_transform import (FaceKeypointNorm,
FaceKeypointRandomAugmentation)
__all__ = ['FaceKeypointRandomAugmentation', 'FaceKeypointNorm']

View File

@ -0,0 +1,431 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import random
import cv2
import imgaug
import imgaug.augmenters as iaa
import numpy as np
from easycv.datasets.registry import PIPELINES
DEST_SIZE = 256
BASE_LANDMARK_NUM = 106
ENLARGE_RATIO = 1.1
CONTOUR_PARTS = [[0, 32], [1, 31], [2, 30], [3, 29], [4, 28], [5, 27], [6, 26],
[7, 25], [8, 24], [9, 23], [10, 22], [11, 21], [12, 20],
[13, 19], [14, 18], [15, 17]]
BROW_PARTS = [[33, 46], [34, 45], [35, 44], [36, 43], [37, 42], [38, 50],
[39, 49], [40, 48], [41, 47]]
EYE_PARTS = [[66, 79], [67, 78], [68, 77], [69, 76], [70, 75], [71, 82],
[72, 81], [73, 80], [74, 83]]
NOSE_PARTS = [[55, 65], [56, 64], [57, 63], [58, 62], [59, 61]]
MOUSE_PARTS = [[84, 90], [85, 89], [86, 88], [96, 100], [97, 99], [103, 101],
[95, 91], [94, 92]]
IRIS_PARTS = [[104, 105]]
MATCHED_PARTS = CONTOUR_PARTS + BROW_PARTS + EYE_PARTS + NOSE_PARTS + MOUSE_PARTS + IRIS_PARTS
def normal():
"""
3-sigma rule: sample from N(0, 1) and reject values outside 3 sigma
return: a value in (-1, +1)
"""
mu, sigma = 0, 1
while True:
s = np.random.normal(mu, sigma)
if s < mu - 3 * sigma or s > mu + 3 * sigma:
continue
return s / 3 * sigma
def rotate(angle, center, landmark):
rad = angle * np.pi / 180.0
alpha = np.cos(rad)
beta = np.sin(rad)
M = np.zeros((2, 3), dtype=np.float32)
M[0, 0] = alpha
M[0, 1] = beta
M[0, 2] = (1 - alpha) * center[0] - beta * center[1]
M[1, 0] = -beta
M[1, 1] = alpha
M[1, 2] = beta * center[0] + (1 - alpha) * center[1]
landmark_ = np.asarray([(M[0, 0] * x + M[0, 1] * y + M[0, 2],
M[1, 0] * x + M[1, 1] * y + M[1, 2])
for (x, y) in landmark])
return M, landmark_
class OverLayGenerator:
def __init__(self, shape):
# 4x4
h_seg_len = shape[0] // 4
w_seg_len = shape[1] // 4
self.overlay = []
# 2x2 overlay
for i in range(3):
for j in range(3):
if i == 1 and j == 1:
continue
self.overlay.append((i * w_seg_len, j * h_seg_len,
2 * w_seg_len, 2 * h_seg_len))
# 2x3 overlay
for i in range(3):
for j in range(2):
if i == 1:
continue
self.overlay.append((i * w_seg_len, j * h_seg_len,
2 * w_seg_len, 3 * h_seg_len))
for i in range(2):
for j in range(3):
if j == 1:
continue
self.overlay.append((i * w_seg_len, j * h_seg_len,
3 * w_seg_len, 2 * h_seg_len))
# 2x4 overlay
for i in range(3):
for j in range(1):
if i == 1:
continue
self.overlay.append((i * w_seg_len, j * h_seg_len,
2 * w_seg_len, 4 * h_seg_len))
for i in range(1):
for j in range(3):
if j == 1:
continue
self.overlay.append((i * w_seg_len, j * h_seg_len,
4 * w_seg_len, 2 * h_seg_len))
class FaceKeypointsDataAugumentation:
def __init__(self, input_size):
# option
self.enable_flip = True
self.enable_rotate = True
self.input_size = input_size
# mask generator
coarse_salt_and_pepper_iaa = iaa.CoarseSaltAndPepper(
(0.25, 0.35), size_percent=(0.03125, 0.015625))
self.mask_generator = coarse_salt_and_pepper_iaa.mask
# overlay generator
self.overlay_generator = OverLayGenerator(shape=(256, 256))
# flip
self.mirror_map = FaceKeypointsDataAugumentation.compute_mirror_map()
@staticmethod
def compute_mirror_map():
mirror_map = np.array(range(0, BASE_LANDMARK_NUM), np.int32)
for x, y in MATCHED_PARTS:
mirror_map[x] = y
mirror_map[y] = x
return mirror_map
def aug_flip(self, img, pts, visibility, pose):
# pts[:, 0] = self.input_size - pts[:, 0]
pts[:, 0] = img.shape[1] - pts[:, 0]
pts = pts[self.mirror_map]
if visibility is not None:
visibility = visibility[self.mirror_map]
img = cv2.flip(img, 1)
if pose is not None:
# fix roll&yaw in pose
pose['roll'] = -pose['roll']
pose['yaw'] = -pose['yaw']
return img, pts, visibility, pose
def aug_rotate(self, img, pts, pose, angle):
center = [DEST_SIZE // 2, DEST_SIZE // 2]
if pose is not None:
# fix roll in pose
pose['roll'] += angle
cx, cy = center
M, pts = rotate(angle, (cx, cy), pts)
imgT = cv2.warpAffine(img, M, (int(img.shape[1]), int(img.shape[0])))
x1 = np.min(pts[:, 0])
x2 = np.max(pts[:, 0])
y1 = np.min(pts[:, 1])
y2 = np.max(pts[:, 1])
w = x2 - x1 + 1
h = y2 - y1 + 1
x1 = int(x1 - (ENLARGE_RATIO - 1.0) / 2.0 * w)
y1 = int(y1 - (ENLARGE_RATIO - 1.0) * h)
new_w = int(ENLARGE_RATIO * (1 + normal() * 0.25) * w)
new_h = int(ENLARGE_RATIO * (1 + normal() * 0.25) * h)
new_x1 = x1 + int(normal() * DEST_SIZE * 0.15)
new_y1 = y1 + int(normal() * DEST_SIZE * 0.15)
new_x2 = new_x1 + new_w
new_y2 = new_y1 + new_h
new_xy = new_x1, new_y1
pts = pts - new_xy
height, width, _ = imgT.shape
dx = max(0, -new_x1)
dy = max(0, -new_y1)
new_x1 = max(0, new_x1)
new_y1 = max(0, new_y1)
edx = max(0, new_x2 - width)
edy = max(0, new_y2 - height)
new_x2 = min(width, new_x2)
new_y2 = min(height, new_y2)
imgT = imgT[new_y1:new_y2, new_x1:new_x2]
if dx > 0 or dy > 0 or edx > 0 or edy > 0:
imgT = cv2.copyMakeBorder(
imgT,
dy,
edy,
dx,
edx,
cv2.BORDER_CONSTANT,
value=(103.94, 116.78, 123.68))
return imgT, pts, pose
def random_mask(self, img):
mask = self.mask_generator.draw_samples(size=img.shape)
mask = np.expand_dims(np.sum(mask, axis=-1) > 0, axis=-1)
return mask
def random_overlay(self):
index = np.random.choice(len(self.overlay_generator.overlay))
overlay = self.overlay_generator.overlay[index]
return overlay
def augment_blur(self, img):
h, w = img.shape[:2]
assert h == w
ssize = int(random.uniform(0.01, 0.5) * h)
aug_seq = iaa.Sequential([
iaa.Sometimes(
1.0,
iaa.OneOf([
iaa.GaussianBlur((3, 15)),
iaa.AverageBlur(k=(3, 15)),
iaa.MedianBlur(k=(3, 15)),
iaa.MotionBlur((5, 25))
])),
iaa.Resize(ssize, interpolation=imgaug.ALL),
iaa.Sometimes(
0.6,
iaa.OneOf([
iaa.AdditiveGaussianNoise(
loc=0, scale=(0.0, 0.1 * 255), per_channel=0.5),
iaa.AdditiveLaplaceNoise(
loc=0, scale=(0.0, 0.1 * 255), per_channel=0.5),
iaa.AdditivePoissonNoise(lam=(0, 30), per_channel=0.5)
])),
iaa.Sometimes(0.8, iaa.JpegCompression(compression=(40, 90))),
iaa.Resize(h),
])
aug_img = aug_seq.augment_image(img)
return aug_img
def augment_color_temperature(self, img):
aug = iaa.ChangeColorTemperature((1000, 40000))
aug_img = aug.augment_image(img)
return aug_img
def aug_clr_noise_blur(self, img):
# skin&light
if np.random.choice((True, False), p=[0.05, 0.95]):
img_ycrcb_raw = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB)
skin_factor_list = [0.6, 0.8, 1.0, 1.2, 1.4]
skin_factor = np.random.choice(skin_factor_list)
img_ycrcb_raw[:, :, 0:1] = np.clip(
img_ycrcb_raw[:, :, 0:1].astype(np.float32) * skin_factor, 0,
255).astype(np.uint8)
img = cv2.cvtColor(img_ycrcb_raw, cv2.COLOR_YCR_CB2BGR)
# gauss blur 5%
if np.random.choice((True, False), p=[0.05, 0.95]):
sigma = np.random.choice([0.25, 0.50, 0.75])
gauss_blur_iaa = iaa.GaussianBlur(sigma=sigma)
img = gauss_blur_iaa(image=img)
# gauss noise 5%
if np.random.choice((True, False), p=[0.05, 0.95]):
scale = np.random.choice([0.01, 0.03, 0.05])
gauss_noise_iaa = iaa.AdditiveGaussianNoise(scale=scale * 255)
img = gauss_noise_iaa(image=img)
# motion blur 5%
if np.random.choice((True, False), p=[0.05, 0.95]):
angle = np.random.choice([0, 45, 90, 135, 180, 225, 270, 315])
motion_blur_iaa = iaa.MotionBlur(k=5, angle=angle)
img = motion_blur_iaa(image=img)
# jpeg compress 5%
if np.random.choice((True, False), p=[0.05, 0.95]):
jpeg_compress_iaa = iaa.JpegCompression(compression=(10, 50))
img = jpeg_compress_iaa(image=img)
# gamma contrast 5%
if np.random.choice((True, False), p=[0.05, 0.95]):
gamma_contrast_iaa = iaa.GammaContrast((0.85, 1.15))
img = gamma_contrast_iaa(image=img)
# brightness 5%
if np.random.choice((True, False), p=[0.05, 0.95]):
brightness_iaa = iaa.MultiplyAndAddToBrightness(
mul=(0.85, 1.15), add=(-10, 10))
img = brightness_iaa(image=img)
return img
def augment_set(self, img):
noisy_image = img.copy().astype(np.uint8)
if np.random.choice((True, False), p=[0.6, 0.4]):
aug = iaa.ChangeColorTemperature((1000, 40000))
noisy_image = aug.augment_image(noisy_image)
if np.random.choice((True, False), p=[0.8, 0.2]):
aug_seq = iaa.Sequential([
iaa.Sometimes(0.5, iaa.JpegCompression(compression=(40, 90))),
iaa.Sometimes(0.5, iaa.MotionBlur((3, 7))),
iaa.Sometimes(
0.5,
iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255))),
],
random_order=True)
noisy_image = aug_seq.augment_image(noisy_image)
sometimes = lambda aug: iaa.Sometimes(0.25, aug)
seq = iaa.Sequential([
sometimes(iaa.AverageBlur(k=(2, 5))),
sometimes(iaa.GammaContrast((0.5, 2.0)))
],
random_order=True)
noisy_image = seq(images=noisy_image)
return noisy_image
@PIPELINES.register_module()
class FaceKeypointNorm:
"""Data augmentation with Norm.
"""
def __init__(self, input_size=96):
self.input_size = input_size
def __call__(self, results):
"""Perform data augmentation with random image flip."""
# for key in results.get('img', []):
if 'img' in results.keys():
image = results['img']
image = cv2.resize(image, (self.input_size, self.input_size))
results['img'] = np.array(image)
# for key in results.get('target_point', []):
if 'target_point' in results.keys():
points = results['target_point']
points[:, 0] = points[:, 0] / image.shape[1] * float(
self.input_size)
points[:, 1] = points[:, 1] / image.shape[0] * float(
self.input_size)
target_point = np.reshape(points,
(points.shape[0] * points.shape[1]))
results['target_point'] = np.array(target_point, np.float32)
else:
results['target_point'] = np.array(np.zeros(212), np.float32)
# for key in results.get('target_point_mask', []):
if 'target_point_mask' in results.keys():
points_mask = results['target_point_mask']
points_mask = points_mask.astype(np.float32)
points_mask = np.reshape(
points_mask, (points_mask.shape[0] * points_mask.shape[1]))
results['target_point_mask'] = points_mask.astype(np.float32)
else:
results['target_point_mask'] = np.array(
np.zeros(212), np.float32)
# for key in results.get('target_pose', []):
if 'target_pose' in results.keys():
pose = results['target_pose']
pose = np.asarray([pose['pitch'], pose['roll'], pose['yaw']])
results['target_pose'] = pose.astype(np.float32)
else:
results['target_pose'] = np.array(np.zeros(3), np.float32)
if 'target_pose_mask' not in results.keys():
results['target_pose_mask'] = np.array(np.zeros(3), np.float32)
return results
@PIPELINES.register_module()
class FaceKeypointRandomAugmentation:
"""Data augmentation with random flip.
"""
def __init__(self, input_size=96):
self.input_size = input_size
# Data Augment
self.data_aug = FaceKeypointsDataAugumentation(self.input_size)
def __call__(self, results):
"""Perform data augmentation with random image flip."""
image = results['img']
points = results['target_point']
points_mask = results['target_point_mask']
pose = results['target_pose']
pose_mask = results['target_pose_mask']
overlay_image_path = results['overlay_image_path']
if np.random.choice((True, False), p=[0.2, 0.8]):
# overlay
overlay_pos = self.data_aug.random_overlay()
overlay_img_index = np.random.choice(len(overlay_image_path))
overlay_img_filepath = overlay_image_path[overlay_img_index]
overlay_img = cv2.imread(overlay_img_filepath,
cv2.IMREAD_UNCHANGED)
(x, y, w, h) = overlay_pos
x1, y1, x2, y2 = x, y, x + w, y + h
overlay_img = cv2.resize(overlay_img, dsize=(w, h))
overlay_mask = overlay_img[:, :, 3:4] / 255.0
image[y1:y2, x1:x2, :] = image[y1:y2, x1:x2, :] * (
1 - overlay_mask) + overlay_img[:, :, 0:3] * overlay_mask
image = image.astype(np.uint8)
angle = pose['roll']
image, points, pose = self.data_aug.aug_rotate(
image, points, pose, angle) # counterclockwise rotate angle
pose['roll'] = angle # reset roll=angle
if np.random.choice((True, False)):
image_transform, points, _, pose = self.data_aug.aug_flip(
image, points, None, pose)
else:
image_transform = image
image_transform = self.data_aug.aug_clr_noise_blur(image_transform)
results['img'] = image_transform
results['target_point'] = points
results['target_pose'] = pose
return results
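As a usage note, FaceKeypointNorm also works on inference-only inputs: when the target keys are absent it fills zero placeholders, so the same val/test pipeline can be reused for raw images. A minimal sketch (assumes easycv and imgaug are installed):

```python
# Minimal sketch: FaceKeypointNorm resizes the image and zero-fills missing targets.
import numpy as np
from easycv.datasets.face.pipelines import FaceKeypointNorm

sample = {'img': np.zeros((256, 256, 3), dtype=np.uint8)}
out = FaceKeypointNorm(input_size=96)(sample)
print(out['img'].shape)           # (96, 96, 3)
print(out['target_point'].shape)  # (212,), zero-filled placeholder
```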

View File

@ -3,6 +3,7 @@ from .backbones import * # noqa: F401,F403
from .builder import build_backbone, build_head, build_loss, build_model
from .classification import *
from .detection import *
from .face import *
from .heads import *
from .loss import *
from .pose import TopDown

View File

@ -4,6 +4,7 @@ from .bninception import BNInception
from .conv_mae_vit import FastConvMAEViT
from .conv_vitdet import ConvViTDet
from .efficientformer import EfficientFormer
from .face_keypoint_backbone import FaceKeypointBackbone
from .genet import PlainNet
from .hrnet import HRNet
from .inceptionv3 import Inception3

View File

@ -0,0 +1,90 @@
import torch.nn as nn
from easycv.models.registry import BACKBONES
from easycv.models.utils.face_keypoint_utils import InvertedResidual, Residual
@BACKBONES.register_module
class FaceKeypointBackbone(nn.Module):
def __init__(self,
in_channels=3,
out_channels=48,
residual_activation='relu',
inverted_activation='half_v2',
inverted_expand_ratio=2):
super(FaceKeypointBackbone, self).__init__()
self.conv1 = Residual(in_channels, 12, 3, 2, 0)
self.conv2 = Residual(12, 12, 3, 1, 0, activation=residual_activation)
self.conv3 = Residual(12, 12, 3, 1, 1, activation=residual_activation)
self.conv4 = Residual(12, 12, 3, 1, 0, activation=residual_activation)
self.conv5 = Residual(12, 24, 3, 2, 0, activation=residual_activation)
self.conv6 = Residual(24, 24, 3, 1, 0, activation=residual_activation)
self.conv7 = Residual(24, 24, 3, 1, 1, activation=residual_activation)
self.conv8 = Residual(24, 24, 3, 1, 1, activation=residual_activation)
self.conv9 = InvertedResidual(
24,
48,
3,
2,
0,
expand_ratio=inverted_expand_ratio,
activation=inverted_activation)
self.conv10 = InvertedResidual(
48,
48,
3,
1,
0,
expand_ratio=inverted_expand_ratio,
activation=inverted_activation)
self.conv11 = InvertedResidual(
48,
48,
3,
1,
1,
expand_ratio=inverted_expand_ratio,
activation=inverted_activation)
self.conv12 = InvertedResidual(
48,
48,
3,
1,
1,
expand_ratio=inverted_expand_ratio,
activation=inverted_activation)
self.conv13 = InvertedResidual(
48,
48,
3,
1,
1,
expand_ratio=inverted_expand_ratio,
activation=inverted_activation)
self.conv14 = InvertedResidual(
48,
out_channels,
3,
2,
0,
expand_ratio=inverted_expand_ratio,
activation=inverted_activation)
def forward(self, x):
x1 = self.conv1(x)
x2 = self.conv2(x1)
x3 = self.conv3(x2)
x4 = self.conv4(x3)
x5 = self.conv5(x4)
x6 = self.conv6(x5)
x7 = self.conv7(x6)
x8 = self.conv8(x7)
x9 = self.conv9(x8)
x10 = self.conv10(x9)
x11 = self.conv11(x10)
x12 = self.conv12(x11)
x13 = self.conv13(x12)
x14 = self.conv14(x13)
return x14
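A quick shape check helps explain the head dimensions that follow: for the 96x96 inputs used in the config, the strided blocks reduce the feature map to 3x3 with 48 channels, which is why the pose head flattens 48*3*3 and the keypoint head 96*3*3. A minimal sketch (assumes easycv is installed):

```python
# Minimal sketch: verify the backbone output size for a 96x96 input.
import torch
from easycv.models.backbones import FaceKeypointBackbone

net = FaceKeypointBackbone(in_channels=3, out_channels=48)
with torch.no_grad():
    feat = net(torch.randn(1, 3, 96, 96))
print(feat.shape)  # expected: torch.Size([1, 48, 3, 3])
```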

View File

@ -0,0 +1,2 @@
from .face_keypoint import FaceKeypoint
from .head import *

View File

@ -0,0 +1,103 @@
import mmcv
import numpy as np
from easycv.models import builder
from easycv.models.base import BaseModel
from easycv.models.builder import MODELS
from easycv.models.utils.face_keypoint_utils import (get_keypoint_accuracy,
get_pose_accuracy)
@MODELS.register_module()
class FaceKeypoint(BaseModel):
def __init__(self,
backbone,
neck=None,
keypoint_head=None,
pose_head=None,
pretrained=None,
loss_keypoint=None,
loss_pose=None):
super().__init__()
self.pretrained = pretrained
self.backbone = builder.build_backbone(backbone)
if neck is not None:
self.neck = builder.build_neck(neck)
if keypoint_head is not None:
if 'loss_keypoint' not in keypoint_head and loss_keypoint is not None:
keypoint_head['loss_keypoint'] = loss_keypoint
self.keypoint_head = builder.build_head(keypoint_head)
if pose_head is not None:
if 'loss_pose' not in pose_head and loss_pose is not None:
pose_head['loss_pose'] = loss_pose
self.pose_head = builder.build_head(pose_head)
@property
def with_neck(self):
"""Check if has keypoint_head."""
return hasattr(self, 'neck')
@property
def with_keypoint(self):
"""Check if has keypoint_head."""
return hasattr(self, 'keypoint_head')
@property
def with_pose(self):
"""Check if has pose_head."""
return hasattr(self, 'pose_head')
def forward_train(self, img, target_point, target_point_mask, target_pose,
target_pose_mask, **kwargs):
"""Defines the computation performed at every call when training."""
output = self.backbone(img)
if self.with_neck:
output = self.neck(output)
if self.with_keypoint:
output_points = self.keypoint_head(output)
if self.with_pose:
output_pose = self.pose_head(output)
target_point = target_point * target_point_mask
target_pose = target_pose * target_pose_mask
losses = dict()
if self.with_keypoint:
keypoint_losses = self.keypoint_head.get_loss(
output_points, target_point, target_point_mask, target_pose)
losses.update(keypoint_losses)
keypoint_accuracy = get_keypoint_accuracy(output_points,
target_point)
losses.update(keypoint_accuracy)
if self.with_pose:
output_pose = output_pose * 180.0 / np.pi
output_pose = output_pose * target_pose_mask
pose_losses = self.pose_head.get_loss(output_pose, target_pose)
losses.update(pose_losses)
pose_accuracy = get_pose_accuracy(output_pose, target_pose)
losses.update(pose_accuracy)
return losses
def forward_test(self, img, **kwargs):
"""Defines the computation performed at every call when testing."""
output = self.backbone(img)
if self.with_neck:
output = self.neck(output)
if self.with_keypoint:
output_points = self.keypoint_head(output)
if self.with_pose:
output_pose = self.pose_head(output)
ret = {}
ret['point'] = output_points
ret['pose'] = output_pose
return ret

View File

@ -0,0 +1,2 @@
from .face_keypoint_head import FaceKeypointHead
from .face_keypoint_pose_head import FacePoseHead

View File

@ -0,0 +1,68 @@
import copy
import numpy as np
import torch
import torch.nn as nn
from easycv.models.builder import HEADS, build_loss
from easycv.models.utils.face_keypoint_utils import (InvertedResidual, View,
conv_bn, conv_no_relu,
get_keypoint_accuracy)
@HEADS.register_module
class FaceKeypointHead(nn.Module):
def __init__(
self,
mean_face,
loss_keypoint,
in_channels=48,
out_channels=212,
input_size=96,
inverted_expand_ratio=2,
inverted_activation='half_v2',
):
super(FaceKeypointHead, self).__init__()
self.input_size = input_size
self.face_mean_shape = copy.deepcopy(np.asarray(mean_face))
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
self.branches = []
self.loss = build_loss(loss_keypoint)
# points
self.branches.append(
nn.Sequential(
InvertedResidual(
in_channels,
96,
3,
1,
1,
expand_ratio=inverted_expand_ratio,
activation=inverted_activation),
View((-1, 96 * 3 * 3, 1, 1)), conv_bn(96 * 3 * 3, 128, 1, 1,
0),
conv_bn(128, 128, 1, 1, 0),
conv_no_relu(128, out_channels, 1, 1, 0),
View((-1, out_channels))))
self.branches = nn.ModuleList(self.branches)
def get_loss(self, output, target_point, target_point_mask, target_pose):
losses = dict()
loss = self.loss(output * target_point_mask, target_point, target_pose)
losses['point_loss'] = loss
return losses
def get_accuracy(self, output, target_point):
return get_keypoint_accuracy(output, target_point)
def forward(self, x):
point = self.branches[0](x)
point = point * 0.5 + torch.from_numpy(self.face_mean_shape).to(
self.device)
point = point * self.input_size
return point
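In equation form, the head regresses a bounded offset around the mean face: with branch output f_theta(x), the normalized mean shape \bar{p} (MEAN_FACE in the config) and input size S, the forward pass above decodes

```latex
\[
\hat{p} \;=\; \bigl(0.5\, f_\theta(x) + \bar{p}\bigr)\cdot S
\]
```

so the network only learns a residual around an average face rather than absolute pixel coordinates.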

View File

@ -0,0 +1,55 @@
import numpy as np
import torch
import torch.nn as nn
from easycv.models.builder import HEADS, build_loss
from easycv.models.utils.face_keypoint_utils import (InvertedResidual, View,
conv_bn, conv_no_relu,
get_pose_accuracy)
@HEADS.register_module
class FacePoseHead(nn.Module):
def __init__(
self,
loss_pose,
in_channels=48,
out_channels=3,
inverted_expand_ratio=2,
inverted_activation='half_v2',
):
super(FacePoseHead, self).__init__()
self.branches = []
self.loss = build_loss(loss_pose)
# pose
self.branches.append(
nn.Sequential(
InvertedResidual(
in_channels,
48,
3,
1,
1,
expand_ratio=inverted_expand_ratio,
activation=inverted_activation),
View((-1, 48 * 3 * 3, 1, 1)), conv_bn(48 * 3 * 3, 48, 1, 1, 0),
conv_bn(48, 48, 1, 1, 0),
conv_no_relu(48, out_channels, 1, 1, 0),
View((-1, out_channels))))
self.branches = nn.ModuleList(self.branches)
def get_loss(self, output, target_pose):
losses = dict()
loss = self.loss(output, target_pose)
losses['pose_loss'] = loss
return losses
def get_accuracy(self, output, target_pose):
return get_pose_accuracy(output, target_pose)
def forward(self, x):
return self.branches[0](x)

View File

@ -1,5 +1,6 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .cross_entropy_loss import CrossEntropyLoss
from .face_keypoint_loss import FacePoseLoss, WingLossWithPose
from .focal_loss import FocalLoss
from .iou_loss import GIoULoss, IoULoss
from .mse_loss import JointsMSELoss

View File

@ -0,0 +1,91 @@
import copy
import math
import numpy as np
import torch
import torch.nn as nn
from easycv.models.builder import LOSSES
CONSTANT_CONTOUR = 66
CONSTANT_EYEBROW = 18
CONSTANT_EYE = 18
CONSTANT_NOSE = 30
CONSTANT_LIPS = 40
CONSTANT_EYE_CENTER = 4
@LOSSES.register_module()
class WingLossWithPose(nn.Module):
def __init__(self,
num_points=106,
left_eye_left_corner_index=66,
right_eye_right_corner_index=79,
points_weight=1.0,
contour_weight=1.5,
eyebrow_weight=1.5,
eye_weight=1.7,
nose_weight=1.3,
lip_weight=1.7,
omega=10,
epsilon=2):
super(WingLossWithPose, self).__init__()
self.omega = omega
self.epsilon = epsilon
self.num_points = num_points
self.left_eye_left_corner_index = left_eye_left_corner_index
self.right_eye_right_corner_index = right_eye_right_corner_index
self.points_weight = points_weight
contour_weight = np.full(CONSTANT_CONTOUR, contour_weight)
eyebrow_left_weight = np.full(CONSTANT_EYEBROW, eyebrow_weight)
eyebrow_right_weight = np.full(CONSTANT_EYEBROW, eyebrow_weight)
nose_weight = np.full(CONSTANT_NOSE, nose_weight)
eye_left_weight = np.full(CONSTANT_EYE, eye_weight)
eye_right_weight = np.full(CONSTANT_EYE, eye_weight)
lips_weight = np.full(CONSTANT_LIPS, lip_weight)
eye_center_weight = np.full(CONSTANT_EYE_CENTER, eye_weight)
part_weight = np.concatenate(
(contour_weight, eyebrow_left_weight, eyebrow_right_weight,
nose_weight, eye_left_weight, eye_right_weight, lips_weight,
eye_center_weight),
axis=0)
self.part_weight = None
if part_weight is not None:
self.part_weight = torch.from_numpy(part_weight).cuda()
def forward(self, pred, target, pose):
weight = 5.0 * (1.0 - torch.cos(pose * np.pi / 180.0)) + 1.0
weight = torch.sum(weight, dim=1) / 3.0
weight = weight.view((weight.shape[0], 1))
if self.part_weight is not None:
weight = weight * self.part_weight
y = target
y_hat = pred
delta_y = (y - y_hat).abs() * weight
delta_y1 = delta_y[delta_y < self.omega]
delta_y2 = delta_y[delta_y >= self.omega]
loss1 = self.omega * torch.log(1 + delta_y1 / self.epsilon)
C = self.omega - self.omega * math.log(1 + self.omega / self.epsilon)
loss = delta_y2 - C
result = self.points_weight * (loss1.sum() + loss.sum()) / (
len(loss1) + len(loss))
return result
@LOSSES.register_module()
class FacePoseLoss(nn.Module):
def __init__(self, pose_weight=1.0):
super(FacePoseLoss, self).__init__()
self.criterion = nn.MSELoss()
self.pose_weight = pose_weight
def forward(self, pred, target):
result = self.pose_weight * self.criterion(pred, target)
return result
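In equation form, WingLossWithPose applies the standard wing loss to pose- and part-weighted residuals. With targets y, predictions \hat{y}, per-part weights w^{part} and pose angles a in degrees, the forward pass above computes

```latex
\[
w = \frac{1}{3}\sum_{a\in\{\mathrm{pitch},\,\mathrm{roll},\,\mathrm{yaw}\}}
    \Bigl(5\bigl(1-\cos\tfrac{\pi a}{180}\bigr)+1\Bigr), \qquad
d_i = w\,w^{\mathrm{part}}_i\,\lvert y_i-\hat{y}_i\rvert
\]
\[
\mathrm{wing}(d_i)=
\begin{cases}
\omega\,\ln\bigl(1+d_i/\epsilon\bigr), & d_i<\omega\\
d_i-C, & d_i\ge\omega
\end{cases},
\qquad C=\omega-\omega\,\ln\bigl(1+\omega/\epsilon\bigr)
\]
```

and returns points_weight times the mean of wing(d_i) over all keypoint coordinates.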

View File

@ -5,6 +5,10 @@ from .conv_ws import ConvWS2d, conv_ws_2d
from .dist_utils import (DistributedLossWrapper, DistributedMinerWrapper,
get_world_size, is_dist_avail_and_initialized,
reduce_mean)
from .face_keypoint_utils import (ION, InvertedResidual, Residual, Softmax,
View, conv_bn, conv_no_relu,
get_keypoint_accuracy, get_pose_accuracy,
pose_accuracy)
from .gather_layer import GatherLayer
from .init_weights import _init_weights, trunc_normal_
from .multi_pooling import GeMPooling, MultiAvgPooling, MultiPooling

View File

@ -0,0 +1,240 @@
import copy
import math
import numpy as np
import torch
import torch.nn as nn
def conv_bn(inp, oup, kernel, stride, padding=1):
return nn.Sequential(
nn.Conv2d(inp, oup, kernel, stride, padding, bias=False),
nn.BatchNorm2d(oup), nn.PReLU(oup))
def conv_no_relu(inp, oup, kernel, stride, padding=1):
return nn.Sequential(
nn.Conv2d(inp, oup, kernel, stride, padding, bias=False),
nn.BatchNorm2d(oup))
class View(nn.Module):
def __init__(self, shape):
super(View, self).__init__()
self.shape = shape
def forward(self, x):
return x.view(*self.shape)
class Softmax(nn.Module):
def __init__(self, dim):
super(Softmax, self).__init__()
self.softmax = nn.Softmax(dim)
def forward(self, x):
return self.softmax(x)
class InvertedResidual(nn.Module):
def __init__(self,
inp,
oup,
kernel_size,
stride,
padding,
expand_ratio=2,
use_connect=False,
activation='relu'):
super(InvertedResidual, self).__init__()
hid_channels = int(inp * expand_ratio)
if activation == 'relu':
self.conv = nn.Sequential(
nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False),
nn.BatchNorm2d(hid_channels), nn.ReLU(inplace=True),
nn.Conv2d(
hid_channels,
hid_channels,
kernel_size,
stride,
padding,
groups=hid_channels,
bias=False), nn.BatchNorm2d(hid_channels),
nn.ReLU(inplace=True),
nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup))
elif activation == 'prelu':
self.conv = nn.Sequential(
nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False),
nn.BatchNorm2d(hid_channels), nn.PReLU(hid_channels),
nn.Conv2d(
hid_channels,
hid_channels,
kernel_size,
stride,
padding,
groups=hid_channels,
bias=False), nn.BatchNorm2d(hid_channels),
nn.PReLU(hid_channels),
nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup))
elif activation == 'half_v1':
self.conv = nn.Sequential(
nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False),
nn.BatchNorm2d(hid_channels), nn.ReLU(inplace=True),
nn.Conv2d(
hid_channels,
hid_channels,
kernel_size,
stride,
padding,
groups=hid_channels,
bias=False), nn.BatchNorm2d(hid_channels),
nn.PReLU(hid_channels),
nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup))
elif activation == 'half_v2':
self.conv = nn.Sequential(
nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False),
nn.BatchNorm2d(hid_channels), nn.PReLU(hid_channels),
nn.Conv2d(
hid_channels,
hid_channels,
kernel_size,
stride,
padding,
groups=hid_channels,
bias=False), nn.BatchNorm2d(hid_channels),
nn.ReLU(inplace=True),
nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup))
self.use_connect = use_connect
def forward(self, x):
if self.use_connect:
return x + self.conv(x)
else:
return self.conv(x)
class Residual(nn.Module):
def __init__(self,
inp,
oup,
kernel_size,
stride,
padding,
use_connect=False,
activation='relu'):
super(Residual, self).__init__()
self.use_connect = use_connect
if activation == 'relu':
self.conv = nn.Sequential(
nn.Conv2d(
inp,
inp,
kernel_size,
stride,
padding,
groups=inp,
bias=False), nn.BatchNorm2d(inp), nn.ReLU(inplace=True),
nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup),
nn.ReLU(inplace=True))
elif activation == 'prelu':
self.conv = nn.Sequential(
nn.Conv2d(
inp,
inp,
kernel_size,
stride,
padding,
groups=inp,
bias=False), nn.BatchNorm2d(inp), nn.PReLU(inp),
nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup),
nn.PReLU(oup))
elif activation == 'half_v1':
self.conv = nn.Sequential(
nn.Conv2d(
inp,
inp,
kernel_size,
stride,
padding,
groups=inp,
bias=False), nn.BatchNorm2d(inp), nn.ReLU(inplace=True),
nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup),
nn.PReLU(oup))
elif activation == 'half_v2':
self.conv = nn.Sequential(
nn.Conv2d(
inp,
inp,
kernel_size,
stride,
padding,
groups=inp,
bias=False), nn.BatchNorm2d(inp), nn.PReLU(inp),
nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup),
nn.ReLU(inplace=True))
def forward(self, x):
if self.use_connect:
return x + self.conv(x)
else:
return self.conv(x)
def pose_accuracy(output, target):
with torch.no_grad():
output = output.detach().cpu().numpy()
target = target.detach().cpu().numpy()
acc = np.mean(np.abs(output - target))
return acc
def ION(output, target, left_eye_left_coner_idx, right_eye_right_corner_idx,
num_pts):
with torch.no_grad():
output = output.view(-1, num_pts, 2).cpu().numpy()
target = target.view(-1, num_pts, 2).cpu().numpy()
interocular = target[:,
left_eye_left_coner_idx] - target[:,
right_eye_right_corner_idx]
interocular = np.sqrt(
np.square(interocular[:, 0]) + np.square(interocular[:, 1])) + 1e-5
dist = target - output
dist = np.sqrt(np.square(dist[:, :, 0]) + np.square(dist[:, :, 1]))
dist = np.sum(dist, axis=1)
nme = dist / (interocular * num_pts)
return np.mean(nme)
def get_keypoint_accuracy(output, target_point):
accuracy = dict()
num_points = 106
left_eye_left_corner_index = 66
right_eye_right_corner_index = 79
nme = ION(output, target_point, left_eye_left_corner_index,
right_eye_right_corner_index, num_points)
accuracy['nme'] = nme
return accuracy
def get_pose_accuracy(output, target_pose):
accuracy = dict()
pose_acc = pose_accuracy(output, target_pose)
accuracy['pose_acc'] = float(pose_acc)
return accuracy
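In equation form, the ION/NME metric above is the mean point-to-point error normalized by the inter-ocular distance (landmark indices 66 and 79) and the number of points, averaged over the batch:

```latex
\[
\mathrm{NME} = \frac{1}{B}\sum_{b=1}^{B}
\frac{\sum_{i=1}^{N}\lVert g_{b,i}-p_{b,i}\rVert_2}
     {N\,\bigl(\lVert g_{b,66}-g_{b,79}\rVert_2+10^{-5}\bigr)}
\]
```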

View File

@ -2,6 +2,7 @@
from .classifier import TorchClassifier
from .detector import (TorchFaceDetector, TorchYoloXClassifierPredictor,
TorchYoloXPredictor)
from .face_keypoints_predictor import FaceKeypointsPredictor
from .feature_extractor import (TorchFaceAttrExtractor,
TorchFaceFeatureExtractor,
TorchFeatureExtractor)

View File

@ -113,6 +113,7 @@ class PredictorV2(object):
device=None,
save_results=False,
save_path=None,
mode='rgb',
*args,
**kwargs):
self.model_path = model_path
@ -135,6 +136,7 @@ class PredictorV2(object):
self.model = self.prepare_model()
self.processor = self.build_processor()
self._load_op = None
self.mode = mode
def prepare_model(self):
"""Build model from config file by default.
@ -182,7 +184,7 @@ class PredictorV2(object):
}
"""
if self._load_op is None:
load_cfg = dict(type='LoadImage', mode='rgb')
load_cfg = dict(type='LoadImage', mode=self.mode)
self._load_op = build_from_cfg(load_cfg, PIPELINES)
if not isinstance(input, str):

View File

@ -0,0 +1,120 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import copy
import os
import cv2
import numpy as np
import torch
from torchvision.transforms import Compose
from easycv.datasets.registry import PIPELINES
from easycv.models import build_model
from easycv.predictors.builder import PREDICTORS
from easycv.predictors.interface import PredictorInterface
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.config_tools import mmcv_config_fromfile
from easycv.utils.registry import build_from_cfg
from ..models import *
from .base import PredictorV2
face_contour_point_index = [
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
]
left_eye_brow_point_index = [33, 34, 35, 36, 37, 38, 39, 40, 41, 33]
right_eye_brow_point_index = [42, 43, 44, 45, 46, 47, 48, 49, 50, 42]
left_eye_point_index = [66, 67, 68, 69, 70, 71, 72, 73, 66]
right_eye_point_index = [75, 76, 77, 78, 79, 80, 81, 82, 75]
nose_bridge_point_index = [51, 52, 53, 54]
nose_contour_point_index = [55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65]
mouth_outer_point_index = [84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 84]
mouth_inter_point_index = [96, 97, 98, 99, 100, 101, 102, 103, 96]
@PREDICTORS.register_module()
class FaceKeypointsPredictor(PredictorV2):
"""Predict pipeline for face keypoint
Args:
model_path (str): Path of model path
model_config (str): config file path for model and processor to init. Defaults to None.
"""
def __init__(self,
model_path,
model_config,
batch_size=1,
device=None,
save_results=False,
save_path=None,
mode='bgr'):
super(FaceKeypointsPredictor, self).__init__(
model_path,
model_config,
batch_size=batch_size,
device=device,
save_results=save_results,
save_path=save_path,
mode=mode)
self.input_size = self.cfg.IMAGE_SIZE
self.point_number = self.cfg.POINT_NUMBER
def show_result(self, img, points, scale=4.0, save_path=None):
"""Draw `result` over `img`.
Args:
img (str or Tensor): The image to be displayed.
result (Tensor): The face keypoints to draw over `img`.
scale: zoom in or out scale
save_path: path to save drawned 'img'
Returns:
img (Tensor): Only if not `show` or `out_file`
"""
img = cv2.imread(img)
img = img.copy()
h, w, c = img.shape
scale_h = h / self.input_size
scale_w = w / self.input_size
points = points.view(-1, self.point_number, 2).cpu().numpy()[0]
for index in range(len(points)):
points[index][0] *= scale_w
points[index][1] *= scale_h
image = cv2.resize(img, dsize=None, fx=scale, fy=scale)
def draw_line(point_index, image, point):
for i in range(len(point_index) - 1):
cur_index = point_index[i]
next_index = point_index[i + 1]
cur_pt = (int(point[cur_index][0] * scale),
int(point[cur_index][1] * scale))
next_pt = (int(point[next_index][0] * scale),
int(point[next_index][1] * scale))
cv2.line(image, cur_pt, next_pt, (0, 0, 255), thickness=2)
draw_line(face_contour_point_index, image, points)
draw_line(left_eye_brow_point_index, image, points)
draw_line(right_eye_brow_point_index, image, points)
draw_line(left_eye_point_index, image, points)
draw_line(right_eye_point_index, image, points)
draw_line(nose_bridge_point_index, image, points)
draw_line(nose_contour_point_index, image, points)
draw_line(mouth_outer_point_index, image, points)
draw_line(mouth_inter_point_index, image, points)
size = len(points)
for i in range(size):
x = int(points[i][0])
y = int(points[i][1])
cv2.putText(image, str(i), (int(x * scale), int(y * scale)),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.circle(image, (int(x * scale), int(y * scale)), 2, (0, 255, 0),
cv2.FILLED)
if save_path is not None:
cv2.imwrite(save_path, image)
return image

View File

@ -3,6 +3,7 @@ dataclasses
einops
future
h5py
imgaug
json_tricks
numpy
opencv-python

View File

@ -0,0 +1,39 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import copy
import os
import tempfile
import unittest
import cv2
import numpy as np
from PIL import Image
from easycv.predictors.face_keypoints_predictor import FaceKeypointsPredictor
class FaceKeypointsPredictorWithoutDetectorTest(unittest.TestCase):
def setUp(self):
print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
self.image_path = './data/test/face_2d_keypoints/data/002253.png'
self.save_image_path = './data/test/face_2d_keypoints/data/result_002253.png'
self.model_path = './data/test/face_2d_keypoints/models/epoch_580.pth'
self.model_config_path = './configs/face/face_96x96_wingloss.py'
def test_single(self):
predict_pipeline = FaceKeypointsPredictor(
model_path=self.model_path, model_config=self.model_config_path)
output = predict_pipeline(self.image_path)[0]
output_keypoints = output['point']
output_pose = output['pose']
image_show = predict_pipeline.show_result(
self.image_path,
output_keypoints,
scale=2,
save_path=self.save_image_path)
if __name__ == '__main__':
unittest.main()