mirror of https://github.com/alibaba/EasyCV.git (synced 2025-06-03 14:49:00 +08:00)

add face-2d-keypoints

Link: https://code.alibaba-inc.com/pai-vision/EasyCV/codereview/9809249
* add face 2d keypoint config

This commit is contained in:
parent bc64851614
commit 2e8fc44dc1
configs/face/face_96x96_wingloss.py (new file, 236 lines)
@@ -0,0 +1,236 @@
# model settings
POINT_NUMBER = 106

MEAN_FACE = [
    0.05486667535113006, 0.24441904048908245, 0.05469932714062696,
    0.30396829196709935, 0.05520653400164321, 0.3643191463607746,
    0.05865501342257397, 0.42453849020500306, 0.0661603899137523,
    0.48531377442945767, 0.07807677169271177, 0.5452126843738523,
    0.09333319368757653, 0.6047840615432064, 0.11331425394034209,
    0.6631144309665994, 0.13897813867699352, 0.7172296230155276,
    0.17125811033538194, 0.767968859462583, 0.20831698519371536,
    0.8146603379935117, 0.24944621000897876, 0.857321261721953,
    0.2932993820558674, 0.8973900596678597, 0.33843820185594653,
    0.9350576242126986, 0.38647802623495553, 0.966902971122812,
    0.4411974776504609, 0.9878629960611088, 0.5000390697219397,
    0.9934886214875595, 0.5588590024515473, 0.9878510782414189,
    0.6135829360035883, 0.9668655595323074, 0.6616294188166414,
    0.9350065330378543, 0.7067734980023662, 0.8973410411573094,
    0.7506167730772516, 0.8572957679511382, 0.7917579157122047,
    0.8146281598803492, 0.8288026446367324, 0.7679019642224981,
    0.8610918526053805, 0.7171624168757985, 0.8867491048162915,
    0.6630344261248556, 0.9067293813428708, 0.6047095492618413,
    0.9219649147678989, 0.5451295187190602, 0.9338619041815587,
    0.4852292097262674, 0.9413455695142587, 0.424454780475834,
    0.9447753107545577, 0.3642347111991026, 0.9452649776939869,
    0.30388458223793025, 0.9450854849661369, 0.24432737691068557,
    0.1594802473020129, 0.17495177946520288, 0.2082918411850002,
    0.12758378330875153, 0.27675902873293057, 0.11712230823088154,
    0.34660582049732336, 0.12782553369032904, 0.4137234315527489,
    0.14788458441422778, 0.4123890243720449, 0.18814226684806626,
    0.3498927810760776, 0.17640650480816664, 0.28590212091591866,
    0.16895271174960227, 0.22193967489846017, 0.16985862149585013,
    0.5861805004572298, 0.147863456192582, 0.6532904167464643,
    0.12780412047734288, 0.723142364263288, 0.11709102395419578,
    0.7916076475508984, 0.12753867695205595, 0.8404440227263494,
    0.17488715120168932, 0.7779848023963316, 0.1698261195288917,
    0.7140264757991571, 0.1689377237959271, 0.650024882334848,
    0.17640581823811927, 0.5875270068157493, 0.18815421057605972,
    0.4999687027691624, 0.2770570778583906, 0.49996466107378934,
    0.35408433007759227, 0.49996725190415664, 0.43227025345368053,
    0.49997367716346774, 0.5099309118810921, 0.443147025685285,
    0.2837021691260901, 0.4079306716593004, 0.4729519900478952,
    0.3786223176615041, 0.5388017782630576, 0.4166237366074797,
    0.5822229552544941, 0.4556754522760756, 0.5887956328134262,
    0.49998730493119997, 0.5951855531982454, 0.5443300921009105,
    0.5887796732983633, 0.5833722476054509, 0.582200985012979,
    0.6213509190608012, 0.5387760772258134, 0.5920137550293199,
    0.4729325070035326, 0.5567854054587345, 0.28368589871138317,
    0.23395988420439123, 0.275313734012504, 0.27156519109550253,
    0.2558735678926061, 0.31487949633428597, 0.2523033259214858,
    0.356919009399118, 0.2627342680634766, 0.3866625969903256,
    0.2913618036573405, 0.3482919069920915, 0.3009936818974329,
    0.3064437008415846, 0.3037349617842158, 0.26724000706363993,
    0.2961896087804692, 0.3135744691699477, 0.27611103614975246,
    0.6132904312551143, 0.29135144033587107, 0.6430396927648264,
    0.2627079452269443, 0.6850713556136455, 0.2522730391144915,
    0.728377707003201, 0.25583118190779625, 0.7660035591791254,
    0.27526375689471777, 0.7327054300488236, 0.2961495286346863,
    0.6935171517115648, 0.3036951925380769, 0.6516533228539426,
    0.3009921014909089, 0.6863983789278025, 0.2760904908649394,
    0.35811903020866753, 0.7233174007629063, 0.4051199834269763,
    0.6931800846807724, 0.4629631471997891, 0.6718031951363689,
    0.5000016063148277, 0.6799150331999366, 0.5370506360177653,
    0.6717809139952097, 0.5948714927411151, 0.6931581144392573,
    0.6418878095835022, 0.7232890570786875, 0.6088129582142587,
    0.7713407215524752, 0.5601450388292929, 0.8052499757498277,
    0.5000181358125715, 0.8160749831906926, 0.4398905591799545,
    0.8052697696938342, 0.39120318265892984, 0.771375905028864,
    0.36888771299734613, 0.7241751210643214, 0.4331097084010058,
    0.7194543690519717, 0.5000188612450743, 0.7216823277180712,
    0.566895861884284, 0.7194302225129479, 0.631122598507516,
    0.7241462073974219, 0.5678462302796355, 0.7386355816766528,
    0.5000082906571756, 0.7479600838019628, 0.43217532542902076,
    0.7386538729390463, 0.31371761254774383, 0.2753328284323114,
    0.6862487843823917, 0.2752940437017121
]
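
MEAN_FACE flattens the mean face shape as alternating x, y coordinates
normalized to [0, 1]; the keypoint head added later in this commit scales the
network output by 0.5, adds this mean shape back, and multiplies by the input
size. A quick sanity check (illustrative only, not part of the config):

    # 106 landmarks, one (x, y) pair each -> 212 values
    assert len(MEAN_FACE) == POINT_NUMBER * 2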

IMAGE_SIZE = 96

loss_config = dict(
    num_points=POINT_NUMBER,
    left_eye_left_corner_index=66,
    right_eye_right_corner_index=79,
    points_weight=1.0,
    contour_weight=1.5,
    eyebrow_weight=1.5,
    eye_weight=1.7,
    nose_weight=1.3,
    lip_weight=1.7,
    omega=10,
    epsilon=2)
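
These per-region weights are consumed by WingLossWithPose, added below in
easycv/models/loss/face_keypoint_loss.py: contour, eyebrow, eye, nose and lip
points each get their own loss multiplier, and omega/epsilon are the wing-loss
knee parameters (see the formula after that loss's implementation).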

model = dict(
    type='FaceKeypoint',
    backbone=dict(
        type='FaceKeypointBackbone',
        in_channels=3,
        out_channels=48,
        residual_activation='relu',
        inverted_activation='half_v2',
        inverted_expand_ratio=2,
    ),
    keypoint_head=dict(
        type='FaceKeypointHead',
        in_channels=48,
        out_channels=POINT_NUMBER * 2,
        input_size=IMAGE_SIZE,
        inverted_expand_ratio=2,
        inverted_activation='half_v2',
        mean_face=MEAN_FACE,
        loss_keypoint=dict(type='WingLossWithPose', **loss_config),
    ),
    pose_head=dict(
        type='FacePoseHead',
        in_channels=48,
        out_channels=3,
        inverted_expand_ratio=2,
        inverted_activation='half_v2',
        loss_pose=dict(type='FacePoseLoss', pose_weight=0.01),
    ),
)

train_pipeline = [
    dict(type='FaceKeypointRandomAugmentation', input_size=IMAGE_SIZE),
    dict(type='FaceKeypointNorm', input_size=IMAGE_SIZE),
    dict(type='MMToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.4076, 0.458, 0.485],
        std=[1.0, 1.0, 1.0]),
    dict(
        type='Collect',
        keys=[
            'img', 'target_point', 'target_point_mask', 'target_pose',
            'target_pose_mask'
        ])
]

val_pipeline = [
    dict(type='FaceKeypointNorm', input_size=IMAGE_SIZE),
    dict(type='MMToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.4076, 0.458, 0.485],
        std=[1.0, 1.0, 1.0]),
    dict(
        type='Collect',
        keys=[
            'img', 'target_point', 'target_point_mask', 'target_pose',
            'target_pose_mask'
        ])
]
test_pipeline = val_pipeline

data_root = 'path/to/face_landmark_data/'

data_cfg = dict(
    data_root=data_root,
    input_size=IMAGE_SIZE,
)

data = dict(
    imgs_per_gpu=512,
    workers_per_gpu=2,
    train=dict(
        type='FaceKeypointDataset',
        data_source=dict(
            type='FaceKeypintSource',
            train=True,
            data_range=[0, 30000],  # [0, 30000] [0, 478857]
            data_cfg=data_cfg,
        ),
        pipeline=train_pipeline),
    val=dict(
        type='FaceKeypointDataset',
        data_source=dict(
            type='FaceKeypintSource',
            train=False,
            data_range=[478857, 488857],
            # data_range=[478857, 478999], #[478857, 478999] [478857, 488857]
            data_cfg=data_cfg,
        ),
        pipeline=val_pipeline),
    test=dict(
        type='FaceKeypointDataset',
        data_source=dict(
            type='FaceKeypintSource',
            train=False,
            data_range=[478857, 488857],
            # data_range=[478857, 478999], #[478857, 478999] [478857, 488857]
            data_cfg=data_cfg,
        ),
        pipeline=test_pipeline),
)

# runtime setting
optimizer = dict(
    type='Adam',
    lr=0.005,
)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0.00001,
    warmup='linear',
    warmup_iters=10,
    warmup_ratio=0.001,
    warmup_by_epoch=True,
    by_epoch=True)

total_epochs = 1000
checkpoint_config = dict(interval=10)
log_config = dict(
    interval=5, hooks=[
        dict(type='TextLoggerHook'),
    ])

log_level = 'INFO'
load_from = None
resume_from = None
dist_params = dict(backend='nccl')
workflow = [('train', 1)]

# disable opencv multithreading to avoid system being overloaded
opencv_num_threads = 0
# set multi-process start method as `fork` to speed up the training
mp_start_method = 'fork'

evaluation = dict(interval=1, metric=['NME'], save_best='NME')

eval_config = dict(interval=1)
evaluator_args = dict(metric_names='ave_nme')
eval_pipelines = [
    dict(
        mode='test',
        data=dict(**data['val'], imgs_per_gpu=1),
        evaluators=[dict(type='FaceKeypointEvaluator', **evaluator_args)])
]
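
Since this is a standard mmcv-style Python config, it can be loaded and
inspected directly; a minimal sketch, assuming mmcv is installed as an EasyCV
dependency:

    from mmcv import Config

    cfg = Config.fromfile('configs/face/face_96x96_wingloss.py')
    print(cfg.model.type)         # FaceKeypoint
    print(cfg.data.imgs_per_gpu)  # 512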
data/test/face_2d_keypoints/data/002253.png (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1a45cc56977e709361659d4123739d3647d122a0d80bf7249d0ccdef018f068e
size 112042
data/test/face_2d_keypoints/data/002258.png (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8d516f30a8c1583b45e54d737d2a712ed1c63ef387d579517e1e23e416339ac2
size 94367
data/test/face_2d_keypoints/models/epoch_580.pth (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5105c5aa83c59d2a1fdf8dc9ff83a8d84c19a70c7faabcf7f8bce8a913afe4f1
size 3421031
easycv/core/evaluation/__init__.py
@@ -3,6 +3,7 @@ from .auc_eval import AucEvaluator
 from .base_evaluator import Evaluator
 from .classification_eval import ClsEvaluator
 from .coco_evaluation import CocoDetectionEvaluator, CoCoPoseTopDownEvaluator
+from .face_eval import FaceKeypointEvaluator
 from .faceid_pair_eval import FaceIDPairEvaluator
 from .keypoint_eval import KeyPointEvaluator
 from .mse_eval import MSEEvaluator
easycv/core/evaluation/face_eval.py (new file, 59 lines)
@@ -0,0 +1,59 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import torch

from .base_evaluator import Evaluator
from .builder import EVALUATORS
from .metric_registry import METRICS


@EVALUATORS.register_module
class FaceKeypointEvaluator(Evaluator):

    def __init__(self, dataset_name=None, metric_names=['ave_nme']):
        super(FaceKeypointEvaluator, self).__init__(dataset_name, metric_names)
        self.metric = metric_names
        self.dataset_name = dataset_name

    def _evaluate_impl(self, prediction_dict, groundtruth_dict, **kwargs):
        """
        Args:
            prediction_dict: model forward output dict with keys
                ['point', 'pose']
            groundtruth_dict: groundtruth dict with keys ['target_point',
                'target_point_mask', 'target_pose', 'target_pose_mask'],
                used to compute accuracy
            kwargs: other parameters
        """

        def evaluate(predicts, gts, **kwargs):
            from easycv.models.utils.face_keypoint_utils import get_keypoint_accuracy, get_pose_accuracy
            ave_pose_acc = 0
            ave_nme = 0
            idx = 0

            for (predict_point, predict_pose,
                 gt) in zip(predicts['point'], predicts['pose'], gts):
                target_point = gt['target_point']
                target_point_mask = gt['target_point_mask']
                target_pose = gt['target_pose']
                target_pose_mask = gt['target_pose_mask']

                # zero out unlabeled points/poses before scoring
                target_point = target_point * target_point_mask
                target_pose = target_pose * target_pose_mask

                keypoint_accuracy = get_keypoint_accuracy(
                    predict_point, target_point)
                pose_accuracy = get_pose_accuracy(predict_pose, target_pose)

                ave_pose_acc += pose_accuracy['pose_acc']
                ave_nme += keypoint_accuracy['nme']
                idx += 1

            eval_result = {}
            idx += 0.000001  # guard against division by zero on an empty set
            eval_result['ave_pose_acc'] = ave_pose_acc / idx
            eval_result['ave_nme'] = ave_nme / idx

            return eval_result

        return evaluate(prediction_dict, groundtruth_dict)


METRICS.register_default_best_metric(FaceKeypointEvaluator, 'ave_nme', 'min')
easycv/datasets/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-from . import classification, detection, pose, segmentation, selfsup, shared
+from . import (classification, detection, face, pose, segmentation, selfsup,
+               shared)
 from .builder import build_dali_dataset, build_dataset
 from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
 from .registry import DATASETS
easycv/datasets/face/__init__.py (new file, 4 lines)
@@ -0,0 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .data_sources import *
from .face_keypoint_dataset import FaceKeypointDataset
from .pipelines import *
easycv/datasets/face/data_sources/__init__.py (new file, 2 lines)
@@ -0,0 +1,2 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .face_keypoint_source import FaceKeypintSource
easycv/datasets/face/data_sources/face_keypoint_source.py (new file, 171 lines)
@@ -0,0 +1,171 @@
import copy
import json
import logging
import os

import cv2
import numpy as np
import torch

from easycv.datasets.face.pipelines.face_keypoint_transform import (
    FaceKeypointNorm, FaceKeypointRandomAugmentation, normal)
from easycv.datasets.registry import DATASOURCES
from easycv.datasets.shared.base import BaseDataset

FACE_KEYPOINT_DATASET_INFO = dict(
    real_list_file_dir='real_face_list.txt',
    data_info_dir='infos/merge/',
    data_image_dir='images/merge/',
    data_overlay_dir='images/overlay/',
)
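
The directory layout this implies under data_root (illustrative; file names
follow the {:06d} indexing used in the loader below):

    path/to/face_landmark_data/
        real_face_list.txt      # one integer image index per line
        infos/merge/            # per-image annotations, e.g. 000001.json
        images/merge/           # face crops, e.g. 000001.png
        images/overlay/         # background images used for occlusion overlay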


@DATASOURCES.register_module()
class FaceKeypintSource():
    """
    load dataset for face key points
    """

    def __init__(self,
                 data_cfg,
                 data_range,
                 real_list_path=None,
                 info_path=None,
                 image_path=None,
                 data_overlay_path=None,
                 dataset_info=None,
                 **kwargs):
        super(FaceKeypintSource, self).__init__()
        """
        Args:
            data_cfg: Data config dict
            data_range: range of dataset indices for training or validation
            real_list_path: path of the file containing the image list
            info_path: annotation file path
            image_path: image file path
            data_overlay_path: overlay background image path
            dataset_info: A dict containing all dataset info
        """
        if dataset_info is None:
            logging.info(
                'dataset_info is missing, use default face keypoint dataset info'
            )
            dataset_info = FACE_KEYPOINT_DATASET_INFO

        data_root = data_cfg['data_root']
        real_list_file_path = os.path.join(data_root,
                                           dataset_info['real_list_file_dir'])
        data_info_dir = os.path.join(data_root, dataset_info['data_info_dir'])
        data_img_dir = os.path.join(data_root, dataset_info['data_image_dir'])
        data_overlay_dir = os.path.join(data_root,
                                        dataset_info['data_overlay_dir'])
        self.input_size = data_cfg['input_size']

        if real_list_path is not None:
            real_list_file_path = real_list_path
        if info_path is not None:
            data_info_dir = info_path
        if image_path is not None:
            data_img_dir = image_path
        if data_overlay_path is not None:
            data_overlay_dir = data_overlay_path

        # overlay backgrounds used by the occlusion augmentation
        self.overlay_image_path = []
        for overlay_img_file in sorted(os.listdir(data_overlay_dir)):
            overlay_img_filepath = os.path.join(data_overlay_dir,
                                                overlay_img_file)
            self.overlay_image_path.append(overlay_img_filepath)

        self.points_and_pose_datas = []
        with open(real_list_file_path, 'r') as real_list_file:
            real_list_lines = real_list_file.readlines()
            for index in range(data_range[0], data_range[1]):
                idx = int(real_list_lines[index])
                img_path = os.path.join(data_img_dir, '{:06d}.png'.format(idx))
                if not os.path.exists(img_path):
                    logging.warning('image %s does not exist' % img_path)
                    continue
                info_path = os.path.join(data_info_dir,
                                         '{:06d}.json'.format(idx))
                if not os.path.exists(info_path):
                    logging.warning('annotation %s does not exist' % info_path)
                    continue
                with open(info_path, 'r') as info_file:
                    info_json = json.load(info_file)
                    assert info_json['face_count'] == 1
                    base_info = info_json['face_infos'][0]['base_info']

                    # points; -999 marks unlabeled coordinates
                    assert base_info['points_array'] is not None
                    points = np.asarray(base_info['points_array']).astype(
                        np.float32)
                    points_mask = np.abs(points - (-999)) > 0.0001

                    # pose
                    pose = {'pitch': -999, 'yaw': -999, 'roll': -999}
                    if base_info['pitch'] is not None and base_info[
                            'yaw'] is not None and base_info[
                                'roll'] is not None:
                        pose['pitch'] = base_info['pitch']
                        pose['yaw'] = base_info['yaw']
                        # pose["roll"] = base_info["roll"]
                        # datasets have been preprocessed, roll=0
                        # add noise to pose
                        pose['roll'] = normal() * 10.0

                    pose_mask = np.asarray([
                        np.abs(pose['pitch'] - (-999)) > 0.0001,
                        np.abs(pose['roll'] - (-999)) > 0.0001,
                        np.abs(pose['yaw'] - (-999)) > 0.0001
                    ])

                    self.points_and_pose_datas.append(
                        (img_path, points, points_mask, pose, pose_mask))

        # precompute the groundtruth dict consumed by FaceKeypointEvaluator
        self.db = []
        for img_path, points, points_mask, pose, pose_mask in copy.deepcopy(
                self.points_and_pose_datas):
            image = cv2.imread(img_path)

            points[:, 0] = points[:, 0] / image.shape[1] * float(
                self.input_size)
            points[:, 1] = points[:, 1] / image.shape[0] * float(
                self.input_size)

            target_point = np.reshape(points,
                                      (points.shape[0] * points.shape[1]))
            points_mask = points_mask.astype(np.float32)
            points_mask = np.reshape(
                points_mask, (points_mask.shape[0] * points_mask.shape[1]))
            pose = np.asarray([pose['pitch'], pose['roll'], pose['yaw']])

            self.db.append({
                'img_path':
                img_path,
                'target_point':
                torch.tensor(np.array(target_point, np.float32)),
                'target_point_mask':
                torch.tensor(points_mask),
                'target_pose':
                torch.tensor(np.array(pose, np.float32)),
                'target_pose_mask':
                torch.tensor(pose_mask.astype(np.float32))
            })

    def __getitem__(self, index):
        img_path, points, points_mask, pose, pose_mask = copy.deepcopy(
            self.points_and_pose_datas[index])
        image = cv2.imread(img_path)

        return {
            'img': image,
            'target_point': points,
            'target_point_mask': points_mask,
            'target_pose': pose,
            'target_pose_mask': pose_mask,
            'overlay_image_path': self.overlay_image_path
        }

    def __len__(self):
        return len(self.points_and_pose_datas)
easycv/datasets/face/face_keypoint_dataset.py (new file, 45 lines)
@@ -0,0 +1,45 @@
import copy
import json
import logging
import os

import cv2
import numpy as np
import torch
import torch.utils.data as data

from easycv.datasets.face.pipelines.face_keypoint_transform import (
    FaceKeypointNorm, FaceKeypointRandomAugmentation, normal)
from easycv.datasets.registry import DATASETS
from easycv.datasets.shared.base import BaseDataset


@DATASETS.register_module()
class FaceKeypointDataset(BaseDataset):
    """
    dataset for face key points
    """

    def __init__(self, data_source, pipeline, profiling=False):
        super(FaceKeypointDataset, self).__init__(data_source, pipeline,
                                                  profiling)
        """
        Args:
            data_source: Data_source config dict
            pipeline: Pipeline config list
            profiling: If set True, will print pipeline time
        """

    def evaluate(self, outputs, evaluators, **kwargs):
        eval_result = {}
        for evaluator in evaluators:
            eval_result.update(
                evaluator.evaluate(
                    prediction_dict=outputs,
                    groundtruth_dict=self.data_source.db))

        return eval_result

    def __getitem__(self, idx):
        results = self.data_source[idx]
        return self.pipeline(results)
easycv/datasets/face/pipelines/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .face_keypoint_transform import (FaceKeypointNorm,
                                      FaceKeypointRandomAugmentation)

__all__ = ['FaceKeypointRandomAugmentation', 'FaceKeypointNorm']
easycv/datasets/face/pipelines/face_keypoint_transform.py (new file, 431 lines)
@@ -0,0 +1,431 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import random

import cv2
import imgaug
import imgaug.augmenters as iaa
import numpy as np

from easycv.datasets.registry import PIPELINES

DEST_SIZE = 256
BASE_LANDMARK_NUM = 106
ENLARGE_RATIO = 1.1

# index pairs that swap when the face is mirrored horizontally
CONTOUR_PARTS = [[0, 32], [1, 31], [2, 30], [3, 29], [4, 28], [5, 27], [6, 26],
                 [7, 25], [8, 24], [9, 23], [10, 22], [11, 21], [12, 20],
                 [13, 19], [14, 18], [15, 17]]
BROW_PARTS = [[33, 46], [34, 45], [35, 44], [36, 43], [37, 42], [38, 50],
              [39, 49], [40, 48], [41, 47]]
EYE_PARTS = [[66, 79], [67, 78], [68, 77], [69, 76], [70, 75], [71, 82],
             [72, 81], [73, 80], [74, 83]]
NOSE_PARTS = [[55, 65], [56, 64], [57, 63], [58, 62], [59, 61]]
MOUSE_PARTS = [[84, 90], [85, 89], [86, 88], [96, 100], [97, 99], [103, 101],
               [95, 91], [94, 92]]
IRIS_PARTS = [[104, 105]]
MATCHED_PARTS = CONTOUR_PARTS + BROW_PARTS + EYE_PARTS + NOSE_PARTS + MOUSE_PARTS + IRIS_PARTS


def normal():
    """
    Truncated standard normal ("3-sigma rule"): resample until the draw lies
    within 3 sigma, then rescale so the return value is in (-1, +1).
    """
    mu, sigma = 0, 1
    while True:
        s = np.random.normal(mu, sigma)
        if s < mu - 3 * sigma or s > mu + 3 * sigma:
            continue
        return s / 3 * sigma


def rotate(angle, center, landmark):
    rad = angle * np.pi / 180.0
    alpha = np.cos(rad)
    beta = np.sin(rad)
    M = np.zeros((2, 3), dtype=np.float32)
    M[0, 0] = alpha
    M[0, 1] = beta
    M[0, 2] = (1 - alpha) * center[0] - beta * center[1]
    M[1, 0] = -beta
    M[1, 1] = alpha
    M[1, 2] = beta * center[0] + (1 - alpha) * center[1]

    landmark_ = np.asarray([(M[0, 0] * x + M[0, 1] * y + M[0, 2],
                             M[1, 0] * x + M[1, 1] * y + M[1, 2])
                            for (x, y) in landmark])
    return M, landmark_
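
rotate() builds the standard affine matrix for a rotation by angle theta about
center (c_x, c_y), the same matrix cv2.getRotationMatrix2D returns:

    M = \begin{pmatrix}
        \cos\theta & \sin\theta & (1 - \cos\theta)\,c_x - \sin\theta\,c_y \\
        -\sin\theta & \cos\theta & \sin\theta\,c_x + (1 - \cos\theta)\,c_y
    \end{pmatrix}

and applies it to every landmark (x, y) as a homogeneous point (x, y, 1).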


class OverLayGenerator:

    def __init__(self, shape):
        # 4x4 grid
        h_seg_len = shape[0] // 4
        w_seg_len = shape[1] // 4

        self.overlay = []
        # 2x2 overlay
        for i in range(3):
            for j in range(3):
                if i == 1 and j == 1:
                    continue
                self.overlay.append((i * w_seg_len, j * h_seg_len,
                                     2 * w_seg_len, 2 * h_seg_len))

        # 2x3 overlay
        for i in range(3):
            for j in range(2):
                if i == 1:
                    continue
                self.overlay.append((i * w_seg_len, j * h_seg_len,
                                     2 * w_seg_len, 3 * h_seg_len))
        for i in range(2):
            for j in range(3):
                if j == 1:
                    continue
                self.overlay.append((i * w_seg_len, j * h_seg_len,
                                     3 * w_seg_len, 2 * h_seg_len))

        # 2x4 overlay
        for i in range(3):
            for j in range(1):
                if i == 1:
                    continue
                self.overlay.append((i * w_seg_len, j * h_seg_len,
                                     2 * w_seg_len, 4 * h_seg_len))
        for i in range(1):
            for j in range(3):
                if j == 1:
                    continue
                self.overlay.append((i * w_seg_len, j * h_seg_len,
                                     4 * w_seg_len, 2 * h_seg_len))


class FaceKeypointsDataAugumentation:

    def __init__(self, input_size):
        # options
        self.enable_flip = True
        self.enable_rotate = True
        self.input_size = input_size

        # mask generator
        coarse_salt_and_pepper_iaa = iaa.CoarseSaltAndPepper(
            (0.25, 0.35), size_percent=(0.03125, 0.015625))
        self.mask_generator = coarse_salt_and_pepper_iaa.mask

        # overlay generator
        self.overlay_generator = OverLayGenerator(shape=(256, 256))

        # flip
        self.mirror_map = FaceKeypointsDataAugumentation.compute_mirror_map()

    @staticmethod
    def compute_mirror_map():
        mirror_map = np.array(range(0, BASE_LANDMARK_NUM), np.int32)
        for x, y in MATCHED_PARTS:
            mirror_map[x] = y
            mirror_map[y] = x

        return mirror_map

    def aug_flip(self, img, pts, visibility, pose):
        # pts[:, 0] = self.input_size - pts[:, 0]
        pts[:, 0] = img.shape[1] - pts[:, 0]
        pts = pts[self.mirror_map]
        if visibility is not None:
            visibility = visibility[self.mirror_map]
        img = cv2.flip(img, 1)
        if pose is not None:
            # fix roll & yaw in pose
            pose['roll'] = -pose['roll']
            pose['yaw'] = -pose['yaw']

        return img, pts, visibility, pose

    def aug_rotate(self, img, pts, pose, angle):
        center = [DEST_SIZE // 2, DEST_SIZE // 2]
        if pose is not None:
            # fix roll in pose
            pose['roll'] += angle

        cx, cy = center
        M, pts = rotate(angle, (cx, cy), pts)

        imgT = cv2.warpAffine(img, M, (int(img.shape[1]), int(img.shape[0])))

        # crop a jittered, slightly enlarged box around the rotated landmarks
        x1 = np.min(pts[:, 0])
        x2 = np.max(pts[:, 0])
        y1 = np.min(pts[:, 1])
        y2 = np.max(pts[:, 1])
        w = x2 - x1 + 1
        h = y2 - y1 + 1
        x1 = int(x1 - (ENLARGE_RATIO - 1.0) / 2.0 * w)
        y1 = int(y1 - (ENLARGE_RATIO - 1.0) * h)

        new_w = int(ENLARGE_RATIO * (1 + normal() * 0.25) * w)
        new_h = int(ENLARGE_RATIO * (1 + normal() * 0.25) * h)
        new_x1 = x1 + int(normal() * DEST_SIZE * 0.15)
        new_y1 = y1 + int(normal() * DEST_SIZE * 0.15)
        new_x2 = new_x1 + new_w
        new_y2 = new_y1 + new_h

        new_xy = new_x1, new_y1
        pts = pts - new_xy

        # pad with a constant BGR mean when the crop leaves the image
        height, width, _ = imgT.shape
        dx = max(0, -new_x1)
        dy = max(0, -new_y1)
        new_x1 = max(0, new_x1)
        new_y1 = max(0, new_y1)

        edx = max(0, new_x2 - width)
        edy = max(0, new_y2 - height)
        new_x2 = min(width, new_x2)
        new_y2 = min(height, new_y2)

        imgT = imgT[new_y1:new_y2, new_x1:new_x2]
        if dx > 0 or dy > 0 or edx > 0 or edy > 0:
            imgT = cv2.copyMakeBorder(
                imgT,
                dy,
                edy,
                dx,
                edx,
                cv2.BORDER_CONSTANT,
                value=(103.94, 116.78, 123.68))

        return imgT, pts, pose

    def random_mask(self, img):
        mask = self.mask_generator.draw_samples(size=img.shape)
        mask = np.expand_dims(np.sum(mask, axis=-1) > 0, axis=-1)
        return mask

    def random_overlay(self):
        index = np.random.choice(len(self.overlay_generator.overlay))
        overlay = self.overlay_generator.overlay[index]
        return overlay

    def augment_blur(self, img):
        h, w = img.shape[:2]
        assert h == w
        # downscale to a random size, corrupt, then upscale back
        ssize = int(random.uniform(0.01, 0.5) * h)
        aug_seq = iaa.Sequential([
            iaa.Sometimes(
                1.0,
                iaa.OneOf([
                    iaa.GaussianBlur((3, 15)),
                    iaa.AverageBlur(k=(3, 15)),
                    iaa.MedianBlur(k=(3, 15)),
                    iaa.MotionBlur((5, 25))
                ])),
            iaa.Resize(ssize, interpolation=imgaug.ALL),
            iaa.Sometimes(
                0.6,
                iaa.OneOf([
                    iaa.AdditiveGaussianNoise(
                        loc=0, scale=(0.0, 0.1 * 255), per_channel=0.5),
                    iaa.AdditiveLaplaceNoise(
                        loc=0, scale=(0.0, 0.1 * 255), per_channel=0.5),
                    iaa.AdditivePoissonNoise(lam=(0, 30), per_channel=0.5)
                ])),
            iaa.Sometimes(0.8, iaa.JpegCompression(compression=(40, 90))),
            iaa.Resize(h),
        ])

        aug_img = aug_seq.augment_image(img)
        return aug_img

    def augment_color_temperature(self, img):
        aug = iaa.ChangeColorTemperature((1000, 40000))

        aug_img = aug.augment_image(img)
        return aug_img

    def aug_clr_noise_blur(self, img):
        # skin & light 5%
        if np.random.choice((True, False), p=[0.05, 0.95]):
            img_ycrcb_raw = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB)
            skin_factor_list = [0.6, 0.8, 1.0, 1.2, 1.4]
            skin_factor = np.random.choice(skin_factor_list)
            img_ycrcb_raw[:, :, 0:1] = np.clip(
                img_ycrcb_raw[:, :, 0:1].astype(np.float32) * skin_factor, 0,
                255).astype(np.uint8)
            img = cv2.cvtColor(img_ycrcb_raw, cv2.COLOR_YCR_CB2BGR)

        # gauss blur 5%
        if np.random.choice((True, False), p=[0.05, 0.95]):
            sigma = np.random.choice([0.25, 0.50, 0.75])
            gauss_blur_iaa = iaa.GaussianBlur(sigma=sigma)
            img = gauss_blur_iaa(image=img)

        # gauss noise 5%
        if np.random.choice((True, False), p=[0.05, 0.95]):
            scale = np.random.choice([0.01, 0.03, 0.05])
            gauss_noise_iaa = iaa.AdditiveGaussianNoise(scale=scale * 255)
            img = gauss_noise_iaa(image=img)

        # motion blur 5%
        if np.random.choice((True, False), p=[0.05, 0.95]):
            angle = np.random.choice([0, 45, 90, 135, 180, 225, 270, 315])
            motion_blur_iaa = iaa.MotionBlur(k=5, angle=angle)
            img = motion_blur_iaa(image=img)

        # jpeg compress 5%
        if np.random.choice((True, False), p=[0.05, 0.95]):
            jpeg_compress_iaa = iaa.JpegCompression(compression=(10, 50))
            img = jpeg_compress_iaa(image=img)

        # gamma contrast 5%
        if np.random.choice((True, False), p=[0.05, 0.95]):
            gamma_contrast_iaa = iaa.GammaContrast((0.85, 1.15))
            img = gamma_contrast_iaa(image=img)

        # brightness 5%
        if np.random.choice((True, False), p=[0.05, 0.95]):
            brightness_iaa = iaa.MultiplyAndAddToBrightness(
                mul=(0.85, 1.15), add=(-10, 10))
            img = brightness_iaa(image=img)

        return img

    def augment_set(self, img):
        noisy_image = img.copy().astype(np.uint8)
        if np.random.choice((True, False), p=[0.6, 0.4]):
            aug = iaa.ChangeColorTemperature((1000, 40000))
            noisy_image = aug.augment_image(noisy_image)

        if np.random.choice((True, False), p=[0.8, 0.2]):
            aug_seq = iaa.Sequential([
                iaa.Sometimes(0.5, iaa.JpegCompression(compression=(40, 90))),
                iaa.Sometimes(0.5, iaa.MotionBlur((3, 7))),
                iaa.Sometimes(
                    0.5,
                    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255))),
            ],
                                     random_order=True)
            noisy_image = aug_seq.augment_image(noisy_image)

        sometimes = lambda aug: iaa.Sometimes(0.25, aug)
        seq = iaa.Sequential([
            sometimes(iaa.AverageBlur(k=(2, 5))),
            sometimes(iaa.GammaContrast((0.5, 2.0)))
        ],
                             random_order=True)

        noisy_image = seq(image=noisy_image)
        return noisy_image
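
Each corruption in aug_clr_noise_blur above fires independently with
probability 0.05, so a training crop passes through all seven unchanged with
probability 0.95^7, roughly 0.70; most samples keep their original appearance
while a minority receive one or more degradations.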


@PIPELINES.register_module()
class FaceKeypointNorm:
    """Resize the image to input_size and flatten keypoint/pose targets."""

    def __init__(self, input_size=96):
        self.input_size = input_size

    def __call__(self, results):
        """Normalize one sample dict in place and return it."""
        if 'img' in results.keys():
            image = results['img']
            # keep the pre-resize size: the keypoints below are still
            # expressed in these source coordinates
            src_h, src_w = image.shape[0], image.shape[1]
            image = cv2.resize(image, (self.input_size, self.input_size))
            results['img'] = np.array(image)

        if 'target_point' in results.keys():
            points = results['target_point']
            points[:, 0] = points[:, 0] / src_w * float(self.input_size)
            points[:, 1] = points[:, 1] / src_h * float(self.input_size)
            target_point = np.reshape(points,
                                      (points.shape[0] * points.shape[1]))
            results['target_point'] = np.array(target_point, np.float32)
        else:
            results['target_point'] = np.array(np.zeros(212), np.float32)

        if 'target_point_mask' in results.keys():
            points_mask = results['target_point_mask']
            points_mask = points_mask.astype(np.float32)
            points_mask = np.reshape(
                points_mask, (points_mask.shape[0] * points_mask.shape[1]))
            results['target_point_mask'] = points_mask.astype(np.float32)
        else:
            results['target_point_mask'] = np.array(
                np.zeros(212), np.float32)

        if 'target_pose' in results.keys():
            pose = results['target_pose']
            pose = np.asarray([pose['pitch'], pose['roll'], pose['yaw']])
            results['target_pose'] = pose.astype(np.float32)
        else:
            results['target_pose'] = np.array(np.zeros(3), np.float32)

        if 'target_pose_mask' not in results.keys():
            results['target_pose_mask'] = np.array(np.zeros(3), np.float32)

        return results


@PIPELINES.register_module()
class FaceKeypointRandomAugmentation:
    """Random occlusion overlay, rotation, flip and photometric noise."""

    def __init__(self, input_size=96):
        self.input_size = input_size

        # data augmentor
        self.data_aug = FaceKeypointsDataAugumentation(self.input_size)

    def __call__(self, results):
        """Perform data augmentation on one sample dict."""
        image = results['img']
        points = results['target_point']
        points_mask = results['target_point_mask']
        pose = results['target_pose']
        pose_mask = results['target_pose_mask']
        overlay_image_path = results['overlay_image_path']

        if np.random.choice((True, False), p=[0.2, 0.8]):
            # paste a random RGBA overlay patch to simulate occlusion
            overlay_pos = self.data_aug.random_overlay()
            overlay_img_index = np.random.choice(len(overlay_image_path))
            overlay_img_filepath = overlay_image_path[overlay_img_index]
            overlay_img = cv2.imread(overlay_img_filepath,
                                     cv2.IMREAD_UNCHANGED)

            (x, y, w, h) = overlay_pos
            x1, y1, x2, y2 = x, y, x + w, y + h
            overlay_img = cv2.resize(overlay_img, dsize=(w, h))
            overlay_mask = overlay_img[:, :, 3:4] / 255.0  # alpha channel
            image[y1:y2, x1:x2, :] = image[y1:y2, x1:x2, :] * (
                1 - overlay_mask) + overlay_img[:, :, 0:3] * overlay_mask
            image = image.astype(np.uint8)

        angle = pose['roll']
        image, points, pose = self.data_aug.aug_rotate(
            image, points, pose, angle)  # counterclockwise rotate angle
        pose['roll'] = angle  # reset roll=angle

        if np.random.choice((True, False)):
            image_transform, points, _, pose = self.data_aug.aug_flip(
                image, points, None, pose)
        else:
            image_transform = image

        image_transform = self.data_aug.aug_clr_noise_blur(image_transform)

        results['img'] = image_transform
        results['target_point'] = points
        results['target_pose'] = pose
        return results
easycv/models/__init__.py
@@ -3,6 +3,7 @@ from .backbones import *  # noqa: F401,F403
 from .builder import build_backbone, build_head, build_loss, build_model
 from .classification import *
 from .detection import *
+from .face import *
 from .heads import *
 from .loss import *
 from .pose import TopDown
easycv/models/backbones/__init__.py
@@ -4,6 +4,7 @@ from .bninception import BNInception
 from .conv_mae_vit import FastConvMAEViT
 from .conv_vitdet import ConvViTDet
 from .efficientformer import EfficientFormer
+from .face_keypoint_backbone import FaceKeypointBackbone
 from .genet import PlainNet
 from .hrnet import HRNet
 from .inceptionv3 import Inception3
easycv/models/backbones/face_keypoint_backbone.py (new file, 90 lines)
@@ -0,0 +1,90 @@
import torch.nn as nn

from easycv.models.registry import BACKBONES
from easycv.models.utils.face_keypoint_utils import InvertedResidual, Residual


@BACKBONES.register_module
class FaceKeypointBackbone(nn.Module):

    def __init__(self,
                 in_channels=3,
                 out_channels=48,
                 residual_activation='relu',
                 inverted_activation='half_v2',
                 inverted_expand_ratio=2):
        super(FaceKeypointBackbone, self).__init__()
        self.conv1 = Residual(in_channels, 12, 3, 2, 0)
        self.conv2 = Residual(12, 12, 3, 1, 0, activation=residual_activation)
        self.conv3 = Residual(12, 12, 3, 1, 1, activation=residual_activation)
        self.conv4 = Residual(12, 12, 3, 1, 0, activation=residual_activation)
        self.conv5 = Residual(12, 24, 3, 2, 0, activation=residual_activation)
        self.conv6 = Residual(24, 24, 3, 1, 0, activation=residual_activation)
        self.conv7 = Residual(24, 24, 3, 1, 1, activation=residual_activation)
        self.conv8 = Residual(24, 24, 3, 1, 1, activation=residual_activation)
        self.conv9 = InvertedResidual(
            24,
            48,
            3,
            2,
            0,
            expand_ratio=inverted_expand_ratio,
            activation=inverted_activation)
        self.conv10 = InvertedResidual(
            48,
            48,
            3,
            1,
            0,
            expand_ratio=inverted_expand_ratio,
            activation=inverted_activation)
        self.conv11 = InvertedResidual(
            48,
            48,
            3,
            1,
            1,
            expand_ratio=inverted_expand_ratio,
            activation=inverted_activation)
        self.conv12 = InvertedResidual(
            48,
            48,
            3,
            1,
            1,
            expand_ratio=inverted_expand_ratio,
            activation=inverted_activation)
        self.conv13 = InvertedResidual(
            48,
            48,
            3,
            1,
            1,
            expand_ratio=inverted_expand_ratio,
            activation=inverted_activation)
        self.conv14 = InvertedResidual(
            48,
            out_channels,
            3,
            2,
            0,
            expand_ratio=inverted_expand_ratio,
            activation=inverted_activation)

    def forward(self, x):
        x1 = self.conv1(x)
        x2 = self.conv2(x1)
        x3 = self.conv3(x2)
        x4 = self.conv4(x3)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        x7 = self.conv7(x6)
        x8 = self.conv8(x7)
        x9 = self.conv9(x8)
        x10 = self.conv10(x9)
        x11 = self.conv11(x10)
        x12 = self.conv12(x11)
        x13 = self.conv13(x12)
        x14 = self.conv14(x13)

        return x14
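
For the default 96x96 input, the spatial sizes through this stack work out to
96 -> 47 -> 45 -> 45 -> 43 -> 21 -> 19 -> 19 -> 19 -> 9 -> 7 -> 7 -> 7 -> 7 -> 3,
so the backbone emits a 48x3x3 feature map, which is exactly what the heads'
View((-1, C * 3 * 3, 1, 1)) reshape expects. A quick check (sketch; the import
relies on the backbones/__init__.py hunk above):

    import torch
    from easycv.models.backbones import FaceKeypointBackbone

    backbone = FaceKeypointBackbone()
    feat = backbone(torch.randn(1, 3, 96, 96))
    print(feat.shape)  # torch.Size([1, 48, 3, 3])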
easycv/models/face/__init__.py (new file, 2 lines)
@@ -0,0 +1,2 @@
from .face_keypoint import FaceKeypoint
from .head import *
easycv/models/face/face_keypoint.py (new file, 103 lines)
@@ -0,0 +1,103 @@
import mmcv
import numpy as np

from easycv.models import builder
from easycv.models.base import BaseModel
from easycv.models.builder import MODELS
from easycv.models.utils.face_keypoint_utils import (get_keypoint_accuracy,
                                                     get_pose_accuracy)


@MODELS.register_module()
class FaceKeypoint(BaseModel):

    def __init__(self,
                 backbone,
                 neck=None,
                 keypoint_head=None,
                 pose_head=None,
                 pretrained=None,
                 loss_keypoint=None,
                 loss_pose=None):
        super().__init__()
        self.pretrained = pretrained

        self.backbone = builder.build_backbone(backbone)

        if neck is not None:
            self.neck = builder.build_neck(neck)

        if keypoint_head is not None:
            if 'loss_keypoint' not in keypoint_head and loss_keypoint is not None:
                keypoint_head['loss_keypoint'] = loss_keypoint
            self.keypoint_head = builder.build_head(keypoint_head)

        if pose_head is not None:
            if 'loss_pose' not in pose_head and loss_pose is not None:
                pose_head['loss_pose'] = loss_pose
            self.pose_head = builder.build_head(pose_head)

    @property
    def with_neck(self):
        """Check if has neck."""
        return hasattr(self, 'neck')

    @property
    def with_keypoint(self):
        """Check if has keypoint_head."""
        return hasattr(self, 'keypoint_head')

    @property
    def with_pose(self):
        """Check if has pose_head."""
        return hasattr(self, 'pose_head')

    def forward_train(self, img, target_point, target_point_mask, target_pose,
                      target_pose_mask, **kwargs):
        """Defines the computation performed at every call when training."""
        output = self.backbone(img)

        if self.with_neck:
            output = self.neck(output)
        if self.with_keypoint:
            output_points = self.keypoint_head(output)
        if self.with_pose:
            output_pose = self.pose_head(output)

        # mask out unlabeled targets
        target_point = target_point * target_point_mask
        target_pose = target_pose * target_pose_mask

        losses = dict()
        if self.with_keypoint:
            keypoint_losses = self.keypoint_head.get_loss(
                output_points, target_point, target_point_mask, target_pose)
            losses.update(keypoint_losses)
            keypoint_accuracy = get_keypoint_accuracy(output_points,
                                                      target_point)
            losses.update(keypoint_accuracy)

        if self.with_pose:
            # convert pose predictions from radians to degrees (x 180 / pi)
            # before comparing against the degree-valued targets
            output_pose = output_pose * 180.0 / np.pi
            output_pose = output_pose * target_pose_mask

            pose_losses = self.pose_head.get_loss(output_pose, target_pose)
            losses.update(pose_losses)
            pose_accuracy = get_pose_accuracy(output_pose, target_pose)
            losses.update(pose_accuracy)
        return losses

    def forward_test(self, img, **kwargs):
        """Defines the computation performed at every call when testing."""
        output = self.backbone(img)
        if self.with_neck:
            output = self.neck(output)
        if self.with_keypoint:
            output_points = self.keypoint_head(output)
        if self.with_pose:
            output_pose = self.pose_head(output)

        ret = {}
        ret['point'] = output_points
        ret['pose'] = output_pose
        return ret
easycv/models/face/head/__init__.py (new file, 2 lines)
@@ -0,0 +1,2 @@
from .face_keypoint_head import FaceKeypointHead
from .face_keypoint_pose_head import FacePoseHead
easycv/models/face/head/face_keypoint_head.py (new file, 68 lines)
@@ -0,0 +1,68 @@
import copy

import numpy as np
import torch
import torch.nn as nn

from easycv.models.builder import HEADS, build_loss
from easycv.models.utils.face_keypoint_utils import (InvertedResidual, View,
                                                     conv_bn, conv_no_relu,
                                                     get_keypoint_accuracy)


@HEADS.register_module
class FaceKeypointHead(nn.Module):

    def __init__(
        self,
        mean_face,
        loss_keypoint,
        in_channels=48,
        out_channels=212,
        input_size=96,
        inverted_expand_ratio=2,
        inverted_activation='half_v2',
    ):
        super(FaceKeypointHead, self).__init__()
        self.input_size = input_size
        self.face_mean_shape = copy.deepcopy(np.asarray(mean_face))
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.branches = []

        self.loss = build_loss(loss_keypoint)

        # points branch: expand to 96 channels, flatten the 3x3 feature map,
        # then regress out_channels = 2 * num_points values with 1x1 convs
        self.branches.append(
            nn.Sequential(
                InvertedResidual(
                    in_channels,
                    96,
                    3,
                    1,
                    1,
                    expand_ratio=inverted_expand_ratio,
                    activation=inverted_activation),
                View((-1, 96 * 3 * 3, 1, 1)),
                conv_bn(96 * 3 * 3, 128, 1, 1, 0),
                conv_bn(128, 128, 1, 1, 0),
                conv_no_relu(128, out_channels, 1, 1, 0),
                View((-1, out_channels))))
        self.branches = nn.ModuleList(self.branches)

    def get_loss(self, output, target_point, target_point_mask, target_pose):
        losses = dict()
        loss = self.loss(output * target_point_mask, target_point, target_pose)
        losses['point_loss'] = loss

        return losses

    def get_accuracy(self, output, target_point):
        return get_keypoint_accuracy(output, target_point)

    def forward(self, x):
        point = self.branches[0](x)
        # decode: predictions are residuals around the normalized mean shape,
        # then scaled to pixel coordinates of the input crop
        point = point * 0.5 + torch.from_numpy(self.face_mean_shape).to(
            self.device)
        point = point * self.input_size

        return point
easycv/models/face/head/face_keypoint_pose_head.py (new file, 55 lines)
@@ -0,0 +1,55 @@
import numpy as np
import torch
import torch.nn as nn

from easycv.models.builder import HEADS, build_loss
from easycv.models.utils.face_keypoint_utils import (InvertedResidual, View,
                                                     conv_bn, conv_no_relu,
                                                     get_pose_accuracy)


@HEADS.register_module
class FacePoseHead(nn.Module):

    def __init__(
        self,
        loss_pose,
        in_channels=48,
        out_channels=3,
        inverted_expand_ratio=2,
        inverted_activation='half_v2',
    ):
        super(FacePoseHead, self).__init__()
        self.branches = []

        self.loss = build_loss(loss_pose)

        # pose branch: regress (pitch, roll, yaw) from the 3x3 feature map
        self.branches.append(
            nn.Sequential(
                InvertedResidual(
                    in_channels,
                    48,
                    3,
                    1,
                    1,
                    expand_ratio=inverted_expand_ratio,
                    activation=inverted_activation),
                View((-1, 48 * 3 * 3, 1, 1)),
                conv_bn(48 * 3 * 3, 48, 1, 1, 0),
                conv_bn(48, 48, 1, 1, 0),
                conv_no_relu(48, out_channels, 1, 1, 0),
                View((-1, out_channels))))
        self.branches = nn.ModuleList(self.branches)

    def get_loss(self, output, target_pose):
        losses = dict()
        loss = self.loss(output, target_pose)
        losses['pose_loss'] = loss

        return losses

    def get_accuracy(self, output, target_pose):
        return get_pose_accuracy(output, target_pose)

    def forward(self, x):
        return self.branches[0](x)
easycv/models/loss/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 from .cross_entropy_loss import CrossEntropyLoss
+from .face_keypoint_loss import FacePoseLoss, WingLossWithPose
 from .focal_loss import FocalLoss
 from .iou_loss import GIoULoss, IoULoss
 from .mse_loss import JointsMSELoss
easycv/models/loss/face_keypoint_loss.py (new file, 91 lines)
@@ -0,0 +1,91 @@
import copy
import math

import numpy as np
import torch
import torch.nn as nn

from easycv.models.builder import LOSSES

# per-region sizes in flattened (x, y) coordinates, i.e. 2 * num_landmarks;
# they sum to 212 = 2 * 106
CONSTANT_CONTOUR = 66
CONSTANT_EYEBROW = 18
CONSTANT_EYE = 18
CONSTANT_NOSE = 30
CONSTANT_LIPS = 40
CONSTANT_EYE_CENTER = 4


@LOSSES.register_module()
class WingLossWithPose(nn.Module):

    def __init__(self,
                 num_points=106,
                 left_eye_left_corner_index=66,
                 right_eye_right_corner_index=79,
                 points_weight=1.0,
                 contour_weight=1.5,
                 eyebrow_weight=1.5,
                 eye_weight=1.7,
                 nose_weight=1.3,
                 lip_weight=1.7,
                 omega=10,
                 epsilon=2):
        super(WingLossWithPose, self).__init__()
        self.omega = omega
        self.epsilon = epsilon

        self.num_points = num_points
        self.left_eye_left_corner_index = left_eye_left_corner_index
        self.right_eye_right_corner_index = right_eye_right_corner_index
        self.points_weight = points_weight
        contour_weight = np.full(CONSTANT_CONTOUR, contour_weight)
        eyebrow_left_weight = np.full(CONSTANT_EYEBROW, eyebrow_weight)
        eyebrow_right_weight = np.full(CONSTANT_EYEBROW, eyebrow_weight)
        nose_weight = np.full(CONSTANT_NOSE, nose_weight)
        eye_left_weight = np.full(CONSTANT_EYE, eye_weight)
        eye_right_weight = np.full(CONSTANT_EYE, eye_weight)
        lips_weight = np.full(CONSTANT_LIPS, lip_weight)
        eye_center_weight = np.full(CONSTANT_EYE_CENTER, eye_weight)
        part_weight = np.concatenate(
            (contour_weight, eyebrow_left_weight, eyebrow_right_weight,
             nose_weight, eye_left_weight, eye_right_weight, lips_weight,
             eye_center_weight),
            axis=0)

        self.part_weight = None
        if part_weight is not None:
            self.part_weight = torch.from_numpy(part_weight).cuda()

    def forward(self, pred, target, pose):
        # samples with a larger head pose get up to 11x more weight,
        # averaged over pitch/roll/yaw
        weight = 5.0 * (1.0 - torch.cos(pose * np.pi / 180.0)) + 1.0
        weight = torch.sum(weight, dim=1) / 3.0
        weight = weight.view((weight.shape[0], 1))

        if self.part_weight is not None:
            weight = weight * self.part_weight

        y = target
        y_hat = pred
        delta_y = (y - y_hat).abs() * weight
        delta_y1 = delta_y[delta_y < self.omega]
        delta_y2 = delta_y[delta_y >= self.omega]
        loss1 = self.omega * torch.log(1 + delta_y1 / self.epsilon)
        C = self.omega - self.omega * math.log(1 + self.omega / self.epsilon)
        loss2 = delta_y2 - C
        result = self.points_weight * (loss1.sum() + loss2.sum()) / (
            len(loss1) + len(loss2))

        return result
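
This is the wing loss of Feng et al. applied element-wise to the pose-weighted
residual d = |y - y_hat| * weight:

    loss(d) = \begin{cases}
        \omega \ln(1 + d/\epsilon) & d < \omega \\
        d - C & d \ge \omega
    \end{cases},
    \qquad C = \omega - \omega \ln(1 + \omega/\epsilon)

With the config values omega=10 and epsilon=2 the loss is logarithmic (steep
gradient) for small residuals and linear for large ones, and the constant C
makes the two pieces meet exactly at d = omega.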


@LOSSES.register_module()
class FacePoseLoss(nn.Module):

    def __init__(self, pose_weight=1.0):
        super(FacePoseLoss, self).__init__()
        self.criterion = nn.MSELoss()
        self.pose_weight = pose_weight

    def forward(self, pred, target):
        result = self.pose_weight * self.criterion(pred, target)
        return result
easycv/models/utils/__init__.py
@@ -5,6 +5,10 @@ from .conv_ws import ConvWS2d, conv_ws_2d
 from .dist_utils import (DistributedLossWrapper, DistributedMinerWrapper,
                          get_world_size, is_dist_avail_and_initialized,
                          reduce_mean)
+from .face_keypoint_utils import (ION, InvertedResidual, Residual, Softmax,
+                                  View, conv_bn, conv_no_relu,
+                                  get_keypoint_accuracy, get_pose_accuracy,
+                                  pose_accuracy)
 from .gather_layer import GatherLayer
 from .init_weights import _init_weights, trunc_normal_
 from .multi_pooling import GeMPooling, MultiAvgPooling, MultiPooling
easycv/models/utils/face_keypoint_utils.py (new file, 240 lines)
@@ -0,0 +1,240 @@
import copy
import math

import numpy as np
import torch
import torch.nn as nn


def conv_bn(inp, oup, kernel, stride, padding=1):
    return nn.Sequential(
        nn.Conv2d(inp, oup, kernel, stride, padding, bias=False),
        nn.BatchNorm2d(oup), nn.PReLU(oup))


def conv_no_relu(inp, oup, kernel, stride, padding=1):
    return nn.Sequential(
        nn.Conv2d(inp, oup, kernel, stride, padding, bias=False),
        nn.BatchNorm2d(oup))


class View(nn.Module):

    def __init__(self, shape):
        super(View, self).__init__()
        self.shape = shape

    def forward(self, x):
        return x.view(*self.shape)


class Softmax(nn.Module):

    def __init__(self, dim):
        super(Softmax, self).__init__()
        self.softmax = nn.Softmax(dim)

    def forward(self, x):
        return self.softmax(x)


class InvertedResidual(nn.Module):
    """MobileNetV2-style block: 1x1 expand, depthwise conv, 1x1 project.

    The activation variants differ only in the nonlinearities after the
    expand and depthwise convs: 'relu' = ReLU/ReLU, 'prelu' = PReLU/PReLU,
    'half_v1' = ReLU/PReLU, 'half_v2' = PReLU/ReLU.
    """

    def __init__(self,
                 inp,
                 oup,
                 kernel_size,
                 stride,
                 padding,
                 expand_ratio=2,
                 use_connect=False,
                 activation='relu'):
        super(InvertedResidual, self).__init__()

        hid_channels = int(inp * expand_ratio)
        if activation == 'relu':
            self.conv = nn.Sequential(
                nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hid_channels), nn.ReLU(inplace=True),
                nn.Conv2d(
                    hid_channels,
                    hid_channels,
                    kernel_size,
                    stride,
                    padding,
                    groups=hid_channels,
                    bias=False), nn.BatchNorm2d(hid_channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup))
        elif activation == 'prelu':
            self.conv = nn.Sequential(
                nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hid_channels), nn.PReLU(hid_channels),
                nn.Conv2d(
                    hid_channels,
                    hid_channels,
                    kernel_size,
                    stride,
                    padding,
                    groups=hid_channels,
                    bias=False), nn.BatchNorm2d(hid_channels),
                nn.PReLU(hid_channels),
                nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup))
        elif activation == 'half_v1':
            self.conv = nn.Sequential(
                nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hid_channels), nn.ReLU(inplace=True),
                nn.Conv2d(
                    hid_channels,
                    hid_channels,
                    kernel_size,
                    stride,
                    padding,
                    groups=hid_channels,
                    bias=False), nn.BatchNorm2d(hid_channels),
                nn.PReLU(hid_channels),
                nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup))
        elif activation == 'half_v2':
            self.conv = nn.Sequential(
                nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hid_channels), nn.PReLU(hid_channels),
                nn.Conv2d(
                    hid_channels,
                    hid_channels,
                    kernel_size,
                    stride,
                    padding,
                    groups=hid_channels,
                    bias=False), nn.BatchNorm2d(hid_channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup))
        else:
            raise ValueError('unsupported activation: %s' % activation)
        self.use_connect = use_connect

    def forward(self, x):
        if self.use_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class Residual(nn.Module):
    """Depthwise-separable block: depthwise conv then 1x1 pointwise conv.

    Activation variants mirror InvertedResidual: 'relu' = ReLU/ReLU,
    'prelu' = PReLU/PReLU, 'half_v1' = ReLU/PReLU, 'half_v2' = PReLU/ReLU.
    """

    def __init__(self,
                 inp,
                 oup,
                 kernel_size,
                 stride,
                 padding,
                 use_connect=False,
                 activation='relu'):
        super(Residual, self).__init__()

        self.use_connect = use_connect

        if activation == 'relu':
            self.conv = nn.Sequential(
                nn.Conv2d(
                    inp,
                    inp,
                    kernel_size,
                    stride,
                    padding,
                    groups=inp,
                    bias=False), nn.BatchNorm2d(inp), nn.ReLU(inplace=True),
                nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True))
        elif activation == 'prelu':
            self.conv = nn.Sequential(
                nn.Conv2d(
                    inp,
                    inp,
                    kernel_size,
                    stride,
                    padding,
                    groups=inp,
                    bias=False), nn.BatchNorm2d(inp), nn.PReLU(inp),
                nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup),
                nn.PReLU(oup))
        elif activation == 'half_v1':
            self.conv = nn.Sequential(
                nn.Conv2d(
                    inp,
                    inp,
                    kernel_size,
                    stride,
                    padding,
                    groups=inp,
                    bias=False), nn.BatchNorm2d(inp), nn.ReLU(inplace=True),
                nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup),
                nn.PReLU(oup))
        elif activation == 'half_v2':
            self.conv = nn.Sequential(
                nn.Conv2d(
                    inp,
                    inp,
                    kernel_size,
                    stride,
                    padding,
                    groups=inp,
                    bias=False), nn.BatchNorm2d(inp), nn.PReLU(inp),
                nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True))
        else:
            raise ValueError('unsupported activation: %s' % activation)

    def forward(self, x):
        if self.use_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)
|
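Residual mirrors InvertedResidual's four activation variants but omits the 1x1 expansion: its depthwise convolution runs at the input width and a single pointwise convolution maps inp to oup, and, unlike the inverted block's linear bottleneck, it applies an activation after the final pointwise convolution. The optional identity shortcut carries the same shape constraints as above.
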
def pose_accuracy(output, target):
    with torch.no_grad():
        output = output.detach().cpu().numpy()
        target = target.detach().cpu().numpy()

        acc = np.mean(np.abs(output - target))
    return acc

def ION(output, target, left_eye_left_corner_idx, right_eye_right_corner_idx,
        num_pts):
    with torch.no_grad():
        output = output.view(-1, num_pts, 2).cpu().numpy()
        target = target.view(-1, num_pts, 2).cpu().numpy()

        interocular = target[:, left_eye_left_corner_idx] - \
            target[:, right_eye_right_corner_idx]
        interocular = np.sqrt(
            np.square(interocular[:, 0]) + np.square(interocular[:, 1])) + 1e-5
        dist = target - output
        dist = np.sqrt(np.square(dist[:, :, 0]) + np.square(dist[:, :, 1]))
        dist = np.sum(dist, axis=1)
        nme = dist / (interocular * num_pts)

    return np.mean(nme)

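ION computes the inter-ocular-distance-normalized mean error (NME): per sample, the per-point Euclidean errors are summed and divided by interocular * num_pts. A quick sanity check with toy tensors (not from this commit):

import torch

# Three toy points; eye-corner indices 0 and 1 sit 10 px apart, and every
# prediction is off by exactly 1 px in x, so the NME should be ~1/10.
num_pts = 3
target = torch.tensor([[0., 0., 10., 0., 5., 5.]])
output = target + torch.tensor([1., 0.] * num_pts)
print(ION(output, target, 0, 1, num_pts))  # ~0.1
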
def get_keypoint_accuracy(output, target_point):
    accuracy = dict()
    num_points = 106
    left_eye_left_corner_index = 66
    right_eye_right_corner_index = 79

    nme = ION(output, target_point, left_eye_left_corner_index,
              right_eye_right_corner_index, num_points)

    accuracy['nme'] = nme

    return accuracy

def get_pose_accuracy(output, target_pose):
    accuracy = dict()
    pose_acc = pose_accuracy(output, target_pose)
    accuracy['pose_acc'] = float(pose_acc)
    return accuracy
@@ -2,6 +2,7 @@
 from .classifier import TorchClassifier
 from .detector import (TorchFaceDetector, TorchYoloXClassifierPredictor,
                        TorchYoloXPredictor)
+from .face_keypoints_predictor import FaceKeypointsPredictor
 from .feature_extractor import (TorchFaceAttrExtractor,
                                 TorchFaceFeatureExtractor,
                                 TorchFeatureExtractor)
@@ -113,6 +113,7 @@ class PredictorV2(object):
                  device=None,
                  save_results=False,
                  save_path=None,
+                 mode='rgb',
                  *args,
                  **kwargs):
         self.model_path = model_path
@@ -135,6 +136,7 @@ class PredictorV2(object):
         self.model = self.prepare_model()
         self.processor = self.build_processor()
         self._load_op = None
+        self.mode = mode

     def prepare_model(self):
         """Build model from config file by default.
@@ -182,7 +184,7 @@ class PredictorV2(object):
         }
         """
         if self._load_op is None:
-            load_cfg = dict(type='LoadImage', mode='rgb')
+            load_cfg = dict(type='LoadImage', mode=self.mode)
             self._load_op = build_from_cfg(load_cfg, PIPELINES)

         if not isinstance(input, str):
easycv/predictors/face_keypoints_predictor.py (new file, 120 lines)
@@ -0,0 +1,120 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import copy
import os

import cv2
import numpy as np
import torch
from torchvision.transforms import Compose

from easycv.datasets.registry import PIPELINES
from easycv.models import build_model
from easycv.predictors.builder import PREDICTORS
from easycv.predictors.interface import PredictorInterface
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.config_tools import mmcv_config_fromfile
from easycv.utils.registry import build_from_cfg
from ..models import *
from .base import PredictorV2

face_contour_point_index = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
]
left_eye_brow_point_index = [33, 34, 35, 36, 37, 38, 39, 40, 41, 33]
right_eye_brow_point_index = [42, 43, 44, 45, 46, 47, 48, 49, 50, 42]
left_eye_point_index = [66, 67, 68, 69, 70, 71, 72, 73, 66]
right_eye_point_index = [75, 76, 77, 78, 79, 80, 81, 82, 75]
nose_bridge_point_index = [51, 52, 53, 54]
nose_contour_point_index = [55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65]
mouth_outer_point_index = [84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 84]
mouth_inter_point_index = [96, 97, 98, 99, 100, 101, 102, 103, 96]

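Note that the eye, brow, and mouth index lists repeat their first index at the end, so the draw_line helper defined below in show_result closes those contours into loops, while the face contour and nose curves stay open.
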
@PREDICTORS.register_module()
class FaceKeypointsPredictor(PredictorV2):
    """Predict pipeline for face keypoints.

    Args:
        model_path (str): Path of the model file.
        model_config (str): Config file path for the model and processor to init.
    """

    def __init__(self,
                 model_path,
                 model_config,
                 batch_size=1,
                 device=None,
                 save_results=False,
                 save_path=None,
                 mode='bgr'):
        super(FaceKeypointsPredictor, self).__init__(
            model_path,
            model_config,
            batch_size=batch_size,
            device=device,
            save_results=save_results,
            save_path=save_path,
            mode=mode)

        self.input_size = self.cfg.IMAGE_SIZE
        self.point_number = self.cfg.POINT_NUMBER

    def show_result(self, img, points, scale=4.0, save_path=None):
        """Draw `points` over `img`.

        Args:
            img (str): Path of the image to be displayed.
            points (Tensor): The face keypoints to draw over `img`.
            scale (float): Zoom-in/zoom-out factor applied when drawing.
            save_path (str): Optional path to save the drawn image.
        Returns:
            image (ndarray): The image with keypoints and contours drawn.
        """

        img = cv2.imread(img)
        img = img.copy()
        h, w, c = img.shape
        scale_h = h / self.input_size
        scale_w = w / self.input_size

        points = points.view(-1, self.point_number, 2).cpu().numpy()[0]
        for index in range(len(points)):
            points[index][0] *= scale_w
            points[index][1] *= scale_h

        image = cv2.resize(img, dsize=None, fx=scale, fy=scale)

        def draw_line(point_index, image, point):
            for i in range(len(point_index) - 1):
                cur_index = point_index[i]
                next_index = point_index[i + 1]
                cur_pt = (int(point[cur_index][0] * scale),
                          int(point[cur_index][1] * scale))
                next_pt = (int(point[next_index][0] * scale),
                           int(point[next_index][1] * scale))
                cv2.line(image, cur_pt, next_pt, (0, 0, 255), thickness=2)

        draw_line(face_contour_point_index, image, points)
        draw_line(left_eye_brow_point_index, image, points)
        draw_line(right_eye_brow_point_index, image, points)
        draw_line(left_eye_point_index, image, points)
        draw_line(right_eye_point_index, image, points)
        draw_line(nose_bridge_point_index, image, points)
        draw_line(nose_contour_point_index, image, points)
        draw_line(mouth_outer_point_index, image, points)
        draw_line(mouth_inter_point_index, image, points)

        size = len(points)
        for i in range(size):
            x = int(points[i][0])
            y = int(points[i][1])
            cv2.putText(image, str(i), (int(x * scale), int(y * scale)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            cv2.circle(image, (int(x * scale), int(y * scale)), 2, (0, 255, 0),
                       cv2.FILLED)

        if save_path is not None:
            cv2.imwrite(save_path, image)

        return image

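A minimal invocation sketch for the new predictor (the checkpoint and image names here are hypothetical; the unit test at the end of this diff runs the same flow against the repo's test assets):

from easycv.predictors.face_keypoints_predictor import FaceKeypointsPredictor

# Hypothetical paths; the config path is the one added in this commit.
predictor = FaceKeypointsPredictor(
    model_path='epoch_580.pth',
    model_config='configs/face/face_96x96_wingloss.py')
output = predictor('face.png')[0]  # dict with 'point' and 'pose'
vis = predictor.show_result(
    'face.png', output['point'], scale=2, save_path='face_vis.png')
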
@@ -3,6 +3,7 @@ dataclasses
 einops
 future
 h5py
+imgaug
 json_tricks
 numpy
 opencv-python
tests/predictors/test_face_keypoints_predictor.py (new file, 39 lines)
@@ -0,0 +1,39 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import copy
import os
import tempfile
import unittest

import cv2
import numpy as np
from PIL import Image

from easycv.predictors.face_keypoints_predictor import FaceKeypointsPredictor


class FaceKeypointsPredictorWithoutDetectorTest(unittest.TestCase):

    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
        self.image_path = './data/test/face_2d_keypoints/data/002253.png'
        self.save_image_path = './data/test/face_2d_keypoints/data/result_002253.png'
        self.model_path = './data/test/face_2d_keypoints/models/epoch_580.pth'
        self.model_config_path = './configs/face/face_96x96_wingloss.py'

    def test_single(self):
        predict_pipeline = FaceKeypointsPredictor(
            model_path=self.model_path, model_config=self.model_config_path)

        output = predict_pipeline(self.image_path)[0]
        output_keypoints = output['point']
        output_pose = output['pose']
        image_show = predict_pipeline.show_result(
            self.image_path,
            output_keypoints,
            scale=2,
            save_path=self.save_image_path)


if __name__ == '__main__':
    unittest.main()