diff --git a/configs/face/face_96x96_wingloss.py b/configs/face/face_96x96_wingloss.py new file mode 100644 index 00000000..0657a96a --- /dev/null +++ b/configs/face/face_96x96_wingloss.py @@ -0,0 +1,236 @@ +# model settings +POINT_NUMBER = 106 +MEAN_FACE = [ + 0.05486667535113006, 0.24441904048908245, 0.05469932714062696, + 0.30396829196709935, 0.05520653400164321, 0.3643191463607746, + 0.05865501342257397, 0.42453849020500306, 0.0661603899137523, + 0.48531377442945767, 0.07807677169271177, 0.5452126843738523, + 0.09333319368757653, 0.6047840615432064, 0.11331425394034209, + 0.6631144309665994, 0.13897813867699352, 0.7172296230155276, + 0.17125811033538194, 0.767968859462583, 0.20831698519371536, + 0.8146603379935117, 0.24944621000897876, 0.857321261721953, + 0.2932993820558674, 0.8973900596678597, 0.33843820185594653, + 0.9350576242126986, 0.38647802623495553, 0.966902971122812, + 0.4411974776504609, 0.9878629960611088, 0.5000390697219397, + 0.9934886214875595, 0.5588590024515473, 0.9878510782414189, + 0.6135829360035883, 0.9668655595323074, 0.6616294188166414, + 0.9350065330378543, 0.7067734980023662, 0.8973410411573094, + 0.7506167730772516, 0.8572957679511382, 0.7917579157122047, + 0.8146281598803492, 0.8288026446367324, 0.7679019642224981, + 0.8610918526053805, 0.7171624168757985, 0.8867491048162915, + 0.6630344261248556, 0.9067293813428708, 0.6047095492618413, + 0.9219649147678989, 0.5451295187190602, 0.9338619041815587, + 0.4852292097262674, 0.9413455695142587, 0.424454780475834, + 0.9447753107545577, 0.3642347111991026, 0.9452649776939869, + 0.30388458223793025, 0.9450854849661369, 0.24432737691068557, + 0.1594802473020129, 0.17495177946520288, 0.2082918411850002, + 0.12758378330875153, 0.27675902873293057, 0.11712230823088154, + 0.34660582049732336, 0.12782553369032904, 0.4137234315527489, + 0.14788458441422778, 0.4123890243720449, 0.18814226684806626, + 0.3498927810760776, 0.17640650480816664, 0.28590212091591866, + 0.16895271174960227, 0.22193967489846017, 0.16985862149585013, + 0.5861805004572298, 0.147863456192582, 0.6532904167464643, + 0.12780412047734288, 0.723142364263288, 0.11709102395419578, + 0.7916076475508984, 0.12753867695205595, 0.8404440227263494, + 0.17488715120168932, 0.7779848023963316, 0.1698261195288917, + 0.7140264757991571, 0.1689377237959271, 0.650024882334848, + 0.17640581823811927, 0.5875270068157493, 0.18815421057605972, + 0.4999687027691624, 0.2770570778583906, 0.49996466107378934, + 0.35408433007759227, 0.49996725190415664, 0.43227025345368053, + 0.49997367716346774, 0.5099309118810921, 0.443147025685285, + 0.2837021691260901, 0.4079306716593004, 0.4729519900478952, + 0.3786223176615041, 0.5388017782630576, 0.4166237366074797, + 0.5822229552544941, 0.4556754522760756, 0.5887956328134262, + 0.49998730493119997, 0.5951855531982454, 0.5443300921009105, + 0.5887796732983633, 0.5833722476054509, 0.582200985012979, + 0.6213509190608012, 0.5387760772258134, 0.5920137550293199, + 0.4729325070035326, 0.5567854054587345, 0.28368589871138317, + 0.23395988420439123, 0.275313734012504, 0.27156519109550253, + 0.2558735678926061, 0.31487949633428597, 0.2523033259214858, + 0.356919009399118, 0.2627342680634766, 0.3866625969903256, + 0.2913618036573405, 0.3482919069920915, 0.3009936818974329, + 0.3064437008415846, 0.3037349617842158, 0.26724000706363993, + 0.2961896087804692, 0.3135744691699477, 0.27611103614975246, + 0.6132904312551143, 0.29135144033587107, 0.6430396927648264, + 0.2627079452269443, 0.6850713556136455, 0.2522730391144915, + 0.728377707003201, 
0.25583118190779625, 0.7660035591791254, + 0.27526375689471777, 0.7327054300488236, 0.2961495286346863, + 0.6935171517115648, 0.3036951925380769, 0.6516533228539426, + 0.3009921014909089, 0.6863983789278025, 0.2760904908649394, + 0.35811903020866753, 0.7233174007629063, 0.4051199834269763, + 0.6931800846807724, 0.4629631471997891, 0.6718031951363689, + 0.5000016063148277, 0.6799150331999366, 0.5370506360177653, + 0.6717809139952097, 0.5948714927411151, 0.6931581144392573, + 0.6418878095835022, 0.7232890570786875, 0.6088129582142587, + 0.7713407215524752, 0.5601450388292929, 0.8052499757498277, + 0.5000181358125715, 0.8160749831906926, 0.4398905591799545, + 0.8052697696938342, 0.39120318265892984, 0.771375905028864, + 0.36888771299734613, 0.7241751210643214, 0.4331097084010058, + 0.7194543690519717, 0.5000188612450743, 0.7216823277180712, + 0.566895861884284, 0.7194302225129479, 0.631122598507516, + 0.7241462073974219, 0.5678462302796355, 0.7386355816766528, + 0.5000082906571756, 0.7479600838019628, 0.43217532542902076, + 0.7386538729390463, 0.31371761254774383, 0.2753328284323114, + 0.6862487843823917, 0.2752940437017121 +] +IMAGE_SIZE = 96 + +loss_config = dict( + num_points=POINT_NUMBER, + left_eye_left_corner_index=66, + right_eye_right_corner_index=79, + points_weight=1.0, + contour_weight=1.5, + eyebrow_weight=1.5, + eye_weight=1.7, + nose_weight=1.3, + lip_weight=1.7, + omega=10, + epsilon=2) + +model = dict( + type='FaceKeypoint', + backbone=dict( + type='FaceKeypointBackbone', + in_channels=3, + out_channels=48, + residual_activation='relu', + inverted_activation='half_v2', + inverted_expand_ratio=2, + ), + keypoint_head=dict( + type='FaceKeypointHead', + in_channels=48, + out_channels=POINT_NUMBER * 2, + input_size=IMAGE_SIZE, + inverted_expand_ratio=2, + inverted_activation='half_v2', + mean_face=MEAN_FACE, + loss_keypoint=dict(type='WingLossWithPose', **loss_config), + ), + pose_head=dict( + type='FacePoseHead', + in_channels=48, + out_channels=3, + inverted_expand_ratio=2, + inverted_activation='half_v2', + loss_pose=dict(type='FacePoseLoss', pose_weight=0.01), + ), +) + +train_pipeline = [ + dict(type='FaceKeypointRandomAugmentation', input_size=IMAGE_SIZE), + dict(type='FaceKeypointNorm', input_size=IMAGE_SIZE), + dict(type='MMToTensor'), + dict( + type='NormalizeTensor', + mean=[0.4076, 0.458, 0.485], + std=[1.0, 1.0, 1.0]), + dict( + type='Collect', + keys=[ + 'img', 'target_point', 'target_point_mask', 'target_pose', + 'target_pose_mask' + ]) +] + +val_pipeline = [ + dict(type='FaceKeypointNorm', input_size=IMAGE_SIZE), + dict(type='MMToTensor'), + dict( + type='NormalizeTensor', + mean=[0.4076, 0.458, 0.485], + std=[1.0, 1.0, 1.0]), + dict( + type='Collect', + keys=[ + 'img', 'target_point', 'target_point_mask', 'target_pose', + 'target_pose_mask' + ]) +] +test_pipeline = val_pipeline + +data_root = 'path/to/face_landmark_data/' + +data_cfg = dict( + data_root=data_root, + input_size=IMAGE_SIZE, +) + +data = dict( + imgs_per_gpu=512, + workers_per_gpu=2, + train=dict( + type='FaceKeypointDataset', + data_source=dict( + type='FaceKeypintSource', + train=True, + data_range=[0, 30000], # [0,30000] [0,478857] + data_cfg=data_cfg, + ), + pipeline=train_pipeline), + val=dict( + type='FaceKeypointDataset', + data_source=dict( + type='FaceKeypintSource', + train=False, + data_range=[478857, 488857], + # data_range=[478857, 478999], #[478857, 478999] [478857, 488857] + data_cfg=data_cfg, + ), + pipeline=val_pipeline), + test=dict( + type='FaceKeypointDataset', + data_source=dict( 
+ type='FaceKeypintSource', + train=False, + data_range=[478857, 488857], + # data_range=[478857, 478999], #[478857, 478999] [478857, 488857] + data_cfg=data_cfg, + ), + pipeline=test_pipeline), +) + +# runtime setting +optimizer = dict( + type='Adam', + lr=0.005, +) +optimizer_config = dict(grad_clip=None) +lr_config = dict( + policy='CosineAnnealing', + min_lr=0.00001, + warmup='linear', + warmup_iters=10, + warmup_ratio=0.001, + warmup_by_epoch=True, + by_epoch=True) + +total_epochs = 1000 +checkpoint_config = dict(interval=10) +log_config = dict( + interval=5, hooks=[ + dict(type='TextLoggerHook'), + ]) + +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] + +# disable opencv multithreading to avoid system being overloaded +opencv_num_threads = 0 +# set multi-process start method as `fork` to speed up the training +mp_start_method = 'fork' + +evaluation = dict(interval=1, metric=['NME'], save_best='NME') + +eval_config = dict(interval=1) +evaluator_args = dict(metric_names='ave_nme') +eval_pipelines = [ + dict( + mode='test', + data=dict(**data['val'], imgs_per_gpu=1), + evaluators=[dict(type='FaceKeypointEvaluator', **evaluator_args)]) +] diff --git a/data/test/face_2d_keypoints/data/002253.png b/data/test/face_2d_keypoints/data/002253.png new file mode 100644 index 00000000..3e0860fb --- /dev/null +++ b/data/test/face_2d_keypoints/data/002253.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a45cc56977e709361659d4123739d3647d122a0d80bf7249d0ccdef018f068e +size 112042 diff --git a/data/test/face_2d_keypoints/data/002258.png b/data/test/face_2d_keypoints/data/002258.png new file mode 100644 index 00000000..b5f1fb78 --- /dev/null +++ b/data/test/face_2d_keypoints/data/002258.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d516f30a8c1583b45e54d737d2a712ed1c63ef387d579517e1e23e416339ac2 +size 94367 diff --git a/data/test/face_2d_keypoints/models/epoch_580.pth b/data/test/face_2d_keypoints/models/epoch_580.pth new file mode 100644 index 00000000..ffba8aa8 --- /dev/null +++ b/data/test/face_2d_keypoints/models/epoch_580.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5105c5aa83c59d2a1fdf8dc9ff83a8d84c19a70c7faabcf7f8bce8a913afe4f1 +size 3421031 diff --git a/easycv/core/evaluation/__init__.py b/easycv/core/evaluation/__init__.py index d7eecfe4..2209e505 100644 --- a/easycv/core/evaluation/__init__.py +++ b/easycv/core/evaluation/__init__.py @@ -3,6 +3,7 @@ from .auc_eval import AucEvaluator from .base_evaluator import Evaluator from .classification_eval import ClsEvaluator from .coco_evaluation import CocoDetectionEvaluator, CoCoPoseTopDownEvaluator +from .face_eval import FaceKeypointEvaluator from .faceid_pair_eval import FaceIDPairEvaluator from .keypoint_eval import KeyPointEvaluator from .mse_eval import MSEEvaluator diff --git a/easycv/core/evaluation/face_eval.py b/easycv/core/evaluation/face_eval.py new file mode 100644 index 00000000..633a1392 --- /dev/null +++ b/easycv/core/evaluation/face_eval.py @@ -0,0 +1,59 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+import torch + +from .base_evaluator import Evaluator +from .builder import EVALUATORS +from .metric_registry import METRICS + + +@EVALUATORS.register_module +class FaceKeypointEvaluator(Evaluator): + + def __init__(self, dataset_name=None, metric_names=['ave_nme']): + super(FaceKeypointEvaluator, self).__init__(dataset_name, metric_names) + self.metric = metric_names + self.dataset_name = dataset_name + + def _evaluate_impl(self, prediction_dict, groundtruth_dict, **kwargs): + """ + Args: + prediction_dict: model forward output dict, ['point', 'pose'] + groundtruth_dict: groundtruth dict, ['target_point', 'target_point_mask', 'target_pose', 'target_pose_mask'] used for compute accuracy + kwargs: other parameters + """ + + def evaluate(predicts, gts, **kwargs): + from easycv.models.utils.face_keypoint_utils import get_keypoint_accuracy, get_pose_accuracy + ave_pose_acc = 0 + ave_nme = 0 + idx = 0 + + for (predict_point, predict_pose, + gt) in zip(predicts['point'], predicts['pose'], gts): + target_point = gt['target_point'] + target_point_mask = gt['target_point_mask'] + target_pose = gt['target_pose'] + target_pose_mask = gt['target_pose_mask'] + + target_point = target_point * target_point_mask + target_pose = target_pose * target_pose_mask + + keypoint_accuracy = get_keypoint_accuracy( + predict_point, target_point) + pose_accuracy = get_pose_accuracy(predict_pose, target_pose) + + ave_pose_acc += pose_accuracy['pose_acc'] + ave_nme += keypoint_accuracy['nme'] + idx += 1 + + eval_result = {} + idx += 0.000001 + eval_result['ave_pose_acc'] = ave_pose_acc / idx + eval_result['ave_nme'] = ave_nme / idx + + return eval_result + + return evaluate(prediction_dict, groundtruth_dict) + + +METRICS.register_default_best_metric(FaceKeypointEvaluator, 'ave_nme', 'min') diff --git a/easycv/datasets/__init__.py b/easycv/datasets/__init__.py index 3f04bfd8..cb4abf82 100644 --- a/easycv/datasets/__init__.py +++ b/easycv/datasets/__init__.py @@ -1,5 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. -from . import classification, detection, pose, segmentation, selfsup, shared +from . import (classification, detection, face, pose, segmentation, selfsup, + shared) from .builder import build_dali_dataset, build_dataset from .loader import DistributedGroupSampler, GroupSampler, build_dataloader from .registry import DATASETS diff --git a/easycv/datasets/face/__init__.py b/easycv/datasets/face/__init__.py new file mode 100644 index 00000000..d045ff4e --- /dev/null +++ b/easycv/datasets/face/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +from .data_sources import * +from .face_keypoint_dataset import FaceKeypointDataset +from .pipelines import * diff --git a/easycv/datasets/face/data_sources/__init__.py b/easycv/datasets/face/data_sources/__init__.py new file mode 100644 index 00000000..b23f3231 --- /dev/null +++ b/easycv/datasets/face/data_sources/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from .face_keypoint_source import FaceKeypintSource diff --git a/easycv/datasets/face/data_sources/face_keypoint_source.py b/easycv/datasets/face/data_sources/face_keypoint_source.py new file mode 100644 index 00000000..031de97e --- /dev/null +++ b/easycv/datasets/face/data_sources/face_keypoint_source.py @@ -0,0 +1,171 @@ +import copy +import json +import logging +import os + +import cv2 +import numpy as np +import torch + +from easycv.datasets.face.pipelines.face_keypoint_transform import ( + FaceKeypointNorm, FaceKeypointRandomAugmentation, normal) +from easycv.datasets.registry import DATASOURCES +from easycv.datasets.shared.base import BaseDataset + +FACE_KEYPOINT_DATASET_INFO = dict( + real_list_file_dir='real_face_list.txt', + data_info_dir='infos/merge/', + data_image_dir='images/merge/', + data_overlay_dir='images/overlay/', +) + + +@DATASOURCES.register_module() +class FaceKeypintSource(): + """ + load dataset for face key points + """ + + def __init__(self, + data_cfg, + data_range, + real_list_path=None, + info_path=None, + image_path=None, + data_overlay_path=None, + dataset_info=None, + **kwargs): + super(FaceKeypintSource, self).__init__() + """ + Args: + data_cfg: Data config dict + data_range: rang of dataset for training or validation + real_list_file_path: path of file contains image list + data_info_dir: annotation file path + data_img_dir: image file path + data_overlay_dir: overlay background image path + + dataset_info: A dict containing all dataset info + """ + if dataset_info is None: + logging.info( + 'dataset_info is missing, use default face keypoiny dataset info' + ) + dataset_info = FACE_KEYPOINT_DATASET_INFO + + data_root = data_cfg['data_root'] + real_list_file_path = os.path.join(data_root, + dataset_info['real_list_file_dir']) + data_info_dir = os.path.join(data_root, dataset_info['data_info_dir']) + data_img_dir = os.path.join(data_root, dataset_info['data_image_dir']) + data_overlay_dir = os.path.join(data_root, + dataset_info['data_overlay_dir']) + self.input_size = data_cfg['input_size'] + data_range = data_range + + if real_list_path is not None: + real_list_file_path = real_list_path + if info_path is not None: + data_info_dir = info_path + if image_path is not None: + data_img_dir = image_path + if data_overlay_path is not None: + data_overlay_dir = data_overlay_path + + # overlay + self.overlay_image_path = [] + for overlay_img_file in sorted(os.listdir(data_overlay_dir)): + overlay_img_filepath = os.path.join(data_overlay_dir, + overlay_img_file) + self.overlay_image_path.append(overlay_img_filepath) + + self.points_and_pose_datas = [] + with open(real_list_file_path, 'r') as real_list_file: + real_list_lines = real_list_file.readlines() + for index in range(data_range[0], data_range[1]): + idx = int(real_list_lines[index]) + img_path = os.path.join(data_img_dir, '{:06d}.png'.format(idx)) + if not os.path.exists(img_path): + logging.warning('image %s does not exist' % img_path) + continue + info_path = os.path.join(data_info_dir, '{:06d}.json'.format(idx)) + if not os.path.exists(info_path): + logging.warning('annotation %s does not exist' % info_path) + continue + with open(info_path, 'r') as info_file: + info_json = json.load(info_file) + assert info_json['face_count'] == 1 + base_info = info_json['face_infos'][0]['base_info'] + + # points + assert base_info['points_array'] is not None + points = np.asarray(base_info['points_array']).astype( + np.float32) + points_mask = np.abs(points - (-999)) > 0.0001 + + # pose + pose = {'pitch': -999, 
'yaw': -999, 'roll': -999} + if base_info['pitch'] is not None and base_info[ + 'yaw'] is not None and base_info['roll'] is not None: + pose['pitch'] = base_info['pitch'] + pose['yaw'] = base_info['yaw'] + # pose["roll"] = base_info["roll"] + # datasets have been preprocessed, roll=0 + # add noise to pose + pose['roll'] = normal() * 10.0 + + pose_mask = np.asarray([ + np.abs(pose['pitch'] - (-999)) > 0.0001, + np.abs(pose['roll'] - (-999)) > 0.0001, + np.abs(pose['yaw'] - (-999)) > 0.0001 + ]) + + self.points_and_pose_datas.append( + (img_path, points, points_mask, pose, pose_mask)) + + self.db = [] + for img_path, points, points_mask, pose, pose_mask in copy.deepcopy( + self.points_and_pose_datas): + image = cv2.imread(img_path) + + points[:, + 0] = points[:, 0] / image.shape[1] * float(self.input_size) + points[:, + 1] = points[:, 1] / image.shape[0] * float(self.input_size) + + target_point = np.reshape(points, + (points.shape[0] * points.shape[1])) + points_mask = points_mask.astype(np.float32) + points_mask = np.reshape( + points_mask, (points_mask.shape[0] * points_mask.shape[1])) + pose = np.asarray([pose['pitch'], pose['roll'], pose['yaw']]) + + self.db.append({ + 'img_path': + img_path, + 'target_point': + torch.tensor(np.array(target_point, np.float32)), + 'target_point_mask': + torch.tensor(points_mask), + 'target_pose': + torch.tensor(np.array(pose, np.float32)), + 'target_pose_mask': + torch.tensor(pose_mask.astype(np.float32)) + }) + + def __getitem__(self, index): + img_path, points, points_mask, pose, pose_mask = copy.deepcopy( + self.points_and_pose_datas[index]) + image = cv2.imread(img_path) + + return { + 'img': image, + 'target_point': points, + 'target_point_mask': points_mask, + 'target_pose': pose, + 'target_pose_mask': pose_mask, + 'overlay_image_path': self.overlay_image_path + } + + def __len__(self): + return len(self.points_and_pose_datas) diff --git a/easycv/datasets/face/face_keypoint_dataset.py b/easycv/datasets/face/face_keypoint_dataset.py new file mode 100644 index 00000000..a2c5fe11 --- /dev/null +++ b/easycv/datasets/face/face_keypoint_dataset.py @@ -0,0 +1,45 @@ +import copy +import json +import logging +import os + +import cv2 +import numpy as np +import torch +import torch.utils.data as data + +from easycv.datasets.face.pipelines.face_keypoint_transform import ( + FaceKeypointNorm, FaceKeypointRandomAugmentation, normal) +from easycv.datasets.registry import DATASETS +from easycv.datasets.shared.base import BaseDataset + + +@DATASETS.register_module() +class FaceKeypointDataset(BaseDataset): + """ + dataset for face key points + """ + + def __init__(self, data_source, pipeline, profiling=False): + super(FaceKeypointDataset, self).__init__(data_source, pipeline, + profiling) + """ + Args: + data_source: Data_source config dict + pipeline: Pipeline config list + profiling: If set True, will print pipeline time + """ + + def evaluate(self, outputs, evaluators, **kwargs): + eval_result = {} + for evaluator in evaluators: + eval_result.update( + evaluator.evaluate( + prediction_dict=outputs, + groundtruth_dict=self.data_source.db)) + + return eval_result + + def __getitem__(self, idx): + results = self.data_source[idx] + return self.pipeline(results) diff --git a/easycv/datasets/face/pipelines/__init__.py b/easycv/datasets/face/pipelines/__init__.py new file mode 100644 index 00000000..222448ab --- /dev/null +++ b/easycv/datasets/face/pipelines/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+from .face_keypoint_transform import (FaceKeypointNorm, + FaceKeypointRandomAugmentation) + +__all__ = ['FaceKeypointRandomAugmentation', 'FaceKeypointNorm'] diff --git a/easycv/datasets/face/pipelines/face_keypoint_transform.py b/easycv/datasets/face/pipelines/face_keypoint_transform.py new file mode 100644 index 00000000..5bae49bb --- /dev/null +++ b/easycv/datasets/face/pipelines/face_keypoint_transform.py @@ -0,0 +1,431 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. +import random + +import cv2 +import imgaug +import imgaug.augmenters as iaa +import numpy as np + +from easycv.datasets.registry import PIPELINES + +DEST_SIZE = 256 +BASE_LANDMARK_NUM = 106 +ENLARGE_RATIO = 1.1 + +CONTOUR_PARTS = [[0, 32], [1, 31], [2, 30], [3, 29], [4, 28], [5, 27], [6, 26], + [7, 25], [8, 24], [9, 23], [10, 22], [11, 21], [12, 20], + [13, 19], [14, 18], [15, 17]] +BROW_PARTS = [[33, 46], [34, 45], [35, 44], [36, 43], [37, 42], [38, 50], + [39, 49], [40, 48], [41, 47]] +EYE_PARTS = [[66, 79], [67, 78], [68, 77], [69, 76], [70, 75], [71, 82], + [72, 81], [73, 80], [74, 83]] +NOSE_PARTS = [[55, 65], [56, 64], [57, 63], [58, 62], [59, 61]] +MOUSE_PARTS = [[84, 90], [85, 89], [86, 88], [96, 100], [97, 99], [103, 101], + [95, 91], [94, 92]] +IRIS_PARTS = [[104, 105]] +MATCHED_PARTS = CONTOUR_PARTS + BROW_PARTS + EYE_PARTS + NOSE_PARTS + MOUSE_PARTS + IRIS_PARTS + + +def normal(): + """ + 3-sigma rule + return: (-1, +1) + """ + mu, sigma = 0, 1 + while True: + s = np.random.normal(mu, sigma) + if s < mu - 3 * sigma or s > mu + 3 * sigma: + continue + return s / 3 * sigma + + +def rotate(angle, center, landmark): + rad = angle * np.pi / 180.0 + alpha = np.cos(rad) + beta = np.sin(rad) + M = np.zeros((2, 3), dtype=np.float32) + M[0, 0] = alpha + M[0, 1] = beta + M[0, 2] = (1 - alpha) * center[0] - beta * center[1] + M[1, 0] = -beta + M[1, 1] = alpha + M[1, 2] = beta * center[0] + (1 - alpha) * center[1] + + landmark_ = np.asarray([(M[0, 0] * x + M[0, 1] * y + M[0, 2], + M[1, 0] * x + M[1, 1] * y + M[1, 2]) + for (x, y) in landmark]) + return M, landmark_ + + +class OverLayGenerator: + + def __init__(self, shape): + # 4x4 + h_seg_len = shape[0] // 4 + w_seg_len = shape[1] // 4 + + self.overlay = [] + # 2x2 overlay + for i in range(3): + for j in range(3): + if i == 1 and j == 1: + continue + self.overlay.append((i * w_seg_len, j * h_seg_len, + 2 * w_seg_len, 2 * h_seg_len)) + + # 2x3 overlay + for i in range(3): + for j in range(2): + if i == 1: + continue + self.overlay.append((i * w_seg_len, j * h_seg_len, + 2 * w_seg_len, 3 * h_seg_len)) + for i in range(2): + for j in range(3): + if j == 1: + continue + self.overlay.append((i * w_seg_len, j * h_seg_len, + 3 * w_seg_len, 2 * h_seg_len)) + + # 2x4 overlay + for i in range(3): + for j in range(1): + if i == 1: + continue + self.overlay.append((i * w_seg_len, j * h_seg_len, + 2 * w_seg_len, 4 * h_seg_len)) + for i in range(1): + for j in range(3): + if j == 1: + continue + self.overlay.append((i * w_seg_len, j * h_seg_len, + 4 * w_seg_len, 2 * h_seg_len)) + + +class FaceKeypointsDataAugumentation: + + def __init__(self, input_size): + # option + self.enable_flip = True + self.enable_rotate = True + self.input_size = input_size + + # mask generator + coarse_salt_and_pepper_iaa = iaa.CoarseSaltAndPepper( + (0.25, 0.35), size_percent=(0.03125, 0.015625)) + self.mask_generator = coarse_salt_and_pepper_iaa.mask + + # overlay generator + self.overlay_generator = OverLayGenerator(shape=(256, 256)) + + # flip + self.mirror_map = 
FaceKeypointsDataAugumentation.compute_mirror_map() + + @staticmethod + def compute_mirror_map(): + + mirror_map = np.array(range(0, BASE_LANDMARK_NUM), np.int32) + for x, y in MATCHED_PARTS: + mirror_map[x] = y + mirror_map[y] = x + + return mirror_map + + def aug_flip(self, img, pts, visibility, pose): + # pts[:, 0] = self.input_size - pts[:, 0] + pts[:, 0] = img.shape[1] - pts[:, 0] + pts = pts[self.mirror_map] + if visibility is not None: + visibility = visibility[self.mirror_map] + img = cv2.flip(img, 1) + if pose is not None: + # fix roll&yaw in pose + pose['roll'] = -pose['roll'] + pose['yaw'] = -pose['yaw'] + + return img, pts, visibility, pose + + def aug_rotate(self, img, pts, pose, angle): + center = [DEST_SIZE // 2, DEST_SIZE // 2] + if pose is not None: + # fix roll in pose + pose['roll'] += angle + + cx, cy = center + M, pts = rotate(angle, (cx, cy), pts) + + imgT = cv2.warpAffine(img, M, (int(img.shape[1]), int(img.shape[0]))) + + x1 = np.min(pts[:, 0]) + x2 = np.max(pts[:, 0]) + y1 = np.min(pts[:, 1]) + y2 = np.max(pts[:, 1]) + w = x2 - x1 + 1 + h = y2 - y1 + 1 + x1 = int(x1 - (ENLARGE_RATIO - 1.0) / 2.0 * w) + y1 = int(y1 - (ENLARGE_RATIO - 1.0) * h) + + new_w = int(ENLARGE_RATIO * (1 + normal() * 0.25) * w) + new_h = int(ENLARGE_RATIO * (1 + normal() * 0.25) * h) + new_x1 = x1 + int(normal() * DEST_SIZE * 0.15) + new_y1 = y1 + int(normal() * DEST_SIZE * 0.15) + new_x2 = new_x1 + new_w + new_y2 = new_y1 + new_h + + new_xy = new_x1, new_y1 + pts = pts - new_xy + + height, width, _ = imgT.shape + dx = max(0, -new_x1) + dy = max(0, -new_y1) + new_x1 = max(0, new_x1) + new_y1 = max(0, new_y1) + + edx = max(0, new_x2 - width) + edy = max(0, new_y2 - height) + new_x2 = min(width, new_x2) + new_y2 = min(height, new_y2) + + imgT = imgT[new_y1:new_y2, new_x1:new_x2] + if dx > 0 or dy > 0 or edx > 0 or edy > 0: + imgT = cv2.copyMakeBorder( + imgT, + dy, + edy, + dx, + edx, + cv2.BORDER_CONSTANT, + value=(103.94, 116.78, 123.68)) + + return imgT, pts, pose + + def random_mask(self, img): + mask = self.mask_generator.draw_samples(size=img.shape) + mask = np.expand_dims(np.sum(mask, axis=-1) > 0, axis=-1) + return mask + + def random_overlay(self): + index = np.random.choice(len(self.overlay_generator.overlay)) + overlay = self.overlay_generator.overlay[index] + return overlay + + def augment_blur(self, img): + h, w = img.shape[:2] + assert h == w + ssize = int(random.uniform(0.01, 0.5) * h) + aug_seq = iaa.Sequential([ + iaa.Sometimes( + 1.0, + iaa.OneOf([ + iaa.GaussianBlur((3, 15)), + iaa.AverageBlur(k=(3, 15)), + iaa.MedianBlur(k=(3, 15)), + iaa.MotionBlur((5, 25)) + ])), + iaa.Resize(ssize, interpolation=imgaug.ALL), + iaa.Sometimes( + 0.6, + iaa.OneOf([ + iaa.AdditiveGaussianNoise( + loc=0, scale=(0.0, 0.1 * 255), per_channel=0.5), + iaa.AdditiveLaplaceNoise( + loc=0, scale=(0.0, 0.1 * 255), per_channel=0.5), + iaa.AdditivePoissonNoise(lam=(0, 30), per_channel=0.5) + ])), + iaa.Sometimes(0.8, iaa.JpegCompression(compression=(40, 90))), + iaa.Resize(h), + ]) + + aug_img = aug_seq.augment_image(img) + return aug_img + + def augment_color_temperature(self, img): + aug = iaa.ChangeColorTemperature((1000, 40000)) + + aug_img = aug.augment_image(img) + return aug_img + + def aug_clr_noise_blur(self, img): + # skin&light + if np.random.choice((True, False), p=[0.05, 0.95]): + img_ycrcb_raw = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB) + skin_factor_list = [0.6, 0.8, 1.0, 1.2, 1.4] + skin_factor = np.random.choice(skin_factor_list) + img_ycrcb_raw[:, :, 0:1] = np.clip( + img_ycrcb_raw[:, :, 
0:1].astype(np.float) * skin_factor, 0, + 255).astype(np.uint8) + img = cv2.cvtColor(img_ycrcb_raw, cv2.COLOR_YCR_CB2BGR) + + # gauss blur 5% + if np.random.choice((True, False), p=[0.05, 0.95]): + sigma = np.random.choice([0.25, 0.50, 0.75]) + gauss_blur_iaa = iaa.GaussianBlur(sigma=sigma) + img = gauss_blur_iaa(image=img) + + # gauss noise 5% + if np.random.choice((True, False), p=[0.05, 0.95]): + scale = np.random.choice([0.01, 0.03, 0.05]) + gauss_noise_iaa = iaa.AdditiveGaussianNoise(scale=scale * 255) + img = gauss_noise_iaa(image=img) + + # motion blur 5% + if np.random.choice((True, False), p=[0.05, 0.95]): + angle = np.random.choice([0, 45, 90, 135, 180, 225, 270, 315]) + motion_blur_iaa = iaa.MotionBlur(k=5, angle=angle) + img = motion_blur_iaa(image=img) + + # jpeg compress 5% + if np.random.choice((True, False), p=[0.05, 0.95]): + jpeg_compress_iaa = iaa.JpegCompression(compression=(10, 50)) + img = jpeg_compress_iaa(image=img) + + # gamma contrast 5% + if np.random.choice((True, False), p=[0.05, 0.95]): + gamma_contrast_iaa = iaa.GammaContrast((0.85, 1.15)) + img = gamma_contrast_iaa(image=img) + + # brightness 5% + if np.random.choice((True, False), p=[0.05, 0.95]): + brightness_iaa = iaa.MultiplyAndAddToBrightness( + mul=(0.85, 1.15), add=(-10, 10)) + img = brightness_iaa(image=img) + + return img + + def augment_set(self, img): + noisy_image = img.copy().astype(np.uint8) + if np.random.choice((True, False), p=[0.6, 0.4]): + aug = iaa.ChangeColorTemperature((1000, 40000)) + noisy_image = aug.augment_image(noisy_image) + + if np.random.choice((True, False), p=[0.8, 0.2]): + aug_seq = iaa.Sequential([ + iaa.Sometimes(0.5, iaa.JpegCompression(compression=(40, 90))), + iaa.Sometimes(0.5, iaa.MotionBlur((3, 7))), + iaa.Sometimes( + 0.5, + iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255))), + ], + random_order=True) + noisy_image = aug_seq.augment_image(noisy_image) + + sometimes = lambda aug: iaa.Sometimes(0.25, aug) + seq = iaa.Sequential([ + sometimes(iaa.AverageBlur(k=(2, 5))), + sometimes(iaa.GammaContrast((0.5, 2.0))) + ], + random_order=True) + + noisy_image = seq(images=noisy_image) + return noisy_image + + +@PIPELINES.register_module() +class FaceKeypointNorm: + """Data augmentation with Norm. 
+ """ + + def __init__(self, input_size=96): + self.input_size = input_size + + def __call__(self, results): + """Perform data augmentation with random image flip.""" + + # for key in results.get('img', []): + if 'img' in results.keys(): + image = results['img'] + image = cv2.resize(image, (self.input_size, self.input_size)) + results['img'] = np.array(image) + + # for key in results.get('target_point', []): + if 'target_point' in results.keys(): + points = results['target_point'] + points[:, 0] = points[:, 0] / image.shape[1] * float( + self.input_size) + points[:, 1] = points[:, 1] / image.shape[0] * float( + self.input_size) + target_point = np.reshape(points, + (points.shape[0] * points.shape[1])) + results['target_point'] = np.array(target_point, np.float32) + else: + results['target_point'] = np.array(np.zeros(212), np.float32) + + # for key in results.get('target_point_mask', []): + if 'target_point_mask' in results.keys(): + points_mask = results['target_point_mask'] + points_mask = points_mask.astype(np.float32) + points_mask = np.reshape( + points_mask, (points_mask.shape[0] * points_mask.shape[1])) + results['target_point_mask'] = points_mask.astype(np.float32) + else: + results['target_point_mask'] = np.array( + np.zeros(212), np.float32) + + # for key in results.get('target_pose', []): + if 'target_pose' in results.keys(): + pose = results['target_pose'] + pose = np.asarray([pose['pitch'], pose['roll'], pose['yaw']]) + results['target_pose'] = pose.astype(np.float32) + else: + results['target_pose'] = np.array(np.zeros(3), np.float32) + + if 'target_pose_mask' not in results.keys(): + results['target_pose_mask'] = np.array(np.zeros(3), np.float32) + + return results + + +@PIPELINES.register_module() +class FaceKeypointRandomAugmentation: + """Data augmentation with random flip. 
+ """ + + def __init__(self, input_size=96): + self.input_size = input_size + + # Data Augment + self.data_aug = FaceKeypointsDataAugumentation(self.input_size) + + def __call__(self, results): + """Perform data augmentation with random image flip.""" + + image = results['img'] + points = results['target_point'] + points_mask = results['target_point_mask'] + pose = results['target_pose'] + pose_mask = results['target_pose_mask'] + overlay_image_path = results['overlay_image_path'] + + if np.random.choice((True, False), p=[0.2, 0.8]): + # overlay + overlay_pos = self.data_aug.random_overlay() + overlay_img_index = np.random.choice(len(overlay_image_path)) + overlay_img_filepath = overlay_image_path[overlay_img_index] + overlay_img = cv2.imread(overlay_img_filepath, + cv2.IMREAD_UNCHANGED) + + (x, y, w, h) = overlay_pos + x1, y1, x2, y2 = x, y, x + w, y + h + overlay_img = cv2.resize(overlay_img, dsize=(w, h)) + overlay_mask = overlay_img[:, :, 3:4] / 255.0 + image[y1:y2, x1:x2, :] = image[y1:y2, x1:x2, :] * ( + 1 - overlay_mask) + overlay_img[:, :, 0:3] * overlay_mask + image = image.astype(np.uint8) + + angle = pose['roll'] + image, points, pose = self.data_aug.aug_rotate( + image, points, pose, angle) # counterclockwise rotate angle + pose['roll'] = angle # reset roll=angle + + if np.random.choice((True, False)): + image_transform, points, _, pose = self.data_aug.aug_flip( + image, points, None, pose) + else: + image_transform = image + + image_transform = self.data_aug.aug_clr_noise_blur(image_transform) + + results['img'] = image_transform + results['target_point'] = points + results['target_pose'] = pose + return results diff --git a/easycv/models/__init__.py b/easycv/models/__init__.py index 1890a46b..d125236d 100644 --- a/easycv/models/__init__.py +++ b/easycv/models/__init__.py @@ -3,6 +3,7 @@ from .backbones import * # noqa: F401,F403 from .builder import build_backbone, build_head, build_loss, build_model from .classification import * from .detection import * +from .face import * from .heads import * from .loss import * from .pose import TopDown diff --git a/easycv/models/backbones/__init__.py b/easycv/models/backbones/__init__.py index 54e0f2f8..ec19f460 100644 --- a/easycv/models/backbones/__init__.py +++ b/easycv/models/backbones/__init__.py @@ -4,6 +4,7 @@ from .bninception import BNInception from .conv_mae_vit import FastConvMAEViT from .conv_vitdet import ConvViTDet from .efficientformer import EfficientFormer +from .face_keypoint_backbone import FaceKeypointBackbone from .genet import PlainNet from .hrnet import HRNet from .inceptionv3 import Inception3 diff --git a/easycv/models/backbones/face_keypoint_backbone.py b/easycv/models/backbones/face_keypoint_backbone.py new file mode 100644 index 00000000..32ef1ac3 --- /dev/null +++ b/easycv/models/backbones/face_keypoint_backbone.py @@ -0,0 +1,90 @@ +import torch.nn as nn + +from easycv.models.registry import BACKBONES +from easycv.models.utils.face_keypoint_utils import InvertedResidual, Residual + + +@BACKBONES.register_module +class FaceKeypointBackbone(nn.Module): + + def __init__(self, + in_channels=3, + out_channels=48, + residual_activation='relu', + inverted_activation='half_v2', + inverted_expand_ratio=2): + super(FaceKeypointBackbone, self).__init__() + self.conv1 = Residual(in_channels, 12, 3, 2, 0) + self.conv2 = Residual(12, 12, 3, 1, 0, activation=residual_activation) + self.conv3 = Residual(12, 12, 3, 1, 1, activation=residual_activation) + self.conv4 = Residual(12, 12, 3, 1, 0, activation=residual_activation) + 
self.conv5 = Residual(12, 24, 3, 2, 0, activation=residual_activation) + self.conv6 = Residual(24, 24, 3, 1, 0, activation=residual_activation) + self.conv7 = Residual(24, 24, 3, 1, 1, activation=residual_activation) + self.conv8 = Residual(24, 24, 3, 1, 1, activation=residual_activation) + self.conv9 = InvertedResidual( + 24, + 48, + 3, + 2, + 0, + expand_ratio=inverted_expand_ratio, + activation=inverted_activation) + self.conv10 = InvertedResidual( + 48, + 48, + 3, + 1, + 0, + expand_ratio=inverted_expand_ratio, + activation=inverted_activation) + self.conv11 = InvertedResidual( + 48, + 48, + 3, + 1, + 1, + expand_ratio=inverted_expand_ratio, + activation=inverted_activation) + self.conv12 = InvertedResidual( + 48, + 48, + 3, + 1, + 1, + expand_ratio=inverted_expand_ratio, + activation=inverted_activation) + self.conv13 = InvertedResidual( + 48, + 48, + 3, + 1, + 1, + expand_ratio=inverted_expand_ratio, + activation=inverted_activation) + self.conv14 = InvertedResidual( + 48, + out_channels, + 3, + 2, + 0, + expand_ratio=inverted_expand_ratio, + activation=inverted_activation) + + def forward(self, x): + x1 = self.conv1(x) + x2 = self.conv2(x1) + x3 = self.conv3(x2) + x4 = self.conv4(x3) + x5 = self.conv5(x4) + x6 = self.conv6(x5) + x7 = self.conv7(x6) + x8 = self.conv8(x7) + x9 = self.conv9(x8) + x10 = self.conv10(x9) + x11 = self.conv11(x10) + x12 = self.conv12(x11) + x13 = self.conv13(x12) + x14 = self.conv14(x13) + + return x14 diff --git a/easycv/models/face/__init__.py b/easycv/models/face/__init__.py new file mode 100644 index 00000000..d7782486 --- /dev/null +++ b/easycv/models/face/__init__.py @@ -0,0 +1,2 @@ +from .face_keypoint import FaceKeypoint +from .head import * diff --git a/easycv/models/face/face_keypoint.py b/easycv/models/face/face_keypoint.py new file mode 100644 index 00000000..42268ba2 --- /dev/null +++ b/easycv/models/face/face_keypoint.py @@ -0,0 +1,103 @@ +import mmcv +import numpy as np + +from easycv.models import builder +from easycv.models.base import BaseModel +from easycv.models.builder import MODELS +from easycv.models.utils.face_keypoint_utils import (get_keypoint_accuracy, + get_pose_accuracy) + + +@MODELS.register_module() +class FaceKeypoint(BaseModel): + + def __init__(self, + backbone, + neck=None, + keypoint_head=None, + pose_head=None, + pretrained=None, + loss_keypoint=None, + loss_pose=None): + super().__init__() + self.pretrained = pretrained + + self.backbone = builder.build_backbone(backbone) + + if neck is not None: + self.neck = builder.build_neck(neck) + + if keypoint_head is not None: + if 'loss_keypoint' not in keypoint_head and loss_keypoint is not None: + keypoint_head['loss_keypoint'] = loss_keypoint + self.keypoint_head = builder.build_head(keypoint_head) + + if pose_head is not None: + if 'loss_pose' not in pose_head and loss_pose is not None: + pose_head['loss_pose'] = loss_pose + self.pose_head = builder.build_head(pose_head) + + @property + def with_neck(self): + """Check if has keypoint_head.""" + return hasattr(self, 'neck') + + @property + def with_keypoint(self): + """Check if has keypoint_head.""" + return hasattr(self, 'keypoint_head') + + @property + def with_pose(self): + """Check if has pose_head.""" + return hasattr(self, 'pose_head') + + def forward_train(self, img, target_point, target_point_mask, target_pose, + target_pose_mask, **kwargs): + """Defines the computation performed at every call when training.""" + output = self.backbone(img) + + if self.with_neck: + output = self.neck(output) + if self.with_keypoint: 
+ output_points = self.keypoint_head(output) + if self.with_pose: + output_pose = self.pose_head(output) + + target_point = target_point * target_point_mask + target_pose = target_pose * target_pose_mask + + losses = dict() + if self.with_keypoint: + keypoint_losses = self.keypoint_head.get_loss( + output_points, target_point, target_point_mask, target_pose) + losses.update(keypoint_losses) + keypoint_accuracy = get_keypoint_accuracy(output_points, + target_point) + losses.update(keypoint_accuracy) + + if self.with_pose: + output_pose = output_pose * 180.0 / np.pi + output_pose = output_pose * target_pose_mask + + pose_losses = self.pose_head.get_loss(output_pose, target_pose) + losses.update(pose_losses) + pose_accuracy = get_pose_accuracy(output_pose, target_pose) + losses.update(pose_accuracy) + return losses + + def forward_test(self, img, **kwargs): + """Defines the computation performed at every call when testing.""" + + output = self.backbone(img) + if self.with_neck: + output = self.neck(output) + if self.with_keypoint: + output_points = self.keypoint_head(output) + if self.with_pose: + output_pose = self.pose_head(output) + + ret = {} + ret['point'] = output_points + ret['pose'] = output_pose + return ret diff --git a/easycv/models/face/head/__init__.py b/easycv/models/face/head/__init__.py new file mode 100644 index 00000000..504755cb --- /dev/null +++ b/easycv/models/face/head/__init__.py @@ -0,0 +1,2 @@ +from .face_keypoint_head import FaceKeypointHead +from .face_keypoint_pose_head import FacePoseHead diff --git a/easycv/models/face/head/face_keypoint_head.py b/easycv/models/face/head/face_keypoint_head.py new file mode 100644 index 00000000..a75cfa8c --- /dev/null +++ b/easycv/models/face/head/face_keypoint_head.py @@ -0,0 +1,68 @@ +import copy + +import numpy as np +import torch +import torch.nn as nn + +from easycv.models.builder import HEADS, build_loss +from easycv.models.utils.face_keypoint_utils import (InvertedResidual, View, + conv_bn, conv_no_relu, + get_keypoint_accuracy) + + +@HEADS.register_module +class FaceKeypointHead(nn.Module): + + def __init__( + self, + mean_face, + loss_keypoint, + in_channels=48, + out_channels=212, + input_size=96, + inverted_expand_ratio=2, + inverted_activation='half_v2', + ): + super(FaceKeypointHead, self).__init__() + self.input_size = input_size + self.face_mean_shape = copy.deepcopy(np.asarray(mean_face)) + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + self.branches = [] + + self.loss = build_loss(loss_keypoint) + + # points + self.branches.append( + nn.Sequential( + InvertedResidual( + in_channels, + 96, + 3, + 1, + 1, + expand_ratio=inverted_expand_ratio, + activation=inverted_activation), + View((-1, 96 * 3 * 3, 1, 1)), conv_bn(96 * 3 * 3, 128, 1, 1, + 0), + conv_bn(128, 128, 1, 1, 0), + conv_no_relu(128, out_channels, 1, 1, 0), + View((-1, out_channels)))) + self.branches = nn.ModuleList(self.branches) + + def get_loss(self, output, target_point, target_point_mask, target_pose): + losses = dict() + loss = self.loss(output * target_point_mask, target_point, target_pose) + losses['point_loss'] = loss + + return losses + + def get_accuracy(self, output, target_point): + return get_keypoint_accuracy(output, target_point) + + def forward(self, x): + point = self.branches[0](x) + point = point * 0.5 + torch.from_numpy(self.face_mean_shape).to( + self.device) + point = point * self.input_size + + return point diff --git a/easycv/models/face/head/face_keypoint_pose_head.py 
b/easycv/models/face/head/face_keypoint_pose_head.py new file mode 100644 index 00000000..4adde695 --- /dev/null +++ b/easycv/models/face/head/face_keypoint_pose_head.py @@ -0,0 +1,55 @@ +import numpy as np +import torch +import torch.nn as nn + +from easycv.models.builder import HEADS, build_loss +from easycv.models.utils.face_keypoint_utils import (InvertedResidual, View, + conv_bn, conv_no_relu, + get_pose_accuracy) + + +@HEADS.register_module +class FacePoseHead(nn.Module): + + def __init__( + self, + loss_pose, + in_channels=48, + out_channels=3, + inverted_expand_ratio=2, + inverted_activation='half_v2', + ): + super(FacePoseHead, self).__init__() + self.branches = [] + + self.loss = build_loss(loss_pose) + + # pose + self.branches.append( + nn.Sequential( + InvertedResidual( + in_channels, + 48, + 3, + 1, + 1, + expand_ratio=inverted_expand_ratio, + activation=inverted_activation), + View((-1, 48 * 3 * 3, 1, 1)), conv_bn(48 * 3 * 3, 48, 1, 1, 0), + conv_bn(48, 48, 1, 1, 0), + conv_no_relu(48, out_channels, 1, 1, 0), + View((-1, out_channels)))) + self.branches = nn.ModuleList(self.branches) + + def get_loss(self, output, target_pose): + losses = dict() + loss = self.loss(output, target_pose) + losses['pose_loss'] = loss + + return losses + + def get_accuracy(self, output, target_pose): + return get_pose_accuracy(output, target_pose) + + def forward(self, x): + return self.branches[0](x) diff --git a/easycv/models/loss/__init__.py b/easycv/models/loss/__init__.py index f1b34084..4a638efd 100644 --- a/easycv/models/loss/__init__.py +++ b/easycv/models/loss/__init__.py @@ -1,5 +1,6 @@ # Copyright (c) Alibaba, Inc. and its affiliates. from .cross_entropy_loss import CrossEntropyLoss +from .face_keypoint_loss import FacePoseLoss, WingLossWithPose from .focal_loss import FocalLoss from .iou_loss import GIoULoss, IoULoss from .mse_loss import JointsMSELoss diff --git a/easycv/models/loss/face_keypoint_loss.py b/easycv/models/loss/face_keypoint_loss.py new file mode 100644 index 00000000..8d4a80c5 --- /dev/null +++ b/easycv/models/loss/face_keypoint_loss.py @@ -0,0 +1,91 @@ +import copy +import math + +import numpy as np +import torch +import torch.nn as nn + +from easycv.models.builder import LOSSES + +CONSTANT_CONTOUR = 66 +CONSTANT_EYEBROW = 18 +CONSTANT_EYE = 18 +CONSTANT_NOSE = 30 +CONSTANT_LIPS = 40 +CONSTANT_EYE_CENTER = 4 + + +@LOSSES.register_module() +class WingLossWithPose(nn.Module): + + def __init__(self, + num_points=106, + left_eye_left_corner_index=66, + right_eye_right_corner_index=79, + points_weight=1.0, + contour_weight=1.5, + eyebrow_weight=1.5, + eye_weight=1.7, + nose_weight=1.3, + lip_weight=1.7, + omega=10, + epsilon=2): + super(WingLossWithPose, self).__init__() + self.omega = omega + self.epsilon = epsilon + + self.num_points = num_points + self.left_eye_left_corner_index = left_eye_left_corner_index + self.right_eye_right_corner_index = right_eye_right_corner_index + self.points_weight = points_weight + contour_weight = np.full(CONSTANT_CONTOUR, contour_weight) + eyebrow_left_weight = np.full(CONSTANT_EYEBROW, eyebrow_weight) + eyebrow_right_weight = np.full(CONSTANT_EYEBROW, eyebrow_weight) + nose_weight = np.full(CONSTANT_NOSE, nose_weight) + eye_left_weight = np.full(CONSTANT_EYE, eye_weight) + eye_right_weight = np.full(CONSTANT_EYE, eye_weight) + lips_weight = np.full(CONSTANT_LIPS, lip_weight) + eye_center_weight = np.full(CONSTANT_EYE_CENTER, eye_weight) + part_weight = np.concatenate( + (contour_weight, eyebrow_left_weight, eyebrow_right_weight, + 
nose_weight, eye_left_weight, eye_right_weight, lips_weight, + eye_center_weight), + axis=0) + + self.part_weight = None + if part_weight is not None: + self.part_weight = torch.from_numpy(part_weight).cuda() + + def forward(self, pred, target, pose): + weight = 5.0 * (1.0 - torch.cos(pose * np.pi / 180.0)) + 1.0 + weight = torch.sum(weight, dim=1) / 3.0 + weight = weight.view((weight.shape[0], 1)) + + if self.part_weight is not None: + weight = weight * self.part_weight + + y = target + y_hat = pred + delta_y = (y - y_hat).abs() * weight + delta_y1 = delta_y[delta_y < self.omega] + delta_y2 = delta_y[delta_y >= self.omega] + loss1 = self.omega * torch.log(1 + delta_y1 / self.epsilon) + C = self.omega - self.omega * math.log(1 + self.omega / self.epsilon) + loss = delta_y2 - C + result = self.points_weight * (loss1.sum() + loss.sum()) / ( + len(loss1) + len(loss)) + + return result + + +@LOSSES.register_module() +class FacePoseLoss(nn.Module): + + def __init__(self, pose_weight=1.0): + super(FacePoseLoss, self).__init__() + self.criterion = nn.MSELoss() + self.pose_weight = pose_weight + + def forward(self, pred, target): + result = self.pose_weight * self.criterion(pred, target) + return result diff --git a/easycv/models/utils/__init__.py b/easycv/models/utils/__init__.py index d29512a9..f80361fc 100644 --- a/easycv/models/utils/__init__.py +++ b/easycv/models/utils/__init__.py @@ -5,6 +5,10 @@ from .conv_ws import ConvWS2d, conv_ws_2d from .dist_utils import (DistributedLossWrapper, DistributedMinerWrapper, get_world_size, is_dist_avail_and_initialized, reduce_mean) +from .face_keypoint_utils import (ION, InvertedResidual, Residual, Softmax, + View, conv_bn, conv_no_relu, + get_keypoint_accuracy, get_pose_accuracy, + pose_accuracy) from .gather_layer import GatherLayer from .init_weights import _init_weights, trunc_normal_ from .multi_pooling import GeMPooling, MultiAvgPooling, MultiPooling diff --git a/easycv/models/utils/face_keypoint_utils.py b/easycv/models/utils/face_keypoint_utils.py new file mode 100644 index 00000000..c094afbc --- /dev/null +++ b/easycv/models/utils/face_keypoint_utils.py @@ -0,0 +1,240 @@ +import copy +import math + +import numpy as np +import torch +import torch.nn as nn + + +def conv_bn(inp, oup, kernel, stride, padding=1): + return nn.Sequential( + nn.Conv2d(inp, oup, kernel, stride, padding, bias=False), + nn.BatchNorm2d(oup), nn.PReLU(oup)) + + +def conv_no_relu(inp, oup, kernel, stride, padding=1): + return nn.Sequential( + nn.Conv2d(inp, oup, kernel, stride, padding, bias=False), + nn.BatchNorm2d(oup)) + + +class View(nn.Module): + + def __init__(self, shape): + super(View, self).__init__() + self.shape = shape + + def forward(self, x): + return x.view(*self.shape) + + +class Softmax(nn.Module): + + def __init__(self, dim): + super(Softmax, self).__init__() + self.softmax = nn.Softmax(dim) + + def forward(self, x): + return self.softmax(x) + + +class InvertedResidual(nn.Module): + + def __init__(self, + inp, + oup, + kernel_size, + stride, + padding, + expand_ratio=2, + use_connect=False, + activation='relu'): + super(InvertedResidual, self).__init__() + + hid_channels = int(inp * expand_ratio) + if activation == 'relu': + self.conv = nn.Sequential( + nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(hid_channels), nn.ReLU(inplace=True), + nn.Conv2d( + hid_channels, + hid_channels, + kernel_size, + stride, + padding, + groups=hid_channels, + bias=False), nn.BatchNorm2d(hid_channels), + nn.ReLU(inplace=True), + nn.Conv2d(hid_channels, 
oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup)) + elif activation == 'prelu': + self.conv = nn.Sequential( + nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(hid_channels), nn.PReLU(hid_channels), + nn.Conv2d( + hid_channels, + hid_channels, + kernel_size, + stride, + padding, + groups=hid_channels, + bias=False), nn.BatchNorm2d(hid_channels), + nn.PReLU(hid_channels), + nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup)) + elif activation == 'half_v1': + self.conv = nn.Sequential( + nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(hid_channels), nn.ReLU(inplace=True), + nn.Conv2d( + hid_channels, + hid_channels, + kernel_size, + stride, + padding, + groups=hid_channels, + bias=False), nn.BatchNorm2d(hid_channels), + nn.PReLU(hid_channels), + nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup)) + elif activation == 'half_v2': + self.conv = nn.Sequential( + nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(hid_channels), nn.PReLU(hid_channels), + nn.Conv2d( + hid_channels, + hid_channels, + kernel_size, + stride, + padding, + groups=hid_channels, + bias=False), nn.BatchNorm2d(hid_channels), + nn.ReLU(inplace=True), + nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup)) + self.use_connect = use_connect + + def forward(self, x): + if self.use_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class Residual(nn.Module): + + def __init__(self, + inp, + oup, + kernel_size, + stride, + padding, + use_connect=False, + activation='relu'): + super(Residual, self).__init__() + + self.use_connect = use_connect + + if activation == 'relu': + self.conv = nn.Sequential( + nn.Conv2d( + inp, + inp, + kernel_size, + stride, + padding, + groups=inp, + bias=False), nn.BatchNorm2d(inp), nn.ReLU(inplace=True), + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), + nn.ReLU(inplace=True)) + elif activation == 'prelu': + self.conv = nn.Sequential( + nn.Conv2d( + inp, + inp, + kernel_size, + stride, + padding, + groups=inp, + bias=False), nn.BatchNorm2d(inp), nn.PReLU(inp), + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), + nn.PReLU(oup)) + elif activation == 'half_v1': + self.conv = nn.Sequential( + nn.Conv2d( + inp, + inp, + kernel_size, + stride, + padding, + groups=inp, + bias=False), nn.BatchNorm2d(inp), nn.ReLU(inplace=True), + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), + nn.PReLU(oup)) + elif activation == 'half_v2': + self.conv = nn.Sequential( + nn.Conv2d( + inp, + inp, + kernel_size, + stride, + padding, + groups=inp, + bias=False), nn.BatchNorm2d(inp), nn.PReLU(inp), + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup), + nn.ReLU(inplace=True)) + + def forward(self, x): + if self.use_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +def pose_accuracy(output, target): + with torch.no_grad(): + output = output.detach().cpu().numpy() + target = target.detach().cpu().numpy() + + acc = np.mean(np.abs(output - target)) + return acc + + +def ION(output, target, left_eye_left_coner_idx, right_eye_right_corner_idx, + num_pts): + with torch.no_grad(): + output = output.view(-1, num_pts, 2).cpu().numpy() + target = target.view(-1, num_pts, 2).cpu().numpy() + + interocular = target[:, + left_eye_left_coner_idx] - target[:, + right_eye_right_corner_idx] + interocular = np.sqrt( + np.square(interocular[:, 0]) + np.square(interocular[:, 1])) + 1e-5 + dist = target - output + dist = 
np.sqrt(np.square(dist[:, :, 0]) + np.square(dist[:, :, 1])) + dist = np.sum(dist, axis=1) + nme = dist / (interocular * num_pts) + + return np.mean(nme) + + +def get_keypoint_accuracy(output, target_point): + accuracy = dict() + num_points = 106 + left_eye_left_corner_index = 66 + right_eye_right_corner_index = 79 + + nme = ION(output, target_point, left_eye_left_corner_index, + right_eye_right_corner_index, num_points) + + accuracy['nme'] = nme + + return accuracy + + +def get_pose_accuracy(output, target_pose): + accuracy = dict() + pose_acc = pose_accuracy(output, target_pose) + accuracy['pose_acc'] = float(pose_acc) + return accuracy diff --git a/easycv/predictors/__init__.py b/easycv/predictors/__init__.py index 9577e75d..973971b7 100644 --- a/easycv/predictors/__init__.py +++ b/easycv/predictors/__init__.py @@ -2,6 +2,7 @@ from .classifier import TorchClassifier from .detector import (TorchFaceDetector, TorchYoloXClassifierPredictor, TorchYoloXPredictor) +from .face_keypoints_predictor import FaceKeypointsPredictor from .feature_extractor import (TorchFaceAttrExtractor, TorchFaceFeatureExtractor, TorchFeatureExtractor) diff --git a/easycv/predictors/base.py b/easycv/predictors/base.py index 9bc64bad..d1efbbe2 100644 --- a/easycv/predictors/base.py +++ b/easycv/predictors/base.py @@ -113,6 +113,7 @@ class PredictorV2(object): device=None, save_results=False, save_path=None, + mode='rgb', *args, **kwargs): self.model_path = model_path @@ -135,6 +136,7 @@ class PredictorV2(object): self.model = self.prepare_model() self.processor = self.build_processor() self._load_op = None + self.mode = mode def prepare_model(self): """Build model from config file by default. @@ -182,7 +184,7 @@ class PredictorV2(object): } """ if self._load_op is None: - load_cfg = dict(type='LoadImage', mode='rgb') + load_cfg = dict(type='LoadImage', mode=self.mode) self._load_op = build_from_cfg(load_cfg, PIPELINES) if not isinstance(input, str): diff --git a/easycv/predictors/face_keypoints_predictor.py b/easycv/predictors/face_keypoints_predictor.py new file mode 100644 index 00000000..d1972031 --- /dev/null +++ b/easycv/predictors/face_keypoints_predictor.py @@ -0,0 +1,120 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. 
+ +import copy +import os + +import cv2 +import numpy as np +import torch +from torchvision.transforms import Compose + +from easycv.datasets.registry import PIPELINES +from easycv.models import build_model +from easycv.predictors.builder import PREDICTORS +from easycv.predictors.interface import PredictorInterface +from easycv.utils.checkpoint import load_checkpoint +from easycv.utils.config_tools import mmcv_config_fromfile +from easycv.utils.registry import build_from_cfg +from ..models import * +from .base import PredictorV2 + +face_contour_point_index = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 +] +left_eye_brow_point_index = [33, 34, 35, 36, 37, 38, 39, 40, 41, 33] +right_eye_brow_point_index = [42, 43, 44, 45, 46, 47, 48, 49, 50, 42] +left_eye_point_index = [66, 67, 68, 69, 70, 71, 72, 73, 66] +right_eye_point_index = [75, 76, 77, 78, 79, 80, 81, 82, 75] +nose_bridge_point_index = [51, 52, 53, 54] +nose_contour_point_index = [55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65] +mouth_outer_point_index = [84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 84] +mouth_inter_point_index = [96, 97, 98, 99, 100, 101, 102, 103, 96] + + +@PREDICTORS.register_module() +class FaceKeypointsPredictor(PredictorV2): + """Predict pipeline for face keypoint + Args: + model_path (str): Path of model path + model_config (str): config file path for model and processor to init. Defaults to None. + """ + + def __init__(self, + model_path, + model_config, + batch_size=1, + device=None, + save_results=False, + save_path=None, + mode='bgr'): + super(FaceKeypointsPredictor, self).__init__( + model_path, + model_config, + batch_size=batch_size, + device=device, + save_results=save_results, + save_path=save_path, + mode=mode) + + self.input_size = self.cfg.IMAGE_SIZE + self.point_number = self.cfg.POINT_NUMBER + + def show_result(self, img, points, scale=4.0, save_path=None): + """Draw `result` over `img`. + + Args: + img (str or Tensor): The image to be displayed. + result (Tensor): The face keypoints to draw over `img`. 
+ scale: zoom in or out scale + save_path: path to save drawned 'img' + Returns: + img (Tensor): Only if not `show` or `out_file` + """ + + img = cv2.imread(img) + img = img.copy() + h, w, c = img.shape + scale_h = h / self.input_size + scale_w = w / self.input_size + + points = points.view(-1, self.point_number, 2).cpu().numpy()[0] + for index in range(len(points)): + points[index][0] *= scale_w + points[index][1] *= scale_h + + image = cv2.resize(img, dsize=None, fx=scale, fy=scale) + + def draw_line(point_index, image, point): + for i in range(len(point_index) - 1): + cur_index = point_index[i] + next_index = point_index[i + 1] + cur_pt = (int(point[cur_index][0] * scale), + int(point[cur_index][1] * scale)) + next_pt = (int(point[next_index][0] * scale), + int(point[next_index][1] * scale)) + cv2.line(image, cur_pt, next_pt, (0, 0, 255), thickness=2) + + draw_line(face_contour_point_index, image, points) + draw_line(left_eye_brow_point_index, image, points) + draw_line(right_eye_brow_point_index, image, points) + draw_line(left_eye_point_index, image, points) + draw_line(right_eye_point_index, image, points) + draw_line(nose_bridge_point_index, image, points) + draw_line(nose_contour_point_index, image, points) + draw_line(mouth_outer_point_index, image, points) + draw_line(mouth_inter_point_index, image, points) + + size = len(points) + for i in range(size): + x = int(points[i][0]) + y = int(points[i][1]) + cv2.putText(image, str(i), (int(x * scale), int(y * scale)), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) + cv2.circle(image, (int(x * scale), int(y * scale)), 2, (0, 255, 0), + cv2.FILLED) + + if save_path is not None: + cv2.imwrite(save_path, image) + + return image diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 9c7fd6d7..60075ec5 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -3,6 +3,7 @@ dataclasses einops future h5py +imgaug json_tricks numpy opencv-python diff --git a/tests/predictors/test_face_keypoints_predictor.py b/tests/predictors/test_face_keypoints_predictor.py new file mode 100644 index 00000000..6523bdc2 --- /dev/null +++ b/tests/predictors/test_face_keypoints_predictor.py @@ -0,0 +1,39 @@ +# Copyright (c) Alibaba, Inc. and its affiliates. + +import copy +import os +import tempfile +import unittest + +import cv2 +import numpy as np +from PIL import Image + +from easycv.predictors.face_keypoints_predictor import FaceKeypointsPredictor + + +class FaceKeypointsPredictorWithoutDetectorTest(unittest.TestCase): + + def setUp(self): + print(('Testing %s.%s' % (type(self).__name__, self._testMethodName))) + self.image_path = './data/test/face_2d_keypoints/data/002253.png' + self.save_image_path = './data/test/face_2d_keypoints/data/result_002253.png' + self.model_path = './data/test/face_2d_keypoints/models/epoch_580.pth' + self.model_config_path = './configs/face/face_96x96_wingloss.py' + + def test_single(self): + predict_pipeline = FaceKeypointsPredictor( + model_path=self.model_path, model_config=self.model_config_path) + + output = predict_pipeline(self.image_path)[0] + output_keypoints = output['point'] + output_pose = output['pose'] + image_show = predict_pipeline.show_result( + self.image_path, + output_keypoints, + scale=2, + save_path=self.save_image_path) + + +if __name__ == '__main__': + unittest.main()
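For reference, the piecewise form computed by `WingLossWithPose.forward` in this patch can be sketched without the pose and per-part weighting as follows. This is a minimal illustrative sketch, not part of the patch: the function name `wing_loss` and the assumption that `pred`/`target` are flattened point tensors of the same shape are mine.

    import math
    import torch

    def wing_loss(pred, target, omega=10.0, epsilon=2.0):
        # Minimal Wing loss sketch: logarithmic regime for small residuals,
        # linear regime (offset by C for continuity) for large ones.
        # Omits the pose-dependent and per-part weights applied by
        # WingLossWithPose above.
        delta = (target - pred).abs()
        c = omega - omega * math.log(1.0 + omega / epsilon)
        return torch.where(delta < omega,
                           omega * torch.log(1.0 + delta / epsilon),
                           delta - c).mean()

With the config values omega=10 and epsilon=2, C = 10 - 10*ln(6) ≈ -7.92, so the logarithmic and linear branches meet at |delta| = omega and the loss stays continuous, which is the property the constant C exists to guarantee.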