mirror of
https://github.com/alibaba/EasyCV.git
synced 2025-06-03 14:49:00 +08:00
add face-2d-keypoints
Link: https://code.alibaba-inc.com/pai-vision/EasyCV/codereview/9809249 * add face 2d keypoint config
This commit is contained in:
parent
bc64851614
commit
2e8fc44dc1
236
configs/face/face_96x96_wingloss.py
Normal file
236
configs/face/face_96x96_wingloss.py
Normal file
@ -0,0 +1,236 @@
|
|||||||
|
# model settings
|
||||||
|
POINT_NUMBER = 106
|
||||||
|
MEAN_FACE = [
|
||||||
|
0.05486667535113006, 0.24441904048908245, 0.05469932714062696,
|
||||||
|
0.30396829196709935, 0.05520653400164321, 0.3643191463607746,
|
||||||
|
0.05865501342257397, 0.42453849020500306, 0.0661603899137523,
|
||||||
|
0.48531377442945767, 0.07807677169271177, 0.5452126843738523,
|
||||||
|
0.09333319368757653, 0.6047840615432064, 0.11331425394034209,
|
||||||
|
0.6631144309665994, 0.13897813867699352, 0.7172296230155276,
|
||||||
|
0.17125811033538194, 0.767968859462583, 0.20831698519371536,
|
||||||
|
0.8146603379935117, 0.24944621000897876, 0.857321261721953,
|
||||||
|
0.2932993820558674, 0.8973900596678597, 0.33843820185594653,
|
||||||
|
0.9350576242126986, 0.38647802623495553, 0.966902971122812,
|
||||||
|
0.4411974776504609, 0.9878629960611088, 0.5000390697219397,
|
||||||
|
0.9934886214875595, 0.5588590024515473, 0.9878510782414189,
|
||||||
|
0.6135829360035883, 0.9668655595323074, 0.6616294188166414,
|
||||||
|
0.9350065330378543, 0.7067734980023662, 0.8973410411573094,
|
||||||
|
0.7506167730772516, 0.8572957679511382, 0.7917579157122047,
|
||||||
|
0.8146281598803492, 0.8288026446367324, 0.7679019642224981,
|
||||||
|
0.8610918526053805, 0.7171624168757985, 0.8867491048162915,
|
||||||
|
0.6630344261248556, 0.9067293813428708, 0.6047095492618413,
|
||||||
|
0.9219649147678989, 0.5451295187190602, 0.9338619041815587,
|
||||||
|
0.4852292097262674, 0.9413455695142587, 0.424454780475834,
|
||||||
|
0.9447753107545577, 0.3642347111991026, 0.9452649776939869,
|
||||||
|
0.30388458223793025, 0.9450854849661369, 0.24432737691068557,
|
||||||
|
0.1594802473020129, 0.17495177946520288, 0.2082918411850002,
|
||||||
|
0.12758378330875153, 0.27675902873293057, 0.11712230823088154,
|
||||||
|
0.34660582049732336, 0.12782553369032904, 0.4137234315527489,
|
||||||
|
0.14788458441422778, 0.4123890243720449, 0.18814226684806626,
|
||||||
|
0.3498927810760776, 0.17640650480816664, 0.28590212091591866,
|
||||||
|
0.16895271174960227, 0.22193967489846017, 0.16985862149585013,
|
||||||
|
0.5861805004572298, 0.147863456192582, 0.6532904167464643,
|
||||||
|
0.12780412047734288, 0.723142364263288, 0.11709102395419578,
|
||||||
|
0.7916076475508984, 0.12753867695205595, 0.8404440227263494,
|
||||||
|
0.17488715120168932, 0.7779848023963316, 0.1698261195288917,
|
||||||
|
0.7140264757991571, 0.1689377237959271, 0.650024882334848,
|
||||||
|
0.17640581823811927, 0.5875270068157493, 0.18815421057605972,
|
||||||
|
0.4999687027691624, 0.2770570778583906, 0.49996466107378934,
|
||||||
|
0.35408433007759227, 0.49996725190415664, 0.43227025345368053,
|
||||||
|
0.49997367716346774, 0.5099309118810921, 0.443147025685285,
|
||||||
|
0.2837021691260901, 0.4079306716593004, 0.4729519900478952,
|
||||||
|
0.3786223176615041, 0.5388017782630576, 0.4166237366074797,
|
||||||
|
0.5822229552544941, 0.4556754522760756, 0.5887956328134262,
|
||||||
|
0.49998730493119997, 0.5951855531982454, 0.5443300921009105,
|
||||||
|
0.5887796732983633, 0.5833722476054509, 0.582200985012979,
|
||||||
|
0.6213509190608012, 0.5387760772258134, 0.5920137550293199,
|
||||||
|
0.4729325070035326, 0.5567854054587345, 0.28368589871138317,
|
||||||
|
0.23395988420439123, 0.275313734012504, 0.27156519109550253,
|
||||||
|
0.2558735678926061, 0.31487949633428597, 0.2523033259214858,
|
||||||
|
0.356919009399118, 0.2627342680634766, 0.3866625969903256,
|
||||||
|
0.2913618036573405, 0.3482919069920915, 0.3009936818974329,
|
||||||
|
0.3064437008415846, 0.3037349617842158, 0.26724000706363993,
|
||||||
|
0.2961896087804692, 0.3135744691699477, 0.27611103614975246,
|
||||||
|
0.6132904312551143, 0.29135144033587107, 0.6430396927648264,
|
||||||
|
0.2627079452269443, 0.6850713556136455, 0.2522730391144915,
|
||||||
|
0.728377707003201, 0.25583118190779625, 0.7660035591791254,
|
||||||
|
0.27526375689471777, 0.7327054300488236, 0.2961495286346863,
|
||||||
|
0.6935171517115648, 0.3036951925380769, 0.6516533228539426,
|
||||||
|
0.3009921014909089, 0.6863983789278025, 0.2760904908649394,
|
||||||
|
0.35811903020866753, 0.7233174007629063, 0.4051199834269763,
|
||||||
|
0.6931800846807724, 0.4629631471997891, 0.6718031951363689,
|
||||||
|
0.5000016063148277, 0.6799150331999366, 0.5370506360177653,
|
||||||
|
0.6717809139952097, 0.5948714927411151, 0.6931581144392573,
|
||||||
|
0.6418878095835022, 0.7232890570786875, 0.6088129582142587,
|
||||||
|
0.7713407215524752, 0.5601450388292929, 0.8052499757498277,
|
||||||
|
0.5000181358125715, 0.8160749831906926, 0.4398905591799545,
|
||||||
|
0.8052697696938342, 0.39120318265892984, 0.771375905028864,
|
||||||
|
0.36888771299734613, 0.7241751210643214, 0.4331097084010058,
|
||||||
|
0.7194543690519717, 0.5000188612450743, 0.7216823277180712,
|
||||||
|
0.566895861884284, 0.7194302225129479, 0.631122598507516,
|
||||||
|
0.7241462073974219, 0.5678462302796355, 0.7386355816766528,
|
||||||
|
0.5000082906571756, 0.7479600838019628, 0.43217532542902076,
|
||||||
|
0.7386538729390463, 0.31371761254774383, 0.2753328284323114,
|
||||||
|
0.6862487843823917, 0.2752940437017121
|
||||||
|
]
|
||||||
|
IMAGE_SIZE = 96
|
||||||
|
|
||||||
|
loss_config = dict(
|
||||||
|
num_points=POINT_NUMBER,
|
||||||
|
left_eye_left_corner_index=66,
|
||||||
|
right_eye_right_corner_index=79,
|
||||||
|
points_weight=1.0,
|
||||||
|
contour_weight=1.5,
|
||||||
|
eyebrow_weight=1.5,
|
||||||
|
eye_weight=1.7,
|
||||||
|
nose_weight=1.3,
|
||||||
|
lip_weight=1.7,
|
||||||
|
omega=10,
|
||||||
|
epsilon=2)
|
||||||
|
|
||||||
|
model = dict(
|
||||||
|
type='FaceKeypoint',
|
||||||
|
backbone=dict(
|
||||||
|
type='FaceKeypointBackbone',
|
||||||
|
in_channels=3,
|
||||||
|
out_channels=48,
|
||||||
|
residual_activation='relu',
|
||||||
|
inverted_activation='half_v2',
|
||||||
|
inverted_expand_ratio=2,
|
||||||
|
),
|
||||||
|
keypoint_head=dict(
|
||||||
|
type='FaceKeypointHead',
|
||||||
|
in_channels=48,
|
||||||
|
out_channels=POINT_NUMBER * 2,
|
||||||
|
input_size=IMAGE_SIZE,
|
||||||
|
inverted_expand_ratio=2,
|
||||||
|
inverted_activation='half_v2',
|
||||||
|
mean_face=MEAN_FACE,
|
||||||
|
loss_keypoint=dict(type='WingLossWithPose', **loss_config),
|
||||||
|
),
|
||||||
|
pose_head=dict(
|
||||||
|
type='FacePoseHead',
|
||||||
|
in_channels=48,
|
||||||
|
out_channels=3,
|
||||||
|
inverted_expand_ratio=2,
|
||||||
|
inverted_activation='half_v2',
|
||||||
|
loss_pose=dict(type='FacePoseLoss', pose_weight=0.01),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
train_pipeline = [
|
||||||
|
dict(type='FaceKeypointRandomAugmentation', input_size=IMAGE_SIZE),
|
||||||
|
dict(type='FaceKeypointNorm', input_size=IMAGE_SIZE),
|
||||||
|
dict(type='MMToTensor'),
|
||||||
|
dict(
|
||||||
|
type='NormalizeTensor',
|
||||||
|
mean=[0.4076, 0.458, 0.485],
|
||||||
|
std=[1.0, 1.0, 1.0]),
|
||||||
|
dict(
|
||||||
|
type='Collect',
|
||||||
|
keys=[
|
||||||
|
'img', 'target_point', 'target_point_mask', 'target_pose',
|
||||||
|
'target_pose_mask'
|
||||||
|
])
|
||||||
|
]
|
||||||
|
|
||||||
|
val_pipeline = [
|
||||||
|
dict(type='FaceKeypointNorm', input_size=IMAGE_SIZE),
|
||||||
|
dict(type='MMToTensor'),
|
||||||
|
dict(
|
||||||
|
type='NormalizeTensor',
|
||||||
|
mean=[0.4076, 0.458, 0.485],
|
||||||
|
std=[1.0, 1.0, 1.0]),
|
||||||
|
dict(
|
||||||
|
type='Collect',
|
||||||
|
keys=[
|
||||||
|
'img', 'target_point', 'target_point_mask', 'target_pose',
|
||||||
|
'target_pose_mask'
|
||||||
|
])
|
||||||
|
]
|
||||||
|
test_pipeline = val_pipeline
|
||||||
|
|
||||||
|
data_root = 'path/to/face_landmark_data/'
|
||||||
|
|
||||||
|
data_cfg = dict(
|
||||||
|
data_root=data_root,
|
||||||
|
input_size=IMAGE_SIZE,
|
||||||
|
)
|
||||||
|
|
||||||
|
data = dict(
|
||||||
|
imgs_per_gpu=512,
|
||||||
|
workers_per_gpu=2,
|
||||||
|
train=dict(
|
||||||
|
type='FaceKeypointDataset',
|
||||||
|
data_source=dict(
|
||||||
|
type='FaceKeypintSource',
|
||||||
|
train=True,
|
||||||
|
data_range=[0, 30000], # [0,30000] [0,478857]
|
||||||
|
data_cfg=data_cfg,
|
||||||
|
),
|
||||||
|
pipeline=train_pipeline),
|
||||||
|
val=dict(
|
||||||
|
type='FaceKeypointDataset',
|
||||||
|
data_source=dict(
|
||||||
|
type='FaceKeypintSource',
|
||||||
|
train=False,
|
||||||
|
data_range=[478857, 488857],
|
||||||
|
# data_range=[478857, 478999], #[478857, 478999] [478857, 488857]
|
||||||
|
data_cfg=data_cfg,
|
||||||
|
),
|
||||||
|
pipeline=val_pipeline),
|
||||||
|
test=dict(
|
||||||
|
type='FaceKeypointDataset',
|
||||||
|
data_source=dict(
|
||||||
|
type='FaceKeypintSource',
|
||||||
|
train=False,
|
||||||
|
data_range=[478857, 488857],
|
||||||
|
# data_range=[478857, 478999], #[478857, 478999] [478857, 488857]
|
||||||
|
data_cfg=data_cfg,
|
||||||
|
),
|
||||||
|
pipeline=test_pipeline),
|
||||||
|
)
|
||||||
|
|
||||||
|
# runtime setting
|
||||||
|
optimizer = dict(
|
||||||
|
type='Adam',
|
||||||
|
lr=0.005,
|
||||||
|
)
|
||||||
|
optimizer_config = dict(grad_clip=None)
|
||||||
|
lr_config = dict(
|
||||||
|
policy='CosineAnnealing',
|
||||||
|
min_lr=0.00001,
|
||||||
|
warmup='linear',
|
||||||
|
warmup_iters=10,
|
||||||
|
warmup_ratio=0.001,
|
||||||
|
warmup_by_epoch=True,
|
||||||
|
by_epoch=True)
|
||||||
|
|
||||||
|
total_epochs = 1000
|
||||||
|
checkpoint_config = dict(interval=10)
|
||||||
|
log_config = dict(
|
||||||
|
interval=5, hooks=[
|
||||||
|
dict(type='TextLoggerHook'),
|
||||||
|
])
|
||||||
|
|
||||||
|
log_level = 'INFO'
|
||||||
|
load_from = None
|
||||||
|
resume_from = None
|
||||||
|
dist_params = dict(backend='nccl')
|
||||||
|
workflow = [('train', 1)]
|
||||||
|
|
||||||
|
# disable opencv multithreading to avoid system being overloaded
|
||||||
|
opencv_num_threads = 0
|
||||||
|
# set multi-process start method as `fork` to speed up the training
|
||||||
|
mp_start_method = 'fork'
|
||||||
|
|
||||||
|
evaluation = dict(interval=1, metric=['NME'], save_best='NME')
|
||||||
|
|
||||||
|
eval_config = dict(interval=1)
|
||||||
|
evaluator_args = dict(metric_names='ave_nme')
|
||||||
|
eval_pipelines = [
|
||||||
|
dict(
|
||||||
|
mode='test',
|
||||||
|
data=dict(**data['val'], imgs_per_gpu=1),
|
||||||
|
evaluators=[dict(type='FaceKeypointEvaluator', **evaluator_args)])
|
||||||
|
]
|
3
data/test/face_2d_keypoints/data/002253.png
Normal file
3
data/test/face_2d_keypoints/data/002253.png
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:1a45cc56977e709361659d4123739d3647d122a0d80bf7249d0ccdef018f068e
|
||||||
|
size 112042
|
3
data/test/face_2d_keypoints/data/002258.png
Normal file
3
data/test/face_2d_keypoints/data/002258.png
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:8d516f30a8c1583b45e54d737d2a712ed1c63ef387d579517e1e23e416339ac2
|
||||||
|
size 94367
|
3
data/test/face_2d_keypoints/models/epoch_580.pth
Normal file
3
data/test/face_2d_keypoints/models/epoch_580.pth
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:5105c5aa83c59d2a1fdf8dc9ff83a8d84c19a70c7faabcf7f8bce8a913afe4f1
|
||||||
|
size 3421031
|
@ -3,6 +3,7 @@ from .auc_eval import AucEvaluator
|
|||||||
from .base_evaluator import Evaluator
|
from .base_evaluator import Evaluator
|
||||||
from .classification_eval import ClsEvaluator
|
from .classification_eval import ClsEvaluator
|
||||||
from .coco_evaluation import CocoDetectionEvaluator, CoCoPoseTopDownEvaluator
|
from .coco_evaluation import CocoDetectionEvaluator, CoCoPoseTopDownEvaluator
|
||||||
|
from .face_eval import FaceKeypointEvaluator
|
||||||
from .faceid_pair_eval import FaceIDPairEvaluator
|
from .faceid_pair_eval import FaceIDPairEvaluator
|
||||||
from .keypoint_eval import KeyPointEvaluator
|
from .keypoint_eval import KeyPointEvaluator
|
||||||
from .mse_eval import MSEEvaluator
|
from .mse_eval import MSEEvaluator
|
||||||
|
59
easycv/core/evaluation/face_eval.py
Normal file
59
easycv/core/evaluation/face_eval.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from .base_evaluator import Evaluator
|
||||||
|
from .builder import EVALUATORS
|
||||||
|
from .metric_registry import METRICS
|
||||||
|
|
||||||
|
|
||||||
|
@EVALUATORS.register_module
|
||||||
|
class FaceKeypointEvaluator(Evaluator):
|
||||||
|
|
||||||
|
def __init__(self, dataset_name=None, metric_names=['ave_nme']):
|
||||||
|
super(FaceKeypointEvaluator, self).__init__(dataset_name, metric_names)
|
||||||
|
self.metric = metric_names
|
||||||
|
self.dataset_name = dataset_name
|
||||||
|
|
||||||
|
def _evaluate_impl(self, prediction_dict, groundtruth_dict, **kwargs):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
prediction_dict: model forward output dict, ['point', 'pose']
|
||||||
|
groundtruth_dict: groundtruth dict, ['target_point', 'target_point_mask', 'target_pose', 'target_pose_mask'] used for compute accuracy
|
||||||
|
kwargs: other parameters
|
||||||
|
"""
|
||||||
|
|
||||||
|
def evaluate(predicts, gts, **kwargs):
|
||||||
|
from easycv.models.utils.face_keypoint_utils import get_keypoint_accuracy, get_pose_accuracy
|
||||||
|
ave_pose_acc = 0
|
||||||
|
ave_nme = 0
|
||||||
|
idx = 0
|
||||||
|
|
||||||
|
for (predict_point, predict_pose,
|
||||||
|
gt) in zip(predicts['point'], predicts['pose'], gts):
|
||||||
|
target_point = gt['target_point']
|
||||||
|
target_point_mask = gt['target_point_mask']
|
||||||
|
target_pose = gt['target_pose']
|
||||||
|
target_pose_mask = gt['target_pose_mask']
|
||||||
|
|
||||||
|
target_point = target_point * target_point_mask
|
||||||
|
target_pose = target_pose * target_pose_mask
|
||||||
|
|
||||||
|
keypoint_accuracy = get_keypoint_accuracy(
|
||||||
|
predict_point, target_point)
|
||||||
|
pose_accuracy = get_pose_accuracy(predict_pose, target_pose)
|
||||||
|
|
||||||
|
ave_pose_acc += pose_accuracy['pose_acc']
|
||||||
|
ave_nme += keypoint_accuracy['nme']
|
||||||
|
idx += 1
|
||||||
|
|
||||||
|
eval_result = {}
|
||||||
|
idx += 0.000001
|
||||||
|
eval_result['ave_pose_acc'] = ave_pose_acc / idx
|
||||||
|
eval_result['ave_nme'] = ave_nme / idx
|
||||||
|
|
||||||
|
return eval_result
|
||||||
|
|
||||||
|
return evaluate(prediction_dict, groundtruth_dict)
|
||||||
|
|
||||||
|
|
||||||
|
METRICS.register_default_best_metric(FaceKeypointEvaluator, 'ave_nme', 'min')
|
@ -1,5 +1,6 @@
|
|||||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||||
from . import classification, detection, pose, segmentation, selfsup, shared
|
from . import (classification, detection, face, pose, segmentation, selfsup,
|
||||||
|
shared)
|
||||||
from .builder import build_dali_dataset, build_dataset
|
from .builder import build_dali_dataset, build_dataset
|
||||||
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
|
from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
|
||||||
from .registry import DATASETS
|
from .registry import DATASETS
|
||||||
|
4
easycv/datasets/face/__init__.py
Normal file
4
easycv/datasets/face/__init__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||||
|
from .data_sources import *
|
||||||
|
from .face_keypoint_dataset import FaceKeypointDataset
|
||||||
|
from .pipelines import *
|
2
easycv/datasets/face/data_sources/__init__.py
Normal file
2
easycv/datasets/face/data_sources/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||||
|
from .face_keypoint_source import FaceKeypintSource
|
171
easycv/datasets/face/data_sources/face_keypoint_source.py
Normal file
171
easycv/datasets/face/data_sources/face_keypoint_source.py
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
import copy
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from easycv.datasets.face.pipelines.face_keypoint_transform import (
|
||||||
|
FaceKeypointNorm, FaceKeypointRandomAugmentation, normal)
|
||||||
|
from easycv.datasets.registry import DATASOURCES
|
||||||
|
from easycv.datasets.shared.base import BaseDataset
|
||||||
|
|
||||||
|
FACE_KEYPOINT_DATASET_INFO = dict(
|
||||||
|
real_list_file_dir='real_face_list.txt',
|
||||||
|
data_info_dir='infos/merge/',
|
||||||
|
data_image_dir='images/merge/',
|
||||||
|
data_overlay_dir='images/overlay/',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@DATASOURCES.register_module()
|
||||||
|
class FaceKeypintSource():
|
||||||
|
"""
|
||||||
|
load dataset for face key points
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
data_cfg,
|
||||||
|
data_range,
|
||||||
|
real_list_path=None,
|
||||||
|
info_path=None,
|
||||||
|
image_path=None,
|
||||||
|
data_overlay_path=None,
|
||||||
|
dataset_info=None,
|
||||||
|
**kwargs):
|
||||||
|
super(FaceKeypintSource, self).__init__()
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
data_cfg: Data config dict
|
||||||
|
data_range: rang of dataset for training or validation
|
||||||
|
real_list_file_path: path of file contains image list
|
||||||
|
data_info_dir: annotation file path
|
||||||
|
data_img_dir: image file path
|
||||||
|
data_overlay_dir: overlay background image path
|
||||||
|
|
||||||
|
dataset_info: A dict containing all dataset info
|
||||||
|
"""
|
||||||
|
if dataset_info is None:
|
||||||
|
logging.info(
|
||||||
|
'dataset_info is missing, use default face keypoiny dataset info'
|
||||||
|
)
|
||||||
|
dataset_info = FACE_KEYPOINT_DATASET_INFO
|
||||||
|
|
||||||
|
data_root = data_cfg['data_root']
|
||||||
|
real_list_file_path = os.path.join(data_root,
|
||||||
|
dataset_info['real_list_file_dir'])
|
||||||
|
data_info_dir = os.path.join(data_root, dataset_info['data_info_dir'])
|
||||||
|
data_img_dir = os.path.join(data_root, dataset_info['data_image_dir'])
|
||||||
|
data_overlay_dir = os.path.join(data_root,
|
||||||
|
dataset_info['data_overlay_dir'])
|
||||||
|
self.input_size = data_cfg['input_size']
|
||||||
|
data_range = data_range
|
||||||
|
|
||||||
|
if real_list_path is not None:
|
||||||
|
real_list_file_path = real_list_path
|
||||||
|
if info_path is not None:
|
||||||
|
data_info_dir = info_path
|
||||||
|
if image_path is not None:
|
||||||
|
data_img_dir = image_path
|
||||||
|
if data_overlay_path is not None:
|
||||||
|
data_overlay_dir = data_overlay_path
|
||||||
|
|
||||||
|
# overlay
|
||||||
|
self.overlay_image_path = []
|
||||||
|
for overlay_img_file in sorted(os.listdir(data_overlay_dir)):
|
||||||
|
overlay_img_filepath = os.path.join(data_overlay_dir,
|
||||||
|
overlay_img_file)
|
||||||
|
self.overlay_image_path.append(overlay_img_filepath)
|
||||||
|
|
||||||
|
self.points_and_pose_datas = []
|
||||||
|
with open(real_list_file_path, 'r') as real_list_file:
|
||||||
|
real_list_lines = real_list_file.readlines()
|
||||||
|
for index in range(data_range[0], data_range[1]):
|
||||||
|
idx = int(real_list_lines[index])
|
||||||
|
img_path = os.path.join(data_img_dir, '{:06d}.png'.format(idx))
|
||||||
|
if not os.path.exists(img_path):
|
||||||
|
logging.warning('image %s does not exist' % img_path)
|
||||||
|
continue
|
||||||
|
info_path = os.path.join(data_info_dir, '{:06d}.json'.format(idx))
|
||||||
|
if not os.path.exists(info_path):
|
||||||
|
logging.warning('annotation %s does not exist' % info_path)
|
||||||
|
continue
|
||||||
|
with open(info_path, 'r') as info_file:
|
||||||
|
info_json = json.load(info_file)
|
||||||
|
assert info_json['face_count'] == 1
|
||||||
|
base_info = info_json['face_infos'][0]['base_info']
|
||||||
|
|
||||||
|
# points
|
||||||
|
assert base_info['points_array'] is not None
|
||||||
|
points = np.asarray(base_info['points_array']).astype(
|
||||||
|
np.float32)
|
||||||
|
points_mask = np.abs(points - (-999)) > 0.0001
|
||||||
|
|
||||||
|
# pose
|
||||||
|
pose = {'pitch': -999, 'yaw': -999, 'roll': -999}
|
||||||
|
if base_info['pitch'] is not None and base_info[
|
||||||
|
'yaw'] is not None and base_info['roll'] is not None:
|
||||||
|
pose['pitch'] = base_info['pitch']
|
||||||
|
pose['yaw'] = base_info['yaw']
|
||||||
|
# pose["roll"] = base_info["roll"]
|
||||||
|
# datasets have been preprocessed, roll=0
|
||||||
|
# add noise to pose
|
||||||
|
pose['roll'] = normal() * 10.0
|
||||||
|
|
||||||
|
pose_mask = np.asarray([
|
||||||
|
np.abs(pose['pitch'] - (-999)) > 0.0001,
|
||||||
|
np.abs(pose['roll'] - (-999)) > 0.0001,
|
||||||
|
np.abs(pose['yaw'] - (-999)) > 0.0001
|
||||||
|
])
|
||||||
|
|
||||||
|
self.points_and_pose_datas.append(
|
||||||
|
(img_path, points, points_mask, pose, pose_mask))
|
||||||
|
|
||||||
|
self.db = []
|
||||||
|
for img_path, points, points_mask, pose, pose_mask in copy.deepcopy(
|
||||||
|
self.points_and_pose_datas):
|
||||||
|
image = cv2.imread(img_path)
|
||||||
|
|
||||||
|
points[:,
|
||||||
|
0] = points[:, 0] / image.shape[1] * float(self.input_size)
|
||||||
|
points[:,
|
||||||
|
1] = points[:, 1] / image.shape[0] * float(self.input_size)
|
||||||
|
|
||||||
|
target_point = np.reshape(points,
|
||||||
|
(points.shape[0] * points.shape[1]))
|
||||||
|
points_mask = points_mask.astype(np.float32)
|
||||||
|
points_mask = np.reshape(
|
||||||
|
points_mask, (points_mask.shape[0] * points_mask.shape[1]))
|
||||||
|
pose = np.asarray([pose['pitch'], pose['roll'], pose['yaw']])
|
||||||
|
|
||||||
|
self.db.append({
|
||||||
|
'img_path':
|
||||||
|
img_path,
|
||||||
|
'target_point':
|
||||||
|
torch.tensor(np.array(target_point, np.float32)),
|
||||||
|
'target_point_mask':
|
||||||
|
torch.tensor(points_mask),
|
||||||
|
'target_pose':
|
||||||
|
torch.tensor(np.array(pose, np.float32)),
|
||||||
|
'target_pose_mask':
|
||||||
|
torch.tensor(pose_mask.astype(np.float32))
|
||||||
|
})
|
||||||
|
|
||||||
|
def __getitem__(self, index):
|
||||||
|
img_path, points, points_mask, pose, pose_mask = copy.deepcopy(
|
||||||
|
self.points_and_pose_datas[index])
|
||||||
|
image = cv2.imread(img_path)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'img': image,
|
||||||
|
'target_point': points,
|
||||||
|
'target_point_mask': points_mask,
|
||||||
|
'target_pose': pose,
|
||||||
|
'target_pose_mask': pose_mask,
|
||||||
|
'overlay_image_path': self.overlay_image_path
|
||||||
|
}
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.points_and_pose_datas)
|
45
easycv/datasets/face/face_keypoint_dataset.py
Normal file
45
easycv/datasets/face/face_keypoint_dataset.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
import copy
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.utils.data as data
|
||||||
|
|
||||||
|
from easycv.datasets.face.pipelines.face_keypoint_transform import (
|
||||||
|
FaceKeypointNorm, FaceKeypointRandomAugmentation, normal)
|
||||||
|
from easycv.datasets.registry import DATASETS
|
||||||
|
from easycv.datasets.shared.base import BaseDataset
|
||||||
|
|
||||||
|
|
||||||
|
@DATASETS.register_module()
|
||||||
|
class FaceKeypointDataset(BaseDataset):
|
||||||
|
"""
|
||||||
|
dataset for face key points
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, data_source, pipeline, profiling=False):
|
||||||
|
super(FaceKeypointDataset, self).__init__(data_source, pipeline,
|
||||||
|
profiling)
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
data_source: Data_source config dict
|
||||||
|
pipeline: Pipeline config list
|
||||||
|
profiling: If set True, will print pipeline time
|
||||||
|
"""
|
||||||
|
|
||||||
|
def evaluate(self, outputs, evaluators, **kwargs):
|
||||||
|
eval_result = {}
|
||||||
|
for evaluator in evaluators:
|
||||||
|
eval_result.update(
|
||||||
|
evaluator.evaluate(
|
||||||
|
prediction_dict=outputs,
|
||||||
|
groundtruth_dict=self.data_source.db))
|
||||||
|
|
||||||
|
return eval_result
|
||||||
|
|
||||||
|
def __getitem__(self, idx):
|
||||||
|
results = self.data_source[idx]
|
||||||
|
return self.pipeline(results)
|
5
easycv/datasets/face/pipelines/__init__.py
Normal file
5
easycv/datasets/face/pipelines/__init__.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||||
|
from .face_keypoint_transform import (FaceKeypointNorm,
|
||||||
|
FaceKeypointRandomAugmentation)
|
||||||
|
|
||||||
|
__all__ = ['FaceKeypointRandomAugmentation', 'FaceKeypointNorm']
|
431
easycv/datasets/face/pipelines/face_keypoint_transform.py
Normal file
431
easycv/datasets/face/pipelines/face_keypoint_transform.py
Normal file
@ -0,0 +1,431 @@
|
|||||||
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||||
|
import random
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import imgaug
|
||||||
|
import imgaug.augmenters as iaa
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from easycv.datasets.registry import PIPELINES
|
||||||
|
|
||||||
|
DEST_SIZE = 256
|
||||||
|
BASE_LANDMARK_NUM = 106
|
||||||
|
ENLARGE_RATIO = 1.1
|
||||||
|
|
||||||
|
CONTOUR_PARTS = [[0, 32], [1, 31], [2, 30], [3, 29], [4, 28], [5, 27], [6, 26],
|
||||||
|
[7, 25], [8, 24], [9, 23], [10, 22], [11, 21], [12, 20],
|
||||||
|
[13, 19], [14, 18], [15, 17]]
|
||||||
|
BROW_PARTS = [[33, 46], [34, 45], [35, 44], [36, 43], [37, 42], [38, 50],
|
||||||
|
[39, 49], [40, 48], [41, 47]]
|
||||||
|
EYE_PARTS = [[66, 79], [67, 78], [68, 77], [69, 76], [70, 75], [71, 82],
|
||||||
|
[72, 81], [73, 80], [74, 83]]
|
||||||
|
NOSE_PARTS = [[55, 65], [56, 64], [57, 63], [58, 62], [59, 61]]
|
||||||
|
MOUSE_PARTS = [[84, 90], [85, 89], [86, 88], [96, 100], [97, 99], [103, 101],
|
||||||
|
[95, 91], [94, 92]]
|
||||||
|
IRIS_PARTS = [[104, 105]]
|
||||||
|
MATCHED_PARTS = CONTOUR_PARTS + BROW_PARTS + EYE_PARTS + NOSE_PARTS + MOUSE_PARTS + IRIS_PARTS
|
||||||
|
|
||||||
|
|
||||||
|
def normal():
|
||||||
|
"""
|
||||||
|
3-sigma rule
|
||||||
|
return: (-1, +1)
|
||||||
|
"""
|
||||||
|
mu, sigma = 0, 1
|
||||||
|
while True:
|
||||||
|
s = np.random.normal(mu, sigma)
|
||||||
|
if s < mu - 3 * sigma or s > mu + 3 * sigma:
|
||||||
|
continue
|
||||||
|
return s / 3 * sigma
|
||||||
|
|
||||||
|
|
||||||
|
def rotate(angle, center, landmark):
|
||||||
|
rad = angle * np.pi / 180.0
|
||||||
|
alpha = np.cos(rad)
|
||||||
|
beta = np.sin(rad)
|
||||||
|
M = np.zeros((2, 3), dtype=np.float32)
|
||||||
|
M[0, 0] = alpha
|
||||||
|
M[0, 1] = beta
|
||||||
|
M[0, 2] = (1 - alpha) * center[0] - beta * center[1]
|
||||||
|
M[1, 0] = -beta
|
||||||
|
M[1, 1] = alpha
|
||||||
|
M[1, 2] = beta * center[0] + (1 - alpha) * center[1]
|
||||||
|
|
||||||
|
landmark_ = np.asarray([(M[0, 0] * x + M[0, 1] * y + M[0, 2],
|
||||||
|
M[1, 0] * x + M[1, 1] * y + M[1, 2])
|
||||||
|
for (x, y) in landmark])
|
||||||
|
return M, landmark_
|
||||||
|
|
||||||
|
|
||||||
|
class OverLayGenerator:
|
||||||
|
|
||||||
|
def __init__(self, shape):
|
||||||
|
# 4x4
|
||||||
|
h_seg_len = shape[0] // 4
|
||||||
|
w_seg_len = shape[1] // 4
|
||||||
|
|
||||||
|
self.overlay = []
|
||||||
|
# 2x2 overlay
|
||||||
|
for i in range(3):
|
||||||
|
for j in range(3):
|
||||||
|
if i == 1 and j == 1:
|
||||||
|
continue
|
||||||
|
self.overlay.append((i * w_seg_len, j * h_seg_len,
|
||||||
|
2 * w_seg_len, 2 * h_seg_len))
|
||||||
|
|
||||||
|
# 2x3 overlay
|
||||||
|
for i in range(3):
|
||||||
|
for j in range(2):
|
||||||
|
if i == 1:
|
||||||
|
continue
|
||||||
|
self.overlay.append((i * w_seg_len, j * h_seg_len,
|
||||||
|
2 * w_seg_len, 3 * h_seg_len))
|
||||||
|
for i in range(2):
|
||||||
|
for j in range(3):
|
||||||
|
if j == 1:
|
||||||
|
continue
|
||||||
|
self.overlay.append((i * w_seg_len, j * h_seg_len,
|
||||||
|
3 * w_seg_len, 2 * h_seg_len))
|
||||||
|
|
||||||
|
# 2x4 overlay
|
||||||
|
for i in range(3):
|
||||||
|
for j in range(1):
|
||||||
|
if i == 1:
|
||||||
|
continue
|
||||||
|
self.overlay.append((i * w_seg_len, j * h_seg_len,
|
||||||
|
2 * w_seg_len, 4 * h_seg_len))
|
||||||
|
for i in range(1):
|
||||||
|
for j in range(3):
|
||||||
|
if j == 1:
|
||||||
|
continue
|
||||||
|
self.overlay.append((i * w_seg_len, j * h_seg_len,
|
||||||
|
4 * w_seg_len, 2 * h_seg_len))
|
||||||
|
|
||||||
|
|
||||||
|
class FaceKeypointsDataAugumentation:
|
||||||
|
|
||||||
|
def __init__(self, input_size):
|
||||||
|
# option
|
||||||
|
self.enable_flip = True
|
||||||
|
self.enable_rotate = True
|
||||||
|
self.input_size = input_size
|
||||||
|
|
||||||
|
# mask generator
|
||||||
|
coarse_salt_and_pepper_iaa = iaa.CoarseSaltAndPepper(
|
||||||
|
(0.25, 0.35), size_percent=(0.03125, 0.015625))
|
||||||
|
self.mask_generator = coarse_salt_and_pepper_iaa.mask
|
||||||
|
|
||||||
|
# overlay generator
|
||||||
|
self.overlay_generator = OverLayGenerator(shape=(256, 256))
|
||||||
|
|
||||||
|
# flip
|
||||||
|
self.mirror_map = FaceKeypointsDataAugumentation.compute_mirror_map()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def compute_mirror_map():
|
||||||
|
|
||||||
|
mirror_map = np.array(range(0, BASE_LANDMARK_NUM), np.int32)
|
||||||
|
for x, y in MATCHED_PARTS:
|
||||||
|
mirror_map[x] = y
|
||||||
|
mirror_map[y] = x
|
||||||
|
|
||||||
|
return mirror_map
|
||||||
|
|
||||||
|
def aug_flip(self, img, pts, visibility, pose):
|
||||||
|
# pts[:, 0] = self.input_size - pts[:, 0]
|
||||||
|
pts[:, 0] = img.shape[1] - pts[:, 0]
|
||||||
|
pts = pts[self.mirror_map]
|
||||||
|
if visibility is not None:
|
||||||
|
visibility = visibility[self.mirror_map]
|
||||||
|
img = cv2.flip(img, 1)
|
||||||
|
if pose is not None:
|
||||||
|
# fix roll&yaw in pose
|
||||||
|
pose['roll'] = -pose['roll']
|
||||||
|
pose['yaw'] = -pose['yaw']
|
||||||
|
|
||||||
|
return img, pts, visibility, pose
|
||||||
|
|
||||||
|
def aug_rotate(self, img, pts, pose, angle):
|
||||||
|
center = [DEST_SIZE // 2, DEST_SIZE // 2]
|
||||||
|
if pose is not None:
|
||||||
|
# fix roll in pose
|
||||||
|
pose['roll'] += angle
|
||||||
|
|
||||||
|
cx, cy = center
|
||||||
|
M, pts = rotate(angle, (cx, cy), pts)
|
||||||
|
|
||||||
|
imgT = cv2.warpAffine(img, M, (int(img.shape[1]), int(img.shape[0])))
|
||||||
|
|
||||||
|
x1 = np.min(pts[:, 0])
|
||||||
|
x2 = np.max(pts[:, 0])
|
||||||
|
y1 = np.min(pts[:, 1])
|
||||||
|
y2 = np.max(pts[:, 1])
|
||||||
|
w = x2 - x1 + 1
|
||||||
|
h = y2 - y1 + 1
|
||||||
|
x1 = int(x1 - (ENLARGE_RATIO - 1.0) / 2.0 * w)
|
||||||
|
y1 = int(y1 - (ENLARGE_RATIO - 1.0) * h)
|
||||||
|
|
||||||
|
new_w = int(ENLARGE_RATIO * (1 + normal() * 0.25) * w)
|
||||||
|
new_h = int(ENLARGE_RATIO * (1 + normal() * 0.25) * h)
|
||||||
|
new_x1 = x1 + int(normal() * DEST_SIZE * 0.15)
|
||||||
|
new_y1 = y1 + int(normal() * DEST_SIZE * 0.15)
|
||||||
|
new_x2 = new_x1 + new_w
|
||||||
|
new_y2 = new_y1 + new_h
|
||||||
|
|
||||||
|
new_xy = new_x1, new_y1
|
||||||
|
pts = pts - new_xy
|
||||||
|
|
||||||
|
height, width, _ = imgT.shape
|
||||||
|
dx = max(0, -new_x1)
|
||||||
|
dy = max(0, -new_y1)
|
||||||
|
new_x1 = max(0, new_x1)
|
||||||
|
new_y1 = max(0, new_y1)
|
||||||
|
|
||||||
|
edx = max(0, new_x2 - width)
|
||||||
|
edy = max(0, new_y2 - height)
|
||||||
|
new_x2 = min(width, new_x2)
|
||||||
|
new_y2 = min(height, new_y2)
|
||||||
|
|
||||||
|
imgT = imgT[new_y1:new_y2, new_x1:new_x2]
|
||||||
|
if dx > 0 or dy > 0 or edx > 0 or edy > 0:
|
||||||
|
imgT = cv2.copyMakeBorder(
|
||||||
|
imgT,
|
||||||
|
dy,
|
||||||
|
edy,
|
||||||
|
dx,
|
||||||
|
edx,
|
||||||
|
cv2.BORDER_CONSTANT,
|
||||||
|
value=(103.94, 116.78, 123.68))
|
||||||
|
|
||||||
|
return imgT, pts, pose
|
||||||
|
|
||||||
|
def random_mask(self, img):
    """Draw a per-pixel occlusion mask shaped like ``img``.

    Samples from ``self.mask_generator`` and collapses the channel axis:
    a pixel is masked when any sampled channel is positive.  Returns a
    boolean array of shape (H, W, 1).
    """
    samples = self.mask_generator.draw_samples(size=img.shape)
    collapsed = np.sum(samples, axis=-1) > 0
    return np.expand_dims(collapsed, axis=-1)
|
||||||
|
|
||||||
|
def random_overlay(self):
    """Pick one overlay position uniformly from the overlay generator."""
    candidates = self.overlay_generator.overlay
    chosen = np.random.choice(len(candidates))
    return candidates[chosen]
|
||||||
|
|
||||||
|
def augment_blur(self, img):
    """Simulate a low-quality capture of a square image.

    Pipeline: always blur (one of gaussian/average/median/motion),
    downscale to a random fraction of the side length, sometimes add
    noise, usually JPEG-compress, then resize back to the original size.
    """
    h, w = img.shape[:2]
    assert h == w

    # random downscale target between 1% and 50% of the side
    small_side = int(random.uniform(0.01, 0.5) * h)

    pipeline = iaa.Sequential([
        iaa.Sometimes(
            1.0,
            iaa.OneOf([
                iaa.GaussianBlur((3, 15)),
                iaa.AverageBlur(k=(3, 15)),
                iaa.MedianBlur(k=(3, 15)),
                iaa.MotionBlur((5, 25))
            ])),
        iaa.Resize(small_side, interpolation=imgaug.ALL),
        iaa.Sometimes(
            0.6,
            iaa.OneOf([
                iaa.AdditiveGaussianNoise(
                    loc=0, scale=(0.0, 0.1 * 255), per_channel=0.5),
                iaa.AdditiveLaplaceNoise(
                    loc=0, scale=(0.0, 0.1 * 255), per_channel=0.5),
                iaa.AdditivePoissonNoise(lam=(0, 30), per_channel=0.5)
            ])),
        iaa.Sometimes(0.8, iaa.JpegCompression(compression=(40, 90))),
        iaa.Resize(h),
    ])

    return pipeline.augment_image(img)
|
||||||
|
|
||||||
|
def augment_color_temperature(self, img):
    """Shift the image's white balance to a random color temperature
    between 1000K (warm) and 40000K (cold)."""
    temperature_aug = iaa.ChangeColorTemperature((1000, 40000))
    return temperature_aug.augment_image(img)
|
||||||
|
|
||||||
|
def aug_clr_noise_blur(self, img):
    """Apply a stack of independent, low-probability photometric
    degradations (each fires with p=0.05): skin/luma scaling, gaussian
    blur, gaussian noise, motion blur, JPEG compression, gamma contrast
    and brightness jitter.

    Fix: ``np.float`` was removed in NumPy 1.24; replaced with
    ``np.float64`` (the exact dtype the old alias resolved to).
    """
    # skin&light: scale the Y (luma) channel in YCrCb space
    if np.random.choice((True, False), p=[0.05, 0.95]):
        img_ycrcb_raw = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB)
        skin_factor_list = [0.6, 0.8, 1.0, 1.2, 1.4]
        skin_factor = np.random.choice(skin_factor_list)
        img_ycrcb_raw[:, :, 0:1] = np.clip(
            img_ycrcb_raw[:, :, 0:1].astype(np.float64) * skin_factor, 0,
            255).astype(np.uint8)
        img = cv2.cvtColor(img_ycrcb_raw, cv2.COLOR_YCR_CB2BGR)

    # gauss blur 5%
    if np.random.choice((True, False), p=[0.05, 0.95]):
        sigma = np.random.choice([0.25, 0.50, 0.75])
        gauss_blur_iaa = iaa.GaussianBlur(sigma=sigma)
        img = gauss_blur_iaa(image=img)

    # gauss noise 5%
    if np.random.choice((True, False), p=[0.05, 0.95]):
        scale = np.random.choice([0.01, 0.03, 0.05])
        gauss_noise_iaa = iaa.AdditiveGaussianNoise(scale=scale * 255)
        img = gauss_noise_iaa(image=img)

    # motion blur 5%
    if np.random.choice((True, False), p=[0.05, 0.95]):
        angle = np.random.choice([0, 45, 90, 135, 180, 225, 270, 315])
        motion_blur_iaa = iaa.MotionBlur(k=5, angle=angle)
        img = motion_blur_iaa(image=img)

    # jpeg compress 5%
    if np.random.choice((True, False), p=[0.05, 0.95]):
        jpeg_compress_iaa = iaa.JpegCompression(compression=(10, 50))
        img = jpeg_compress_iaa(image=img)

    # gamma contrast 5%
    if np.random.choice((True, False), p=[0.05, 0.95]):
        gamma_contrast_iaa = iaa.GammaContrast((0.85, 1.15))
        img = gamma_contrast_iaa(image=img)

    # brightness 5%
    if np.random.choice((True, False), p=[0.05, 0.95]):
        brightness_iaa = iaa.MultiplyAndAddToBrightness(
            mul=(0.85, 1.15), add=(-10, 10))
        img = brightness_iaa(image=img)

    return img
|
||||||
|
|
||||||
|
def augment_set(self, img):
    """Compose a random degradation set: color temperature (p=0.6),
    compression/motion-blur/noise bundle (p=0.8), then a light
    blur/contrast pass (each member at p=0.25)."""
    noisy_image = img.copy().astype(np.uint8)

    if np.random.choice((True, False), p=[0.6, 0.4]):
        temperature_aug = iaa.ChangeColorTemperature((1000, 40000))
        noisy_image = temperature_aug.augment_image(noisy_image)

    if np.random.choice((True, False), p=[0.8, 0.2]):
        degrade_seq = iaa.Sequential([
            iaa.Sometimes(0.5, iaa.JpegCompression(compression=(40, 90))),
            iaa.Sometimes(0.5, iaa.MotionBlur((3, 7))),
            iaa.Sometimes(
                0.5,
                iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255))),
        ],
                                     random_order=True)
        noisy_image = degrade_seq.augment_image(noisy_image)

    def sometimes(aug):
        return iaa.Sometimes(0.25, aug)

    light_seq = iaa.Sequential([
        sometimes(iaa.AverageBlur(k=(2, 5))),
        sometimes(iaa.GammaContrast((0.5, 2.0)))
    ],
                               random_order=True)

    # NOTE(review): a single image is passed via the plural ``images=``
    # keyword, matching the original call — confirm imgaug handles it.
    noisy_image = light_seq(images=noisy_image)
    return noisy_image
|
||||||
|
|
||||||
|
|
||||||
|
@PIPELINES.register_module()
class FaceKeypointNorm:
    """Resize the face crop to ``input_size`` and flatten/normalize the
    keypoint, keypoint-mask and pose targets, filling zero defaults for
    any target missing from ``results``.
    """

    def __init__(self, input_size=96):
        # side length of the square network input
        self.input_size = input_size

    def __call__(self, results):
        """Normalize one sample dict in place and return it."""
        if 'img' in results.keys():
            image = results['img']
            image = cv2.resize(image, (self.input_size, self.input_size))
            results['img'] = np.array(image)

        if 'target_point' in results.keys():
            points = results['target_point']
            # NOTE(review): ``image`` here is the already-resized image,
            # so the rescale below is a no-op; presumably the pre-resize
            # shape was intended — confirm against the dataset code.
            points[:, 0] = points[:, 0] / image.shape[1] * float(
                self.input_size)
            points[:, 1] = points[:, 1] / image.shape[0] * float(
                self.input_size)
            flattened = np.reshape(points,
                                   (points.shape[0] * points.shape[1]))
            results['target_point'] = np.array(flattened, np.float32)
        else:
            # 212 = 106 landmarks * 2 coordinates
            results['target_point'] = np.array(np.zeros(212), np.float32)

        if 'target_point_mask' in results.keys():
            points_mask = results['target_point_mask']
            points_mask = points_mask.astype(np.float32)
            points_mask = np.reshape(
                points_mask, (points_mask.shape[0] * points_mask.shape[1]))
            results['target_point_mask'] = points_mask.astype(np.float32)
        else:
            results['target_point_mask'] = np.array(
                np.zeros(212), np.float32)

        if 'target_pose' in results.keys():
            pose = results['target_pose']
            pose = np.asarray([pose['pitch'], pose['roll'], pose['yaw']])
            results['target_pose'] = pose.astype(np.float32)
        else:
            results['target_pose'] = np.array(np.zeros(3), np.float32)

        if 'target_pose_mask' not in results.keys():
            results['target_pose_mask'] = np.array(np.zeros(3), np.float32)

        return results
|
||||||
|
|
||||||
|
|
||||||
|
@PIPELINES.register_module()
class FaceKeypointRandomAugmentation:
    """Randomized training augmentation for face keypoints: optional
    overlay occlusion (p=0.2), roll-compensating rotation, horizontal
    flip (p=0.5), and photometric noise/blur.
    """

    def __init__(self, input_size=96):
        self.input_size = input_size
        # shared augmentation toolbox (rotation, flip, photometric ops)
        self.data_aug = FaceKeypointsDataAugumentation(self.input_size)

    def __call__(self, results):
        """Augment one sample dict and return it."""
        image = results['img']
        points = results['target_point']
        points_mask = results['target_point_mask']
        pose = results['target_pose']
        pose_mask = results['target_pose_mask']
        overlay_image_path = results['overlay_image_path']

        if np.random.choice((True, False), p=[0.2, 0.8]):
            # paste a random RGBA overlay (occluder) onto the face
            overlay_pos = self.data_aug.random_overlay()
            chosen = np.random.choice(len(overlay_image_path))
            overlay_filepath = overlay_image_path[chosen]
            overlay_img = cv2.imread(overlay_filepath,
                                     cv2.IMREAD_UNCHANGED)

            (x, y, w, h) = overlay_pos
            x1, y1, x2, y2 = x, y, x + w, y + h
            overlay_img = cv2.resize(overlay_img, dsize=(w, h))
            # alpha channel drives the blend
            overlay_mask = overlay_img[:, :, 3:4] / 255.0
            image[y1:y2, x1:x2, :] = image[y1:y2, x1:x2, :] * (
                1 - overlay_mask) + overlay_img[:, :, 0:3] * overlay_mask
            image = image.astype(np.uint8)

        # counterclockwise rotate by the face's roll, then restore roll
        angle = pose['roll']
        image, points, pose = self.data_aug.aug_rotate(
            image, points, pose, angle)
        pose['roll'] = angle  # reset roll=angle

        if np.random.choice((True, False)):
            image_transform, points, _, pose = self.data_aug.aug_flip(
                image, points, None, pose)
        else:
            image_transform = image

        image_transform = self.data_aug.aug_clr_noise_blur(image_transform)

        results['img'] = image_transform
        results['target_point'] = points
        results['target_pose'] = pose
        return results
|
@ -3,6 +3,7 @@ from .backbones import * # noqa: F401,F403
|
|||||||
from .builder import build_backbone, build_head, build_loss, build_model
|
from .builder import build_backbone, build_head, build_loss, build_model
|
||||||
from .classification import *
|
from .classification import *
|
||||||
from .detection import *
|
from .detection import *
|
||||||
|
from .face import *
|
||||||
from .heads import *
|
from .heads import *
|
||||||
from .loss import *
|
from .loss import *
|
||||||
from .pose import TopDown
|
from .pose import TopDown
|
||||||
|
@ -4,6 +4,7 @@ from .bninception import BNInception
|
|||||||
from .conv_mae_vit import FastConvMAEViT
|
from .conv_mae_vit import FastConvMAEViT
|
||||||
from .conv_vitdet import ConvViTDet
|
from .conv_vitdet import ConvViTDet
|
||||||
from .efficientformer import EfficientFormer
|
from .efficientformer import EfficientFormer
|
||||||
|
from .face_keypoint_backbone import FaceKeypointBackbone
|
||||||
from .genet import PlainNet
|
from .genet import PlainNet
|
||||||
from .hrnet import HRNet
|
from .hrnet import HRNet
|
||||||
from .inceptionv3 import Inception3
|
from .inceptionv3 import Inception3
|
||||||
|
90
easycv/models/backbones/face_keypoint_backbone.py
Normal file
90
easycv/models/backbones/face_keypoint_backbone.py
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
from easycv.models.registry import BACKBONES
|
||||||
|
from easycv.models.utils.face_keypoint_utils import InvertedResidual, Residual
|
||||||
|
|
||||||
|
|
||||||
|
@BACKBONES.register_module
class FaceKeypointBackbone(nn.Module):
    """Lightweight CNN backbone for 96x96 face-keypoint regression.

    A stack of 8 depthwise ``Residual`` blocks followed by 6
    ``InvertedResidual`` blocks; three stride-2 stages reduce spatial
    resolution.  Attribute names conv1..conv14 are kept for checkpoint
    compatibility.
    """

    def __init__(self,
                 in_channels=3,
                 out_channels=48,
                 residual_activation='relu',
                 inverted_activation='half_v2',
                 inverted_expand_ratio=2):
        super(FaceKeypointBackbone, self).__init__()
        # NOTE(review): conv1 uses the default activation ('relu')
        # rather than ``residual_activation`` — confirm intentional.
        self.conv1 = Residual(in_channels, 12, 3, 2, 0)
        self.conv2 = Residual(12, 12, 3, 1, 0, activation=residual_activation)
        self.conv3 = Residual(12, 12, 3, 1, 1, activation=residual_activation)
        self.conv4 = Residual(12, 12, 3, 1, 0, activation=residual_activation)
        self.conv5 = Residual(12, 24, 3, 2, 0, activation=residual_activation)
        self.conv6 = Residual(24, 24, 3, 1, 0, activation=residual_activation)
        self.conv7 = Residual(24, 24, 3, 1, 1, activation=residual_activation)
        self.conv8 = Residual(24, 24, 3, 1, 1, activation=residual_activation)

        # (in, out, kernel, stride, padding) for conv9..conv14
        inverted_cfg = [
            (24, 48, 3, 2, 0),
            (48, 48, 3, 1, 0),
            (48, 48, 3, 1, 1),
            (48, 48, 3, 1, 1),
            (48, 48, 3, 1, 1),
            (48, out_channels, 3, 2, 0),
        ]
        for offset, (cin, cout, k, s, p) in enumerate(inverted_cfg):
            setattr(
                self, f'conv{9 + offset}',
                InvertedResidual(
                    cin,
                    cout,
                    k,
                    s,
                    p,
                    expand_ratio=inverted_expand_ratio,
                    activation=inverted_activation))

    def forward(self, x):
        """Run the 14-block chain and return the final feature map."""
        out = x
        for idx in range(1, 15):
            out = getattr(self, f'conv{idx}')(out)
        return out
|
2
easycv/models/face/__init__.py
Normal file
2
easycv/models/face/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
from .face_keypoint import FaceKeypoint
|
||||||
|
from .head import *
|
103
easycv/models/face/face_keypoint.py
Normal file
103
easycv/models/face/face_keypoint.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
import mmcv
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from easycv.models import builder
|
||||||
|
from easycv.models.base import BaseModel
|
||||||
|
from easycv.models.builder import MODELS
|
||||||
|
from easycv.models.utils.face_keypoint_utils import (get_keypoint_accuracy,
|
||||||
|
get_pose_accuracy)
|
||||||
|
|
||||||
|
|
||||||
|
@MODELS.register_module()
class FaceKeypoint(BaseModel):
    """Face 2D keypoint model: backbone + optional neck + keypoint head
    and pose head, trained jointly with masked targets.
    """

    def __init__(self,
                 backbone,
                 neck=None,
                 keypoint_head=None,
                 pose_head=None,
                 pretrained=None,
                 loss_keypoint=None,
                 loss_pose=None):
        super().__init__()
        self.pretrained = pretrained
        self.backbone = builder.build_backbone(backbone)

        if neck is not None:
            self.neck = builder.build_neck(neck)

        if keypoint_head is not None:
            # allow the loss to be supplied either inline or separately
            if 'loss_keypoint' not in keypoint_head and loss_keypoint is not None:
                keypoint_head['loss_keypoint'] = loss_keypoint
            self.keypoint_head = builder.build_head(keypoint_head)

        if pose_head is not None:
            if 'loss_pose' not in pose_head and loss_pose is not None:
                pose_head['loss_pose'] = loss_pose
            self.pose_head = builder.build_head(pose_head)

    @property
    def with_neck(self):
        """Check if a neck module was built."""
        return hasattr(self, 'neck')

    @property
    def with_keypoint(self):
        """Check if has keypoint_head."""
        return hasattr(self, 'keypoint_head')

    @property
    def with_pose(self):
        """Check if has pose_head."""
        return hasattr(self, 'pose_head')

    def forward_train(self, img, target_point, target_point_mask, target_pose,
                      target_pose_mask, **kwargs):
        """Defines the computation performed at every call when training."""
        feats = self.backbone(img)
        if self.with_neck:
            feats = self.neck(feats)
        if self.with_keypoint:
            output_points = self.keypoint_head(feats)
        if self.with_pose:
            output_pose = self.pose_head(feats)

        # zero out targets that are not annotated
        target_point = target_point * target_point_mask
        target_pose = target_pose * target_pose_mask

        losses = dict()
        if self.with_keypoint:
            keypoint_losses = self.keypoint_head.get_loss(
                output_points, target_point, target_point_mask, target_pose)
            losses.update(keypoint_losses)
            losses.update(get_keypoint_accuracy(output_points, target_point))

        if self.with_pose:
            # radians -> degrees, then mask unannotated pose entries
            output_pose = output_pose * 180.0 / np.pi
            output_pose = output_pose * target_pose_mask

            pose_losses = self.pose_head.get_loss(output_pose, target_pose)
            losses.update(pose_losses)
            losses.update(get_pose_accuracy(output_pose, target_pose))
        return losses

    def forward_test(self, img, **kwargs):
        """Defines the computation performed at every call when testing."""
        feats = self.backbone(img)
        if self.with_neck:
            feats = self.neck(feats)
        if self.with_keypoint:
            output_points = self.keypoint_head(feats)
        if self.with_pose:
            output_pose = self.pose_head(feats)

        return {'point': output_points, 'pose': output_pose}
|
2
easycv/models/face/head/__init__.py
Normal file
2
easycv/models/face/head/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
from .face_keypoint_head import FaceKeypointHead
|
||||||
|
from .face_keypoint_pose_head import FacePoseHead
|
68
easycv/models/face/head/face_keypoint_head.py
Normal file
68
easycv/models/face/head/face_keypoint_head.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
import copy
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
from easycv.models.builder import HEADS, build_loss
|
||||||
|
from easycv.models.utils.face_keypoint_utils import (InvertedResidual, View,
|
||||||
|
conv_bn, conv_no_relu,
|
||||||
|
get_keypoint_accuracy)
|
||||||
|
|
||||||
|
|
||||||
|
@HEADS.register_module
class FaceKeypointHead(nn.Module):
    """Regression head producing 106 face landmarks (212 values).

    The network output is interpreted as an offset around a canonical
    mean face: ``point = out * 0.5 + mean_face`` scaled to pixel
    coordinates of ``input_size``.

    Fix: the mean-face tensor was rebuilt with ``torch.from_numpy`` on
    every forward call; it is now created once in ``__init__`` (same
    values, dtype and device as before).
    """

    def __init__(
        self,
        mean_face,
        loss_keypoint,
        in_channels=48,
        out_channels=212,
        input_size=96,
        inverted_expand_ratio=2,
        inverted_activation='half_v2',
    ):
        super(FaceKeypointHead, self).__init__()
        self.input_size = input_size
        self.face_mean_shape = copy.deepcopy(np.asarray(mean_face))
        # NOTE(review): device is fixed at construction time; if the
        # module is later moved to another device the addition in
        # forward() will mismatch — same limitation as the original.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # precomputed once; avoids a host->device copy per forward pass
        self.mean_shape_tensor = torch.from_numpy(self.face_mean_shape).to(
            self.device)
        self.branches = []

        self.loss = build_loss(loss_keypoint)

        # points
        self.branches.append(
            nn.Sequential(
                InvertedResidual(
                    in_channels,
                    96,
                    3,
                    1,
                    1,
                    expand_ratio=inverted_expand_ratio,
                    activation=inverted_activation),
                View((-1, 96 * 3 * 3, 1, 1)), conv_bn(96 * 3 * 3, 128, 1, 1,
                                                      0),
                conv_bn(128, 128, 1, 1, 0),
                conv_no_relu(128, out_channels, 1, 1, 0),
                View((-1, out_channels))))
        self.branches = nn.ModuleList(self.branches)

    def get_loss(self, output, target_point, target_point_mask, target_pose):
        """Masked wing loss between predicted and target points."""
        losses = dict()
        loss = self.loss(output * target_point_mask, target_point, target_pose)
        losses['point_loss'] = loss
        return losses

    def get_accuracy(self, output, target_point):
        """Keypoint accuracy (ION) between prediction and target."""
        return get_keypoint_accuracy(output, target_point)

    def forward(self, x):
        """Map backbone features to absolute landmark coordinates."""
        point = self.branches[0](x)
        # decode: half-range offset around the mean face, then to pixels
        point = point * 0.5 + self.mean_shape_tensor
        point = point * self.input_size
        return point
|
55
easycv/models/face/head/face_keypoint_pose_head.py
Normal file
55
easycv/models/face/head/face_keypoint_pose_head.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
from easycv.models.builder import HEADS, build_loss
|
||||||
|
from easycv.models.utils.face_keypoint_utils import (InvertedResidual, View,
|
||||||
|
conv_bn, conv_no_relu,
|
||||||
|
get_pose_accuracy)
|
||||||
|
|
||||||
|
|
||||||
|
@HEADS.register_module
class FacePoseHead(nn.Module):
    """Regression head predicting 3 head-pose angles (pitch, roll, yaw)
    from backbone features."""

    def __init__(
        self,
        loss_pose,
        in_channels=48,
        out_channels=3,
        inverted_expand_ratio=2,
        inverted_activation='half_v2',
    ):
        super(FacePoseHead, self).__init__()
        self.branches = []
        self.loss = build_loss(loss_pose)

        # pose branch: inverted residual -> flatten -> 1x1 conv MLP
        pose_branch = nn.Sequential(
            InvertedResidual(
                in_channels,
                48,
                3,
                1,
                1,
                expand_ratio=inverted_expand_ratio,
                activation=inverted_activation),
            View((-1, 48 * 3 * 3, 1, 1)), conv_bn(48 * 3 * 3, 48, 1, 1, 0),
            conv_bn(48, 48, 1, 1, 0),
            conv_no_relu(48, out_channels, 1, 1, 0),
            View((-1, out_channels)))
        self.branches.append(pose_branch)
        self.branches = nn.ModuleList(self.branches)

    def get_loss(self, output, target_pose):
        """MSE-based pose loss."""
        return {'pose_loss': self.loss(output, target_pose)}

    def get_accuracy(self, output, target_pose):
        """Pose accuracy between prediction and target."""
        return get_pose_accuracy(output, target_pose)

    def forward(self, x):
        """Map backbone features to the 3 pose angles."""
        return self.branches[0](x)
|
@ -1,5 +1,6 @@
|
|||||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||||
from .cross_entropy_loss import CrossEntropyLoss
|
from .cross_entropy_loss import CrossEntropyLoss
|
||||||
|
from .face_keypoint_loss import FacePoseLoss, WingLossWithPose
|
||||||
from .focal_loss import FocalLoss
|
from .focal_loss import FocalLoss
|
||||||
from .iou_loss import GIoULoss, IoULoss
|
from .iou_loss import GIoULoss, IoULoss
|
||||||
from .mse_loss import JointsMSELoss
|
from .mse_loss import JointsMSELoss
|
||||||
|
91
easycv/models/loss/face_keypoint_loss.py
Normal file
91
easycv/models/loss/face_keypoint_loss.py
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
import copy
|
||||||
|
import math
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
from easycv.models.builder import LOSSES
|
||||||
|
|
||||||
|
CONSTANT_CONTOUR = 66
|
||||||
|
CONSTANT_EYEBROW = 18
|
||||||
|
CONSTANT_EYE = 18
|
||||||
|
CONSTANT_NOSE = 30
|
||||||
|
CONSTANT_LIPS = 40
|
||||||
|
CONSTANT_EYE_CENTER = 4
|
||||||
|
|
||||||
|
|
||||||
|
@LOSSES.register_module()
class WingLossWithPose(nn.Module):
    """Wing loss for 106-point face landmarks, weighted per facial part
    and by head-pose magnitude (larger pose angles get larger weight).

    Fix: the part-weight tensor was created with a hard-coded
    ``.cuda()`` which crashed on CPU-only machines; it is now kept on
    CPU and moved to the prediction's device in ``forward``.
    """

    def __init__(self,
                 num_points=106,
                 left_eye_left_corner_index=66,
                 right_eye_right_corner_index=79,
                 points_weight=1.0,
                 contour_weight=1.5,
                 eyebrow_weight=1.5,
                 eye_weight=1.7,
                 nose_weight=1.3,
                 lip_weight=1.7,
                 omega=10,
                 epsilon=2):
        super(WingLossWithPose, self).__init__()
        # wing-loss shape parameters (see Feng et al., "Wing Loss")
        self.omega = omega
        self.epsilon = epsilon

        self.num_points = num_points
        self.left_eye_left_corner_index = left_eye_left_corner_index
        self.right_eye_right_corner_index = right_eye_right_corner_index
        self.points_weight = points_weight

        # per-coordinate weights, concatenated in landmark order
        # (66 + 18 + 18 + 30 + 18 + 18 + 40 + 4 = 212 = num_points * 2)
        contour_weight = np.full(CONSTANT_CONTOUR, contour_weight)
        eyebrow_left_weight = np.full(CONSTANT_EYEBROW, eyebrow_weight)
        eyebrow_right_weight = np.full(CONSTANT_EYEBROW, eyebrow_weight)
        nose_weight = np.full(CONSTANT_NOSE, nose_weight)
        eye_left_weight = np.full(CONSTANT_EYE, eye_weight)
        eye_right_weight = np.full(CONSTANT_EYE, eye_weight)
        lips_weight = np.full(CONSTANT_LIPS, lip_weight)
        eye_center_weight = np.full(CONSTANT_EYE_CENTER, eye_weight)
        part_weight = np.concatenate(
            (contour_weight, eyebrow_left_weight, eyebrow_right_weight,
             nose_weight, eye_left_weight, eye_right_weight, lips_weight,
             eye_center_weight),
            axis=0)

        # stored on CPU; moved to the prediction's device at call time
        self.part_weight = torch.from_numpy(part_weight)

    def forward(self, pred, target, pose):
        """Compute the weighted wing loss.

        Args:
            pred: predicted flattened landmarks, (B, 212).
            target: target flattened landmarks, (B, 212).
            pose: head pose angles in degrees, (B, 3).
        """
        # per-sample weight grows with pose magnitude: 1 (frontal) .. 11
        weight = 5.0 * (1.0 - torch.cos(pose * np.pi / 180.0)) + 1.0
        weight = torch.sum(weight, dim=1) / 3.0
        weight = weight.view((weight.shape[0], 1))

        if self.part_weight is not None:
            weight = weight * self.part_weight.to(pred.device)

        y = target
        y_hat = pred
        delta_y = (y - y_hat).abs() * weight
        # wing loss: log region for small errors, linear for large ones
        delta_y1 = delta_y[delta_y < self.omega]
        delta_y2 = delta_y[delta_y >= self.omega]
        loss1 = self.omega * torch.log(1 + delta_y1 / self.epsilon)
        C = self.omega - self.omega * math.log(1 + self.omega / self.epsilon)
        loss2 = delta_y2 - C
        result = self.points_weight * (loss1.sum() + loss2.sum()) / (
            len(loss1) + len(loss2))
        return result
|
||||||
|
|
||||||
|
|
||||||
|
@LOSSES.register_module()
class FacePoseLoss(nn.Module):
    """Scaled MSE loss for head-pose regression."""

    def __init__(self, pose_weight=1.0):
        super(FacePoseLoss, self).__init__()
        self.criterion = nn.MSELoss()
        # global multiplier balancing pose loss against keypoint loss
        self.pose_weight = pose_weight

    def forward(self, pred, target):
        """Return ``pose_weight * MSE(pred, target)``."""
        return self.pose_weight * self.criterion(pred, target)
|
@ -5,6 +5,10 @@ from .conv_ws import ConvWS2d, conv_ws_2d
|
|||||||
from .dist_utils import (DistributedLossWrapper, DistributedMinerWrapper,
|
from .dist_utils import (DistributedLossWrapper, DistributedMinerWrapper,
|
||||||
get_world_size, is_dist_avail_and_initialized,
|
get_world_size, is_dist_avail_and_initialized,
|
||||||
reduce_mean)
|
reduce_mean)
|
||||||
|
from .face_keypoint_utils import (ION, InvertedResidual, Residual, Softmax,
|
||||||
|
View, conv_bn, conv_no_relu,
|
||||||
|
get_keypoint_accuracy, get_pose_accuracy,
|
||||||
|
pose_accuracy)
|
||||||
from .gather_layer import GatherLayer
|
from .gather_layer import GatherLayer
|
||||||
from .init_weights import _init_weights, trunc_normal_
|
from .init_weights import _init_weights, trunc_normal_
|
||||||
from .multi_pooling import GeMPooling, MultiAvgPooling, MultiPooling
|
from .multi_pooling import GeMPooling, MultiAvgPooling, MultiPooling
|
||||||
|
240
easycv/models/utils/face_keypoint_utils.py
Normal file
240
easycv/models/utils/face_keypoint_utils.py
Normal file
@ -0,0 +1,240 @@
|
|||||||
|
import copy
|
||||||
|
import math
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
|
||||||
|
def conv_bn(inp, oup, kernel, stride, padding=1):
    """Conv2d (no bias) -> BatchNorm2d -> PReLU building block."""
    layers = [
        nn.Conv2d(inp, oup, kernel, stride, padding, bias=False),
        nn.BatchNorm2d(oup),
        nn.PReLU(oup),
    ]
    return nn.Sequential(*layers)
|
||||||
|
|
||||||
|
|
||||||
|
def conv_no_relu(inp, oup, kernel, stride, padding=1):
    """Conv2d (no bias) -> BatchNorm2d, without an activation — used as
    the final layer of regression branches."""
    layers = [
        nn.Conv2d(inp, oup, kernel, stride, padding, bias=False),
        nn.BatchNorm2d(oup),
    ]
    return nn.Sequential(*layers)
|
||||||
|
|
||||||
|
|
||||||
|
class View(nn.Module):
    """Module wrapper around ``Tensor.view`` so a reshape can live
    inside an ``nn.Sequential``."""

    def __init__(self, shape):
        super(View, self).__init__()
        # target shape, may contain -1 for an inferred dimension
        self.shape = shape

    def forward(self, x):
        """Return ``x`` reshaped to ``self.shape`` (no copy)."""
        return x.view(*self.shape)
|
||||||
|
|
||||||
|
|
||||||
|
class Softmax(nn.Module):
    """Thin module wrapper over ``nn.Softmax`` along a fixed dim."""

    def __init__(self, dim):
        super(Softmax, self).__init__()
        self.softmax = nn.Softmax(dim)

    def forward(self, x):
        """Apply softmax along the configured dimension."""
        return self.softmax(x)
|
||||||
|
|
||||||
|
|
||||||
|
class InvertedResidual(nn.Module):
    """MobileNetV2-style inverted residual: 1x1 expand -> depthwise ->
    1x1 project, each followed by BatchNorm.

    ``activation`` selects the nonlinearity after the first and second
    BN ('relu'/'prelu' pairs; the 'half_*' variants mix the two).

    Fix: the original silently left ``self.conv`` unset for an unknown
    ``activation`` string (deferred AttributeError); now raises
    ``ValueError`` immediately.  The duplicated branch bodies are
    collapsed into one table-driven construction with identical module
    structure (state_dict-compatible).
    """

    # activation after (first BN, second BN) per mode
    _ACT_PAIRS = {
        'relu': ('relu', 'relu'),
        'prelu': ('prelu', 'prelu'),
        'half_v1': ('relu', 'prelu'),
        'half_v2': ('prelu', 'relu'),
    }

    def __init__(self,
                 inp,
                 oup,
                 kernel_size,
                 stride,
                 padding,
                 expand_ratio=2,
                 use_connect=False,
                 activation='relu'):
        super(InvertedResidual, self).__init__()

        hid_channels = int(inp * expand_ratio)
        if activation not in self._ACT_PAIRS:
            raise ValueError(
                'Unsupported activation: {!r}'.format(activation))
        first, second = self._ACT_PAIRS[activation]

        def _make_act(kind):
            # PReLU carries per-channel parameters; ReLU is stateless
            if kind == 'relu':
                return nn.ReLU(inplace=True)
            return nn.PReLU(hid_channels)

        self.conv = nn.Sequential(
            nn.Conv2d(inp, hid_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(hid_channels), _make_act(first),
            nn.Conv2d(
                hid_channels,
                hid_channels,
                kernel_size,
                stride,
                padding,
                groups=hid_channels,
                bias=False), nn.BatchNorm2d(hid_channels),
            _make_act(second),
            nn.Conv2d(hid_channels, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup))
        # when True, adds the input back (requires matching shapes)
        self.use_connect = use_connect

    def forward(self, x):
        """Apply the block, optionally with a residual connection."""
        if self.use_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)
|
||||||
|
|
||||||
|
|
||||||
|
class Residual(nn.Module):
    """Depthwise-separable block: depthwise conv -> BN -> act ->
    pointwise 1x1 conv -> BN -> act.

    ``activation`` selects the pair of nonlinearities ('relu'/'prelu';
    the 'half_*' variants mix them).

    Fix: the original silently left ``self.conv`` unset for an unknown
    ``activation`` string (deferred AttributeError); now raises
    ``ValueError`` immediately.  The four duplicated branches are
    collapsed into one table-driven construction with identical module
    structure (state_dict-compatible).
    """

    # activation after (depthwise BN, pointwise BN) per mode
    _ACT_PAIRS = {
        'relu': ('relu', 'relu'),
        'prelu': ('prelu', 'prelu'),
        'half_v1': ('relu', 'prelu'),
        'half_v2': ('prelu', 'relu'),
    }

    def __init__(self,
                 inp,
                 oup,
                 kernel_size,
                 stride,
                 padding,
                 use_connect=False,
                 activation='relu'):
        super(Residual, self).__init__()

        # when True, adds the input back (requires matching shapes)
        self.use_connect = use_connect

        if activation not in self._ACT_PAIRS:
            raise ValueError(
                'Unsupported activation: {!r}'.format(activation))
        first, second = self._ACT_PAIRS[activation]

        def _make_act(kind, channels):
            # PReLU carries per-channel parameters; ReLU is stateless
            if kind == 'relu':
                return nn.ReLU(inplace=True)
            return nn.PReLU(channels)

        self.conv = nn.Sequential(
            nn.Conv2d(
                inp,
                inp,
                kernel_size,
                stride,
                padding,
                groups=inp,
                bias=False), nn.BatchNorm2d(inp), _make_act(first, inp),
            nn.Conv2d(inp, oup, 1, 1, 0, bias=False), nn.BatchNorm2d(oup),
            _make_act(second, oup))

    def forward(self, x):
        """Apply the block, optionally with a residual connection."""
        if self.use_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)
|
||||||
|
|
||||||
|
|
||||||
|
def pose_accuracy(output, target):
    """Mean absolute error between predicted and ground-truth pose values.

    Args:
        output (Tensor): predicted pose values.
        target (Tensor): ground-truth pose values, same shape as output.

    Returns:
        float-like numpy scalar: mean of |output - target| over all elements.
    """
    with torch.no_grad():
        pred = output.detach().cpu().numpy()
        gt = target.detach().cpu().numpy()

    return np.mean(np.abs(pred - gt))
|
||||||
|
|
||||||
|
|
||||||
|
def ION(output, target, left_eye_left_coner_idx, right_eye_right_corner_idx,
        num_pts):
    """Inter-ocular normalized mean error (NME) of predicted keypoints.

    The per-sample error is the sum of per-point Euclidean distances
    divided by (inter-ocular distance * num_pts).

    Args:
        output (Tensor): predicted points, reshapeable to (B, num_pts, 2).
        target (Tensor): ground-truth points, same layout as output.
        left_eye_left_coner_idx (int): index of the left eye's outer corner.
        right_eye_right_corner_idx (int): index of the right eye's outer corner.
        num_pts (int): number of keypoints per face.

    Returns:
        float-like numpy scalar: NME averaged over the batch.
    """
    with torch.no_grad():
        pred = output.view(-1, num_pts, 2).cpu().numpy()
        gt = target.view(-1, num_pts, 2).cpu().numpy()

        # Normalization factor: distance between the two outer eye corners
        # (epsilon guards against division by zero).
        eye_vec = gt[:, left_eye_left_coner_idx] - gt[:,
                                                      right_eye_right_corner_idx]
        interocular = np.sqrt(
            np.square(eye_vec[:, 0]) + np.square(eye_vec[:, 1])) + 1e-5

        # Per-point Euclidean error, summed over all points of each sample.
        delta = gt - pred
        per_point = np.sqrt(
            np.square(delta[:, :, 0]) + np.square(delta[:, :, 1]))
        nme = np.sum(per_point, axis=1) / (interocular * num_pts)

    return np.mean(nme)
|
||||||
|
|
||||||
|
|
||||||
|
def get_keypoint_accuracy(output, target_point):
    """Build the keypoint metric dict (NME) for the 106-point face layout.

    Args:
        output (Tensor): predicted keypoints.
        target_point (Tensor): ground-truth keypoints.

    Returns:
        dict: {'nme': inter-ocular normalized mean error}.
    """
    # Outer-eye-corner indices of the 106-point scheme are used to
    # normalize the error by the inter-ocular distance.
    num_points = 106
    left_eye_left_corner_index = 66
    right_eye_right_corner_index = 79

    return {
        'nme':
        ION(output, target_point, left_eye_left_corner_index,
            right_eye_right_corner_index, num_points)
    }
|
||||||
|
|
||||||
|
|
||||||
|
def get_pose_accuracy(output, target_pose):
    """Build the pose metric dict from the mean-absolute-error accuracy.

    Args:
        output (Tensor): predicted pose values.
        target_pose (Tensor): ground-truth pose values.

    Returns:
        dict: {'pose_acc': float mean absolute error}.
    """
    return {'pose_acc': float(pose_accuracy(output, target_pose))}
|
@ -2,6 +2,7 @@
|
|||||||
from .classifier import TorchClassifier
|
from .classifier import TorchClassifier
|
||||||
from .detector import (TorchFaceDetector, TorchYoloXClassifierPredictor,
|
from .detector import (TorchFaceDetector, TorchYoloXClassifierPredictor,
|
||||||
TorchYoloXPredictor)
|
TorchYoloXPredictor)
|
||||||
|
from .face_keypoints_predictor import FaceKeypointsPredictor
|
||||||
from .feature_extractor import (TorchFaceAttrExtractor,
|
from .feature_extractor import (TorchFaceAttrExtractor,
|
||||||
TorchFaceFeatureExtractor,
|
TorchFaceFeatureExtractor,
|
||||||
TorchFeatureExtractor)
|
TorchFeatureExtractor)
|
||||||
|
@ -113,6 +113,7 @@ class PredictorV2(object):
|
|||||||
device=None,
|
device=None,
|
||||||
save_results=False,
|
save_results=False,
|
||||||
save_path=None,
|
save_path=None,
|
||||||
|
mode='rgb',
|
||||||
*args,
|
*args,
|
||||||
**kwargs):
|
**kwargs):
|
||||||
self.model_path = model_path
|
self.model_path = model_path
|
||||||
@ -135,6 +136,7 @@ class PredictorV2(object):
|
|||||||
self.model = self.prepare_model()
|
self.model = self.prepare_model()
|
||||||
self.processor = self.build_processor()
|
self.processor = self.build_processor()
|
||||||
self._load_op = None
|
self._load_op = None
|
||||||
|
self.mode = mode
|
||||||
|
|
||||||
def prepare_model(self):
|
def prepare_model(self):
|
||||||
"""Build model from config file by default.
|
"""Build model from config file by default.
|
||||||
@ -182,7 +184,7 @@ class PredictorV2(object):
|
|||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
if self._load_op is None:
|
if self._load_op is None:
|
||||||
load_cfg = dict(type='LoadImage', mode='rgb')
|
load_cfg = dict(type='LoadImage', mode=self.mode)
|
||||||
self._load_op = build_from_cfg(load_cfg, PIPELINES)
|
self._load_op = build_from_cfg(load_cfg, PIPELINES)
|
||||||
|
|
||||||
if not isinstance(input, str):
|
if not isinstance(input, str):
|
||||||
|
120
easycv/predictors/face_keypoints_predictor.py
Normal file
120
easycv/predictors/face_keypoints_predictor.py
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import os
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from torchvision.transforms import Compose
|
||||||
|
|
||||||
|
from easycv.datasets.registry import PIPELINES
|
||||||
|
from easycv.models import build_model
|
||||||
|
from easycv.predictors.builder import PREDICTORS
|
||||||
|
from easycv.predictors.interface import PredictorInterface
|
||||||
|
from easycv.utils.checkpoint import load_checkpoint
|
||||||
|
from easycv.utils.config_tools import mmcv_config_fromfile
|
||||||
|
from easycv.utils.registry import build_from_cfg
|
||||||
|
from ..models import *
|
||||||
|
from .base import PredictorV2
|
||||||
|
|
||||||
|
# Landmark index groups of the 106-point face layout, used by
# FaceKeypointsPredictor.show_result to draw each facial part as a
# polyline.  Closed parts (brows, eyes, mouth) repeat their first index
# at the end so the drawn contour closes on itself.
face_contour_point_index = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
]
left_eye_brow_point_index = [33, 34, 35, 36, 37, 38, 39, 40, 41, 33]
right_eye_brow_point_index = [42, 43, 44, 45, 46, 47, 48, 49, 50, 42]
left_eye_point_index = [66, 67, 68, 69, 70, 71, 72, 73, 66]
right_eye_point_index = [75, 76, 77, 78, 79, 80, 81, 82, 75]
# The nose bridge is an open polyline; the contour runs around the nostrils.
nose_bridge_point_index = [51, 52, 53, 54]
nose_contour_point_index = [55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65]
mouth_outer_point_index = [84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 84]
mouth_inter_point_index = [96, 97, 98, 99, 100, 101, 102, 103, 96]
|
||||||
|
|
||||||
|
|
||||||
|
@PREDICTORS.register_module()
class FaceKeypointsPredictor(PredictorV2):
    """Predict pipeline for face 2D keypoints.

    Runs the face-keypoint model loaded from ``model_path`` and offers a
    ``show_result`` helper that renders predicted landmarks onto the image.

    Args:
        model_path (str): Path of model path.
        model_config (str): config file path for model and processor to init.
            Defaults to None.
        batch_size (int): batch size used by the base predictor.
        device: inference device, forwarded to PredictorV2.
        save_results (bool): whether the base predictor saves outputs.
        save_path (str): where the base predictor saves outputs.
        mode (str): channel order expected by the model ('bgr' here).
    """

    def __init__(self,
                 model_path,
                 model_config,
                 batch_size=1,
                 device=None,
                 save_results=False,
                 save_path=None,
                 mode='bgr'):
        super(FaceKeypointsPredictor, self).__init__(
            model_path,
            model_config,
            batch_size=batch_size,
            device=device,
            save_results=save_results,
            save_path=save_path,
            mode=mode)

        # self.cfg is populated by PredictorV2 from model_config.
        # IMAGE_SIZE is the (square) network input resolution and
        # POINT_NUMBER the number of predicted landmarks (106 in the
        # shipped config) — TODO confirm against the config file used.
        self.input_size = self.cfg.IMAGE_SIZE
        self.point_number = self.cfg.POINT_NUMBER

    def show_result(self, img, points, scale=4.0, save_path=None):
        """Draw predicted keypoints over `img`.

        Args:
            img (str or Tensor): The image to be displayed.
            points (Tensor): The face keypoints to draw over `img`, given
                in network-input coordinates (input_size x input_size).
            scale: zoom in or out scale applied to the visualization.
            save_path: path to save drawn 'img'.
        Returns:
            img (Tensor): the annotated (and scaled) image.
        """

        # NOTE(review): cv2.imread requires a file path, but the docstring
        # also allows a Tensor — confirm what callers actually pass.
        img = cv2.imread(img)
        img = img.copy()
        h, w, c = img.shape
        # Map keypoints from network-input coordinates back to the
        # original image resolution.
        scale_h = h / self.input_size
        scale_w = w / self.input_size

        points = points.view(-1, self.point_number, 2).cpu().numpy()[0]
        for index in range(len(points)):
            points[index][0] *= scale_w
            points[index][1] *= scale_h

        # Zoomed copy used purely for visualization; all draw calls below
        # multiply coordinates by `scale` to match it.
        image = cv2.resize(img, dsize=None, fx=scale, fy=scale)

        def draw_line(point_index, image, point):
            # Connect consecutive indices of one facial-part polyline.
            for i in range(len(point_index) - 1):
                cur_index = point_index[i]
                next_index = point_index[i + 1]
                cur_pt = (int(point[cur_index][0] * scale),
                          int(point[cur_index][1] * scale))
                next_pt = (int(point[next_index][0] * scale),
                           int(point[next_index][1] * scale))
                cv2.line(image, cur_pt, next_pt, (0, 0, 255), thickness=2)

        draw_line(face_contour_point_index, image, points)
        draw_line(left_eye_brow_point_index, image, points)
        draw_line(right_eye_brow_point_index, image, points)
        draw_line(left_eye_point_index, image, points)
        draw_line(right_eye_point_index, image, points)
        draw_line(nose_bridge_point_index, image, points)
        draw_line(nose_contour_point_index, image, points)
        draw_line(mouth_outer_point_index, image, points)
        draw_line(mouth_inter_point_index, image, points)

        # Annotate each landmark with its index for debugging.
        size = len(points)
        for i in range(size):
            x = int(points[i][0])
            y = int(points[i][1])
            cv2.putText(image, str(i), (int(x * scale), int(y * scale)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            cv2.circle(image, (int(x * scale), int(y * scale)), 2, (0, 255, 0),
                       cv2.FILLED)

        if save_path is not None:
            cv2.imwrite(save_path, image)

        return image
|
@ -3,6 +3,7 @@ dataclasses
|
|||||||
einops
|
einops
|
||||||
future
|
future
|
||||||
h5py
|
h5py
|
||||||
|
imgaug
|
||||||
json_tricks
|
json_tricks
|
||||||
numpy
|
numpy
|
||||||
opencv-python
|
opencv-python
|
||||||
|
39
tests/predictors/test_face_keypoints_predictor.py
Normal file
39
tests/predictors/test_face_keypoints_predictor.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from easycv.predictors.face_keypoints_predictor import FaceKeypointsPredictor
|
||||||
|
|
||||||
|
|
||||||
|
class FaceKeypointsPredictorWithoutDetectorTest(unittest.TestCase):
    """Smoke test: run FaceKeypointsPredictor on one local image.

    Requires the test image, pretrained checkpoint, and config to exist at
    the local paths configured in setUp.
    """

    def setUp(self):
        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
        self.image_path = './data/test/face_2d_keypoints/data/002253.png'
        self.save_image_path = './data/test/face_2d_keypoints/data/result_002253.png'
        self.model_path = './data/test/face_2d_keypoints/models/epoch_580.pth'
        self.model_config_path = './configs/face/face_96x96_wingloss.py'

    def test_single(self):
        predict_pipeline = FaceKeypointsPredictor(
            model_path=self.model_path, model_config=self.model_config_path)

        output = predict_pipeline(self.image_path)[0]
        output_keypoints = output['point']
        output_pose = output['pose']
        image_show = predict_pipeline.show_result(
            self.image_path,
            output_keypoints,
            scale=2,
            save_path=self.save_image_path)

        # The original test produced predictions but asserted nothing;
        # verify both heads exist and the visualization is an H x W x C image.
        self.assertIsNotNone(output_pose)
        self.assertEqual(output_keypoints.view(-1, 2).shape[1], 2)
        self.assertIsNotNone(image_show)
        self.assertEqual(len(image_show.shape), 3)
|
||||||
|
image_show = predict_pipeline.show_result(
|
||||||
|
self.image_path,
|
||||||
|
output_keypoints,
|
||||||
|
scale=2,
|
||||||
|
save_path=self.save_image_path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
Loading…
x
Reference in New Issue
Block a user