EasyCV/easycv/datasets/video_recognition/pipelines/pose_transform.py

# Copyright (c) Alibaba, Inc. and its affiliates.
# Copyright (c) OpenMMLab. All rights reserved.
# Refer to: https://github.com/open-mmlab/mmaction2/blob/master/mmaction/datasets/pipelines/pose_loading.py
import numpy as np

from easycv.datasets.registry import PIPELINES


@PIPELINES.register_module()
class PaddingWithLoop:
    """Sample a fixed-length clip, looping the video when it is too short.

    The clip always starts at frame index 0. Indices ``0 .. clip_len - 1``
    are taken modulo "total_frames", so a video with fewer frames than
    ``clip_len`` is repeated from its beginning until the clip is full.

    Required keys are "total_frames", added or modified keys
    are "frame_inds", "clip_len", "frame_interval" and "num_clips".

    Args:
        clip_len (int): Frames of each sampled output clip.
        num_clips (int): Number of clips to be sampled. It is only recorded
            in the results; the sampled indices do not depend on it.
            Default: 1.
    """

    def __init__(self, clip_len, num_clips=1):
        self.clip_len = clip_len
        self.num_clips = num_clips

    def __call__(self, results):
        """Compute the looped frame indices and store them in ``results``.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The same ``results`` object, updated in place.
        """
        num_frames = results['total_frames']

        # Wrap around to frame 0 once the end of the video is reached.
        inds = np.mod(np.arange(self.clip_len), num_frames)

        results['frame_inds'] = inds.astype(np.int64)
        results['clip_len'] = self.clip_len
        # Frames are taken consecutively, so no sampling interval applies.
        results['frame_interval'] = None
        results['num_clips'] = self.num_clips
        return results

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}('
                    f'clip_len={self.clip_len}, '
                    f'num_clips={self.num_clips})')
        return repr_str


@PIPELINES.register_module()
class PoseDecode:
    """Load and decode pose with given indices.

    Required keys are "keypoint", "frame_inds" (optional), "keypoint_score"
    (optional), added or modified keys are "keypoint", "keypoint_score" (if
    applicable) and "frame_inds". When "frame_inds" is absent, "total_frames"
    is required and every frame is selected. An optional "offset" key is
    added to the frame indices before indexing.
    """

    @staticmethod
    def _load_kp(kp, frame_inds):
        """Load keypoints given frame indices.

        Each item of ``kp`` is indexed along its own first axis.

        Args:
            kp (np.ndarray): The keypoint coordinates.
            frame_inds (np.ndarray): The frame indices.

        Returns:
            list[np.ndarray]: One float32 array per item of ``kp``.
        """

        return [x[frame_inds].astype(np.float32) for x in kp]

    @staticmethod
    def _load_kpscore(kpscore, frame_inds):
        """Load keypoint scores given frame indices.

        Each item of ``kpscore`` is indexed along its own first axis.

        Args:
            kpscore (np.ndarray): The confidence scores of keypoints.
            frame_inds (np.ndarray): The frame indices.

        Returns:
            list[np.ndarray]: One float32 array per item of ``kpscore``.
        """

        return [x[frame_inds].astype(np.float32) for x in kpscore]

    def __call__(self, results):
        """Select the requested frames from the keypoint arrays.

        The arrays are indexed along their second axis and cast to float32.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The same ``results`` object, updated in place.
        """

        if 'frame_inds' not in results:
            results['frame_inds'] = np.arange(results['total_frames'])

        # Accept any array-like; an existing ndarray is passed through as-is.
        frame_inds = np.asarray(results['frame_inds'])
        if frame_inds.ndim != 1:
            # Squeezing a single index (e.g. shape (1, 1)) yields a 0-d array,
            # and indexing with a 0-d array would drop the frame axis of the
            # outputs. Keep the indices at least 1-D to preserve that axis.
            frame_inds = np.atleast_1d(np.squeeze(frame_inds))
        results['frame_inds'] = frame_inds

        offset = results.get('offset', 0)
        frame_inds = frame_inds + offset

        if 'keypoint_score' in results:
            kpscore = results['keypoint_score']
            results['keypoint_score'] = kpscore[:,
                                                frame_inds].astype(np.float32)

        if 'keypoint' in results:
            results['keypoint'] = results['keypoint'][:, frame_inds].astype(
                np.float32)

        return results

    def __repr__(self):
        repr_str = f'{self.__class__.__name__}()'
        return repr_str


@PIPELINES.register_module()
class PoseNormalize:
    """Centre keypoint values on ``mean`` and scale them by the value range.

    Computes ``(keypoint - mean) / (max_value - min_value)``. Each statistic
    is stored as a float32 array of shape ``(-1, 1, 1, 1)``, so it broadcasts
    along the first axis of a 4-D keypoint array.

    NOTE(review): with the default statistics, values lying between
    ``min_value`` and ``max_value`` map to [-0.5, 0.5], not [-1, 1].

    Required keys are "keypoint", added or modified keys are "keypoint"
    and "keypoint_norm_cfg".

    Args:
        mean (list | tuple): The mean value of the keypoint values.
        min_value (list | tuple): The minimum value of the keypoint values.
        max_value (list | tuple): The maximum value of the keypoint values.
    """

    def __init__(self,
                 mean=(960., 540., 0.5),
                 min_value=(0., 0., 0.),
                 max_value=(1920, 1080, 1.)):

        def _as_column(values):
            # One entry per leading-axis slot, broadcastable over three more axes.
            return np.array(values, dtype=np.float32).reshape(-1, 1, 1, 1)

        self.mean = _as_column(mean)
        self.min_value = _as_column(min_value)
        self.max_value = _as_column(max_value)

    def __call__(self, results):
        """Normalize ``results['keypoint']`` and record the statistics used.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The same ``results`` object, updated in place.
        """
        centered = results['keypoint'] - self.mean
        value_range = self.max_value - self.min_value
        results['keypoint'] = centered / value_range
        # The stored arrays are the transform's own attributes, not copies.
        results['keypoint_norm_cfg'] = {
            'mean': self.mean,
            'min_value': self.min_value,
            'max_value': self.max_value,
        }
        return results


@PIPELINES.register_module()
class FormatGCNInput:
    """Format final skeleton shape to the given input_format.

    The keypoint array is rearranged from (M, T, V, C) to (C, T, V, M), and
    its last axis is zero-padded or truncated to exactly ``num_person``
    entries. When "keypoint_score" is present it is appended to the
    keypoints as one extra channel before the rearrangement.

    Required keys are "keypoint" and "keypoint_score"(optional),
    added or modified keys are "keypoint" and "input_shape".

    Args:
        input_format (str): Define the final skeleton format. Only 'NCTVM'
            is accepted.
        num_person (int): Size of the last (M) axis of the output.
            Default: 2.

    Raises:
        ValueError: If ``input_format`` is not a supported format.
    """

    def __init__(self, input_format, num_person=2):
        self.input_format = input_format
        if self.input_format not in ['NCTVM']:
            raise ValueError(
                f'The input format {self.input_format} is invalid.')
        self.num_person = num_person

    def __call__(self, results):
        """Rearrange the skeleton data and fix the size of its last axis.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: The same ``results`` object, updated in place.
        """
        keypoint = results['keypoint']

        if 'keypoint_score' in results:
            # Append the score as an additional trailing channel.
            keypoint_confidence = np.expand_dims(results['keypoint_score'], -1)
            keypoint_3d = np.concatenate((keypoint, keypoint_confidence),
                                         axis=-1)
        else:
            keypoint_3d = keypoint

        keypoint_3d = np.transpose(keypoint_3d,
                                   (3, 1, 2, 0))  # M T V C -> C T V M

        current_person = keypoint_3d.shape[-1]
        if current_person < self.num_person:
            # Too few entries on the last axis: fill the rest with zeros.
            pad_dim = self.num_person - current_person
            pad = np.zeros(
                keypoint_3d.shape[:-1] + (pad_dim, ), dtype=keypoint_3d.dtype)
            keypoint_3d = np.concatenate((keypoint_3d, pad), axis=-1)
        elif current_person > self.num_person:
            # Too many entries: keep only the first ``num_person``.
            keypoint_3d = keypoint_3d[:, :, :, :self.num_person]

        results['keypoint'] = keypoint_3d
        results['input_shape'] = keypoint_3d.shape
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += (f"(input_format='{self.input_format}', "
                     f'num_person={self.num_person})')
        return repr_str