import functools import json import mmcv import numpy as np import torch from mmcv.parallel import collate from mmcv.utils.path import is_filepath from torchvision.transforms import Compose from easycv.core.bbox.bbox_util import xywh2xyxy_coco, xyxy2xywh_coco from easycv.datasets.pose.data_sources.top_down import DatasetInfo from easycv.datasets.registry import PIPELINES from easycv.file import io from easycv.framework.errors import ModuleNotFoundError, TypeError, ValueError from easycv.models import build_model from easycv.predictors.builder import PREDICTORS from easycv.predictors.detector import TorchYoloXPredictor from easycv.utils.checkpoint import load_checkpoint from easycv.utils.config_tools import mmcv_config_fromfile from easycv.utils.registry import build_from_cfg try: from easy_vision.python.inference.predictor import PredictorInterface except: from easycv.predictors.interface import PredictorInterface class LoadImage: """A simple pipeline to load image.""" def __init__(self, color_type='color', channel_order='rgb'): self.color_type = color_type self.channel_order = channel_order def __call__(self, results): """Call function to load images into results. Args: results (dict): A result dict contains the img_or_path. if `img_or_path` is str, return self.channel_order mode, if np.ndarray, return raw without process. Returns: dict: ``results`` will be returned containing loaded image. """ if isinstance(results['img_or_path'], str): results['image_file'] = results['img_or_path'] img = mmcv.imread(results['img_or_path'], self.color_type, self.channel_order) elif isinstance(results['img_or_path'], np.ndarray): results['image_file'] = '' img = results['img_or_path'] else: raise TypeError( '"img_or_path" must be a numpy array or a str or a pathlib.Path object' ) results['img'] = img return results def _box2cs(image_size, box): """This encodes bbox(x,y,w,h) into (center, scale) Args: x, y, w, h Returns: tuple: A tuple containing center and scale. - np.ndarray[float32](2,): Center of the bbox (x, y). - np.ndarray[float32](2,): Scale of the bbox w & h. """ x, y, w, h = box[:4] aspect_ratio = image_size[0] / image_size[1] center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32) if w > aspect_ratio * h: h = w * 1.0 / aspect_ratio elif w < aspect_ratio * h: w = h * aspect_ratio # pixel std is 200.0 scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) scale = scale * 1.25 return center, scale def rgetattr(obj, attr, *args): def _getattr(obj, attr): return getattr(obj, attr, *args) return functools.reduce(_getattr, [obj] + attr.split('.')) class OutputHook: def __init__(self, module, outputs=None, as_tensor=False): self.outputs = outputs self.as_tensor = as_tensor self.layer_outputs = {} self.register(module) def register(self, module): def hook_wrapper(name): def hook(model, input, output): if self.as_tensor: self.layer_outputs[name] = output else: if isinstance(output, list): self.layer_outputs[name] = [ out.detach().cpu().numpy() for out in output ] else: self.layer_outputs[name] = output.detach().cpu().numpy( ) return hook self.handles = [] if isinstance(self.outputs, (list, tuple)): for name in self.outputs: try: layer = rgetattr(module, name) h = layer.register_forward_hook(hook_wrapper(name)) except ModuleNotFoundError as module_not_found: raise ModuleNotFoundError( f'Module {name} not found') from module_not_found self.handles.append(h) def remove(self): for h in self.handles: h.remove() def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.remove() class TorchPoseTopDownPredictor(PredictorInterface): """Inference a single image with a list of bounding boxes. """ def __init__(self, model_path, model_config=None): """ init model Args: model_path: model file path model_config: config string for model to init, in json format """ bbox_thr = model_config.get('bbox_thr', 0.3) format = model_config.get('format', 'xywh') assert format in ['xyxy', 'xywh'] self.model_path = model_path self.bbox_thr = bbox_thr self.device = 'cuda' if torch.cuda.is_available() else 'cpu' self.model = None with io.open(self.model_path, 'rb') as infile: checkpoint = torch.load(infile, map_location='cpu') assert 'meta' in checkpoint and 'config' in checkpoint[ 'meta'], 'meta.config is missing from checkpoint' self.cfg = checkpoint['meta']['config'] assert hasattr(self.cfg, 'dataset_info'), \ 'Not find dataset_info in checkpoint["meta"]["config"]' if is_filepath(self.cfg.dataset_info): cfg = mmcv_config_fromfile(self.cfg.dataset_info) self.cfg.dataset_info = cfg._cfg_dict['dataset_info'] self.dataset_info = DatasetInfo(self.cfg.dataset_info) self.cfg.model.pretrained = None # build model self.model = build_model(self.cfg.model) map_location = 'cpu' if self.device == 'cpu' else 'cuda' self.ckpt = load_checkpoint( self.model, self.model_path, map_location=map_location) self.model.to(self.device) self.model.eval() # build pipeline channel_order = self.cfg.test_pipeline[0].get('channel_order', 'rgb') test_pipeline = [LoadImage(channel_order=channel_order)] + [ build_from_cfg(p, PIPELINES) for p in self.cfg.test_pipeline ] self.test_pipeline = Compose(test_pipeline) def _inference_single_pose_model(self, model, img_or_path, bboxes, dataset_info=None, return_heatmap=False): """Inference human bounding boxes. num_bboxes: N num_keypoints: K Args: model (nn.Module): The loaded pose model. img_or_path (str | np.ndarray): Image filename or loaded image. bboxes (list | np.ndarray): All bounding boxes (with scores), shaped (N, 4) or (N, 5). (left, top, width, height, [score]) where N is number of bounding boxes. dataset_info (DatasetInfo): A class containing all dataset info. outputs (list[str] | tuple[str]): Names of layers whose output is to be returned, default: None Returns: ndarray[NxKx3]: Predicted pose x, y, score. heatmap[N, K, H, W]: Model output heatmap. """ cfg = self.cfg device = next(model.parameters()).device assert len(bboxes[0]) in [4, 5] dataset_name = getattr(dataset_info, 'dataset_name', '') flip_pairs = dataset_info.flip_pairs batch_data = [] for bbox in bboxes: center, scale = _box2cs(cfg.data_cfg['image_size'], bbox) # prepare data data = { 'img_or_path': img_or_path, 'image_id': 0, 'center': center, 'scale': scale, 'bbox_score': bbox[4] if len(bbox) == 5 else 1, 'bbox_id': 0, # need to be assigned if batch_size > 1 'dataset': dataset_name, 'joints_3d': np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32), 'joints_3d_visible': np.zeros((cfg.data_cfg.num_joints, 3), dtype=np.float32), 'rotation': 0, 'ann_info': { 'image_size': np.array(cfg.data_cfg['image_size']), 'num_joints': cfg.data_cfg['num_joints'], 'flip_pairs': flip_pairs } } data = self.test_pipeline(data) batch_data.append(data) batch_data = collate(batch_data, samples_per_gpu=1) if next(model.parameters()).is_cuda: # scatter not work so just move image to cuda device batch_data['img'] = batch_data['img'].to(device) # get all img_metas of each bounding box batch_data['img_metas'] = [ img_metas[0] for img_metas in batch_data['img_metas'].data ] # forward the model with torch.no_grad(): result = model( img=batch_data['img'], mode='test', img_metas=batch_data['img_metas'], return_heatmap=return_heatmap) if return_heatmap: return result['preds'], result['output_heatmap'] else: return result['preds'], None def _predict_single_img(self, img_info, bbox_thr, dataset_info, return_heatmap=False, outputs=None): pose_results = [] returned_outputs = [] img_or_path = img_info['img'] detection_results = img_info['detection_results'] if not detection_results: return [], [] # Change for-loop preprocess each bbox to preprocess all bboxes at once. bboxes = np.array([box['bbox'] for box in detection_results]) # Select bboxes by score threshold if bbox_thr is not None: assert bboxes.shape[1] == 5 valid_idx = np.where(bboxes[:, 4] > bbox_thr)[0] bboxes = bboxes[valid_idx] detection_results = [detection_results[i] for i in valid_idx] if format == 'xyxy': bboxes_xyxy = bboxes bboxes_xywh = xyxy2xywh_coco(bboxes.copy(), 1) else: # format is already 'xywh' bboxes_xywh = bboxes bboxes_xyxy = xywh2xyxy_coco(bboxes.copy(), -1) # if bbox_thr remove all bounding box if len(bboxes_xywh) == 0: return [], [] with OutputHook(self.model, outputs=outputs, as_tensor=False) as h: # poses is results['pred'] # N x 17x 3 poses, heatmap = self._inference_single_pose_model( self.model, img_or_path, bboxes_xywh, dataset_info=dataset_info, return_heatmap=return_heatmap) if return_heatmap: h.layer_outputs['heatmap'] = heatmap returned_outputs.append(h.layer_outputs) assert len(poses) == len(detection_results), print( len(poses), len(detection_results), len(bboxes_xyxy)) for pose, detection_result, bbox_xyxy in zip(poses, detection_results, bboxes_xyxy): pose_result = detection_result.copy() pose_result['keypoints'] = pose pose_result['bbox'] = bbox_xyxy pose_results.append(pose_result) return pose_results, returned_outputs def predict(self, input_data_list, batch_size=-1, return_heatmap=False): """Inference pose. Args: input_data_list: A list of image infos, like: [ { 'img' (str | np.ndarray, RGB): Image filename or loaded image. 'detection_results'(list | np.ndarray): All bounding boxes (with scores), shaped (N, 4) or (N, 5). (left, top, width, height, [score]) where N is number of bounding boxes. }, ... ] batch_size: batch size return_heatmap: return heatmap value or not, default false. Returns: { 'pose_results': list of ndarray[NxKx3]: Predicted pose x, y, score 'pose_heatmap' (optional): list of heatmap[N, K, H, W]: Model output heatmap } """ all_pose_results = [] for img_info in input_data_list: pose_results, returned_outputs = \ self._predict_single_img(img_info, self.bbox_thr, self.dataset_info) output = {'pose_results': pose_results} if return_heatmap: output.update({'pose_heatmap': returned_outputs}) # must return dict to adapt to pai all_pose_results.append(output) return all_pose_results @PREDICTORS.register_module() class TorchPoseTopDownPredictorWithDetector(PredictorInterface): SUPPORT_DETECTION_PREDICTORS = {'TorchYoloXPredictor': TorchYoloXPredictor} def __init__( self, model_path, model_config={ 'pose': { 'bbox_thr': 0.3, 'format': 'xywh' }, 'detection': { 'model_type': None, 'reserved_classes': [], 'score_thresh': 0.0, } }): """ init model Args: model_path: pose and detection model file path, split with `,`, make sure the first is pose model, second is detection model model_config: config string for model to init, in json format """ if isinstance(model_config, str): model_config = json.loads(model_config) detection_model_type = model_config['detection'].pop('model_type') assert detection_model_type in self.SUPPORT_DETECTION_PREDICTORS self.reserved_classes = model_config['detection'].get( 'reserved_classes', []) model_list = model_path.split(',') assert len(model_list) == 2 # first is pose model, second is detection model pose_model_path, detection_model_path = model_list detection_obj = self.SUPPORT_DETECTION_PREDICTORS[detection_model_type] self.detection_predictor = detection_obj( detection_model_path, model_config=model_config['detection']) self.pose_predictor = TorchPoseTopDownPredictor( pose_model_path, model_config=model_config['pose']) def process_det_results(self, outputs, input_data_list, reserved_classes=[]): filter_outputs = [] assert len(outputs) == len(input_data_list) for reserved_class in reserved_classes: assert reserved_class in self.detection_predictor.CLASSES, \ '%s not in detection classes %s' % (reserved_class, self.detection_predictor.CLASSES) # if reserved_class if [], reserve all classes reserved_classes = reserved_classes or self.detection_predictor.CLASSES for i in range(len(outputs)): output = outputs[i] cur_data = {'img': input_data_list[i], 'detection_results': []} for class_name in output['detection_class_names']: if class_name in reserved_classes: cur_data['detection_results'].append({ 'bbox': np.append(output['detection_boxes'][i], output['detection_scores'][i]) }) filter_outputs.append(cur_data) return filter_outputs def predict(self, input_data_list, batch_size=-1, return_heatmap=False): """Inference with pose model and detection model. Args: input_data_list: A list of images(np.ndarray, RGB) batch_size: batch size return_heatmap: return heatmap value or not, default false. Returns: { 'pose_results': list of ndarray[NxKx3]: Predicted pose x, y, score 'pose_heatmap' (optional): list of heatmap[N, K, H, W]: Model output heatmap } """ detection_output = self.detection_predictor.predict(input_data_list) output = self.process_det_results(detection_output, input_data_list, self.reserved_classes) pose_output = self.pose_predictor.predict( output, return_heatmap=return_heatmap) return pose_output def vis_pose_result(model, img, result, radius=4, thickness=1, kpt_score_thr=0.3, bbox_color='green', dataset_info=None, show=False, out_file=None): """Visualize the detection results on the image. Args: model (nn.Module): The loaded detector. img (str | np.ndarray): Image filename or loaded image. result (list[dict]): The results to draw over `img` (bbox_result, pose_result). radius (int): Radius of circles. thickness (int): Thickness of lines. kpt_score_thr (float): The threshold to visualize the keypoints. skeleton (list[tuple()]): Default None. show (bool): Whether to show the image. Default True. out_file (str|None): The filename of the output visualization image. """ # get dataset info if (dataset_info is None and hasattr(model, 'cfg') and 'dataset_info' in model.cfg): dataset_info = DatasetInfo(model.cfg.dataset_info) if not dataset_info: raise ValueError('Please provide `dataset_info`!') skeleton = dataset_info.skeleton pose_kpt_color = dataset_info.pose_kpt_color pose_link_color = dataset_info.pose_link_color if hasattr(model, 'module'): model = model.module img = model.show_result( img, result, skeleton, radius=radius, thickness=thickness, pose_kpt_color=pose_kpt_color, pose_link_color=pose_link_color, kpt_score_thr=kpt_score_thr, bbox_color=bbox_color, show=show, out_file=out_file) return img