import copy
import functools
import json

import mmcv
import numpy as np
import torch
from mmcv.parallel import collate
from mmcv.utils.path import is_filepath
from torchvision.transforms import Compose

from easycv.core.bbox.bbox_util import xywh2xyxy_coco, xyxy2xywh_coco
from easycv.datasets.pose.data_sources.top_down import DatasetInfo
from easycv.datasets.registry import PIPELINES
from easycv.file import io
from easycv.framework.errors import ModuleNotFoundError, TypeError, ValueError
from easycv.models import build_model
from easycv.predictors.builder import PREDICTORS
from easycv.predictors.detector import TorchYoloXPredictor
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.config_tools import mmcv_config_fromfile
from easycv.utils.registry import build_from_cfg

try:
    from easy_vision.python.inference.predictor import PredictorInterface
except ImportError:
    from easycv.predictors.interface import PredictorInterface


class LoadImage:
    """A simple pipeline to load an image."""

    def __init__(self, color_type='color', channel_order='rgb'):
        self.color_type = color_type
        self.channel_order = channel_order

    def __call__(self, results):
        """Call function to load images into results.

        Args:
            results (dict): A result dict containing the ``img_or_path`` key.
                If ``img_or_path`` is a str, the image is read in
                ``self.channel_order`` mode; if it is an np.ndarray, it is
                kept as-is without further processing.

        Returns:
            dict: ``results`` with the loaded image stored under ``img``.
        """
        if isinstance(results['img_or_path'], str):
            results['image_file'] = results['img_or_path']
            img = mmcv.imread(results['img_or_path'], self.color_type,
                              self.channel_order)
        elif isinstance(results['img_or_path'], np.ndarray):
            results['image_file'] = ''
            img = results['img_or_path']
        else:
            raise TypeError(
                '"img_or_path" must be a numpy array or a str or a pathlib.Path object'
            )

        results['img'] = img
        return results
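

# A minimal usage sketch for ``LoadImage`` (file name hypothetical): the same
# pipeline step accepts either a path or an already-decoded array.
#   load = LoadImage(channel_order='rgb')
#   results = load({'img_or_path': 'demo.jpg'})  # results['img']: RGB ndarray
#   results = load({'img_or_path': np.zeros((256, 192, 3), np.uint8)})  # as-is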


def _box2cs(image_size, box):
    """Encode a bbox (x, y, w, h) into (center, scale).

    Args:
        image_size (tuple | list): Model input size (width, height); its
            aspect ratio is used to pad the box.
        box (list | np.ndarray): Bounding box (x, y, w, h), optionally with a
            trailing score.

    Returns:
        tuple: A tuple containing center and scale.

        - np.ndarray[float32](2,): Center of the bbox (x, y).
        - np.ndarray[float32](2,): Scale of the bbox w & h.
    """

    x, y, w, h = box[:4]
    aspect_ratio = image_size[0] / image_size[1]
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio

    # pixel std is 200.0
    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
    scale = scale * 1.25

    return center, scale
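

# Worked example (hypothetical numbers): for a 192x256 input and a box of
# (x=10, y=20, w=100, h=200), the aspect ratio is 192 / 256 = 0.75, so w is
# padded to 0.75 * 200 = 150 before normalization:
#   center, scale = _box2cs((192, 256), [10, 20, 100, 200])
#   # center -> [60., 120.]     (box midpoint)
#   # scale  -> [0.9375, 1.25]  (i.e. [150, 200] / 200.0 * 1.25)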


def rgetattr(obj, attr, *args):
    """Recursive ``getattr`` that resolves dotted attribute paths."""

    def _getattr(obj, attr):
        return getattr(obj, attr, *args)

    return functools.reduce(_getattr, [obj] + attr.split('.'))
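

# For example (attribute names hypothetical), the two lines below are
# equivalent, which is what lets ``OutputHook`` address nested submodules:
#   rgetattr(model, 'backbone.layer4')
#   getattr(getattr(model, 'backbone'), 'layer4')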


class OutputHook:
    """Register forward hooks to capture the outputs of named submodules."""

    def __init__(self, module, outputs=None, as_tensor=False):
        self.outputs = outputs
        self.as_tensor = as_tensor
        self.layer_outputs = {}
        self.register(module)

    def register(self, module):

        def hook_wrapper(name):

            def hook(model, input, output):
                if self.as_tensor:
                    self.layer_outputs[name] = output
                else:
                    if isinstance(output, list):
                        self.layer_outputs[name] = [
                            out.detach().cpu().numpy() for out in output
                        ]
                    else:
                        self.layer_outputs[name] = output.detach().cpu().numpy()

            return hook

        self.handles = []
        if isinstance(self.outputs, (list, tuple)):
            for name in self.outputs:
                try:
                    layer = rgetattr(module, name)
                    h = layer.register_forward_hook(hook_wrapper(name))
                except ModuleNotFoundError as module_not_found:
                    raise ModuleNotFoundError(
                        f'Module {name} not found') from module_not_found
                self.handles.append(h)

    def remove(self):
        for h in self.handles:
            h.remove()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.remove()
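

# Usage sketch (layer name hypothetical): capture an intermediate feature map
# during one forward pass; the hooks are removed on ``__exit__``.
#   with OutputHook(model, outputs=['backbone.layer4']) as hook:
#       model(img)
#       feats = hook.layer_outputs['backbone.layer4']  # np.ndarray by default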


class TorchPoseTopDownPredictor(PredictorInterface):
    """Inference a single image with a list of bounding boxes."""

    def __init__(self, model_path, model_config=None):
        """Init model.

        Args:
            model_path: model file path
            model_config: config dict for the model, supporting the
                `bbox_thr` and `format` keys
        """
        model_config = model_config or {}
        bbox_thr = model_config.get('bbox_thr', 0.3)
        format = model_config.get('format', 'xywh')

        assert format in ['xyxy', 'xywh']

        self.model_path = model_path
        self.bbox_thr = bbox_thr
        self.format = format

        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = None
        with io.open(self.model_path, 'rb') as infile:
            checkpoint = torch.load(infile, map_location='cpu')

        assert 'meta' in checkpoint and 'config' in checkpoint[
            'meta'], 'meta.config is missing from checkpoint'
        self.cfg = checkpoint['meta']['config']

        assert hasattr(self.cfg, 'dataset_info'), \
            'dataset_info not found in checkpoint["meta"]["config"]'

        if is_filepath(self.cfg.dataset_info):
            cfg = mmcv_config_fromfile(self.cfg.dataset_info)
            self.cfg.dataset_info = cfg._cfg_dict['dataset_info']

        self.dataset_info = DatasetInfo(self.cfg.dataset_info)
        self.cfg.model.pretrained = None

        # build model
        self.model = build_model(self.cfg.model)

        map_location = 'cpu' if self.device == 'cpu' else 'cuda'
        self.ckpt = load_checkpoint(
            self.model, self.model_path, map_location=map_location)

        self.model.to(self.device)
        self.model.eval()

        # build pipeline
        channel_order = self.cfg.test_pipeline[0].get('channel_order', 'rgb')
        test_pipeline = [LoadImage(channel_order=channel_order)] + [
            build_from_cfg(p, PIPELINES) for p in self.cfg.test_pipeline
        ]
        self.test_pipeline = Compose(test_pipeline)

    def _inference_single_pose_model(self,
                                     model,
                                     img_or_path,
                                     bboxes,
                                     dataset_info=None,
                                     return_heatmap=False):
        """Inference human bounding boxes.

        num_bboxes: N
        num_keypoints: K

        Args:
            model (nn.Module): The loaded pose model.
            img_or_path (str | np.ndarray): Image filename or loaded image.
            bboxes (list | np.ndarray): All bounding boxes (with scores),
                shaped (N, 4) or (N, 5). (left, top, width, height, [score])
                where N is the number of bounding boxes.
            dataset_info (DatasetInfo): A class containing all dataset info.
            return_heatmap (bool): Whether to also return the output heatmap.

        Returns:
            ndarray[NxKx3]: Predicted pose x, y, score.
            heatmap[N, K, H, W]: Model output heatmap.
        """

        cfg = self.cfg
        device = next(model.parameters()).device

        assert len(bboxes[0]) in [4, 5]

        dataset_name = getattr(dataset_info, 'dataset_name', '')
        flip_pairs = dataset_info.flip_pairs

        batch_data = []
        for bbox in bboxes:
            center, scale = _box2cs(cfg.data_cfg['image_size'], bbox)

            # prepare data
            data = {
                'img_or_path': img_or_path,
                'image_id': 0,
                'center': center,
                'scale': scale,
                'bbox_score': bbox[4] if len(bbox) == 5 else 1,
                'bbox_id': 0,  # need to be assigned if batch_size > 1
                'dataset': dataset_name,
                'joints_3d': np.zeros((cfg.data_cfg.num_joints, 3),
                                      dtype=np.float32),
                'joints_3d_visible': np.zeros((cfg.data_cfg.num_joints, 3),
                                              dtype=np.float32),
                'rotation': 0,
                'ann_info': {
                    'image_size': np.array(cfg.data_cfg['image_size']),
                    'num_joints': cfg.data_cfg['num_joints'],
                    'flip_pairs': flip_pairs
                }
            }
            data = self.test_pipeline(data)
            batch_data.append(data)

        batch_data = collate(batch_data, samples_per_gpu=1)

        if next(model.parameters()).is_cuda:
            # scatter does not work here, so just move the image to the cuda
            # device directly
            batch_data['img'] = batch_data['img'].to(device)
        # get all img_metas of each bounding box
        batch_data['img_metas'] = [
            img_metas[0] for img_metas in batch_data['img_metas'].data
        ]

        # forward the model
        with torch.no_grad():
            result = model(
                img=batch_data['img'],
                mode='test',
                img_metas=batch_data['img_metas'],
                return_heatmap=return_heatmap)

        if return_heatmap:
            return result['preds'], result['output_heatmap']
        else:
            return result['preds'], None

    def _predict_single_img(self,
                            img_info,
                            bbox_thr,
                            dataset_info,
                            return_heatmap=False,
                            outputs=None):

        pose_results = []
        returned_outputs = []
        img_or_path = img_info['img']
        detection_results = img_info['detection_results']

        if not detection_results:
            return [], []

        # Instead of preprocessing each bbox in a for-loop, preprocess all
        # bboxes at once.
        bboxes = np.array([box['bbox'] for box in detection_results])

        # Select bboxes by score threshold
        if bbox_thr is not None:
            assert bboxes.shape[1] == 5
            valid_idx = np.where(bboxes[:, 4] > bbox_thr)[0]
            bboxes = bboxes[valid_idx]
            detection_results = [detection_results[i] for i in valid_idx]

        if self.format == 'xyxy':
            bboxes_xyxy = bboxes
            bboxes_xywh = xyxy2xywh_coco(bboxes.copy(), 1)
        else:
            # format is already 'xywh'
            bboxes_xywh = bboxes
            bboxes_xyxy = xywh2xyxy_coco(bboxes.copy(), -1)

        # if bbox_thr filtered out all bounding boxes
        if len(bboxes_xywh) == 0:
            return [], []

        with OutputHook(self.model, outputs=outputs, as_tensor=False) as h:
            # poses is result['preds'], shaped N x K x 3
            poses, heatmap = self._inference_single_pose_model(
                self.model,
                img_or_path,
                bboxes_xywh,
                dataset_info=dataset_info,
                return_heatmap=return_heatmap)

            if return_heatmap:
                h.layer_outputs['heatmap'] = heatmap

            returned_outputs.append(h.layer_outputs)

        assert len(poses) == len(detection_results), \
            f'{len(poses)} poses vs {len(detection_results)} detections ' \
            f'({len(bboxes_xyxy)} bboxes)'
        for pose, detection_result, bbox_xyxy in zip(poses, detection_results,
                                                     bboxes_xyxy):
            pose_result = detection_result.copy()
            pose_result['keypoints'] = pose
            pose_result['bbox'] = bbox_xyxy
            pose_results.append(pose_result)

        return pose_results, returned_outputs

    def predict(self, input_data_list, batch_size=-1, return_heatmap=False):
        """Inference pose.

        Args:
            input_data_list: A list of image infos, like:
                [
                    {
                        'img' (str | np.ndarray, RGB):
                            Image filename or loaded image.
                        'detection_results' (list | np.ndarray):
                            All bounding boxes (with scores),
                            shaped (N, 4) or (N, 5).
                            (left, top, width, height, [score])
                            where N is the number of bounding boxes.
                    },
                    ...
                ]
            batch_size: batch size
            return_heatmap: whether to return the heatmap, default False.

        Returns:
            {
                'pose_results': list of ndarray[NxKx3]: Predicted pose x, y, score
                'pose_heatmap' (optional): list of heatmap[N, K, H, W]: Model output heatmap
            }
        """
        all_pose_results = []

        for img_info in input_data_list:
            pose_results, returned_outputs = \
                self._predict_single_img(img_info, self.bbox_thr, self.dataset_info)
            output = {'pose_results': pose_results}
            if return_heatmap:
                output.update({'pose_heatmap': returned_outputs})
            # must return a dict per image to adapt to PAI
            all_pose_results.append(output)

        return all_pose_results
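

# Usage sketch for ``TorchPoseTopDownPredictor`` (paths and box values are
# hypothetical): boxes are supplied by the caller in the format declared in
# ``model_config``.
#   predictor = TorchPoseTopDownPredictor(
#       'pose_model.pth', model_config={'bbox_thr': 0.3, 'format': 'xywh'})
#   results = predictor.predict([{
#       'img': 'demo.jpg',
#       'detection_results': [{'bbox': np.array([10, 20, 100, 200, 0.9])}],
#   }])
#   results[0]['pose_results'][0]['keypoints']  # K x 3 (x, y, score)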


@PREDICTORS.register_module()
class TorchPoseTopDownPredictorWithDetector(PredictorInterface):

    SUPPORT_DETECTION_PREDICTORS = {'TorchYoloXPredictor': TorchYoloXPredictor}

    def __init__(
            self,
            model_path,
            model_config={
                'pose': {
                    'bbox_thr': 0.3,
                    'format': 'xywh'
                },
                'detection': {
                    'model_type': None,
                    'reserved_classes': [],
                    'score_thresh': 0.0,
                }
            }):
        """Init model.

        Args:
            model_path: pose and detection model file paths, joined with `,`;
                make sure the first is the pose model and the second is the
                detection model
            model_config: config dict (or json string) for the models
        """
        if isinstance(model_config, str):
            model_config = json.loads(model_config)
        # copy before popping keys, so the (mutable) default config dict is
        # not modified across instantiations
        model_config = copy.deepcopy(model_config)

        detection_model_type = model_config['detection'].pop('model_type')
        assert detection_model_type in self.SUPPORT_DETECTION_PREDICTORS

        self.reserved_classes = model_config['detection'].get(
            'reserved_classes', [])

        model_list = model_path.split(',')
        assert len(model_list) == 2
        # first is the pose model, second is the detection model
        pose_model_path, detection_model_path = model_list

        detection_obj = self.SUPPORT_DETECTION_PREDICTORS[detection_model_type]
        self.detection_predictor = detection_obj(
            detection_model_path, model_config=model_config['detection'])
        self.pose_predictor = TorchPoseTopDownPredictor(
            pose_model_path, model_config=model_config['pose'])

    def process_det_results(self,
                            outputs,
                            input_data_list,
                            reserved_classes=[]):
        filter_outputs = []
        assert len(outputs) == len(input_data_list)
        for reserved_class in reserved_classes:
            assert reserved_class in self.detection_predictor.CLASSES, \
                '%s not in detection classes %s' % (
                    reserved_class, self.detection_predictor.CLASSES)

        # if reserved_classes is [], reserve all classes
        reserved_classes = reserved_classes or self.detection_predictor.CLASSES

        for i in range(len(outputs)):
            output = outputs[i]
            cur_data = {'img': input_data_list[i], 'detection_results': []}
            for j, class_name in enumerate(output['detection_class_names']):
                if class_name in reserved_classes:
                    cur_data['detection_results'].append({
                        'bbox':
                        np.append(output['detection_boxes'][j],
                                  output['detection_scores'][j])
                    })
            filter_outputs.append(cur_data)

        return filter_outputs

    def predict(self, input_data_list, batch_size=-1, return_heatmap=False):
        """Inference with pose model and detection model.

        Args:
            input_data_list: A list of images (np.ndarray, RGB)
            batch_size: batch size
            return_heatmap: whether to return the heatmap, default False.

        Returns:
            {
                'pose_results': list of ndarray[NxKx3]: Predicted pose x, y, score
                'pose_heatmap' (optional): list of heatmap[N, K, H, W]: Model output heatmap
            }
        """
        detection_output = self.detection_predictor.predict(input_data_list)
        output = self.process_det_results(detection_output, input_data_list,
                                          self.reserved_classes)
        pose_output = self.pose_predictor.predict(
            output, return_heatmap=return_heatmap)

        return pose_output
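

# Usage sketch (paths hypothetical): the comma-joined path carries the pose
# model first and the detection model second.
#   predictor = TorchPoseTopDownPredictorWithDetector(
#       'pose_model.pth,yolox_model.pth',
#       model_config={
#           'pose': {'bbox_thr': 0.3, 'format': 'xywh'},
#           'detection': {'model_type': 'TorchYoloXPredictor',
#                         'reserved_classes': ['person'],
#                         'score_thresh': 0.0}
#       })
#   pose_output = predictor.predict([img])  # img: RGB np.ndarray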


def vis_pose_result(model,
                    img,
                    result,
                    radius=4,
                    thickness=1,
                    kpt_score_thr=0.3,
                    bbox_color='green',
                    dataset_info=None,
                    show=False,
                    out_file=None):
    """Visualize the detection results on the image.

    Args:
        model (nn.Module): The loaded detector.
        img (str | np.ndarray): Image filename or loaded image.
        result (list[dict]): The results to draw over `img`
            (bbox_result, pose_result).
        radius (int): Radius of circles.
        thickness (int): Thickness of lines.
        kpt_score_thr (float): The threshold to visualize the keypoints.
        bbox_color (str): Color of the bounding boxes.
        dataset_info (DatasetInfo | None): Dataset info providing the skeleton
            and keypoint colors; if None, it is read from `model.cfg`.
        show (bool): Whether to show the image. Default False.
        out_file (str | None): The filename of the output visualization image.
    """

    # get dataset info
    if (dataset_info is None and hasattr(model, 'cfg')
            and 'dataset_info' in model.cfg):
        dataset_info = DatasetInfo(model.cfg.dataset_info)

    if not dataset_info:
        raise ValueError('Please provide `dataset_info`!')

    skeleton = dataset_info.skeleton
    pose_kpt_color = dataset_info.pose_kpt_color
    pose_link_color = dataset_info.pose_link_color

    if hasattr(model, 'module'):
        model = model.module

    img = model.show_result(
        img,
        result,
        skeleton,
        radius=radius,
        thickness=thickness,
        pose_kpt_color=pose_kpt_color,
        pose_link_color=pose_link_color,
        kpt_score_thr=kpt_score_thr,
        bbox_color=bbox_color,
        show=show,
        out_file=out_file)

    return img
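

# Usage sketch (variable names hypothetical): draw the predicted keypoints
# from the pose predictor back onto the input image.
#   pose_results = predictor.predict([img])[0]['pose_results']
#   vis = vis_pose_result(
#       predictor.pose_predictor.model,
#       img,
#       pose_results,
#       dataset_info=predictor.pose_predictor.dataset_info,
#       out_file='vis.jpg')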