EasyCV/easycv/predictors/detector.py

671 lines
24 KiB
Python

# Copyright (c) Alibaba, Inc. and its affiliates.
import json
import os
from glob import glob
import numpy as np
import torch
from easycv.core.visualization import imshow_bboxes
from easycv.datasets.utils import replace_ImageToTensor
from easycv.file import io
from easycv.models.detection.utils import postprocess
from easycv.thirdparty.mtcnn import FaceDetector
from easycv.utils.checkpoint import load_checkpoint
from easycv.utils.misc import deprecated
from .base import InputProcessor, OutputProcessor, PredictorV2
from .builder import PREDICTORS
from .classifier import TorchClassifier
try:
from easy_vision.python.inference.predictor import PredictorInterface
except Exception:
from .interface import PredictorInterface
class DetInputProcessor(InputProcessor):
    """Input processor shared by the detection predictors."""

    def build_processor(self):
        """Select the pipeline configs and build the processor from them.

        Pipelines passed explicitly take precedence. Otherwise the
        ``test_pipeline`` entry of the config is used, falling back to an
        empty list when there is no config (or no such entry).
        """
        if self.pipelines is None:
            if self.cfg is None:
                selected = []
            else:
                selected = self.cfg.get('test_pipeline', [])
        else:
            selected = self.pipelines

        # for batch inference
        self.pipelines = replace_ImageToTensor(selected)
        return super().build_processor()
class DetOutputProcessor(OutputProcessor):
    """Filter detections by score and attach class names.

    Args:
        score_thresh (float): Detections whose score is not strictly greater
            than this value are dropped. Filtering is skipped when the
            threshold is ``<= 0``.
        classes (list[str], optional): Class names indexed by class id. When
            missing or empty, the raw class ids are used as names.
    """

    def __init__(self, score_thresh, classes=None):
        super(DetOutputProcessor, self).__init__()
        self.score_thresh = score_thresh
        self.classes = classes

    def process_single(self, inputs):
        """Post-process the detections of one sample in place.

        Args:
            inputs (dict): Must contain ``detection_scores``,
                ``detection_boxes`` and ``detection_classes``.
                NOTE(review): the boolean-mask indexing below assumes these
                are array-like (numpy arrays or tensors) - confirm.

        Returns:
            dict: The same ``inputs`` dict, filtered by score, with an
            additional ``detection_class_names`` list.
        """
        scores = inputs['detection_scores']
        if scores is None or len(scores) < 1:
            # Keep the output schema stable for samples without detections so
            # that consumers reading 'detection_class_names' do not hit a
            # KeyError.
            inputs['detection_class_names'] = []
            return inputs

        if self.score_thresh > 0:
            keep = scores > self.score_thresh
            inputs['detection_scores'] = scores[keep]
            inputs['detection_boxes'] = inputs['detection_boxes'][keep]
            inputs['detection_classes'] = inputs['detection_classes'][keep]

        has_class_names = self.classes is not None and len(self.classes) > 0
        class_names = []
        for class_id in inputs['detection_classes']:
            if class_id is None:
                class_names.append(None)
            elif has_class_names:
                class_names.append(self.classes[int(class_id)])
            else:
                # No name table available: fall back to the raw id.
                class_names.append(class_id)

        inputs['detection_class_names'] = class_names
        return inputs
@PREDICTORS.register_module()
class DetectionPredictor(PredictorV2):
    """Generic detection predictor that filters boxes by ``score_threshold``.

    Args:
        model_path (str): Path of the model file.
        config_file (Optional[str]): Config file path for model and processor
            to init. Defaults to None.
        batch_size (int): Batch size for forward.
        device (str | torch.device): Support str('cuda' or 'cpu') or
            torch.device, if is None, detect device automatically.
        save_results (bool): Whether to save predict results.
        save_path (str): File path for saving results, only valid when
            `save_results` is True.
        pipelines (list[dict]): Data pipeline configs.
        score_threshold (float): Score threshold handed to the output
            processor for filtering boxes.
        input_processor_threads (int): Number of processes to process inputs.
        mode (str): The image mode into the model.
        *arg, **kwargs: Accepted but not used by this class.
    """

    def __init__(self,
                 model_path,
                 config_file=None,
                 batch_size=1,
                 device=None,
                 save_results=False,
                 save_path=None,
                 pipelines=None,
                 score_threshold=0.5,
                 input_processor_threads=8,
                 mode='BGR',
                 *arg,
                 **kwargs):
        base_options = dict(
            config_file=config_file,
            batch_size=batch_size,
            device=device,
            save_results=save_results,
            save_path=save_path,
            pipelines=pipelines,
            input_processor_threads=input_processor_threads,
            mode=mode,
        )
        super().__init__(model_path, **base_options)

        self.score_thresh = score_threshold
        self.CLASSES = self.cfg.get('CLASSES', None)

    def get_input_processor(self):
        """Create the detection input processor from the predictor settings."""
        processor = DetInputProcessor(
            self.cfg,
            pipelines=self.pipelines,
            batch_size=self.batch_size,
            threads=self.input_processor_threads,
            mode=self.mode,
        )
        return processor

    def get_output_processor(self):
        """Create the output processor that applies the score threshold."""
        processor = DetOutputProcessor(self.score_thresh, self.CLASSES)
        return processor

    def visualize(self, img, results, show=False, out_file=None):
        """Only support show one sample now."""
        boxes = results['detection_boxes']
        names = results['detection_class_names']
        # Load the image through the input processor so it is read the same
        # way as during prediction.
        loaded = self.input_processor._load_input(img)['img']

        draw_options = dict(
            labels=names,
            colors='cyan',
            text_color='cyan',
            font_size=18,
            thickness=2,
            font_scale=0.0,
            show=show,
            out_file=out_file,
        )
        imshow_bboxes(loaded, boxes, **draw_options)
class _JitProcessorWrapper:
def __init__(self, processor, device) -> None:
self.processor = processor
self.device = device
def __call__(self, results):
if self.processor is not None:
from mmcv.parallel import DataContainer as DC
outputs = {}
img = results['img']
img = torch.from_numpy(img).to(self.device)
img, img_meta = self.processor(img.unsqueeze(0)) # process batch
outputs['img'] = DC(
img.squeeze(0),
stack=True) # DC wrapper for collate batch and to device
outputs['img_metas'] = DC(img_meta, cpu_only=True)
return outputs
return results
class YoloXInputProcessor(DetInputProcessor):
    """Input processor for yolox.

    Args:
        cfg (Config): Config instance.
        pipelines (list[dict]): Data pipeline configs.
        batch_size (int): batch size for forward.
        model_type (str): "raw" or "jit" or "blade"
        jit_processor_path (str): File of the saved processing operator of
            torch jit type.
        device (str | torch.device): Support str('cuda' or 'cpu') or
            torch.device, if is None, detect device automatically.
        threads (int): Number of processes to process inputs.
        mode (str): The image mode into the model.
    """

    def __init__(
        self,
        cfg,
        pipelines=None,
        batch_size=1,
        model_type='raw',
        jit_processor_path=None,
        device=None,
        threads=8,
        mode='BGR',
    ):
        # These attributes are assigned before calling the base initializer;
        # presumably the base class invokes ``build_processor`` (which reads
        # them) during its own init - verify against the base class.
        self.model_type = model_type
        self.jit_processor_path = jit_processor_path
        self.device = device
        if self.device is None:
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        super().__init__(
            cfg,
            pipelines=pipelines,
            batch_size=batch_size,
            threads=threads,
            mode=mode)

    def build_processor(self):
        """Build the preprocessing callable.

        For non-raw models whose config sets ``export.preprocess_jit``, a
        ``_JitProcessorWrapper`` is returned. It wraps the jit operator loaded
        from ``jit_processor_path`` when that file exists, and otherwise
        passes samples through untouched. In every other case the regular
        pipeline-based processor of the parent class is built.

        Raises:
            ValueError: If a jit operator file is found while ``threads > 1``.
        """
        self.jit_preprocess = False
        if self.model_type != 'raw' and hasattr(self.cfg, 'export'):
            self.jit_preprocess = self.cfg['export'].get(
                'preprocess_jit', False)

        if not self.jit_preprocess:
            return super().build_processor()

        # jit or blade model
        processor = None
        path = self.jit_processor_path
        # ``os.path.exists(None)`` raises TypeError, and None is the default
        # value of ``jit_processor_path``, so guard against it explicitly.
        if path is not None and os.path.exists(path):
            if self.threads > 1:
                raise ValueError('Not support threads>1 for jit processor !')
            # use a preprocess jit model to speed up
            with io.open(path, 'rb') as infile:
                processor = torch.jit.load(infile, self.device)

        return _JitProcessorWrapper(processor, self.device)
class YoloXOutputProcessor(DetOutputProcessor):
    """Output processor for yolox.

    Args:
        score_thresh (float): Score threshold used by the parent class to
            filter boxes.
        model_type (str): "raw" or "jit" or "blade".
        test_conf (float): Confidence threshold passed to ``postprocess``.
        nms_thre (float): NMS threshold passed to ``postprocess``.
        use_trt_efficientnms (bool): Whether the exported model already
            returns boxes, scores and classes as separate outputs.
        classes (list[str], optional): Class names indexed by class id.
    """

    def __init__(self,
                 score_thresh=0.5,
                 model_type='raw',
                 test_conf=0.01,
                 nms_thre=0.65,
                 use_trt_efficientnms=False,
                 classes=None):
        super().__init__(score_thresh, classes)
        self.model_type = model_type
        self.test_conf = test_conf
        self.nms_thre = nms_thre
        self.use_trt_efficientnms = use_trt_efficientnms

    def post_assign(self, outputs, img_metas):
        """Split per-image prediction tensors into boxes, scores and classes.

        Args:
            outputs: Sequence of per-image 2-D tensors; an entry may be None
                when an image has no detections. Columns 0-3 are read as the
                box, the score is the product of columns 4 and 5, and column
                6 is the class id.
                NOTE(review): columns 4 and 5 are presumably objectness and
                class confidence - confirm against the model head.
            img_metas: Per-image meta dicts (or a falsy value). When given,
                boxes are divided by ``scale_factor[0]`` of the matching meta.

        Returns:
            dict: ``detection_boxes``, ``detection_scores`` and
            ``detection_classes`` (lists holding one numpy array or None per
            image) and ``img_metas`` (list).
        """
        detection_boxes = []
        detection_scores = []
        detection_classes = []
        img_metas_list = []

        for i, output in enumerate(outputs):
            if img_metas:
                img_metas_list.append(img_metas[i])

            # The None check has to come before any attribute access:
            # reading ``requires_grad`` on a None entry raises AttributeError.
            if output is None:
                detection_boxes.append(None)
                detection_scores.append(None)
                detection_classes.append(None)
                continue

            if output.requires_grad:
                output = output.detach()

            bboxes = output[:, 0:4]
            if img_metas:
                # Out-of-place division so the caller's tensor is not
                # modified through this view.
                bboxes = bboxes / img_metas[i]['scale_factor'][0]

            detection_boxes.append(bboxes.cpu().numpy())
            detection_scores.append(
                (output[:, 4] * output[:, 5]).cpu().numpy())
            detection_classes.append(
                output[:, 6].cpu().numpy().astype(np.int32))

        return {
            'detection_boxes': detection_boxes,
            'detection_scores': detection_scores,
            'detection_classes': detection_classes,
            'img_metas': img_metas_list
        }

    def process_single(self, inputs):
        """Convert the model output of one sample into detection results.

        For "raw" models ``inputs`` is forwarded to the parent class as is.
        For exported models the content of ``inputs['results']`` is decoded
        first: taken apart directly when ``use_trt_efficientnms`` is set, run
        through ``post_assign`` for "jit" models, and through ``postprocess``
        followed by ``post_assign`` otherwise.

        Returns:
            dict: Output of the parent ``process_single`` plus
            ``ori_img_shape`` (first two entries of the meta's
            ``ori_img_shape``).
        """
        det_out = inputs
        img_meta = det_out['img_metas']

        if self.model_type != 'raw':
            results = det_out['results']
            if self.use_trt_efficientnms:
                det_out = {
                    'detection_boxes':
                    results[1] / img_meta['scale_factor'][0],
                    'detection_scores': results[2],
                    'detection_classes': results[3],
                }
            else:
                # post_assign works on batches: wrap the single sample and
                # unwrap the one-element lists afterwards.
                batched = results.unsqueeze(0)
                if self.model_type != 'jit':
                    batched = postprocess(batched, len(self.classes),
                                          self.test_conf, self.nms_thre)
                det_out = self.post_assign(batched, img_metas=[img_meta])
                for key in ('detection_scores', 'detection_boxes',
                            'detection_classes'):
                    det_out[key] = det_out[key][0]

        processed = super().process_single(det_out)
        processed['ori_img_shape'] = list(img_meta['ori_img_shape'][:2])
        return processed
@PREDICTORS.register_module()
class YoloXPredictor(DetectionPredictor):
    """Detection predictor for Yolox.

    The model type is derived from the suffix of ``model_path``: "jit" for
    paths ending in ``jit``, "blade" for paths ending in ``blade`` and "raw"
    otherwise.

    Args:
        model_path (str): Path of the model file.
        config_file (Optional[str]): Config file path for model and processor
            to init. Defaults to None; for non-raw models it then falls back
            to ``model_path + '.config.json'``.
        batch_size (int): Batch size for forward.
        use_trt_efficientnms (bool): Whether used tensorrt efficient nms
            operation in the saved model.
        device (str | torch.device): Support str('cuda' or 'cpu') or
            torch.device, if is None, detect device automatically.
        save_results (bool): Whether to save predict results.
        save_path (str): File path for saving results, only valid when
            `save_results` is True.
        pipelines (list[dict]): Data pipeline configs.
        max_det (int): Maximum number of detection output boxes. Stored on
            the instance; not otherwise used by this class.
        score_thresh (float): Score threshold to filter box.
        nms_thresh (Optional[float]): Nms threshold to filter box. When None,
            ``nms_thre`` of the model config is used (default 0.65).
        test_conf (Optional[float]): Confidence threshold. When None,
            ``test_conf`` of the model config is used (default 0.01).
        input_processor_threads (int): Number of processes to process inputs.
        mode (str): The image mode into the model.
    """

    def __init__(self,
                 model_path,
                 config_file=None,
                 batch_size=1,
                 use_trt_efficientnms=False,
                 device=None,
                 save_results=False,
                 save_path=None,
                 pipelines=None,
                 max_det=100,
                 score_thresh=0.5,
                 nms_thresh=None,
                 test_conf=None,
                 input_processor_threads=8,
                 mode='BGR'):
        self.max_det = max_det
        self.use_trt_efficientnms = use_trt_efficientnms

        if model_path.endswith('jit'):
            self.model_type = 'jit'
        elif model_path.endswith('blade'):
            self.model_type = 'blade'
        else:
            self.model_type = 'raw'

        if self.model_type == 'blade' or self.use_trt_efficientnms:
            # Imported only for its import-time side effects; presumably it
            # registers the ops required to load such models - confirm.
            import torch_blade

        if self.model_type != 'raw' and config_file is None:
            config_file = model_path + '.config.json'

        super(YoloXPredictor, self).__init__(
            model_path,
            config_file=config_file,
            batch_size=batch_size,
            device=device,
            save_results=save_results,
            save_path=save_path,
            pipelines=pipelines,
            score_threshold=score_thresh,
            input_processor_threads=input_processor_threads,
            mode=mode)

        # Compare against None explicitly: with ``value or default`` an
        # explicit threshold of 0 would be silently replaced by the config
        # value.
        if test_conf is not None:
            self.test_conf = test_conf
        else:
            self.test_conf = self.cfg['model'].get('test_conf', 0.01)

        if nms_thresh is not None:
            self.nms_thre = nms_thresh
        else:
            self.nms_thre = self.cfg['model'].get('nms_thre', 0.65)

        self.CLASSES = self.cfg.get('CLASSES', None) or self.cfg.get(
            'classes', None)
        assert self.CLASSES is not None

        # Replace the last dot-separated component of the model path by
        # 'preprocess' to get the path of the jit preprocessing operator.
        self.jit_processor_path = '.'.join(
            self.model_path.split('.')[:-1] + ['preprocess'])

    def _build_model(self):
        """Load the jit/blade model, or build and reparameterize the raw one."""
        if self.model_type != 'raw':
            with io.open(self.model_path, 'rb') as infile:
                model = torch.jit.load(infile, self.device)
        else:
            from easycv.utils.misc import reparameterize_models
            model = super()._build_model()
            model = reparameterize_models(model)
        return model

    def prepare_model(self):
        """Build model from config file by default.

        If the model is not loaded from a configuration file, e.g. torch jit
        model, you need to reimplement it.
        """
        model = self._build_model()
        model.to(self.device)
        model.eval()
        if self.model_type == 'raw':
            # Exported models already carry their weights; only raw models
            # need the checkpoint loaded.
            load_checkpoint(model, self.model_path, map_location='cpu')
        return model

    def model_forward(self, inputs):
        """Model forward.

        If you need refactor model forward, you need to reimplement it.

        Returns:
            dict: Model outputs. For exported models the raw output is stored
            under ``results``. ``img_metas`` is copied over from ``inputs``
            when the outputs do not contain it.
        """
        if self.model_type != 'raw':
            with torch.no_grad():
                outputs = self.model(inputs['img'])
                outputs = {'results': outputs}  # convert to dict format
        else:
            outputs = super().model_forward(inputs)

        if 'img_metas' not in outputs:
            outputs['img_metas'] = inputs['img_metas']

        return outputs

    def get_input_processor(self):
        """Create the yolox input processor from the predictor settings."""
        return YoloXInputProcessor(
            self.cfg,
            pipelines=self.pipelines,
            batch_size=self.batch_size,
            model_type=self.model_type,
            jit_processor_path=self.jit_processor_path,
            device=self.device,
            threads=self.input_processor_threads,
            mode=self.mode,
        )

    def get_output_processor(self):
        """Create the yolox output processor from the predictor settings."""
        return YoloXOutputProcessor(
            score_thresh=self.score_thresh,
            model_type=self.model_type,
            test_conf=self.test_conf,
            nms_thre=self.nms_thre,
            use_trt_efficientnms=self.use_trt_efficientnms,
            classes=self.CLASSES)
@deprecated(reason='Please use YoloXPredictor.')
@PREDICTORS.register_module()
class TorchYoloXPredictor(YoloXPredictor):

    def __init__(self,
                 model_path,
                 max_det=100,
                 score_thresh=0.5,
                 use_trt_efficientnms=False,
                 model_config=None,
                 input_processor_threads=8,
                 mode='BGR'):
        """Deprecated entry point kept for the old ``predict`` interface.

        Args:
            model_path: model file path
            max_det: maximum number of detection
            score_thresh: score_thresh to filter box; a ``score_thresh``
                entry in ``model_config`` takes precedence over it
            use_trt_efficientnms: forwarded to ``YoloXPredictor``
            model_config: config for model to init, either a json string or
                an already parsed object
            input_processor_threads: forwarded to ``YoloXPredictor``
            mode: forwarded to ``YoloXPredictor``
        """
        # Normalise the config: empty/None becomes {}, strings are parsed.
        if not model_config:
            model_config = {}
        elif isinstance(model_config, str):
            model_config = json.loads(model_config)

        if 'score_thresh' in model_config:
            score_thresh = model_config['score_thresh']

        fixed_options = dict(
            config_file=None,
            batch_size=1,
            device=None,
            save_results=False,
            save_path=None,
            pipelines=None,
            nms_thresh=None,
            test_conf=None,
        )
        super().__init__(
            model_path,
            use_trt_efficientnms=use_trt_efficientnms,
            max_det=max_det,
            score_thresh=score_thresh,
            input_processor_threads=input_processor_threads,
            mode=mode,
            **fixed_options)

    def predict(self, input_data_list, batch_size=-1, to_numpy=True):
        """Run the predictor on ``input_data_list``.

        ``batch_size`` and ``to_numpy`` are accepted for interface
        compatibility only; neither is used.
        """
        outputs = super().__call__(input_data_list)
        return outputs
@PREDICTORS.register_module()
class TorchFaceDetector(PredictorInterface):

    def __init__(self, model_path=None, model_config=None):
        """Create the face detector.

        Args:
            model_path: model file path (not used here)
            model_config: config string for model to init, in json format
                (not used here)
        """
        self.detector = FaceDetector()

    def get_output_type(self):
        """Describe how each output key should be serialized.

        The returned dict maps an output key to one of these types:

        * json: data will be serialized to a json str
        * image: data will be converted to encoded image binary and written
          to an oss file named
          output_dir/${key}/${input_filename}_${idx}.jpg, where
          input_filename is the base filename extracted from the url and key
          is the key in the output_type dict. If the data indexed by key is
          a list, idx is the index of the element in the list, otherwise
          ${idx} is empty
        * video: data will be converted to encoded video binary and written
          to an oss file

        For example ``{'image': 'image', 'feature': 'json'}`` indicates that
        the image data in the output dict will be saved to an image file and
        the feature will be converted to json.

        This predictor declares no such conversions and returns an empty
        dict.
        """
        return {}

    def batch(self, image_tensor_list):
        """Stack a list of image tensors into one batch tensor."""
        stacked = torch.stack(image_tensor_list)
        return stacked

    def predict(self, input_data_list, batch_size=-1, threshold=0.95):
        """Detect faces in every input image.

        Args:
            input_data_list: a non-empty list of samples; entries that are
                not plain ``np.ndarray`` are converted with ``np.asarray``
            batch_size: accepted for interface compatibility; not used
            threshold: only detections whose score (last entry of each
                detection) is strictly greater than this value are kept

        Return:
            result: a list with one dict per image, holding
                ``ori_img_shape``, ``detection_boxes``, ``detection_scores``,
                ``detection_classes`` (all 0) and ``detection_class_names``
                (all 'face')
        """
        assert len(
            input_data_list) > 0, 'input images should not be an empty list'

        outputs_list = []
        for img in input_data_list:
            if type(img) is not np.ndarray:
                img = np.asarray(img)

            height_width = img.shape[:2]
            detections, _ = self.detector.safe_detect(img)

            # Each detection carries its score in the last position and the
            # box in the entries before it.
            confident = [det for det in detections if det[-1] > threshold]
            boxes = np.array([det[:-1] for det in confident])
            scores = np.array([det[-1] for det in confident])
            num_kept = boxes.shape[0]

            outputs_list.append({
                'ori_img_shape': list(height_width),
                'detection_boxes': boxes,
                'detection_scores': scores,
                'detection_classes': [0] * num_kept,
                'detection_class_names': ['face'] * num_kept,
            })

        return outputs_list
@PREDICTORS.register_module()
class TorchYoloXClassifierPredictor(PredictorInterface):

    def __init__(self,
                 models_root_dir,
                 max_det=100,
                 cls_score_thresh=0.01,
                 det_model_config=None,
                 cls_model_config=None):
        """Init a yolox detector followed by a classification predictor.

        Args:
            models_root_dir: directory containing exactly one file matching
                ``detection/*.pt*`` and exactly one matching
                ``classification/*.pt*``
            max_det: maximum number of detections, forwarded to the detector
            cls_score_thresh: classified boxes scoring below this value are
                dropped
            det_model_config: config string for detection model to init, in
                json format
            cls_model_config: config string for classification model to init,
                in json format
        """
        det_model_path = glob(
            '%s/detection/*.pt*' % models_root_dir, recursive=True)
        assert len(det_model_path) == 1, (
            'expected exactly one detection model under %s/detection, '
            'found %d' % (models_root_dir, len(det_model_path)))

        cls_model_path = glob(
            '%s/classification/*.pt*' % models_root_dir, recursive=True)
        assert len(cls_model_path) == 1, (
            'expected exactly one classification model under '
            '%s/classification, found %d' %
            (models_root_dir, len(cls_model_path)))

        self.det_predictor = TorchYoloXPredictor(
            det_model_path[0], max_det=max_det, model_config=det_model_config)
        self.cls_predictor = TorchClassifier(
            cls_model_path[0], model_config=cls_model_config)
        self.cls_score_thresh = cls_score_thresh

    def predict(self, input_data_list, batch_size=-1):
        """Detect objects, then classify the crop of every detected box.

        Args:
            input_data_list: a list of numpy array(in rgb order), each array
                is a sample to be predicted
            batch_size: forwarded to the detection predictor

        Return:
            result: the detector's list of per-image dicts, where
                ``detection_boxes``, ``detection_scores``,
                ``detection_classes`` and ``detection_class_names`` are
                replaced by the classifier-based values of the boxes that
                pass ``cls_score_thresh``, and ``product_count`` maps each
                kept class name to its number of boxes
        """
        results = self.det_predictor.predict(
            input_data_list, batch_size=batch_size)

        for img_idx, img in enumerate(input_data_list):
            detection_boxes = results[img_idx]['detection_boxes']
            if detection_boxes is None:
                # No detections for this image: treat as an empty box list
                # instead of failing on a None value.
                detection_boxes = []

            crop_img_batch = []
            for box in detection_boxes:
                # Clamp at 0: a negative coordinate would otherwise be read
                # by numpy slicing as an offset from the end of the axis and
                # produce a wrong crop.
                xyxy = [max(int(a), 0) for a in box]
                crop_img_batch.append(img[xyxy[1]:xyxy[3], xyxy[0]:xyxy[2]])

            if len(crop_img_batch) > 0:
                cls_output = self.cls_predictor.predict(
                    crop_img_batch, batch_size=32)
            else:
                cls_output = []

            class_name_list = []
            class_id_list = []
            class_score_list = []
            det_bboxes = []
            product_count_dict = {}

            # Crops were built in box order, so classifier outputs line up
            # with the boxes one to one.
            for box, cls_result in zip(detection_boxes, cls_output):
                class_name = cls_result['class_name'][0]
                class_score = cls_result['class_probs'][class_name]
                if class_score < self.cls_score_thresh:
                    continue

                product_count_dict[class_name] = product_count_dict.get(
                    class_name, 0) + 1
                class_name_list.append(class_name)
                class_id_list.append(int(cls_result['class'][0]))
                class_score_list.append(class_score)
                det_bboxes.append([float(a) for a in box])

            results[img_idx].update({
                'detection_boxes': np.array(det_bboxes),
                'detection_scores': class_score_list,
                'detection_classes': class_id_list,
                'detection_class_names': class_name_list,
                'product_count': product_count_dict
            })

        return results