mirror of https://github.com/alibaba/EasyCV.git
306 lines
8.6 KiB
Python
306 lines
8.6 KiB
Python
import numpy as np
|
|
from PIL import Image
|
|
import torch
|
|
|
|
|
|
def try_gpu():
    """Pick the torch device to run on.

    Returns:
        torch.device("cuda") when torch reports CUDA as available,
        otherwise torch.device("cpu").
    """
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")
|
|
|
|
|
|
def nms(boxes, overlap_threshold=0.5, mode="union"):
    """Non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose overlap with it exceeds `overlap_threshold`.

    Arguments:
        boxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        overlap_threshold: a float number.
        mode: 'union' or 'min'. 'union' measures overlap as
            intersection over union; 'min' divides the intersection
            by the smaller of the two box areas.

    Returns:
        list with indices of the selected boxes, ordered by decreasing score.

    Raises:
        ValueError: if `boxes` is non-empty and `mode` is neither
            'union' nor 'min'.
    """

    # if there are no boxes, return the empty list
    if len(boxes) == 0:
        return []

    # Fail early with a clear message; otherwise an unknown mode would leave
    # `overlap` unassigned and surface as an UnboundLocalError in the loop.
    if mode not in ("union", "min"):
        raise ValueError(
            "mode must be 'union' or 'min', got {!r}".format(mode)
        )

    # list of picked indices
    pick = []

    # grab the coordinates of the bounding boxes
    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]

    # the +1.0 treats coordinates as inclusive pixel indices
    area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0)
    ids = np.argsort(score)  # in increasing order

    while len(ids) > 0:

        # grab index of the largest value
        last = len(ids) - 1
        i = ids[last]
        pick.append(i)

        # every candidate except the one that was just picked
        rest = ids[:last]

        # compute intersections
        # of the box with the largest score
        # with the rest of boxes

        # left top corner of intersection boxes
        ix1 = np.maximum(x1[i], x1[rest])
        iy1 = np.maximum(y1[i], y1[rest])

        # right bottom corner of intersection boxes
        ix2 = np.minimum(x2[i], x2[rest])
        iy2 = np.minimum(y2[i], y2[rest])

        # width and height of intersection boxes
        w = np.maximum(0.0, ix2 - ix1 + 1.0)
        h = np.maximum(0.0, iy2 - iy1 + 1.0)

        # intersections' areas
        inter = w * h
        if mode == "min":
            overlap = inter / np.minimum(area[i], area[rest])
        else:
            # intersection over union (IoU)
            overlap = inter / (area[i] + area[rest] - inter)

        # delete the picked box and all boxes where overlap is too big
        ids = np.delete(
            ids, np.concatenate([[last], np.where(overlap > overlap_threshold)[0]])
        )

    return pick
|
|
|
|
|
|
def convert_to_square(bboxes):
    """Expand each bounding box into a square around the same center.

    The square's side equals the longer side of the original box.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].

    Returns:
        a float numpy array of shape [n, 5],
            squared bounding boxes. Only the four coordinate columns
            are written; every other column of the result stays zero.
    """
    left, top, right, bottom = (bboxes[:, col] for col in range(4))

    # the +1.0 treats coordinates as inclusive pixel indices
    height = bottom - top + 1.0
    width = right - left + 1.0
    side = np.maximum(height, width)

    squared = np.zeros_like(bboxes)
    # shift the top-left corner so the square shares the box's center
    squared[:, 0] = left + width * 0.5 - side * 0.5
    squared[:, 1] = top + height * 0.5 - side * 0.5
    # the bottom-right corner follows from the stored top-left corner
    squared[:, 2] = squared[:, 0] + side - 1.0
    squared[:, 3] = squared[:, 1] + side - 1.0
    return squared
|
|
|
|
|
|
def calibrate_box(bboxes, offsets):
    """Shift box corners by network-predicted offsets.

    Each offset is expressed as a fraction of the box's width (x
    coordinates) or height (y coordinates):

        x1_true = x1 + tx1 * w
        y1_true = y1 + ty1 * h
        x2_true = x2 + tx2 * w
        y2_true = y2 + ty2 * h

    Nothing here checks that the result still satisfies
    x1 < x2 and y1 < y2.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].
            Its first four columns are overwritten in place.
        offsets: a float numpy array of shape [n, 4].

    Returns:
        a float numpy array of shape [n, 5]; this is the same
            object as `bboxes`.
    """
    # column vectors of box widths and heights, shape [n, 1]
    widths = (bboxes[:, 2] - bboxes[:, 0] + 1.0)[:, np.newaxis]
    heights = (bboxes[:, 3] - bboxes[:, 1] + 1.0)[:, np.newaxis]

    # per-coordinate scale, laid out as (w, h, w, h) to match `offsets`
    scale = np.hstack([widths, heights, widths, heights])
    shift = scale * offsets

    bboxes[:, 0:4] = bboxes[:, 0:4] + shift
    return bboxes
|
|
|
|
|
|
def get_image_boxes(bounding_boxes, img, size=24):
    """Cut out boxes from the image.

    Every box is copied into a zero-filled buffer of the box's full
    size (so parts of the box lying outside the image stay black),
    resized to `size` x `size` and passed through `preprocess`.

    Arguments:
        bounding_boxes: a float numpy array of shape [n, 5].
            It is handed to `correct_bboxes` unchanged.
        img: an instance of PIL.Image.
        size: an integer, size of cutouts.

    Returns:
        a float numpy array of shape [n, 3, size, size].
    """

    num_boxes = len(bounding_boxes)
    width, height = img.size

    [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(
        bounding_boxes, width, height
    )
    img_boxes = np.zeros((num_boxes, 3, size, size), "float32")

    # Convert the image once; it does not change between boxes, so doing
    # this inside the loop would repeat the same conversion for every box.
    img_array = np.asarray(img, "uint8")

    for i in range(num_boxes):
        # buffer of the uncropped box size; uncovered pixels remain zero
        img_box = np.zeros((h[i], w[i], 3), "uint8")

        # (dy..edy, dx..edx) index the buffer, (y..ey, x..ex) index the
        # image; all end coordinates are inclusive, hence the +1
        img_box[dy[i] : (edy[i] + 1), dx[i] : (edx[i] + 1), :] = img_array[
            y[i] : (ey[i] + 1), x[i] : (ex[i] + 1), :
        ]

        # resize
        img_box = Image.fromarray(img_box)
        img_box = img_box.resize((size, size), Image.BILINEAR)
        img_box = np.asarray(img_box, "float32")

        # preprocess returns [1, 3, size, size]; the leading axis is
        # dropped by broadcasting on assignment
        img_boxes[i, :, :, :] = preprocess(img_box)

    return img_boxes
|
|
|
|
|
|
def correct_bboxes(bboxes, width, height):
    """Crop boxes that are too big and get coordinates
    with respect to cutouts.

    NOTE: the coordinate columns are taken as views of `bboxes`, so the
    clipping below also overwrites the first four columns of the
    caller's array.

    Arguments:
        bboxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        width: a float number.
        height: a float number.

    Returns:
        dy, dx, edy, edx: int numpy arrays of shape [n],
            coordinates of the boxes with respect to the cutouts.
        y, x, ey, ex: int numpy arrays of shape [n],
            corrected ymin, xmin, ymax, xmax.
        h, w: int numpy arrays of shape [n],
            just heights and widths of boxes (before cropping).

        in the following order:
            [dy, edy, dx, edx, y, ey, x, ex, w, h].
    """
    # 'e' stands for end: (x, y) is the top-left corner and (ex, ey)
    # the bottom-right corner of the box inside the image.
    # These are views into `bboxes`, not copies.
    x, y, ex, ey = (bboxes[:, col] for col in range(4))

    # sizes of the uncropped boxes (coordinates are inclusive)
    w = ex - x + 1.0
    h = ey - y + 1.0
    num_boxes = bboxes.shape[0]

    # (dx, dy, edx, edy) locate the visible part of the box inside a
    # cutout of size (h, w); by default the whole cutout is covered
    dx = np.zeros((num_boxes,))
    dy = np.zeros((num_boxes,))
    edx = w - 1.0
    edy = h - 1.0

    # if box's bottom right corner is too far right
    past_right = ex > width - 1.0
    edx[past_right] = w[past_right] + width - 2.0 - ex[past_right]
    ex[past_right] = width - 1.0

    # if box's bottom right corner is too low
    past_bottom = ey > height - 1.0
    edy[past_bottom] = h[past_bottom] + height - 2.0 - ey[past_bottom]
    ey[past_bottom] = height - 1.0

    # if box's top left corner is too far left
    past_left = x < 0.0
    dx[past_left] = 0.0 - x[past_left]
    x[past_left] = 0.0

    # if box's top left corner is too high
    past_top = y < 0.0
    dy[past_top] = 0.0 - y[past_top]
    y[past_top] = 0.0

    ordered = (dy, edy, dx, edx, y, ey, x, ex, w, h)
    return [values.astype("int32") for values in ordered]
|
|
|
|
|
|
def preprocess(img):
    """Preprocessing step before feeding the network.

    Moves channels first, adds a leading batch axis and rescales the
    values with (value - 127.5) * 0.0078125.

    Arguments:
        img: a float numpy array of shape [h, w, c].

    Returns:
        a float numpy array of shape [1, c, h, w].
    """
    channels_first = img.transpose(2, 0, 1)
    batched = channels_first[np.newaxis, ...]
    return (batched - 127.5) * 0.0078125
|
|
|
|
|
|
def generate_bboxes(probs, offsets, scale, threshold):
    """Generate bounding boxes at places
    where there is probably a face.

    Arguments:
        probs: a float numpy array of shape [n, m].
        offsets: a float numpy array of shape [1, 4, n, m].
        scale: a float number,
            width and height of the image were scaled by this number.
        threshold: a float number.

    Returns:
        a float numpy array of shape [n_boxes, 9], where each row is
            (xmin, ymin, xmax, ymax, score, tx1, ty1, tx2, ty2).
            When no cell exceeds `threshold` the result is an empty
            array of shape [0, 9].
    """

    # applying P-Net is equivalent, in some sense, to
    # moving 12x12 window with stride 2
    stride = 2
    cell_size = 12

    # indices of boxes where there is probably a face
    inds = np.where(probs > threshold)

    if inds[0].size == 0:
        # Keep the documented column count even when nothing was found, so
        # callers can slice columns or stack results without special cases.
        return np.empty((0, 9))

    rows, cols = inds[0], inds[1]

    # transformations of bounding boxes
    tx1, ty1, tx2, ty2 = [offsets[0, i, rows, cols] for i in range(4)]
    # they are defined as:
    # w = x2 - x1 + 1
    # h = y2 - y1 + 1
    # x1_true = x1 + tx1*w
    # x2_true = x2 + tx2*w
    # y1_true = y1 + ty1*h
    # y2_true = y2 + ty2*h

    offsets = np.array([tx1, ty1, tx2, ty2])
    score = probs[rows, cols]

    # P-Net is applied to scaled images
    # so we need to rescale bounding boxes back
    # NOTE(review): the reason for the +1.0 is not documented here;
    # presumably inherited from a 1-based reference implementation - TODO confirm
    bounding_boxes = np.vstack(
        [
            np.round((stride * cols + 1.0) / scale),
            np.round((stride * rows + 1.0) / scale),
            np.round((stride * cols + 1.0 + cell_size) / scale),
            np.round((stride * rows + 1.0 + cell_size) / scale),
            score,
            offsets,
        ]
    )

    return bounding_boxes.T
|