mirror of https://github.com/RE-OWOD/RE-OWOD
Add files via upload
parent
63ebbb7dcc
commit
665719112e
Binary file not shown.
|
@ -0,0 +1,10 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
|
||||
from .utils.env import setup_environment
|
||||
|
||||
setup_environment()
|
||||
|
||||
|
||||
# This line will be programmatically read/written by setup.py.
|
||||
# Leave them at the bottom of this file and don't touch them.
|
||||
__version__ = "0.2.1"
|
|
@ -0,0 +1,13 @@
|
|||
|
||||
This directory contains code to prepare a detectron2 model for deployment.
|
||||
Currently it supports exporting a detectron2 model to Caffe2 format through ONNX.
|
||||
|
||||
Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage.
|
||||
|
||||
|
||||
### Acknowledgements
|
||||
|
||||
Thanks to Mobile Vision team at Facebook for developing the Caffe2 conversion tools.
|
||||
|
||||
Thanks to Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3) who
|
||||
helped export Detectron2 models to TorchScript.
|
|
@ -0,0 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from .api import *
|
||||
|
||||
__all__ = [k for k in globals().keys() if not k.startswith("_")]
|
|
@ -0,0 +1,288 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
import copy
|
||||
import logging
|
||||
import os
|
||||
import torch
|
||||
from caffe2.proto import caffe2_pb2
|
||||
from torch import nn
|
||||
|
||||
from detectron2.config import CfgNode as CN
|
||||
|
||||
from .caffe2_inference import ProtobufDetectionModel
|
||||
from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format
|
||||
from .shared import get_pb_arg_vali, get_pb_arg_vals, save_graph
|
||||
|
||||
__all__ = [
|
||||
"add_export_config",
|
||||
"export_caffe2_model",
|
||||
"Caffe2Model",
|
||||
"export_onnx_model",
|
||||
"Caffe2Tracer",
|
||||
]
|
||||
|
||||
|
||||
def add_export_config(cfg):
    """
    Attach the export-related options to a detectron2 config.

    The config is temporarily defrosted if needed and re-frozen afterwards,
    so its frozen state is the same before and after the call.

    Args:
        cfg (CfgNode): a detectron2 config

    Returns:
        CfgNode: the same config object, updated with the new options that
            will be used by :class:`Caffe2Tracer`.
    """
    was_frozen = cfg.is_frozen()
    cfg.defrost()

    # Build the export sub-node first, then hang it on the config.
    export_node = CN()
    export_node.USE_HEATMAP_MAX_KEYPOINT = False
    cfg.EXPORT_CAFFE2 = export_node

    if was_frozen:
        cfg.freeze()
    return cfg
|
||||
|
||||
|
||||
class Caffe2Tracer:
    """
    Make a detectron2 model traceable with caffe2 style.

    An original detectron2 model may not be traceable, or
    cannot be deployed directly after being traced, due to some reasons:

    1. control flow in some ops
    2. custom ops
    3. complicated pre/post processing

    This class provides a traceable version of a detectron2 model by:

    1. Rewrite parts of the model using ops in caffe2. Note that some ops do
       not have GPU implementation.
    2. Define the inputs "after pre-processing" as inputs to the model
    3. Remove post-processing and produce raw layer outputs

    More specifically about inputs: all builtin models take two input tensors.

    1. NCHW float "data" which is an image (usually in [0, 255])
    2. Nx3 float "im_info", each row of which is (height, width, 1.0)

    After making a traceable model, the class provides methods to export such a
    model to different deployment formats.

    The class currently only supports models using builtin meta architectures.
    """

    def __init__(self, cfg, model, inputs):
        """
        Args:
            cfg (CfgNode): a detectron2 config, with extra export-related options
                added by :func:`add_export_config`. If the ``EXPORT_CAFFE2`` node
                is missing, it is added here with its default values.
            model (nn.Module): a model built by
                :func:`detectron2.modeling.build_model`. Weights have to be already
                loaded to this model.
            inputs: sample inputs that the given model takes for inference.
                Will be used to trace the model. Random input with no detected objects
                will not work if the model has data-dependent control flow (e.g., R-CNN).
        """
        assert isinstance(cfg, CN), cfg
        assert isinstance(model, torch.nn.Module), type(model)
        if "EXPORT_CAFFE2" not in cfg:
            cfg = add_export_config(cfg)  # will just use the defaults

        self.cfg = cfg
        self.model = model
        self.inputs = inputs

    def _get_traceable(self):
        """
        Build the caffe2-compatible wrapper and its inputs.

        The wrapper class is looked up by ``cfg.MODEL.META_ARCHITECTURE`` and is
        given a deep copy of the model, so ``self.model`` is not handed to it.

        Returns:
            tuple: ``(traceable_model, traceable_inputs)``
        """
        # TODO how to make it extensible to support custom models
        C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[self.cfg.MODEL.META_ARCHITECTURE]
        traceable_model = C2MetaArch(self.cfg, copy.deepcopy(self.model))
        traceable_inputs = traceable_model.get_caffe2_inputs(self.inputs)
        return traceable_model, traceable_inputs

    def export_caffe2(self):
        """
        Export the model to Caffe2's protobuf format.
        The returned object can be saved with ``.save_protobuf()`` method.
        The result can be loaded and executed using Caffe2 runtime.

        Returns:
            Caffe2Model
        """
        from .caffe2_export import export_caffe2_detection_model

        model, inputs = self._get_traceable()
        predict_net, init_net = export_caffe2_detection_model(model, inputs)
        return Caffe2Model(predict_net, init_net)

    def export_onnx(self):
        """
        Export the model to ONNX format.
        Note that the exported model contains custom ops only available in caffe2, therefore it
        cannot be directly executed by other runtime. Post-processing or transformation passes
        may be applied on the model to accommodate different runtimes, but we currently do not
        provide support for them.

        Returns:
            onnx.ModelProto: an onnx model.
        """
        from .caffe2_export import export_onnx_model as export_onnx_model_impl

        model, inputs = self._get_traceable()
        return export_onnx_model_impl(model, (inputs,))

    def export_torchscript(self):
        """
        Export the model to a ``torch.jit.TracedModule`` by tracing.
        The returned object can be saved to a file by ``.save()``.

        Returns:
            torch.jit.TracedModule: a torch TracedModule
        """
        model, inputs = self._get_traceable()
        logger = logging.getLogger(__name__)
        logger.info("Tracing the model with torch.jit.trace ...")
        with torch.no_grad():
            # NOTE: the ``optimize`` argument of torch.jit.trace is deprecated and
            # has no effect other than emitting a warning, so it is not passed.
            return torch.jit.trace(model, (inputs,))
|
||||
|
||||
|
||||
def export_caffe2_model(cfg, model, inputs):
    """
    Export a detectron2 model to caffe2 format.

    This is a convenience wrapper: it builds a :class:`Caffe2Tracer` from the
    arguments and returns the result of its ``export_caffe2()`` method.

    Args:
        cfg (CfgNode): a detectron2 config, with extra export-related options
            added by :func:`add_export_config`.
        model (nn.Module): a model built by
            :func:`detectron2.modeling.build_model`.
        inputs: sample inputs that the given model takes for inference.
            Will be used to trace the model.

    Returns:
        Caffe2Model
    """
    tracer = Caffe2Tracer(cfg, model, inputs)
    return tracer.export_caffe2()
|
||||
|
||||
|
||||
def export_onnx_model(cfg, model, inputs):
    """
    Export a detectron2 model to ONNX format.

    This is a convenience wrapper: it builds a :class:`Caffe2Tracer` from the
    arguments and returns the result of its ``export_onnx()`` method.

    Note that the exported model contains custom ops only available in caffe2, therefore it
    cannot be directly executed by other runtime. Post-processing or transformation passes
    may be applied on the model to accommodate different runtimes, but we currently do not
    provide support for them.

    Args:
        cfg (CfgNode): a detectron2 config, with extra export-related options
            added by :func:`add_export_config`.
        model (nn.Module): a model built by
            :func:`detectron2.modeling.build_model`.
        inputs: sample inputs that the given model takes for inference.
            Will be used to trace the model.

    Returns:
        onnx.ModelProto: an onnx model.
    """
    tracer = Caffe2Tracer(cfg, model, inputs)
    return tracer.export_onnx()
|
||||
|
||||
|
||||
class Caffe2Model(nn.Module):
    """
    A wrapper around the traced model in caffe2's pb format.

    It holds a predict net and an init net, can save them to / load them from
    a directory, and can be called like a detectron2 model.
    """

    def __init__(self, predict_net, init_net):
        super().__init__()
        self.eval()  # always in eval mode
        self._predict_net = predict_net
        self._init_net = init_net
        # Created on the first call to the model.
        self._predictor = None

    __init__.__HIDE_SPHINX_DOC__ = True

    @property
    def predict_net(self):
        """
        Returns:
            core.Net: the underlying caffe2 predict net
        """
        return self._predict_net

    @property
    def init_net(self):
        """
        Returns:
            core.Net: the underlying caffe2 init net
        """
        return self._init_net

    def save_protobuf(self, output_dir):
        """
        Save the model as caffe2's protobuf format.

        Three files are written into ``output_dir`` (created if missing):
        "model.pb" and "model.pbtxt" for the predict net, and "model_init.pb"
        for the init net.

        Args:
            output_dir (str): the output directory to save protobuf files.
        """
        log = logging.getLogger(__name__)
        log.info("Saving model to {} ...".format(output_dir))
        os.makedirs(output_dir, exist_ok=True)

        predict_pb = os.path.join(output_dir, "model.pb")
        with open(predict_pb, "wb") as fh:
            fh.write(self._predict_net.SerializeToString())

        predict_txt = os.path.join(output_dir, "model.pbtxt")
        with open(predict_txt, "w") as fh:
            fh.write(str(self._predict_net))

        init_pb = os.path.join(output_dir, "model_init.pb")
        with open(init_pb, "wb") as fh:
            fh.write(self._init_net.SerializeToString())

    def save_graph(self, output_file, inputs=None):
        """
        Save the graph as SVG format.

        Args:
            output_file (str): a SVG file
            inputs: optional inputs given to the model.
                If given, the inputs will be used to run the graph to record
                shape of every tensor. The shape information will be
                saved together with the graph.
        """
        from .caffe2_export import run_and_save_graph

        if inputs is None:
            save_graph(self._predict_net, output_file, op_only=False)
            return

        # Conversion parameters are read back from the predict net's arguments.
        divisibility = get_pb_arg_vali(self._predict_net, "size_divisibility", 0)
        device_name = get_pb_arg_vals(self._predict_net, "device", b"cpu").decode("ascii")
        c2_inputs = convert_batched_inputs_to_c2_format(inputs, divisibility, device_name)
        numpy_inputs = [tensor.cpu().numpy() for tensor in c2_inputs]
        run_and_save_graph(self._predict_net, self._init_net, numpy_inputs, output_file)

    @staticmethod
    def load_protobuf(dir):
        """
        Args:
            dir (str): a directory used to save Caffe2Model with
                :meth:`save_protobuf`.
                The files "model.pb" and "model_init.pb" are needed.

        Returns:
            Caffe2Model: the caffe2 model loaded from this directory.
        """

        def _read_net(filename):
            # Parse one serialized NetDef from the given file inside `dir`.
            net = caffe2_pb2.NetDef()
            with open(os.path.join(dir, filename), "rb") as fh:
                net.ParseFromString(fh.read())
            return net

        predict_net = _read_net("model.pb")
        init_net = _read_net("model_init.pb")
        return Caffe2Model(predict_net, init_net)

    def __call__(self, inputs):
        """
        An interface that wraps around a caffe2 model and mimics detectron2's models'
        input & output format. This is used to compare the outputs of caffe2 model
        with its original torch model.

        Due to the extra conversion between torch/caffe2,
        this method is not meant for benchmark.
        """
        if self._predictor is None:
            self._predictor = ProtobufDetectionModel(self._predict_net, self._init_net)
        return self._predictor(inputs)
|
|
@ -0,0 +1,503 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
|
||||
import math
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
from detectron2.layers import cat
|
||||
from detectron2.layers.roi_align_rotated import ROIAlignRotated
|
||||
from detectron2.modeling import poolers
|
||||
from detectron2.modeling.proposal_generator import rpn
|
||||
from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference
|
||||
from detectron2.structures import Boxes, ImageList, Instances, Keypoints
|
||||
|
||||
from .shared import alias, to_device
|
||||
|
||||
|
||||
"""
|
||||
This file contains caffe2-compatible implementation of several detectron2 components.
|
||||
"""
|
||||
|
||||
|
||||
class Caffe2Boxes(Boxes):
    """
    Representing a list of detectron2.structures.Boxes from minibatch, each box
    is represented by a 5d vector (batch index + 4 coordinates), or a 6d vector
    (batch index + 5 coordinates) for RotatedBoxes.

    Plain 4-column tensors are accepted as well.
    """

    def __init__(self, tensor):
        """
        Args:
            tensor (torch.Tensor): a 2-D tensor whose last dimension is 4, 5 or 6.
        """
        assert isinstance(tensor, torch.Tensor)
        assert tensor.dim() == 2 and tensor.size(-1) in (4, 5, 6), tensor.size()
        # TODO: make tensor immutable when dim is Nx5 for Boxes,
        # and Nx6 for RotatedBoxes?
        self.tensor = tensor
|
||||
|
||||
|
||||
# TODO clean up this class, maybe just extend Instances
|
||||
class InstancesList(object):
    """
    Tensor representation of a list of Instances object for a batch of images.

    When dealing with a batch of images with Caffe2 ops, a list of bboxes
    (instances) are usually represented by single Tensor with size
    (sigma(Ni), 5) or (sigma(Ni), 4) plus a batch split Tensor. This class is
    for providing common functions to convert between these two representations.

    Attributes other than ``im_info``, ``indices``, ``batch_extra_fields`` and
    ``image_size`` are stored in (and read from) ``batch_extra_fields``.
    """

    def __init__(self, im_info, indices, extra_fields=None):
        """
        Args:
            im_info: per-image information, [N, 3] -> (H, W, Scale).
            indices: [N,] -> index of the batch element each instance belongs to.
            extra_fields (dict or None): initial extra fields, each [N, ...].
        """
        # [N, 3] -> (H, W, Scale)
        self.im_info = im_info
        # [N,] -> indice of batch to which the instance belongs
        self.indices = indices
        # [N, ...]
        self.batch_extra_fields = extra_fields or {}

        self.image_size = self.im_info

    def get_fields(self):
        """
        Like `get_fields` in the Instances object.

        Returns:
            dict: a new dict mapping field name to the value exactly as stored
                (no conversion to a tensor representation is performed).
        """
        return dict(self.batch_extra_fields)

    def has(self, name):
        """Return whether an extra field called `name` exists."""
        return name in self.batch_extra_fields

    def set(self, name, value):
        """
        Store `value` as the extra field `name`.

        Once at least one extra field exists, the length of `value` must equal
        ``len(self)``.
        """
        data_len = len(value)
        if len(self.batch_extra_fields):
            assert (
                len(self) == data_len
            ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self))
        self.batch_extra_fields[name] = value

    def __setattr__(self, name, val):
        # The four structural attributes live in the instance dict; anything
        # else is treated as an extra field.
        if name in ["im_info", "indices", "batch_extra_fields", "image_size"]:
            super().__setattr__(name, val)
        else:
            self.set(name, val)

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If "batch_extra_fields"
        # itself is missing (instance created without __init__, e.g. while being
        # copied or unpickled), reading it below would re-enter __getattr__ and
        # recurse without bound, so fail fast with AttributeError instead.
        if name == "batch_extra_fields" or name not in self.batch_extra_fields:
            raise AttributeError("Cannot find field '{}' in the given Instances!".format(name))
        return self.batch_extra_fields[name]

    def __len__(self):
        return len(self.indices)

    def flatten(self):
        """
        Returns:
            list: the extra-field values in insertion order, with Boxes and
                Keypoints replaced by their underlying ``.tensor``.
        """
        ret = []
        for v in self.batch_extra_fields.values():
            if isinstance(v, (Boxes, Keypoints)):
                ret.append(v.tensor)
            else:
                ret.append(v)
        return ret

    @staticmethod
    def to_d2_instances_list(instances_list):
        """
        Convert InstancesList to List[Instances]. The input `instances_list` can
        also be a List[Instances], in this case this method is a non-op.

        A field value may be a tensor, a Boxes object, or a
        ``(target_type, tensor)`` pair telling which type to build from the
        per-image slice of the tensor.
        """
        if not isinstance(instances_list, InstancesList):
            assert all(isinstance(x, Instances) for x in instances_list)
            return instances_list

        ret = []
        for i, info in enumerate(instances_list.im_info):
            instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())]))

            # Mask selecting the rows that belong to image i.
            ids = instances_list.indices == i
            for k, v in instances_list.batch_extra_fields.items():
                if isinstance(v, torch.Tensor):
                    instances.set(k, v[ids])
                    continue
                elif isinstance(v, Boxes):
                    # Keep only the last 4 columns (drops a leading batch index).
                    instances.set(k, v[ids, -4:])
                    continue

                target_type, tensor_source = v
                assert isinstance(tensor_source, torch.Tensor)
                assert tensor_source.shape[0] == instances_list.indices.shape[0]
                tensor_source = tensor_source[ids]

                if issubclass(target_type, Boxes):
                    instances.set(k, Boxes(tensor_source[:, -4:]))
                elif issubclass(target_type, Keypoints):
                    instances.set(k, Keypoints(tensor_source))
                elif issubclass(target_type, torch.Tensor):
                    instances.set(k, tensor_source)
                else:
                    raise ValueError("Can't handle targe type: {}".format(target_type))

            ret.append(instances)
        return ret
|
||||
|
||||
|
||||
class Caffe2Compatible(object):
    """
    Mixin exposing a ``tensor_mode`` attribute that is stored in
    ``self._tensor_mode``.
    """

    def _get_tensor_mode(self):
        return self._tensor_mode

    def _set_tensor_mode(self, v):
        self._tensor_mode = v

    @property
    def tensor_mode(self):
        """
        If true, the model expects C2-style tensor only inputs/outputs format.
        """
        return self._tensor_mode

    @tensor_mode.setter
    def tensor_mode(self, v):
        self._tensor_mode = v
|
||||
|
||||
|
||||
class Caffe2RPN(Caffe2Compatible, rpn.RPN):
    """
    Inference-only RPN whose proposal generation is expressed with caffe2 ops
    (``GenerateProposals`` and, for multiple feature levels,
    ``CollectRpnProposals``).
    """

    def forward(self, images, features, gt_instances=None):
        """
        Args:
            images (ImageList): the input images. In tensor mode its
                ``image_sizes`` must already be a tensor (asserted below).
            features (dict): feature maps, indexed by ``self.in_features``.
            gt_instances: not used by this implementation.

        Returns:
            tuple: ``(proposals, {})`` where ``proposals`` is the output of
                :meth:`c2_postprocess`.
        """
        assert not self.training

        features = [features[f] for f in self.in_features]
        objectness_logits_pred, anchor_deltas_pred = self.rpn_head(features)

        assert isinstance(images, ImageList)
        if self.tensor_mode:
            im_info = images.image_sizes
        else:
            # Build one (height, width, 1.0) row per image.
            im_info = torch.Tensor(
                [[im_sz[0], im_sz[1], torch.Tensor([1.0])] for im_sz in images.image_sizes]
            ).to(images.tensor.device)
        assert isinstance(im_info, torch.Tensor)

        # One GenerateProposals call per feature level.
        rpn_rois_list = []
        rpn_roi_probs_list = []
        for scores, bbox_deltas, cell_anchors_tensor, feat_stride in zip(
            objectness_logits_pred,
            anchor_deltas_pred,
            iter(self.anchor_generator.cell_anchors),
            self.anchor_generator.strides,
        ):
            scores = scores.detach()
            bbox_deltas = bbox_deltas.detach()

            rpn_rois, rpn_roi_probs = torch.ops._caffe2.GenerateProposals(
                scores,
                bbox_deltas,
                im_info,
                cell_anchors_tensor,
                spatial_scale=1.0 / feat_stride,
                pre_nms_topN=self.pre_nms_topk[self.training],
                post_nms_topN=self.post_nms_topk[self.training],
                nms_thresh=self.nms_thresh,
                min_size=self.min_box_size,
                # correct_transform_coords=True,  # deprecated argument
                angle_bound_on=True,  # Default
                angle_bound_lo=-180,
                angle_bound_hi=180,
                clip_angle_thresh=1.0,  # Default
                legacy_plus_one=False,
            )
            rpn_rois_list.append(rpn_rois)
            rpn_roi_probs_list.append(rpn_roi_probs)

        # For FPN in D2, in RPN all proposals from different levels are concated
        # together, ranked and picked by top post_nms_topk. Then in ROIPooler
        # it calculates level_assignments and calls the RoIAlign from
        # the corresponding level.

        if len(objectness_logits_pred) == 1:
            rpn_rois = rpn_rois_list[0]
            rpn_roi_probs = rpn_roi_probs_list[0]
        else:
            assert len(rpn_rois_list) == len(rpn_roi_probs_list)
            rpn_post_nms_topN = self.post_nms_topk[self.training]

            # Inputs are moved to CPU for CollectRpnProposals and the result is
            # moved back to the original device afterwards.
            device = rpn_rois_list[0].device
            input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)]

            # TODO remove this after confirming rpn_max_level/rpn_min_level
            # is not needed in CollectRpnProposals.
            feature_strides = list(self.anchor_generator.strides)
            rpn_min_level = int(math.log2(feature_strides[0]))
            rpn_max_level = int(math.log2(feature_strides[-1]))
            assert (rpn_max_level - rpn_min_level + 1) == len(
                rpn_rois_list
            ), "CollectRpnProposals requires continuous levels"

            rpn_rois = torch.ops._caffe2.CollectRpnProposals(
                input_list,
                # NOTE: in current implementation, rpn_max_level and rpn_min_level
                # are not needed, only the subtraction of two matters and it
                # can be infer from the number of inputs. Keep them now for
                # consistency.
                rpn_max_level=2 + len(rpn_rois_list) - 1,
                rpn_min_level=2,
                rpn_post_nms_topN=rpn_post_nms_topN,
            )
            rpn_rois = to_device(rpn_rois, device)
            # No per-proposal probabilities are kept in the multi-level case.
            rpn_roi_probs = []

        proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode)
        return proposals, {}

    @staticmethod
    def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode):
        """
        Wrap the raw RPN outputs into an :class:`InstancesList`.

        Column 0 of ``rpn_rois`` is used as the per-proposal batch index.

        Returns:
            list: ``[InstancesList]`` in tensor mode, otherwise the result of
                :meth:`InstancesList.to_d2_instances_list`.
        """
        proposals = InstancesList(
            im_info=im_info,
            indices=rpn_rois[:, 0],
            extra_fields={
                "proposal_boxes": Caffe2Boxes(rpn_rois),
                "objectness_logits": (torch.Tensor, rpn_roi_probs),
            },
        )
        if not tensor_mode:
            proposals = InstancesList.to_d2_instances_list(proposals)
        else:
            proposals = [proposals]
        return proposals
|
||||
|
||||
|
||||
class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler):
    """
    Inference-only ROI pooler expressed with caffe2 ops (``RoIAlign`` /
    ``RoIAlignRotated``, ``DistributeFpnProposals`` and ``BatchPermutation``).
    """

    @staticmethod
    def c2_preprocess(box_lists):
        """
        Convert a list of boxes into the single tensor passed to the RoIAlign ops.

        Args:
            box_lists (list[Boxes]): either exactly one :class:`Caffe2Boxes`
                (its tensor is used as-is) or regular Boxes, which are converted
                with ``poolers.convert_boxes_to_pooler_format``.
        """
        assert all(isinstance(x, Boxes) for x in box_lists)
        if all(isinstance(x, Caffe2Boxes) for x in box_lists):
            # input is pure-tensor based
            assert len(box_lists) == 1
            pooler_fmt_boxes = box_lists[0].tensor
        else:
            pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists)
        return pooler_fmt_boxes

    def forward(self, x, box_lists):
        """
        Args:
            x (list): feature maps, one per entry of ``self.level_poolers``.
            box_lists (list[Boxes]): see :meth:`c2_preprocess`.

        Returns:
            the pooled ROI features produced by the caffe2 ops.
        """
        assert not self.training

        pooler_fmt_boxes = self.c2_preprocess(box_lists)
        num_level_assignments = len(self.level_poolers)

        # Single level: one RoIAlign call on the only feature map.
        if num_level_assignments == 1:
            if isinstance(self.level_poolers[0], ROIAlignRotated):
                c2_roi_align = torch.ops._caffe2.RoIAlignRotated
                aligned = True
            else:
                c2_roi_align = torch.ops._caffe2.RoIAlign
                aligned = self.level_poolers[0].aligned

            out = c2_roi_align(
                x[0],
                pooler_fmt_boxes,
                order="NCHW",
                spatial_scale=float(self.level_poolers[0].spatial_scale),
                pooled_h=int(self.output_size[0]),
                pooled_w=int(self.output_size[1]),
                sampling_ratio=int(self.level_poolers[0].sampling_ratio),
                aligned=aligned,
            )
            return out

        # Multiple levels: distribute the boxes to levels (on CPU), pool each
        # level separately, then restore the original box order.
        device = pooler_fmt_boxes.device
        assert (
            self.max_level - self.min_level + 1 == 4
        ), "Currently DistributeFpnProposals only support 4 levels"
        fpn_outputs = torch.ops._caffe2.DistributeFpnProposals(
            to_device(pooler_fmt_boxes, "cpu"),
            roi_canonical_scale=self.canonical_box_size,
            roi_canonical_level=self.canonical_level,
            roi_max_level=self.max_level,
            roi_min_level=self.min_level,
            legacy_plus_one=False,
        )
        fpn_outputs = [to_device(x, device) for x in fpn_outputs]

        # All outputs but the last are per-level rois; the last one is the
        # index used below to undo the shuffling.
        rois_fpn_list = fpn_outputs[:-1]
        rois_idx_restore_int32 = fpn_outputs[-1]

        roi_feat_fpn_list = []
        for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers):
            if isinstance(pooler, ROIAlignRotated):
                c2_roi_align = torch.ops._caffe2.RoIAlignRotated
                aligned = True
            else:
                c2_roi_align = torch.ops._caffe2.RoIAlign
                aligned = bool(pooler.aligned)

            roi_feat_fpn = c2_roi_align(
                x_level,
                roi_fpn,
                order="NCHW",
                spatial_scale=float(pooler.spatial_scale),
                pooled_h=int(self.output_size[0]),
                pooled_w=int(self.output_size[1]),
                sampling_ratio=int(pooler.sampling_ratio),
                aligned=aligned,
            )
            roi_feat_fpn_list.append(roi_feat_fpn)

        roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0)
        roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32)
        return roi_feat
|
||||
|
||||
|
||||
class Caffe2FastRCNNOutputsInference:
    """
    Callable that turns box-head predictions into detections using the caffe2
    ops ``BBoxTransform`` and ``BoxWithNMSLimit``.
    """

    def __init__(self, tensor_mode):
        self.tensor_mode = tensor_mode  # whether the output is caffe2 tensor mode

    def __call__(self, box_predictor, predictions, proposals):
        """
        Equivalent to FastRCNNOutputLayers.inference.

        Args:
            box_predictor: provides ``test_score_thresh``, ``test_nms_thresh``,
                ``test_topk_per_image`` and ``box2box_transform.weights``.
            predictions (tuple): ``(class_logits, box_regression)``.
            proposals (list): objects with ``proposal_boxes`` and ``image_size``.

        Returns:
            tuple: ``(results, kept_indices)``. In tensor mode both are
                one-element lists; otherwise ``results`` comes from
                :meth:`InstancesList.to_d2_instances_list` and ``kept_indices``
                is split per image.
        """
        score_thresh = box_predictor.test_score_thresh
        nms_thresh = box_predictor.test_nms_thresh
        topk_per_image = box_predictor.test_topk_per_image
        # Five transform weights indicate rotated boxes.
        is_rotated = len(box_predictor.box2box_transform.weights) == 5

        if is_rotated:
            box_dim = 5
            assert box_predictor.box2box_transform.weights[4] == 1, (
                "The weights for Rotated BBoxTransform in C2 have only 4 dimensions,"
                + " thus enforcing the angle weight to be 1 for now"
            )
            box2box_transform_weights = box_predictor.box2box_transform.weights[:4]
        else:
            box_dim = 4
            box2box_transform_weights = box_predictor.box2box_transform.weights

        class_logits, box_regression = predictions
        class_prob = F.softmax(class_logits, -1)

        assert box_regression.shape[1] % box_dim == 0
        cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1

        # Proposals with one extra column (box_dim + 1) are treated as already
        # being in tensor format.
        input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1

        rois = type(proposals[0].proposal_boxes).cat([p.proposal_boxes for p in proposals])
        device, dtype = rois.tensor.device, rois.tensor.dtype
        if input_tensor_mode:
            im_info = proposals[0].image_size
            rois = rois.tensor
        else:
            # Build (height, width, 1.0) rows and prepend a batch-index column
            # to the boxes.
            im_info = torch.Tensor(
                [[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]]
            )
            batch_ids = cat(
                [
                    torch.full((b, 1), i, dtype=dtype, device=device)
                    for i, b in enumerate(len(p) for p in proposals)
                ],
                dim=0,
            )
            rois = torch.cat([batch_ids, rois.tensor], dim=1)

        # Both caffe2 ops below are fed CPU tensors; outputs are moved back to
        # the original device.
        roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform(
            to_device(rois, "cpu"),
            to_device(box_regression, "cpu"),
            to_device(im_info, "cpu"),
            weights=box2box_transform_weights,
            apply_scale=True,
            rotated=is_rotated,
            angle_bound_on=True,
            angle_bound_lo=-180,
            angle_bound_hi=180,
            clip_angle_thresh=1.0,
            legacy_plus_one=False,
        )
        roi_pred_bbox = to_device(roi_pred_bbox, device)
        roi_batch_splits = to_device(roi_batch_splits, device)

        nms_outputs = torch.ops._caffe2.BoxWithNMSLimit(
            to_device(class_prob, "cpu"),
            to_device(roi_pred_bbox, "cpu"),
            to_device(roi_batch_splits, "cpu"),
            score_thresh=float(score_thresh),
            nms=float(nms_thresh),
            detections_per_im=int(topk_per_image),
            soft_nms_enabled=False,
            soft_nms_method="linear",
            soft_nms_sigma=0.5,
            soft_nms_min_score_thres=0.001,
            rotated=is_rotated,
            cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
            input_boxes_include_bg_cls=False,
            output_classes_include_bg_cls=False,
            legacy_plus_one=False,
        )
        roi_score_nms = to_device(nms_outputs[0], device)
        roi_bbox_nms = to_device(nms_outputs[1], device)
        roi_class_nms = to_device(nms_outputs[2], device)
        roi_batch_splits_nms = to_device(nms_outputs[3], device)
        roi_keeps_nms = to_device(nms_outputs[4], device)
        roi_keeps_size_nms = to_device(nms_outputs[5], device)
        if not self.tensor_mode:
            roi_class_nms = roi_class_nms.to(torch.int64)

        # One batch index per kept detection, derived from the per-image counts.
        roi_batch_ids = cat(
            [
                torch.full((b, 1), i, dtype=dtype, device=device)
                for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms)
            ],
            dim=0,
        )

        roi_class_nms = alias(roi_class_nms, "class_nms")
        roi_score_nms = alias(roi_score_nms, "score_nms")
        roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms")
        roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms")
        roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms")
        roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms")

        results = InstancesList(
            im_info=im_info,
            indices=roi_batch_ids[:, 0],
            extra_fields={
                "pred_boxes": Caffe2Boxes(roi_bbox_nms),
                "scores": roi_score_nms,
                "pred_classes": roi_class_nms,
            },
        )

        if not self.tensor_mode:
            results = InstancesList.to_d2_instances_list(results)
            batch_splits = roi_batch_splits_nms.int().tolist()
            kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits))
        else:
            results = [results]
            kept_indices = [roi_keeps_nms]

        return results, kept_indices
|
||||
|
||||
|
||||
class Caffe2MaskRCNNInference:
    """Callable used in place of ``mask_head.mask_rcnn_inference``."""

    def __call__(self, pred_mask_logits, pred_instances):
        """
        Equivalent to mask_head.mask_rcnn_inference.

        When every element of `pred_instances` is an InstancesList (exactly one
        is expected), the sigmoid of the logits is stored on it as
        ``pred_masks``; otherwise the regular ``mask_rcnn_inference`` is used.
        """
        tensor_format = all(isinstance(x, InstancesList) for x in pred_instances)
        if not tensor_format:
            mask_rcnn_inference(pred_mask_logits, pred_instances)
            return

        assert len(pred_instances) == 1
        probs = alias(pred_mask_logits.sigmoid(), "mask_fcn_probs")
        pred_instances[0].pred_masks = probs
|
||||
|
||||
|
||||
class Caffe2KeypointRCNNInference:
    """Callable that attaches keypoint predictions to tensor-format instances."""

    def __init__(self, use_heatmap_max_keypoint):
        """
        Args:
            use_heatmap_max_keypoint (bool): whether to run the caffe2
                ``HeatmapMaxKeypoint`` op on the heatmap before storing it.
        """
        self.use_heatmap_max_keypoint = use_heatmap_max_keypoint

    def __call__(self, pred_keypoint_logits, pred_instances):
        """
        Store the keypoint output on ``pred_instances[0].pred_keypoints`` when
        the instances are in InstancesList format.

        Returns:
            the unchanged `pred_keypoint_logits`.
        """
        # just return the keypoint heatmap for now,
        # there will be option to call HeatmapMaxKeypointOp
        kps = alias(pred_keypoint_logits, "kps_score")
        if all(isinstance(x, InstancesList) for x in pred_instances):
            assert len(pred_instances) == 1
            target = pred_instances[0]
            if self.use_heatmap_max_keypoint:
                # The op is fed the heatmap on CPU; the result goes back to the
                # heatmap's original device.
                original_device = kps.device
                kps = torch.ops._caffe2.HeatmapMaxKeypoint(
                    to_device(kps, "cpu"),
                    target.pred_boxes.tensor,
                    should_output_softmax=True,  # worth make it configerable?
                )
                kps = alias(to_device(kps, original_device), "keypoints_out")
            target.pred_keypoints = kps
        return pred_keypoint_logits
|
|
@ -0,0 +1,207 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import copy
|
||||
import io
|
||||
import logging
|
||||
import numpy as np
|
||||
from typing import List
|
||||
import onnx
|
||||
import torch
|
||||
from caffe2.proto import caffe2_pb2
|
||||
from caffe2.python import core
|
||||
from caffe2.python.onnx.backend import Caffe2Backend
|
||||
from tabulate import tabulate
|
||||
from termcolor import colored
|
||||
from torch.onnx import OperatorExportTypes
|
||||
|
||||
from .shared import (
|
||||
ScopedWS,
|
||||
construct_init_net_from_params,
|
||||
fuse_alias_placeholder,
|
||||
fuse_copy_between_cpu_and_gpu,
|
||||
get_params_from_init_net,
|
||||
group_norm_replace_aten_with_caffe2,
|
||||
infer_device_type,
|
||||
remove_dead_end_ops,
|
||||
remove_reshape_for_fc,
|
||||
save_graph,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def export_onnx_model(model, inputs):
    """
    Trace and export a model to onnx format.

    The model is exported to an in-memory buffer, loaded back as an onnx model
    and then run through the "fuse_bn_into_conv" optimization pass.

    Args:
        model (nn.Module): must have every sub-module in eval mode.
        inputs (tuple[args]): the model will be called by `model(*inputs)`

    Returns:
        an onnx model
    """
    assert isinstance(model, torch.nn.Module)

    # make sure all modules are in eval mode, onnx may change the training state
    # of the module if the states are not consistent
    def _assert_eval(submodule):
        assert not submodule.training

    model.apply(_assert_eval)

    # Export the model to ONNX
    with torch.no_grad(), io.BytesIO() as buffer:
        torch.onnx.export(
            model,
            inputs,
            buffer,
            operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK,
            # verbose=True,  # NOTE: uncomment this for debugging
            # export_params=True,
        )
        exported = onnx.load_from_string(buffer.getvalue())

    # Apply ONNX's Optimization
    available_passes = onnx.optimizer.get_available_passes()
    wanted_passes = ["fuse_bn_into_conv"]
    assert all(p in available_passes for p in wanted_passes)
    return onnx.optimizer.optimize(exported, wanted_passes)
|
||||
|
||||
|
||||
def _op_stats(net_def):
    """
    Summarize how often each operator type occurs in `net_def.op`.

    Returns:
        str: one "<count>x <type>" line per operator type, most frequent
        first; ties are ordered alphabetically by type name.
    """
    occurrences = {}
    for op in net_def.op:
        occurrences[op.type] = occurrences.get(op.type, 0) + 1
    # One composite key: descending count, then ascending name.
    ranked = sorted(occurrences.items(), key=lambda item: (-item[1], item[0]))
    lines = ["{:>4}x {}".format(n_ops, op_type) for op_type, n_ops in ranked]
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _assign_device_option(
    predict_net: caffe2_pb2.NetDef, init_net: caffe2_pb2.NetDef, tensor_inputs: List[torch.Tensor]
):
    """
    ONNX exported network doesn't have concept of device, assign necessary
    device option for each op in order to make it runnable on GPU runtime.

    Both nets are modified in place.

    Args:
        predict_net: the exported predict net.
        init_net: the exported init net.
        tensor_inputs: the tensors used for export, matched positionally with
            `predict_net.external_input` to seed device inference.
    """

    def _get_device_type(torch_tensor):
        device = torch_tensor.device
        assert device.type in ["cpu", "cuda"]
        # CPU tensors report `device.index is None`, so only require index 0
        # for CUDA tensors (only cuda:0 is supported).
        assert device.type == "cpu" or device.index == 0
        return device.type

    def _assign_op_device_option(net_proto, net_ssa, blob_device_types):
        for op, ssa_i in zip(net_proto.op, net_ssa):
            if op.type in ["CopyCPUToGPU", "CopyGPUToCPU"]:
                # Copy ops are always assigned to CUDA device 0.
                op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0))
            else:
                # All versioned input/output blobs of an op must share a device.
                devices = [blob_device_types[b] for b in ssa_i[0] + ssa_i[1]]
                assert all(d == devices[0] for d in devices)
                if devices[0] == "cuda":
                    op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0))

    # update ops in predict_net
    predict_net_input_device_types = {
        (name, 0): _get_device_type(tensor)
        for name, tensor in zip(predict_net.external_input, tensor_inputs)
    }
    predict_net_device_types = infer_device_type(
        predict_net, known_status=predict_net_input_device_types, device_name_style="pytorch"
    )
    predict_net_ssa, _ = core.get_ssa(predict_net)
    _assign_op_device_option(predict_net, predict_net_ssa, predict_net_device_types)

    # update ops in init_net: its outputs must live on the device where
    # predict_net consumes them (version 0 of the same blob name).
    init_net_ssa, versions = core.get_ssa(init_net)
    init_net_output_device_types = {
        (name, versions[name]): predict_net_device_types[(name, 0)]
        for name in init_net.external_output
    }
    init_net_device_types = infer_device_type(
        init_net, known_status=init_net_output_device_types, device_name_style="pytorch"
    )
    _assign_op_device_option(init_net, init_net_ssa, init_net_device_types)
|
||||
|
||||
|
||||
def export_caffe2_detection_model(model: torch.nn.Module, tensor_inputs: List[torch.Tensor]):
    """
    Export a caffe2-compatible Detectron2 model to caffe2 format via ONNX.

    Arg:
        model: a caffe2-compatible version of detectron2 model, defined in caffe2_modeling.py
        tensor_inputs: a list of tensors that caffe2 model takes as input.

    Returns:
        (predict_net, init_net): the optimized caffe2 nets.
    """
    # Work on a private copy so the caller's model is left untouched.
    model = copy.deepcopy(model)
    assert isinstance(model, torch.nn.Module)
    assert hasattr(model, "encode_additional_info")

    # Step 1: PyTorch -> ONNX
    arch_name = type(model).__name__
    logger.info(
        "Exporting a {} model via ONNX ...".format(arch_name)
        + " Some warnings from ONNX are expected and are usually not to worry about."
    )
    onnx_model = export_onnx_model(model, (tensor_inputs,))

    # Step 2: ONNX -> Caffe2 protobuf
    init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model)
    op_rows = [[op.type, op.input, op.output] for op in predict_net.op]
    op_table = tabulate(op_rows, headers=["type", "input", "output"], tablefmt="pipe")
    logger.info(
        "ONNX export Done. Exported predict_net (before optimizations):\n"
        + colored(op_table, "cyan")
    )

    # Step 3: protobuf-level optimizations
    fuse_alias_placeholder(predict_net, init_net)
    has_non_cpu_input = any(t.device.type != "cpu" for t in tensor_inputs)
    if has_non_cpu_input:
        fuse_copy_between_cpu_and_gpu(predict_net)
        remove_dead_end_ops(init_net)
        _assign_device_option(predict_net, init_net, tensor_inputs)
    params, device_options = get_params_from_init_net(init_net)
    predict_net, params = remove_reshape_for_fc(predict_net, params)
    init_net = construct_init_net_from_params(params, device_options)
    group_norm_replace_aten_with_caffe2(predict_net)

    # Step 4: record the information needed to run the pb model in Detectron2.
    model.encode_additional_info(predict_net, init_net)

    for label, net in (("predict_net", predict_net), ("init_net", init_net)):
        logger.info("Operators used in {}: \n{}".format(label, _op_stats(net)))

    return predict_net, init_net
|
||||
|
||||
|
||||
def run_and_save_graph(predict_net, init_net, tensor_inputs, graph_save_path):
    """
    Run the caffe2 model on given inputs, recording blob shapes, and draw the graph.

    The graph is saved twice to `graph_save_path`: once before running, and
    once more after running, annotated with the observed blob shapes.

    predict_net/init_net: caffe2 model.
    tensor_inputs: a list of tensors that caffe2 model takes as input.
    graph_save_path: path for saving graph of exported model.

    Returns:
        dict: every blob name in the workspace mapped to its fetched value.
    """
    logger.info("Saving graph of ONNX exported model to {} ...".format(graph_save_path))
    save_graph(predict_net, graph_save_path, op_only=False)

    # Run the exported Caffe2 net
    logger.info("Running ONNX exported model ...")
    with ScopedWS("__ws_tmp__", True) as ws:
        ws.RunNetOnce(init_net)
        # External inputs that init_net did not create are fed positionally
        # from `tensor_inputs`.
        already_present = set(ws.Blobs())
        pending_inputs = [
            name for name in predict_net.external_input if name not in already_present
        ]
        for blob_name, tensor in zip(pending_inputs, tensor_inputs):
            ws.FeedBlob(blob_name, tensor)

        # Best effort: a failure is logged and whatever was produced is kept.
        try:
            ws.RunNetOnce(predict_net)
        except RuntimeError as e:
            logger.warning("Encountered RuntimeError: \n{}".format(str(e)))

        ws_blobs = {}
        for blob_name in ws.Blobs():
            ws_blobs[blob_name] = ws.FetchBlob(blob_name)
        blob_sizes = {
            blob_name: value.shape
            for blob_name, value in ws_blobs.items()
            if isinstance(value, np.ndarray)
        }

        logger.info("Saving graph with blob shapes to {} ...".format(graph_save_path))
        save_graph(predict_net, graph_save_path, op_only=False, blob_sizes=blob_sizes)

        return ws_blobs
|
|
@ -0,0 +1,136 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import collections
|
||||
import logging
|
||||
import numpy as np
|
||||
import torch
|
||||
from caffe2.proto import caffe2_pb2
|
||||
from caffe2.python import core
|
||||
|
||||
from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format
|
||||
from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ProtobufModel(torch.nn.Module):
    """
    An nn.Module-like wrapper (for inference only) that runs a caffe2 model
    under the hood. Inputs and outputs are Dict[str, tensor] keyed by the
    names in the net's external_input / external_output.
    """

    def __init__(self, predict_net, init_net):
        logger.info("Initializing ProtobufModel ...")
        super().__init__()
        assert isinstance(predict_net, caffe2_pb2.NetDef)
        assert isinstance(init_net, caffe2_pb2.NetDef)
        self.ws_name = "__ws_tmp__"
        self.net = core.Net(predict_net)

        # Populate the workspace once and keep it alive (is_cleanup=False)
        # so that forward() can reuse it.
        with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws:
            ws.RunNetOnce(init_net)
            for input_name in self.net.Proto().external_input:
                if input_name in ws.Blobs():
                    continue
                ws.CreateBlob(input_name)
            ws.CreateNet(self.net)

        # RuntimeError messages already reported, to avoid repeating them.
        self._error_msgs = set()

    def forward(self, inputs_dict):
        """
        Feed `inputs_dict` into the workspace, run the net and fetch outputs.

        Returns:
            OrderedDict: external output name -> fetched blob.
        """
        proto = self.net.Proto()
        assert all(name in proto.external_input for name in inputs_dict)
        with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws:
            for blob_name, tensor in inputs_dict.items():
                ws.FeedBlob(blob_name, tensor)
            try:
                ws.RunNet(proto.name)
            except RuntimeError as e:
                message = str(e)
                if message not in self._error_msgs:
                    self._error_msgs.add(message)
                    logger.warning("Encountered new RuntimeError: \n{}".format(message))
                logger.warning("Catch the error and use partial results.")

            outputs_dict = collections.OrderedDict(
                (name, ws.FetchBlob(name)) for name in proto.external_output
            )
            # Overwrite this run's outputs so that a later run failing midway
            # cannot return stale results. There is no RemoveBlob/CreateBlob
            # pair available, so a placeholder string is fed instead.
            for name in proto.external_output:
                placeholder = "{}, a C++ native class of type nullptr (uninitialized).".format(
                    name
                )
                ws.FeedBlob(name, placeholder)

        return outputs_dict
|
||||
|
||||
|
||||
class ProtobufDetectionModel(torch.nn.Module):
    """
    A class works just like a pytorch meta arch in terms of inference, but running
    caffe2 model under the hood.
    """

    def __init__(self, predict_net, init_net, *, convert_outputs=None):
        """
        Args:
            predict_net, init_net (core.Net): caffe2 nets
            convert_outputs (callable): a function that converts caffe2
                outputs to the same format of the original pytorch model.
                By default, use the one defined in the caffe2 meta_arch.
        """
        super().__init__()
        self.protobuf_model = ProtobufModel(predict_net, init_net)
        # Metadata stored as net arguments; defaults apply when absent.
        self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0)
        self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii")

        if convert_outputs is None:
            meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN")
            meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")]
            self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net)
        else:
            self._convert_outputs = convert_outputs

    def _infer_output_devices(self, inputs_dict):
        """
        Infer the device type of every external output from the input devices.

        Returns:
            list[str]: device types, in `external_output` order.
        """

        def _get_device_type(torch_tensor):
            device = torch_tensor.device
            assert device.type in ["cpu", "cuda"]
            # CPU tensors report `device.index is None`, so only require
            # index 0 for CUDA tensors (only cuda:0 is supported).
            assert device.type == "cpu" or device.index == 0
            return device.type

        predict_net = self.protobuf_model.net.Proto()
        input_device_types = {
            (name, 0): _get_device_type(tensor) for name, tensor in inputs_dict.items()
        }
        device_type_map = infer_device_type(
            predict_net, known_status=input_device_types, device_name_style="pytorch"
        )
        # Outputs are looked up by their final SSA version.
        _, versions = core.get_ssa(predict_net)
        versioned_outputs = [(name, versions[name]) for name in predict_net.external_output]
        return [device_type_map[outp] for outp in versioned_outputs]

    def _convert_inputs(self, batched_inputs):
        # currently all models convert inputs in the same way
        data, im_info = convert_batched_inputs_to_c2_format(
            batched_inputs, self.size_divisibility, self.device
        )
        return {"data": data, "im_info": im_info}

    def forward(self, batched_inputs):
        """
        Run the caffe2 model on `batched_inputs` and return the result of the
        configured output converter.
        """
        c2_inputs = self._convert_inputs(batched_inputs)
        c2_results = self.protobuf_model(c2_inputs)

        output_names = self.protobuf_model.net.Proto().external_output
        if any(t.device.type != "cpu" for t in c2_inputs.values()):
            output_devices = self._infer_output_devices(c2_inputs)
        else:
            output_devices = ["cpu" for _ in output_names]

        def _cast_caffe2_blob_to_torch_tensor(blob, device):
            # Blobs that are not ndarrays (e.g. outputs a failed run never
            # produced) become None.
            return torch.Tensor(blob).to(device) if isinstance(blob, np.ndarray) else None

        c2_results = {
            name: _cast_caffe2_blob_to_torch_tensor(c2_results[name], device)
            for name, device in zip(output_names, output_devices)
        }

        return self._convert_outputs(batched_inputs, c2_inputs, c2_results)
|
|
@ -0,0 +1,497 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import functools
|
||||
import io
|
||||
import struct
|
||||
import types
|
||||
import torch
|
||||
|
||||
from detectron2.modeling import meta_arch
|
||||
from detectron2.modeling.box_regression import Box2BoxTransform
|
||||
from detectron2.modeling.meta_arch.panoptic_fpn import combine_semantic_and_instance_outputs
|
||||
from detectron2.modeling.meta_arch.retinanet import permute_to_N_HWA_K
|
||||
from detectron2.modeling.postprocessing import detector_postprocess, sem_seg_postprocess
|
||||
from detectron2.modeling.roi_heads import keypoint_head
|
||||
from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes
|
||||
|
||||
from .c10 import Caffe2Compatible
|
||||
from .patcher import ROIHeadsPatcher, patch_generalized_rcnn
|
||||
from .shared import (
|
||||
alias,
|
||||
check_set_pb_arg,
|
||||
get_pb_arg_floats,
|
||||
get_pb_arg_valf,
|
||||
get_pb_arg_vali,
|
||||
get_pb_arg_vals,
|
||||
mock_torch_nn_functional_interpolate,
|
||||
)
|
||||
|
||||
|
||||
def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False):
    """
    Assemble a caffe2 model's outputs (i.e. Dict[str, Tensor]) into
    detectron2's format (i.e. a list of Instances).
    This only works when the model follows the Caffe2 detectron's naming convention.

    Args:
        image_sizes (List[List[int, int]]): [H, W] of every image.
        tensor_outputs (Dict[str, Tensor]): external_output to its tensor.

        force_mask_on (Bool): if true, make sure there will be pred_masks even
            if the mask is not found in tensor_outputs (usually due to model crash)

    Returns:
        list[Instances]: one entry per image; only a single image is supported.
    """
    results = [Instances(size) for size in image_sizes]

    batch_splits = tensor_outputs.get("batch_splits", None)
    if batch_splits:
        raise NotImplementedError()
    assert len(image_sizes) == 1
    result = results[0]

    boxes, scores, classes = (
        tensor_outputs[key] for key in ("bbox_nms", "score_nms", "class_nms")
    )
    # Detection will always succeed because Conv supports 0-batch
    assert boxes is not None
    assert scores is not None
    assert classes is not None
    # Five columns are treated as rotated boxes, anything else as plain boxes.
    box_type = RotatedBoxes if boxes.shape[1] == 5 else Boxes
    result.pred_boxes = box_type(boxes)
    result.scores = scores
    result.pred_classes = classes.to(torch.int64)

    mask_probs = tensor_outputs.get("mask_fcn_probs", None)
    if mask_probs is not None:
        # For every detection keep only the mask of its predicted class.
        labels = result.pred_classes
        rows = torch.arange(mask_probs.shape[0], device=labels.device)
        result.pred_masks = mask_probs[rows, labels][:, None]
    elif force_mask_on:
        # NOTE: there's no way to know the height/width of mask here, it won't be
        # used anyway when batch size is 0, so just set them to 0.
        result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8)

    keypoints_out = tensor_outputs.get("keypoints_out", None)
    kps_score = tensor_outputs.get("kps_score", None)
    if keypoints_out is not None:
        # keypoints_out: [N, 4, #keypoints], where 4 is in order of (x, y, score, prob)
        # NOTE: it's possible that prob is not calculated if "should_output_softmax"
        # is set to False in HeatmapMaxKeypoint, so just using raw score, seems
        # it doesn't affect mAP. TODO: check more carefully.
        result.pred_keypoints = keypoints_out.transpose(1, 2)[:, :, [0, 1, 2]]
    elif kps_score is not None:
        # Convert the keypoint heatmap into the sparse structure in place.
        keypoint_head.keypoint_rcnn_inference(kps_score, [result])

    return results
|
||||
|
||||
|
||||
def _cast_to_f32(f64):
    """Round a Python float to single precision by packing it as a C float."""
    packed = struct.pack("f", f64)
    (f32,) = struct.unpack("f", packed)
    return f32
|
||||
|
||||
|
||||
def set_caffe2_compatible_tensor_mode(model, enable=True):
    """
    Set `tensor_mode` to `enable` on every Caffe2Compatible module visited by
    `model.apply`; other modules are left untouched.
    """

    def _toggle(submodule):
        if not isinstance(submodule, Caffe2Compatible):
            return
        submodule.tensor_mode = enable

    model.apply(_toggle)
|
||||
|
||||
|
||||
def convert_batched_inputs_to_c2_format(batched_inputs, size_divisibility, device):
    """
    Batch detectron2-style inputs into the (data, im_info) pair of tensors.

    Args:
        batched_inputs (list[dict]): each dict holds a 3-dim "image" tensor and
            optionally "height".
        size_divisibility: forwarded to ImageList.from_tensors.
        device: device both returned tensors are moved to.

    Returns:
        (Tensor, Tensor): the batched image tensor, and im_info with one
        (height, width, scale) row per image.
    """
    assert all(isinstance(x, dict) for x in batched_inputs)
    assert all(x["image"].dim() == 3 for x in batched_inputs)

    images = ImageList.from_tensors([x["image"] for x in batched_inputs], size_divisibility)

    rows = []
    for per_image, size in zip(batched_inputs, images.image_sizes):
        # NOTE: The scale inside im_info is kept as convention and for providing
        # post-processing information if further processing is needed. For
        # current Caffe2 model definitions that don't include post-processing inside
        # the model, this number is not used.
        # NOTE: There can be a slight difference between width and height
        # scales, using a single number can result in numerical difference
        # compared with D2's post-processing. Only the height scale is stored.
        scale = per_image.get("height", size[0]) / size[0]
        rows.append([size[0], size[1], scale])
    im_info = torch.Tensor(rows)

    return images.tensor.to(device), im_info.to(device)
|
||||
|
||||
|
||||
class Caffe2MetaArch(Caffe2Compatible, torch.nn.Module):
    """
    Base class for caffe2-compatible implementations of a meta architecture.
    Its forward is traceable, and the traced graph can be converted to a
    caffe2 graph through ONNX.
    """

    def __init__(self, cfg, torch_model):
        """
        Args:
            cfg (CfgNode):
            torch_model (nn.Module): the detectron2 model (meta_arch) to be
                converted.
        """
        super().__init__()
        self._wrapped_model = torch_model
        # Switch to eval mode and enable tensor mode on all Caffe2Compatible
        # submodules.
        self.eval()
        set_caffe2_compatible_tensor_mode(self, True)

    def get_caffe2_inputs(self, batched_inputs):
        """
        Convert pytorch-style structured inputs to caffe2-style inputs that
        are tuples of tensors.

        Args:
            batched_inputs (list[dict]): inputs to a detectron2 model
                in its standard format. Each dict has "image" (CHW tensor), and optionally
                "height" and "width".

        Returns:
            tuple[Tensor]:
                tuple of tensors that will be the inputs to the
                :meth:`forward` method. For existing models, the first
                is an NCHW tensor (padded and batched); the second is
                a im_info Nx3 tensor, where the rows are
                (height, width, unused legacy parameter)
        """
        wrapped = self._wrapped_model
        return convert_batched_inputs_to_c2_format(
            batched_inputs, wrapped.backbone.size_divisibility, wrapped.device
        )

    def encode_additional_info(self, predict_net, init_net):
        """
        Save extra metadata that will be used by inference in the output protobuf.
        The base implementation stores nothing.
        """

    def forward(self, inputs):
        """
        Run the forward in caffe2-style. It has to use caffe2-compatible ops
        and the method will be used for tracing.

        Args:
            inputs (tuple[Tensor]): inputs defined by :meth:`get_caffe2_inputs`.
                They will be the inputs of the converted caffe2 graph.

        Returns:
            tuple[Tensor]: output tensors. They will be the outputs of the
                converted caffe2 graph.
        """
        raise NotImplementedError

    def _caffe2_preprocess_image(self, inputs):
        """
        Caffe2 implementation of preprocess_image, which is called inside each MetaArch's forward.
        It normalizes the input images, and the final caffe2 graph assumes the
        inputs have been batched already.
        """
        raw, im_info = inputs
        raw = alias(raw, "data")
        im_info = alias(im_info, "im_info")
        pixel_mean = self._wrapped_model.pixel_mean
        pixel_std = self._wrapped_model.pixel_std
        normalized = alias((raw - pixel_mean) / pixel_std, "normalized_data")

        # Pack (data, im_info) into ImageList which is recognized by self.inference.
        return ImageList(tensor=normalized, image_sizes=im_info)

    @staticmethod
    def get_outputs_converter(predict_net, init_net):
        """
        Creates a function that converts outputs of the caffe2 model to
        detectron2's standard format.
        The function uses information in `predict_net` and `init_net` that are
        available at inference time. Therefore the function logic can be used in inference.

        The returned function has the following signature:

            def convert(batched_inputs, c2_inputs, c2_results) -> detectron2_outputs

        Where

            * batched_inputs (list[dict]): the original input format of the meta arch
            * c2_inputs (dict[str, Tensor]): the caffe2 inputs.
            * c2_results (dict[str, Tensor]): the caffe2 output format,
                corresponding to the outputs of the :meth:`forward` function.
            * detectron2_outputs: the original output format of the meta arch.

        This function can be used to compare the outputs of the original meta arch and
        the converted caffe2 graph.

        Returns:
            callable: a callable of the above signature.
        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class Caffe2GeneralizedRCNN(Caffe2MetaArch):
    """Caffe2-compatible wrapper around a detectron2 GeneralizedRCNN."""

    def __init__(self, cfg, torch_model):
        assert isinstance(torch_model, meta_arch.GeneralizedRCNN)
        patched_model = patch_generalized_rcnn(torch_model)
        super().__init__(cfg, patched_model)

        self.roi_heads_patcher = ROIHeadsPatcher(cfg, self._wrapped_model.roi_heads)

    def encode_additional_info(self, predict_net, init_net):
        """Store size_divisibility, device and meta_architecture in `predict_net`."""
        wrapped = self._wrapped_model
        check_set_pb_arg(
            predict_net, "size_divisibility", "i", wrapped.backbone.size_divisibility
        )
        device_name = str(wrapped.device).encode("ascii")
        check_set_pb_arg(predict_net, "device", "s", device_name)
        check_set_pb_arg(predict_net, "meta_architecture", "s", b"GeneralizedRCNN")

    @mock_torch_nn_functional_interpolate()
    def forward(self, inputs):
        # Outside tensor mode, defer to the wrapped model's regular inference.
        if not self.tensor_mode:
            return self._wrapped_model.inference(inputs)
        wrapped = self._wrapped_model
        images = self._caffe2_preprocess_image(inputs)
        features = wrapped.backbone(images.tensor)
        proposals, _ = wrapped.proposal_generator(images, features)
        with self.roi_heads_patcher.mock_roi_heads():
            detections, _ = wrapped.roi_heads(images, features, proposals)
        return tuple(detections[0].flatten())

    @staticmethod
    def get_outputs_converter(predict_net, init_net):
        def convert(batched_inputs, c2_inputs, c2_results):
            # The first two columns of im_info are read as (height, width).
            image_sizes = [[int(row[0]), int(row[1])] for row in c2_inputs["im_info"]]
            instances = assemble_rcnn_outputs_by_name(image_sizes, c2_results)
            return meta_arch.GeneralizedRCNN._postprocess(
                instances, batched_inputs, image_sizes
            )

        return convert
|
||||
|
||||
|
||||
class Caffe2PanopticFPN(Caffe2MetaArch):
    """Caffe2-compatible wrapper around a detectron2 PanopticFPN."""

    def __init__(self, cfg, torch_model):
        assert isinstance(torch_model, meta_arch.PanopticFPN)
        patched_model = patch_generalized_rcnn(torch_model)
        super().__init__(cfg, patched_model)

        self.roi_heads_patcher = ROIHeadsPatcher(cfg, self._wrapped_model.roi_heads)

    @mock_torch_nn_functional_interpolate()
    def forward(self, inputs):
        assert self.tensor_mode
        wrapped = self._wrapped_model
        images = self._caffe2_preprocess_image(inputs)
        features = wrapped.backbone(images.tensor)

        sem_seg, _ = wrapped.sem_seg_head(features)
        sem_seg = alias(sem_seg, "sem_seg")

        proposals, _ = wrapped.proposal_generator(images, features)

        with self.roi_heads_patcher.mock_roi_heads(self.tensor_mode):
            detections, _ = wrapped.roi_heads(images, features, proposals)

        # Detector outputs come first, the semantic segmentation output last.
        return tuple(detections[0].flatten()) + (sem_seg,)

    def encode_additional_info(self, predict_net, init_net):
        """Store export metadata and the panoptic combine parameters in `predict_net`."""
        wrapped = self._wrapped_model
        check_set_pb_arg(
            predict_net, "size_divisibility", "i", wrapped.backbone.size_divisibility
        )
        device_name = str(wrapped.device).encode("ascii")
        check_set_pb_arg(predict_net, "device", "s", device_name)
        check_set_pb_arg(predict_net, "meta_architecture", "s", b"PanopticFPN")

        # Inference parameters:
        check_set_pb_arg(predict_net, "combine_on", "i", wrapped.combine_on)
        overlap_threshold = _cast_to_f32(wrapped.combine_overlap_threshold)
        check_set_pb_arg(predict_net, "combine_overlap_threshold", "f", overlap_threshold)
        check_set_pb_arg(
            predict_net, "combine_stuff_area_limit", "i", wrapped.combine_stuff_area_limit
        )
        confidence_threshold = _cast_to_f32(wrapped.combine_instances_confidence_threshold)
        check_set_pb_arg(
            predict_net, "combine_instances_confidence_threshold", "f", confidence_threshold
        )

    @staticmethod
    def get_outputs_converter(predict_net, init_net):
        # Read back the parameters written by encode_additional_info.
        combine_on = get_pb_arg_vali(predict_net, "combine_on", None)
        overlap_threshold = get_pb_arg_valf(predict_net, "combine_overlap_threshold", None)
        stuff_area_limit = get_pb_arg_vali(predict_net, "combine_stuff_area_limit", None)
        confidence_threshold = get_pb_arg_valf(
            predict_net, "combine_instances_confidence_threshold", None
        )

        def convert(batched_inputs, c2_inputs, c2_results):
            image_sizes = [[int(row[0]), int(row[1])] for row in c2_inputs["im_info"]]
            detector_results = assemble_rcnn_outputs_by_name(
                image_sizes, c2_results, force_mask_on=True
            )
            sem_seg_results = c2_results["sem_seg"]

            # copied from meta_arch/panoptic_fpn.py ...
            processed = []
            per_image = zip(sem_seg_results, detector_results, batched_inputs, image_sizes)
            for sem_seg_result, detector_result, image_input, image_size in per_image:
                height = image_input.get("height", image_size[0])
                width = image_input.get("width", image_size[1])
                sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width)
                detector_r = detector_postprocess(detector_result, height, width)

                entry = {"sem_seg": sem_seg_r, "instances": detector_r}
                processed.append(entry)

                if combine_on:
                    entry["panoptic_seg"] = combine_semantic_and_instance_outputs(
                        detector_r,
                        sem_seg_r.argmax(dim=0),
                        overlap_threshold,
                        stuff_area_limit,
                        confidence_threshold,
                    )
            return processed

        return convert
|
||||
|
||||
|
||||
class Caffe2RetinaNet(Caffe2MetaArch):
    """Caffe2-compatible wrapper around a detectron2 RetinaNet."""

    def __init__(self, cfg, torch_model):
        assert isinstance(torch_model, meta_arch.RetinaNet)
        super().__init__(cfg, torch_model)

    @mock_torch_nn_functional_interpolate()
    def forward(self, inputs):
        assert self.tensor_mode
        images = self._caffe2_preprocess_image(inputs)

        # explicitly return the images sizes to avoid removing "im_info" by ONNX
        # since it's not used in the forward path
        return_tensors = [images.image_sizes]

        features = self._wrapped_model.backbone(images.tensor)
        features = [features[f] for f in self._wrapped_model.in_features]
        for i, feature_i in enumerate(features):
            features[i] = alias(feature_i, "feature_{}".format(i), is_backward=True)
            return_tensors.append(features[i])

        pred_logits, pred_anchor_deltas = self._wrapped_model.head(features)
        for i, (box_cls_i, box_delta_i) in enumerate(zip(pred_logits, pred_anchor_deltas)):
            return_tensors.append(alias(box_cls_i, "box_cls_{}".format(i)))
            return_tensors.append(alias(box_delta_i, "box_delta_{}".format(i)))

        return tuple(return_tensors)

    def encode_additional_info(self, predict_net, init_net):
        """Store export metadata and RetinaNet inference parameters in `predict_net`."""
        size_divisibility = self._wrapped_model.backbone.size_divisibility
        check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility)
        check_set_pb_arg(
            predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii")
        )
        check_set_pb_arg(predict_net, "meta_architecture", "s", b"RetinaNet")

        # Inference parameters:
        check_set_pb_arg(
            predict_net, "score_threshold", "f", _cast_to_f32(self._wrapped_model.score_threshold)
        )
        check_set_pb_arg(predict_net, "topk_candidates", "i", self._wrapped_model.topk_candidates)
        check_set_pb_arg(
            predict_net, "nms_threshold", "f", _cast_to_f32(self._wrapped_model.nms_threshold)
        )
        check_set_pb_arg(
            predict_net,
            "max_detections_per_image",
            "i",
            self._wrapped_model.max_detections_per_image,
        )

        check_set_pb_arg(
            predict_net,
            "bbox_reg_weights",
            "floats",
            [_cast_to_f32(w) for w in self._wrapped_model.box2box_transform.weights],
        )
        self._encode_anchor_generator_cfg(predict_net)

    def _encode_anchor_generator_cfg(self, predict_net):
        """Serialize the anchor generator with torch.save into a net argument."""
        # serialize anchor_generator for future use
        serialized_anchor_generator = io.BytesIO()
        torch.save(self._wrapped_model.anchor_generator, serialized_anchor_generator)
        # Ideally we can put anchor generating inside the model, then we don't
        # need to store this information.
        payload = serialized_anchor_generator.getvalue()
        check_set_pb_arg(predict_net, "serialized_anchor_generator", "s", payload)

    @staticmethod
    def get_outputs_converter(predict_net, init_net):
        # A plain namespace stands in for a RetinaNet instance so that the
        # unbound RetinaNet inference methods can be reused below.
        self = types.SimpleNamespace()
        serialized_anchor_generator = io.BytesIO(
            get_pb_arg_vals(predict_net, "serialized_anchor_generator", None)
        )
        # SECURITY NOTE: torch.load unpickles these bytes, which can execute
        # arbitrary code. Only load predict_net files from trusted sources.
        self.anchor_generator = torch.load(serialized_anchor_generator)
        bbox_reg_weights = get_pb_arg_floats(predict_net, "bbox_reg_weights", None)
        self.box2box_transform = Box2BoxTransform(weights=tuple(bbox_reg_weights))
        self.score_threshold = get_pb_arg_valf(predict_net, "score_threshold", None)
        self.topk_candidates = get_pb_arg_vali(predict_net, "topk_candidates", None)
        self.nms_threshold = get_pb_arg_valf(predict_net, "nms_threshold", None)
        self.max_detections_per_image = get_pb_arg_vali(
            predict_net, "max_detections_per_image", None
        )

        # hack to reuse inference code from RetinaNet
        self.inference = functools.partial(meta_arch.RetinaNet.inference, self)
        self.inference_single_image = functools.partial(
            meta_arch.RetinaNet.inference_single_image, self
        )

        def f(batched_inputs, c2_inputs, c2_results):
            image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]]

            # One "box_cls_<i>" / "box_delta_<i>" pair exists per feature level.
            num_features = sum(1 for name in c2_results if name.startswith("box_cls_"))
            pred_logits = [c2_results["box_cls_{}".format(i)] for i in range(num_features)]
            pred_anchor_deltas = [c2_results["box_delta_{}".format(i)] for i in range(num_features)]

            # For each feature level, feature should have the same batch size and
            # spatial dimension as the box_cls and box_delta.
            dummy_features = [x.clone()[:, 0:0, :, :] for x in pred_logits]
            anchors = self.anchor_generator(dummy_features)

            # self.num_classes can be inferred from the channel counts:
            # logits channels divided by (delta channels // 4).
            self.num_classes = pred_logits[0].shape[1] // (pred_anchor_deltas[0].shape[1] // 4)

            pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits]
            pred_anchor_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_anchor_deltas]

            results = self.inference(anchors, pred_logits, pred_anchor_deltas, image_sizes)
            return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes)

        return f
|
||||
|
||||
|
||||
# Maps a meta-architecture name to the Caffe2-export wrapper class for it.
# NOTE(review): keys presumably match cfg.MODEL.META_ARCHITECTURE -- confirm.
META_ARCH_CAFFE2_EXPORT_TYPE_MAP = {
    "GeneralizedRCNN": Caffe2GeneralizedRCNN,
    "PanopticFPN": Caffe2PanopticFPN,
    "RetinaNet": Caffe2RetinaNet,
}
|
|
@ -0,0 +1,153 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import contextlib
|
||||
import mock
|
||||
import torch
|
||||
|
||||
from detectron2.modeling import poolers
|
||||
from detectron2.modeling.proposal_generator import rpn
|
||||
from detectron2.modeling.roi_heads import keypoint_head, mask_head
|
||||
from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers
|
||||
|
||||
from .c10 import (
|
||||
Caffe2Compatible,
|
||||
Caffe2FastRCNNOutputsInference,
|
||||
Caffe2KeypointRCNNInference,
|
||||
Caffe2MaskRCNNInference,
|
||||
Caffe2ROIPooler,
|
||||
Caffe2RPN,
|
||||
)
|
||||
|
||||
|
||||
class GenericMixin(object):
    """
    Marker base class. ``Caffe2CompatibleConverter.create_from`` treats replacement
    classes deriving from it as mixins to combine with the module's own class.
    """
|
||||
|
||||
|
||||
class Caffe2CompatibleConverter(object):
    """
    Updater implementing the ``create_from`` interface: it rewrites the class of
    an existing module object in place so that it becomes ``replaceCls``.
    """

    def __init__(self, replaceCls):
        self.replaceCls = replaceCls

    def create_from(self, module):
        """
        Swap ``module.__class__`` for the replacement class and return ``module``.

        When ``replaceCls`` derives from ``GenericMixin`` a new class combining
        ``replaceCls`` with the module's current class is created on the fly;
        otherwise ``replaceCls`` is used as-is.
        """
        assert isinstance(module, torch.nn.Module)

        new_class = self.replaceCls
        if issubclass(new_class, GenericMixin):
            # replaceCls acts as a mixin: put it in front of the original class
            original_class = module.__class__
            new_class = type(
                "{}MixedWith{}".format(self.replaceCls.__name__, original_class.__name__),
                (self.replaceCls, original_class),
                {},  # {"new_method": lambda self: ...},
            )
        # for a complete replaceCls this allows an arbitrary class swap
        module.__class__ = new_class

        # initialize Caffe2Compatible
        if isinstance(module, Caffe2Compatible):
            module.tensor_mode = False

        return module
|
||||
|
||||
|
||||
def patch(model, target, updater, *args, **kwargs):
    """
    Recursively (post-order) pass every module that is an instance of ``target``
    (or a subclass) through ``updater.create_from``.

    Children are processed before their parent and each child slot is replaced
    by whatever the recursive call returns. Extra positional and keyword
    arguments are forwarded to ``updater.create_from``.

    Returns:
        The result of ``updater.create_from(model, ...)`` if ``model`` matches
        ``target``, otherwise ``model`` itself.
    """
    for child_name, child in model.named_children():
        model._modules[child_name] = patch(child, target, updater, *args, **kwargs)

    if not isinstance(model, target):
        return model
    return updater.create_from(model, *args, **kwargs)
|
||||
|
||||
|
||||
def patch_generalized_rcnn(model):
    """
    Convert the RPN and ROI pooler modules of ``model`` to their Caffe2 variants.

    Returns the patched model (the value returned by the last ``patch`` call).
    """
    replacements = (
        (rpn.RPN, Caffe2RPN),
        (poolers.ROIPooler, Caffe2ROIPooler),
    )
    for target_type, caffe2_type in replacements:
        model = patch(model, target_type, Caffe2CompatibleConverter(caffe2_type))
    return model
|
||||
|
||||
|
||||
@contextlib.contextmanager
def mock_fastrcnn_outputs_inference(
    tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers
):
    """
    Context manager replacing ``box_predictor_type.inference`` with
    ``Caffe2FastRCNNOutputsInference(tensor_mode)`` for the duration of the block.

    Args:
        tensor_mode: forwarded to ``Caffe2FastRCNNOutputsInference``.
        check (bool): if True, assert on exit that the mock was called at least once.
        box_predictor_type: class whose ``inference`` attribute gets patched.
    """
    replacement = Caffe2FastRCNNOutputsInference(tensor_mode)
    patcher = mock.patch.object(
        box_predictor_type, "inference", autospec=True, side_effect=replacement
    )
    with patcher as mocked_func:
        yield
    if not check:
        return
    assert mocked_func.call_count > 0
|
||||
|
||||
|
||||
@contextlib.contextmanager
def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True):
    """
    Context manager replacing ``<patched_module>.mask_rcnn_inference`` with
    ``Caffe2MaskRCNNInference()``.

    Args:
        tensor_mode: accepted for signature parity; not used in this function.
        patched_module (str): dotted module path in which the function is patched.
        check (bool): if True, assert on exit that the mock was called at least once.
    """
    target = "{}.mask_rcnn_inference".format(patched_module)
    with mock.patch(target, side_effect=Caffe2MaskRCNNInference()) as mocked_func:
        yield
    if not check:
        return
    assert mocked_func.call_count > 0
|
||||
|
||||
|
||||
@contextlib.contextmanager
def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True):
    """
    Context manager replacing ``<patched_module>.keypoint_rcnn_inference`` with
    ``Caffe2KeypointRCNNInference(use_heatmap_max_keypoint)``.

    Args:
        tensor_mode: accepted for signature parity; not used in this function.
        patched_module (str): dotted module path in which the function is patched.
        use_heatmap_max_keypoint: forwarded to ``Caffe2KeypointRCNNInference``.
        check (bool): if True, assert on exit that the mock was called at least once.
    """
    target = "{}.keypoint_rcnn_inference".format(patched_module)
    replacement = Caffe2KeypointRCNNInference(use_heatmap_max_keypoint)
    with mock.patch(target, side_effect=replacement) as mocked_func:
        yield
    if not check:
        return
    assert mocked_func.call_count > 0
|
||||
|
||||
|
||||
class ROIHeadsPatcher:
    """
    Holds a ROI heads object and the keypoint export option, and provides a
    context manager that swaps its inference functions for Caffe2 versions.
    """

    def __init__(self, cfg, heads):
        self.heads = heads

        self.use_heatmap_max_keypoint = cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT

    @contextlib.contextmanager
    def mock_roi_heads(self, tensor_mode=True):
        """
        Patching several inference functions inside ROIHeads and its subclasses

        Args:
            tensor_mode (bool): whether the inputs/outputs are caffe2's tensor
                format or not. Default to True.
        """
        # NOTE: this requires the `keypoint_rcnn_inference` and `mask_rcnn_inference`
        # are called inside the same file as BaseXxxHead due to using mock.patch.
        kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__
        mask_head_mod = mask_head.BaseMaskRCNNHead.__module__

        # The box predictor is always patched; keypoint/mask patches are added
        # only when the heads object enables them.
        managers = []
        managers.append(
            mock_fastrcnn_outputs_inference(
                tensor_mode=tensor_mode,
                check=True,
                box_predictor_type=type(self.heads.box_predictor),
            )
        )
        if getattr(self.heads, "keypoint_on", False):
            managers.append(
                mock_keypoint_rcnn_inference(
                    tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint
                )
            )
        if getattr(self.heads, "mask_on", False):
            managers.append(mock_mask_rcnn_inference(tensor_mode, mask_head_mod))

        with contextlib.ExitStack() as stack:  # python 3.3+
            for manager in managers:
                stack.enter_context(manager)
            yield
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,223 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
import importlib.util
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from contextlib import contextmanager
|
||||
import torch
|
||||
|
||||
# need an explicit import due to https://github.com/pytorch/pytorch/issues/38964
|
||||
from detectron2.structures import Boxes, Instances # noqa F401
|
||||
|
||||
# Incremented by `_gen_class` on every call; the value is embedded in the generated
# class name and in the module name used by `_import` so that each one is unique.
_counter = 0
|
||||
|
||||
|
||||
def export_torchscript_with_instances(model, fields):
    """
    Run :func:`torch.jit.script` on a model that uses the :class:`Instances` class. Since
    attributes of :class:`Instances` are "dynamically" added in eager mode, it is difficult
    for torchscript to support it out of the box. This function is made to support scripting
    a model that uses :class:`Instances`. It does the following:

    1. Create a scriptable ``new_Instances`` class which behaves similarly to ``Instances``,
       but with all attributes being "static".
       The attributes need to be statically declared in the ``fields`` argument.
    2. Register ``new_Instances`` to torchscript, and force torchscript to
       use it when trying to compile ``Instances``.

    After this function returns, the patching is reverted, so another model using
    different fields can be scripted afterwards.

    Example:
        Assume that ``Instances`` in the model consist of two attributes named
        ``proposal_boxes`` and ``objectness_logits`` with type :class:`Boxes` and
        :class:`Tensor` respectively during inference. You can call this function like:

        ::
            fields = {"proposal_boxes": "Boxes", "objectness_logits": "Tensor"}
            torchscript_model = export_torchscript_with_instances(model, fields)

    Note:
        Currently we only support models in evaluation mode. Exporting models in training mode
        or running inference processes of torchscripts that are exported from models in training
        mode may encounter unexpected errors.

    Args:
        model (nn.Module): The input model to be exported to torchscript.
        fields (Dict[str, str]): Attribute names and corresponding type annotations that
            ``Instances`` will use in the model. Note that all attributes used in ``Instances``
            need to be added, regardless of whether they are inputs/outputs of the model.
            Custom data type is not supported for now.

    Returns:
        torch.jit.ScriptModule: the input model in torchscript format
    """
    in_eval_mode = not model.training
    assert in_eval_mode, "Currently we only support exporting models in evaluation mode to torchscript"

    # `Instances` is only patched while the model is being scripted.
    with patch_instances(fields):
        return torch.jit.script(model)
|
||||
|
||||
|
||||
@contextmanager
def patch_instances(fields):
    """
    Context manager that temporarily makes torchscript use a generated, statically
    typed class in place of ``Instances``.

    A module defining the class is generated from ``fields`` (see ``_gen_module``),
    written to a temporary file, imported and scripted. ``Instances`` is then
    tagged so that torchscript resolves it to the generated class.

    Args:
        fields (Dict[str, str]): attribute names and type annotations, passed to
            ``_gen_module``.

    Yields:
        The generated class.

    On exit (normal or exceptional) the two attributes set on ``Instances`` are
    removed and the generated module is dropped from ``sys.modules``.
    """
    with tempfile.TemporaryDirectory(prefix="detectron2") as tmp_dir, tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", suffix=".py", dir=tmp_dir, delete=False
    ) as f:
        # Stays None if generation/import fails, so that cleanup below does not
        # raise a NameError that would hide the original exception.
        module = None
        try:
            cls_name, s = _gen_module(fields)
            f.write(s)
            f.flush()
            # close before importing so the file content is fully on disk
            f.close()

            module = _import(f.name)
            new_instances = getattr(module, cls_name)
            _ = torch.jit.script(new_instances)

            # let torchscript think Instances was scripted already
            Instances.__torch_script_class__ = True
            # let torchscript find new_instances when looking for the jit type of Instances
            Instances._jit_override_qualname = torch._jit_internal._qualified_name(new_instances)
            yield new_instances
        finally:
            # Remove each marker independently: a missing first attribute must
            # not leave the second one behind.
            for attr in ("__torch_script_class__", "_jit_override_qualname"):
                try:
                    delattr(Instances, attr)
                except AttributeError:
                    pass
            if module is not None:
                sys.modules.pop(module.__name__, None)
|
||||
|
||||
|
||||
# TODO: find a more automatic way to enable import of other classes
|
||||
def _gen_imports():
|
||||
imports_str = """
|
||||
from copy import deepcopy
|
||||
import torch
|
||||
from torch import Tensor
|
||||
import typing
|
||||
from typing import *
|
||||
|
||||
from detectron2.structures import Boxes, Instances
|
||||
|
||||
"""
|
||||
return imports_str
|
||||
|
||||
|
||||
def _gen_class(fields):
    """
    Generate the source code of a scriptable, statically typed stand-in for ``Instances``.

    Args:
        fields (Dict[str, str]): attribute names mapped to the type annotation
            (as text) to use for each attribute in the generated code.

    Returns:
        Tuple[str, str]: the generated class name and its source code.

    Side effect: increments the module-level ``_counter``, which is part of the
    generated class name.
    """

    def indent(level, s):
        # prefix `s` with `level` levels of 4-space indentation
        return " " * 4 * level + s

    lines = []

    global _counter
    _counter += 1

    cls_name = "Instances_patched{}".format(_counter)

    # class header and the start of __init__
    lines.append(
        f"""
class {cls_name}:
    def __init__(self, image_size: Tuple[int, int]):
        self.image_size = image_size
"""
    )

    # one Optional-typed private attribute per field, appended to __init__'s body
    for name, type_ in fields.items():
        lines.append(indent(2, f"self._{name} = torch.jit.annotate(Optional[{type_}], None)"))

    # a property/setter pair per field exposing the private attribute
    for name, type_ in fields.items():
        lines.append(
            f"""
    @property
    def {name}(self) -> {type_}:
        # has to use a local for type refinement
        # https://pytorch.org/docs/stable/jit_language_reference.html#optional-type-refinement
        t = self._{name}
        assert t is not None
        return t

    @{name}.setter
    def {name}(self, value: {type_}) -> None:
        self._{name} = value
"""
        )

    # support function attribute `__len__`:
    # the generated method returns len() of the first field that is not None
    lines.append(
        """
    def __len__(self) -> int:
"""
    )
    for name, _ in fields.items():
        lines.append(
            f"""
        t = self._{name}
        if t is not None:
            return len(t)
"""
        )
    lines.append(
        """
        raise NotImplementedError("Empty Instances does not support __len__!")
"""
    )

    # support function attribute `has`:
    # the generated method compares `name` against each known field in turn
    lines.append(
        """
    def has(self, name: str) -> bool:
"""
    )
    for name, _ in fields.items():
        lines.append(
            f"""
        if name == "{name}":
            return self._{name} is not None
"""
        )
    lines.append(
        """
        return False
"""
    )

    # support function attribute `from_instances`
    # (doubled braces below are literal braces in the generated code)
    lines.append(
        f"""
    @torch.jit.unused
    @staticmethod
    def from_instances(instances: Instances) -> "{cls_name}":
        fields = instances.get_fields()
        image_size = instances.image_size
        new_instances = {cls_name}(image_size)
        for name, val in fields.items():
            assert hasattr(new_instances, '_{{}}'.format(name)), \\
                "No attribute named {{}} in {cls_name}".format(name)
            setattr(new_instances, name, deepcopy(val))
        return new_instances
"""
    )
    return cls_name, os.linesep.join(lines)
|
||||
|
||||
|
||||
def _gen_module(fields):
    """
    Generate the full source of the temporary module: the import header from
    ``_gen_imports`` followed by the class produced by ``_gen_class``.

    Returns:
        Tuple[str, str]: the generated class name and the module source.
    """
    header = _gen_imports()
    cls_name, cls_def = _gen_class(fields)
    return cls_name, header + cls_def
|
||||
|
||||
|
||||
def _import(path):
    """
    Import the Python source file at ``path`` as a new module and return it.

    The module name is this module's name with the current ``_counter`` value
    appended; the module is registered in ``sys.modules`` before it is executed.
    """
    # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
    this_module_name = sys.modules[__name__].__name__
    generated_name = "{}{}".format(this_module_name, _counter)

    spec = importlib.util.spec_from_file_location(generated_name, path)
    loaded = importlib.util.module_from_spec(spec)
    sys.modules[loaded.__name__] = loaded
    spec.loader.exec_module(loaded)
    return loaded
|
|
@ -0,0 +1,9 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
"""
|
||||
Model Zoo API for Detectron2: a collection of functions to create common model architectures and
|
||||
optionally load pre-trained weights as released in
|
||||
`MODEL_ZOO.md <https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md>`_.
|
||||
"""
|
||||
from .model_zoo import get, get_config_file, get_checkpoint_url
|
||||
|
||||
__all__ = ["get_checkpoint_url", "get", "get_config_file"]
|
|
@ -0,0 +1,153 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import os
|
||||
import pkg_resources
|
||||
import torch
|
||||
|
||||
from detectron2.checkpoint import DetectionCheckpointer
|
||||
from detectron2.config import get_cfg
|
||||
from detectron2.modeling import build_model
|
||||
|
||||
|
||||
class _ModelZooUrls(object):
|
||||
"""
|
||||
Mapping from names to officially released Detectron2 pre-trained models.
|
||||
"""
|
||||
|
||||
S3_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/"
|
||||
|
||||
# format: {config_path.yaml} -> model_id/model_final_{commit}.pkl
|
||||
CONFIG_PATH_TO_URL_SUFFIX = {
|
||||
# COCO Detection with Faster R-CNN
|
||||
"COCO-Detection/faster_rcnn_R_50_C4_1x.yaml": "137257644/model_final_721ade.pkl",
|
||||
"COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml": "137847829/model_final_51d356.pkl",
|
||||
"COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml": "137257794/model_final_b275ba.pkl",
|
||||
"COCO-Detection/faster_rcnn_R_50_C4_3x.yaml": "137849393/model_final_f97cb7.pkl",
|
||||
"COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml": "137849425/model_final_68d202.pkl",
|
||||
"COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml": "137849458/model_final_280758.pkl",
|
||||
"COCO-Detection/faster_rcnn_R_101_C4_3x.yaml": "138204752/model_final_298dad.pkl",
|
||||
"COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml": "138204841/model_final_3e0943.pkl",
|
||||
"COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml": "137851257/model_final_f6e8b1.pkl",
|
||||
"COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml": "139173657/model_final_68b088.pkl",
|
||||
# COCO Detection with RetinaNet
|
||||
"COCO-Detection/retinanet_R_50_FPN_1x.yaml": "190397773/model_final_bfca0b.pkl",
|
||||
"COCO-Detection/retinanet_R_50_FPN_3x.yaml": "190397829/model_final_5bd44e.pkl",
|
||||
"COCO-Detection/retinanet_R_101_FPN_3x.yaml": "190397697/model_final_971ab9.pkl",
|
||||
# COCO Detection with RPN and Fast R-CNN
|
||||
"COCO-Detection/rpn_R_50_C4_1x.yaml": "137258005/model_final_450694.pkl",
|
||||
"COCO-Detection/rpn_R_50_FPN_1x.yaml": "137258492/model_final_02ce48.pkl",
|
||||
"COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml": "137635226/model_final_e5f7ce.pkl",
|
||||
# COCO Instance Segmentation Baselines with Mask R-CNN
|
||||
"COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml": "137259246/model_final_9243eb.pkl",
|
||||
"COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml": "137260150/model_final_4f86c3.pkl",
|
||||
"COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "137260431/model_final_a54504.pkl",
|
||||
"COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml": "137849525/model_final_4ce675.pkl",
|
||||
"COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml": "137849551/model_final_84107b.pkl",
|
||||
"COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml": "137849600/model_final_f10217.pkl",
|
||||
"COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml": "138363239/model_final_a2914c.pkl",
|
||||
"COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml": "138363294/model_final_0464b7.pkl",
|
||||
"COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml": "138205316/model_final_a3ec72.pkl",
|
||||
"COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml": "139653917/model_final_2d9806.pkl", # noqa
|
||||
# COCO Person Keypoint Detection Baselines with Keypoint R-CNN
|
||||
"COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml": "137261548/model_final_04e291.pkl",
|
||||
"COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml": "137849621/model_final_a6e10b.pkl",
|
||||
"COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml": "138363331/model_final_997cc7.pkl",
|
||||
"COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml": "139686956/model_final_5ad38f.pkl",
|
||||
# COCO Panoptic Segmentation Baselines with Panoptic FPN
|
||||
"COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml": "139514544/model_final_dbfeb4.pkl",
|
||||
"COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml": "139514569/model_final_c10459.pkl",
|
||||
"COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml": "139514519/model_final_cafdb1.pkl",
|
||||
# LVIS Instance Segmentation Baselines with Mask R-CNN
|
||||
"LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "144219072/model_final_571f7c.pkl",
|
||||
"LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml": "144219035/model_final_824ab5.pkl",
|
||||
"LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml": "144219108/model_final_5e3439.pkl", # noqa
|
||||
# Cityscapes & Pascal VOC Baselines
|
||||
"Cityscapes/mask_rcnn_R_50_FPN.yaml": "142423278/model_final_af9cf5.pkl",
|
||||
"PascalVOC-Detection/faster_rcnn_R_50_C4.yaml": "142202221/model_final_b1acc2.pkl",
|
||||
# Other Settings
|
||||
"Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml": "138602867/model_final_65c703.pkl",
|
||||
"Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml": "144998336/model_final_821d0b.pkl",
|
||||
"Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml": "138602847/model_final_e9d89b.pkl",
|
||||
"Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml": "144998488/model_final_480dd8.pkl",
|
||||
"Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml": "169527823/model_final_3b3c51.pkl",
|
||||
"Misc/mask_rcnn_R_50_FPN_3x_gn.yaml": "138602888/model_final_dc5d9e.pkl",
|
||||
"Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml": "138602908/model_final_01ca85.pkl",
|
||||
"Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml": "183808979/model_final_da7b4c.pkl",
|
||||
"Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml": "184226666/model_final_5ce33e.pkl",
|
||||
"Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml": "139797668/model_final_be35db.pkl",
|
||||
"Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml": "18131413/model_0039999_e76410.pkl", # noqa
|
||||
# D1 Comparisons
|
||||
"Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml": "137781054/model_final_7ab50c.pkl", # noqa
|
||||
"Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml": "137781281/model_final_62ca52.pkl", # noqa
|
||||
"Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml": "137781195/model_final_cce136.pkl",
|
||||
}
|
||||
|
||||
|
||||
def get_checkpoint_url(config_path):
    """
    Returns the URL to the model trained using the given config

    Args:
        config_path (str): config file name relative to detectron2's "configs/"
            directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"

    Returns:
        str: a URL to the model

    Raises:
        RuntimeError: if ``config_path`` has no entry in the model zoo table.
    """
    name = config_path.replace(".yaml", "")
    url_suffixes = _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX
    if config_path not in url_suffixes:
        raise RuntimeError("{} not available in Model Zoo!".format(name))
    # URL layout: <prefix><config name without .yaml>/<model_id>/<checkpoint file>
    return _ModelZooUrls.S3_PREFIX + name + "/" + url_suffixes[config_path]
|
||||
|
||||
|
||||
def get_config_file(config_path):
    """
    Returns path to a builtin config file.

    Args:
        config_path (str): config file name relative to detectron2's "configs/"
            directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"

    Returns:
        str: the real path to the config file.

    Raises:
        RuntimeError: if the resolved path does not exist.
    """
    relative_path = os.path.join("configs", config_path)
    cfg_file = pkg_resources.resource_filename("detectron2.model_zoo", relative_path)
    if os.path.exists(cfg_file):
        return cfg_file
    raise RuntimeError("{} not available in Model Zoo!".format(config_path))
|
||||
|
||||
|
||||
def get(config_path, trained: bool = False):
    """
    Get a model specified by relative path under Detectron2's official ``configs/`` directory.

    Args:
        config_path (str): config file name relative to detectron2's "configs/"
            directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"
        trained (bool): If True, will initialize the model with the trained model zoo weights.
            If False, the checkpoint specified in the config file's ``MODEL.WEIGHTS`` is used
            instead; this will typically (though not always) initialize a subset of weights using
            an ImageNet pre-trained model, while randomly initializing the other weights.

    Returns:
        nn.Module: a detectron2 model

    Example:
    ::
        from detectron2 import model_zoo
        model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml", trained=True)
    """
    cfg = get_cfg()
    cfg.merge_from_file(get_config_file(config_path))

    if trained:
        # override the config's weights with the released checkpoint
        cfg.MODEL.WEIGHTS = get_checkpoint_url(config_path)
    cuda_available = torch.cuda.is_available()
    if not cuda_available:
        cfg.MODEL.DEVICE = "cpu"

    model = build_model(cfg)
    checkpointer = DetectionCheckpointer(model)
    checkpointer.load(cfg.MODEL.WEIGHTS)
    return model
|
|
@ -0,0 +1,2 @@
|
|||
|
||||
Projects live in the [`projects` directory](../../projects) under the root of this repository, but not here.
|
|
@ -0,0 +1,31 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
|
||||
_PROJECTS = {
|
||||
"point_rend": "PointRend",
|
||||
"deeplab": "DeepLab",
|
||||
"panoptic_deeplab": "Panoptic-DeepLab",
|
||||
}
|
||||
_PROJECT_ROOT = Path(__file__).parent.parent.parent / "projects"
|
||||
|
||||
if _PROJECT_ROOT.is_dir():
    # This is true only for in-place installation (pip install -e, setup.py develop),
    # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230

    # A bare `import importlib` does not guarantee that the `abc` and `util`
    # submodules are loaded, so import them explicitly before they are used.
    import importlib.abc
    import importlib.util
    import sys

    class _D2ProjectsFinder(importlib.abc.MetaPathFinder):
        """
        Meta path finder resolving ``detectron2.projects.<name>`` to the matching
        package under the repository's ``projects`` directory (see ``_PROJECTS``).
        """

        def find_spec(self, name, path, target=None):
            """
            Return a module spec for a known project, or None so that the
            remaining finders get a chance to handle ``name``.
            """
            if not name.startswith("detectron2.projects."):
                return None
            project_name = name.split(".")[-1]
            project_dir = _PROJECTS.get(project_name)
            if not project_dir:
                return None
            target_file = _PROJECT_ROOT / f"{project_dir}/{project_name}/__init__.py"
            if not target_file.is_file():
                return None
            return importlib.util.spec_from_file_location(name, target_file)

    sys.meta_path.append(_D2ProjectsFinder())
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from .build import build_lr_scheduler, build_optimizer
|
||||
from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR
|
||||
|
||||
__all__ = [k for k in globals().keys() if not k.startswith("_")]
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,165 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from enum import Enum
|
||||
from typing import Any, Callable, Dict, Iterable, List, Set, Type, Union
|
||||
import torch
|
||||
|
||||
from detectron2.config import CfgNode
|
||||
|
||||
from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR
|
||||
|
||||
_GradientClipperInput = Union[torch.Tensor, Iterable[torch.Tensor]]
|
||||
_GradientClipper = Callable[[_GradientClipperInput], None]
|
||||
|
||||
|
||||
class GradientClipType(Enum):
    """Supported gradient clipping modes; the values are the strings accepted as ``CLIP_TYPE``."""

    # handled with torch.nn.utils.clip_grad_value_ in _create_gradient_clipper
    VALUE = "value"
    # handled with torch.nn.utils.clip_grad_norm_ in _create_gradient_clipper
    NORM = "norm"
|
||||
|
||||
|
||||
def _create_gradient_clipper(cfg: CfgNode) -> _GradientClipper:
    """
    Build a closure that clips gradients by value or by norm, as selected by
    ``cfg.CLIP_TYPE``.

    The config is cloned first, so the returned closure reads ``CLIP_VALUE``
    (and ``NORM_TYPE`` for norm clipping) from its own copy.

    Raises:
        ValueError: if ``cfg.CLIP_TYPE`` is not a valid ``GradientClipType`` value.
    """
    cfg = cfg.clone()

    def clip_grad_norm(p: _GradientClipperInput):
        torch.nn.utils.clip_grad_norm_(p, cfg.CLIP_VALUE, cfg.NORM_TYPE)

    def clip_grad_value(p: _GradientClipperInput):
        torch.nn.utils.clip_grad_value_(p, cfg.CLIP_VALUE)

    clip_type = GradientClipType(cfg.CLIP_TYPE)
    if clip_type is GradientClipType.VALUE:
        return clip_grad_value
    # the only other member is GradientClipType.NORM
    return clip_grad_norm
|
||||
|
||||
|
||||
def _generate_optimizer_class_with_gradient_clipping(
|
||||
optimizer_type: Type[torch.optim.Optimizer], gradient_clipper: _GradientClipper
|
||||
) -> Type[torch.optim.Optimizer]:
|
||||
"""
|
||||
Dynamically creates a new type that inherits the type of a given instance
|
||||
and overrides the `step` method to add gradient clipping
|
||||
"""
|
||||
|
||||
def optimizer_wgc_step(self, closure=None):
|
||||
for group in self.param_groups:
|
||||
for p in group["params"]:
|
||||
gradient_clipper(p)
|
||||
super(type(self), self).step(closure)
|
||||
|
||||
OptimizerWithGradientClip = type(
|
||||
optimizer_type.__name__ + "WithGradientClip",
|
||||
(optimizer_type,),
|
||||
{"step": optimizer_wgc_step},
|
||||
)
|
||||
return OptimizerWithGradientClip
|
||||
|
||||
|
||||
def maybe_add_gradient_clipping(
    cfg: CfgNode, optimizer: torch.optim.Optimizer
) -> torch.optim.Optimizer:
    """
    If gradient clipping is enabled through config options, turn the given
    optimizer instance of some type OptimizerType into an instance of the
    dynamically created class OptimizerTypeWithGradientClip, which inherits
    OptimizerType and overrides the `step` method to include gradient clipping.

    Args:
        cfg: CfgNode
            configuration options
        optimizer: torch.optim.Optimizer
            existing optimizer instance

    Return:
        optimizer: torch.optim.Optimizer
            either the unmodified optimizer instance (if gradient clipping is
            disabled), or the same instance with adjusted __class__ to override
            the `step` method and include gradient clipping
    """
    clip_cfg = cfg.SOLVER.CLIP_GRADIENTS
    if clip_cfg.ENABLED:
        clipper = _create_gradient_clipper(clip_cfg)
        # the instance is modified in place; no new optimizer is constructed
        optimizer.__class__ = _generate_optimizer_class_with_gradient_clipping(
            type(optimizer), clipper
        )
    return optimizer
|
||||
|
||||
|
||||
def build_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer:
    """
    Build an optimizer from config.

    Every trainable parameter gets its own param group. Parameters owned by a
    normalization module use ``SOLVER.WEIGHT_DECAY_NORM``; other parameters named
    "bias" use ``SOLVER.BASE_LR * SOLVER.BIAS_LR_FACTOR`` and
    ``SOLVER.WEIGHT_DECAY_BIAS``; the rest use ``SOLVER.BASE_LR`` and
    ``SOLVER.WEIGHT_DECAY``. The resulting SGD optimizer is passed through
    ``maybe_add_gradient_clipping``.
    """
    norm_module_types = (
        torch.nn.BatchNorm1d,
        torch.nn.BatchNorm2d,
        torch.nn.BatchNorm3d,
        torch.nn.SyncBatchNorm,
        # NaiveSyncBatchNorm inherits from BatchNorm2d
        torch.nn.GroupNorm,
        torch.nn.InstanceNorm1d,
        torch.nn.InstanceNorm2d,
        torch.nn.InstanceNorm3d,
        torch.nn.LayerNorm,
        torch.nn.LocalResponseNorm,
    )
    solver = cfg.SOLVER
    params: List[Dict[str, Any]] = []
    seen: Set[torch.nn.parameter.Parameter] = set()
    for module in model.modules():
        is_norm_module = isinstance(module, norm_module_types)
        for key, value in module.named_parameters(recurse=False):
            # Skip frozen parameters and avoid duplicating shared ones
            if not value.requires_grad or value in seen:
                continue
            seen.add(value)

            lr = solver.BASE_LR
            weight_decay = solver.WEIGHT_DECAY
            if is_norm_module:
                weight_decay = solver.WEIGHT_DECAY_NORM
            elif key == "bias":
                # NOTE: unlike Detectron v1, we now default BIAS_LR_FACTOR to 1.0
                # and WEIGHT_DECAY_BIAS to WEIGHT_DECAY so that bias optimizer
                # hyperparameters are by default exactly the same as for regular
                # weights.
                lr = solver.BASE_LR * solver.BIAS_LR_FACTOR
                weight_decay = solver.WEIGHT_DECAY_BIAS
            params.append({"params": [value], "lr": lr, "weight_decay": weight_decay})

    sgd = torch.optim.SGD(
        params, solver.BASE_LR, momentum=solver.MOMENTUM, nesterov=solver.NESTEROV
    )
    return maybe_add_gradient_clipping(cfg, sgd)
|
||||
|
||||
|
||||
def build_lr_scheduler(
    cfg: CfgNode, optimizer: torch.optim.Optimizer
) -> torch.optim.lr_scheduler._LRScheduler:
    """
    Build a LR scheduler from config.

    ``cfg.SOLVER.LR_SCHEDULER_NAME`` selects between "WarmupMultiStepLR" and
    "WarmupCosineLR"; any other name raises ``ValueError``.
    """
    solver = cfg.SOLVER
    name = solver.LR_SCHEDULER_NAME
    if name not in ("WarmupMultiStepLR", "WarmupCosineLR"):
        raise ValueError("Unknown LR scheduler: {}".format(name))

    # warmup settings are shared by both schedulers
    warmup_options = dict(
        warmup_factor=solver.WARMUP_FACTOR,
        warmup_iters=solver.WARMUP_ITERS,
        warmup_method=solver.WARMUP_METHOD,
    )
    if name == "WarmupMultiStepLR":
        return WarmupMultiStepLR(optimizer, solver.STEPS, solver.GAMMA, **warmup_options)
    return WarmupCosineLR(optimizer, solver.MAX_ITER, **warmup_options)
|
|
@ -0,0 +1,116 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import math
|
||||
from bisect import bisect_right
|
||||
from typing import List
|
||||
import torch
|
||||
|
||||
# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes
|
||||
# only on epoch boundaries. We typically use iteration based schedules instead.
|
||||
# As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean
|
||||
# "iteration" instead.
|
||||
|
||||
# FIXME: ideally this would be achieved with a CombinedLRScheduler, separating
|
||||
# MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it.
|
||||
|
||||
|
||||
class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    """
    Multi-step learning-rate schedule combined with a warmup phase.

    Each base LR is multiplied by the warmup factor from
    ``_get_warmup_factor_at_iter`` and by ``gamma ** k``, where ``k`` is the number
    of milestones that are <= the current iteration (``self.last_epoch``).
    """

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        milestones: List[int],
        gamma: float = 0.1,
        warmup_factor: float = 0.001,
        warmup_iters: int = 1000,
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        """
        Args:
            optimizer: the optimizer whose learning rates are scheduled.
            milestones: iterations at which the LR is multiplied by ``gamma``;
                must be sorted in non-decreasing order.
            gamma: multiplicative decay applied at each milestone.
            warmup_factor, warmup_iters, warmup_method: forwarded to
                ``_get_warmup_factor_at_iter`` on every ``get_lr`` call.
            last_epoch: forwarded to the base scheduler.

        Raises:
            ValueError: if ``milestones`` is not sorted.
        """
        if not list(milestones) == sorted(milestones):
            # format the message; passing `milestones` as a second argument to
            # ValueError would leave the "{}" placeholder unfilled
            raise ValueError(
                "Milestones should be a list of increasing integers. Got {}".format(milestones)
            )
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        # attributes must be set first: the base constructor already calls get_lr()
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        """Return the learning rate of every param group for the current iteration."""
        warmup_factor = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        # bisect_right counts the milestones that are <= last_epoch
        decay = self.gamma ** bisect_right(self.milestones, self.last_epoch)
        return [base_lr * warmup_factor * decay for base_lr in self.base_lrs]

    def _compute_values(self) -> List[float]:
        # The new interface
        return self.get_lr()
|
||||
|
||||
|
||||
class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler):
    """
    Half-cosine LR schedule multiplied by a warmup factor.

    ``last_epoch`` is used as the iteration counter.
    """

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        max_iters: int,
        warmup_factor: float = 0.001,
        warmup_iters: int = 1000,
        warmup_method: str = "linear",
        last_epoch: int = -1,
    ):
        """
        Args:
            optimizer: wrapped optimizer.
            max_iters: iteration count used as the half period of the cosine.
            warmup_factor: base warmup factor, forwarded to ``_get_warmup_factor_at_iter``.
            warmup_iters: number of warmup iterations.
            warmup_method: warmup method name, forwarded to ``_get_warmup_factor_at_iter``.
            last_epoch: index of the last iteration, forwarded to the base class.
        """
        self.max_iters = max_iters
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        """Return the LR of every parameter group at iteration ``self.last_epoch``."""
        warmup_scale = _get_warmup_factor_at_iter(
            self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor
        )
        # Several definitions of "half-cosine with warmup" exist. The simple
        # one is used here: the plain half-cosine schedule times the warmup
        # factor. Starting the cosine period at warmup_iters instead of 0 is
        # an alternative; both nearly coincide when warmup_iters << max_iters.
        learning_rates = []
        for base_lr in self.base_lrs:
            learning_rates.append(
                base_lr
                * warmup_scale
                * 0.5
                * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters))
            )
        return learning_rates

    def _compute_values(self) -> List[float]:
        # The new interface
        return self.get_lr()
|
||||
|
||||
|
||||
def _get_warmup_factor_at_iter(
|
||||
method: str, iter: int, warmup_iters: int, warmup_factor: float
|
||||
) -> float:
|
||||
"""
|
||||
Return the learning rate warmup factor at a specific iteration.
|
||||
See :paper:`ImageNet in 1h` for more details.
|
||||
|
||||
Args:
|
||||
method (str): warmup method; either "constant" or "linear".
|
||||
iter (int): iteration at which to calculate the warmup factor.
|
||||
warmup_iters (int): the number of warmup iterations.
|
||||
warmup_factor (float): the base warmup factor (the meaning changes according
|
||||
to the method used).
|
||||
|
||||
Returns:
|
||||
float: the effective warmup factor at the given iteration.
|
||||
"""
|
||||
if iter >= warmup_iters:
|
||||
return 1.0
|
||||
|
||||
if method == "constant":
|
||||
return warmup_factor
|
||||
elif method == "linear":
|
||||
alpha = iter / warmup_iters
|
||||
return warmup_factor * (1 - alpha) + alpha
|
||||
else:
|
||||
raise ValueError("Unknown warmup method: {}".format(method))
|
|
@ -0,0 +1,11 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa
|
||||
from .image_list import ImageList
|
||||
|
||||
from .instances import Instances
|
||||
from .keypoints import Keypoints, heatmaps_to_keypoints
|
||||
from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box, polygons_to_bitmask
|
||||
from .rotated_boxes import RotatedBoxes
|
||||
from .rotated_boxes import pairwise_iou as pairwise_iou_rotated
|
||||
|
||||
__all__ = [k for k in globals().keys() if not k.startswith("_")]
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,408 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import math
|
||||
import numpy as np
|
||||
from enum import IntEnum, unique
|
||||
from typing import Any, List, Tuple, Union
|
||||
import torch
|
||||
from torch import device
|
||||
|
||||
# Anything BoxMode.convert accepts as a box (or batch of boxes).
_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray]


@unique
class BoxMode(IntEnum):
    """
    Enum of different ways to represent a box.
    """

    XYXY_ABS = 0
    """
    (x0, y0, x1, y1) in absolute floating points coordinates.
    The coordinates in range [0, width or height].
    """
    XYWH_ABS = 1
    """
    (x0, y0, w, h) in absolute floating points coordinates.
    """
    XYXY_REL = 2
    """
    Not yet supported!
    (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image.
    """
    XYWH_REL = 3
    """
    Not yet supported!
    (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image.
    """
    XYWHA_ABS = 4
    """
    (xc, yc, w, h, a) in absolute floating points coordinates.
    (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw.
    """

    @staticmethod
    def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType:
        """
        Convert a box (or a batch of boxes) between representations.

        Supported conversions: XYWH_ABS <-> XYXY_ABS, XYWHA_ABS -> XYXY_ABS
        (axis-aligned bounding rectangle of the rotated box) and
        XYWH_ABS -> XYWHA_ABS (angle set to 0).

        Args:
            box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5
            from_mode, to_mode (BoxMode)

        Returns:
            The converted box of the same type. The input is returned as-is
            when ``from_mode == to_mode``; otherwise the input is not modified.

        Raises:
            AssertionError: for a list/tuple whose length is not 4 or 5, for
                relative modes, or for XYWHA input whose last dimension is not 5.
            NotImplementedError: for any other unsupported mode pair.
        """
        if from_mode == to_mode:
            return box

        original_type = type(box)
        is_numpy = isinstance(box, np.ndarray)
        single_box = isinstance(box, (list, tuple))
        if single_box:
            assert len(box) == 4 or len(box) == 5, (
                "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor,"
                " where k == 4 or 5"
            )
            # Promote the single box to a 1xk batch.
            arr = torch.tensor(box)[None, :]
        else:
            # avoid modifying the input box
            if is_numpy:
                arr = torch.from_numpy(np.asarray(box)).clone()
            else:
                arr = box.clone()

        assert to_mode.value not in [
            BoxMode.XYXY_REL,
            BoxMode.XYWH_REL,
        ] and from_mode.value not in [
            BoxMode.XYXY_REL,
            BoxMode.XYWH_REL,
        ], "Relative mode not yet supported!"

        if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS:
            assert (
                arr.shape[-1] == 5
            ), "The last dimension of input shape must be 5 for XYWHA format"
            original_dtype = arr.dtype
            # Compute in double precision, then cast back to the input dtype.
            arr = arr.double()

            w = arr[:, 2]
            h = arr[:, 3]
            a = arr[:, 4]
            c = torch.abs(torch.cos(a * math.pi / 180.0))
            s = torch.abs(torch.sin(a * math.pi / 180.0))
            # This basically computes the horizontal bounding rectangle of the rotated box
            new_w = c * w + s * h
            new_h = c * h + s * w

            # convert center to top-left corner
            arr[:, 0] -= new_w / 2.0
            arr[:, 1] -= new_h / 2.0
            # bottom-right corner
            arr[:, 2] = arr[:, 0] + new_w
            arr[:, 3] = arr[:, 1] + new_h

            arr = arr[:, :4].to(dtype=original_dtype)
        elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS:
            original_dtype = arr.dtype
            arr = arr.double()
            # Move the reference point from the top-left corner to the center.
            arr[:, 0] += arr[:, 2] / 2.0
            arr[:, 1] += arr[:, 3] / 2.0
            # new_zeros inherits dtype AND device from `arr`, so the
            # concatenation below also works for non-CPU tensors.
            angles = arr.new_zeros((arr.shape[0], 1))
            arr = torch.cat((arr, angles), dim=1).to(dtype=original_dtype)
        else:
            if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS:
                arr[:, 2] += arr[:, 0]
                arr[:, 3] += arr[:, 1]
            elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS:
                arr[:, 2] -= arr[:, 0]
                arr[:, 3] -= arr[:, 1]
            else:
                raise NotImplementedError(
                    "Conversion from BoxMode {} to {} is not supported yet".format(
                        from_mode, to_mode
                    )
                )

        # Hand the result back in the caller's original container type.
        if single_box:
            return original_type(arr.flatten().tolist())
        if is_numpy:
            return arr.numpy()
        else:
            return arr
|
||||
|
||||
|
||||
class Boxes:
    """
    A collection of boxes held in a single Nx4 torch.Tensor.

    Provides common box utilities (`area`, `clip`, `nonempty`, etc) and a
    Tensor-like surface (indexing, `to(device)`, `.device`, and iteration
    over the individual boxes).

    Attributes:
        tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2).
    """

    def __init__(self, tensor: torch.Tensor):
        """
        Args:
            tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2).
        """
        if isinstance(tensor, torch.Tensor):
            target_device = tensor.device
        else:
            target_device = torch.device("cpu")
        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=target_device)
        if tensor.numel() == 0:
            # Reshape (rather than build a fresh tensor) so the result still
            # depends on the input; a fresh tensor would confuse jit.
            tensor = tensor.reshape((0, 4)).to(dtype=torch.float32, device=target_device)
        assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size()

        self.tensor = tensor

    def clone(self) -> "Boxes":
        """
        Copy the Boxes.

        Returns:
            Boxes: a new object backed by a cloned tensor.
        """
        duplicate = self.tensor.clone()
        return Boxes(duplicate)

    @torch.jit.unused
    def to(self, *args: Any, **kwargs: Any):
        """Return new Boxes whose tensor is ``self.tensor.to(*args, **kwargs)``."""
        moved = self.tensor.to(*args, **kwargs)
        return Boxes(moved)

    def area(self) -> torch.Tensor:
        """
        Compute the area of every box.

        Returns:
            torch.Tensor: a vector with areas of each box.
        """
        coords = self.tensor
        box_widths = coords[:, 2] - coords[:, 0]
        box_heights = coords[:, 3] - coords[:, 1]
        return box_widths * box_heights

    def clip(self, box_size: Tuple[int, int]) -> None:
        """
        Clip (in place) the boxes by limiting x coordinates to the range [0, width]
        and y coordinates to the range [0, height].

        Args:
            box_size (height, width): The clipping box's size.
        """
        assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!"
        h, w = box_size
        # Columns are (x1, y1, x2, y2): x columns are bounded by the width,
        # y columns by the height.
        for column, upper in enumerate((w, h, w, h)):
            self.tensor[:, column].clamp_(min=0, max=upper)

    def nonempty(self, threshold: float = 0.0) -> torch.Tensor:
        """
        Find boxes that are non-empty.
        A box is considered empty, if either of its side is no larger than threshold.

        Returns:
            Tensor:
                a binary vector which represents whether each box is empty
                (False) or non-empty (True).
        """
        coords = self.tensor
        wide_enough = (coords[:, 2] - coords[:, 0]) > threshold
        tall_enough = (coords[:, 3] - coords[:, 1]) > threshold
        return wide_enough & tall_enough

    def __getitem__(self, item):
        """
        Args:
            item: int, slice, or a BoolTensor

        Returns:
            Boxes: Create a new :class:`Boxes` by indexing.

        The following usage are allowed:

        1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box.
        2. `new_boxes = boxes[2:10]`: return a slice of boxes.
        3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor
           with `length = len(boxes)`. Nonzero elements in the vector will be selected.

        Note that the returned Boxes might share storage with this Boxes,
        subject to Pytorch's indexing semantics.
        """
        if isinstance(item, int):
            # A single row is 1-D; restore the leading box dimension.
            single_row = self.tensor[item]
            return Boxes(single_row.view(1, -1))
        selected = self.tensor[item]
        assert selected.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(
            item
        )
        return Boxes(selected)

    def __len__(self) -> int:
        return self.tensor.shape[0]

    def __repr__(self) -> str:
        return f"Boxes({self.tensor!s})"

    def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor:
        """
        Args:
            box_size (height, width): Size of the reference box.
            boundary_threshold (int): Boxes that extend beyond the reference box
                boundary by more than boundary_threshold are considered "outside".

        Returns:
            a binary vector, indicating whether each box is inside the reference box.
        """
        height, width = box_size
        left_ok = self.tensor[..., 0] >= -boundary_threshold
        top_ok = self.tensor[..., 1] >= -boundary_threshold
        right_ok = self.tensor[..., 2] < width + boundary_threshold
        bottom_ok = self.tensor[..., 3] < height + boundary_threshold
        return left_ok & top_ok & right_ok & bottom_ok

    def get_centers(self) -> torch.Tensor:
        """
        Returns:
            The box centers in a Nx2 array of (x, y).
        """
        top_left = self.tensor[:, :2]
        bottom_right = self.tensor[:, 2:]
        return (top_left + bottom_right) / 2

    def scale(self, scale_x: float, scale_y: float) -> None:
        """
        Scale the boxes in place: x coordinates by ``scale_x``, y coordinates by ``scale_y``.
        """
        self.tensor[:, 0::2] *= scale_x
        self.tensor[:, 1::2] *= scale_y

    # classmethod not supported by torchscript. TODO try staticmethod
    @classmethod
    @torch.jit.unused
    def cat(cls, boxes_list):
        """
        Concatenates a list of Boxes into a single Boxes

        Arguments:
            boxes_list (list[Boxes])

        Returns:
            Boxes: the concatenated Boxes
        """
        assert isinstance(boxes_list, (list, tuple))
        if len(boxes_list) == 0:
            return cls(torch.empty(0))
        assert all(isinstance(entry, Boxes) for entry in boxes_list)

        # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input
        stacked = torch.cat([entry.tensor for entry in boxes_list], dim=0)
        return cls(stacked)

    @property
    def device(self) -> device:
        return self.tensor.device

    # type "Iterator[torch.Tensor]", yield, and iter() not supported by torchscript
    # https://github.com/pytorch/pytorch/issues/18627
    @torch.jit.unused
    def __iter__(self):
        """
        Yield a box as a Tensor of shape (4,) at a time.
        """
        for row in self.tensor:
            yield row
|
||||
|
||||
|
||||
def pairwise_intersection(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
    """
    Compute the intersection area for every pair drawn from two box lists
    of size N and M. The box order must be (xmin, ymin, xmax, ymax)

    Args:
        boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.

    Returns:
        Tensor: intersection, sized [N,M].
    """
    first, second = boxes1.tensor, boxes2.tensor
    # Broadcasting [N,1,2] against [M,2] gives one entry per (i, j) pair.
    lower_right = torch.min(first[:, None, 2:], second[:, 2:])  # [N,M,2]
    upper_left = torch.max(first[:, None, :2], second[:, :2])  # [N,M,2]
    overlap_wh = lower_right - upper_left  # [N,M,2]

    # Negative extents mean the boxes do not overlap along that axis.
    overlap_wh.clamp_(min=0)  # [N,M,2]
    return overlap_wh.prod(dim=2)  # [N,M]
|
||||
|
||||
|
||||
# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
|
||||
# with slight modifications
|
||||
def pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
    """
    Compute the IoU (intersection over union) for every pair drawn from two
    box lists of size N and M.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.

    Returns:
        Tensor: IoU, sized [N,M].
    """
    area1 = boxes1.area()  # [N]
    area2 = boxes2.area()  # [M]
    overlap = pairwise_intersection(boxes1, boxes2)  # [N,M]

    union = area1[:, None] + area2 - overlap  # [N,M]
    # Pairs without overlap (including empty boxes) get IoU 0 instead of the
    # result of the division.
    no_overlap = torch.zeros(1, dtype=overlap.dtype, device=overlap.device)
    return torch.where(overlap > 0, overlap / union, no_overlap)
|
||||
|
||||
|
||||
def pairwise_ioa(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
    """
    Similar to pairwise_iou but compute the IoA (intersection over boxes2 area).

    Args:
        boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.

    Returns:
        Tensor: IoA, sized [N,M].
    """
    reference_area = boxes2.area()  # [M]
    overlap = pairwise_intersection(boxes1, boxes2)  # [N,M]

    # Pairs without overlap (including empty boxes) get IoA 0 instead of the
    # result of the division.
    no_overlap = torch.zeros(1, dtype=overlap.dtype, device=overlap.device)
    return torch.where(overlap > 0, overlap / reference_area, no_overlap)
|
||||
|
||||
|
||||
def matched_boxlist_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
    """
    Compute pairwise intersection over union (IOU) of two sets of matched
    boxes. The box order must be (xmin, ymin, xmax, ymax).
    Similar to pairwise_iou, but computes only diagonal elements of the matrix,
    i.e. the IoU of ``boxes1[i]`` with ``boxes2[i]``.

    Unlike pairwise_iou, no special handling is applied when the union is
    zero; the plain division result is returned.

    Args:
        boxes1: (Boxes) bounding boxes, sized [N,4].
        boxes2: (Boxes) bounding boxes, sized [N,4].
    Returns:
        Tensor: iou, sized [N].

    Raises:
        AssertionError: if the two inputs hold a different number of boxes.
    """
    # The original message concatenated "...the same" "number..." without a
    # separating space; it is spelled out as one literal here.
    assert len(boxes1) == len(
        boxes2
    ), "boxlists should have the same number of entries, got {}, {}".format(
        len(boxes1), len(boxes2)
    )
    area1 = boxes1.area()  # [N]
    area2 = boxes2.area()  # [N]
    box1, box2 = boxes1.tensor, boxes2.tensor
    lt = torch.max(box1[:, :2], box2[:, :2])  # [N,2]
    rb = torch.min(box1[:, 2:], box2[:, 2:])  # [N,2]
    wh = (rb - lt).clamp(min=0)  # [N,2]
    inter = wh[:, 0] * wh[:, 1]  # [N]
    iou = inter / (area1 + area2 - inter)  # [N]
    return iou
|
|
@ -0,0 +1,120 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
from __future__ import division
|
||||
from typing import Any, List, Sequence, Tuple
|
||||
import torch
|
||||
from torch import device
|
||||
from torch.nn import functional as F
|
||||
|
||||
|
||||
class ImageList(object):
    """
    Structure that holds a list of images (of possibly
    varying sizes) as a single tensor.
    This works by padding the images to the same size,
    and storing in a field the original sizes of each image

    Attributes:
        tensor (Tensor): the padded, batched images.
        image_sizes (list[tuple[int, int]]): each tuple is (h, w)
    """

    def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]):
        """
        Arguments:
            tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1
            image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can
                be smaller than (H, W) due to padding.
        """
        self.tensor = tensor
        self.image_sizes = image_sizes

    def __len__(self) -> int:
        return len(self.image_sizes)

    def __getitem__(self, idx) -> torch.Tensor:
        """
        Access the individual image in its original size.

        Args:
            idx: int index of the image. (A slice is not usable here: the
                looked-up size must be a single (h, w) tuple.)

        Returns:
            Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1
        """
        size = self.image_sizes[idx]
        # Crop away the padding on the last two (spatial) dimensions.
        return self.tensor[idx, ..., : size[0], : size[1]]

    @torch.jit.unused
    def to(self, *args: Any, **kwargs: Any) -> "ImageList":
        """Return a new ImageList whose tensor is ``self.tensor.to(*args, **kwargs)``.

        The ``image_sizes`` list is shared with this object, not copied.
        """
        cast_tensor = self.tensor.to(*args, **kwargs)
        return ImageList(cast_tensor, self.image_sizes)

    @property
    def device(self) -> device:
        return self.tensor.device

    @staticmethod
    # https://github.com/pytorch/pytorch/issues/39308
    @torch.jit.unused
    def from_tensors(
        tensors: Sequence[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0
    ) -> "ImageList":
        """
        Args:
            tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or
                (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded
                to the same shape with `pad_value`.
            size_divisibility (int): If `size_divisibility > 0`, add padding to ensure
                the common height and width is divisible by `size_divisibility`.
                This depends on the model and many models need a divisibility of 32.
            pad_value (float): value to pad

        Returns:
            an `ImageList`.

        Raises:
            AssertionError: if `tensors` is empty, is not a tuple/list, holds a
                non-Tensor, or its elements disagree on any non-spatial dimension.
        """
        assert len(tensors) > 0
        assert isinstance(tensors, (tuple, list))
        for t in tensors:
            assert isinstance(t, torch.Tensor), type(t)
            # Every dimension except the last two (H, W) must match. Checking
            # shape[1:-2] would skip dim 0 (the channel dim of a (C, H, W)
            # image), and mismatched channels would then be silently
            # broadcast by copy_ below.
            assert t.shape[:-2] == tensors[0].shape[:-2], t.shape
        # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where K >= 1 among all tensors
        max_size = (
            # In tracing mode, x.shape[i] is Tensor, and should not be converted
            # to int: this will cause the traced graph to have hard-coded shapes.
            # Instead we should make max_size a Tensor that depends on these tensors.
            # Using torch.stack twice seems to be the best way to convert
            # list[list[ScalarTensor]] to a Tensor
            torch.stack(
                [
                    torch.stack([torch.as_tensor(dim) for dim in size])
                    for size in [tuple(img.shape) for img in tensors]
                ]
            )
            .max(0)
            .values
        )

        if size_divisibility > 1:
            stride = size_divisibility
            # the last two dims are H,W, both subject to divisibility requirement
            max_size = torch.cat([max_size[:-2], (max_size[-2:] + (stride - 1)) // stride * stride])

        image_sizes = [tuple(im.shape[-2:]) for im in tensors]

        if len(tensors) == 1:
            # This seems slightly (2%) faster.
            # TODO: check whether it's faster for multiple images as well
            image_size = image_sizes[0]
            # F.pad order for the last two dims: (left, right, top, bottom).
            padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]]
            if all(x == 0 for x in padding_size):  # https://github.com/pytorch/pytorch/issues/31734
                batched_imgs = tensors[0].unsqueeze(0)
            else:
                padded = F.pad(tensors[0], padding_size, value=pad_value)
                batched_imgs = padded.unsqueeze_(0)
        else:
            # max_size can be a tensor in tracing mode, therefore use tuple()
            batch_shape = (len(tensors),) + tuple(max_size)
            batched_imgs = tensors[0].new_full(batch_shape, pad_value)
            # Copy each image into the top-left corner of its padded slot.
            for img, pad_img in zip(tensors, batched_imgs):
                pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img)

        return ImageList(batched_imgs.contiguous(), image_sizes)
|
|
@ -0,0 +1,190 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import itertools
|
||||
from typing import Any, Dict, List, Tuple, Union
|
||||
import torch
|
||||
|
||||
|
||||
class Instances:
    """
    A list of instances in an image.
    The attributes of the instances (e.g., boxes, masks, labels, scores) are stored as "fields".
    All fields must have the same ``__len__`` which is the number of instances.

    All other (non-field) attributes of this class are considered private:
    they must start with '_' and are not modifiable by a user.

    Some basic usage:

    1. Set/get/check a field:

       .. code-block:: python

          instances.gt_boxes = Boxes(...)
          print(instances.pred_masks)  # a tensor of shape (N, H, W)
          print(instances.has('gt_masks'))

    2. ``len(instances)`` returns the number of instances
    3. Indexing: ``instances[indices]`` will apply the indexing on all the fields
       and returns a new :class:`Instances`.
       Typically, ``indices`` is a integer vector of indices,
       or a binary mask of length ``num_instances``

       .. code-block:: python

          category_3_detections = instances[instances.pred_classes == 3]
          confident_detections = instances[instances.scores > 0.9]
    """

    def __init__(self, image_size: Tuple[int, int], **kwargs: Any):
        """
        Args:
            image_size (height, width): the spatial size of the image.
            kwargs: fields to add to this `Instances`.
        """
        self._image_size = image_size
        self._fields: Dict[str, Any] = {}
        for field_name, field_value in kwargs.items():
            self.set(field_name, field_value)

    @property
    def image_size(self) -> Tuple[int, int]:
        """
        Returns:
            tuple: height, width
        """
        return self._image_size

    def __setattr__(self, name: str, val: Any) -> None:
        # Public names are fields; only underscore-prefixed names become
        # real attributes of the object.
        if not name.startswith("_"):
            self.set(name, val)
            return
        super().__setattr__(name, val)

    def __getattr__(self, name: str) -> Any:
        # The "_fields" guard avoids infinite recursion when the attribute
        # itself has not been created yet.
        if name == "_fields" or name not in self._fields:
            raise AttributeError("Cannot find field '{}' in the given Instances!".format(name))
        return self._fields[name]

    def set(self, name: str, value: Any) -> None:
        """
        Store `value` as the field called `name`.
        The length of `value` must be the number of instances,
        and must agree with other existing fields in this object.
        """
        data_len = len(value)
        if self._fields:
            assert (
                len(self) == data_len
            ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self))
        self._fields[name] = value

    def has(self, name: str) -> bool:
        """
        Returns:
            bool: whether the field called `name` exists.
        """
        return name in self._fields

    def remove(self, name: str) -> None:
        """
        Delete the field called `name`.
        """
        del self._fields[name]

    def get(self, name: str) -> Any:
        """
        Return the field called `name`.
        """
        return self._fields[name]

    def get_fields(self) -> Dict[str, Any]:
        """
        Returns:
            dict: a dict which maps names (str) to data of the fields

        Modifying the returned dict will modify this instance.
        """
        return self._fields

    # Tensor-like methods
    def to(self, *args: Any, **kwargs: Any) -> "Instances":
        """
        Returns:
            Instances: a new object in which every field that has a ``to``
            method is replaced by ``field.to(*args, **kwargs)``; other fields
            are carried over unchanged.
        """
        converted = Instances(self._image_size)
        for field_name, field_value in self._fields.items():
            if hasattr(field_value, "to"):
                field_value = field_value.to(*args, **kwargs)
            converted.set(field_name, field_value)
        return converted

    def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances":
        """
        Args:
            item: an index-like object and will be used to index all the fields.

        Returns:
            An `Instances` where all fields are indexed by `item`.
        """
        if type(item) == int:
            count = len(self)
            if not (-count <= item < count):
                raise IndexError("Instances index out of range!")
            # A slice with step == len selects exactly one element, so every
            # field keeps its leading "instances" dimension.
            item = slice(item, None, count)

        selected = Instances(self._image_size)
        for field_name, field_value in self._fields.items():
            selected.set(field_name, field_value[item])
        return selected

    def __len__(self) -> int:
        if not self._fields:
            raise NotImplementedError("Empty Instances does not support __len__!")
        # All fields share one length; the first is as good as any.
        first_field = next(iter(self._fields.values()))
        return len(first_field)

    def __iter__(self):
        raise NotImplementedError("`Instances` object is not iterable!")

    @staticmethod
    def cat(instance_lists: List["Instances"]) -> "Instances":
        """
        Args:
            instance_lists (list[Instances])

        Returns:
            Instances: the concatenation of all inputs. When only one
            `Instances` is given, that same object is returned.
        """
        assert all(isinstance(entry, Instances) for entry in instance_lists)
        assert len(instance_lists) > 0
        if len(instance_lists) == 1:
            return instance_lists[0]

        head = instance_lists[0]
        image_size = head.image_size
        for other in instance_lists[1:]:
            assert other.image_size == image_size

        merged = Instances(image_size)
        # The field names of the first element decide what is concatenated.
        for field_name in head._fields.keys():
            parts = [entry.get(field_name) for entry in instance_lists]
            sample = parts[0]
            if isinstance(sample, torch.Tensor):
                combined = torch.cat(parts, dim=0)
            elif isinstance(sample, list):
                combined = list(itertools.chain(*parts))
            elif hasattr(type(sample), "cat"):
                combined = type(sample).cat(parts)
            else:
                raise ValueError("Unsupported type {} for concatenation".format(type(sample)))
            merged.set(field_name, combined)
        return merged

    def __str__(self) -> str:
        field_text = ", ".join((f"{k}: {v}" for k, v in self._fields.items()))
        pieces = [
            self.__class__.__name__ + "(",
            "num_instances={}, ".format(len(self)),
            "image_height={}, ".format(self._image_size[0]),
            "image_width={}, ".format(self._image_size[1]),
            "fields=[{}])".format(field_text),
        ]
        return "".join(pieces)

    __repr__ = __str__
|
|
@ -0,0 +1,216 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import numpy as np
|
||||
from typing import Any, List, Tuple, Union
|
||||
import torch
|
||||
|
||||
from detectron2.layers import interpolate
|
||||
|
||||
|
||||
class Keypoints:
|
||||
"""
|
||||
Stores keypoint **annotation** data. GT Instances have a `gt_keypoints` property
|
||||
containing the x,y location and visibility flag of each keypoint. This tensor has shape
|
||||
(N, K, 3) where N is the number of instances and K is the number of keypoints per instance.
|
||||
|
||||
The visibility flag follows the COCO format and must be one of three integers:
|
||||
* v=0: not labeled (in which case x=y=0)
|
||||
* v=1: labeled but not visible
|
||||
* v=2: labeled and visible
|
||||
"""
|
||||
|
||||
def __init__(self, keypoints: Union[torch.Tensor, np.ndarray, List[List[float]]]):
|
||||
"""
|
||||
Arguments:
|
||||
keypoints: A Tensor, numpy array, or list of the x, y, and visibility of each keypoint.
|
||||
The shape should be (N, K, 3) where N is the number of
|
||||
instances, and K is the number of keypoints per instance.
|
||||
"""
|
||||
device = keypoints.device if isinstance(keypoints, torch.Tensor) else torch.device("cpu")
|
||||
keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
|
||||
assert keypoints.dim() == 3 and keypoints.shape[2] == 3, keypoints.shape
|
||||
self.tensor = keypoints
|
||||
|
||||
def __len__(self) -> int:
|
||||
return self.tensor.size(0)
|
||||
|
||||
def to(self, *args: Any, **kwargs: Any) -> "Keypoints":
|
||||
return type(self)(self.tensor.to(*args, **kwargs))
|
||||
|
||||
@property
|
||||
def device(self) -> torch.device:
|
||||
return self.tensor.device
|
||||
|
||||
def to_heatmap(self, boxes: torch.Tensor, heatmap_size: int) -> torch.Tensor:
|
||||
"""
|
||||
Convert keypoint annotations to a heatmap of one-hot labels for training,
|
||||
as described in :paper:`Mask R-CNN`.
|
||||
|
||||
Arguments:
|
||||
boxes: Nx4 tensor, the boxes to draw the keypoints to
|
||||
|
||||
Returns:
|
||||
heatmaps:
|
||||
A tensor of shape (N, K), each element is integer spatial label
|
||||
in the range [0, heatmap_size**2 - 1] for each keypoint in the input.
|
||||
valid:
|
||||
A tensor of shape (N, K) containing whether each keypoint is in the roi or not.
|
||||
"""
|
||||
return _keypoints_to_heatmap(self.tensor, boxes, heatmap_size)
|
||||
|
||||
def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Keypoints":
    """
    Build a new `Keypoints` from a subset of the instances stored here.

    Supported forms of indexing:

    1. `new_kpts = kpts[3]`: a `Keypoints` holding exactly one instance.
    2. `new_kpts = kpts[2:10]`: a slice of the instances.
    3. `new_kpts = kpts[vector]`, where vector is a torch.BoolTensor
       with `length = len(kpts)`. Entries that are True are selected.

    The result may share storage with this object, depending on
    Pytorch's indexing semantics.
    """
    picked = self.tensor[item]
    if isinstance(item, int):
        # An integer index drops the instance dimension; wrapping the result in a
        # list puts a leading dimension of size one back.
        return Keypoints([picked])
    return Keypoints(picked)
|
||||
|
||||
def __repr__(self) -> str:
    """Return ``<ClassName>(num_instances=<N>)`` where N is ``len(self.tensor)``."""
    class_name = self.__class__.__name__
    return class_name + "(" + "num_instances={})".format(len(self.tensor))
|
||||
|
||||
|
||||
# TODO make this nicer, this is a direct translation from C2 (but removing the inner loop)
|
||||
def _keypoints_to_heatmap(
    keypoints: torch.Tensor, rois: torch.Tensor, heatmap_size: int
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Encode keypoint locations into a target heatmap for use in SoftmaxWithLoss across space.

    Keypoints are mapped from the half-open interval [x1, x2) on continuous image
    coordinates to the closed interval [0, heatmap_size - 1] on discrete image
    coordinates, using the continuous-discrete conversion from Heckbert 1990
    ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5, where d is a
    discrete coordinate and c is a continuous coordinate.

    Arguments:
        keypoints: tensor of keypoint locations of shape (N, K, 3).
        rois: Nx4 tensor of rois in xyxy format
        heatmap_size: integer side length of square heatmap.

    Returns:
        heatmaps: A tensor of shape (N, K) containing an integer spatial label
            in the range [0, heatmap_size**2 - 1] for each keypoint in the input.
        valid: A tensor of shape (N, K) containing whether each keypoint is in
            the roi or not.
    """
    if rois.numel() == 0:
        return rois.new().long(), rois.new().long()

    # Per-roi quantities get a trailing axis so that they broadcast over the K keypoints.
    x_origin = rois[:, 0][:, None]
    y_origin = rois[:, 1][:, None]
    x_far_edge = rois[:, 2][:, None]
    y_far_edge = rois[:, 3][:, None]
    bins_per_unit_x = (heatmap_size / (rois[:, 2] - rois[:, 0]))[:, None]
    bins_per_unit_y = (heatmap_size / (rois[:, 3] - rois[:, 1]))[:, None]

    kp_x = keypoints[..., 0]
    kp_y = keypoints[..., 1]

    # Keypoints lying exactly on the right/bottom roi edge would floor to
    # `heatmap_size`; remember them so they can be put into the last bin.
    on_right_edge = kp_x == x_far_edge
    on_bottom_edge = kp_y == y_far_edge

    col = ((kp_x - x_origin) * bins_per_unit_x).floor().long()
    row = ((kp_y - y_origin) * bins_per_unit_y).floor().long()
    col[on_right_edge] = heatmap_size - 1
    row[on_bottom_edge] = heatmap_size - 1

    inside = (col >= 0) & (row >= 0) & (col < heatmap_size) & (row < heatmap_size)
    labeled = keypoints[..., 2] > 0
    valid = (inside & labeled).long()

    # Row-major linear index into the heatmap; invalid keypoints are zeroed out.
    heatmaps = (row * heatmap_size + col) * valid

    return heatmaps, valid
|
||||
|
||||
|
||||
def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor:
    """
    Extract predicted keypoint locations from heatmaps.

    Args:
        maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for
            each ROI and each keypoint.
        rois (Tensor): (#ROIs, 4). The box of each ROI.

    Returns:
        Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to
        (x, y, logit, score) for each keypoint.

    When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate,
    we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from
    Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate.
    """
    # Inputs are detached here because the decorator form of torch.no_grad()
    # was not supported by torchscript.
    # https://github.com/pytorch/pytorch/pull/41371
    maps = maps.detach()
    rois = rois.detach()

    x_origin = rois[:, 0]
    y_origin = rois[:, 1]

    # Each heatmap is resized to an integer (ceil) box size of at least 1 pixel; the
    # correction factors map integer-grid coordinates back to the real box extent.
    box_w = (rois[:, 2] - rois[:, 0]).clamp(min=1)
    box_h = (rois[:, 3] - rois[:, 1]).clamp(min=1)
    box_w_ceil = box_w.ceil()
    box_h_ceil = box_h.ceil()
    x_correction = box_w / box_w_ceil
    y_correction = box_h / box_h_ceil

    num_rois, num_keypoints = maps.shape[:2]
    xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4)
    kp_range = torch.arange(num_keypoints, device=maps.device)

    for i in range(num_rois):
        target_hw = (int(box_h_ceil[i]), int(box_w_ceil[i]))
        roi_map = interpolate(
            maps[[i]], size=target_hw, mode="bicubic", align_corners=False
        ).squeeze(0)  # #keypoints x H x W
        flat_map = roi_map.view(num_keypoints, -1)

        # softmax over the spatial region (the max is subtracted before exponentiating)
        peak_logit = flat_map.max(1)[0].view(num_keypoints, 1, 1)
        exp_full_resolution = (roi_map - peak_logit).exp_()
        exp_pool_resolution = (maps[i] - peak_logit).exp_()
        # Produce scores over the region H x W, but normalize with POOL_H x POOL_W,
        # so that the scores of objects of different absolute sizes will be more comparable
        roi_map_scores = exp_full_resolution / exp_pool_resolution.sum((1, 2), keepdim=True)

        # Recover the 2-D position of the strongest response from its flat index.
        w = roi_map.shape[2]
        pos = flat_map.argmax(1)
        x_int = pos % w
        y_int = (pos - x_int) // w

        best_scores = roi_map_scores[kp_range, y_int, x_int]
        assert (best_scores == roi_map_scores.view(num_keypoints, -1).max(1)[0]).all()

        x = (x_int.float() + 0.5) * x_correction[i]
        y = (y_int.float() + 0.5) * y_correction[i]

        xy_preds[i, :, 0] = x + x_origin[i]
        xy_preds[i, :, 1] = y + y_origin[i]
        xy_preds[i, :, 2] = roi_map[kp_range, y_int, x_int]
        xy_preds[i, :, 3] = roi_map_scores[kp_range, y_int, x_int]

    return xy_preds
|
|
@ -0,0 +1,438 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import copy
|
||||
import itertools
|
||||
import numpy as np
|
||||
from typing import Any, Iterator, List, Union
|
||||
import pycocotools.mask as mask_util
|
||||
import torch
|
||||
|
||||
from detectron2.layers.roi_align import ROIAlign
|
||||
|
||||
from .boxes import Boxes
|
||||
|
||||
|
||||
def polygon_area(x, y):
    """
    Return the area of a polygon given its vertex coordinates.

    Uses the shoelace formula:
    https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates

    Args:
        x, y: equally long sequences with the x and y coordinate of every vertex.
    """
    x_times_prev_y = np.dot(x, np.roll(y, 1))
    y_times_prev_x = np.dot(y, np.roll(x, 1))
    return 0.5 * np.abs(x_times_prev_y - y_times_prev_x)
|
||||
|
||||
|
||||
def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray:
    """
    Rasterize the polygons of one instance into a single binary mask.

    Args:
        polygons (list[ndarray]): each array has shape (Nx2,)
        height, width (int)

    Returns:
        ndarray: a bool mask of shape (height, width)
    """
    assert len(polygons) > 0, "COCOAPI does not support empty polygons"
    rles = mask_util.frPyObjects(polygons, height, width)
    # Merge the per-polygon RLEs into one RLE before decoding.
    rle = mask_util.merge(rles)
    # The `np.bool` alias was removed in NumPy 1.24; the builtin `bool` is the same dtype.
    return mask_util.decode(rle).astype(bool)
|
||||
|
||||
|
||||
def rasterize_polygons_within_box(
    polygons: List[np.ndarray], box: np.ndarray, mask_size: int
) -> torch.Tensor:
    """
    Rasterize the polygons into a mask image and
    crop the mask content in the given box.
    The cropped mask is resized to (mask_size, mask_size).

    This function is used when generating training targets for mask head in Mask R-CNN.
    Given original ground-truth masks for an image, new ground-truth mask
    training targets in the size of `mask_size x mask_size`
    must be provided for each predicted box. This function will be called to
    produce such targets.

    The input polygons are not modified; all arithmetic happens on a deep copy.

    Args:
        polygons (list[ndarray[float]]): a list of polygons, which represents an instance.
        box: 4-element numpy array
        mask_size (int):

    Returns:
        Tensor: BoolTensor of shape (mask_size, mask_size)
    """
    box_w, box_h = box[2] - box[0], box[3] - box[1]

    # Scale factors from box size to mask size; max() to avoid division by small number.
    ratio_h = mask_size / max(box_h, 0.1)
    ratio_w = mask_size / max(box_w, 0.1)
    same_ratio = ratio_h == ratio_w

    local_polygons = copy.deepcopy(polygons)
    for poly in local_polygons:
        # 1. Shift the polygon w.r.t. the box (even entries are x, odd entries are y).
        poly[0::2] = poly[0::2] - box[0]
        poly[1::2] = poly[1::2] - box[1]

        # 2. Rescale the polygon to the new box size.
        if same_ratio:
            poly *= ratio_h
        else:
            poly[0::2] *= ratio_w
            poly[1::2] *= ratio_h

    # 3. Rasterize the polygons with coco api
    bitmask = polygons_to_bitmask(local_polygons, mask_size, mask_size)
    return torch.from_numpy(bitmask)
|
||||
|
||||
|
||||
class BitMasks:
    """
    This class stores the segmentation masks for all objects in one image, in
    the form of bitmaps.

    Attributes:
        tensor: bool Tensor of N,H,W, representing N instances in the image.
        image_size: the trailing (H, W) part of ``tensor.shape``.
    """

    def __init__(self, tensor: Union[torch.Tensor, np.ndarray]):
        """
        Args:
            tensor: bool Tensor of N,H,W, representing N instances in the image.
        """
        # Tensors stay on their device; any other input is converted on the CPU.
        device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu")
        tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device)
        assert tensor.dim() == 3, tensor.size()
        self.image_size = tensor.shape[1:]
        self.tensor = tensor

    def to(self, *args: Any, **kwargs: Any) -> "BitMasks":
        """Return a new BitMasks wrapping ``self.tensor.to(*args, **kwargs)``."""
        return BitMasks(self.tensor.to(*args, **kwargs))

    @property
    def device(self) -> torch.device:
        """The device of the underlying mask tensor."""
        return self.tensor.device

    def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks":
        """
        Returns:
            BitMasks: Create a new :class:`BitMasks` by indexing.

        The following usage are allowed:

        1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask.
        2. `new_masks = masks[2:10]`: return a slice of masks.
        3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor
           with `length = len(masks)`. Nonzero elements in the vector will be selected.

        Note that the returned object might share storage with this object,
        subject to Pytorch's indexing semantics.
        """
        if isinstance(item, int):
            # Integer indexing drops the instance axis. Re-add it with unsqueeze so the
            # result stays (1, H, W); flattening to (1, H*W) would be rejected by __init__.
            return BitMasks(self.tensor[item].unsqueeze(0))
        m = self.tensor[item]
        assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format(
            item, m.shape
        )
        return BitMasks(m)

    def __iter__(self) -> Iterator[torch.Tensor]:
        """Yield the masks one by one, each as a tensor of shape (H, W)."""
        yield from self.tensor

    def __repr__(self) -> str:
        s = self.__class__.__name__ + "("
        s += "num_instances={})".format(len(self.tensor))
        return s

    def __len__(self) -> int:
        return self.tensor.shape[0]

    def nonempty(self) -> torch.Tensor:
        """
        Find masks that are non-empty.

        Returns:
            Tensor: a BoolTensor which represents
                whether each mask is empty (False) or non-empty (True).
        """
        return self.tensor.flatten(1).any(dim=1)

    @staticmethod
    def from_polygon_masks(
        polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int
    ) -> "BitMasks":
        """
        Rasterize every instance's polygons into a (height, width) bitmask.

        Args:
            polygon_masks (list[list[ndarray]] or PolygonMasks)
            height, width (int)

        Returns:
            BitMasks: one mask per instance; an empty (0, height, width) BitMasks when
            `polygon_masks` contains no instances.
        """
        if isinstance(polygon_masks, PolygonMasks):
            polygon_masks = polygon_masks.polygons
        masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks]
        if not masks:
            # torch.stack raises on an empty list, so build the empty result directly.
            return BitMasks(torch.empty(0, height, width, dtype=torch.bool))
        return BitMasks(torch.stack([torch.from_numpy(x) for x in masks]))

    def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor:
        """
        Crop each bitmask by the given box, and resize results to (mask_size, mask_size).
        This can be used to prepare training targets for Mask R-CNN.
        It has less reconstruction error compared to rasterization with polygons.
        However we observe no difference in accuracy,
        but BitMasks requires more memory to store all the masks.

        Args:
            boxes (Tensor): Nx4 tensor storing the boxes for each mask
            mask_size (int): the size of the rasterized mask.

        Returns:
            Tensor:
                A bool tensor of shape (N, mask_size, mask_size), where
                N is the number of predicted boxes for this image.
        """
        assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self))
        device = self.tensor.device

        # Each roi row is (mask index, box): mask i is sampled with box i.
        batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None]
        rois = torch.cat([batch_inds, boxes], dim=1)  # Nx5

        bit_masks = self.tensor.to(dtype=torch.float32)
        rois = rois.to(device=device)
        output = (
            ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True)
            .forward(bit_masks[:, None, :, :], rois)
            .squeeze(1)
        )
        # Binarize the interpolated values again.
        output = output >= 0.5
        return output

    def get_bounding_boxes(self) -> "Boxes":
        """
        Returns:
            Boxes: tight bounding boxes around bitmasks.
            If a mask is empty, its bounding box will be all zero.
        """
        boxes = torch.zeros(self.tensor.shape[0], 4, dtype=torch.float32)
        # Columns / rows that contain at least one foreground pixel.
        x_any = torch.any(self.tensor, dim=1)
        y_any = torch.any(self.tensor, dim=2)
        for idx in range(self.tensor.shape[0]):
            x = torch.where(x_any[idx, :])[0]
            y = torch.where(y_any[idx, :])[0]
            if len(x) > 0 and len(y) > 0:
                # +1 because the far edge is exclusive of the last foreground pixel index.
                boxes[idx, :] = torch.as_tensor(
                    [x[0], y[0], x[-1] + 1, y[-1] + 1], dtype=torch.float32
                )
        return Boxes(boxes)

    @staticmethod
    def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks":
        """
        Concatenates a list of BitMasks into a single BitMasks

        Arguments:
            bitmasks_list (list[BitMasks])

        Returns:
            BitMasks: the concatenated BitMasks
        """
        assert isinstance(bitmasks_list, (list, tuple))
        assert len(bitmasks_list) > 0
        assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list)

        cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0))
        return cat_bitmasks
|
||||
|
||||
|
||||
class PolygonMasks:
    """
    This class stores the segmentation masks for all objects in one image, in the form of polygons.

    Attributes:
        polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon.
    """

    def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]):
        """
        Arguments:
            polygons (list[list[np.ndarray]]): The first
                level of the list correspond to individual instances,
                the second level to all the polygons that compose the
                instance, and the third level to the polygon coordinates.
                The third level array should have the format of
                [x0, y0, x1, y1, ..., xn, yn] (n >= 3).
        """
        assert isinstance(polygons, list), (
            "Cannot create PolygonMasks: Expect a list of list of polygons per image. "
            "Got '{}' instead.".format(type(polygons))
        )

        def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray:
            # Use float64 for higher precision, because why not?
            # Always put polygons on CPU (self.to is a no-op) since they
            # are supposed to be small tensors.
            # May need to change this assumption if GPU placement becomes useful
            if isinstance(t, torch.Tensor):
                t = t.cpu().numpy()
            return np.asarray(t).astype("float64")

        def process_polygons(
            polygons_per_instance: List[Union[torch.Tensor, np.ndarray]]
        ) -> List[np.ndarray]:
            assert isinstance(polygons_per_instance, list), (
                "Cannot create polygons: Expect a list of polygons per instance. "
                "Got '{}' instead.".format(type(polygons_per_instance))
            )
            # transform each polygon to a float64 numpy array
            polygons_per_instance = [_make_array(p) for p in polygons_per_instance]
            for polygon in polygons_per_instance:
                # x/y pairs only, and at least three points
                assert len(polygon) % 2 == 0 and len(polygon) >= 6
            return polygons_per_instance

        self.polygons: List[List[np.ndarray]] = [
            process_polygons(polygons_per_instance) for polygons_per_instance in polygons
        ]

    def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks":
        """No-op: polygons are always kept on the CPU, so ``self`` is returned."""
        return self

    @property
    def device(self) -> torch.device:
        """Always the CPU device."""
        return torch.device("cpu")

    def get_bounding_boxes(self) -> "Boxes":
        """
        Returns:
            Boxes: tight bounding boxes around polygon masks.
        """
        boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32)
        for idx, polygons_per_instance in enumerate(self.polygons):
            # Running min / max over all polygons of the instance.
            minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32)
            maxxy = torch.zeros(2, dtype=torch.float32)
            for polygon in polygons_per_instance:
                coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32)
                minxy = torch.min(minxy, torch.min(coords, dim=0).values)
                maxxy = torch.max(maxxy, torch.max(coords, dim=0).values)
            boxes[idx, :2] = minxy
            boxes[idx, 2:] = maxxy
        return Boxes(boxes)

    def nonempty(self) -> torch.Tensor:
        """
        Find masks that are non-empty.

        Returns:
            Tensor:
                a BoolTensor which represents whether each mask is empty (False) or not (True).
        """
        keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons]
        # The `np.bool` alias was removed in NumPy 1.24; the builtin `bool` is the same dtype.
        return torch.from_numpy(np.asarray(keep, dtype=bool))

    def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks":
        """
        Support indexing over the instances and return a `PolygonMasks` object.
        `item` can be:

        1. An integer. It will return an object with only one instance.
        2. A slice. It will return an object with the selected instances.
        3. A list[int]. It will return an object with the selected instances,
           corresponding to the indices in the list.
        4. A vector mask of type BoolTensor, whose length is num_instances.
           It will return an object with the instances whose mask is nonzero.
           An int32/int64 index tensor is handled like a list[int].

        Raises:
            ValueError: if `item` is a tensor of an unsupported dtype.
            TypeError: if `item` is of any other unsupported type.
        """
        if isinstance(item, int):
            selected_polygons = [self.polygons[item]]
        elif isinstance(item, slice):
            selected_polygons = self.polygons[item]
        elif isinstance(item, list):
            selected_polygons = [self.polygons[i] for i in item]
        elif isinstance(item, torch.Tensor):
            # Polygons is a list, so we have to move the indices back to CPU.
            if item.dtype == torch.bool:
                assert item.dim() == 1, item.shape
                item = item.nonzero().squeeze(1).cpu().numpy().tolist()
            elif item.dtype in [torch.int32, torch.int64]:
                item = item.cpu().numpy().tolist()
            else:
                raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype))
            selected_polygons = [self.polygons[i] for i in item]
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise TypeError("Unsupported index type {} for PolygonMasks!".format(type(item)))
        return PolygonMasks(selected_polygons)

    def __iter__(self) -> Iterator[List[np.ndarray]]:
        """
        Yields:
            list[ndarray]: the polygons for one instance.
            Each ndarray is a float64 vector representing a polygon.
        """
        return iter(self.polygons)

    def __repr__(self) -> str:
        s = self.__class__.__name__ + "("
        s += "num_instances={})".format(len(self.polygons))
        return s

    def __len__(self) -> int:
        return len(self.polygons)

    def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor:
        """
        Crop each mask by the given box, and resize results to (mask_size, mask_size).
        This can be used to prepare training targets for Mask R-CNN.

        Args:
            boxes (Tensor): Nx4 tensor storing the boxes for each mask
            mask_size (int): the size of the rasterized mask.

        Returns:
            Tensor: A bool tensor of shape (N, mask_size, mask_size), where
            N is the number of predicted boxes for this image.
        """
        assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self))

        device = boxes.device
        # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise
        # (several small tensors for representing a single instance mask)
        boxes = boxes.to(torch.device("cpu"))

        # poly: list[ndarray], the polygons for one instance
        # box: a tensor of shape (4,)
        results = [
            rasterize_polygons_within_box(poly, box.numpy(), mask_size)
            for poly, box in zip(self.polygons, boxes)
        ]
        if len(results) == 0:
            return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device)
        # The result is moved back to the device the boxes came from.
        return torch.stack(results, dim=0).to(device=device)

    def area(self):
        """
        Computes area of the mask.
        Only works with Polygons, using the shoelace formula:
        https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates

        The area of an instance is the sum of the areas of its polygons.

        Returns:
            Tensor: a vector, area for each instance
        """

        area = []
        for polygons_per_instance in self.polygons:
            area_per_instance = 0
            for p in polygons_per_instance:
                # even entries are x coordinates, odd entries are y coordinates
                area_per_instance += polygon_area(p[0::2], p[1::2])
            area.append(area_per_instance)

        return torch.tensor(area)

    @staticmethod
    def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks":
        """
        Concatenates a list of PolygonMasks into a single PolygonMasks

        Arguments:
            polymasks_list (list[PolygonMasks])

        Returns:
            PolygonMasks: the concatenated PolygonMasks
        """
        assert isinstance(polymasks_list, (list, tuple))
        assert len(polymasks_list) > 0
        assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list)

        cat_polymasks = type(polymasks_list[0])(
            list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list))
        )
        return cat_polymasks
|
|
@ -0,0 +1,481 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import math
|
||||
from typing import Any, Iterator, Tuple, Union
|
||||
import torch
|
||||
|
||||
from detectron2.layers.rotated_boxes import pairwise_iou_rotated
|
||||
|
||||
from .boxes import Boxes
|
||||
|
||||
|
||||
class RotatedBoxes(Boxes):
|
||||
"""
|
||||
This structure stores a list of rotated boxes as a Nx5 torch.Tensor.
|
||||
It supports some common methods about boxes
|
||||
(`area`, `clip`, `nonempty`, etc),
|
||||
and also behaves like a Tensor
|
||||
(support indexing, `to(device)`, `.device`, and iteration over all boxes)
|
||||
"""
|
||||
|
||||
def __init__(self, tensor: torch.Tensor):
    r"""
    Args:
        tensor (Tensor[float]): a Nx5 matrix. Each row is
            (x_center, y_center, width, height, angle),
            in which angle is represented in degrees.
            While there's no strict range restriction for it,
            the recommended principal range is between [-180, 180) degrees.

    The input is converted to a float32 tensor. An input without elements is
    reshaped to (0, 5).

    Assume we have a horizontal box B = (x_center, y_center, width, height),
    where width is along the x-axis and height is along the y-axis.
    The rotated box B_rot (x_center, y_center, width, height, angle)
    can be seen as:

    1. When angle == 0:
       B_rot == B
    2. When angle > 0:
       B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW;
    3. When angle < 0:
       B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW.

    Mathematically, since the right-handed coordinate system for image space
    is (y, x), where y is top->down and x is left->right, the 4 vertices of the
    rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from
    the vertices of the horizontal rectangle (y_i, x_i) (i = 1, 2, 3, 4)
    in the following way (:math:`\theta = angle*\pi/180` is the angle in radians,
    (y_c, x_c) is the center of the rectangle):

    .. math::

        yr_i = \cos(\theta) (y_i - y_c) - \sin(\theta) (x_i - x_c) + y_c,

        xr_i = \sin(\theta) (y_i - y_c) + \cos(\theta) (x_i - x_c) + x_c,

    which is the standard rigid-body rotation transformation.

    Intuitively, the angle is
    (1) the rotation angle from y-axis in image space
    to the height vector (top->down in the box's local coordinate system)
    of the box in CCW, and
    (2) the rotation angle from x-axis in image space
    to the width vector (left->right in the box's local coordinate system)
    of the box in CCW.

    More intuitively, consider the following horizontal box ABCD represented
    in (x1, y1, x2, y2): (3, 2, 7, 4),
    covering the [3, 7] x [2, 4] region of the continuous coordinate system
    which looks like this:

    .. code:: none

        O--------> x
        |
        |  A---B
        |  |   |
        |  D---C
        |
        v y

    Note that each capital letter represents one 0-dimensional geometric point
    instead of a 'square pixel' here.

    In the example above, using (x, y) to represent a point we have:

    .. math::

        O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4)

    We name vector AB = vector DC as the width vector in box's local coordinate system, and
    vector AD = vector BC as the height vector in box's local coordinate system. Initially,
    when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis
    in the image space, respectively.

    For better illustration, we denote the center of the box as E,

    .. code:: none

        O--------> x
        |
        |  A---B
        |  | E |
        |  D---C
        |
        v y

    where the center E = ((3+7)/2, (2+4)/2) = (5, 3).

    Also,

    .. math::

        width = |AB| = |CD| = 7 - 3 = 4,
        height = |AD| = |BC| = 4 - 2 = 2.

    Therefore, the corresponding representation for the same shape in rotated box in
    (x_center, y_center, width, height, angle) format is:

    (5, 3, 4, 2, 0),

    Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees
    CCW (counter-clockwise) by definition. It looks like this:

    .. code:: none

        O--------> x
        |   B-C
        |   | |
        |   |E|
        |   | |
        |   A-D
        v y

    The center E is still located at the same point (5, 3), while the vertices
    ABCD are rotated by 90 degrees CCW with regard to E:
    A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5)

    Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to
    vector AD or vector BC (the top->down height vector in box's local coordinate system),
    or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right
    width vector in box's local coordinate system).

    .. math::

        width = |AB| = |CD| = 5 - 1 = 4,
        height = |AD| = |BC| = 6 - 4 = 2.

    Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise)
    by definition? It looks like this:

    .. code:: none

        O--------> x
        |   D-A
        |   | |
        |   |E|
        |   | |
        |   C-B
        v y

    The center E is still located at the same point (5, 3), while the vertices
    ABCD are rotated by 90 degrees CW with regard to E:
    A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1)

    .. math::

        width = |AB| = |CD| = 5 - 1 = 4,
        height = |AD| = |BC| = 6 - 4 = 2.

    This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU
    will be 1. However, these two will generate different RoI Pooling results and
    should not be treated as an identical box.

    On the other hand, it's easy to see that (X, Y, W, H, A) is identical to
    (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be
    identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is
    equivalent to rotating the same shape 90 degrees CW.

    We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180):

    .. code:: none

        O--------> x
        |
        |  C---D
        |  | E |
        |  B---A
        |
        v y

    .. math::

        A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2),

        width = |AB| = |CD| = 7 - 3 = 4,
        height = |AD| = |BC| = 4 - 2 = 2.

    Finally, this is a very inaccurate (heavily quantized) illustration of
    what (5, 3, 4, 2, 60) looks like, in case anyone wonders:

    .. code:: none

        O--------> x
        |     B\
        |    /  C
        |   /E /
        |  A  /
        |   `D
        v y

    It's still a rectangle with center of (5, 3), width of 4 and height of 2,
    but its angle (and thus orientation) is somewhere between
    (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90).
    """
    # Tensors stay on their device; any other input is converted on the CPU.
    if isinstance(tensor, torch.Tensor):
        device = tensor.device
    else:
        device = torch.device("cpu")
    boxes = torch.as_tensor(tensor, dtype=torch.float32, device=device)
    if boxes.numel() == 0:
        # Use reshape, so we don't end up creating a new tensor that does not depend on
        # the inputs (and consequently confuses jit)
        boxes = boxes.reshape((0, 5)).to(dtype=torch.float32, device=device)
    assert boxes.dim() == 2 and boxes.size(-1) == 5, boxes.size()

    self.tensor = boxes
|
||||
|
||||
def clone(self) -> "RotatedBoxes":
    """
    Clone the RotatedBoxes.

    Returns:
        RotatedBoxes: a new object wrapping ``self.tensor.clone()``.
    """
    duplicated = self.tensor.clone()
    return RotatedBoxes(duplicated)
|
||||
|
||||
def to(self, *args: Any, **kwargs: Any) -> "RotatedBoxes":
    """
    Return a new RotatedBoxes wrapping ``self.tensor.to(*args, **kwargs)``.

    All arguments are forwarded unchanged to ``torch.Tensor.to``.
    """
    converted = self.tensor.to(*args, **kwargs)
    return RotatedBoxes(converted)
|
||||
|
||||
def area(self) -> torch.Tensor:
    """
    Computes the area of all the boxes.

    The area of a box is the product of columns 2 and 3 of its row
    (width and height in the (x_center, y_center, width, height, angle) layout).

    Returns:
        torch.Tensor: a vector with areas of each box.
    """
    widths = self.tensor[:, 2]
    heights = self.tensor[:, 3]
    return widths * heights
|
||||
|
||||
def normalize_angles(self) -> None:
    """
    Restrict angles to the range of [-180, 180) degrees

    The angle column (index 4) of ``self.tensor`` is overwritten in place.
    """
    angles = self.tensor[:, 4]
    # Shift by 180 so the modulo wraps into [0, 360), then shift back.
    wrapped = (angles + 180.0) % 360.0 - 180.0
    self.tensor[:, 4] = wrapped
|
||||
|
||||
def clip(self, box_size: Tuple[int, int], clip_angle_threshold: float = 1.0) -> None:
    """
    Clip the boxes in place so that x coordinates lie in [0, width] and
    y coordinates lie in [0, height].

    For RRPN:
    Only boxes that are almost horizontal (within clip_angle_threshold) are
    clipped, to maintain backward compatibility.

    Boxes rotated beyond this threshold are left untouched for two reasons:

    1. There are potentially multiple ways to clip a rotated box so that it
       fits within the image.
    2. It is tricky to make the entire rectangular box fit within the image
       while still not leaving out pixels of interest.

    Therefore we rely on ops like RoIAlignRotated to safely handle this.

    Args:
        box_size (height, width): The clipping box's size.
        clip_angle_threshold:
            Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees),
            the box is clipped as if it were a horizontal box.
    """
    img_h, img_w = box_size

    # bring every angle into [-180, 180) degrees before applying the threshold
    self.normalize_angles()

    near_horizontal = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0]
    # advanced indexing yields a copy, so the original sizes stay available below
    selected = self.tensor[near_horizontal]
    half_w = selected[:, 2] / 2.0
    half_h = selected[:, 3] / 2.0

    # corner form (x1, y1, x2, y2), limited to the image extent
    left = (selected[:, 0] - half_w).clamp(min=0, max=img_w)
    top = (selected[:, 1] - half_h).clamp(min=0, max=img_h)
    right = (selected[:, 0] + half_w).clamp(min=0, max=img_w)
    bottom = (selected[:, 1] + half_h).clamp(min=0, max=img_h)

    # back to (xc, yc, w, h)
    self.tensor[near_horizontal, 0] = (left + right) / 2.0
    self.tensor[near_horizontal, 1] = (top + bottom) / 2.0
    # never let widths/heights grow because of numerical error
    self.tensor[near_horizontal, 2] = torch.min(selected[:, 2], right - left)
    self.tensor[near_horizontal, 3] = torch.min(selected[:, 3], bottom - top)
|
||||
|
||||
def nonempty(self, threshold: float = 0.0) -> torch.Tensor:
    """
    Find boxes that are non-empty.
    A box counts as empty if either of its sides is no larger than threshold.

    Returns:
        Tensor: a boolean vector, False for empty boxes and True for
        non-empty ones.
    """
    wide_enough = self.tensor[:, 2] > threshold
    tall_enough = self.tensor[:, 3] > threshold
    return wide_enough & tall_enough
|
||||
|
||||
def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "RotatedBoxes":
    """
    Returns:
        RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing.

    The following usages are allowed:

    1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box.
    2. `new_boxes = boxes[2:10]`: return a slice of boxes.
    3. `new_boxes = boxes[vector]`, where vector is a boolean mask tensor
       with `length = len(boxes)`. Nonzero elements in the vector will be selected.

    Note that the returned RotatedBoxes might share storage with this RotatedBoxes,
    subject to Pytorch's indexing semantics.
    """
    if isinstance(item, int):
        # a single row is 1-D; reshape it back into a one-box matrix
        single = self.tensor[item]
        return RotatedBoxes(single.view(1, -1))

    selected = self.tensor[item]
    assert (
        selected.dim() == 2
    ), "Indexing on RotatedBoxes with {} failed to return a matrix!".format(item)
    return RotatedBoxes(selected)
|
||||
|
||||
def __len__(self) -> int:
    """Return the number of boxes, i.e. the first dimension of the tensor."""
    return self.tensor.size(0)
|
||||
|
||||
def __repr__(self) -> str:
    """Return ``RotatedBoxes(<str of the underlying tensor>)``."""
    return f"RotatedBoxes({self.tensor!s})"
|
||||
|
||||
def inside_box(self, box_size: Tuple[int, int], boundary_threshold: int = 0) -> torch.Tensor:
    """
    Args:
        box_size (height, width): Size of the reference box covering
            [0, width] x [0, height]
        boundary_threshold (int): Boxes that extend beyond the reference box
            boundary by more than boundary_threshold are considered "outside".

    For RRPN, it might not be necessary to call this function since it's common
    for rotated box to extend to outside of the image boundaries
    (the clip function only clips the near-horizontal boxes)

    Returns:
        a binary vector, indicating whether each box is inside the reference box.
    """
    ref_h, ref_w = box_size

    center_x = self.tensor[..., 0]
    center_y = self.tensor[..., 1]
    half_w = self.tensor[..., 2] / 2.0
    half_h = self.tensor[..., 3] / 2.0

    radians = self.tensor[..., 4] * math.pi / 180.0
    abs_cos = torch.abs(torch.cos(radians))
    abs_sin = torch.abs(torch.sin(radians))

    # half extents of the axis-aligned rectangle enclosing the rotated box
    extent_x = abs_cos * half_w + abs_sin * half_h
    extent_y = abs_cos * half_h + abs_sin * half_w

    left_ok = center_x - extent_x >= -boundary_threshold
    top_ok = center_y - extent_y >= -boundary_threshold
    right_ok = center_x + extent_x < ref_w + boundary_threshold
    bottom_ok = center_y + extent_y < ref_h + boundary_threshold
    return left_ok & top_ok & right_ok & bottom_ok
|
||||
|
||||
def get_centers(self) -> torch.Tensor:
    """
    Returns:
        The box centers as an Nx2 array of (x, y), taken from the first
        two columns of the box tensor.
    """
    return self.tensor[:, 0:2]
|
||||
|
||||
def scale(self, scale_x: float, scale_y: float) -> None:
    """
    Scale the rotated boxes in place with horizontal and vertical factors.

    Note: when scale_x != scale_y, a rotated box whose angle is not a multiple
    of 90 degrees does not stay rectangular under the resize; it becomes a
    parallelogram (with skew). Here we approximate by fitting a rotated
    rectangle to that parallelogram.
    """
    boxes = self.tensor
    boxes[:, 0] *= scale_x
    boxes[:, 1] *= scale_y

    # computed from the angles before they are overwritten at the end
    radians = boxes[:, 4] * math.pi / 180.0
    cos_t = torch.cos(radians)
    sin_t = torch.sin(radians)

    # Derivation. In image space y points down and x points right. In the
    # box's local frame (center at the origin) the vertices are
    #   A(-w/2, -h/2), B(w/2, -h/2), C(w/2, h/2), D(-w/2, h/2),
    # the midpoint of the left edge AD is E = (-w/2, 0) and the midpoint of
    # the top edge AB is F = (0, -h/2).
    # Rotating into the global frame (the right-handed system for image
    # space is yOx):
    #   (old_x, old_y) = (s * y + c * x, c * y - s * x)
    #   E(old) = (-c * w / 2, s * w / 2)
    #   F(old) = (-s * h / 2, -c * h / 2)
    # After scaling by (sfx, sfy):
    #   E(new) = (-sfx * c * w / 2, sfy * s * w / 2)
    #   F(new) = (-sfx * s * h / 2, -sfy * c * h / 2)
    #
    # New width:
    #   w(new) = |E(new) - O| * 2 = sqrt[(sfx * c)^2 + (sfy * s)^2] * w
    # e.g. angle = 0 or 180 -> factor == scale_x; |angle| = 90 -> factor == scale_y
    boxes[:, 2] *= torch.sqrt((scale_x * cos_t) ** 2 + (scale_y * sin_t) ** 2)

    # New height:
    #   h(new) = |F(new) - O| * 2 = sqrt[(sfx * s)^2 + (sfy * c)^2] * h
    # e.g. angle = 0 or 180 -> factor == scale_y; |angle| = 90 -> factor == scale_x
    boxes[:, 3] *= torch.sqrt((scale_x * sin_t) ** 2 + (scale_y * cos_t) ** 2)

    # The angle is the CCW rotation from the image-space y-axis to the box's
    # height vector (top->down in the box's local frame):
    #   angle(new) = angle_yOx(O - F(new)) = atan2(sfx * s, sfy * c)
    # e.g. when sfx == sfy, angle(new) == atan2(s, c) == angle(old)
    boxes[:, 4] = torch.atan2(scale_x * sin_t, scale_y * cos_t) * 180 / math.pi
|
||||
|
||||
@property
def device(self) -> torch.device:
    """
    The device on which the underlying box tensor is stored.

    Returns:
        torch.device: ``self.tensor.device`` (a device object, not a string).
    """
    return self.tensor.device
|
||||
|
||||
def __iter__(self) -> Iterator[torch.Tensor]:
    """
    Yield one box at a time, each as a Tensor of shape (5,).
    """
    for row in self.tensor:
        yield row
|
||||
|
||||
|
||||
def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> torch.Tensor:
    """
    Given two lists of rotated boxes of size N and M,
    compute the IoU (intersection over union)
    between all N x M pairs of boxes.
    The box order must be (x_center, y_center, width, height, angle).

    Args:
        boxes1, boxes2 (RotatedBoxes):
            two `RotatedBoxes`. Contains N & M rotated boxes, respectively.

    Returns:
        Tensor: IoU, sized [N,M].
    """
    # The computation is delegated to pairwise_iou_rotated on the raw tensors.
    return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor)
|
|
@ -0,0 +1,5 @@
|
|||
# Utility functions
|
||||
|
||||
This folder contains utility functions that are not used in the
|
||||
core library, but are useful for building models or training
|
||||
code using the config system.
|
|
@ -0,0 +1 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,164 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
import typing
|
||||
import torch
|
||||
from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table
|
||||
from torch import nn
|
||||
|
||||
from detectron2.structures import BitMasks, Boxes, ImageList, Instances
|
||||
|
||||
from .logger import log_first_n
|
||||
|
||||
__all__ = [
|
||||
"activation_count_operators",
|
||||
"flop_count_operators",
|
||||
"parameter_count_table",
|
||||
"parameter_count",
|
||||
]
|
||||
|
||||
FLOPS_MODE = "flops"
|
||||
ACTIVATIONS_MODE = "activations"
|
||||
|
||||
|
||||
# some extra ops to ignore from counting.
|
||||
_IGNORED_OPS = [
|
||||
"aten::add",
|
||||
"aten::add_",
|
||||
"aten::batch_norm",
|
||||
"aten::constant_pad_nd",
|
||||
"aten::div",
|
||||
"aten::div_",
|
||||
"aten::exp",
|
||||
"aten::log2",
|
||||
"aten::max_pool2d",
|
||||
"aten::meshgrid",
|
||||
"aten::mul",
|
||||
"aten::mul_",
|
||||
"aten::nonzero_numpy",
|
||||
"aten::relu",
|
||||
"aten::relu_",
|
||||
"aten::rsub",
|
||||
"aten::sigmoid",
|
||||
"aten::sigmoid_",
|
||||
"aten::softmax",
|
||||
"aten::sort",
|
||||
"aten::sqrt",
|
||||
"aten::sub",
|
||||
"aten::upsample_nearest2d",
|
||||
"prim::PythonOp",
|
||||
"torchvision::nms",
|
||||
]
|
||||
|
||||
|
||||
def flop_count_operators(
    model: nn.Module, inputs: list, **kwargs
) -> typing.DefaultDict[str, float]:
    """
    Operator-level flops counting using jit.
    This wraps fvcore.nn.flop_count so that it works with the standard
    detection models in detectron2.

    Note:
        The input is run through the model to compute flops.
        The flops of a detection model are often input-dependent: for example,
        the flops of the box & mask heads depend on the number of proposals and
        the number of detected objects.
        Counting flops with a single input may therefore not accurately
        reflect the computation cost of a model.

    Args:
        model: a detectron2 model that takes `list[dict]` as input.
        inputs (list[dict]): inputs to model, in detectron2's standard format.
    """
    counts = _wrapper_count_operators(model=model, inputs=inputs, mode=FLOPS_MODE, **kwargs)
    return counts
|
||||
|
||||
|
||||
def activation_count_operators(
    model: nn.Module, inputs: list, **kwargs
) -> typing.DefaultDict[str, float]:
    """
    Operator-level activations counting using jit.
    This wraps fvcore.nn.activation_count so that it works with the standard
    detection models in detectron2.

    Note:
        The input is run through the model to compute activations.
        The activations of a detection model are often input-dependent: for
        example, the activations of the box & mask heads depend on the number
        of proposals and the number of detected objects.

    Args:
        model: a detectron2 model that takes `list[dict]` as input.
        inputs (list[dict]): inputs to model, in detectron2's standard format.
    """
    counts = _wrapper_count_operators(
        model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs
    )
    return counts
|
||||
|
||||
|
||||
def _flatten_to_tuple(outputs):
    """
    Recursively collect every tensor found in ``outputs`` into a flat tuple.

    Tensors are kept as-is; lists, tuples and dict values are traversed in
    order; ``Instances`` are traversed through ``get_fields()``; ``Boxes``,
    ``BitMasks`` and ``ImageList`` contribute their ``.tensor``. Any other
    type is skipped with a warning (logged through ``log_first_n`` with n=10).
    """
    if isinstance(outputs, torch.Tensor):
        return (outputs,)
    if isinstance(outputs, (list, tuple)):
        return tuple(t for item in outputs for t in _flatten_to_tuple(item))
    if isinstance(outputs, dict):
        return tuple(t for value in outputs.values() for t in _flatten_to_tuple(value))
    if isinstance(outputs, Instances):
        return tuple(_flatten_to_tuple(outputs.get_fields()))
    if isinstance(outputs, (Boxes, BitMasks, ImageList)):
        return (outputs.tensor,)

    log_first_n(
        logging.WARN,
        f"Output of type {type(outputs)} not included in flops/activations count.",
        n=10,
    )
    return ()
|
||||
|
||||
|
||||
def _wrapper_count_operators(
    model: nn.Module, inputs: list, mode: str, **kwargs
) -> typing.DefaultDict[str, float]:
    """
    Shared implementation behind the flops and activations counters.

    Args:
        model: a detectron2 model that takes `list[dict]` as input. A
            DataParallel / DistributedDataParallel wrapper is unwrapped.
        inputs (list[dict]): exactly one input dict; its "image" entry is fed
            to the counter.
        mode (str): FLOPS_MODE or ACTIVATIONS_MODE.
        kwargs: forwarded to the fvcore counter. A "supported_ops" entry, if
            given, is merged on top of the handlers for the ignored ops.

    Returns:
        the per-operator counts returned by fvcore.

    Raises:
        NotImplementedError: if ``mode`` is not one of the two known modes.
    """
    # ignore some ops: give each of them a handler that reports nothing
    supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS}
    supported_ops.update(kwargs.pop("supported_ops", {}))
    kwargs["supported_ops"] = supported_ops

    assert len(inputs) == 1, "Please use batch size=1"
    tensor_input = inputs[0]["image"]

    class WrapModel(nn.Module):
        def __init__(self, model):
            super().__init__()
            if isinstance(
                model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel)
            ):
                self.model = model.module
            else:
                self.model = model

        def forward(self, image):
            # jit requires the input/output to be Tensors
            inputs = [{"image": image}]
            outputs = self.model.forward(inputs)
            # Only the subgraph that computes the returned tuple of tensor will be
            # counted. So we flatten everything we found to tuple of tensors.
            return _flatten_to_tuple(outputs)

    old_train = model.training
    try:
        with torch.no_grad():
            if mode == FLOPS_MODE:
                ret = flop_count(WrapModel(model).train(False), (tensor_input,), **kwargs)
            elif mode == ACTIVATIONS_MODE:
                ret = activation_count(WrapModel(model).train(False), (tensor_input,), **kwargs)
            else:
                raise NotImplementedError("Count for mode {} is not supported yet.".format(mode))
    finally:
        # WrapModel(...).train(False) also switches the wrapped model to eval
        # mode; restore the caller's mode even when counting fails.
        model.train(old_train)
    # compatible with change in fvcore
    if isinstance(ret, tuple):
        ret = ret[0]
    return ret
|
|
@ -0,0 +1,195 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import importlib
|
||||
import numpy as np
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
import PIL
|
||||
import torch
|
||||
import torchvision
|
||||
from tabulate import tabulate
|
||||
|
||||
__all__ = ["collect_env_info"]
|
||||
|
||||
|
||||
def collect_torch_env():
    """
    Return a text description of the PyTorch build.

    Uses ``torch.__config__.show()`` when that module can be imported, and
    falls back to ``torch.utils.collect_env.get_pretty_env_info()`` otherwise.
    """
    try:
        import torch.__config__

        build_info = torch.__config__.show()
        return build_info
    except ImportError:
        # compatible with older versions of pytorch
        from torch.utils.collect_env import get_pretty_env_info

        fallback_info = get_pretty_env_info()
        return fallback_info
|
||||
|
||||
|
||||
def get_env_module():
    """
    Return ``("DETECTRON2_ENV_MODULE", value)`` where value is read from the
    environment, or ``"<not set>"`` when the variable is absent.
    """
    name = "DETECTRON2_ENV_MODULE"
    value = os.environ.get(name, "<not set>")
    return name, value
|
||||
|
||||
|
||||
def detect_compute_compatibility(CUDA_HOME, so_file):
    """
    List the GPU architectures a compiled library was built for.

    Runs ``<CUDA_HOME>/bin/cuobjdump --list-elf <so_file>`` and extracts the
    ``sm_XY`` entries from its output.

    Args:
        CUDA_HOME: root directory of the CUDA toolkit.
        so_file: path of the shared library to inspect.

    Returns:
        str: the sorted, de-duplicated architectures joined by ", " (each
        formatted by putting "." between the digits, e.g. "7.5");
        ``so_file + "; cannot find cuobjdump"`` when the tool is missing;
        or ``so_file`` unchanged on any other failure.
    """
    try:
        cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump")
        if not os.path.isfile(cuobjdump):
            return so_file + "; cannot find cuobjdump"

        # Pass an argument list instead of a quoted shell string, so paths
        # containing quotes or other shell metacharacters are handled safely.
        output = subprocess.check_output([cuobjdump, "--list-elf", so_file])
        lines = output.decode("utf-8").strip().split("\n")
        arch = []
        for line in lines:
            # a line without an ".sm_NN." entry raises IndexError, which is
            # handled by the best-effort fallback below
            sm_digits = re.findall(r"\.sm_([0-9]*)\.", line)[0]
            arch.append(".".join(sm_digits))
        return ", ".join(sorted(set(arch)))
    except Exception:
        # best effort: this is diagnostic output only, so report the file name
        # rather than failing the whole environment report
        return so_file
|
||||
|
||||
|
||||
def collect_env_info():
    """
    Collect a human-readable report of the runtime environment.

    The report lists the platform, Python, numpy, detectron2 and its compiled
    extension, compilers, PyTorch, visible GPUs, CUDA/ROCM locations, Pillow,
    torchvision, fvcore and cv2, followed by the output of
    ``collect_torch_env()``.

    Returns:
        str: a table rendered by ``tabulate`` plus the PyTorch build info.
    """
    # local import so that `importlib.util` is guaranteed to be loaded below
    import importlib.util

    has_gpu = torch.cuda.is_available()  # true for both CUDA & ROCM
    torch_version = torch.__version__

    # NOTE: the use of CUDA_HOME and ROCM_HOME requires the CUDA/ROCM build deps, though in
    # theory detectron2 should be made runnable with only the corresponding runtimes
    from torch.utils.cpp_extension import CUDA_HOME

    has_rocm = False
    if tuple(map(int, torch_version.split(".")[:2])) >= (1, 5):
        from torch.utils.cpp_extension import ROCM_HOME

        if (getattr(torch.version, "hip", None) is not None) and (ROCM_HOME is not None):
            has_rocm = True
    has_cuda = has_gpu and (not has_rocm)

    data = []
    data.append(("sys.platform", sys.platform))
    data.append(("Python", sys.version.replace("\n", "")))
    data.append(("numpy", np.__version__))

    try:
        import detectron2  # noqa

        data.append(
            ("detectron2", detectron2.__version__ + " @" + os.path.dirname(detectron2.__file__))
        )
    except ImportError:
        data.append(("detectron2", "failed to import"))

    try:
        from detectron2 import _C
    except ImportError:
        data.append(("detectron2._C", "failed to import. detectron2 is not built correctly"))

        # print system compilers when extension fails to build
        if sys.platform != "win32":  # don't know what to do for windows
            try:
                # this is how torch/utils/cpp_extensions.py choose compiler
                cxx = os.environ.get("CXX", "c++")
                cxx = subprocess.check_output("'{}' --version".format(cxx), shell=True)
                cxx = cxx.decode("utf-8").strip().split("\n")[0]
            except subprocess.SubprocessError:
                cxx = "Not found"
            data.append(("Compiler", cxx))

            if has_cuda and CUDA_HOME is not None:
                try:
                    nvcc = os.path.join(CUDA_HOME, "bin", "nvcc")
                    nvcc = subprocess.check_output("'{}' -V".format(nvcc), shell=True)
                    nvcc = nvcc.decode("utf-8").strip().split("\n")[-1]
                except subprocess.SubprocessError:
                    nvcc = "Not found"
                data.append(("CUDA compiler", nvcc))
    else:
        # print compilers that are used to build extension
        data.append(("Compiler", _C.get_compiler_version()))
        data.append(("CUDA compiler", _C.get_cuda_version()))  # cuda or hip
        if has_cuda:
            data.append(
                ("detectron2 arch flags", detect_compute_compatibility(CUDA_HOME, _C.__file__))
            )

    data.append(get_env_module())
    data.append(("PyTorch", torch_version + " @" + os.path.dirname(torch.__file__)))
    data.append(("PyTorch debug build", torch.version.debug))

    data.append(("GPU available", has_gpu))
    if has_gpu:
        # group device indices by "<name> (arch=<capability>)"
        devices = defaultdict(list)
        for k in range(torch.cuda.device_count()):
            cap = ".".join((str(x) for x in torch.cuda.get_device_capability(k)))
            name = torch.cuda.get_device_name(k) + f" (arch={cap})"
            devices[name].append(str(k))
        for name, devids in devices.items():
            data.append(("GPU " + ",".join(devids), name))

        if has_rocm:
            msg = " - invalid!" if not os.path.isdir(ROCM_HOME) else ""
            data.append(("ROCM_HOME", str(ROCM_HOME) + msg))
        else:
            # CUDA_HOME may be None (runtime-only install); os.path.isdir(None)
            # raises TypeError, so test for None before touching the filesystem
            msg = " - invalid!" if not (CUDA_HOME and os.path.isdir(CUDA_HOME)) else ""
            data.append(("CUDA_HOME", str(CUDA_HOME) + msg))

            cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None)
            if cuda_arch_list:
                data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list))
    data.append(("Pillow", PIL.__version__))

    try:
        data.append(
            (
                "torchvision",
                str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__),
            )
        )
        if has_cuda:
            try:
                torchvision_C = importlib.util.find_spec("torchvision._C").origin
                msg = detect_compute_compatibility(CUDA_HOME, torchvision_C)
                data.append(("torchvision arch flags", msg))
            except (ImportError, AttributeError):
                # find_spec returns None when the extension is absent, which
                # surfaces as AttributeError on `.origin`
                data.append(("torchvision._C", "Not found"))
    except AttributeError:
        data.append(("torchvision", "unknown"))

    try:
        import fvcore

        data.append(("fvcore", fvcore.__version__))
    except ImportError:
        pass

    try:
        import cv2

        data.append(("cv2", cv2.__version__))
    except ImportError:
        data.append(("cv2", "Not found"))
    env_str = tabulate(data) + "\n"
    env_str += collect_torch_env()
    return env_str
|
||||
|
||||
|
||||
# Script entry point: print the environment report, then check that a tensor
# can be copied to every visible CUDA device.
if __name__ == "__main__":
    try:
        import detectron2  # noqa
    except ImportError:
        # detectron2 is not importable: use the function defined in this file
        print(collect_env_info())
    else:
        # detectron2 is importable: use the copy from the installed package
        # (this rebinds the module-level name `collect_env_info`)
        from detectron2.utils.collect_env import collect_env_info

        print(collect_env_info())
    if torch.cuda.is_available():
        for k in range(torch.cuda.device_count()):
            device = f"cuda:{k}"
            try:
                # a small host-to-device copy; any failure is reported below
                x = torch.tensor([1, 2.0], dtype=torch.float32)
                x = x.to(device)
            except Exception:
                print(f"Unable to copy tensor to device={device}")
|
|
@ -0,0 +1,140 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
|
||||
"""
|
||||
An awesome colormap for really neat visualizations.
|
||||
Copied from Detectron, and removed gray colors.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
__all__ = ["colormap", "random_color"]
|
||||
|
||||
# fmt: off
|
||||
# RGB:
|
||||
_COLORS = np.array(
|
||||
[
|
||||
0.000, 0.447, 0.741,
|
||||
0.850, 0.325, 0.098,
|
||||
0.929, 0.694, 0.125,
|
||||
0.494, 0.184, 0.556,
|
||||
0.466, 0.674, 0.188,
|
||||
0.301, 0.745, 0.933,
|
||||
0.635, 0.078, 0.184,
|
||||
0.300, 0.300, 0.300,
|
||||
0.600, 0.600, 0.600,
|
||||
1.000, 0.000, 0.000,
|
||||
1.000, 0.500, 0.000,
|
||||
0.749, 0.749, 0.000,
|
||||
0.000, 1.000, 0.000,
|
||||
0.000, 0.000, 1.000,
|
||||
0.667, 0.000, 1.000,
|
||||
0.333, 0.333, 0.000,
|
||||
0.333, 0.667, 0.000,
|
||||
0.333, 1.000, 0.000,
|
||||
0.667, 0.333, 0.000,
|
||||
0.667, 0.667, 0.000,
|
||||
0.667, 1.000, 0.000,
|
||||
1.000, 0.333, 0.000,
|
||||
1.000, 0.667, 0.000,
|
||||
1.000, 1.000, 0.000,
|
||||
0.000, 0.333, 0.500,
|
||||
0.000, 0.667, 0.500,
|
||||
0.000, 1.000, 0.500,
|
||||
0.333, 0.000, 0.500,
|
||||
0.333, 0.333, 0.500,
|
||||
0.333, 0.667, 0.500,
|
||||
0.333, 1.000, 0.500,
|
||||
0.667, 0.000, 0.500,
|
||||
0.667, 0.333, 0.500,
|
||||
0.667, 0.667, 0.500,
|
||||
0.667, 1.000, 0.500,
|
||||
1.000, 0.000, 0.500,
|
||||
1.000, 0.333, 0.500,
|
||||
1.000, 0.667, 0.500,
|
||||
1.000, 1.000, 0.500,
|
||||
0.000, 0.333, 1.000,
|
||||
0.000, 0.667, 1.000,
|
||||
0.000, 1.000, 1.000,
|
||||
0.333, 0.000, 1.000,
|
||||
0.333, 0.333, 1.000,
|
||||
0.333, 0.667, 1.000,
|
||||
0.333, 1.000, 1.000,
|
||||
0.667, 0.000, 1.000,
|
||||
0.667, 0.333, 1.000,
|
||||
0.667, 0.667, 1.000,
|
||||
0.667, 1.000, 1.000,
|
||||
1.000, 0.000, 1.000,
|
||||
1.000, 0.333, 1.000,
|
||||
1.000, 0.667, 1.000,
|
||||
0.333, 0.000, 0.000,
|
||||
0.500, 0.000, 0.000,
|
||||
0.667, 0.000, 0.000,
|
||||
0.833, 0.000, 0.000,
|
||||
1.000, 0.000, 0.000,
|
||||
0.000, 0.167, 0.000,
|
||||
0.000, 0.333, 0.000,
|
||||
0.000, 0.500, 0.000,
|
||||
0.000, 0.667, 0.000,
|
||||
0.000, 0.833, 0.000,
|
||||
0.000, 1.000, 0.000,
|
||||
0.000, 0.000, 0.167,
|
||||
0.000, 0.000, 0.333,
|
||||
0.000, 0.000, 0.500,
|
||||
0.000, 0.000, 0.667,
|
||||
0.000, 0.000, 0.833,
|
||||
0.000, 0.000, 1.000,
|
||||
0.000, 0.000, 0.000,
|
||||
0.143, 0.143, 0.143,
|
||||
0.857, 0.857, 0.857,
|
||||
1.000, 1.000, 1.000
|
||||
]
|
||||
).astype(np.float32).reshape(-1, 3)
|
||||
# fmt: on
|
||||
|
||||
|
||||
def colormap(rgb=False, maximum=255):
    """
    Return the whole color table scaled by ``maximum``.

    Args:
        rgb (bool): whether to return RGB colors or BGR colors.
        maximum (int): either 255 or 1

    Returns:
        ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1]
    """
    assert maximum in [255, 1], maximum
    scaled = _COLORS * maximum
    # the table is stored as RGB; reverse the channel axis for BGR
    return scaled if rgb else scaled[:, ::-1]
|
||||
|
||||
|
||||
def random_color(rgb=False, maximum=255):
    """
    Pick one color from the table at random and scale it by ``maximum``.

    Args:
        rgb (bool): whether to return RGB colors or BGR colors.
        maximum (int): either 255 or 1

    Returns:
        ndarray: a vector of 3 numbers
    """
    pick = np.random.randint(0, len(_COLORS))
    color = _COLORS[pick] * maximum
    # the table is stored as RGB; reverse the channel order for BGR
    return color if rgb else color[::-1]
|
||||
|
||||
|
||||
# Demo: paint every color of the table as one square tile on a canvas and
# show it in an OpenCV window until a key is pressed.
if __name__ == "__main__":
    import cv2

    size = 100
    H, W = 10, 10
    # tiles without a matching color keep this random noise
    canvas = np.random.rand(H * size, W * size, 3).astype("float32")
    for h in range(H):
        row_start, row_stop = h * size, (h + 1) * size
        for w in range(W):
            idx = h * W + w
            if idx >= len(_COLORS):
                break
            col_start, col_stop = w * size, (w + 1) * size
            canvas[row_start:row_stop, col_start:col_stop] = _COLORS[idx]
    cv2.imshow("a", canvas)
    cv2.waitKey(0)
|
|
@ -0,0 +1,263 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
"""
|
||||
This file contains primitives for multi-gpu communication.
|
||||
This is useful when doing distributed training.
|
||||
"""
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import numpy as np
|
||||
import pickle
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
|
||||
_LOCAL_PROCESS_GROUP = None
|
||||
"""
|
||||
A torch process group which only includes processes that on the same machine as the current process.
|
||||
This variable is set when processes are spawned by `launch()` in "engine/launch.py".
|
||||
"""
|
||||
|
||||
|
||||
def get_world_size() -> int:
    """
    Return the number of processes in the default process group, or 1 when
    torch.distributed is unavailable or not initialized.
    """
    if dist.is_available() and dist.is_initialized():
        return dist.get_world_size()
    return 1
|
||||
|
||||
|
||||
def get_rank() -> int:
    """
    Return the rank of the current process in the default process group, or 0
    when torch.distributed is unavailable or not initialized.
    """
    if dist.is_available() and dist.is_initialized():
        return dist.get_rank()
    return 0
|
||||
|
||||
|
||||
def get_local_rank() -> int:
    """
    Returns:
        The rank of the current process within the local (per-machine)
        process group, or 0 when torch.distributed is unavailable or not
        initialized.
    """
    if not (dist.is_available() and dist.is_initialized()):
        return 0
    assert _LOCAL_PROCESS_GROUP is not None
    return dist.get_rank(group=_LOCAL_PROCESS_GROUP)
|
||||
|
||||
|
||||
def get_local_size() -> int:
    """
    Returns:
        The size of the per-machine process group,
        i.e. the number of processes per machine, or 1 when torch.distributed
        is unavailable or not initialized.
    """
    if not (dist.is_available() and dist.is_initialized()):
        return 1
    return dist.get_world_size(group=_LOCAL_PROCESS_GROUP)
|
||||
|
||||
|
||||
def is_main_process() -> bool:
    """Return True when the current process has rank 0."""
    rank = get_rank()
    return rank == 0
|
||||
|
||||
|
||||
def synchronize():
    """
    Helper function to synchronize (barrier) among all processes when
    using distributed training. Does nothing when torch.distributed is
    unavailable or not initialized, or when there is only one process.
    """
    if not (dist.is_available() and dist.is_initialized()):
        return
    if dist.get_world_size() != 1:
        dist.barrier()
|
||||
|
||||
|
||||
@functools.lru_cache()
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks.
    A new gloo group is created only when the current backend is nccl;
    otherwise the default WORLD group is returned.
    The result is cached.
    """
    if dist.get_backend() != "nccl":
        return dist.group.WORLD
    return dist.new_group(backend="gloo")
|
||||
|
||||
|
||||
def _serialize_to_tensor(data, group):
    """
    Pickle ``data`` into a 1-D byte tensor placed on the device that matches
    the backend of ``group`` (cpu for gloo, cuda for nccl).

    A warning is logged when the pickled payload exceeds 1 GB.
    """
    backend = dist.get_backend(group)
    assert backend in ["gloo", "nccl"]
    device = torch.device("cpu" if backend == "gloo" else "cuda")

    buffer = pickle.dumps(data)
    one_gb = 1024 ** 3
    if len(buffer) > one_gb:
        logging.getLogger(__name__).warning(
            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
                get_rank(), len(buffer) / (1024 ** 3), device
            )
        )
    storage = torch.ByteStorage.from_buffer(buffer)
    return torch.ByteTensor(storage).to(device=device)
|
||||
|
||||
|
||||
def _pad_to_largest_tensor(tensor, group):
    """
    Exchange tensor sizes across the group and zero-pad the local tensor up
    to the largest size.

    Returns:
        list[int]: size of the tensor, on each rank
        Tensor: padded tensor that has the max size
    """
    world_size = dist.get_world_size(group=group)
    assert (
        world_size >= 1
    ), "comm.gather/all_gather must be called from ranks within the given group!"

    own_numel = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)
    gathered = [
        torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size)
    ]
    dist.all_gather(gathered, own_numel, group=group)
    size_list = [int(entry.item()) for entry in gathered]
    max_size = max(size_list)

    # torch all_gather cannot gather tensors of different shapes, so every
    # rank pads its tensor up to the common maximum length
    if own_numel != max_size:
        padding = torch.zeros((max_size - own_numel,), dtype=torch.uint8, device=tensor.device)
        tensor = torch.cat((tensor, padding), dim=0)
    return size_list, tensor
|
||||
|
||||
|
||||
def all_gather(data, group=None):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors).

    Args:
        data: any picklable object
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.

    Returns:
        list[data]: list of data gathered from each rank
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group) == 1:
        return [data]

    payload = _serialize_to_tensor(data, group)
    sizes, payload = _pad_to_largest_tensor(payload, group)
    longest = max(sizes)

    # one receive buffer per rank, all of the padded length
    received = [
        torch.empty((longest,), dtype=torch.uint8, device=payload.device) for _ in sizes
    ]
    dist.all_gather(received, payload, group=group)

    # drop the padding of each buffer before unpickling it
    return [
        pickle.loads(chunk.cpu().numpy().tobytes()[:size])
        for size, chunk in zip(sizes, received)
    ]
|
||||
|
||||
|
||||
def gather(data, dst=0, group=None):
    """
    Collect an arbitrary picklable object from every rank onto rank `dst`.

    Args:
        data: any picklable object
        dst (int): destination rank
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.

    Returns:
        list[data]: on dst, a list of data gathered from each rank. Otherwise,
            an empty list.
    """
    if get_world_size() == 1:
        return [data]
    if group is None:
        group = _get_global_gloo_group()
    if dist.get_world_size(group=group) == 1:
        return [data]
    rank = dist.get_rank(group=group)

    payload = _serialize_to_tensor(data, group)
    size_list, payload = _pad_to_largest_tensor(payload, group)

    if rank != dst:
        # Non-destination ranks only send; they pass an empty receive list.
        dist.gather(payload, [], dst=dst, group=group)
        return []

    # Destination rank: allocate one padded buffer per entry in size_list.
    padded_len = max(size_list)
    recv_buffers = [
        torch.empty((padded_len,), dtype=torch.uint8, device=payload.device)
        for _ in size_list
    ]
    dist.gather(payload, recv_buffers, dst=dst, group=group)

    # Cut each buffer back to its reported size before unpickling.
    # NOTE(review): pickle.loads runs on bytes received from other ranks;
    # only safe when every rank is trusted.
    return [
        pickle.loads(buf.cpu().numpy().tobytes()[:nbytes])
        for nbytes, buf in zip(size_list, recv_buffers)
    ]
|
||||
|
||||
|
||||
def shared_random_seed():
    """
    Draw a random integer and agree on one value across workers.

    Returns:
        int: a random number that is the same across all workers.
            If workers need a shared RNG, they can use this shared seed to
            create one.

    All workers must call this function, otherwise it will deadlock.
    """
    local_seed = np.random.randint(2 ** 31)
    # Every caller draws its own value; the first gathered entry is the one returned.
    return all_gather(local_seed)[0]
|
||||
|
||||
|
||||
def reduce_dict(input_dict, average=True):
    """
    Reduce the values in the dictionary from all processes so that process with rank
    0 has the reduced results.

    Args:
        input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor.
        average (bool): whether to do average or sum

    Returns:
        a dict with the same keys as input_dict, after reduction.
        When the world size is below 2, `input_dict` itself is returned.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict

    with torch.no_grad():
        # sort the keys so that they are consistent across processes
        names = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[name] for name in names], dim=0)
        dist.reduce(stacked, dst=0)
        if dist.get_rank() == 0 and average:
            # only main process gets accumulated, so only divide by
            # world_size in this case
            stacked /= world_size
        reduced_dict = dict(zip(names, stacked))
    return reduced_dict
|
|
@ -0,0 +1,125 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import importlib
|
||||
import importlib.util
|
||||
import logging
|
||||
import numpy as np
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
from datetime import datetime
|
||||
import torch
|
||||
|
||||
__all__ = ["seed_all_rng"]
|
||||
|
||||
|
||||
TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2])
|
||||
"""
|
||||
PyTorch version as a tuple of 2 ints. Useful for comparison.
|
||||
"""
|
||||
|
||||
|
||||
def seed_all_rng(seed=None):
    """
    Set the random seed for the RNG in torch, numpy and python.

    Args:
        seed (int): the seed to use. If None, a seed is built from the
            process id, the current seconds/microseconds and two bytes from
            ``os.urandom``, and the chosen value is logged.
    """
    if seed is None:
        seed = (
            os.getpid()
            + int(datetime.now().strftime("%S%f"))
            + int.from_bytes(os.urandom(2), "big")
        )
        logger = logging.getLogger(__name__)
        logger.info("Using a generated random seed {}".format(seed))
    np.random.seed(seed)
    # torch.manual_seed seeds the default generator itself; feeding that
    # generator's state back through torch.set_rng_state was a no-op.
    torch.manual_seed(seed)
    random.seed(seed)
|
||||
|
||||
|
||||
# from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
|
||||
def _import_file(module_name, file_path, make_importable=False):
    """
    Execute the Python source at `file_path` as a module called `module_name`.

    Args:
        module_name (str): name given to the created module.
        file_path (str): path of the source file to execute.
        make_importable (bool): if True, also register the module in
            ``sys.modules`` under `module_name`.

    Returns:
        the loaded module object
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    loaded = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(loaded)
    if not make_importable:
        return loaded
    sys.modules[module_name] = loaded
    return loaded
|
||||
|
||||
|
||||
def _configure_libraries():
    """
    Configure OpenCV usage and check minimum versions of torch, fvcore and pyyaml.

    Reads the ``DETECTRON2_DISABLE_CV2`` environment variable. May write
    ``sys.modules["cv2"]`` and the ``OPENCV_OPENCL_RUNTIME`` environment variable.

    Raises:
        AssertionError: if torch, fvcore or pyyaml is older than required.
    """
    # An environment option to disable `import cv2` globally,
    # in case it leads to negative performance impact
    if int(os.environ.get("DETECTRON2_DISABLE_CV2", False)):
        sys.modules["cv2"] = None
    else:
        # Disable opencl in opencv since its interaction with cuda often has negative effects
        # This envvar is supported after OpenCV 3.4.0
        os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled"
        try:
            import cv2

            cv2_major = int(cv2.__version__.split(".")[0])
            if cv2_major >= 3:
                cv2.ocl.setUseOpenCL(False)
        except ModuleNotFoundError:
            # Other types of ImportError, if happened, should not be ignored.
            # Because a failed opencv import could mess up address space
            # https://github.com/skvark/opencv-python/issues/381
            pass

    def get_version(module, digit=2):
        # First `digit` dot-separated components of module.__version__, as ints.
        return tuple(int(part) for part in module.__version__.split(".")[:digit])

    # fmt: off
    assert get_version(torch) >= (1, 4), "Requires torch>=1.4"
    import fvcore
    assert get_version(fvcore, 3) >= (0, 1, 1), "Requires fvcore>=0.1.1"
    import yaml
    assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1"
    # fmt: on
|
||||
|
||||
|
||||
_ENV_SETUP_DONE = False
|
||||
|
||||
|
||||
def setup_environment():
    """Perform environment setup work. The default setup is a no-op, but this
    function allows the user to specify a Python source file or a module in
    the $DETECTRON2_ENV_MODULE environment variable, that performs
    custom setup work that may be necessary to their computing environment.

    The work is done only on the first call; later calls return immediately.
    """
    global _ENV_SETUP_DONE
    if _ENV_SETUP_DONE:
        return
    # Mark as done before running so later calls return immediately.
    _ENV_SETUP_DONE = True

    _configure_libraries()

    custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE")
    if not custom_module_path:
        # The default setup is a no-op
        return
    setup_custom_environment(custom_module_path)
|
||||
|
||||
|
||||
def setup_custom_environment(custom_module):
    """
    Load custom environment setup by importing a Python source file or a
    module, and run the setup function.

    Args:
        custom_module (str): a path ending in ".py", loaded as a source file,
            or otherwise a module name passed to ``importlib.import_module``.

    Raises:
        AssertionError: if the loaded module has no callable
            ``setup_environment`` attribute.
    """
    if custom_module.endswith(".py"):
        module = _import_file("detectron2.utils.env.custom_module", custom_module)
    else:
        module = importlib.import_module(custom_module)

    setup_hook = getattr(module, "setup_environment", None)
    assert callable(setup_hook), (
        "Custom environment module defined in {} does not have the "
        "required callable attribute 'setup_environment'."
    ).format(custom_module)
    setup_hook()
|
|
@ -0,0 +1,474 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from contextlib import contextmanager
|
||||
import torch
|
||||
from fvcore.common.file_io import PathManager
|
||||
from fvcore.common.history_buffer import HistoryBuffer
|
||||
|
||||
__all__ = [
|
||||
"get_event_storage",
|
||||
"JSONWriter",
|
||||
"TensorboardXWriter",
|
||||
"CommonMetricPrinter",
|
||||
"EventStorage",
|
||||
]
|
||||
|
||||
_CURRENT_STORAGE_STACK = []
|
||||
|
||||
|
||||
def get_event_storage():
    """
    Return the innermost active storage.

    Returns:
        The :class:`EventStorage` object that's currently being used.
        Throws an error if no :class:`EventStorage` is currently enabled.
    """
    assert len(
        _CURRENT_STORAGE_STACK
    ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!"
    # The most recently pushed storage is the last element of the stack.
    current = _CURRENT_STORAGE_STACK[-1]
    return current
|
||||
|
||||
|
||||
class EventWriter:
    """
    Base class for writers that obtain events from :class:`EventStorage` and process them.
    """

    def write(self):
        """Process pending events; subclasses must override this."""
        raise NotImplementedError

    def close(self):
        """Release resources held by the writer. Does nothing by default."""
        return None
|
||||
|
||||
|
||||
class JSONWriter(EventWriter):
    """
    Write scalars to a json file.

    It saves scalars as one json per line (instead of a big json) for easy parsing.

    Examples parsing such a json file:
    ::
        $ cat metrics.json | jq -s '.[0:2]'
        [
          {
            "data_time": 0.008433341979980469,
            "iteration": 19,
            "loss": 1.9228371381759644,
            "loss_box_reg": 0.050025828182697296,
            "loss_classifier": 0.5316952466964722,
            "loss_mask": 0.7236229181289673,
            "loss_rpn_box": 0.0856662318110466,
            "loss_rpn_cls": 0.48198649287223816,
            "lr": 0.007173333333333333,
            "time": 0.25401854515075684
          },
          {
            "data_time": 0.007216215133666992,
            "iteration": 39,
            "loss": 1.282649278640747,
            "loss_box_reg": 0.06222952902317047,
            "loss_classifier": 0.30682939291000366,
            "loss_mask": 0.6970193982124329,
            "loss_rpn_box": 0.038663312792778015,
            "loss_rpn_cls": 0.1471673548221588,
            "lr": 0.007706666666666667,
            "time": 0.2490077018737793
          }
        ]

        $ cat metrics.json | jq '.loss_mask'
        0.7126231789588928
        0.689423680305481
        0.6776131987571716
        ...

    """

    def __init__(self, json_file, window_size=20):
        """
        Args:
            json_file (str): path to the json file. New data will be appended if the file exists.
            window_size (int): the window size of median smoothing for the scalars whose
                `smoothing_hint` are True.
        """
        self._file_handle = PathManager.open(json_file, "a")
        self._window_size = window_size
        # Largest iteration number already written; -1 means nothing written yet.
        self._last_write = -1

    def write(self):
        """
        Append one json line per iteration that has scalars newer than the
        last written iteration, then flush the file.

        Lines are written in ascending iteration order.
        """
        storage = get_event_storage()
        to_save = defaultdict(dict)

        latest = storage.latest_with_smoothing_hint(self._window_size)
        for name, (value, itr) in latest.items():
            # keep scalars that have not been written
            if itr <= self._last_write:
                continue
            to_save[itr][name] = value

        if to_save:
            self._last_write = max(to_save)

        # Scalars may carry different iteration numbers and arrive in any
        # order; sort so the file stays ordered by iteration.
        for itr in sorted(to_save):
            scalars_per_iter = to_save[itr]
            scalars_per_iter["iteration"] = itr
            self._file_handle.write(json.dumps(scalars_per_iter, sort_keys=True) + "\n")
        self._file_handle.flush()
        try:
            os.fsync(self._file_handle.fileno())
        except AttributeError:
            # The handle has no fileno(); skip the fsync.
            pass

    def close(self):
        """Close the underlying file handle."""
        self._file_handle.close()
|
||||
|
||||
|
||||
class TensorboardXWriter(EventWriter):
    """
    Write all scalars to a tensorboard file.
    """

    def __init__(self, log_dir: str, window_size: int = 20, **kwargs):
        """
        Args:
            log_dir (str): the directory to save the output events
            window_size (int): the scalars will be median-smoothed by this window size

            kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)`
        """
        self._window_size = window_size
        # Imported here rather than at module level.
        from torch.utils.tensorboard import SummaryWriter

        self._writer = SummaryWriter(log_dir, **kwargs)
        # Largest iteration number already written; -1 means nothing written yet.
        self._last_write = -1

    def write(self):
        """
        Send new scalars, stored images and stored histograms to tensorboard,
        then clear the images and histograms from the storage.
        """
        storage = get_event_storage()

        newest_iter = self._last_write
        latest = storage.latest_with_smoothing_hint(self._window_size)
        for name, (value, itr) in latest.items():
            # skip scalars that were already written
            if itr <= self._last_write:
                continue
            self._writer.add_scalar(name, value, itr)
            newest_iter = max(newest_iter, itr)
        self._last_write = newest_iter

        # storage.put_{image,histogram} is only meant to be used by
        # tensorboard writer. So we access its internal fields directly from here.
        if len(storage._vis_data) >= 1:
            for img_name, img, step_num in storage._vis_data:
                self._writer.add_image(img_name, img, step_num)
            # Storage stores all image data and rely on this writer to clear them.
            # As a result it assumes only one writer will use its image data.
            # An alternative design is to let storage store limited recent
            # data (e.g. only the most recent image) that all writers can access.
            # In that case a writer may not see all image data if its period is long.
            storage.clear_images()

        if len(storage._histograms) >= 1:
            for hist_params in storage._histograms:
                self._writer.add_histogram_raw(**hist_params)
            storage.clear_histograms()

    def close(self):
        """Close the summary writer if it was created."""
        if not hasattr(self, "_writer"):  # doesn't exist when the code fails at import
            return
        self._writer.close()
|
||||
|
||||
|
||||
class CommonMetricPrinter(EventWriter):
    """
    Print **common** metrics to the terminal, including
    iteration time, ETA, memory, all losses, and the learning rate.
    It also applies smoothing using a window of `window_size` elements
    (20 by default).

    It's meant to print common metrics in common ways.
    To print something in more customized ways, please implement a similar printer by yourself.
    """

    def __init__(self, max_iter, window_size=20):
        """
        Args:
            max_iter (int): the maximum number of iterations to train.
                Used to compute ETA.
            window_size (int): number of recent values used when averaging
                `data_time` and taking the median of each loss.
        """
        self.logger = logging.getLogger(__name__)
        self._max_iter = max_iter
        self._window_size = window_size
        # (iteration, perf_counter timestamp) of the previous fallback ETA
        # estimate, or None if there has been none.
        self._last_write = None

    def write(self):
        """
        Log one line with ETA, iteration, losses, timings, learning rate and
        peak CUDA memory, using whatever of those the current storage holds.
        """
        storage = get_event_storage()
        iteration = storage.iter
        if iteration == self._max_iter:
            # This hook only reports training progress (loss, ETA, etc) but not other data,
            # therefore do not write anything after training succeeds, even if this method
            # is called.
            return

        try:
            data_time = storage.history("data_time").avg(self._window_size)
        except KeyError:
            # they may not exist in the first few iterations (due to warmup)
            # or when SimpleTrainer is not used
            data_time = None

        eta_string = None
        try:
            iter_time = storage.history("time").global_avg()
            eta_seconds = storage.history("time").median(1000) * (self._max_iter - iteration - 1)
            storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
        except KeyError:
            iter_time = None
            # estimate eta on our own - more noisy
            if self._last_write is not None:
                elapsed_iters = iteration - self._last_write[0]
                # Skip the estimate if the iteration has not advanced since
                # the previous call; dividing would fail or be meaningless.
                if elapsed_iters > 0:
                    estimate_iter_time = (
                        time.perf_counter() - self._last_write[1]
                    ) / elapsed_iters
                    eta_seconds = estimate_iter_time * (self._max_iter - iteration - 1)
                    eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            self._last_write = (iteration, time.perf_counter())

        try:
            lr = "{:.5g}".format(storage.history("lr").latest())
        except KeyError:
            lr = "N/A"

        if torch.cuda.is_available():
            max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0
        else:
            max_mem_mb = None

        # NOTE: max_mem is parsed by grep in "dev/parse_results.sh"
        self.logger.info(
            " {eta}iter: {iter}  {losses}  {time}{data_time}lr: {lr}  {memory}".format(
                eta=f"eta: {eta_string}  " if eta_string else "",
                iter=iteration,
                losses="  ".join(
                    [
                        "{}: {:.4g}".format(k, v.median(self._window_size))
                        for k, v in storage.histories().items()
                        if "loss" in k
                    ]
                ),
                time="time: {:.4f}  ".format(iter_time) if iter_time is not None else "",
                data_time="data_time: {:.4f}  ".format(data_time) if data_time is not None else "",
                lr=lr,
                memory="max_mem: {:.0f}M".format(max_mem_mb) if max_mem_mb is not None else "",
            )
        )
|
||||
|
||||
|
||||
class EventStorage:
    """
    The user-facing class that provides metric storage functionalities.

    In the future we may add support for storing / logging other types of data if needed.
    """

    def __init__(self, start_iter=0):
        """
        Args:
            start_iter (int): the iteration number to start with
        """
        self._history = defaultdict(HistoryBuffer)
        self._smoothing_hints = {}
        self._latest_scalars = {}
        self._iter = start_iter
        self._current_prefix = ""
        self._vis_data = []
        self._histograms = []

    def put_image(self, img_name, img_tensor):
        """
        Add an `img_tensor` associated with `img_name`, to be shown on
        tensorboard.

        Args:
            img_name (str): The name of the image to put into tensorboard.
            img_tensor (torch.Tensor or numpy.array): An `uint8` or `float`
                Tensor of shape `[channel, height, width]` where `channel` is
                3. The image format should be RGB. The elements in img_tensor
                can either have values in [0, 1] (float32) or [0, 255] (uint8).
                The `img_tensor` will be visualized in tensorboard.
        """
        self._vis_data.append((img_name, img_tensor, self._iter))

    def put_scalar(self, name, value, smoothing_hint=True):
        """
        Add a scalar `value` to the `HistoryBuffer` associated with `name`.

        The current name-scope prefix is prepended to `name`, and `value` is
        converted with ``float()`` before being stored.

        Args:
            smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be
                smoothed when logged. The hint will be accessible through
                :meth:`EventStorage.smoothing_hints`.  A writer may ignore the hint
                and apply custom smoothing rule.

                It defaults to True because most scalars we save need to be smoothed to
                provide any useful signal.

        Raises:
            AssertionError: if `name` was previously put with a different
                `smoothing_hint`.
        """
        name = self._current_prefix + name
        history = self._history[name]
        value = float(value)
        history.update(value, self._iter)
        self._latest_scalars[name] = (value, self._iter)

        existing_hint = self._smoothing_hints.get(name)
        if existing_hint is not None:
            assert (
                existing_hint == smoothing_hint
            ), "Scalar {} was put with a different smoothing_hint!".format(name)
        else:
            self._smoothing_hints[name] = smoothing_hint

    def put_scalars(self, *, smoothing_hint=True, **kwargs):
        """
        Put multiple scalars from keyword arguments.

        Examples:

            storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True)
        """
        for k, v in kwargs.items():
            self.put_scalar(k, v, smoothing_hint=smoothing_hint)

    def put_histogram(self, hist_name, hist_tensor, bins=1000):
        """
        Create a histogram from a tensor.

        Args:
            hist_name (str): The name of the histogram to put into tensorboard.
            hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted
                into a histogram.
            bins (int): Number of histogram bins.
        """
        ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item()

        # Create a histogram with PyTorch
        hist_counts = torch.histc(hist_tensor, bins=bins)
        hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32)

        # Parameter for the add_histogram_raw function of SummaryWriter
        hist_params = dict(
            tag=hist_name,
            min=ht_min,
            max=ht_max,
            # numel() counts every element; len() would only give the size of
            # the first dimension for a multi-dimensional tensor.
            num=hist_tensor.numel(),
            sum=float(hist_tensor.sum()),
            sum_squares=float(torch.sum(hist_tensor ** 2)),
            bucket_limits=hist_edges[1:].tolist(),
            bucket_counts=hist_counts.tolist(),
            global_step=self._iter,
        )
        self._histograms.append(hist_params)

    def history(self, name):
        """
        Returns:
            HistoryBuffer: the scalar history for name

        Raises:
            KeyError: if no scalar has been stored under `name`.
        """
        ret = self._history.get(name, None)
        if ret is None:
            raise KeyError("No history metric available for {}!".format(name))
        return ret

    def histories(self):
        """
        Returns:
            dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars
        """
        return self._history

    def latest(self):
        """
        Returns:
            dict[str -> (float, int)]: mapping from the name of each scalar to the most
                recent value and the iteration number at which it was added.
        """
        return self._latest_scalars

    def latest_with_smoothing_hint(self, window_size=20):
        """
        Similar to :meth:`latest`, but the returned values
        are either the un-smoothed original latest value,
        or a median of the given window_size,
        depending on whether the smoothing_hint is True.

        This provides a default behavior that other writers can use.
        """
        result = {}
        for k, (v, itr) in self._latest_scalars.items():
            result[k] = (
                self._history[k].median(window_size) if self._smoothing_hints[k] else v,
                itr,
            )
        return result

    def smoothing_hints(self):
        """
        Returns:
            dict[name -> bool]: the user-provided hint on whether the scalar
                is noisy and needs smoothing.
        """
        return self._smoothing_hints

    def step(self):
        """
        User should either: (1) Call this function to increment storage.iter when needed. Or
        (2) Set `storage.iter` to the correct iteration number before each iteration.

        The storage will then be able to associate the new data with an iteration number.
        """
        self._iter += 1

    @property
    def iter(self):
        """
        Returns:
            int: The current iteration number. When used together with a trainer,
                this is ensured to be the same as trainer.iter.
        """
        return self._iter

    @iter.setter
    def iter(self, val):
        self._iter = int(val)

    @property
    def iteration(self):
        # for backward compatibility
        return self._iter

    def __enter__(self):
        _CURRENT_STORAGE_STACK.append(self)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        assert _CURRENT_STORAGE_STACK[-1] == self
        _CURRENT_STORAGE_STACK.pop()

    @contextmanager
    def name_scope(self, name):
        """
        Yields:
            A context within which all the events added to this storage
            will be prefixed by the name scope.

        The previous prefix is restored on exit, including when the body raises.
        """
        old_prefix = self._current_prefix
        self._current_prefix = name.rstrip("/") + "/"
        try:
            yield
        finally:
            # Restore even on error so later scalars are not mis-prefixed.
            self._current_prefix = old_prefix

    def clear_images(self):
        """
        Delete all the stored images for visualization. This should be called
        after images are written to tensorboard.
        """
        self._vis_data = []

    def clear_histograms(self):
        """
        Delete all the stored histograms for visualization.
        This should be called after histograms are written to tensorboard.
        """
        self._histograms = []
|
|
@ -0,0 +1,221 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import functools
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from collections import Counter
|
||||
from fvcore.common.file_io import PathManager
|
||||
from tabulate import tabulate
|
||||
from termcolor import colored
|
||||
|
||||
|
||||
class _ColorfulFormatter(logging.Formatter):
    """
    Formatter that abbreviates the root logger name and prepends a colored
    "WARNING" or "ERROR" tag to warning, error and critical records.

    Keyword Args:
        root_name (str): required; the logger name prefix to replace.
        abbrev_name (str): replacement for `root_name`; defaults to "".
    """

    def __init__(self, *args, **kwargs):
        self._root_name = kwargs.pop("root_name") + "."
        abbrev = kwargs.pop("abbrev_name", "")
        # An empty abbreviation removes the root prefix entirely.
        self._abbrev_name = (abbrev + ".") if len(abbrev) else abbrev
        super().__init__(*args, **kwargs)

    def formatMessage(self, record):
        # Note: the record's name is rewritten in place.
        record.name = record.name.replace(self._root_name, self._abbrev_name)
        log = super().formatMessage(record)
        level = record.levelno
        if level == logging.WARNING:
            prefix = colored("WARNING", "red", attrs=["blink"])
        elif level in (logging.ERROR, logging.CRITICAL):
            prefix = colored("ERROR", "red", attrs=["blink", "underline"])
        else:
            return log
        return prefix + " " + log
|
||||
|
||||
|
||||
@functools.lru_cache()  # so that calling setup_logger multiple times won't add many handlers
def setup_logger(
    output=None, distributed_rank=0, *, color=True, name="detectron2", abbrev_name=None
):
    """
    Initialize the detectron2 logger and set its verbosity level to "DEBUG".

    Args:
        output (str): a file name or a directory to save log. If None, will not save log file.
            If ends with ".txt" or ".log", assumed to be a file name.
            Otherwise, logs will be saved to `output/log.txt`.
        distributed_rank (int): rank of the calling process. Only rank 0 logs
            to stdout; ranks above 0 get ".rank{N}" appended to the log file name.
        color (bool): whether the stdout handler uses the colored formatter.
        name (str): the root module name of this logger
        abbrev_name (str): an abbreviation of the module, to avoid long names in logs.
            Set to "" to not log the root module in logs.
            By default, will abbreviate "detectron2" to "d2" and leave other
            modules unchanged.

    Returns:
        logging.Logger: a logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    if abbrev_name is None:
        abbrev_name = "d2" if name == "detectron2" else name

    plain_formatter = logging.Formatter(
        "[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S"
    )
    # stdout logging: master only
    if distributed_rank == 0:
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setLevel(logging.DEBUG)
        if color:
            formatter = _ColorfulFormatter(
                colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s",
                datefmt="%m/%d %H:%M:%S",
                root_name=name,
                abbrev_name=str(abbrev_name),
            )
        else:
            formatter = plain_formatter
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    # file logging: all workers
    if output is not None:
        if output.endswith(".txt") or output.endswith(".log"):
            filename = output
        else:
            filename = os.path.join(output, "log.txt")
        if distributed_rank > 0:
            filename = filename + ".rank{}".format(distributed_rank)
        # A bare file name such as "log.txt" has an empty dirname; there is
        # no directory to create in that case.
        log_dir = os.path.dirname(filename)
        if log_dir:
            PathManager.mkdirs(log_dir)

        fh = logging.StreamHandler(_cached_log_stream(filename))
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(plain_formatter)
        logger.addHandler(fh)

    return logger
|
||||
|
||||
|
||||
# cache the opened file object, so that different calls to `setup_logger`
|
||||
# with the same file name can safely write to the same file.
|
||||
@functools.lru_cache(maxsize=None)
def _cached_log_stream(filename):
    """
    Open `filename` in append mode through PathManager.

    The result is memoized per `filename`, so repeated calls with the same
    name return the same stream object.
    """
    stream = PathManager.open(filename, "a")
    return stream
|
||||
|
||||
|
||||
"""
|
||||
Below are some other convenient logging methods.
|
||||
They are mainly adapted from
|
||||
https://github.com/abseil/abseil-py/blob/master/absl/logging/__init__.py
|
||||
"""
|
||||
|
||||
|
||||
def _find_caller():
    """
    Walk up the stack, starting two frames above this function, to the first
    frame whose file name does not contain "utils/logger.".

    Returns:
        str: module name of the caller ("__main__" is reported as "detectron2")
        tuple: a hashable key to be used to identify different callers,
            made of (file name, line number, function name)

    Returns None implicitly if no such frame exists.
    """
    logger_file_marker = os.path.join("utils", "logger.")
    frame = sys._getframe(2)
    while frame:
        code = frame.f_code
        if logger_file_marker in code.co_filename:
            # Still inside this logging module; keep walking up.
            frame = frame.f_back
            continue
        mod_name = frame.f_globals["__name__"]
        if mod_name == "__main__":
            mod_name = "detectron2"
        return mod_name, (code.co_filename, frame.f_lineno, code.co_name)
|
||||
|
||||
|
||||
_LOG_COUNTER = Counter()
|
||||
_LOG_TIMER = {}
|
||||
|
||||
|
||||
def log_first_n(lvl, msg, n=1, *, name=None, key="caller"):
    """
    Log only for the first n times.

    Args:
        lvl (int): the logging level
        msg (str): the message to log
        n (int): maximum number of times to log for one dedup key
        name (str): name of the logger to use. Will use the caller's module by default.
        key (str or tuple[str]): the string(s) can be one of "caller" or
            "message", which defines how to identify duplicated logs.
            For example, if called with `n=1, key="caller"`, this function
            will only log the first call from the same caller, regardless of
            the message content.
            If called with `n=1, key="message"`, this function will log the
            same content only once, even if they are called from different places.
            If called with `n=1, key=("caller", "message")`, this function
            will not log only if the same caller has logged the same message before.
    """
    if isinstance(key, str):
        key = (key,)
    assert len(key) > 0

    caller_module, caller_key = _find_caller()
    # Build the dedup key from the parts selected in `key`.
    caller_part = caller_key if "caller" in key else ()
    message_part = (msg,) if "message" in key else ()
    hash_key = caller_part + message_part

    _LOG_COUNTER[hash_key] += 1
    if _LOG_COUNTER[hash_key] > n:
        return
    logging.getLogger(name or caller_module).log(lvl, msg)
|
||||
|
||||
|
||||
def log_every_n(lvl, msg, n=1, *, name=None):
    """
    Log once per n times.

    Args:
        lvl (int): the logging level
        msg (str): the message to log
        n (int): log on the 1st, (n+1)-th, (2n+1)-th ... call from the same caller
        name (str): name of the logger to use. Will use the caller's module by default.
    """
    caller_module, caller_key = _find_caller()
    _LOG_COUNTER[caller_key] += 1
    if n != 1 and _LOG_COUNTER[caller_key] % n != 1:
        return
    logging.getLogger(name or caller_module).log(lvl, msg)
|
||||
|
||||
|
||||
def log_every_n_seconds(lvl, msg, n=1, *, name=None):
    """
    Log no more than once per n seconds.

    Args:
        lvl (int): the logging level
        msg (str): the message to log
        n (int): minimum number of seconds between two logs from the same caller
        name (str): name of the logger to use. Will use the caller's module by default.
    """
    caller_module, caller_key = _find_caller()
    previous = _LOG_TIMER.get(caller_key, None)
    now = time.time()
    # Suppress if this caller logged less than n seconds ago.
    if previous is not None and now - previous < n:
        return
    logging.getLogger(name or caller_module).log(lvl, msg)
    _LOG_TIMER[caller_key] = now
|
||||
|
||||
|
||||
def create_small_table(small_dict):
    """
    Create a small table using the keys of small_dict as headers. This is only
    suitable for small dictionaries.

    Args:
        small_dict (dict): a result dictionary of only a few items.

    Returns:
        str: the table as a string.
    """
    # Transpose (key, value) pairs into a tuple of keys and a tuple of values.
    headers, row = zip(*small_dict.items())
    return tabulate(
        [row],
        headers=headers,
        tablefmt="pipe",
        floatfmt=".3f",
        stralign="center",
        numalign="center",
    )
|
|
@ -0,0 +1,84 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
|
||||
import logging
|
||||
from contextlib import contextmanager
|
||||
from functools import wraps
|
||||
import torch
|
||||
|
||||
__all__ = ["retry_if_cuda_oom"]
|
||||
|
||||
|
||||
@contextmanager
def _ignore_torch_cuda_oom():
    """
    A context which ignores CUDA OOM exception from pytorch.

    A RuntimeError whose message contains "CUDA out of memory. " is
    swallowed; any other RuntimeError is re-raised.
    """
    try:
        yield
    except RuntimeError as err:
        # NOTE: the string may change?
        if "CUDA out of memory. " not in str(err):
            raise
|
||||
|
||||
|
||||
def retry_if_cuda_oom(func):
    """
    Wrap `func` so that it is retried when pytorch raises a CUDA OOM error.

    The wrapper makes up to three attempts:

    1. Call `func` as given.
    2. Call `torch.cuda.empty_cache()` and call `func` again.
    3. Move the inputs to CPU and call `func` one last time. In this case
       `func` is expected to dispatch to a CPU implementation, and the return
       values may be CPU tensors; converting them back to CUDA is the caller's
       responsibility.

    Args:
        func: a stateless callable that takes tensor-like objects as arguments

    Returns:
        a callable which retries `func` if OOM is encountered.

    Examples:
    ::
        output = retry_if_cuda_oom(some_torch_function)(input1, input2)
        # output may be on CPU even if inputs are on GPU

    Note:
        1. Only top-level positional and keyword arguments are inspected for
           the CPU conversion: an argument is converted if it has a `.device`
           of type "cuda" and a `.to` method. Nested structures of tensors
           are not supported.

        2. Since the function might be called more than once, it has to be
           stateless.
    """

    def _as_cpu(obj):
        # Only the duck-type probe is guarded; errors from `.to` itself propagate.
        try:
            is_cuda_like = obj.device.type == "cuda" and hasattr(obj, "to")
        except AttributeError:
            return obj
        return obj.to(device="cpu") if is_cuda_like else obj

    @wraps(func)
    def wrapped(*args, **kwargs):
        # Attempt 1: plain call. An OOM is swallowed and execution falls through.
        with _ignore_torch_cuda_oom():
            return func(*args, **kwargs)

        # Attempt 2: release cached blocks, then try again.
        torch.cuda.empty_cache()
        with _ignore_torch_cuda_oom():
            return func(*args, **kwargs)

        # Attempt 3: run on CPU. This is much slower, so leave a notice in the log.
        logging.getLogger(__name__).info(
            "Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))
        )
        cpu_kwargs = {key: _as_cpu(val) for key, val in kwargs.items()}
        return func(*map(_as_cpu, args), **cpu_kwargs)

    return wrapped
|
|
@ -0,0 +1,6 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
|
||||
# Keep this module for backward compatibility.
|
||||
from fvcore.common.registry import Registry # noqa
|
||||
|
||||
__all__ = ["Registry"]
|
|
@ -0,0 +1,29 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import cloudpickle
|
||||
|
||||
|
||||
class PicklableWrapper(object):
    """
    Wrap an object to make it more picklable, note that it uses
    heavy weight serialization libraries that are slower than pickle.
    It's best to use it only on closures (which are usually not picklable).

    Pickling serializes the wrapped object with `cloudpickle.dumps`; unpickling
    calls `cloudpickle.loads` on that payload.

    This is a simplified version of
    https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py
    """

    def __init__(self, obj):
        """
        Args:
            obj: the object to wrap, typically a callable.
        """
        self._obj = obj

    def __reduce__(self):
        """Serialize the wrapped object with cloudpickle."""
        s = cloudpickle.dumps(self._obj)
        return cloudpickle.loads, (s,)

    def __call__(self, *args, **kwargs):
        """Forward the call to the wrapped object."""
        return self._obj(*args, **kwargs)

    def __getattr__(self, attr):
        """
        Delegate attribute lookups to the wrapped object, so that the wrapper
        can be used seamlessly in place of it.

        Raises:
            AttributeError: if `_obj` itself has not been set on this instance,
                or if the wrapped object lacks `attr`.
        """
        # __getattr__ only runs after normal lookup has failed, so reaching here
        # with "_obj" means the instance was never initialised. Calling
        # getattr(self, "_obj") again would recurse without bound; report the
        # missing attribute instead.
        if attr == "_obj":
            raise AttributeError(
                "'{}' object has no attribute '{}'".format(type(self).__name__, attr)
            )
        return getattr(self._obj, attr)
|
|
@ -0,0 +1,62 @@
|
|||
import random
|
||||
from collections import deque
|
||||
import numpy as np
|
||||
|
||||
class Store:
    """
    Per-class bounded buffers.

    Holds one `deque(maxlen=items_per_class)` for each of `total_num_classes`
    class ids.
    """

    def __init__(self, total_num_classes, items_per_class, shuffle=False):
        self.shuffle = shuffle
        self.items_per_class = items_per_class
        self.total_num_classes = total_num_classes
        self.store = self._fresh_buckets()

    def _fresh_buckets(self):
        """Build one empty bounded deque per class."""
        return [deque(maxlen=self.items_per_class) for _ in range(self.total_num_classes)]

    def add(self, items, class_ids):
        """Append `items[i]` to the buffer of `class_ids[i]`, for every i."""
        for position, class_id in enumerate(class_ids):
            bucket = self.store[class_id]
            bucket.append(items[position])

    def retrieve(self, class_id):
        """
        Return the stored content.

        With `class_id == -1`, return one list per class, each stored item
        converted to a list. Otherwise return the elements of all items stored
        for `class_id`, flattened into one list and shuffled if `self.shuffle`.
        """
        if class_id == -1:
            return [
                [list(entry) for entry in self.store[idx]]
                for idx in range(self.total_num_classes)
            ]

        flat = [element for entry in self.store[class_id] for element in entry]
        if self.shuffle:
            random.shuffle(flat)
        return flat

    def reset(self):
        """Drop everything stored so far."""
        self.store = self._fresh_buckets()

    def __str__(self):
        pieces = [self.__class__.__name__ + '(']
        for idx, bucket in enumerate(self.store):
            pieces.append('\n Class ' + str(idx) + ' --> ' + str(len(bucket)) + ' items')
        pieces.append(' )')
        return ''.join(pieces)

    def __repr__(self):
        return str(self)

    def __len__(self):
        return sum(len(bucket) for bucket in self.store)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: fill a 10-class store that keeps 3 items per class,
    # then print the per-class content and the summary.
    store = Store(10, 3)
    for batch, ids in (
        (('a', 'b', 'c', 'd', 'e', 'f'), (1, 1, 9, 1, 0, 1)),
        (('h',), (4,)),
    ):
        store.add(batch, ids)

    # Other things worth trying by hand: store.retrieve(1), store.retrieve(3),
    # store.retrieve(9), and len(store) before and after store.reset().
    print(store.retrieve(-1))

    print(store)
|
|
@ -0,0 +1,62 @@
|
|||
import random
|
||||
from collections import deque
|
||||
import numpy as np
|
||||
|
||||
class Store:
    """
    Per-class bounded buffers.

    Holds one `deque(maxlen=items_per_class)` for each of `total_num_classes`
    class ids.
    """

    def __init__(self, total_num_classes, items_per_class, shuffle=False):
        self.shuffle = shuffle
        self.items_per_class = items_per_class
        self.total_num_classes = total_num_classes
        self.store = self._fresh_buckets()

    def _fresh_buckets(self):
        """Build one empty bounded deque per class."""
        return [deque(maxlen=self.items_per_class) for _ in range(self.total_num_classes)]

    def add(self, items, class_ids):
        """Append `items[i]` to the buffer of `class_ids[i]`, for every i."""
        for position, class_id in enumerate(class_ids):
            bucket = self.store[class_id]
            bucket.append(items[position])

    def retrieve(self, class_id):
        """
        Return the stored content.

        With `class_id == -1`, return one list per class holding the stored
        items as they are. Otherwise return the elements of all items stored
        for `class_id`, flattened into one list and shuffled if `self.shuffle`.
        """
        if class_id == -1:
            return [list(self.store[idx]) for idx in range(self.total_num_classes)]

        flat = [element for entry in self.store[class_id] for element in entry]
        if self.shuffle:
            random.shuffle(flat)
        return flat

    def reset(self):
        """Drop everything stored so far."""
        self.store = self._fresh_buckets()

    def __str__(self):
        pieces = [self.__class__.__name__ + '(']
        for idx, bucket in enumerate(self.store):
            pieces.append('\n Class ' + str(idx) + ' --> ' + str(len(bucket)) + ' items')
        pieces.append(' )')
        return ''.join(pieces)

    def __repr__(self):
        return str(self)

    def __len__(self):
        return sum(len(bucket) for bucket in self.store)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: fill a 10-class store that keeps 3 items per class,
    # then print the per-class content and the summary.
    store = Store(10, 3)
    for batch, ids in (
        (('a', 'b', 'c', 'd', 'e', 'f'), (1, 1, 9, 1, 0, 1)),
        (('h',), (4,)),
    ):
        store.add(batch, ids)

    # Other things worth trying by hand: store.retrieve(1), store.retrieve(3),
    # store.retrieve(9), and len(store) before and after store.reset().
    print(store.retrieve(-1))

    print(store)
|
|
@ -0,0 +1,235 @@
|
|||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
|
||||
import numpy as np
|
||||
import pycocotools.mask as mask_util
|
||||
|
||||
from detectron2.utils.visualizer import (
|
||||
ColorMode,
|
||||
Visualizer,
|
||||
_create_text_labels,
|
||||
_PanopticPrediction,
|
||||
)
|
||||
|
||||
from .colormap import random_color
|
||||
|
||||
|
||||
class _DetectedInstance:
    """
    Record of one object detected in a video frame, kept so that its color
    can be transferred to objects in later frames.

    Attributes:
        label (int):
        bbox (tuple[float]):
        mask_rle (dict):
        color (tuple[float]): RGB colors in range (0, 1)
        ttl (int): time-to-live for the instance. For example, if ttl=2,
            the instance color can be transferred to objects in the next two frames.
    """

    __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"]

    def __init__(self, label, bbox, mask_rle, color, ttl):
        self.label, self.bbox = label, bbox
        self.mask_rle = mask_rle
        self.color, self.ttl = color, ttl
|
||||
|
||||
|
||||
class VideoVisualizer:
    """
    Draw predictions on video frames while trying to keep the color of each
    instance stable from one frame to the next.

    Instances seen in earlier calls are remembered in `self._old_instances`;
    `_assign_colors` reuses their colors for sufficiently overlapping new
    instances that carry the same label.
    """

    def __init__(self, metadata, instance_mode=ColorMode.IMAGE):
        """
        Args:
            metadata (MetadataCatalog): image metadata.
            instance_mode (ColorMode): only `ColorMode.IMAGE` and
                `ColorMode.IMAGE_BW` are accepted.
        """
        self.metadata = metadata
        self._old_instances = []
        assert instance_mode in [
            ColorMode.IMAGE,
            ColorMode.IMAGE_BW,
        ], "Other mode not supported yet."
        self._instance_mode = instance_mode

    def draw_instance_predictions(self, frame, predictions):
        """
        Draw instance-level prediction results on an image.

        Args:
            frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255].
            predictions (Instances): the output of an instance detection/segmentation
                model. Following fields will be used to draw:
                "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").

        Returns:
            output (VisImage): image object with visualizations.
        """
        frame_visualizer = Visualizer(frame, self.metadata)
        num_instances = len(predictions)
        if num_instances == 0:
            return frame_visualizer.output

        boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None
        scores = predictions.scores if predictions.has("scores") else None
        classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None
        keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None

        if predictions.has("pred_masks"):
            masks = predictions.pred_masks
            # Mask IOU is not yet enabled here, so no RLE encoding of the masks
            # is computed; instances are matched on boxes only (mask_rle=None below).
        else:
            masks = None

        detected = [
            _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8)
            for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)

        labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None))

        if self._instance_mode == ColorMode.IMAGE_BW:
            # any() returns uint8 tensor
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                (masks.any(dim=0) > 0).numpy() if masks is not None else None
            )
            alpha = 0.3
        else:
            alpha = 0.5

        frame_visualizer.overlay_instances(
            boxes=None if masks is not None else boxes,  # boxes are a bit distracting
            masks=masks,
            labels=labels,
            keypoints=keypoints,
            assigned_colors=colors,
            alpha=alpha,
        )

        return frame_visualizer.output

    def draw_sem_seg(self, frame, sem_seg, area_threshold=None):
        """
        Draw a semantic segmentation on a frame.

        Args:
            frame (ndarray): the image to draw on.
            sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W),
                each value is the integer label.
            area_threshold (Optional[int]): only draw segmentations larger than the threshold

        Returns:
            output (VisImage): image object with visualizations.
        """
        # don't need to do anything special
        frame_visualizer = Visualizer(frame, self.metadata)
        # Forward the caller's threshold; it used to be dropped in favour of None.
        frame_visualizer.draw_sem_seg(sem_seg, area_threshold=area_threshold)
        return frame_visualizer.output

    def draw_panoptic_seg_predictions(
        self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5
    ):
        """
        Draw panoptic predictions: semantic ("stuff") masks first, then
        instance masks with colors tracked across frames.

        Args:
            frame (ndarray): the image to draw on.
            panoptic_seg, segments_info: passed as-is to `_PanopticPrediction`.
            area_threshold (Optional[int]): forwarded to `draw_binary_mask` for
                the semantic masks.
            alpha (float): forwarded to the drawing calls for both mask kinds.

        Returns:
            output (VisImage): image object with visualizations.
        """
        frame_visualizer = Visualizer(frame, self.metadata)
        pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata)

        if self._instance_mode == ColorMode.IMAGE_BW:
            frame_visualizer.output.img = frame_visualizer._create_grayscale_image(
                pred.non_empty_mask()
            )

        # draw mask for all semantic segments first i.e. "stuff"
        for mask, sinfo in pred.semantic_masks():
            category_idx = sinfo["category_id"]
            try:
                mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]]
            except AttributeError:
                # Metadata defines no stuff_colors: let the visualizer choose.
                mask_color = None

            frame_visualizer.draw_binary_mask(
                mask,
                color=mask_color,
                text=self.metadata.stuff_classes[category_idx],
                alpha=alpha,
                area_threshold=area_threshold,
            )

        all_instances = list(pred.instance_masks())
        if len(all_instances) == 0:
            return frame_visualizer.output
        # draw mask for all instances second
        masks, sinfo = list(zip(*all_instances))
        num_instances = len(masks)
        masks_rles = mask_util.encode(
            np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F")
        )
        assert len(masks_rles) == num_instances

        category_ids = [x["category_id"] for x in sinfo]
        detected = [
            _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8)
            for i in range(num_instances)
        ]
        colors = self._assign_colors(detected)
        labels = [self.metadata.thing_classes[k] for k in category_ids]

        frame_visualizer.overlay_instances(
            boxes=None,
            masks=masks,
            labels=labels,
            keypoints=None,
            assigned_colors=colors,
            alpha=alpha,
        )
        return frame_visualizer.output

    def _assign_colors(self, instances):
        """
        Naive tracking heuristics to assign same color to the same instance,
        will update the internal state of tracked instances.

        Args:
            instances (list[_DetectedInstance]): instances of the current frame.
                Must be non-empty; their `color` is filled in place.

        Returns:
            list[tuple[float]]: list of colors.
        """

        # Compute iou with either boxes or masks:
        # Builtin bool: the `np.bool` alias was removed in NumPy 1.24.
        is_crowd = np.zeros((len(instances),), dtype=bool)
        if instances[0].bbox is None:
            assert instances[0].mask_rle is not None
            # use mask iou only when box iou is None
            # because box seems good enough
            rles_old = [x.mask_rle for x in self._old_instances]
            rles_new = [x.mask_rle for x in instances]
            ious = mask_util.iou(rles_old, rles_new, is_crowd)
            threshold = 0.5
        else:
            boxes_old = [x.bbox for x in self._old_instances]
            boxes_new = [x.bbox for x in instances]
            ious = mask_util.iou(boxes_old, boxes_new, is_crowd)
            threshold = 0.6
        if len(ious) == 0:
            # No usable IOU result: fall back to an all-zero (old x new) matrix
            # so the indexing below still works.
            ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32")

        # Only allow matching instances of the same label:
        for old_idx, old in enumerate(self._old_instances):
            for new_idx, new in enumerate(instances):
                if old.label != new.label:
                    ious[old_idx, new_idx] = 0

        matched_new_per_old = np.asarray(ious).argmax(axis=1)
        max_iou_per_old = np.asarray(ious).max(axis=1)

        # Try to find match for each old instance:
        extra_instances = []
        for idx, inst in enumerate(self._old_instances):
            if max_iou_per_old[idx] > threshold:
                newidx = matched_new_per_old[idx]
                if instances[newidx].color is None:
                    instances[newidx].color = inst.color
                    continue
            # If an old instance does not match any new instances,
            # keep it for the next frame in case it is just missed by the detector
            inst.ttl -= 1
            if inst.ttl > 0:
                extra_instances.append(inst)

        # Assign random color to newly-detected instances:
        for inst in instances:
            if inst.color is None:
                inst.color = random_color(rgb=True, maximum=1)
        self._old_instances = instances[:] + extra_instances
        return [d.color for d in instances]
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue