Add files via upload

main
RE-OWOD 2022-01-04 17:26:58 +08:00 committed by GitHub
parent e60afe4993
commit 39a272f3d3
16 changed files with 1529 additions and 0 deletions

tools/README.md 100644
@@ -0,0 +1,45 @@
This directory contains a few scripts that use detectron2.
* `train_net.py`
An example training script that's made to train builtin models of detectron2.
For usage, see [GETTING_STARTED.md](../GETTING_STARTED.md).
* `plain_train_net.py`
Similar to `train_net.py`, but implements a training loop instead of using `Trainer`.
This script includes fewer features, but is simpler to read and modify.
* `benchmark.py`
Benchmark the training speed, inference speed, or data-loading speed of a given config (a concrete multi-GPU example follows this list).
Usage:
```
python benchmark.py --config-file config.yaml --task train/eval/data [optional DDP flags]
```
* `visualize_json_results.py`
Visualize the json instance detection/segmentation results dumped by `COCOEvaluator` or `LVISEvaluator`.
Usage:
```
python visualize_json_results.py --input x.json --output dir/ --dataset coco_2017_val
```
If you're not using a builtin dataset, you'll need to write your own script or modify this one.
* `visualize_data.py`
Visualize ground truth raw annotations or training data (after preprocessing/augmentations).
Usage:
```
python visualize_data.py --config-file config.yaml --source annotation/dataloader --output-dir dir/ [--show]
```
NOTE: the script does not stop by itself when using `--source dataloader` because a training
dataloader is usually infinite.
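As referenced in the `benchmark.py` entry above, a concrete multi-GPU invocation might look like the following sketch; `config.yaml` is a placeholder for your own config, and `--num-gpus` comes from detectron2's `default_argument_parser`:
```
python benchmark.py --config-file config.yaml --task train --num-gpus 2
```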

tools/analyze_model.py
@@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
from collections import Counter
import tqdm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader
from detectron2.engine import default_argument_parser
from detectron2.modeling import build_model
from detectron2.utils.analysis import (
activation_count_operators,
flop_count_operators,
parameter_count_table,
)
from detectron2.utils.logger import setup_logger
logger = logging.getLogger("detectron2")
def setup(args):
cfg = get_cfg()
cfg.merge_from_file(args.config_file)
cfg.DATALOADER.NUM_WORKERS = 0
cfg.merge_from_list(args.opts)
cfg.freeze()
setup_logger()
return cfg
def do_flop(cfg):
data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
model = build_model(cfg)
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()
counts = Counter()
total_flops = []
for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa
count = flop_count_operators(model, data)
counts += count
total_flops.append(sum(count.values()))
logger.info(
"(G)Flops for Each Type of Operators:\n" + str([(k, v / (idx + 1)) for k, v in counts.items()])
)
logger.info("Total (G)Flops: {}±{}".format(np.mean(total_flops), np.std(total_flops)))
def do_activation(cfg):
data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
model = build_model(cfg)
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()
counts = Counter()
total_activations = []
for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa
count = activation_count_operators(model, data)
counts += count
total_activations.append(sum(count.values()))
logger.info(
"(Million) Activations for Each Type of Operators:\n"
+ str([(k, v / (idx + 1)) for k, v in counts.items()])
)
logger.info(
"Total (Million) Activations: {}±{}".format(
np.mean(total_activations), np.std(total_activations)
)
)
def do_parameter(cfg):
model = build_model(cfg)
logger.info("Parameter Count:\n" + parameter_count_table(model, max_depth=5))
def do_structure(cfg):
model = build_model(cfg)
logger.info("Model Structure:\n" + str(model))
if __name__ == "__main__":
parser = default_argument_parser(
epilog="""
Examples:
To show parameters of a model:
$ ./analyze_model.py --tasks parameter \\
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
Flops and activations are data-dependent, therefore inputs and model weights
are needed to count them:
$ ./analyze_model.py --num-inputs 100 --tasks flop \\
--config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \\
MODEL.WEIGHTS /path/to/model.pkl
"""
)
parser.add_argument(
"--tasks",
choices=["flop", "activation", "parameter", "structure"],
required=True,
nargs="+",
)
parser.add_argument(
"--num-inputs",
default=100,
type=int,
help="number of inputs used to compute statistics for flops/activations, "
"both are data dependent.",
)
args = parser.parse_args()
assert not args.eval_only
assert args.num_gpus == 1
cfg = setup(args)
for task in args.tasks:
{
"flop": do_flop,
"activation": do_activation,
"parameter": do_parameter,
"structure": do_structure,
}[task](cfg)

tools/benchmark.py 100644
@@ -0,0 +1,167 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
A script to benchmark builtin models.
Note: this script has an extra dependency on psutil.
"""
import itertools
import logging
import psutil
import torch
import tqdm
from fvcore.common.timer import Timer
from torch.nn.parallel import DistributedDataParallel
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import (
DatasetFromList,
build_detection_test_loader,
build_detection_train_loader,
)
from detectron2.engine import SimpleTrainer, default_argument_parser, hooks, launch
from detectron2.modeling import build_model
from detectron2.solver import build_optimizer
from detectron2.utils import comm
from detectron2.utils.events import CommonMetricPrinter
from detectron2.utils.logger import setup_logger
logger = logging.getLogger("detectron2")
def setup(args):
cfg = get_cfg()
cfg.merge_from_file(args.config_file)
cfg.SOLVER.BASE_LR = 0.001 # Avoid NaNs. Not useful in this script anyway.
cfg.merge_from_list(args.opts)
cfg.freeze()
setup_logger(distributed_rank=comm.get_rank())
return cfg
def benchmark_data(args):
cfg = setup(args)
timer = Timer()
dataloader = build_detection_train_loader(cfg)
logger.info("Initialize loader using {} seconds.".format(timer.seconds()))
timer.reset()
itr = iter(dataloader)
for i in range(10): # warmup
next(itr)
if i == 0:
startup_time = timer.seconds()
timer = Timer()
max_iter = 1000
for _ in tqdm.trange(max_iter):
next(itr)
logger.info(
"{} iters ({} images) in {} seconds.".format(
max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds()
)
)
logger.info("Startup time: {} seconds".format(startup_time))
vram = psutil.virtual_memory()
logger.info(
"RAM Usage: {:.2f}/{:.2f} GB".format(
(vram.total - vram.available) / 1024 ** 3, vram.total / 1024 ** 3
)
)
# test for a few more rounds
for _ in range(10):
timer = Timer()
max_iter = 1000
for _ in tqdm.trange(max_iter):
next(itr)
logger.info(
"{} iters ({} images) in {} seconds.".format(
max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds()
)
)
def benchmark_train(args):
cfg = setup(args)
model = build_model(cfg)
logger.info("Model:\n{}".format(model))
if comm.get_world_size() > 1:
model = DistributedDataParallel(
model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
)
optimizer = build_optimizer(cfg, model)
checkpointer = DetectionCheckpointer(model, optimizer=optimizer)
checkpointer.load(cfg.MODEL.WEIGHTS)
cfg.defrost()
cfg.DATALOADER.NUM_WORKERS = 0
data_loader = build_detection_train_loader(cfg)
dummy_data = list(itertools.islice(data_loader, 100))
def f():
data = DatasetFromList(dummy_data, copy=False)
while True:
yield from data
max_iter = 400
trainer = SimpleTrainer(model, f(), optimizer)
trainer.register_hooks(
[hooks.IterationTimer(), hooks.PeriodicWriter([CommonMetricPrinter(max_iter)])]
)
trainer.train(1, max_iter)
@torch.no_grad()
def benchmark_eval(args):
cfg = setup(args)
model = build_model(cfg)
model.eval()
logger.info("Model:\n{}".format(model))
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
cfg.defrost()
cfg.DATALOADER.NUM_WORKERS = 0
data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
dummy_data = DatasetFromList(list(itertools.islice(data_loader, 100)), copy=False)
def f():
while True:
yield from dummy_data
for k in range(5): # warmup
model(dummy_data[k])
max_iter = 300
timer = Timer()
with tqdm.tqdm(total=max_iter) as pbar:
for idx, d in enumerate(f()):
if idx == max_iter:
break
model(d)
pbar.update()
logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds()))
if __name__ == "__main__":
parser = default_argument_parser()
parser.add_argument("--task", choices=["train", "eval", "data"], required=True)
args = parser.parse_args()
assert not args.eval_only
if args.task == "data":
f = benchmark_data
elif args.task == "train":
"""
Note: training speed may not be representative.
The training cost of an R-CNN model varies with the content of the data
and the quality of the model.
"""
f = benchmark_train
elif args.task == "eval":
f = benchmark_eval
# only benchmark single-GPU inference.
assert args.num_gpus == 1 and args.num_machines == 1
launch(f, args.num_gpus, args.num_machines, args.machine_rank, args.dist_url, args=(args,))

tools/convert-torchvision-to-d2.py
@@ -0,0 +1,56 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import pickle as pkl
import sys
import torch
"""
Usage:
# download one of the ResNet{18,34,50,101,152} models from torchvision:
wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth
# run the conversion
./convert-torchvision-to-d2.py r50.pth r50.pkl
# Then, use r50.pkl with the following changes in config:
MODEL:
WEIGHTS: "/path/to/r50.pkl"
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
RESNETS:
DEPTH: 50
STRIDE_IN_1X1: False
INPUT:
FORMAT: "RGB"
These models typically produce slightly worse results than the
pre-trained ResNets we use in official configs, which are the
original ResNet models released by MSRA.
"""
if __name__ == "__main__":
input = sys.argv[1]
obj = torch.load(input, map_location="cpu")
newmodel = {}
for k in list(obj.keys()):
old_k = k
if "layer" not in k:
k = "stem." + k
for t in [1, 2, 3, 4]:
k = k.replace("layer{}".format(t), "res{}".format(t + 1))
for t in [1, 2, 3]:
k = k.replace("bn{}".format(t), "conv{}.norm".format(t))
k = k.replace("downsample.0", "shortcut")
k = k.replace("downsample.1", "shortcut.norm")
print(old_k, "->", k)
newmodel[k] = obj.pop(old_k).detach().numpy()
res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True}
with open(sys.argv[2], "wb") as f:
pkl.dump(res, f)
if obj:
print("Unconverted keys:", obj.keys())

tools/deploy/CMakeLists.txt
@@ -0,0 +1,21 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# See https://pytorch.org/tutorials/advanced/cpp_frontend.html
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
project(caffe2_mask_rcnn)
find_package(Torch REQUIRED)
find_package(gflags REQUIRED)
find_package(OpenCV REQUIRED)
add_executable(caffe2_mask_rcnn caffe2_mask_rcnn.cpp)
target_link_libraries(
caffe2_mask_rcnn
"${TORCH_LIBRARIES}" gflags glog protobuf ${OpenCV_LIBS})
set_property(TARGET caffe2_mask_rcnn PROPERTY CXX_STANDARD 14)
add_executable(torchscript_traced_mask_rcnn torchscript_traced_mask_rcnn.cpp)
target_link_libraries(
torchscript_traced_mask_rcnn
"${TORCH_LIBRARIES}" ${OpenCV_LIBS})
set_property(TARGET torchscript_traced_mask_rcnn PROPERTY CXX_STANDARD 14)

tools/deploy/README.md
@@ -0,0 +1,9 @@
This directory contains:
1. A script that converts a detectron2 model to caffe2 format.
2. An example that loads a Mask R-CNN model in caffe2 format and runs inference.
See [tutorial](https://detectron2.readthedocs.io/tutorials/deployment.html)
for their usage.
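A typical invocation of the conversion script might look like the following sketch. The script name `caffe2_converter.py` and the paths are assumptions for illustration; the flags (`--config-file`, `--output`, `--run-eval`) and the trailing config overrides match the converter included in this directory:
```
./caffe2_converter.py --config-file /path/to/config.yaml \
    --output ./caffe2_model --run-eval \
    MODEL.WEIGHTS /path/to/model_final.pkl MODEL.DEVICE cpu
```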

tools/deploy/caffe2_converter.py
@@ -0,0 +1,97 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import argparse
import os
import onnx
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format
from detectron2.export import Caffe2Tracer, add_export_config
from detectron2.modeling import build_model
from detectron2.utils.env import TORCH_VERSION
from detectron2.utils.logger import setup_logger
def setup_cfg(args):
cfg = get_cfg()
# cuda context is initialized before creating dataloader, so we don't fork anymore
cfg.DATALOADER.NUM_WORKERS = 0
cfg = add_export_config(cfg)
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
if cfg.MODEL.DEVICE != "cpu":
assert TORCH_VERSION >= (1, 5), "PyTorch>=1.5 required for GPU conversion!"
return cfg
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Convert a model using caffe2 tracing.")
parser.add_argument(
"--format",
choices=["caffe2", "onnx", "torchscript"],
help="output format",
default="caffe2",
)
parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
parser.add_argument("--run-eval", action="store_true")
parser.add_argument("--output", help="output directory for the converted model")
parser.add_argument(
"opts",
help="Modify config options using the command-line",
default=None,
nargs=argparse.REMAINDER,
)
args = parser.parse_args()
logger = setup_logger()
logger.info("Command line arguments: " + str(args))
os.makedirs(args.output, exist_ok=True)
cfg = setup_cfg(args)
# create a torch model
torch_model = build_model(cfg)
DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS)
# get a sample data
data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
first_batch = next(iter(data_loader))
# convert and save caffe2 model
tracer = Caffe2Tracer(cfg, torch_model, first_batch)
if args.format == "caffe2":
caffe2_model = tracer.export_caffe2()
caffe2_model.save_protobuf(args.output)
# draw the caffe2 graph
caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=first_batch)
elif args.format == "onnx":
onnx_model = tracer.export_onnx()
onnx.save(onnx_model, os.path.join(args.output, "model.onnx"))
elif args.format == "torchscript":
script_model = tracer.export_torchscript()
script_model.save(os.path.join(args.output, "model.ts"))
# Recursively print IR of all modules
with open(os.path.join(args.output, "model_ts_IR.txt"), "w") as f:
try:
f.write(script_model._actual_script_module._c.dump_to_str(True, False, False))
except AttributeError:
pass
# Print IR of the entire graph (all submodules inlined)
with open(os.path.join(args.output, "model_ts_IR_inlined.txt"), "w") as f:
f.write(str(script_model.inlined_graph))
# Print the model structure in pytorch style
with open(os.path.join(args.output, "model.txt"), "w") as f:
f.write(str(script_model))
# run evaluation with the converted model
if args.run_eval:
assert args.format == "caffe2", "Python inference in other formats is not yet supported."
dataset = cfg.DATASETS.TEST[0]
data_loader = build_detection_test_loader(cfg, dataset)
# NOTE: hard-coded evaluator. change to the evaluator for your dataset
evaluator = COCOEvaluator(dataset, cfg, True, args.output)
metrics = inference_on_dataset(caffe2_model, data_loader, evaluator)
print_csv_format(metrics)

tools/deploy/caffe2_mask_rcnn.cpp
@@ -0,0 +1,119 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
#include <c10/util/Flags.h>
#include <caffe2/core/blob.h>
#include <caffe2/core/common.h>
#include <caffe2/core/init.h>
#include <caffe2/core/net.h>
#include <caffe2/core/workspace.h>
#include <caffe2/utils/proto_utils.h>
#include <opencv2/opencv.hpp>
#include <cassert>
#include <chrono>
#include <iostream>
#include <string>
C10_DEFINE_string(predict_net, "", "path to model.pb");
C10_DEFINE_string(init_net, "", "path to model_init.pb");
C10_DEFINE_string(input, "", "path to input image");
using namespace std;
using namespace caffe2;
int main(int argc, char** argv) {
caffe2::GlobalInit(&argc, &argv);
string predictNetPath = FLAGS_predict_net;
string initNetPath = FLAGS_init_net;
cv::Mat input = cv::imread(FLAGS_input, cv::IMREAD_COLOR);
const int height = input.rows;
const int width = input.cols;
// FPN models require divisibility of 32
assert(height % 32 == 0 && width % 32 == 0);
const int batch = 1;
const int channels = 3;
// initialize Net and Workspace
caffe2::NetDef initNet_, predictNet_;
CAFFE_ENFORCE(ReadProtoFromFile(initNetPath, &initNet_));
CAFFE_ENFORCE(ReadProtoFromFile(predictNetPath, &predictNet_));
Workspace workSpace;
for (auto& str : predictNet_.external_input()) {
workSpace.CreateBlob(str);
}
CAFFE_ENFORCE(workSpace.CreateNet(predictNet_));
CAFFE_ENFORCE(workSpace.RunNetOnce(initNet_));
// setup inputs
auto data = BlobGetMutableTensor(workSpace.GetBlob("data"), caffe2::CPU);
data->Resize(batch, channels, height, width);
float* ptr = data->mutable_data<float>();
// HWC to CHW
for (int c = 0; c < 3; ++c) {
for (int i = 0; i < height * width; ++i) {
ptr[c * height * width + i] = static_cast<float>(input.data[3 * i + c]);
}
}
auto im_info =
BlobGetMutableTensor(workSpace.GetBlob("im_info"), caffe2::CPU);
im_info->Resize(batch, 3);
float* im_info_ptr = im_info->mutable_data<float>();
im_info_ptr[0] = height;
im_info_ptr[1] = width;
im_info_ptr[2] = 1.0;
// run the network
CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name()));
// run 3 more times to benchmark
int N_benchmark = 3;
auto start_time = chrono::high_resolution_clock::now();
for (int i = 0; i < N_benchmark; ++i) {
CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name()));
}
auto end_time = chrono::high_resolution_clock::now();
auto ms = chrono::duration_cast<chrono::microseconds>(end_time - start_time)
.count();
cout << "Latency (should vary with different inputs): "
<< ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl;
// parse Mask R-CNN outputs
caffe2::Tensor bbox(
workSpace.GetBlob("bbox_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
caffe2::Tensor scores(
workSpace.GetBlob("score_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
caffe2::Tensor labels(
workSpace.GetBlob("class_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
caffe2::Tensor mask_probs(
workSpace.GetBlob("mask_fcn_probs")->Get<caffe2::Tensor>(), caffe2::CPU);
cout << "bbox:" << bbox.DebugString() << endl;
cout << "scores:" << scores.DebugString() << endl;
cout << "labels:" << labels.DebugString() << endl;
cout << "mask_probs: " << mask_probs.DebugString() << endl;
int num_instances = bbox.sizes()[0];
for (int i = 0; i < num_instances; ++i) {
float score = scores.data<float>()[i];
if (score < 0.6)
continue; // skip them
const float* box = bbox.data<float>() + i * 4;
int label = labels.data<float>()[i];
cout << "Prediction " << i << ", xyxy=(";
cout << box[0] << ", " << box[1] << ", " << box[2] << ", " << box[3]
<< "); score=" << score << "; label=" << label << endl;
const float* mask = mask_probs.data<float>() +
i * mask_probs.size_from_dim(1) + label * mask_probs.size_from_dim(2);
// save the 28x28 mask
cv::Mat cv_mask(28, 28, CV_32FC1);
memcpy(cv_mask.data, mask, 28 * 28 * sizeof(float));
cv::imwrite("mask" + std::to_string(i) + ".png", cv_mask * 255.);
}
return 0;
}

tools/deploy/torchscript_traced_mask_rcnn.cpp
@@ -0,0 +1,76 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include <opencv2/opencv.hpp>
#include <iostream>
#include <string>
#include <c10/cuda/CUDAStream.h>
#include <torch/csrc/autograd/grad_mode.h>
#include <torch/script.h>
using namespace std;
// experimental. don't use
int main(int argc, const char* argv[]) {
if (argc != 3) {
cerr << "usage: " << argv[0] << " <model.ts> <input image>" << endl;
return 1;
}
std::string image_file = argv[2];
torch::autograd::AutoGradMode guard(false);
auto module = torch::jit::load(argv[1]);
assert(module.buffers().size() > 0);
// Assume that the entire model is on the same device.
// We just put input to this device.
auto device = (*begin(module.buffers())).device();
cv::Mat input_img = cv::imread(image_file, cv::IMREAD_COLOR);
const int height = input_img.rows;
const int width = input_img.cols;
// FPN models require divisibility of 32
assert(height % 32 == 0 && width % 32 == 0);
const int channels = 3;
auto input = torch::from_blob(
input_img.data, {1, height, width, channels}, torch::kUInt8);
// NHWC to NCHW
input = input.to(device, torch::kFloat).permute({0, 3, 1, 2}).contiguous();
std::array<float, 3> im_info_data{height * 1.0f, width * 1.0f, 1.0f};
auto im_info = torch::from_blob(im_info_data.data(), {1, 3}).to(device);
// run the network
auto output = module.forward({std::make_tuple(input, im_info)});
if (device.is_cuda())
c10::cuda::getCurrentCUDAStream().synchronize();
// run 3 more times to benchmark
int N_benchmark = 3;
auto start_time = chrono::high_resolution_clock::now();
for (int i = 0; i < N_benchmark; ++i) {
output = module.forward({std::make_tuple(input, im_info)});
if (device.is_cuda())
c10::cuda::getCurrentCUDAStream().synchronize();
}
auto end_time = chrono::high_resolution_clock::now();
auto ms = chrono::duration_cast<chrono::microseconds>(end_time - start_time)
.count();
cout << "Latency (should vary with different inputs): "
<< ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl;
auto outputs = output.toTuple()->elements();
// parse Mask R-CNN outputs
auto bbox = outputs[0].toTensor(), scores = outputs[1].toTensor(),
labels = outputs[2].toTensor(), mask_probs = outputs[3].toTensor();
cout << "bbox: " << bbox.toString() << " " << bbox.sizes() << endl;
cout << "scores: " << scores.toString() << " " << scores.sizes() << endl;
cout << "labels: " << labels.toString() << " " << labels.sizes() << endl;
cout << "mask_probs: " << mask_probs.toString() << " " << mask_probs.sizes()
<< endl;
int num_instances = bbox.sizes()[0];
cout << bbox << endl;
return 0;
}

tools/plain_train_net.py
@@ -0,0 +1,238 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Detectron2 training script with a plain training loop.
This script reads a given config file and runs the training or evaluation.
It is an entry point that is able to train standard models in detectron2.
In order to let one script support training of many models,
this script contains logic that is specific to these built-in models and therefore
may not be suitable for your own project.
For example, your research project perhaps only needs a single "evaluator".
Therefore, we recommend using detectron2 as a library and taking
this file as an example of how to use the library.
You may want to write your own script with your datasets and other customizations.
Compared to "train_net.py", this script supports fewer default features.
It also includes fewer abstractions and is therefore easier to extend with custom logic.
"""
import logging
import os
from collections import OrderedDict
import torch
from torch.nn.parallel import DistributedDataParallel
import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer
from detectron2.config import get_cfg
from detectron2.data import (
MetadataCatalog,
build_detection_test_loader,
build_detection_train_loader,
)
from detectron2.engine import default_argument_parser, default_setup, launch
from detectron2.evaluation import (
CityscapesInstanceEvaluator,
CityscapesSemSegEvaluator,
COCOEvaluator,
COCOPanopticEvaluator,
DatasetEvaluators,
LVISEvaluator,
PascalVOCDetectionEvaluator,
SemSegEvaluator,
inference_on_dataset,
print_csv_format,
)
from detectron2.modeling import build_model
from detectron2.solver import build_lr_scheduler, build_optimizer
from detectron2.utils.events import (
CommonMetricPrinter,
EventStorage,
JSONWriter,
TensorboardXWriter,
)
logger = logging.getLogger("detectron2")
def get_evaluator(cfg, dataset_name, output_folder=None):
"""
Create evaluator(s) for a given dataset.
This uses the special metadata "evaluator_type" associated with each builtin dataset.
For your own dataset, you can simply create an evaluator manually in your
script and do not have to worry about the hacky if-else logic here.
"""
if output_folder is None:
output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
evaluator_list = []
evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
if evaluator_type in ["sem_seg", "coco_panoptic_seg"]:
evaluator_list.append(
SemSegEvaluator(
dataset_name,
distributed=True,
num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES,
ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
output_dir=output_folder,
)
)
if evaluator_type in ["coco", "coco_panoptic_seg"]:
evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder))
if evaluator_type == "coco_panoptic_seg":
evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
if evaluator_type == "cityscapes_instance":
assert (
torch.cuda.device_count() >= comm.get_rank()
), "CityscapesEvaluator currently does not work with multiple machines."
return CityscapesInstanceEvaluator(dataset_name)
if evaluator_type == "cityscapes_sem_seg":
assert (
torch.cuda.device_count() >= comm.get_rank()
), "CityscapesEvaluator currently does not work with multiple machines."
return CityscapesSemSegEvaluator(dataset_name)
if evaluator_type == "pascal_voc":
return PascalVOCDetectionEvaluator(dataset_name)
if evaluator_type == "lvis":
return LVISEvaluator(dataset_name, cfg, True, output_folder)
if len(evaluator_list) == 0:
raise NotImplementedError(
"no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type)
)
if len(evaluator_list) == 1:
return evaluator_list[0]
return DatasetEvaluators(evaluator_list)
def do_test(cfg, model):
results = OrderedDict()
for dataset_name in cfg.DATASETS.TEST:
data_loader = build_detection_test_loader(cfg, dataset_name)
evaluator = get_evaluator(
cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
)
results_i = inference_on_dataset(model, data_loader, evaluator)
results[dataset_name] = results_i
if comm.is_main_process():
logger.info("Evaluation results for {} in csv format:".format(dataset_name))
print_csv_format(results_i)
if len(results) == 1:
results = list(results.values())[0]
return results
def do_train(cfg, model, resume=False):
model.train()
optimizer = build_optimizer(cfg, model)
scheduler = build_lr_scheduler(cfg, optimizer)
checkpointer = DetectionCheckpointer(
model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler
)
start_iter = (
checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1
)
max_iter = cfg.SOLVER.MAX_ITER
periodic_checkpointer = PeriodicCheckpointer(
checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter
)
writers = (
[
CommonMetricPrinter(max_iter),
JSONWriter(os.path.join(cfg.OUTPUT_DIR, "metrics.json")),
TensorboardXWriter(cfg.OUTPUT_DIR),
]
if comm.is_main_process()
else []
)
# compared to "train_net.py", we do not support accurate timing and
# precise BN here, because they are not trivial to implement in a small training loop
data_loader = build_detection_train_loader(cfg)
logger.info("Starting training from iteration {}".format(start_iter))
with EventStorage(start_iter) as storage:
for data, iteration in zip(data_loader, range(start_iter, max_iter)):
storage.iter = iteration
loss_dict = model(data)
losses = sum(loss_dict.values())
assert torch.isfinite(losses).all(), loss_dict
loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()}
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
if comm.is_main_process():
storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced)
optimizer.zero_grad()
losses.backward()
optimizer.step()
storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False)
scheduler.step()
if (
cfg.TEST.EVAL_PERIOD > 0
and (iteration + 1) % cfg.TEST.EVAL_PERIOD == 0
and iteration != max_iter - 1
):
do_test(cfg, model)
# Compared to "train_net.py", the test results are not dumped to EventStorage
comm.synchronize()
if iteration - start_iter > 5 and (
(iteration + 1) % 20 == 0 or iteration == max_iter - 1
):
for writer in writers:
writer.write()
periodic_checkpointer.step(iteration)
def setup(args):
"""
Create configs and perform basic setups.
"""
cfg = get_cfg()
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
default_setup(
cfg, args
) # if you don't like any of the default setup, write your own setup code
return cfg
def main(args):
cfg = setup(args)
model = build_model(cfg)
logger.info("Model:\n{}".format(model))
if args.eval_only:
DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
cfg.MODEL.WEIGHTS, resume=args.resume
)
return do_test(cfg, model)
distributed = comm.get_world_size() > 1
if distributed:
model = DistributedDataParallel(
model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
)
do_train(cfg, model, resume=args.resume)
return do_test(cfg, model)
if __name__ == "__main__":
args = default_argument_parser().parse_args()
print("Command Line Args:", args)
launch(
main,
args.num_gpus,
num_machines=args.num_machines,
machine_rank=args.machine_rank,
dist_url=args.dist_url,
args=(args,),
)

@@ -0,0 +1,33 @@
import os
import torch
import pickle
source_dir = '/home/fk1/workspace/OWOD/output/logits'
files = os.listdir(source_dir)
unk = []
known = []
for file in files:
path = os.path.join(source_dir, file)
logits, classes = torch.load(path)
lse = torch.logsumexp(logits[:,:-2], dim=1)
for i, cls in enumerate(classes):
if cls == 21:
continue
if cls == 20:
unk.append(lse[i].detach().cpu().tolist())
else:
known.append(lse[i].detach().cpu().tolist())
print(known)
print('\n\n')
print(unk)
# dir = '/home/fk1/workspace/OWOD/output'
#
# with open(os.path.join(dir, 'unk.pkl'), 'wb') as f:
# pickle.dump(unk, f)
#
# with open(os.path.join(dir, 'known.pkl'), 'wb') as f:
# pickle.dump(known, f)
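# A minimal sketch (not part of the original script; assumes matplotlib is
# installed) of how the collected logsumexp energies could be compared visually:
# import matplotlib.pyplot as plt
# plt.hist(known, bins=50, alpha=0.5, density=True, label='known')
# plt.hist(unk, bins=50, alpha=0.5, density=True, label='unknown')
# plt.xlabel('logsumexp over known-class logits')
# plt.legend()
# plt.savefig('energy_hist.png')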

tools/plot_tsne.py 100644
@@ -0,0 +1,132 @@
import os
import torch
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
from sklearn import manifold, datasets
from time import time
from collections import deque
import seaborn as sns
import numpy as np
from detectron2.utils.store import Store
def plot_tsne(X, label, total_num_classes):
n_components = 2
(fig, subplots) = plt.subplots(1, 5, figsize=(15, 8))
perplexities = [5, 30, 50, 100, 150]
for i, perplexity in enumerate(perplexities):
ax = subplots[i]
t0 = time()
tsne = manifold.TSNE(n_components=n_components, init='random',
random_state=0, perplexity=perplexity)
Y = tsne.fit_transform(X)
t1 = time()
print("circles, perplexity=%d in %.2g sec" % (perplexity, t1 - t0))
ax.set_title("Perplexity=%d" % perplexity)
#
# sc = ax.scatter(Y[:, 0], Y[:, 1], c=label, cmap="plasma")
sns.scatterplot(x=Y[:, 0], y=Y[:, 1], hue=label, ax=ax, legend='full', palette='colorblind')
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
ax.axis('tight')
# plt.legend(handles=sc.legend_elements()[0], labels=range(total_num_classes))
# plt.legend(handles=sc.legend_elements()[0], labels=['0', '1'])
# plt.show()
plt.savefig('tsne.png')
def plot_tsne_indiv(X, label, total_num_classes):
n_components = 2
# perplexities = [5, 30, 50, 100, 150]
perplexities = list(range(10, 150, 10))
for i, perplexity in enumerate(perplexities):
__, ax = plt.subplots()
t0 = time()
tsne = manifold.TSNE(n_components=n_components, init='random',
random_state=0, perplexity=perplexity)
Y = tsne.fit_transform(X)
t1 = time()
print("circles, perplexity=%d in %.2g sec" % (perplexity, t1 - t0))
# ax.set_title("Perplexity=%d" % perplexity)
#
# sc = ax.scatter(Y[:, 0], Y[:, 1], c=label, cmap="plasma")
# palette = sns.color_palette(None, total_num_classes)
# palette = sns.color_palette("flare", as_cmap=True)
flatui = ['#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', \
'#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', \
'#008080', '#e6beff', '#9a6324', '#fffac8', '#800000', \
'#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', \
'#3498db']
# sns.set_palette(flatui)
sns.scatterplot(x=Y[:, 0], y=Y[:, 1], hue=label, ax=ax, legend='full', palette=flatui)
# sns.scatterplot(x=Y[:, 0], y=Y[:, 1], hue=label, ax=ax, legend='full', palette='colorblind')
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
ax.axis('tight')
# plt.legend(handles=sc.legend_elements()[0], labels=range(total_num_classes))
# plt.legend(handles=sc.legend_elements()[0], labels=['0', '1'])
# plt.show()
# plt.legend(fontsize='xx-large', ncol=2, handleheight=2.4, labelspacing=0.05)
plt.legend(ncol=6)
plt.savefig('tsne_' + str(perplexity) + '.png')
plt.pause(0.0001)
plt.clf()
maxlen_queue = 100
total_num_classes = 22
queues = [deque(maxlen=maxlen_queue) for _ in range(total_num_classes)]
source_dir = '/home/fk1/workspace/OWOD/output/features'
# files = os.listdir(source_dir)
# for i, file in enumerate(files):
# path = os.path.join(source_dir, file)
# features, classes = torch.load(path)
# for f, c in zip(features, classes):
# if c == 80:
# c = 20
# queues[c].append(f.detach().cpu().numpy())
# elif c == 81:
# c = 21
# queues[c].append(f.detach().cpu().numpy())
# elif c <= total_num_classes:
# queues[c.detach().cpu().numpy()].append(f.detach().cpu().numpy())
# if i%100 == 0:
# print('Processing ' + str(i))
# # if i == 2:
# # break
#
# torch.save(queues, os.path.join(source_dir,'queues_tsne.pkl'))
queues = torch.load(os.path.join(source_dir,'queues_tsne.pkl'))
x = []
y = []
for i, queue in enumerate(queues):
if i == 20:
continue
if i == 21:
i = 20
for item in queue:
x.append(item)
y.append(i)
print('Going to plot')
plot_tsne_indiv(x, y, total_num_classes)

tools/train_net.py 100644
@@ -0,0 +1,170 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Detection Training Script.
This script reads a given config file and runs the training or evaluation.
It is an entry point that is made to train standard models in detectron2.
In order to let one script support training of many models,
this script contains logic that is specific to these built-in models and therefore
may not be suitable for your own project.
For example, your research project perhaps only needs a single "evaluator".
Therefore, we recommend using detectron2 as a library and taking
this file as an example of how to use the library.
You may want to write your own script with your datasets and other customizations.
"""
import logging
import os
from collections import OrderedDict
import torch
import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
from detectron2.evaluation import (
CityscapesInstanceEvaluator,
CityscapesSemSegEvaluator,
COCOEvaluator,
COCOPanopticEvaluator,
DatasetEvaluators,
LVISEvaluator,
PascalVOCDetectionEvaluator,
SemSegEvaluator,
verify_results,
)
from detectron2.modeling import GeneralizedRCNNWithTTA
class Trainer(DefaultTrainer):
"""
We use the "DefaultTrainer" which contains pre-defined default logic for
standard training workflow. They may not work for you, especially if you
are working on a new research project. In that case you can write your
own training loop. You can use "tools/plain_train_net.py" as an example.
"""
@classmethod
def build_evaluator(cls, cfg, dataset_name, output_folder=None):
"""
Create evaluator(s) for a given dataset.
This uses the special metadata "evaluator_type" associated with each builtin dataset.
For your own dataset, you can simply create an evaluator manually in your
script and do not have to worry about the hacky if-else logic here.
"""
if output_folder is None:
output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
evaluator_list = []
evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
if evaluator_type in ["sem_seg", "coco_panoptic_seg"]:
evaluator_list.append(
SemSegEvaluator(
dataset_name,
distributed=True,
num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES,
ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
output_dir=output_folder,
)
)
if evaluator_type in ["coco", "coco_panoptic_seg"]:
evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder))
if evaluator_type == "coco_panoptic_seg":
evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
if evaluator_type == "cityscapes_instance":
assert (
torch.cuda.device_count() >= comm.get_rank()
), "CityscapesEvaluator currently does not work with multiple machines."
return CityscapesInstanceEvaluator(dataset_name)
if evaluator_type == "cityscapes_sem_seg":
assert (
torch.cuda.device_count() >= comm.get_rank()
), "CityscapesEvaluator currently does not work with multiple machines."
return CityscapesSemSegEvaluator(dataset_name)
elif evaluator_type == "pascal_voc":
return PascalVOCDetectionEvaluator(dataset_name, cfg)
elif evaluator_type == "lvis":
return LVISEvaluator(dataset_name, cfg, True, output_folder)
if len(evaluator_list) == 0:
raise NotImplementedError(
"no Evaluator for the dataset {} with the type {}".format(
dataset_name, evaluator_type
)
)
elif len(evaluator_list) == 1:
return evaluator_list[0]
return DatasetEvaluators(evaluator_list)
@classmethod
def test_with_TTA(cls, cfg, model):
logger = logging.getLogger("detectron2.trainer")
# At the end of training, run an evaluation with TTA
# Only support some R-CNN models.
logger.info("Running inference with test-time augmentation ...")
model = GeneralizedRCNNWithTTA(cfg, model)
evaluators = [
cls.build_evaluator(
cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
)
for name in cfg.DATASETS.TEST
]
res = cls.test(cfg, model, evaluators)
res = OrderedDict({k + "_TTA": v for k, v in res.items()})
return res
def setup(args):
"""
Create configs and perform basic setups.
"""
cfg = get_cfg()
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
default_setup(cfg, args)
return cfg
def main(args):
cfg = setup(args)
if args.eval_only:
model = Trainer.build_model(cfg)
DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
cfg.MODEL.WEIGHTS, resume=args.resume
)
res = Trainer.test(cfg, model)
if cfg.TEST.AUG.ENABLED:
res.update(Trainer.test_with_TTA(cfg, model))
if comm.is_main_process():
verify_results(cfg, res)
return res
"""
If you'd like to do anything fancier than the standard training logic,
consider writing your own training loop (see plain_train_net.py) or
subclassing the trainer.
"""
trainer = Trainer(cfg)
trainer.resume_or_load(resume=args.resume)
if cfg.TEST.AUG.ENABLED:
trainer.register_hooks(
[hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))]
)
return trainer.train()
if __name__ == "__main__":
args = default_argument_parser().parse_args()
print("Command Line Args:", args)
launch(
main,
args.num_gpus,
num_machines=args.num_machines,
machine_rank=args.machine_rank,
dist_url=args.dist_url,
args=(args,),
)

tools/tsne.py 100644
@@ -0,0 +1,56 @@
# Author: Narine Kokhlikyan <narine@slice.com>
# License: BSD
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
from sklearn import manifold, datasets
from time import time
import numpy as np
def plot_tsne(X, label):
n_components = 2
(fig, subplots) = plt.subplots(1, 5, figsize=(15, 8))
perplexities = [5, 30, 50, 100, 150]
for i, perplexity in enumerate(perplexities):
ax = subplots[i]
t0 = time()
tsne = manifold.TSNE(n_components=n_components, init='random',
random_state=0, perplexity=perplexity)
Y = tsne.fit_transform(X)
t1 = time()
print("circles, perplexity=%d in %.2g sec" % (perplexity, t1 - t0))
ax.set_title("Perplexity=%d" % perplexity)
sc = ax.scatter(Y[:, 0], Y[:, 1], c=label, cmap="Set1")
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
ax.axis('tight')
plt.legend(handles=sc.legend_elements()[0], labels=['0', '1'])
# plt.show()
plt.savefig('tsne.png')
n_samples = 300
# X, y = datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05)
# plot_tsne(X, y)
num_samples_from_prior = 10
num_tasks = 10
X = []
color = []
label = []
for i in range(num_tasks):
for p in range(num_samples_from_prior):
prior = np.random.rand(1000)
X.append(prior)
color.append('C'+str(i))
label.append(i%2)
# NOTE: plot_tsne() takes (X, label); the `color` list built above is unused here.
plot_tsne(np.array(X), np.array(label))

tools/visualize_data.py
@@ -0,0 +1,93 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import argparse
import os
from itertools import chain
import cv2
import tqdm
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader
from detectron2.data import detection_utils as utils
from detectron2.data.build import filter_images_with_few_keypoints
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import Visualizer
def setup(args):
cfg = get_cfg()
if args.config_file:
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
return cfg
def parse_args(in_args=None):
parser = argparse.ArgumentParser(description="Visualize ground-truth data")
parser.add_argument(
"--source",
choices=["annotation", "dataloader"],
required=True,
help="visualize the annotations or the data loader (with pre-processing)",
)
parser.add_argument("--config-file", metavar="FILE", help="path to config file")
parser.add_argument("--output-dir", default="./", help="path to output directory")
parser.add_argument("--show", action="store_true", help="show output in a window")
parser.add_argument(
"opts",
help="Modify config options using the command-line",
default=None,
nargs=argparse.REMAINDER,
)
return parser.parse_args(in_args)
if __name__ == "__main__":
args = parse_args()
logger = setup_logger()
logger.info("Arguments: " + str(args))
cfg = setup(args)
dirname = args.output_dir
os.makedirs(dirname, exist_ok=True)
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
def output(vis, fname):
if args.show:
print(fname)
cv2.imshow("window", vis.get_image()[:, :, ::-1])
cv2.waitKey()
else:
filepath = os.path.join(dirname, fname)
print("Saving to {} ...".format(filepath))
vis.save(filepath)
scale = 2.0 if args.show else 1.0
if args.source == "dataloader":
train_data_loader = build_detection_train_loader(cfg)
for batch in train_data_loader:
for per_image in batch:
# Pytorch tensor is in (C, H, W) format
img = per_image["image"].permute(1, 2, 0).cpu().detach().numpy()
img = utils.convert_image_to_rgb(img, cfg.INPUT.FORMAT)
visualizer = Visualizer(img, metadata=metadata, scale=scale)
target_fields = per_image["instances"].get_fields()
labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]]
vis = visualizer.overlay_instances(
labels=labels,
boxes=target_fields.get("gt_boxes", None),
masks=target_fields.get("gt_masks", None),
keypoints=target_fields.get("gt_keypoints", None),
)
output(vis, str(per_image["image_id"]) + ".jpg")
else:
dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN]))
if cfg.MODEL.KEYPOINT_ON:
dicts = filter_images_with_few_keypoints(dicts, 1)
for dic in tqdm.tqdm(dicts):
img = utils.read_image(dic["file_name"], "RGB")
visualizer = Visualizer(img, metadata=metadata, scale=scale)
vis = visualizer.draw_dataset_dict(dic)
output(vis, os.path.basename(dic["file_name"]))

tools/visualize_json_results.py
@@ -0,0 +1,90 @@
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import argparse
import json
import numpy as np
import os
from collections import defaultdict
import cv2
import tqdm
from fvcore.common.file_io import PathManager
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import Boxes, BoxMode, Instances
from detectron2.utils.logger import setup_logger
from detectron2.utils.visualizer import Visualizer
def create_instances(predictions, image_size):
ret = Instances(image_size)
score = np.asarray([x["score"] for x in predictions])
chosen = (score > args.conf_threshold).nonzero()[0]
score = score[chosen]
bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4)
bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen])
ret.scores = score
ret.pred_boxes = Boxes(bbox)
ret.pred_classes = labels
try:
ret.pred_masks = [predictions[i]["segmentation"] for i in chosen]
except KeyError:
pass
return ret
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="A script that visualizes the json predictions from COCO or LVIS dataset."
)
parser.add_argument("--input", required=True, help="JSON file produced by the model")
parser.add_argument("--output", required=True, help="output directory")
parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val")
parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold")
args = parser.parse_args()
logger = setup_logger()
with PathManager.open(args.input, "r") as f:
predictions = json.load(f)
pred_by_image = defaultdict(list)
for p in predictions:
pred_by_image[p["image_id"]].append(p)
dicts = list(DatasetCatalog.get(args.dataset))
metadata = MetadataCatalog.get(args.dataset)
if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
def dataset_id_map(ds_id):
return metadata.thing_dataset_id_to_contiguous_id[ds_id]
elif "lvis" in args.dataset:
# LVIS results are in the same format as COCO results, but have a different
# mapping from dataset category id to contiguous category id in [0, #categories - 1]
def dataset_id_map(ds_id):
return ds_id - 1
else:
raise ValueError("Unsupported dataset: {}".format(args.dataset))
os.makedirs(args.output, exist_ok=True)
for dic in tqdm.tqdm(dicts):
img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1]
basename = os.path.basename(dic["file_name"])
predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2])
vis = Visualizer(img, metadata)
vis_pred = vis.draw_instance_predictions(predictions).get_image()
vis = Visualizer(img, metadata)
vis_gt = vis.draw_dataset_dict(dic).get_image()
concat = np.concatenate((vis_pred, vis_gt), axis=1)
cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1])