diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 0000000..3733863
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,45 @@
+
+This directory contains a few scripts that use detectron2.
+
+
+* `train_net.py`
+
+An example training script that's made to train builtin models of detectron2.
+
+For usage, see [GETTING_STARTED.md](../GETTING_STARTED.md).
+
+* `plain_train_net.py`
+
+Similar to `train_net.py`, but implements a training loop instead of using `Trainer`.
+This script includes fewer features, but it may be more friendly to hackers.
+
+* `benchmark.py`
+
+Benchmark the training speed, inference speed, or data loading speed of a given config.
+
+Usage:
+```
+python benchmark.py --config-file config.yaml --task train/eval/data [optional DDP flags]
+```
+
+* `visualize_json_results.py`
+
+Visualize the json instance detection/segmentation results dumped by `COCOEvaluator` or `LVISEvaluator`.
+
+Usage:
+```
+python visualize_json_results.py --input x.json --output dir/ --dataset coco_2017_val
+```
+If not using a builtin dataset, you'll need to write your own script or modify this script.
+
+* `visualize_data.py`
+
+Visualize ground truth raw annotations or training data (after preprocessing/augmentations).
+
+Usage:
+```
+python visualize_data.py --config-file config.yaml --source annotation/dataloader --output-dir dir/ [--show]
+```
+
+NOTE: the script does not stop by itself when using `--source dataloader` because a training
+dataloader is usually infinite.
diff --git a/tools/analyze_model.py b/tools/analyze_model.py
new file mode 100644
index 0000000..9c06ea4
--- /dev/null
+++ b/tools/analyze_model.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import logging
+import numpy as np
+from collections import Counter
+import tqdm
+
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import get_cfg
+from detectron2.data import build_detection_test_loader
+from detectron2.engine import default_argument_parser
+from detectron2.modeling import build_model
+from detectron2.utils.analysis import (
+    activation_count_operators,
+    flop_count_operators,
+    parameter_count_table,
+)
+from detectron2.utils.logger import setup_logger
+
+logger = logging.getLogger("detectron2")
+
+
+def setup(args):
+    cfg = get_cfg()
+    cfg.merge_from_file(args.config_file)
+    cfg.DATALOADER.NUM_WORKERS = 0
+    cfg.merge_from_list(args.opts)
+    cfg.freeze()
+    setup_logger()
+    return cfg
+
+
+def do_flop(cfg):
+    data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
+    model = build_model(cfg)
+    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
+    model.eval()
+
+    counts = Counter()
+    total_flops = []
+    for idx, data in zip(tqdm.trange(args.num_inputs), data_loader):  # noqa
+        count = flop_count_operators(model, data)
+        counts += count
+        total_flops.append(sum(count.values()))
+    logger.info(
+        "(G)Flops for Each Type of Operators:\n" + str([(k, v / (idx + 1)) for k, v in counts.items()])
+    )
+    logger.info("Total (G)Flops: {}±{}".format(np.mean(total_flops), np.std(total_flops)))
+
+
+def do_activation(cfg):
+    data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
+    model = build_model(cfg)
+    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
+    model.eval()
+
+    counts = Counter()
+    total_activations = []
+    for idx, data in zip(tqdm.trange(args.num_inputs), data_loader):  # noqa
+        count = activation_count_operators(model, data)
+        counts += count
+        total_activations.append(sum(count.values()))
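+    # summarize: per-operator averages, then mean±std of the per-input totals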
+    logger.info(
+        "(Million) Activations for Each Type of Operators:\n"
+        + str([(k, v / (idx + 1)) for k, v in counts.items()])
+    )
+    logger.info(
+        "Total (Million) Activations: {}±{}".format(
+            np.mean(total_activations), np.std(total_activations)
+        )
+    )
+
+
+def do_parameter(cfg):
+    model = build_model(cfg)
+    logger.info("Parameter Count:\n" + parameter_count_table(model, max_depth=5))
+
+
+def do_structure(cfg):
+    model = build_model(cfg)
+    logger.info("Model Structure:\n" + str(model))
+
+
+if __name__ == "__main__":
+    parser = default_argument_parser(
+        epilog="""
+Examples:
+
+To show parameters of a model:
+$ ./analyze_model.py --tasks parameter \\
+    --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
+
+Flops and activations are data-dependent, therefore inputs and model weights
+are needed to count them:
+
+$ ./analyze_model.py --num-inputs 100 --tasks flop \\
+    --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \\
+    MODEL.WEIGHTS /path/to/model.pkl
+"""
+    )
+    parser.add_argument(
+        "--tasks",
+        choices=["flop", "activation", "parameter", "structure"],
+        required=True,
+        nargs="+",
+    )
+    parser.add_argument(
+        "--num-inputs",
+        default=100,
+        type=int,
+        help="number of inputs used to compute statistics for flops/activations, "
+        "both are data dependent.",
+    )
+    args = parser.parse_args()
+    assert not args.eval_only
+    assert args.num_gpus == 1
+
+    cfg = setup(args)
+
+    for task in args.tasks:
+        {
+            "flop": do_flop,
+            "activation": do_activation,
+            "parameter": do_parameter,
+            "structure": do_structure,
+        }[task](cfg)
diff --git a/tools/benchmark.py b/tools/benchmark.py
new file mode 100644
index 0000000..5f5d6ba
--- /dev/null
+++ b/tools/benchmark.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+"""
+A script to benchmark builtin models.
+
+Note: this script has an extra dependency: psutil.
+"""
+
+import itertools
+import logging
+import psutil
+import torch
+import tqdm
+from fvcore.common.timer import Timer
+from torch.nn.parallel import DistributedDataParallel
+
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import get_cfg
+from detectron2.data import (
+    DatasetFromList,
+    build_detection_test_loader,
+    build_detection_train_loader,
+)
+from detectron2.engine import SimpleTrainer, default_argument_parser, hooks, launch
+from detectron2.modeling import build_model
+from detectron2.solver import build_optimizer
+from detectron2.utils import comm
+from detectron2.utils.events import CommonMetricPrinter
+from detectron2.utils.logger import setup_logger
+
+logger = logging.getLogger("detectron2")
+
+
+def setup(args):
+    cfg = get_cfg()
+    cfg.merge_from_file(args.config_file)
+    cfg.SOLVER.BASE_LR = 0.001  # Avoid NaNs. Not useful in this script anyway.
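+    # command-line `opts` are merged below, after the override above, so an
+    # explicit SOLVER.BASE_LR given on the command line still takes effect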
+    cfg.merge_from_list(args.opts)
+    cfg.freeze()
+    setup_logger(distributed_rank=comm.get_rank())
+    return cfg
+
+
+def benchmark_data(args):
+    cfg = setup(args)
+
+    timer = Timer()
+    dataloader = build_detection_train_loader(cfg)
+    logger.info("Initialize loader using {} seconds.".format(timer.seconds()))
+
+    timer.reset()
+    itr = iter(dataloader)
+    for i in range(10):  # warmup
+        next(itr)
+        if i == 0:
+            startup_time = timer.seconds()
+    timer = Timer()
+    max_iter = 1000
+    for _ in tqdm.trange(max_iter):
+        next(itr)
+    logger.info(
+        "{} iters ({} images) in {} seconds.".format(
+            max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds()
+        )
+    )
+    logger.info("Startup time: {} seconds".format(startup_time))
+    ram = psutil.virtual_memory()
+    logger.info(
+        "RAM Usage: {:.2f}/{:.2f} GB".format(
+            (ram.total - ram.available) / 1024 ** 3, ram.total / 1024 ** 3
+        )
+    )
+
+    # test for a few more rounds
+    for _ in range(10):
+        timer = Timer()
+        max_iter = 1000
+        for _ in tqdm.trange(max_iter):
+            next(itr)
+        logger.info(
+            "{} iters ({} images) in {} seconds.".format(
+                max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds()
+            )
+        )
+
+
+def benchmark_train(args):
+    cfg = setup(args)
+    model = build_model(cfg)
+    logger.info("Model:\n{}".format(model))
+    if comm.get_world_size() > 1:
+        model = DistributedDataParallel(
+            model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
+        )
+    optimizer = build_optimizer(cfg, model)
+    checkpointer = DetectionCheckpointer(model, optimizer=optimizer)
+    checkpointer.load(cfg.MODEL.WEIGHTS)
+
+    cfg.defrost()
+    cfg.DATALOADER.NUM_WORKERS = 0
+    data_loader = build_detection_train_loader(cfg)
+    dummy_data = list(itertools.islice(data_loader, 100))
+
+    def f():
+        data = DatasetFromList(dummy_data, copy=False)
+        while True:
+            yield from data
+
+    max_iter = 400
+    trainer = SimpleTrainer(model, f(), optimizer)
+    trainer.register_hooks(
+        [hooks.IterationTimer(), hooks.PeriodicWriter([CommonMetricPrinter(max_iter)])]
+    )
+    trainer.train(1, max_iter)
+
+
+@torch.no_grad()
+def benchmark_eval(args):
+    cfg = setup(args)
+    model = build_model(cfg)
+    model.eval()
+    logger.info("Model:\n{}".format(model))
+    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
+
+    cfg.defrost()
+    cfg.DATALOADER.NUM_WORKERS = 0
+    data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
+    dummy_data = DatasetFromList(list(itertools.islice(data_loader, 100)), copy=False)
+
+    def f():
+        while True:
+            yield from dummy_data
+
+    for k in range(5):  # warmup
+        model(dummy_data[k])
+
+    max_iter = 300
+    timer = Timer()
+    with tqdm.tqdm(total=max_iter) as pbar:
+        for idx, d in enumerate(f()):
+            if idx == max_iter:
+                break
+            model(d)
+            pbar.update()
+    logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds()))
+
+
+if __name__ == "__main__":
+    parser = default_argument_parser()
+    parser.add_argument("--task", choices=["train", "eval", "data"], required=True)
+    args = parser.parse_args()
+    assert not args.eval_only
+
+    if args.task == "data":
+        f = benchmark_data
+    elif args.task == "train":
+        """
+        Note: training speed may not be representative.
+        The training cost of an R-CNN model varies with the content of the data
+        and the quality of the model.
+        """
+        f = benchmark_train
+    elif args.task == "eval":
+        f = benchmark_eval
+        # only benchmark single-GPU inference.
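+        # (the train/data tasks may still be launched with the usual
+        # distributed flags accepted by `default_argument_parser`)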
+        assert args.num_gpus == 1 and args.num_machines == 1
+    launch(f, args.num_gpus, args.num_machines, args.machine_rank, args.dist_url, args=(args,))
diff --git a/tools/convert-torchvision-to-d2.py b/tools/convert-torchvision-to-d2.py
new file mode 100644
index 0000000..18a24e4
--- /dev/null
+++ b/tools/convert-torchvision-to-d2.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import pickle as pkl
+import sys
+import torch
+
+"""
+Usage:
+   # download one of the ResNet{18,34,50,101,152} models from torchvision:
+   wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth
+   # run the conversion
+   ./convert-torchvision-to-d2.py r50.pth r50.pkl
+
+   # Then, use r50.pkl with the following changes in config:
+
+MODEL:
+  WEIGHTS: "/path/to/r50.pkl"
+  PIXEL_MEAN: [123.675, 116.280, 103.530]
+  PIXEL_STD: [58.395, 57.120, 57.375]
+  RESNETS:
+    DEPTH: 50
+    STRIDE_IN_1X1: False
+INPUT:
+  FORMAT: "RGB"
+
+   These models typically produce slightly worse results than the
+   pre-trained ResNets we use in official configs, which are the
+   original ResNet models released by MSRA.
+"""
+
+if __name__ == "__main__":
+    input = sys.argv[1]
+
+    obj = torch.load(input, map_location="cpu")
+
+    newmodel = {}
+    for k in list(obj.keys()):
+        old_k = k
+        if "layer" not in k:
+            k = "stem." + k
+        for t in [1, 2, 3, 4]:
+            k = k.replace("layer{}".format(t), "res{}".format(t + 1))
+        for t in [1, 2, 3]:
+            k = k.replace("bn{}".format(t), "conv{}.norm".format(t))
+        k = k.replace("downsample.0", "shortcut")
+        k = k.replace("downsample.1", "shortcut.norm")
+        print(old_k, "->", k)
+        newmodel[k] = obj.pop(old_k).detach().numpy()
+
+    res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True}
+
+    with open(sys.argv[2], "wb") as f:
+        pkl.dump(res, f)
+    if obj:
+        print("Unconverted keys:", obj.keys())
diff --git a/tools/deploy/CMakeLists.txt b/tools/deploy/CMakeLists.txt
new file mode 100644
index 0000000..c989eb2
--- /dev/null
+++ b/tools/deploy/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+# See https://pytorch.org/tutorials/advanced/cpp_frontend.html
+cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
+project(caffe2_mask_rcnn)
+
+find_package(Torch REQUIRED)
+find_package(gflags REQUIRED)
+find_package(OpenCV REQUIRED)
+
+add_executable(caffe2_mask_rcnn caffe2_mask_rcnn.cpp)
+target_link_libraries(
+  caffe2_mask_rcnn
+  "${TORCH_LIBRARIES}" gflags glog protobuf ${OpenCV_LIBS})
+set_property(TARGET caffe2_mask_rcnn PROPERTY CXX_STANDARD 14)
+
+
+add_executable(torchscript_traced_mask_rcnn torchscript_traced_mask_rcnn.cpp)
+target_link_libraries(
+  torchscript_traced_mask_rcnn
+  "${TORCH_LIBRARIES}" ${OpenCV_LIBS})
+set_property(TARGET torchscript_traced_mask_rcnn PROPERTY CXX_STANDARD 14)
diff --git a/tools/deploy/README.md b/tools/deploy/README.md
new file mode 100644
index 0000000..b9d5b15
--- /dev/null
+++ b/tools/deploy/README.md
@@ -0,0 +1,9 @@
+
+This directory contains:
+
+1. A script that converts a detectron2 model to caffe2 format.
+
+2. An example that loads a Mask R-CNN model in caffe2 format and runs inference.
+
+See [tutorial](https://detectron2.readthedocs.io/tutorials/deployment.html)
+for their usage.
diff --git a/tools/deploy/caffe2_converter.py b/tools/deploy/caffe2_converter.py
new file mode 100644
index 0000000..6665f55
--- /dev/null
+++ b/tools/deploy/caffe2_converter.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
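+"""
+Convert a detectron2 model to caffe2, onnx, or torchscript format by caffe2
+tracing (see the `--format` flag below).
+"""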
+import argparse
+import os
+import onnx
+
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import get_cfg
+from detectron2.data import build_detection_test_loader
+from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format
+from detectron2.export import Caffe2Tracer, add_export_config
+from detectron2.modeling import build_model
+from detectron2.utils.env import TORCH_VERSION
+from detectron2.utils.logger import setup_logger
+
+
+def setup_cfg(args):
+    cfg = get_cfg()
+    # the cuda context is initialized before the dataloader is created,
+    # so the dataloader must not fork worker processes
+    cfg.DATALOADER.NUM_WORKERS = 0
+    cfg = add_export_config(cfg)
+    cfg.merge_from_file(args.config_file)
+    cfg.merge_from_list(args.opts)
+    cfg.freeze()
+    if cfg.MODEL.DEVICE != "cpu":
+        assert TORCH_VERSION >= (1, 5), "PyTorch>=1.5 required for GPU conversion!"
+    return cfg
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Convert a model using caffe2 tracing.")
+    parser.add_argument(
+        "--format",
+        choices=["caffe2", "onnx", "torchscript"],
+        help="output format",
+        default="caffe2",
+    )
+    parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
+    parser.add_argument("--run-eval", action="store_true")
+    parser.add_argument("--output", help="output directory for the converted model")
+    parser.add_argument(
+        "opts",
+        help="Modify config options using the command-line",
+        default=None,
+        nargs=argparse.REMAINDER,
+    )
+    args = parser.parse_args()
+    logger = setup_logger()
+    logger.info("Command line arguments: " + str(args))
+    os.makedirs(args.output, exist_ok=True)
+
+    cfg = setup_cfg(args)
+
+    # create a torch model
+    torch_model = build_model(cfg)
+    DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS)
+
+    # get a sample input
+    data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
+    first_batch = next(iter(data_loader))
+
+    # convert and save the model in the requested format
+    tracer = Caffe2Tracer(cfg, torch_model, first_batch)
+    if args.format == "caffe2":
+        caffe2_model = tracer.export_caffe2()
+        caffe2_model.save_protobuf(args.output)
+        # draw the caffe2 graph
+        caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=first_batch)
+    elif args.format == "onnx":
+        onnx_model = tracer.export_onnx()
+        onnx.save(onnx_model, os.path.join(args.output, "model.onnx"))
+    elif args.format == "torchscript":
+        script_model = tracer.export_torchscript()
+        script_model.save(os.path.join(args.output, "model.ts"))
+
+        # Recursively print IR of all modules
+        with open(os.path.join(args.output, "model_ts_IR.txt"), "w") as f:
+            try:
+                f.write(script_model._actual_script_module._c.dump_to_str(True, False, False))
+            except AttributeError:
+                pass
+        # Print IR of the entire graph (all submodules inlined)
+        with open(os.path.join(args.output, "model_ts_IR_inlined.txt"), "w") as f:
+            f.write(str(script_model.inlined_graph))
+        # Print the model structure in pytorch style
+        with open(os.path.join(args.output, "model.txt"), "w") as f:
+            f.write(str(script_model))
+
+    # run evaluation with the converted model
+    if args.run_eval:
+        assert args.format == "caffe2", "Python inference in other format is not yet supported."
+        dataset = cfg.DATASETS.TEST[0]
+        data_loader = build_detection_test_loader(cfg, dataset)
+        # NOTE: hard-coded evaluator; change to the evaluator suitable for your dataset
+        evaluator = COCOEvaluator(dataset, cfg, True, args.output)
+        metrics = inference_on_dataset(caffe2_model, data_loader, evaluator)
+        print_csv_format(metrics)
diff --git a/tools/deploy/caffe2_mask_rcnn.cpp b/tools/deploy/caffe2_mask_rcnn.cpp
new file mode 100644
index 0000000..44370b4
--- /dev/null
+++ b/tools/deploy/caffe2_mask_rcnn.cpp
@@ -0,0 +1,119 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+#include <c10/util/Flags.h>
+#include <caffe2/core/blob.h>
+#include <caffe2/core/common.h>
+#include <caffe2/core/init.h>
+#include <caffe2/core/net.h>
+#include <caffe2/core/workspace.h>
+#include <caffe2/utils/proto_utils.h>
+
+#include <opencv2/opencv.hpp>
+#include <cassert>
+#include <chrono>
+#include <iostream>
+#include <string>
+
+C10_DEFINE_string(predict_net, "", "path to model.pb");
+C10_DEFINE_string(init_net, "", "path to model_init.pb");
+C10_DEFINE_string(input, "", "path to input image");
+
+using namespace std;
+using namespace caffe2;
+
+int main(int argc, char** argv) {
+  caffe2::GlobalInit(&argc, &argv);
+  string predictNetPath = FLAGS_predict_net;
+  string initNetPath = FLAGS_init_net;
+  cv::Mat input = cv::imread(FLAGS_input, cv::IMREAD_COLOR);
+
+  const int height = input.rows;
+  const int width = input.cols;
+  // FPN models require divisibility of 32
+  assert(height % 32 == 0 && width % 32 == 0);
+  const int batch = 1;
+  const int channels = 3;
+
+  // initialize Net and Workspace
+  caffe2::NetDef initNet_, predictNet_;
+  CAFFE_ENFORCE(ReadProtoFromFile(initNetPath, &initNet_));
+  CAFFE_ENFORCE(ReadProtoFromFile(predictNetPath, &predictNet_));
+
+  Workspace workSpace;
+  for (auto& str : predictNet_.external_input()) {
+    workSpace.CreateBlob(str);
+  }
+  CAFFE_ENFORCE(workSpace.CreateNet(predictNet_));
+  CAFFE_ENFORCE(workSpace.RunNetOnce(initNet_));
+
+  // setup inputs
+  auto data = BlobGetMutableTensor(workSpace.GetBlob("data"), caffe2::CPU);
+  data->Resize(batch, channels, height, width);
+  float* ptr = data->mutable_data<float>();
+  // HWC to CHW
+  for (int c = 0; c < 3; ++c) {
+    for (int i = 0; i < height * width; ++i) {
+      ptr[c * height * width + i] = static_cast<float>(input.data[3 * i + c]);
+    }
+  }
+
+  auto im_info =
+      BlobGetMutableTensor(workSpace.GetBlob("im_info"), caffe2::CPU);
+  im_info->Resize(batch, 3);
+  float* im_info_ptr = im_info->mutable_data<float>();
+  im_info_ptr[0] = height;
+  im_info_ptr[1] = width;
+  im_info_ptr[2] = 1.0;
+
+  // run the network
+  CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name()));
+
+  // run 3 more times to benchmark
+  int N_benchmark = 3;
+  auto start_time = chrono::high_resolution_clock::now();
+  for (int i = 0; i < N_benchmark; ++i) {
+    CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name()));
+  }
+  auto end_time = chrono::high_resolution_clock::now();
+  auto ms = chrono::duration_cast<chrono::microseconds>(end_time - start_time)
+                .count();
+  cout << "Latency (should vary with different inputs): "
+       << ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl;
+
+  // parse Mask R-CNN outputs
+  caffe2::Tensor bbox(
+      workSpace.GetBlob("bbox_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
+  caffe2::Tensor scores(
+      workSpace.GetBlob("score_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
+  caffe2::Tensor labels(
+      workSpace.GetBlob("class_nms")->Get<caffe2::Tensor>(), caffe2::CPU);
+  caffe2::Tensor mask_probs(
+      workSpace.GetBlob("mask_fcn_probs")->Get<caffe2::Tensor>(), caffe2::CPU);
+  cout << "bbox:" << bbox.DebugString() << endl;
+  cout << "scores:" << scores.DebugString() << endl;
+  cout << "labels:" << labels.DebugString() << endl;
+  cout << "mask_probs: " << mask_probs.DebugString() << endl;
+
+  int num_instances = bbox.sizes()[0];
+  for (int i = 0; i < num_instances; ++i) {
+    float score = scores.data<float>()[i];
+    if (score < 0.6)
+      continue; // skip them
+
+    const float* box = bbox.data<float>() + i * 4;
+    int label = labels.data<float>()[i];
+
+    cout << "Prediction " << i << ", xyxy=(";
+    cout << box[0] << ", " << box[1] << ", " << box[2] << ", " << box[3]
+         << "); score=" << score << "; label=" << label << endl;
+
+    const float* mask = mask_probs.data<float>() +
+        i * mask_probs.size_from_dim(1) + label * mask_probs.size_from_dim(2);
+
+    // save the 28x28 mask
+    cv::Mat cv_mask(28, 28, CV_32FC1);
+    memcpy(cv_mask.data, mask, 28 * 28 * sizeof(float));
+    cv::imwrite("mask" + std::to_string(i) + ".png", cv_mask * 255.);
+  }
+  return 0;
+}
diff --git a/tools/deploy/torchscript_traced_mask_rcnn.cpp b/tools/deploy/torchscript_traced_mask_rcnn.cpp
new file mode 100644
index 0000000..7e6df9d
--- /dev/null
+++ b/tools/deploy/torchscript_traced_mask_rcnn.cpp
@@ -0,0 +1,76 @@
+// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+#include <c10/cuda/CUDAStream.h>
+#include <torch/csrc/autograd/grad_mode.h>
+#include <torch/script.h>
+
+#include <opencv2/opencv.hpp>
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+// experimental. don't use
+int main(int argc, const char* argv[]) {
+  if (argc != 3) {
+    return 1;
+  }
+  std::string image_file = argv[2];
+
+  torch::autograd::AutoGradMode guard(false);
+  auto module = torch::jit::load(argv[1]);
+
+  assert(module.buffers().size() > 0);
+  // Assume that the entire model is on the same device.
+  // We just put input to this device.
+  auto device = (*begin(module.buffers())).device();
+
+  cv::Mat input_img = cv::imread(image_file, cv::IMREAD_COLOR);
+  const int height = input_img.rows;
+  const int width = input_img.cols;
+  // FPN models require divisibility of 32
+  assert(height % 32 == 0 && width % 32 == 0);
+  const int channels = 3;
+
+  auto input = torch::from_blob(
+      input_img.data, {1, height, width, channels}, torch::kUInt8);
+  // NHWC to NCHW
+  input = input.to(device, torch::kFloat).permute({0, 3, 1, 2}).contiguous();
+
+  std::array<float, 3> im_info_data{height * 1.0f, width * 1.0f, 1.0f};
+  auto im_info = torch::from_blob(im_info_data.data(), {1, 3}).to(device);
+
+  // run the network
+  auto output = module.forward({std::make_tuple(input, im_info)});
+  if (device.is_cuda())
+    c10::cuda::getCurrentCUDAStream().synchronize();
+
+  // run 3 more times to benchmark
+  int N_benchmark = 3;
+  auto start_time = chrono::high_resolution_clock::now();
+  for (int i = 0; i < N_benchmark; ++i) {
+    output = module.forward({std::make_tuple(input, im_info)});
+    if (device.is_cuda())
+      c10::cuda::getCurrentCUDAStream().synchronize();
+  }
+  auto end_time = chrono::high_resolution_clock::now();
+  auto ms = chrono::duration_cast<chrono::microseconds>(end_time - start_time)
+                .count();
+  cout << "Latency (should vary with different inputs): "
+       << ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl;
+
+  auto outputs = output.toTuple()->elements();
+  // parse Mask R-CNN outputs
+  auto bbox = outputs[0].toTensor(), scores = outputs[1].toTensor(),
+       labels = outputs[2].toTensor(), mask_probs = outputs[3].toTensor();
+
+  cout << "bbox: " << bbox.toString() << " " << bbox.sizes() << endl;
+  cout << "scores: " << scores.toString() << " " << scores.sizes() << endl;
+  cout << "labels: " << labels.toString() << " " << labels.sizes() << endl;
+  cout << "mask_probs: " << mask_probs.toString() << " " << mask_probs.sizes()
+       << endl;
+
+  int num_instances = bbox.sizes()[0];
+  cout << bbox << endl;
+  return 0;
+}
diff --git a/tools/plain_train_net.py b/tools/plain_train_net.py
new file mode 100644
index 0000000..af07c25
--- /dev/null
+++ b/tools/plain_train_net.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+"""
+Detectron2 training script with a plain training loop.
+
+This script reads a given config file and runs the training or evaluation.
+It is an entry point that is able to train standard models in detectron2.
+
+In order to let one script support training of many models,
+this script contains logic that is specific to these built-in models and therefore
+may not be suitable for your own project.
+For example, your research project perhaps only needs a single "evaluator".
+
+Therefore, we recommend you use detectron2 as a library and take
+this file as an example of how to use the library.
+You may want to write your own script with your datasets and other customizations.
+
+Compared to "train_net.py", this script supports fewer default features.
+It also includes fewer abstractions and is therefore easier to add custom logic to.
+"""
+
+import logging
+import os
+from collections import OrderedDict
+import torch
+from torch.nn.parallel import DistributedDataParallel
+
+import detectron2.utils.comm as comm
+from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer
+from detectron2.config import get_cfg
+from detectron2.data import (
+    MetadataCatalog,
+    build_detection_test_loader,
+    build_detection_train_loader,
+)
+from detectron2.engine import default_argument_parser, default_setup, launch
+from detectron2.evaluation import (
+    CityscapesInstanceEvaluator,
+    CityscapesSemSegEvaluator,
+    COCOEvaluator,
+    COCOPanopticEvaluator,
+    DatasetEvaluators,
+    LVISEvaluator,
+    PascalVOCDetectionEvaluator,
+    SemSegEvaluator,
+    inference_on_dataset,
+    print_csv_format,
+)
+from detectron2.modeling import build_model
+from detectron2.solver import build_lr_scheduler, build_optimizer
+from detectron2.utils.events import (
+    CommonMetricPrinter,
+    EventStorage,
+    JSONWriter,
+    TensorboardXWriter,
+)
+
+logger = logging.getLogger("detectron2")
+
+
+def get_evaluator(cfg, dataset_name, output_folder=None):
+    """
+    Create evaluator(s) for a given dataset.
+    This uses the special metadata "evaluator_type" associated with each builtin dataset.
+    For your own dataset, you can simply create an evaluator manually in your
+    script and do not have to worry about the hacky if-else logic here.
+    """
+    if output_folder is None:
+        output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
+    evaluator_list = []
+    evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
+    if evaluator_type in ["sem_seg", "coco_panoptic_seg"]:
+        evaluator_list.append(
+            SemSegEvaluator(
+                dataset_name,
+                distributed=True,
+                num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES,
+                ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
+                output_dir=output_folder,
+            )
+        )
+    if evaluator_type in ["coco", "coco_panoptic_seg"]:
+        evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder))
+    if evaluator_type == "coco_panoptic_seg":
+        evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
+    if evaluator_type == "cityscapes_instance":
+        assert (
+            torch.cuda.device_count() >= comm.get_rank()
+        ), "CityscapesEvaluator currently does not work with multiple machines."
+        return CityscapesInstanceEvaluator(dataset_name)
+    if evaluator_type == "cityscapes_sem_seg":
+        assert (
+            torch.cuda.device_count() >= comm.get_rank()
+        ), "CityscapesEvaluator currently does not work with multiple machines."
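+        # (the assert above is a rough sanity check: the Cityscapes evaluators
+        # assume all workers run on a single machine)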
+        return CityscapesSemSegEvaluator(dataset_name)
+    if evaluator_type == "pascal_voc":
+        return PascalVOCDetectionEvaluator(dataset_name)
+    if evaluator_type == "lvis":
+        return LVISEvaluator(dataset_name, cfg, True, output_folder)
+    if len(evaluator_list) == 0:
+        raise NotImplementedError(
+            "no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type)
+        )
+    if len(evaluator_list) == 1:
+        return evaluator_list[0]
+    return DatasetEvaluators(evaluator_list)
+
+
+def do_test(cfg, model):
+    results = OrderedDict()
+    for dataset_name in cfg.DATASETS.TEST:
+        data_loader = build_detection_test_loader(cfg, dataset_name)
+        evaluator = get_evaluator(
+            cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
+        )
+        results_i = inference_on_dataset(model, data_loader, evaluator)
+        results[dataset_name] = results_i
+        if comm.is_main_process():
+            logger.info("Evaluation results for {} in csv format:".format(dataset_name))
+            print_csv_format(results_i)
+    if len(results) == 1:
+        results = list(results.values())[0]
+    return results
+
+
+def do_train(cfg, model, resume=False):
+    model.train()
+    optimizer = build_optimizer(cfg, model)
+    scheduler = build_lr_scheduler(cfg, optimizer)
+
+    checkpointer = DetectionCheckpointer(
+        model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler
+    )
+    start_iter = (
+        checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1
+    )
+    max_iter = cfg.SOLVER.MAX_ITER
+
+    periodic_checkpointer = PeriodicCheckpointer(
+        checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter
+    )
+
+    writers = (
+        [
+            CommonMetricPrinter(max_iter),
+            JSONWriter(os.path.join(cfg.OUTPUT_DIR, "metrics.json")),
+            TensorboardXWriter(cfg.OUTPUT_DIR),
+        ]
+        if comm.is_main_process()
+        else []
+    )
+
+    # compared to "train_net.py", we do not support accurate timing and
+    # precise BN here, because they are not trivial to implement in a small training loop
+    data_loader = build_detection_train_loader(cfg)
+    logger.info("Starting training from iteration {}".format(start_iter))
+    with EventStorage(start_iter) as storage:
+        for data, iteration in zip(data_loader, range(start_iter, max_iter)):
+            storage.iter = iteration
+
+            loss_dict = model(data)
+            losses = sum(loss_dict.values())
+            assert torch.isfinite(losses).all(), loss_dict
+
+            loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()}
+            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
+            if comm.is_main_process():
+                storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced)
+
+            optimizer.zero_grad()
+            losses.backward()
+            optimizer.step()
+            storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False)
+            scheduler.step()
+
+            if (
+                cfg.TEST.EVAL_PERIOD > 0
+                and (iteration + 1) % cfg.TEST.EVAL_PERIOD == 0
+                and iteration != max_iter - 1
+            ):
+                do_test(cfg, model)
+                # Compared to "train_net.py", the test results are not dumped to EventStorage
+                comm.synchronize()
+
+            if iteration - start_iter > 5 and (
+                (iteration + 1) % 20 == 0 or iteration == max_iter - 1
+            ):
+                for writer in writers:
+                    writer.write()
+            periodic_checkpointer.step(iteration)
+
+
+def setup(args):
+    """
+    Create configs and perform basic setups.
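+    (The config is built from the given file plus command-line overrides and
+    then frozen; `default_setup` also configures loggers and the output dir.)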
+ """ + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup( + cfg, args + ) # if you don't like any of the default setup, write your own setup code + return cfg + + +def main(args): + cfg = setup(args) + + model = build_model(cfg) + logger.info("Model:\n{}".format(model)) + if args.eval_only: + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + return do_test(cfg, model) + + distributed = comm.get_world_size() > 1 + if distributed: + model = DistributedDataParallel( + model, device_ids=[comm.get_local_rank()], broadcast_buffers=False + ) + + do_train(cfg, model, resume=args.resume) + return do_test(cfg, model) + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) diff --git a/tools/plot_energy.py b/tools/plot_energy.py new file mode 100644 index 0000000..a2b755d --- /dev/null +++ b/tools/plot_energy.py @@ -0,0 +1,33 @@ +import os +import torch +import pickle + +source_dir = '/home/fk1/workspace/OWOD/output/logits' + +files = os.listdir(source_dir) +unk = [] +known = [] +for file in files: + path = os.path.join(source_dir, file) + logits, classes = torch.load(path) + lse = torch.logsumexp(logits[:,:-2], dim=1) + + for i, cls in enumerate(classes): + if cls == 21: + continue + if cls == 20: + unk.append(lse[i].detach().cpu().tolist()) + else: + known.append(lse[i].detach().cpu().tolist()) + +print(known) +print('\n\n') +print(unk) + +# dir = '/home/fk1/workspace/OWOD/output' +# +# with open(os.path.join(dir, 'unk.pkl'), 'wb') as f: +# pickle.dump(unk, f) +# +# with open(os.path.join(dir, 'known.pkl'), 'wb') as f: +# pickle.dump(unk, f) diff --git a/tools/plot_tsne.py b/tools/plot_tsne.py new file mode 100644 index 0000000..38931d8 --- /dev/null +++ b/tools/plot_tsne.py @@ -0,0 +1,132 @@ +import os +import torch +import matplotlib.pyplot as plt + +from matplotlib.ticker import NullFormatter +from sklearn import manifold, datasets +from time import time +from collections import deque +import seaborn as sns + +import numpy as np + +from detectron2.utils.store import Store + + +def plot_tsne(X, label, total_num_classes): + n_components = 2 + (fig, subplots) = plt.subplots(1, 5, figsize=(15, 8)) + + perplexities = [5, 30, 50, 100, 150] + + for i, perplexity in enumerate(perplexities): + ax = subplots[i] + + t0 = time() + tsne = manifold.TSNE(n_components=n_components, init='random', + random_state=0, perplexity=perplexity) + Y = tsne.fit_transform(X) + t1 = time() + print("circles, perplexity=%d in %.2g sec" % (perplexity, t1 - t0)) + ax.set_title("Perplexity=%d" % perplexity) + # + # sc = ax.scatter(Y[:, 0], Y[:, 1], c=label, cmap="plasma") + sns.scatterplot(x=Y[:, 0], y=Y[:, 1], hue=label, ax=ax, legend='full', palette='colorblind') + ax.xaxis.set_major_formatter(NullFormatter()) + ax.yaxis.set_major_formatter(NullFormatter()) + ax.axis('tight') + # plt.legend(handles=sc.legend_elements()[0], labels=range(total_num_classes)) + + # plt.legend(handles=sc.legend_elements()[0], labels=['0', '1']) + + # plt.show() + + plt.savefig('tsne.png') + +def plot_tsne_indiv(X, label, total_num_classes): + n_components = 2 + + # perplexities = [5, 30, 50, 100, 150] + perplexities = list(range(10, 150, 10)) + + for i, perplexity in enumerate(perplexities): + __, ax = 
+        t0 = time()
+        tsne = manifold.TSNE(n_components=n_components, init='random',
+                             random_state=0, perplexity=perplexity)
+        Y = tsne.fit_transform(X)
+        t1 = time()
+        print("circles, perplexity=%d in %.2g sec" % (perplexity, t1 - t0))
+        # ax.set_title("Perplexity=%d" % perplexity)
+        #
+        # sc = ax.scatter(Y[:, 0], Y[:, 1], c=label, cmap="plasma")
+
+        # palette = sns.color_palette(None, total_num_classes)
+        # palette = sns.color_palette("flare", as_cmap=True)
+
+        flatui = ['#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', \
+                  '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', \
+                  '#008080', '#e6beff', '#9a6324', '#fffac8', '#800000', \
+                  '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', \
+                  '#3498db']
+        # sns.set_palette(flatui)
+        sns.scatterplot(x=Y[:, 0], y=Y[:, 1], hue=label, ax=ax, legend='full', palette=flatui)
+
+        # sns.scatterplot(x=Y[:, 0], y=Y[:, 1], hue=label, ax=ax, legend='full', palette='colorblind')
+        ax.xaxis.set_major_formatter(NullFormatter())
+        ax.yaxis.set_major_formatter(NullFormatter())
+        ax.axis('tight')
+        # plt.legend(handles=sc.legend_elements()[0], labels=range(total_num_classes))
+
+        # plt.legend(handles=sc.legend_elements()[0], labels=['0', '1'])
+
+        # plt.show()
+        # plt.legend(fontsize='xx-large', ncol=2, handleheight=2.4, labelspacing=0.05)
+        plt.legend(ncol=6)
+
+        plt.savefig('tsne_' + str(perplexity) + '.png')
+        plt.pause(0.0001)
+        plt.clf()
+
+
+maxlen_queue = 100
+total_num_classes = 22
+
+queues = [deque(maxlen=maxlen_queue) for _ in range(total_num_classes)]
+
+source_dir = '/home/fk1/workspace/OWOD/output/features'
+
+# files = os.listdir(source_dir)
+# for i, file in enumerate(files):
+#     path = os.path.join(source_dir, file)
+#     features, classes = torch.load(path)
+#     for f, c in zip(features, classes):
+#         if c == 80:
+#             c = 20
+#             queues[c].append(f.detach().cpu().numpy())
+#         elif c == 81:
+#             c = 21
+#             queues[c].append(f.detach().cpu().numpy())
+#         elif c <= total_num_classes:
+#             queues[c.detach().cpu().numpy()].append(f.detach().cpu().numpy())
+#     if i % 100 == 0:
+#         print('Processing ' + str(i))
+#     # if i == 2:
+#     #     break
+#
+# torch.save(queues, os.path.join(source_dir, 'queues_tsne.pkl'))
+
+queues = torch.load(os.path.join(source_dir, 'queues_tsne.pkl'))
+
+x = []
+y = []
+for i, queue in enumerate(queues):
+    # class id 20 is skipped and 21 is remapped onto it (dataset-specific relabeling)
+    if i == 20:
+        continue
+    if i == 21:
+        i = 20
+    for item in queue:
+        x.append(item)
+        y.append(i)
+
+print('Going to plot')
+plot_tsne_indiv(x, y, total_num_classes)
diff --git a/tools/train_net.py b/tools/train_net.py
new file mode 100644
index 0000000..c44e158
--- /dev/null
+++ b/tools/train_net.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+"""
+Detection Training Script.
+
+This script reads a given config file and runs the training or evaluation.
+It is an entry point that is made to train standard models in detectron2.
+
+In order to let one script support training of many models,
+this script contains logic that is specific to these built-in models and therefore
+may not be suitable for your own project.
+For example, your research project perhaps only needs a single "evaluator".
+
+Therefore, we recommend you use detectron2 as a library and take
+this file as an example of how to use the library.
+You may want to write your own script with your datasets and other customizations.
+""" + +import logging +import os +from collections import OrderedDict +import torch + +import detectron2.utils.comm as comm +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import get_cfg +from detectron2.data import MetadataCatalog +from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch +from detectron2.evaluation import ( + CityscapesInstanceEvaluator, + CityscapesSemSegEvaluator, + COCOEvaluator, + COCOPanopticEvaluator, + DatasetEvaluators, + LVISEvaluator, + PascalVOCDetectionEvaluator, + SemSegEvaluator, + verify_results, +) +from detectron2.modeling import GeneralizedRCNNWithTTA + + +class Trainer(DefaultTrainer): + """ + We use the "DefaultTrainer" which contains pre-defined default logic for + standard training workflow. They may not work for you, especially if you + are working on a new research project. In that case you can write your + own training loop. You can use "tools/plain_train_net.py" as an example. + """ + + @classmethod + def build_evaluator(cls, cfg, dataset_name, output_folder=None): + """ + Create evaluator(s) for a given dataset. + This uses the special metadata "evaluator_type" associated with each builtin dataset. + For your own dataset, you can simply create an evaluator manually in your + script and do not have to worry about the hacky if-else logic here. + """ + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + evaluator_list = [] + evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type + if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: + evaluator_list.append( + SemSegEvaluator( + dataset_name, + distributed=True, + num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, + ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + output_dir=output_folder, + ) + ) + if evaluator_type in ["coco", "coco_panoptic_seg"]: + evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) + if evaluator_type == "coco_panoptic_seg": + evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) + if evaluator_type == "cityscapes_instance": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesInstanceEvaluator(dataset_name) + if evaluator_type == "cityscapes_sem_seg": + assert ( + torch.cuda.device_count() >= comm.get_rank() + ), "CityscapesEvaluator currently do not work with multiple machines." + return CityscapesSemSegEvaluator(dataset_name) + elif evaluator_type == "pascal_voc": + return PascalVOCDetectionEvaluator(dataset_name, cfg) + elif evaluator_type == "lvis": + return LVISEvaluator(dataset_name, cfg, True, output_folder) + if len(evaluator_list) == 0: + raise NotImplementedError( + "no Evaluator for the dataset {} with the type {}".format( + dataset_name, evaluator_type + ) + ) + elif len(evaluator_list) == 1: + return evaluator_list[0] + return DatasetEvaluators(evaluator_list) + + @classmethod + def test_with_TTA(cls, cfg, model): + logger = logging.getLogger("detectron2.trainer") + # In the end of training, run an evaluation with TTA + # Only support some R-CNN models. 
+ logger.info("Running inference with test-time augmentation ...") + model = GeneralizedRCNNWithTTA(cfg, model) + evaluators = [ + cls.build_evaluator( + cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") + ) + for name in cfg.DATASETS.TEST + ] + res = cls.test(cfg, model, evaluators) + res = OrderedDict({k + "_TTA": v for k, v in res.items()}) + return res + + +def setup(args): + """ + Create configs and perform basic setups. + """ + cfg = get_cfg() + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup(cfg, args) + return cfg + + +def main(args): + cfg = setup(args) + + if args.eval_only: + model = Trainer.build_model(cfg) + DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + res = Trainer.test(cfg, model) + if cfg.TEST.AUG.ENABLED: + res.update(Trainer.test_with_TTA(cfg, model)) + if comm.is_main_process(): + verify_results(cfg, res) + return res + + """ + If you'd like to do anything fancier than the standard training logic, + consider writing your own training loop (see plain_train_net.py) or + subclassing the trainer. + """ + trainer = Trainer(cfg) + trainer.resume_or_load(resume=args.resume) + if cfg.TEST.AUG.ENABLED: + trainer.register_hooks( + [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] + ) + return trainer.train() + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + ) diff --git a/tools/tsne.py b/tools/tsne.py new file mode 100644 index 0000000..7ef5e17 --- /dev/null +++ b/tools/tsne.py @@ -0,0 +1,56 @@ +# Author: Narine Kokhlikyan +# License: BSD + +import matplotlib.pyplot as plt + +from matplotlib.ticker import NullFormatter +from sklearn import manifold, datasets +from time import time +import numpy as np + + +def plot_tsne(X, label): + n_components = 2 + (fig, subplots) = plt.subplots(1, 5, figsize=(15, 8)) + + perplexities = [5, 30, 50, 100, 150] + + for i, perplexity in enumerate(perplexities): + ax = subplots[i] + + t0 = time() + tsne = manifold.TSNE(n_components=n_components, init='random', + random_state=0, perplexity=perplexity) + Y = tsne.fit_transform(X) + t1 = time() + print("circles, perplexity=%d in %.2g sec" % (perplexity, t1 - t0)) + ax.set_title("Perplexity=%d" % perplexity) + sc = ax.scatter(Y[:, 0], Y[:, 1], c=label, cmap="Set1") + ax.xaxis.set_major_formatter(NullFormatter()) + ax.yaxis.set_major_formatter(NullFormatter()) + ax.axis('tight') + plt.legend(handles=sc.legend_elements()[0], labels=['0', '1']) + + # plt.show() + + plt.savefig('tsne.png') + + +n_samples = 300 +# X, y = datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05) +# plot_tsne(X, y) + +num_samples_from_prior = 10 +num_tasks = 10 + +X = [] +color = [] +label = [] +for i in range(num_tasks): + for p in range(num_samples_from_prior): + prior = np.random.rand(1000) + X.append(prior) + color.append('C'+str(i)) + label.append(i%2) + +plot_tsne(np.array(X), np.array(color), label) diff --git a/tools/visualize_data.py b/tools/visualize_data.py new file mode 100644 index 0000000..b143b2d --- /dev/null +++ b/tools/visualize_data.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. 
+import argparse
+import os
+from itertools import chain
+import cv2
+import tqdm
+
+from detectron2.config import get_cfg
+from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader
+from detectron2.data import detection_utils as utils
+from detectron2.data.build import filter_images_with_few_keypoints
+from detectron2.utils.logger import setup_logger
+from detectron2.utils.visualizer import Visualizer
+
+
+def setup(args):
+    cfg = get_cfg()
+    if args.config_file:
+        cfg.merge_from_file(args.config_file)
+    cfg.merge_from_list(args.opts)
+    cfg.freeze()
+    return cfg
+
+
+def parse_args(in_args=None):
+    parser = argparse.ArgumentParser(description="Visualize ground-truth data")
+    parser.add_argument(
+        "--source",
+        choices=["annotation", "dataloader"],
+        required=True,
+        help="visualize the annotations or the data loader (with pre-processing)",
+    )
+    parser.add_argument("--config-file", metavar="FILE", help="path to config file")
+    parser.add_argument("--output-dir", default="./", help="path to output directory")
+    parser.add_argument("--show", action="store_true", help="show output in a window")
+    parser.add_argument(
+        "opts",
+        help="Modify config options using the command-line",
+        default=None,
+        nargs=argparse.REMAINDER,
+    )
+    return parser.parse_args(in_args)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    logger = setup_logger()
+    logger.info("Arguments: " + str(args))
+    cfg = setup(args)
+
+    dirname = args.output_dir
+    os.makedirs(dirname, exist_ok=True)
+    metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
+
+    def output(vis, fname):
+        if args.show:
+            print(fname)
+            cv2.imshow("window", vis.get_image()[:, :, ::-1])
+            cv2.waitKey()
+        else:
+            filepath = os.path.join(dirname, fname)
+            print("Saving to {} ...".format(filepath))
+            vis.save(filepath)
+
+    scale = 2.0 if args.show else 1.0
+    if args.source == "dataloader":
+        train_data_loader = build_detection_train_loader(cfg)
+        for batch in train_data_loader:
+            for per_image in batch:
+                # Pytorch tensor is in (C, H, W) format
+                img = per_image["image"].permute(1, 2, 0).cpu().detach().numpy()
+                img = utils.convert_image_to_rgb(img, cfg.INPUT.FORMAT)
+
+                visualizer = Visualizer(img, metadata=metadata, scale=scale)
+                target_fields = per_image["instances"].get_fields()
+                labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]]
+                vis = visualizer.overlay_instances(
+                    labels=labels,
+                    boxes=target_fields.get("gt_boxes", None),
+                    masks=target_fields.get("gt_masks", None),
+                    keypoints=target_fields.get("gt_keypoints", None),
+                )
+                output(vis, str(per_image["image_id"]) + ".jpg")
+    else:
+        dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN]))
+        if cfg.MODEL.KEYPOINT_ON:
+            dicts = filter_images_with_few_keypoints(dicts, 1)
+        for dic in tqdm.tqdm(dicts):
+            img = utils.read_image(dic["file_name"], "RGB")
+            visualizer = Visualizer(img, metadata=metadata, scale=scale)
+            vis = visualizer.draw_dataset_dict(dic)
+            output(vis, os.path.basename(dic["file_name"]))
diff --git a/tools/visualize_json_results.py b/tools/visualize_json_results.py
new file mode 100644
index 0000000..d11ecb9
--- /dev/null
+++ b/tools/visualize_json_results.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import argparse
+import json
+import numpy as np
+import os
+from collections import defaultdict
+import cv2
+import tqdm
+from fvcore.common.file_io import PathManager
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import Boxes, BoxMode, Instances
+from detectron2.utils.logger import setup_logger
+from detectron2.utils.visualizer import Visualizer
+
+
+def create_instances(predictions, image_size):
+    ret = Instances(image_size)
+
+    score = np.asarray([x["score"] for x in predictions])
+    chosen = (score > args.conf_threshold).nonzero()[0]
+    score = score[chosen]
+    bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4)
+    bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
+
+    labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen])
+
+    ret.scores = score
+    ret.pred_boxes = Boxes(bbox)
+    ret.pred_classes = labels
+
+    try:
+        ret.pred_masks = [predictions[i]["segmentation"] for i in chosen]
+    except KeyError:
+        pass
+    return ret
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="A script that visualizes the json predictions for a COCO or LVIS dataset."
+    )
+    parser.add_argument("--input", required=True, help="JSON file produced by the model")
+    parser.add_argument("--output", required=True, help="output directory")
+    parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val")
+    parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold")
+    args = parser.parse_args()
+
+    logger = setup_logger()
+
+    with PathManager.open(args.input, "r") as f:
+        predictions = json.load(f)
+
+    pred_by_image = defaultdict(list)
+    for p in predictions:
+        pred_by_image[p["image_id"]].append(p)
+
+    dicts = list(DatasetCatalog.get(args.dataset))
+    metadata = MetadataCatalog.get(args.dataset)
+    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
+
+        def dataset_id_map(ds_id):
+            return metadata.thing_dataset_id_to_contiguous_id[ds_id]
+
+    elif "lvis" in args.dataset:
+        # LVIS results are in the same format as COCO results, but have a different
+        # mapping from dataset category id to contiguous category id in [0, #categories - 1]
+        def dataset_id_map(ds_id):
+            return ds_id - 1
+
+    else:
+        raise ValueError("Unsupported dataset: {}".format(args.dataset))
+
+    os.makedirs(args.output, exist_ok=True)
+
+    for dic in tqdm.tqdm(dicts):
+        img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1]
+        basename = os.path.basename(dic["file_name"])
+
+        predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2])
+        vis = Visualizer(img, metadata)
+        vis_pred = vis.draw_instance_predictions(predictions).get_image()
+
+        vis = Visualizer(img, metadata)
+        vis_gt = vis.draw_dataset_dict(dic).get_image()
+
+        concat = np.concatenate((vis_pred, vis_gt), axis=1)
+        cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1])