From 28e2825941e3345d61ddf028402962726c4dbac1 Mon Sep 17 00:00:00 2001 From: RE-OWOD <95522332+RE-OWOD@users.noreply.github.com> Date: Tue, 4 Jan 2022 13:49:38 +0800 Subject: [PATCH] Add files via upload --- projects/DensePose/README.md | 53 + projects/DensePose/apply_net.py | 319 +++++ .../configs/Base-DensePose-RCNN-FPN.yaml | 48 + .../densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml | 16 + .../densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml | 23 + .../densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml | 23 + .../densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml | 18 + .../densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml | 16 + .../densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml | 18 + .../densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml | 16 + .../densepose_rcnn_R_101_FPN_DL_s1x.yaml | 10 + .../densepose_rcnn_R_101_FPN_WC1M_s1x.yaml | 18 + .../densepose_rcnn_R_101_FPN_WC1_s1x.yaml | 16 + .../densepose_rcnn_R_101_FPN_WC2M_s1x.yaml | 18 + .../densepose_rcnn_R_101_FPN_WC2_s1x.yaml | 16 + .../configs/densepose_rcnn_R_101_FPN_s1x.yaml | 8 + .../densepose_rcnn_R_101_FPN_s1x_legacy.yaml | 17 + .../densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml | 18 + .../densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml | 16 + .../densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml | 18 + .../densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml | 16 + .../densepose_rcnn_R_50_FPN_DL_s1x.yaml | 10 + .../densepose_rcnn_R_50_FPN_WC1M_s1x.yaml | 20 + .../densepose_rcnn_R_50_FPN_WC1_s1x.yaml | 16 + .../densepose_rcnn_R_50_FPN_WC2M_s1x.yaml | 18 + .../densepose_rcnn_R_50_FPN_WC2_s1x.yaml | 16 + .../configs/densepose_rcnn_R_50_FPN_s1x.yaml | 8 + .../densepose_rcnn_R_50_FPN_s1x_legacy.yaml | 17 + .../configs/evolution/Base-RCNN-FPN-MC-B.yaml | 121 ++ .../configs/evolution/Base-RCNN-FPN-MC.yaml | 91 ++ .../densepose_R_101_FPN_1x_Atop10_toP.yaml | 19 + .../densepose_R_101_FPN_DL_1x_Atop10_toP.yaml | 19 + ...epose_R_101_FPN_DL_WC1M_1x_Atop10_toP.yaml | 29 + ...sepose_R_101_FPN_DL_WC1_1x_Atop10_toP.yaml | 27 + ...ensepose_R_101_FPN_WC1M_1x_Atop10_toP.yaml | 29 + ...densepose_R_101_FPN_WC1_1x_Atop10_toP.yaml | 27 + .../densepose_R_50_FPN_1x_Atop10_toP.yaml | 19 + .../densepose_R_50_FPN_DL_1x_Atop10_toP.yaml | 19 + ...sepose_R_50_FPN_DL_WC1M_1x_Atop10_toP.yaml | 29 + ...nsepose_R_50_FPN_DL_WC1_1x_Atop10_toP.yaml | 27 + ...densepose_R_50_FPN_WC1M_1x_Atop10_toP.yaml | 29 + ...nsepose_R_50_FPN_WC1M_1x_Atop10_toP_B.yaml | 30 + .../densepose_R_50_FPN_WC1_1x_Atop10_toP.yaml | 27 + .../evolution/faster_rcnn_R_50_FPN_1x_MC.yaml | 7 + ...cnn_HRFPN_HRNet_w32_training_acc_test.yaml | 7 + ...nsepose_rcnn_R_50_FPN_DL_instant_test.yaml | 11 + ..._rcnn_R_50_FPN_TTA_inference_acc_test.yaml | 13 + ...sepose_rcnn_R_50_FPN_WC1_instant_test.yaml | 19 + ...sepose_rcnn_R_50_FPN_WC2_instant_test.yaml | 19 + ...pose_rcnn_R_50_FPN_inference_acc_test.yaml | 8 + .../densepose_rcnn_R_50_FPN_instant_test.yaml | 9 + ...epose_rcnn_R_50_FPN_training_acc_test.yaml | 18 + projects/DensePose/densepose/config.py | 171 +++ projects/DensePose/densepose/data/__init__.py | 23 + projects/DensePose/densepose/data/build.py | 604 +++++++++ .../densepose/data/combined_loader.py | 44 + .../densepose/data/dataset_mapper.py | 168 +++ .../densepose/data/datasets/__init__.py | 5 + .../densepose/data/datasets/builtin.py | 13 + .../densepose/data/datasets/chimpnsee.py | 28 + .../DensePose/densepose/data/datasets/coco.py | 324 +++++ .../densepose/data/datasets/dataset_type.py | 11 + .../densepose/data/image_list_dataset.py | 53 + .../densepose/data/inference_based_loader.py | 146 +++ .../densepose/data/samplers/__init__.py | 6 + 
.../densepose/data/samplers/densepose_base.py | 190 +++ .../samplers/densepose_confidence_based.py | 91 ++ .../data/samplers/densepose_uniform.py | 41 + .../data/samplers/mask_from_densepose.py | 59 + .../data/samplers/prediction_to_gt.py | 80 ++ .../DensePose/densepose/data/structures.py | 703 ++++++++++ .../densepose/data/transform/__init__.py | 3 + .../densepose/data/transform/image.py | 37 + projects/DensePose/densepose/data/utils.py | 22 + .../densepose/data/video/__init__.py | 17 + .../densepose/data/video/frame_selector.py | 87 ++ .../data/video/video_keyframe_dataset.py | 232 ++++ .../densepose/densepose_coco_evaluation.py | 1157 +++++++++++++++++ .../DensePose/densepose/engine/__init__.py | 3 + .../DensePose/densepose/engine/trainer.py | 118 ++ projects/DensePose/densepose/evaluator.py | 224 ++++ .../DensePose/densepose/modeling/build.py | 66 + .../densepose/modeling/confidence.py | 73 ++ .../modeling/densepose_checkpoint.py | 35 + .../DensePose/densepose/modeling/filter.py | 94 ++ .../DensePose/densepose/modeling/hrfpn.py | 181 +++ .../DensePose/densepose/modeling/hrnet.py | 473 +++++++ .../DensePose/densepose/modeling/inference.py | 83 ++ .../densepose/modeling/losses/__init__.py | 3 + .../modeling/losses/densepose_losses.py | 729 +++++++++++ .../densepose/modeling/predictors/__init__.py | 5 + .../densepose/modeling/predictors/chart.py | 102 ++ .../modeling/predictors/chart_confidence.py | 176 +++ .../predictors/chart_with_confidence.py | 13 + .../densepose/modeling/roi_heads/deeplab.py | 263 ++++ .../densepose/modeling/roi_heads/registry.py | 5 + .../densepose/modeling/roi_heads/roi_head.py | 224 ++++ .../densepose/modeling/roi_heads/v1convx.py | 64 + projects/DensePose/query_db.py | 250 ++++ projects/DensePose/train_net.py | 74 ++ 100 files changed, 9052 insertions(+) create mode 100644 projects/DensePose/apply_net.py create mode 100644 projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml create mode 100644 projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml create mode 100644 projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml create mode 100644 projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml create mode 100644 
projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml create mode 100644 projects/DensePose/configs/evolution/Base-RCNN-FPN-MC-B.yaml create mode 100644 projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1M_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1M_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP_B.yaml create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1_1x_Atop10_toP.yaml create mode 100644 projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_training_acc_test.yaml create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml create mode 100644 projects/DensePose/densepose/config.py create mode 100644 projects/DensePose/densepose/data/__init__.py create mode 100644 projects/DensePose/densepose/data/build.py create mode 100644 projects/DensePose/densepose/data/combined_loader.py create mode 100644 projects/DensePose/densepose/data/dataset_mapper.py create mode 100644 projects/DensePose/densepose/data/datasets/__init__.py create mode 100644 projects/DensePose/densepose/data/datasets/builtin.py create mode 100644 projects/DensePose/densepose/data/datasets/chimpnsee.py create mode 100644 
projects/DensePose/densepose/data/datasets/coco.py create mode 100644 projects/DensePose/densepose/data/datasets/dataset_type.py create mode 100644 projects/DensePose/densepose/data/image_list_dataset.py create mode 100644 projects/DensePose/densepose/data/inference_based_loader.py create mode 100644 projects/DensePose/densepose/data/samplers/__init__.py create mode 100644 projects/DensePose/densepose/data/samplers/densepose_base.py create mode 100644 projects/DensePose/densepose/data/samplers/densepose_confidence_based.py create mode 100644 projects/DensePose/densepose/data/samplers/densepose_uniform.py create mode 100644 projects/DensePose/densepose/data/samplers/mask_from_densepose.py create mode 100644 projects/DensePose/densepose/data/samplers/prediction_to_gt.py create mode 100644 projects/DensePose/densepose/data/structures.py create mode 100644 projects/DensePose/densepose/data/transform/__init__.py create mode 100644 projects/DensePose/densepose/data/transform/image.py create mode 100644 projects/DensePose/densepose/data/utils.py create mode 100644 projects/DensePose/densepose/data/video/__init__.py create mode 100644 projects/DensePose/densepose/data/video/frame_selector.py create mode 100644 projects/DensePose/densepose/data/video/video_keyframe_dataset.py create mode 100644 projects/DensePose/densepose/densepose_coco_evaluation.py create mode 100644 projects/DensePose/densepose/engine/__init__.py create mode 100644 projects/DensePose/densepose/engine/trainer.py create mode 100644 projects/DensePose/densepose/evaluator.py create mode 100644 projects/DensePose/densepose/modeling/build.py create mode 100644 projects/DensePose/densepose/modeling/confidence.py create mode 100644 projects/DensePose/densepose/modeling/densepose_checkpoint.py create mode 100644 projects/DensePose/densepose/modeling/filter.py create mode 100644 projects/DensePose/densepose/modeling/hrfpn.py create mode 100644 projects/DensePose/densepose/modeling/hrnet.py create mode 100644 projects/DensePose/densepose/modeling/inference.py create mode 100644 projects/DensePose/densepose/modeling/losses/__init__.py create mode 100644 projects/DensePose/densepose/modeling/losses/densepose_losses.py create mode 100644 projects/DensePose/densepose/modeling/predictors/__init__.py create mode 100644 projects/DensePose/densepose/modeling/predictors/chart.py create mode 100644 projects/DensePose/densepose/modeling/predictors/chart_confidence.py create mode 100644 projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py create mode 100644 projects/DensePose/densepose/modeling/roi_heads/deeplab.py create mode 100644 projects/DensePose/densepose/modeling/roi_heads/registry.py create mode 100644 projects/DensePose/densepose/modeling/roi_heads/roi_head.py create mode 100644 projects/DensePose/densepose/modeling/roi_heads/v1convx.py create mode 100644 projects/DensePose/query_db.py create mode 100644 projects/DensePose/train_net.py diff --git a/projects/DensePose/README.md b/projects/DensePose/README.md index 8b13789..fd2f1ee 100644 --- a/projects/DensePose/README.md +++ b/projects/DensePose/README.md @@ -1 +1,54 @@ +# DensePose in Detectron2 +**Dense Human Pose Estimation In The Wild** + +_Rıza Alp Güler, Natalia Neverova, Iasonas Kokkinos_ + +[[`densepose.org`](https://densepose.org)] [[`arXiv`](https://arxiv.org/abs/1802.00434)] [[`BibTeX`](#CitingDensePose)] + +Dense human pose estimation aims at mapping all human pixels of an RGB image to the 3D surface of the human body. + +
+ +
+ +In this repository, we provide the code to train and evaluate DensePose-RCNN. We also provide tools to visualize +DensePose annotation and results. + +# Quick Start + +See [ Getting Started ](doc/GETTING_STARTED.md) + +# Model Zoo and Baselines + +We provide a number of baseline results and trained models available for download. See [Model Zoo](doc/MODEL_ZOO.md) for details. + +# License + +Detectron2 is released under the [Apache 2.0 license](../../LICENSE) + +## Citing DensePose + +If you use DensePose, please take the references from the following BibTeX entries: + +For DensePose with estimated confidences: + +``` +@InProceedings{Neverova2019DensePoseConfidences, + title = {Correlated Uncertainty for Learning Dense Correspondences from Noisy Labels}, + author = {Neverova, Natalia and Novotny, David and Vedaldi, Andrea}, + journal = {Advances in Neural Information Processing Systems}, + year = {2019}, +} +``` + +For the original DensePose: + +``` +@InProceedings{Guler2018DensePose, + title={DensePose: Dense Human Pose Estimation In The Wild}, + author={R\{i}za Alp G\"uler, Natalia Neverova, Iasonas Kokkinos}, + journal={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2018} +} +``` diff --git a/projects/DensePose/apply_net.py b/projects/DensePose/apply_net.py new file mode 100644 index 0000000..25ccc7d --- /dev/null +++ b/projects/DensePose/apply_net.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import argparse +import glob +import logging +import os +import pickle +import sys +from typing import Any, ClassVar, Dict, List +import torch + +from detectron2.config import get_cfg +from detectron2.data.detection_utils import read_image +from detectron2.engine.defaults import DefaultPredictor +from detectron2.structures.boxes import BoxMode +from detectron2.structures.instances import Instances +from detectron2.utils.logger import setup_logger + +from densepose import add_densepose_config, add_hrnet_config +from densepose.utils.logger import verbosity_to_level +from densepose.vis.base import CompoundVisualizer +from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer +from densepose.vis.densepose import ( + DensePoseResultsContourVisualizer, + DensePoseResultsFineSegmentationVisualizer, + DensePoseResultsUVisualizer, + DensePoseResultsVVisualizer, +) +from densepose.vis.extractor import CompoundExtractor, create_extractor + +DOC = """Apply Net - a tool to print / visualize DensePose results +""" + +LOGGER_NAME = "apply_net" +logger = logging.getLogger(LOGGER_NAME) + +_ACTION_REGISTRY: Dict[str, "Action"] = {} + + +class Action(object): + @classmethod + def add_arguments(cls: type, parser: argparse.ArgumentParser): + parser.add_argument( + "-v", + "--verbosity", + action="count", + help="Verbose mode. 
Multiple -v options increase the verbosity.", + ) + + +def register_action(cls: type): + """ + Decorator for action classes to automate action registration + """ + global _ACTION_REGISTRY + _ACTION_REGISTRY[cls.COMMAND] = cls + return cls + + +class InferenceAction(Action): + @classmethod + def add_arguments(cls: type, parser: argparse.ArgumentParser): + super(InferenceAction, cls).add_arguments(parser) + parser.add_argument("cfg", metavar="", help="Config file") + parser.add_argument("model", metavar="", help="Model file") + parser.add_argument("input", metavar="", help="Input data") + parser.add_argument( + "--opts", + help="Modify config options using the command-line 'KEY VALUE' pairs", + default=[], + nargs=argparse.REMAINDER, + ) + + @classmethod + def execute(cls: type, args: argparse.Namespace): + logger.info(f"Loading config from {args.cfg}") + opts = [] + cfg = cls.setup_config(args.cfg, args.model, args, opts) + logger.info(f"Loading model from {args.model}") + predictor = DefaultPredictor(cfg) + logger.info(f"Loading data from {args.input}") + file_list = cls._get_input_file_list(args.input) + if len(file_list) == 0: + logger.warning(f"No input images for {args.input}") + return + context = cls.create_context(args) + for file_name in file_list: + img = read_image(file_name, format="BGR") # predictor expects BGR image. + with torch.no_grad(): + outputs = predictor(img)["instances"] + cls.execute_on_outputs(context, {"file_name": file_name, "image": img}, outputs) + cls.postexecute(context) + + @classmethod + def setup_config( + cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str] + ): + cfg = get_cfg() + add_densepose_config(cfg) + add_hrnet_config(cfg) + cfg.merge_from_file(config_fpath) + cfg.merge_from_list(args.opts) + if opts: + cfg.merge_from_list(opts) + cfg.MODEL.WEIGHTS = model_fpath + cfg.freeze() + return cfg + + @classmethod + def _get_input_file_list(cls: type, input_spec: str): + if os.path.isdir(input_spec): + file_list = [ + os.path.join(input_spec, fname) + for fname in os.listdir(input_spec) + if os.path.isfile(os.path.join(input_spec, fname)) + ] + elif os.path.isfile(input_spec): + file_list = [input_spec] + else: + file_list = glob.glob(input_spec) + return file_list + + +@register_action +class DumpAction(InferenceAction): + """ + Dump action that outputs results to a pickle file + """ + + COMMAND: ClassVar[str] = "dump" + + @classmethod + def add_parser(cls: type, subparsers: argparse._SubParsersAction): + parser = subparsers.add_parser(cls.COMMAND, help="Dump model outputs to a file.") + cls.add_arguments(parser) + parser.set_defaults(func=cls.execute) + + @classmethod + def add_arguments(cls: type, parser: argparse.ArgumentParser): + super(DumpAction, cls).add_arguments(parser) + parser.add_argument( + "--output", + metavar="", + default="results.pkl", + help="File name to save dump to", + ) + + @classmethod + def execute_on_outputs( + cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances + ): + image_fpath = entry["file_name"] + logger.info(f"Processing {image_fpath}") + result = {"file_name": image_fpath} + if outputs.has("scores"): + result["scores"] = outputs.get("scores").cpu() + if outputs.has("pred_boxes"): + result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu() + if outputs.has("pred_densepose"): + boxes_XYWH = BoxMode.convert( + result["pred_boxes_XYXY"], BoxMode.XYXY_ABS, BoxMode.XYWH_ABS + ) + result["pred_densepose"] = outputs.get("pred_densepose").to_result(boxes_XYWH) 
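(Editor's note: the `dump` action being defined here collects one dictionary per image with the keys `file_name`, `scores`, `pred_boxes_XYXY` and, when DensePose outputs are present, `pred_densepose`, and its `postexecute` step pickles the resulting list. Below is a minimal, hedged sketch of how such a `results.pkl` could be read back; the file path is illustrative and the snippet assumes detectron2 and the `densepose` package from this patch are importable so the pickled objects can be reconstructed. It is not part of the patch itself.)

```python
# Hedged sketch: read back the list of per-image dicts written by `apply_net.py dump`.
# Assumes "results.pkl" was produced by the dump action above and that detectron2
# and the densepose package are importable (needed to unpickle DensePose results).
import pickle

with open("results.pkl", "rb") as f:  # path is illustrative
    results = pickle.load(f)

for entry in results:
    # "pred_boxes_XYXY" is an N x 4 tensor of detection boxes when detections exist
    n_boxes = len(entry["pred_boxes_XYXY"]) if "pred_boxes_XYXY" in entry else 0
    print(entry["file_name"], "detections:", n_boxes)
    if "scores" in entry:
        print("  scores:", entry["scores"].tolist())
```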
+ context["results"].append(result) + + @classmethod + def create_context(cls: type, args: argparse.Namespace): + context = {"results": [], "out_fname": args.output} + return context + + @classmethod + def postexecute(cls: type, context: Dict[str, Any]): + out_fname = context["out_fname"] + out_dir = os.path.dirname(out_fname) + if len(out_dir) > 0 and not os.path.exists(out_dir): + os.makedirs(out_dir) + with open(out_fname, "wb") as hFile: + pickle.dump(context["results"], hFile) + logger.info(f"Output saved to {out_fname}") + + +@register_action +class ShowAction(InferenceAction): + """ + Show action that visualizes selected entries on an image + """ + + COMMAND: ClassVar[str] = "show" + VISUALIZERS: ClassVar[Dict[str, object]] = { + "dp_contour": DensePoseResultsContourVisualizer, + "dp_segm": DensePoseResultsFineSegmentationVisualizer, + "dp_u": DensePoseResultsUVisualizer, + "dp_v": DensePoseResultsVVisualizer, + "bbox": ScoredBoundingBoxVisualizer, + } + + @classmethod + def add_parser(cls: type, subparsers: argparse._SubParsersAction): + parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries") + cls.add_arguments(parser) + parser.set_defaults(func=cls.execute) + + @classmethod + def add_arguments(cls: type, parser: argparse.ArgumentParser): + super(ShowAction, cls).add_arguments(parser) + parser.add_argument( + "visualizations", + metavar="", + help="Comma separated list of visualizations, possible values: " + "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))), + ) + parser.add_argument( + "--min_score", + metavar="", + default=0.8, + type=float, + help="Minimum detection score to visualize", + ) + parser.add_argument( + "--nms_thresh", metavar="", default=None, type=float, help="NMS threshold" + ) + parser.add_argument( + "--output", + metavar="", + default="outputres.png", + help="File name to save output to", + ) + + @classmethod + def setup_config( + cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str] + ): + opts.append("MODEL.ROI_HEADS.SCORE_THRESH_TEST") + opts.append(str(args.min_score)) + if args.nms_thresh is not None: + opts.append("MODEL.ROI_HEADS.NMS_THRESH_TEST") + opts.append(str(args.nms_thresh)) + cfg = super(ShowAction, cls).setup_config(config_fpath, model_fpath, args, opts) + return cfg + + @classmethod + def execute_on_outputs( + cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances + ): + import cv2 + import numpy as np + + visualizer = context["visualizer"] + extractor = context["extractor"] + image_fpath = entry["file_name"] + logger.info(f"Processing {image_fpath}") + image = cv2.cvtColor(entry["image"], cv2.COLOR_BGR2GRAY) + image = np.tile(image[:, :, np.newaxis], [1, 1, 3]) + data = extractor(outputs) + image_vis = visualizer.visualize(image, data) + entry_idx = context["entry_idx"] + 1 + out_fname = cls._get_out_fname(entry_idx, context["out_fname"]) + out_dir = os.path.dirname(out_fname) + if len(out_dir) > 0 and not os.path.exists(out_dir): + os.makedirs(out_dir) + cv2.imwrite(out_fname, image_vis) + logger.info(f"Output saved to {out_fname}") + context["entry_idx"] += 1 + + @classmethod + def postexecute(cls: type, context: Dict[str, Any]): + pass + + @classmethod + def _get_out_fname(cls: type, entry_idx: int, fname_base: str): + base, ext = os.path.splitext(fname_base) + return base + ".{0:04d}".format(entry_idx) + ext + + @classmethod + def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: + vis_specs = args.visualizations.split(",") + 
visualizers = [] + extractors = [] + for vis_spec in vis_specs: + vis = cls.VISUALIZERS[vis_spec]() + visualizers.append(vis) + extractor = create_extractor(vis) + extractors.append(extractor) + visualizer = CompoundVisualizer(visualizers) + extractor = CompoundExtractor(extractors) + context = { + "extractor": extractor, + "visualizer": visualizer, + "out_fname": args.output, + "entry_idx": 0, + } + return context + + +def create_argument_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=DOC, + formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120), + ) + parser.set_defaults(func=lambda _: parser.print_help(sys.stdout)) + subparsers = parser.add_subparsers(title="Actions") + for _, action in _ACTION_REGISTRY.items(): + action.add_parser(subparsers) + return parser + + +def main(): + parser = create_argument_parser() + args = parser.parse_args() + verbosity = args.verbosity if hasattr(args, "verbosity") else None + global logger + logger = setup_logger(name=LOGGER_NAME) + logger.setLevel(verbosity_to_level(verbosity)) + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml b/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml new file mode 100644 index 0000000..1579187 --- /dev/null +++ b/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml @@ -0,0 +1,48 @@ +VERSION: 2 +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
+ POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + POOLER_SAMPLING_RATIO: 2 + POOLER_TYPE: "ROIAlign" + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseV1ConvXHead" + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 +DATASETS: + TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival") + TEST: ("densepose_coco_2014_minival",) +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.01 + STEPS: (60000, 80000) + MAX_ITER: 90000 + WARMUP_FACTOR: 0.1 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml new file mode 100644 index 0000000..36eabfe --- /dev/null +++ b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml @@ -0,0 +1,16 @@ +_BASE_: "../Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33dYBMemi9xOUFR0w" + BACKBONE: + NAME: "build_hrfpn_backbone" + RPN: + IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] + ROI_HEADS: + IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] +SOLVER: + MAX_ITER: 130000 + STEPS: (100000, 120000) + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "norm" + BASE_LR: 0.03 diff --git a/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml new file mode 100644 index 0000000..0ca8085 --- /dev/null +++ b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml @@ -0,0 +1,23 @@ +_BASE_: "../Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33ck0gvo5jfoWBOPo" + BACKBONE: + NAME: "build_hrfpn_backbone" + RPN: + IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] + ROI_HEADS: + IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] + HRNET: + STAGE2: + NUM_CHANNELS: [40, 80] + STAGE3: + NUM_CHANNELS: [40, 80, 160] + STAGE4: + NUM_CHANNELS: [40, 80, 160, 320] +SOLVER: + MAX_ITER: 130000 + STEPS: (100000, 120000) + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "norm" + BASE_LR: 0.03 diff --git a/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml new file mode 100644 index 0000000..a3f437a --- /dev/null +++ b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml @@ -0,0 +1,23 @@ +_BASE_: "../Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33dKvqI6pBZlifgJk" + BACKBONE: + NAME: "build_hrfpn_backbone" + RPN: + IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] + ROI_HEADS: + IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5'] + HRNET: + STAGE2: + NUM_CHANNELS: [48, 96] + STAGE3: + NUM_CHANNELS: [48, 96, 192] + STAGE4: + NUM_CHANNELS: [48, 96, 192, 384] +SOLVER: + MAX_ITER: 130000 + STEPS: (100000, 120000) + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "norm" + BASE_LR: 0.03 diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml new file mode 100644 index 0000000..3c16763 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + ROI_DENSEPOSE_HEAD: + NAME: 
"DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml new file mode 100644 index 0000000..15475b1 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml @@ -0,0 +1,16 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml new file mode 100644 index 0000000..0cbe07f --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "indep_aniso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml new file mode 100644 index 0000000..7546b96 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml @@ -0,0 +1,16 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "indep_aniso" + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml new file mode 100644 index 0000000..045f7f0 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml @@ -0,0 +1,10 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" +SOLVER: + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml new file mode 100644 index 0000000..9334e18 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + ROI_DENSEPOSE_HEAD: + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) + WARMUP_FACTOR: 0.025 diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml 
b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml new file mode 100644 index 0000000..ace6209 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml @@ -0,0 +1,16 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + ROI_DENSEPOSE_HEAD: + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) + WARMUP_FACTOR: 0.025 diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml new file mode 100644 index 0000000..90f0be2 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + ROI_DENSEPOSE_HEAD: + UV_CONFIDENCE: + ENABLED: True + TYPE: "indep_aniso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) + WARMUP_FACTOR: 0.025 diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml new file mode 100644 index 0000000..766c098 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml @@ -0,0 +1,16 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + ROI_DENSEPOSE_HEAD: + UV_CONFIDENCE: + ENABLED: True + TYPE: "indep_aniso" + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) + WARMUP_FACTOR: 0.025 diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml new file mode 100644 index 0000000..af44fb7 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 +SOLVER: + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml new file mode 100644 index 0000000..8e79a1b --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml @@ -0,0 +1,17 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + ROI_DENSEPOSE_HEAD: + NUM_COARSE_SEGM_CHANNELS: 15 + POOLER_RESOLUTION: 14 + HEATMAP_SIZE: 56 + INDEX_WEIGHTS: 2.0 + PART_WEIGHTS: 0.3 + POINT_REGRESSION_WEIGHTS: 0.1 + DECODER_ON: False +SOLVER: + BASE_LR: 0.002 + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml new file mode 100644 index 0000000..18a417a --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + 
ENABLED: True + TYPE: "iid_iso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml new file mode 100644 index 0000000..f3720ef --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml @@ -0,0 +1,16 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml new file mode 100644 index 0000000..8a413d2 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "indep_aniso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml new file mode 100644 index 0000000..5a47cc0 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml @@ -0,0 +1,16 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "indep_aniso" + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml new file mode 100644 index 0000000..52a170b --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml @@ -0,0 +1,10 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" +SOLVER: + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml new file mode 100644 index 0000000..8a81f2a --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml @@ -0,0 +1,20 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: norm + CLIP_VALUE: 100.0 + MAX_ITER: 130000 + STEPS: (100000, 120000) + WARMUP_FACTOR: 0.025 diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml 
b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml new file mode 100644 index 0000000..d36e542 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml @@ -0,0 +1,16 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) + WARMUP_FACTOR: 0.025 diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml new file mode 100644 index 0000000..5cf29ea --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml @@ -0,0 +1,18 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + UV_CONFIDENCE: + ENABLED: True + TYPE: "indep_aniso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) + WARMUP_FACTOR: 0.025 diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml new file mode 100644 index 0000000..e880d46 --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml @@ -0,0 +1,16 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + UV_CONFIDENCE: + ENABLED: True + TYPE: "indep_aniso" + POINT_REGRESSION_WEIGHTS: 0.0005 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 130000 + STEPS: (100000, 120000) + WARMUP_FACTOR: 0.025 diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml new file mode 100644 index 0000000..d2dd14c --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml @@ -0,0 +1,8 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 +SOLVER: + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml new file mode 100644 index 0000000..6c5391f --- /dev/null +++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml @@ -0,0 +1,17 @@ +_BASE_: "Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + NUM_COARSE_SEGM_CHANNELS: 15 + POOLER_RESOLUTION: 14 + HEATMAP_SIZE: 56 + INDEX_WEIGHTS: 2.0 + PART_WEIGHTS: 0.3 + POINT_REGRESSION_WEIGHTS: 0.1 + DECODER_ON: False +SOLVER: + BASE_LR: 0.002 + MAX_ITER: 130000 + STEPS: (100000, 120000) diff --git a/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC-B.yaml b/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC-B.yaml new file mode 100644 index 0000000..1a2664d --- /dev/null +++ b/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC-B.yaml @@ -0,0 +1,121 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: [[32], 
[64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. + POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("base_coco_2017_train",) + TEST: ("base_coco_2017_val", "densepose_chimps") + CATEGORY_MAPS: + "base_coco_2017_train": + "16": 1 # bird -> person + "17": 1 # cat -> person + "18": 1 # dog -> person + "19": 1 # horse -> person + "20": 1 # sheep -> person + "21": 1 # cow -> person + "22": 1 # elephant -> person + "23": 1 # bear -> person + "24": 1 # zebra -> person + "25": 1 # girafe -> person + "base_coco_2017_val": + "16": 1 # bird -> person + "17": 1 # cat -> person + "18": 1 # dog -> person + "19": 1 # horse -> person + "20": 1 # sheep -> person + "21": 1 # cow -> person + "22": 1 # elephant -> person + "23": 1 # bear -> person + "24": 1 # zebra -> person + "25": 1 # girafe -> person + WHITELISTED_CATEGORIES: + "base_coco_2017_train": + - 1 # person + - 16 # bird + - 17 # cat + - 18 # dog + - 19 # horse + - 20 # sheep + - 21 # cow + - 22 # elephant + - 23 # bear + - 24 # zebra + - 25 # girafe + "base_coco_2017_val": + - 1 # person + - 16 # bird + - 17 # cat + - 18 # dog + - 19 # horse + - 20 # sheep + - 21 # cow + - 22 # elephant + - 23 # bear + - 24 # zebra + - 25 # girafe +BOOTSTRAP_DATASETS: + - DATASET: "chimpnsee" + RATIO: 1.0 + IMAGE_LOADER: + TYPE: "video_keyframe" + SELECT: + STRATEGY: "random_k" + NUM_IMAGES: 4 + TRANSFORM: + TYPE: "resize" + MIN_SIZE: 800 + MAX_SIZE: 1333 + BATCH_SIZE: 8 + NUM_WORKERS: 1 + INFERENCE: + INPUT_BATCH_SIZE: 1 + OUTPUT_BATCH_SIZE: 1 + DATA_SAMPLER: + # supported types: + # densepose_uniform + # densepose_UV_confidence + # densepose_fine_segm_confidence + # densepose_coarse_segm_confidence + TYPE: "densepose_uniform" + COUNT_PER_CLASS: 8 + FILTER: + TYPE: "detection_score" + MIN_VALUE: 0.8 +BOOTSTRAP_MODEL: + WEIGHTS: "" +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml b/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml new file mode 100644 index 0000000..5a20882 --- /dev/null +++ b/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml @@ -0,0 +1,91 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + NAME: "build_resnet_fpn_backbone" + RESNETS: + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + FPN: + IN_FEATURES: ["res2", "res3", "res4", "res5"] + ANCHOR_GENERATOR: + SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map + ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) + RPN: + IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] + PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level + PRE_NMS_TOPK_TEST: 1000 # Per FPN level + # Detectron1 uses 2000 proposals per-batch, + # (See 
"modeling/rpn/rpn_outputs.py" for details of this legacy issue) + # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. + POST_NMS_TOPK_TRAIN: 1000 + POST_NMS_TOPK_TEST: 1000 + ROI_HEADS: + NAME: "StandardROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_BOX_HEAD: + NAME: "FastRCNNConvFCHead" + NUM_FC: 2 + POOLER_RESOLUTION: 7 + ROI_MASK_HEAD: + NAME: "MaskRCNNConvUpsampleHead" + NUM_CONV: 4 + POOLER_RESOLUTION: 14 +DATASETS: + TRAIN: ("base_coco_2017_train",) + TEST: ("base_coco_2017_val", "densepose_chimps") + CATEGORY_MAPS: + "base_coco_2017_train": + "16": 1 # bird -> person + "17": 1 # cat -> person + "18": 1 # dog -> person + "19": 1 # horse -> person + "20": 1 # sheep -> person + "21": 1 # cow -> person + "22": 1 # elephant -> person + "23": 1 # bear -> person + "24": 1 # zebra -> person + "25": 1 # girafe -> person + "base_coco_2017_val": + "16": 1 # bird -> person + "17": 1 # cat -> person + "18": 1 # dog -> person + "19": 1 # horse -> person + "20": 1 # sheep -> person + "21": 1 # cow -> person + "22": 1 # elephant -> person + "23": 1 # bear -> person + "24": 1 # zebra -> person + "25": 1 # girafe -> person + WHITELISTED_CATEGORIES: + "base_coco_2017_train": + - 1 # person + - 16 # bird + - 17 # cat + - 18 # dog + - 19 # horse + - 20 # sheep + - 21 # cow + - 22 # elephant + - 23 # bear + - 24 # zebra + - 25 # girafe + "base_coco_2017_val": + - 1 # person + - 16 # bird + - 17 # cat + - 18 # dog + - 19 # horse + - 20 # sheep + - 21 # cow + - 22 # elephant + - 23 # bear + - 24 # zebra + - 25 # girafe +SOLVER: + IMS_PER_BATCH: 16 + BASE_LR: 0.02 + STEPS: (60000, 80000) + MAX_ITER: 90000 +INPUT: + MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) +VERSION: 2 diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_1x_Atop10_toP.yaml new file mode 100644 index 0000000..cf0050e --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_1x_Atop10_toP.yaml @@ -0,0 +1,19 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseV1ConvXHead" + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_1x_Atop10_toP.yaml new file mode 100644 index 0000000..ff151ed --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_1x_Atop10_toP.yaml @@ -0,0 +1,19 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1M_1x_Atop10_toP.yaml 
b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1M_1x_Atop10_toP.yaml new file mode 100644 index 0000000..16762cc --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1M_1x_Atop10_toP.yaml @@ -0,0 +1,29 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + WARMUP_FACTOR: 0.025 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1_1x_Atop10_toP.yaml new file mode 100644 index 0000000..45f6ec9 --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1_1x_Atop10_toP.yaml @@ -0,0 +1,27 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + POINT_REGRESSION_WEIGHTS: 0.0005 + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + WARMUP_FACTOR: 0.025 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1M_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1M_1x_Atop10_toP.yaml new file mode 100644 index 0000000..81ffe1f --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1M_1x_Atop10_toP.yaml @@ -0,0 +1,29 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseV1ConvXHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + WARMUP_FACTOR: 0.025 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1_1x_Atop10_toP.yaml new file mode 100644 index 0000000..76abe3e --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1_1x_Atop10_toP.yaml @@ -0,0 +1,27 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" + RESNETS: + DEPTH: 101 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", 
"p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseV1ConvXHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + POINT_REGRESSION_WEIGHTS: 0.0005 + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + WARMUP_FACTOR: 0.025 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_1x_Atop10_toP.yaml new file mode 100644 index 0000000..c827da1 --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_1x_Atop10_toP.yaml @@ -0,0 +1,19 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseV1ConvXHead" + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_1x_Atop10_toP.yaml new file mode 100644 index 0000000..174029b --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_1x_Atop10_toP.yaml @@ -0,0 +1,19 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_1x_Atop10_toP.yaml new file mode 100644 index 0000000..ab5bf31 --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_1x_Atop10_toP.yaml @@ -0,0 +1,29 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + WARMUP_FACTOR: 0.025 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1_1x_Atop10_toP.yaml new file mode 100644 index 0000000..9d0ca1e --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1_1x_Atop10_toP.yaml @@ -0,0 +1,27 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" 
+MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + POINT_REGRESSION_WEIGHTS: 0.0005 + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + WARMUP_FACTOR: 0.025 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP.yaml new file mode 100644 index 0000000..35855b7 --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP.yaml @@ -0,0 +1,29 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseV1ConvXHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + WARMUP_FACTOR: 0.025 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP_B.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP_B.yaml new file mode 100644 index 0000000..74c5476 --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP_B.yaml @@ -0,0 +1,30 @@ +_BASE_: "Base-RCNN-FPN-MC-B.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseV1ConvXHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + SEGM_CONFIDENCE: + ENABLED: True + POINT_REGRESSION_WEIGHTS: 0.0005 + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + INDEX_WEIGHTS: 1.0 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "norm" + WARMUP_FACTOR: 0.025 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1_1x_Atop10_toP.yaml new file mode 100644 index 0000000..683215e --- /dev/null +++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1_1x_Atop10_toP.yaml @@ -0,0 +1,27 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + DENSEPOSE_ON: True + ROI_HEADS: + NAME: "DensePoseROIHeads" + IN_FEATURES: ["p2", "p3", "p4", "p5"] + NUM_CLASSES: 1 + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseV1ConvXHead" + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + POINT_REGRESSION_WEIGHTS: 0.0005 + POOLER_TYPE: "ROIAlign" + NUM_COARSE_SEGM_CHANNELS: 2 + COARSE_SEGM_TRAINED_BY_MASKS: True + 
INDEX_WEIGHTS: 1.0 +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + WARMUP_FACTOR: 0.025 +DATASETS: + TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train") + TEST: ("densepose_chimps",) diff --git a/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml b/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml new file mode 100644 index 0000000..80139ad --- /dev/null +++ b/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml @@ -0,0 +1,7 @@ +_BASE_: "Base-RCNN-FPN-MC.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + MASK_ON: False + DENSEPOSE_ON: False + RESNETS: + DEPTH: 50 diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_training_acc_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_training_acc_test.yaml new file mode 100644 index 0000000..68a8509 --- /dev/null +++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_training_acc_test.yaml @@ -0,0 +1,7 @@ +_BASE_: "../HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml" +DATASETS: + TRAIN: ("densepose_coco_2014_minival_100",) + TEST: ("densepose_coco_2014_minival_100",) +SOLVER: + MAX_ITER: 40 + STEPS: (30,) diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml new file mode 100644 index 0000000..b90989e --- /dev/null +++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml @@ -0,0 +1,11 @@ +_BASE_: "../Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_DENSEPOSE_HEAD: + NAME: "DensePoseDeepLabHead" +DATASETS: + TRAIN: ("densepose_coco_2014_minival_100",) + TEST: ("densepose_coco_2014_minival_100",) +SOLVER: + MAX_ITER: 40 + STEPS: (30,) diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml new file mode 100644 index 0000000..7d41274 --- /dev/null +++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml @@ -0,0 +1,13 @@ +_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml" +MODEL: + WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl" +DATASETS: + TRAIN: () + TEST: ("densepose_coco_2014_minival_100",) +TEST: + AUG: + ENABLED: True + MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) + MAX_SIZE: 4000 + FLIP: True + EXPECTED_RESULTS: [["bbox_TTA", "AP", 61.74, 0.03], ["densepose_gps_TTA", "AP", 60.22, 0.03], ["densepose_gpsm_TTA", "AP", 63.85, 0.03]] diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml new file mode 100644 index 0000000..f0fe611 --- /dev/null +++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + UV_CONFIDENCE: + ENABLED: True + TYPE: "iid_iso" + POINT_REGRESSION_WEIGHTS: 0.0005 +DATASETS: + TRAIN: ("densepose_coco_2014_minival_100",) + TEST: ("densepose_coco_2014_minival_100",) +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 40 + 
STEPS: (30,) + WARMUP_FACTOR: 0.025 diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml new file mode 100644 index 0000000..f0d9358 --- /dev/null +++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml @@ -0,0 +1,19 @@ +_BASE_: "../Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + RESNETS: + DEPTH: 50 + ROI_DENSEPOSE_HEAD: + UV_CONFIDENCE: + ENABLED: True + TYPE: "indep_aniso" + POINT_REGRESSION_WEIGHTS: 0.0005 +DATASETS: + TRAIN: ("densepose_coco_2014_minival_100",) + TEST: ("densepose_coco_2014_minival_100",) +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + MAX_ITER: 40 + STEPS: (30,) + WARMUP_FACTOR: 0.025 diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml new file mode 100644 index 0000000..3c5a7d2 --- /dev/null +++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml @@ -0,0 +1,8 @@ +_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml" +MODEL: + WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl" +DATASETS: + TRAIN: () + TEST: ("densepose_coco_2014_minival_100",) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 59.27, 0.025], ["densepose_gps", "AP", 60.11, 0.02], ["densepose_gpsm", "AP", 64.20, 0.02]] diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml new file mode 100644 index 0000000..057c876 --- /dev/null +++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml @@ -0,0 +1,9 @@ +_BASE_: "../Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" +DATASETS: + TRAIN: ("densepose_coco_2014_minival_100",) + TEST: ("densepose_coco_2014_minival_100",) +SOLVER: + MAX_ITER: 40 + STEPS: (30,) diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml new file mode 100644 index 0000000..0053c9d --- /dev/null +++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml @@ -0,0 +1,18 @@ +_BASE_: "../Base-DensePose-RCNN-FPN.yaml" +MODEL: + WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" + ROI_HEADS: + NUM_CLASSES: 1 +DATASETS: + TRAIN: ("densepose_coco_2014_minival",) + TEST: ("densepose_coco_2014_minival",) +SOLVER: + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: norm + CLIP_VALUE: 1.0 + MAX_ITER: 6000 + STEPS: (5500, 5800) +TEST: + EXPECTED_RESULTS: [["bbox", "AP", 76.2477, 1.0], ["densepose_gps", "AP", 79.6090, 1.5], ["densepose_gpsm", "AP", 80.0061, 1.5]] + diff --git a/projects/DensePose/densepose/config.py b/projects/DensePose/densepose/config.py new file mode 100644 index 0000000..e69e47e --- /dev/null +++ b/projects/DensePose/densepose/config.py @@ -0,0 +1,171 @@ +# -*- coding = utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +from detectron2.config import CfgNode as CN + + +def add_dataset_category_config(cfg: CN): + """ + Add config for additional category-related dataset options + - category whitelisting + - category mapping + """ + _C = cfg + _C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True) + _C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True) + + +def add_bootstrap_config(cfg: CN): + """ + """ + _C = cfg + _C.BOOTSTRAP_DATASETS = [] + _C.BOOTSTRAP_MODEL = CN() + _C.BOOTSTRAP_MODEL.WEIGHTS = "" + _C.BOOTSTRAP_MODEL.DEVICE = "cuda" + + +def get_bootstrap_dataset_config() -> CN: + _C = CN() + _C.DATASET = "" + # ratio used to mix data loaders + _C.RATIO = 0.1 + # image loader + _C.IMAGE_LOADER = CN(new_allowed=True) + _C.IMAGE_LOADER.TYPE = "" + _C.IMAGE_LOADER.BATCH_SIZE = 4 + _C.IMAGE_LOADER.NUM_WORKERS = 4 + # inference + _C.INFERENCE = CN() + # batch size for model inputs + _C.INFERENCE.INPUT_BATCH_SIZE = 4 + # batch size to group model outputs + _C.INFERENCE.OUTPUT_BATCH_SIZE = 2 + # sampled data + _C.DATA_SAMPLER = CN(new_allowed=True) + _C.DATA_SAMPLER.TYPE = "" + # filter + _C.FILTER = CN(new_allowed=True) + _C.FILTER.TYPE = "" + return _C + + +def load_bootstrap_config(cfg: CN): + """ + Bootstrap datasets are given as a list of `dict` that are not automatically + converted into CfgNode. This method processes all bootstrap dataset entries + and ensures that they are in CfgNode format and comply with the specification + """ + if not cfg.BOOTSTRAP_DATASETS: + return + + bootstrap_datasets_cfgnodes = [] + for dataset_cfg in cfg.BOOTSTRAP_DATASETS: + _C = get_bootstrap_dataset_config().clone() + _C.merge_from_other_cfg(CN(dataset_cfg)) + bootstrap_datasets_cfgnodes.append(_C) + cfg.BOOTSTRAP_DATASETS = bootstrap_datasets_cfgnodes + + +def add_densepose_head_config(cfg: CN): + """ + Add config for densepose head. + """ + _C = cfg + + _C.MODEL.DENSEPOSE_ON = True + + _C.MODEL.ROI_DENSEPOSE_HEAD = CN() + _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = "" + _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8 + # Number of parts used for point labels + _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24 + _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4 + _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512 + _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3 + _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2 + _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112 + _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2" + _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28 + _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2 + _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2 + # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD) + _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7 + # Loss weights for annotation masks.(14 Parts) + _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0 + # Loss weights for surface parts. (24 Parts) + _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0 + # Loss weights for UV regression. 
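The helpers in this file are meant to be applied to a plain detectron2 config before any of the YAML files in this patch are merged; `add_densepose_config` (defined at the end of this file) bundles them. A minimal, illustrative sketch (config path relative to projects/DensePose):

from detectron2.config import get_cfg

from densepose.config import add_densepose_config

cfg = get_cfg()
add_densepose_config(cfg)  # registers MODEL.ROI_DENSEPOSE_HEAD.*, MODEL.HRNET.* and the bootstrap keys
cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_s1x.yaml")
cfg.freeze()
print(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED)  # False unless a WC* config is merged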
+ _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01 + # Coarse segmentation is trained using instance segmentation task data + _C.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS = False + # For Decoder + _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True + _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256 + _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256 + _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = "" + _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4 + # For DeepLab head + _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN() + _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN" + _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0 + # Confidences + # Enable learning UV confidences (variances) along with the actual values + _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False}) + # UV confidence lower bound + _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01 + # Enable learning segmentation confidences (variances) along with the actual values + _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE = CN({"ENABLED": False}) + # Segmentation confidence lower bound + _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON = 0.01 + # Statistical model type for confidence learning, possible values: + # - "iid_iso": statistically independent identically distributed residuals + # with isotropic covariance + # - "indep_aniso": statistically independent residuals with anisotropic + # covariances + _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso" + # List of angles for rotation in data augmentation during training + _C.INPUT.ROTATION_ANGLES = [0] + _C.TEST.AUG.ROTATION_ANGLES = () # Rotation TTA + + +def add_hrnet_config(cfg: CN): + """ + Add config for HRNet backbone. + """ + _C = cfg + + # For HigherHRNet w32 + _C.MODEL.HRNET = CN() + _C.MODEL.HRNET.STEM_INPLANES = 64 + _C.MODEL.HRNET.STAGE2 = CN() + _C.MODEL.HRNET.STAGE2.NUM_MODULES = 1 + _C.MODEL.HRNET.STAGE2.NUM_BRANCHES = 2 + _C.MODEL.HRNET.STAGE2.BLOCK = "BASIC" + _C.MODEL.HRNET.STAGE2.NUM_BLOCKS = [4, 4] + _C.MODEL.HRNET.STAGE2.NUM_CHANNELS = [32, 64] + _C.MODEL.HRNET.STAGE2.FUSE_METHOD = "SUM" + _C.MODEL.HRNET.STAGE3 = CN() + _C.MODEL.HRNET.STAGE3.NUM_MODULES = 4 + _C.MODEL.HRNET.STAGE3.NUM_BRANCHES = 3 + _C.MODEL.HRNET.STAGE3.BLOCK = "BASIC" + _C.MODEL.HRNET.STAGE3.NUM_BLOCKS = [4, 4, 4] + _C.MODEL.HRNET.STAGE3.NUM_CHANNELS = [32, 64, 128] + _C.MODEL.HRNET.STAGE3.FUSE_METHOD = "SUM" + _C.MODEL.HRNET.STAGE4 = CN() + _C.MODEL.HRNET.STAGE4.NUM_MODULES = 3 + _C.MODEL.HRNET.STAGE4.NUM_BRANCHES = 4 + _C.MODEL.HRNET.STAGE4.BLOCK = "BASIC" + _C.MODEL.HRNET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] + _C.MODEL.HRNET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] + _C.MODEL.HRNET.STAGE4.FUSE_METHOD = "SUM" + + _C.MODEL.HRNET.HRFPN = CN() + _C.MODEL.HRNET.HRFPN.OUT_CHANNELS = 256 + + +def add_densepose_config(cfg: CN): + add_densepose_head_config(cfg) + add_hrnet_config(cfg) + add_bootstrap_config(cfg) + add_dataset_category_config(cfg) diff --git a/projects/DensePose/densepose/data/__init__.py b/projects/DensePose/densepose/data/__init__.py new file mode 100644 index 0000000..bb7e0e8 --- /dev/null +++ b/projects/DensePose/densepose/data/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +from .build import ( + build_detection_test_loader, + build_detection_train_loader, + build_combined_loader, + build_frame_selector, + build_inference_based_loaders, + has_inference_based_loaders, + BootstrapDatasetFactoryCatalog, +) +from .combined_loader import CombinedDataLoader +from .dataset_mapper import DatasetMapper +from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter +from .utils import is_relative_local_path, maybe_prepend_base_path + +# ensure the builtin datasets are registered +from . import datasets + +# ensure the bootstrap datasets builders are registered +from . import build + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/projects/DensePose/densepose/data/build.py b/projects/DensePose/densepose/data/build.py new file mode 100644 index 0000000..26ca84a --- /dev/null +++ b/projects/DensePose/densepose/data/build.py @@ -0,0 +1,604 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import itertools +import logging +import numpy as np +from collections import UserDict +from typing import Any, Callable, Collection, Dict, Iterable, List, Optional, Sequence +import torch +from torch.utils.data.dataset import Dataset + +from detectron2.config import CfgNode +from detectron2.data.build import ( + build_batch_data_loader, + load_proposals_into_dataset, + print_instances_class_histogram, + trivial_batch_collator, +) +from detectron2.data.catalog import DatasetCatalog, Metadata, MetadataCatalog +from detectron2.data.common import DatasetFromList, MapDataset +from detectron2.data.samplers import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler +from detectron2.utils.comm import get_world_size + +from densepose.config import get_bootstrap_dataset_config + +from .combined_loader import CombinedDataLoader, Loader +from .dataset_mapper import DatasetMapper +from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK +from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY +from .datasets.dataset_type import DatasetType +from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter +from .samplers import ( + DensePoseConfidenceBasedSampler, + DensePoseUniformSampler, + MaskFromDensePoseSampler, + PredictionToGroundTruthSampler, +) +from .transform import ImageResizeTransform +from .video import ( + FirstKFramesSelector, + FrameSelectionStrategy, + LastKFramesSelector, + RandomKFramesSelector, + VideoKeyframeDataset, + video_list_from_file, +) + +__all__ = ["build_detection_train_loader", "build_detection_test_loader"] + + +Instance = Dict[str, Any] +InstancePredicate = Callable[[Instance], bool] + + +def _compute_num_images_per_worker(cfg: CfgNode): + num_workers = get_world_size() + images_per_batch = cfg.SOLVER.IMS_PER_BATCH + assert ( + images_per_batch % num_workers == 0 + ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format( + images_per_batch, num_workers + ) + assert ( + images_per_batch >= num_workers + ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format( + images_per_batch, num_workers + ) + images_per_worker = images_per_batch // num_workers + return images_per_worker + + +def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]): + meta = MetadataCatalog.get(dataset_name) + for dataset_dict in dataset_dicts: + for ann in dataset_dict["annotations"]: + ann["category_id"] = 
meta.thing_dataset_id_to_contiguous_id[ann["category_id"]] + + +def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]): + # merge categories for all datasets + merged_categories = {} + for dataset_name in dataset_names: + meta = MetadataCatalog.get(dataset_name) + for cat_id, cat_name in meta.categories.items(): + if cat_id not in merged_categories: + merged_categories[cat_id] = (cat_name, dataset_name) + continue + cat_name_other, dataset_name_other = merged_categories[cat_id] + if cat_name_other != cat_name: + raise ValueError( + f"Incompatible categories for category ID {cat_id}: " + f'dataset {dataset_name} value "{cat_name}", ' + f'dataset {dataset_name_other} value "{cat_name_other}"' + ) + + merged_cat_id_to_cont_id = {} + for i, cat_id in enumerate(sorted(merged_categories.keys())): + merged_cat_id_to_cont_id[cat_id] = i + + # add category maps to metadata + for dataset_name in dataset_names: + meta = MetadataCatalog.get(dataset_name) + categories = meta.get("categories") + meta.thing_classes = [categories[cat_id] for cat_id in sorted(categories.keys())] + meta.thing_dataset_id_to_contiguous_id = { + cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted(categories.keys()) + } + meta.thing_contiguous_id_to_dataset_id = { + merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted(categories.keys()) + } + + +def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: + def has_annotations(instance: Instance) -> bool: + return "annotations" in instance + + def has_only_crowd_anotations(instance: Instance) -> bool: + for ann in instance["annotations"]: + if ann.get("is_crowd", 0) == 0: + return False + return True + + def general_keep_instance_predicate(instance: Instance) -> bool: + return has_annotations(instance) and not has_only_crowd_anotations(instance) + + if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS: + return None + return general_keep_instance_predicate + + +def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: + + min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE + + def has_sufficient_num_keypoints(instance: Instance) -> bool: + num_kpts = sum( + (np.array(ann["keypoints"][2::3]) > 0).sum() + for ann in instance["annotations"] + if "keypoints" in ann + ) + return num_kpts >= min_num_keypoints + + if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0): + return has_sufficient_num_keypoints + return None + + +def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: + if not cfg.MODEL.MASK_ON: + return None + + def has_mask_annotations(instance: Instance) -> bool: + return any("segmentation" in ann for ann in instance["annotations"]) + + return has_mask_annotations + + +def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: + if not cfg.MODEL.DENSEPOSE_ON: + return None + + use_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS + + def has_densepose_annotations(instance: Instance) -> bool: + for ann in instance["annotations"]: + if all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK) and ( + (DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann) + ): + return True + if use_masks and "segmentation" in ann: + return True + return False + + return has_densepose_annotations + + +def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: + specific_predicate_creators = [ + 
_maybe_create_keypoints_keep_instance_predicate, + _maybe_create_mask_keep_instance_predicate, + _maybe_create_densepose_keep_instance_predicate, + ] + predicates = [creator(cfg) for creator in specific_predicate_creators] + predicates = [p for p in predicates if p is not None] + if not predicates: + return None + + def combined_predicate(instance: Instance) -> bool: + return any(p(instance) for p in predicates) + + return combined_predicate + + +def _get_train_keep_instance_predicate(cfg: CfgNode): + general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg) + combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg) + + def combined_general_specific_keep_predicate(instance: Instance) -> bool: + return general_keep_predicate(instance) and combined_specific_keep_predicate(instance) + + if (general_keep_predicate is None) and (combined_specific_keep_predicate is None): + return None + if general_keep_predicate is None: + return combined_specific_keep_predicate + if combined_specific_keep_predicate is None: + return general_keep_predicate + return combined_general_specific_keep_predicate + + +def _get_test_keep_instance_predicate(cfg: CfgNode): + general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg) + return general_keep_predicate + + +def _maybe_filter_and_map_categories( + dataset_name: str, dataset_dicts: List[Instance] +) -> List[Instance]: + meta = MetadataCatalog.get(dataset_name) + whitelisted_categories = meta.get("whitelisted_categories") + category_map = meta.get("category_map", {}) + if whitelisted_categories is None and not category_map: + return dataset_dicts + filtered_dataset_dicts = [] + for dataset_dict in dataset_dicts: + anns = [] + for ann in dataset_dict["annotations"]: + cat_id = ann["category_id"] + if whitelisted_categories is not None and cat_id not in whitelisted_categories: + continue + ann["category_id"] = category_map.get(cat_id, cat_id) + anns.append(ann) + dataset_dict["annotations"] = anns + filtered_dataset_dicts.append(dataset_dict) + return filtered_dataset_dicts + + +def _add_category_whitelists_to_metadata(cfg: CfgNode): + for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items(): + meta = MetadataCatalog.get(dataset_name) + meta.whitelisted_categories = whitelisted_cat_ids + logger = logging.getLogger(__name__) + logger.info( + "Whitelisted categories for dataset {}: {}".format( + dataset_name, meta.whitelisted_categories + ) + ) + + +def _add_category_maps_to_metadata(cfg: CfgNode): + for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items(): + category_map = { + int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items() + } + meta = MetadataCatalog.get(dataset_name) + meta.category_map = category_map + logger = logging.getLogger(__name__) + logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map)) + + +def combine_detection_dataset_dicts( + dataset_names: Collection[str], + keep_instance_predicate: Optional[InstancePredicate] = None, + proposal_files: Optional[Collection[str]] = None, +) -> List[Instance]: + """ + Load and prepare dataset dicts for training / testing + + Args: + dataset_names (Collection[str]): a list of dataset names + keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate + applied to instance dicts which defines whether to keep the instance + proposal_files (Collection[str]): if given, a list of object proposal files + that match each dataset in 
`dataset_names`. + """ + assert len(dataset_names) + if proposal_files is None: + proposal_files = [None] * len(dataset_names) + assert len(dataset_names) == len(proposal_files) + # load annotations and dataset metadata + dataset_map = {} + for dataset_name in dataset_names: + dataset_dicts = DatasetCatalog.get(dataset_name) + dataset_map[dataset_name] = dataset_dicts + # initialize category maps + _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names) + # apply category maps + all_datasets_dicts = [] + for dataset_name, proposal_file in zip(dataset_names, proposal_files): + dataset_dicts = dataset_map[dataset_name] + assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!" + if proposal_file is not None: + dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file) + dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts) + _map_category_id_to_contiguous_id(dataset_name, dataset_dicts) + print_instances_class_histogram( + dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes + ) + all_datasets_dicts.append(dataset_dicts) + + if keep_instance_predicate is not None: + all_datasets_dicts_plain = [ + d + for d in itertools.chain.from_iterable(all_datasets_dicts) + if keep_instance_predicate(d) + ] + else: + all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts)) + return all_datasets_dicts_plain + + +def build_detection_train_loader(cfg: CfgNode, mapper=None): + """ + A data loader is created in a way similar to that of Detectron2. + The main differences are: + - it allows to combine datasets with different but compatible object category sets + + The data loader is created by the following steps: + 1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts. + 2. Start workers to work on the dicts. Each worker will: + * Map each metadata dict into another format to be consumed by the model. + * Batch them by simply putting dicts into a list. + The batched ``list[mapped_dict]`` is what this dataloader will return. + + Args: + cfg (CfgNode): the config + mapper (callable): a callable which takes a sample (dict) from dataset and + returns the format to be consumed by the model. + By default it will be `DatasetMapper(cfg, True)`. 
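An illustrative usage sketch for this loader, assuming the two datasets named in the evolution configs are registered and available locally; the two MODEL flags mirror what those configs set, so that the mask-only COCO data passes the keep-predicates defined above:

from detectron2.config import get_cfg

from densepose.config import add_densepose_config
from densepose.data import build_detection_train_loader

cfg = get_cfg()
add_densepose_config(cfg)
cfg.DATASETS.TRAIN = ("base_coco_2017_train", "densepose_coco_2014_train")
cfg.MODEL.MASK_ON = True
cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS = True
cfg.SOLVER.IMS_PER_BATCH = 2

# Category ids of both datasets are merged into one contiguous set, instances are
# filtered by the keep-predicates above, and mapped batches are yielded indefinitely.
train_loader = build_detection_train_loader(cfg)
batch = next(iter(train_loader))  # list[dict] with "image" and "instances" entries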
+ + Returns: + an infinite iterator of training data + """ + + _add_category_whitelists_to_metadata(cfg) + _add_category_maps_to_metadata(cfg) + dataset_dicts = combine_detection_dataset_dicts( + cfg.DATASETS.TRAIN, + keep_instance_predicate=_get_train_keep_instance_predicate(cfg), + proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, + ) + dataset = DatasetFromList(dataset_dicts, copy=False) + + if mapper is None: + mapper = DatasetMapper(cfg, True) + dataset = MapDataset(dataset, mapper) + + sampler_name = cfg.DATALOADER.SAMPLER_TRAIN + logger = logging.getLogger(__name__) + logger.info("Using training sampler {}".format(sampler_name)) + if sampler_name == "TrainingSampler": + sampler = TrainingSampler(len(dataset)) + elif sampler_name == "RepeatFactorTrainingSampler": + repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency( + dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD + ) + sampler = RepeatFactorTrainingSampler(repeat_factors) + else: + raise ValueError("Unknown training sampler: {}".format(sampler_name)) + + return build_batch_data_loader( + dataset, + sampler, + cfg.SOLVER.IMS_PER_BATCH, + aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING, + num_workers=cfg.DATALOADER.NUM_WORKERS, + ) + + +def build_detection_test_loader(cfg, dataset_name, mapper=None): + """ + Similar to `build_detection_train_loader`. + But this function uses the given `dataset_name` argument (instead of the names in cfg), + and uses batch size 1. + + Args: + cfg: a detectron2 CfgNode + dataset_name (str): a name of the dataset that's available in the DatasetCatalog + mapper (callable): a callable which takes a sample (dict) from dataset + and returns the format to be consumed by the model. + By default it will be `DatasetMapper(cfg, False)`. + + Returns: + DataLoader: a torch DataLoader, that loads the given detection + dataset, with test-time transformation and batching. + """ + _add_category_whitelists_to_metadata(cfg) + _add_category_maps_to_metadata(cfg) + dataset_dicts = combine_detection_dataset_dicts( + [dataset_name], + keep_instance_predicate=_get_test_keep_instance_predicate(cfg), + proposal_files=[ + cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)] + ] + if cfg.MODEL.LOAD_PROPOSALS + else None, + ) + + dataset = DatasetFromList(dataset_dicts) + if mapper is None: + mapper = DatasetMapper(cfg, False) + dataset = MapDataset(dataset, mapper) + + sampler = InferenceSampler(len(dataset)) + # Always use 1 image per worker during inference since this is the + # standard when reporting inference time in papers. 
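As the comment above notes, inference always uses one image per batch; a short sketch, assuming `densepose_coco_2014_minival_100` (registered by this patch) is available locally:

from detectron2.config import get_cfg

from densepose.config import add_densepose_config
from densepose.data import build_detection_test_loader

cfg = get_cfg()
add_densepose_config(cfg)
cfg.DATASETS.TEST = ("densepose_coco_2014_minival_100",)  # also consulted by DatasetMapper

test_loader = build_detection_test_loader(cfg, "densepose_coco_2014_minival_100")
inputs = next(iter(test_loader))  # a list containing exactly one mapped dict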
+ batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False) + + data_loader = torch.utils.data.DataLoader( + dataset, + num_workers=cfg.DATALOADER.NUM_WORKERS, + batch_sampler=batch_sampler, + collate_fn=trivial_batch_collator, + ) + return data_loader + + +def build_frame_selector(cfg: CfgNode): + strategy = FrameSelectionStrategy(cfg.STRATEGY) + if strategy == FrameSelectionStrategy.RANDOM_K: + frame_selector = RandomKFramesSelector(cfg.NUM_IMAGES) + elif strategy == FrameSelectionStrategy.FIRST_K: + frame_selector = FirstKFramesSelector(cfg.NUM_IMAGES) + elif strategy == FrameSelectionStrategy.LAST_K: + frame_selector = LastKFramesSelector(cfg.NUM_IMAGES) + elif strategy == FrameSelectionStrategy.ALL: + frame_selector = None + return frame_selector + + +def build_transform(cfg: CfgNode, data_type: str): + if cfg.TYPE == "resize": + if data_type == "image": + return ImageResizeTransform(cfg.MIN_SIZE, cfg.MAX_SIZE) + raise ValueError(f"Unknown transform {cfg.TYPE} for data type {data_type}") + + +def build_combined_loader(cfg: CfgNode, loaders: Collection[Loader], ratios: Sequence[float]): + images_per_worker = _compute_num_images_per_worker(cfg) + return CombinedDataLoader(loaders, images_per_worker, ratios) + + +def build_bootstrap_dataset(dataset_name: str, cfg: CfgNode) -> Sequence[torch.Tensor]: + """ + Build dataset that provides data to bootstrap on + + Args: + dataset_name (str): Name of the dataset, needs to have associated metadata + to load the data + cfg (CfgNode): bootstrapping config + Returns: + Sequence[Tensor] - dataset that provides image batches, Tensors of size + [N, C, H, W] of type float32 + """ + logger = logging.getLogger(__name__) + meta = MetadataCatalog.get(dataset_name) + factory = BootstrapDatasetFactoryCatalog.get(meta.dataset_type) + dataset = None + if factory is not None: + dataset = factory(meta, cfg) + if dataset is None: + logger.warning(f"Failed to create dataset {dataset_name} of type {meta.dataset_type}") + return dataset + + +def build_data_sampler(cfg: CfgNode): + if cfg.TYPE == "densepose_uniform": + data_sampler = PredictionToGroundTruthSampler() + # transform densepose pred -> gt + data_sampler.register_sampler( + "pred_densepose", + "gt_densepose", + DensePoseUniformSampler(count_per_class=cfg.COUNT_PER_CLASS), + ) + data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler()) + return data_sampler + elif cfg.TYPE == "densepose_UV_confidence": + data_sampler = PredictionToGroundTruthSampler() + # transform densepose pred -> gt + data_sampler.register_sampler( + "pred_densepose", + "gt_densepose", + DensePoseConfidenceBasedSampler( + confidence_channel="sigma_2", + count_per_class=cfg.COUNT_PER_CLASS, + search_proportion=0.5, + ), + ) + data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler()) + return data_sampler + elif cfg.TYPE == "densepose_fine_segm_confidence": + data_sampler = PredictionToGroundTruthSampler() + # transform densepose pred -> gt + data_sampler.register_sampler( + "pred_densepose", + "gt_densepose", + DensePoseConfidenceBasedSampler( + confidence_channel="fine_segm_confidence", + count_per_class=cfg.COUNT_PER_CLASS, + search_proportion=0.5, + ), + ) + data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler()) + return data_sampler + elif cfg.TYPE == "densepose_coarse_segm_confidence": + data_sampler = PredictionToGroundTruthSampler() + # transform densepose pred -> gt + data_sampler.register_sampler( + 
"pred_densepose", + "gt_densepose", + DensePoseConfidenceBasedSampler( + confidence_channel="coarse_segm_confidence", + count_per_class=cfg.COUNT_PER_CLASS, + search_proportion=0.5, + ), + ) + data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler()) + return data_sampler + + raise ValueError(f"Unknown data sampler type {cfg.TYPE}") + + +def build_data_filter(cfg: CfgNode): + if cfg.TYPE == "detection_score": + min_score = cfg.MIN_VALUE + return ScoreBasedFilter(min_score=min_score) + raise ValueError(f"Unknown data filter type {cfg.TYPE}") + + +def build_inference_based_loader( + cfg: CfgNode, dataset_cfg: CfgNode, model: torch.nn.Module +) -> InferenceBasedLoader: + """ + Constructs data loader based on inference results of a model. + """ + dataset = build_bootstrap_dataset(dataset_cfg.DATASET, dataset_cfg.IMAGE_LOADER) + training_sampler = TrainingSampler(len(dataset)) + data_loader = torch.utils.data.DataLoader( + dataset, + batch_size=dataset_cfg.IMAGE_LOADER.BATCH_SIZE, + sampler=training_sampler, + num_workers=dataset_cfg.IMAGE_LOADER.NUM_WORKERS, + collate_fn=trivial_batch_collator, + ) + return InferenceBasedLoader( + model, + data_loader=data_loader, + data_sampler=build_data_sampler(dataset_cfg.DATA_SAMPLER), + data_filter=build_data_filter(dataset_cfg.FILTER), + shuffle=True, + batch_size=dataset_cfg.INFERENCE.OUTPUT_BATCH_SIZE, + inference_batch_size=dataset_cfg.INFERENCE.INPUT_BATCH_SIZE, + ) + + +def has_inference_based_loaders(cfg: CfgNode) -> bool: + """ + Returns True, if at least one inferense-based loader must + be instantiated for training + """ + return len(cfg.BOOTSTRAP_DATASETS) > 0 + + +def build_inference_based_loaders( + cfg: CfgNode, model: torch.nn.Module +) -> List[InferenceBasedLoader]: + loaders = [] + ratios = [] + for dataset_spec in cfg.BOOTSTRAP_DATASETS: + dataset_cfg = get_bootstrap_dataset_config().clone() + dataset_cfg.merge_from_other_cfg(CfgNode(dataset_spec)) + loader = build_inference_based_loader(cfg, dataset_cfg, model) + loaders.append(loader) + ratios.append(dataset_cfg.RATIO) + return loaders, ratios + + +def build_video_list_dataset(meta: Metadata, cfg: CfgNode): + video_list_fpath = meta.video_list_fpath + video_base_path = meta.video_base_path + if cfg.TYPE == "video_keyframe": + frame_selector = build_frame_selector(cfg.SELECT) + transform = build_transform(cfg.TRANSFORM, data_type="image") + video_list = video_list_from_file(video_list_fpath, video_base_path) + return VideoKeyframeDataset(video_list, frame_selector, transform) + + +class _BootstrapDatasetFactoryCatalog(UserDict): + """ + A global dictionary that stores information about bootstrapped datasets creation functions + from metadata and config, for diverse DatasetType + """ + + def register(self, dataset_type: DatasetType, factory: Callable[[Metadata, CfgNode], Dataset]): + """ + Args: + dataset_type (DatasetType): a DatasetType e.g. DatasetType.VIDEO_LIST + factory (Callable[Metadata, CfgNode]): a callable which takes Metadata and cfg + arguments and returns a dataset object. 
+ """ + assert dataset_type not in self, "Dataset '{}' is already registered!".format(dataset_type) + self[dataset_type] = factory + + +BootstrapDatasetFactoryCatalog = _BootstrapDatasetFactoryCatalog() +BootstrapDatasetFactoryCatalog.register(DatasetType.VIDEO_LIST, build_video_list_dataset) diff --git a/projects/DensePose/densepose/data/combined_loader.py b/projects/DensePose/densepose/data/combined_loader.py new file mode 100644 index 0000000..73278b4 --- /dev/null +++ b/projects/DensePose/densepose/data/combined_loader.py @@ -0,0 +1,44 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import random +from collections import deque +from typing import Any, Collection, Deque, Iterable, Iterator, List, Sequence + +Loader = Iterable[Any] + + +def _pooled_next(iterator: Iterator[Any], pool: Deque[Any]): + if not pool: + pool.extend(next(iterator)) + return pool.popleft() + + +class CombinedDataLoader: + """ + Combines data loaders using the provided sampling ratios + """ + + BATCH_COUNT = 100 + + def __init__(self, loaders: Collection[Loader], batch_size: int, ratios: Sequence[float]): + self.loaders = loaders + self.batch_size = batch_size + self.ratios = ratios + + def __iter__(self) -> Iterator[List[Any]]: + iters = [iter(loader) for loader in self.loaders] + indices = [] + pool = [deque()] * len(iters) + # infinite iterator, as in D2 + while True: + if not indices: + # just a buffer of indices, its size doesn't matter + # as long as it's a multiple of batch_size + k = self.batch_size * self.BATCH_COUNT + indices = random.choices(range(len(self.loaders)), self.ratios, k=k) + try: + batch = [_pooled_next(iters[i], pool[i]) for i in indices[: self.batch_size]] + except StopIteration: + break + indices = indices[self.batch_size :] + yield batch diff --git a/projects/DensePose/densepose/data/dataset_mapper.py b/projects/DensePose/densepose/data/dataset_mapper.py new file mode 100644 index 0000000..817fec1 --- /dev/null +++ b/projects/DensePose/densepose/data/dataset_mapper.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +import copy +import logging +from typing import Any, Dict, Tuple +import torch +from fvcore.common.file_io import PathManager + +from detectron2.data import MetadataCatalog +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.layers import ROIAlign +from detectron2.structures import BoxMode + +from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData + + +def build_augmentation(cfg, is_train): + logger = logging.getLogger(__name__) + result = utils.build_augmentation(cfg, is_train) + if is_train: + random_rotation = T.RandomRotation( + cfg.INPUT.ROTATION_ANGLES, expand=False, sample_style="choice" + ) + result.append(random_rotation) + logger.info("DensePose-specific augmentation used in training: " + str(random_rotation)) + return result + + +class DatasetMapper: + """ + A customized version of `detectron2.data.DatasetMapper` + """ + + def __init__(self, cfg, is_train=True): + self.augmentation = build_augmentation(cfg, is_train) + + # fmt: off + self.img_format = cfg.INPUT.FORMAT + self.mask_on = ( + cfg.MODEL.MASK_ON or ( + cfg.MODEL.DENSEPOSE_ON + and cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS) + ) + self.keypoint_on = cfg.MODEL.KEYPOINT_ON + self.densepose_on = cfg.MODEL.DENSEPOSE_ON + assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet" + # fmt: on + if self.keypoint_on and is_train: + # Flip only makes sense in training + self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) + else: + self.keypoint_hflip_indices = None + + if self.densepose_on: + densepose_transform_srcs = [ + MetadataCatalog.get(ds).densepose_transform_src + for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST + ] + assert len(densepose_transform_srcs) > 0 + # TODO: check that DensePose transformation data is the same for + # all the datasets. Otherwise one would have to pass DB ID with + # each entry to select proper transformation data. For now, since + # all DensePose annotated data uses the same data semantics, we + # omit this check. + densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0]) + self.densepose_transform_data = DensePoseTransformData.load( + densepose_transform_data_fpath + ) + + self.is_train = is_train + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 
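A sketch of what one mapped entry looks like, assuming the dataset is available locally and the config has been prepared with `add_densepose_config`:

from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog

from densepose.config import add_densepose_config
from densepose.data import DatasetMapper

cfg = get_cfg()
add_densepose_config(cfg)
cfg.DATASETS.TRAIN = ("densepose_coco_2014_train",)  # supplies the UV transform metadata

mapper = DatasetMapper(cfg, is_train=True)
example = mapper(DatasetCatalog.get("densepose_coco_2014_train")[0])
# example["image"]    : float32 CHW tensor after the augmentations built above
# example["instances"]: Instances with gt_boxes, gt_classes and, where annotated,
#                       gt_densepose (a DensePoseList); instances with empty boxes are dropped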
+ + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + image, transforms = T.apply_transform_gens(self.augmentation, image) + image_shape = image.shape[:2] # h, w + dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) + + if not self.is_train: + dataset_dict.pop("annotations", None) + return dataset_dict + + for anno in dataset_dict["annotations"]: + if not self.mask_on: + anno.pop("segmentation", None) + if not self.keypoint_on: + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + # USER: Don't call transpose_densepose if you don't need + annos = [ + self._transform_densepose( + utils.transform_instance_annotations( + obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices + ), + transforms, + ) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + + if self.mask_on: + self._add_densepose_masks_as_segmentation(annos, image_shape) + + instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask") + densepose_annotations = [obj.get("densepose") for obj in annos] + if densepose_annotations and not all(v is None for v in densepose_annotations): + instances.gt_densepose = DensePoseList( + densepose_annotations, instances.gt_boxes, image_shape + ) + + dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()] + return dataset_dict + + def _transform_densepose(self, annotation, transforms): + if not self.densepose_on: + return annotation + + # Handle densepose annotations + is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation) + if is_valid: + densepose_data = DensePoseDataRelative(annotation, cleanup=True) + densepose_data.apply_transform(transforms, self.densepose_transform_data) + annotation["densepose"] = densepose_data + else: + # logger = logging.getLogger(__name__) + # logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid)) + DensePoseDataRelative.cleanup_annotation(annotation) + # NOTE: annotations for certain instances may be unavailable. + # 'None' is accepted by the DensePostList data structure. 
+ annotation["densepose"] = None + return annotation + + def _add_densepose_masks_as_segmentation( + self, annotations: Dict[str, Any], image_shape_hw: Tuple[int, int] + ): + for obj in annotations: + if ("densepose" not in obj) or ("segmentation" in obj): + continue + # DP segmentation: torch.Tensor [S, S] of float32, S=256 + segm_dp = torch.zeros_like(obj["densepose"].segm) + segm_dp[obj["densepose"].segm > 0] = 1 + segm_h, segm_w = segm_dp.shape + bbox_segm_dp = torch.tensor((0, 0, segm_h - 1, segm_w - 1), dtype=torch.float32) + # image bbox + x0, y0, x1, y1 = ( + v.item() for v in BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) + ) + segm_aligned = ( + ROIAlign((y1 - y0, x1 - x0), 1.0, 0, aligned=True) + .forward(segm_dp.view(1, 1, *segm_dp.shape), bbox_segm_dp) + .squeeze() + ) + image_mask = torch.zeros(*image_shape_hw, dtype=torch.float32) + image_mask[y0:y1, x0:x1] = segm_aligned + # segmentation for BitMask: np.array [H, W] of np.bool + obj["segmentation"] = image_mask >= 0.5 diff --git a/projects/DensePose/densepose/data/datasets/__init__.py b/projects/DensePose/densepose/data/datasets/__init__.py new file mode 100644 index 0000000..0ea9c2f --- /dev/null +++ b/projects/DensePose/densepose/data/datasets/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +from . import builtin # ensure the builtin datasets are registered + +__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] diff --git a/projects/DensePose/densepose/data/datasets/builtin.py b/projects/DensePose/densepose/data/datasets/builtin.py new file mode 100644 index 0000000..c788f24 --- /dev/null +++ b/projects/DensePose/densepose/data/datasets/builtin.py @@ -0,0 +1,13 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +from .chimpnsee import register_dataset as register_chimpnsee_dataset +from .coco import BASE_DATASETS as BASE_COCO_DATASETS +from .coco import DATASETS as COCO_DATASETS +from .coco import register_datasets as register_coco_datasets + +DEFAULT_DATASETS_ROOT = "datasets" + + +register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT) +register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT) + +register_chimpnsee_dataset(DEFAULT_DATASETS_ROOT) diff --git a/projects/DensePose/densepose/data/datasets/chimpnsee.py b/projects/DensePose/densepose/data/datasets/chimpnsee.py new file mode 100644 index 0000000..7b68bea --- /dev/null +++ b/projects/DensePose/densepose/data/datasets/chimpnsee.py @@ -0,0 +1,28 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +import os +from typing import Optional + +from detectron2.data import DatasetCatalog, MetadataCatalog + +from ..utils import maybe_prepend_base_path +from .dataset_type import DatasetType + +CHIMPNSEE_DATASET_NAME = "chimpnsee" + + +def register_dataset(datasets_root: Optional[os.PathLike] = None): + def empty_load_callback(): + pass + + video_list_fpath = maybe_prepend_base_path( + datasets_root, "chimpnsee/cdna.eva.mpg.de/video_list.txt" + ) + video_base_path = maybe_prepend_base_path(datasets_root, "chimpnsee/cdna.eva.mpg.de") + + DatasetCatalog.register(CHIMPNSEE_DATASET_NAME, empty_load_callback) + MetadataCatalog.get(CHIMPNSEE_DATASET_NAME).set( + dataset_type=DatasetType.VIDEO_LIST, + video_list_fpath=video_list_fpath, + video_base_path=video_base_path, + ) diff --git a/projects/DensePose/densepose/data/datasets/coco.py b/projects/DensePose/densepose/data/datasets/coco.py new file mode 100644 index 0000000..9b5bdfe --- /dev/null +++ b/projects/DensePose/densepose/data/datasets/coco.py @@ -0,0 +1,324 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +import contextlib +import io +import logging +import os +from collections import defaultdict +from dataclasses import dataclass +from typing import Any, Dict, Iterable, List, Optional +from fvcore.common.file_io import PathManager +from fvcore.common.timer import Timer + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode + +from ..utils import maybe_prepend_base_path + +DENSEPOSE_MASK_KEY = "dp_masks" +DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"] +DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY] +DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/" + + +@dataclass +class CocoDatasetInfo: + name: str + images_root: str + annotations_fpath: str + + +DATASETS = [ + CocoDatasetInfo( + name="densepose_coco_2014_train", + images_root="coco/train2014", + annotations_fpath="coco/annotations/densepose_train2014.json", + ), + CocoDatasetInfo( + name="densepose_coco_2014_minival", + images_root="coco/val2014", + annotations_fpath="coco/annotations/densepose_minival2014.json", + ), + CocoDatasetInfo( + name="densepose_coco_2014_minival_100", + images_root="coco/val2014", + annotations_fpath="coco/annotations/densepose_minival2014_100.json", + ), + CocoDatasetInfo( + name="densepose_coco_2014_valminusminival", + images_root="coco/val2014", + annotations_fpath="coco/annotations/densepose_valminusminival2014.json", + ), + CocoDatasetInfo( + name="densepose_chimps", + images_root="densepose_evolution/densepose_chimps", + annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json", + ), + CocoDatasetInfo( + name="posetrack2017_train", + images_root="posetrack2017/posetrack_data_2017", + annotations_fpath="posetrack2017/densepose_posetrack_train2017.json", + ), + CocoDatasetInfo( + name="posetrack2017_val", + images_root="posetrack2017/posetrack_data_2017", + annotations_fpath="posetrack2017/densepose_posetrack_val2017.json", + ), +] + + +BASE_DATASETS = [ + CocoDatasetInfo( + name="base_coco_2017_train", + images_root="coco/train2017", + annotations_fpath="coco/annotations/instances_train2017.json", + ), + CocoDatasetInfo( + name="base_coco_2017_val", + images_root="coco/val2017", + annotations_fpath="coco/annotations/instances_val2017.json", + ), + CocoDatasetInfo( + name="base_coco_2017_val_100", + images_root="coco/val2017", + 
annotations_fpath="coco/annotations/instances_val2017_100.json", + ), +] + + +def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]: + """ + Returns metadata associated with COCO DensePose datasets + + Args: + base_path: Optional[os.PathLike] + Base path used to load metadata from + + Returns: + Dict[str, Any] + Metadata in the form of a dictionary + """ + meta = { + "densepose_transform_src": maybe_prepend_base_path(base_path, "UV_symmetry_transforms.mat"), + "densepose_smpl_subdiv": maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"), + "densepose_smpl_subdiv_transform": maybe_prepend_base_path( + base_path, "SMPL_SUBDIV_TRANSFORM.mat" + ), + } + return meta + + +def _load_coco_annotations(json_file: str): + """ + Load COCO annotations from a JSON file + + Args: + json_file: str + Path to the file to load annotations from + Returns: + Instance of `pycocotools.coco.COCO` that provides access to annotations + data + """ + from pycocotools.coco import COCO + + logger = logging.getLogger(__name__) + timer = Timer() + with contextlib.redirect_stdout(io.StringIO()): + coco_api = COCO(json_file) + if timer.seconds() > 1: + logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) + return coco_api + + +def _add_categories_metadata(dataset_name: str, categories: Dict[str, Any]): + meta = MetadataCatalog.get(dataset_name) + meta.categories = {c["id"]: c["name"] for c in categories} + logger = logging.getLogger(__name__) + logger.info("Dataset {} categories: {}".format(dataset_name, categories)) + + +def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]): + if "minival" in json_file: + # Skip validation on COCO2014 valminusminival and minival annotations + # The ratio of buggy annotations there is tiny and does not affect accuracy + # Therefore we explicitly white-list them + return + ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] + assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( + json_file + ) + + +def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]): + if "bbox" not in ann_dict: + return + obj["bbox"] = ann_dict["bbox"] + obj["bbox_mode"] = BoxMode.XYWH_ABS + + +def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]): + if "segmentation" not in ann_dict: + return + segm = ann_dict["segmentation"] + if not isinstance(segm, dict): + # filter out invalid polygons (< 3 points) + segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] + if len(segm) == 0: + return + obj["segmentation"] = segm + + +def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]): + if "keypoints" not in ann_dict: + return + keypts = ann_dict["keypoints"] # list[int] + for idx, v in enumerate(keypts): + if idx % 3 != 2: + # COCO's segmentation coordinates are floating points in [0, H or W], + # but keypoint coordinates are integers in [0, H-1 or W-1] + # Therefore we assume the coordinates are "pixel indices" and + # add 0.5 to convert to floating point coordinates. 
+ keypts[idx] = v + 0.5 + obj["keypoints"] = keypts + + +def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]): + for key in DENSEPOSE_KEYS: + if key in ann_dict: + obj[key] = ann_dict[key] + + +def _combine_images_with_annotations( + dataset_name: str, + image_root: str, + img_datas: Iterable[Dict[str, Any]], + ann_datas: Iterable[Iterable[Dict[str, Any]]], +): + + ann_keys = ["iscrowd", "category_id"] + dataset_dicts = [] + contains_video_frame_info = False + + for img_dict, ann_dicts in zip(img_datas, ann_datas): + record = {} + record["file_name"] = os.path.join(image_root, img_dict["file_name"]) + record["height"] = img_dict["height"] + record["width"] = img_dict["width"] + record["image_id"] = img_dict["id"] + record["dataset"] = dataset_name + if "frame_id" in img_dict: + record["frame_id"] = img_dict["frame_id"] + record["video_id"] = img_dict.get("vid_id", None) + contains_video_frame_info = True + objs = [] + for ann_dict in ann_dicts: + assert ann_dict["image_id"] == record["image_id"] + assert ann_dict.get("ignore", 0) == 0 + obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict} + _maybe_add_bbox(obj, ann_dict) + _maybe_add_segm(obj, ann_dict) + _maybe_add_keypoints(obj, ann_dict) + _maybe_add_densepose(obj, ann_dict) + objs.append(obj) + record["annotations"] = objs + dataset_dicts.append(record) + if contains_video_frame_info: + create_video_frame_mapping(dataset_name, dataset_dicts) + return dataset_dicts + + +def create_video_frame_mapping(dataset_name, dataset_dicts): + mapping = defaultdict(dict) + for d in dataset_dicts: + video_id = d.get("video_id") + if video_id is None: + continue + mapping[video_id].update({d["frame_id"]: d["file_name"]}) + MetadataCatalog.get(dataset_name).set(video_frame_mapping=mapping) + + +def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str): + """ + Loads a JSON file with annotations in COCO instances format. + Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata + in a more flexible way. Postpones category mapping to a later stage to be + able to combine several datasets with different (but coherent) sets of + categories. + + Args: + + annotations_json_file: str + Path to the JSON file with annotations in COCO instances format. + image_root: str + directory that contains all the images + dataset_name: str + the name that identifies a dataset, e.g. "densepose_coco_2014_train" + extra_annotation_keys: Optional[List[str]] + If provided, these keys are used to extract additional data from + the annotations. + """ + coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file)) + _add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds())) + # sort indices for reproducible results + img_ids = sorted(coco_api.imgs.keys()) + # imgs is a list of dicts, each looks something like: + # {'license': 4, + # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', + # 'file_name': 'COCO_val2014_000000001268.jpg', + # 'height': 427, + # 'width': 640, + # 'date_captured': '2013-11-17 05:57:24', + # 'id': 1268} + imgs = coco_api.loadImgs(img_ids) + logger = logging.getLogger(__name__) + logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file)) + # anns is a list[list[dict]], where each dict is an annotation + # record for an object. The inner list enumerates the objects in an image + # and the outer list enumerates over images. 
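The `register_dataset` helper defined just below makes it straightforward to plug in further DensePose-style COCO datasets; a sketch with hypothetical names and paths:

from detectron2.data import DatasetCatalog

from densepose.data.datasets.coco import CocoDatasetInfo, register_dataset

my_dataset = CocoDatasetInfo(
    name="densepose_mydata_train",                                 # hypothetical
    images_root="mydata/images",                                   # hypothetical
    annotations_fpath="mydata/annotations/densepose_train.json",   # hypothetical
)
register_dataset(my_dataset, datasets_root="datasets")

# Loading is lazy: the JSON is only parsed on first access, via load_coco_json above.
dataset_dicts = DatasetCatalog.get("densepose_mydata_train")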
+ anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] + _verify_annotations_have_unique_ids(annotations_json_file, anns) + dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns) + return dataset_records + + +def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None): + """ + Registers provided COCO DensePose dataset + + Args: + dataset_data: CocoDatasetInfo + Dataset data + datasets_root: Optional[os.PathLike] + Datasets root folder (default: None) + """ + annotations_fpath = maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath) + images_root = maybe_prepend_base_path(datasets_root, dataset_data.images_root) + + def load_annotations(): + return load_coco_json( + annotations_json_file=annotations_fpath, + image_root=images_root, + dataset_name=dataset_data.name, + ) + + DatasetCatalog.register(dataset_data.name, load_annotations) + MetadataCatalog.get(dataset_data.name).set( + json_file=annotations_fpath, + image_root=images_root, + **get_metadata(DENSEPOSE_METADATA_URL_PREFIX) + ) + + +def register_datasets( + datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None +): + """ + Registers provided COCO DensePose datasets + + Args: + datasets_data: Iterable[CocoDatasetInfo] + An iterable of dataset datas + datasets_root: Optional[os.PathLike] + Datasets root folder (default: None) + """ + for dataset_data in datasets_data: + register_dataset(dataset_data, datasets_root) diff --git a/projects/DensePose/densepose/data/datasets/dataset_type.py b/projects/DensePose/densepose/data/datasets/dataset_type.py new file mode 100644 index 0000000..30e1c58 --- /dev/null +++ b/projects/DensePose/densepose/data/datasets/dataset_type.py @@ -0,0 +1,11 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +from enum import Enum + + +class DatasetType(Enum): + """ + Dataset type, mostly used for datasets that contain data to bootstrap models on + """ + + VIDEO_LIST = "video_list" diff --git a/projects/DensePose/densepose/data/image_list_dataset.py b/projects/DensePose/densepose/data/image_list_dataset.py new file mode 100644 index 0000000..7d656f0 --- /dev/null +++ b/projects/DensePose/densepose/data/image_list_dataset.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import logging +import numpy as np +from typing import Callable, List, Optional +import torch +from torch.utils.data.dataset import Dataset + +from detectron2.data.detection_utils import read_image + +ImageTransform = Callable[[torch.Tensor], torch.Tensor] + + +class ImageListDataset(Dataset): + """ + Dataset that provides images from a list. 
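+    Images that fail to load are replaced with a small empty placeholder tensor
+    (see `_EMPTY_IMAGE` below). A minimal usage sketch (file paths are hypothetical):
+
+        dataset = ImageListDataset(["/data/img_0001.jpg", "/data/img_0002.jpg"])
+        image = dataset[0]  # tensor of size [H, W, 3]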
+ """ + + _EMPTY_IMAGE = torch.empty((1, 1, 3)) + + def __init__(self, image_list: List[str], transform: Optional[ImageTransform] = None): + """ + Args: + image_list (List[str]): list of paths to image files + """ + self.image_list = image_list + self.transform = transform + + def __getitem__(self, idx: int) -> torch.Tensor: + """ + Gets selected images from the list + + Args: + idx (int): video index in the video list file + Returns: + image (torch.Tensor): tensor of size [H, W, 3] + """ + fpath = self.image_list[idx] + + try: + image = torch.from_numpy(np.ascontiguousarray(read_image(fpath, format="BGR"))) + if self.transform is not None: + image = self.transform(image.unsqueeze(0))[0] # Transforms are done on batches + return image + except (OSError, RuntimeError) as e: + logger = logging.getLogger(__name__) + logger.warning(f"Error opening image file container {fpath}: {e}") + + return self._EMPTY_IMAGE + + def __len__(self): + return len(self.image_list) diff --git a/projects/DensePose/densepose/data/inference_based_loader.py b/projects/DensePose/densepose/data/inference_based_loader.py new file mode 100644 index 0000000..433c686 --- /dev/null +++ b/projects/DensePose/densepose/data/inference_based_loader.py @@ -0,0 +1,146 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import random +from typing import Any, Callable, Iterable, Iterator, List, Optional, Tuple +import torch +from torch import nn + +SampledData = Any +ModelOutput = Any + + +def _grouper(iterable: Iterable[Any], n: int, fillvalue=None) -> Iterator[Tuple[Any]]: + """ + Group elements of an iterable by chunks of size `n`, e.g. + grouper(range(9), 4) -> + (0, 1, 2, 3), (4, 5, 6, 7), (8, None, None, None) + """ + it = iter(iterable) + while True: + values = [] + for _ in range(n): + try: + value = next(it) + except StopIteration: + if values: + values.extend([fillvalue] * (n - len(values))) + yield tuple(values) + return + values.append(value) + yield tuple(values) + + +class ScoreBasedFilter: + """ + Filters entries in model output based on their scores + Discards all entries with score less than the specified minimum + """ + + def __init__(self, min_score: float = 0.8): + self.min_score = min_score + + def __call__(self, model_output: ModelOutput) -> ModelOutput: + for model_output_i in model_output: + instances = model_output_i["instances"] + if not instances.has("scores"): + continue + instances_filtered = instances[instances.scores >= self.min_score] + model_output_i["instances"] = instances_filtered + return model_output + + +class InferenceBasedLoader: + """ + Data loader based on results inferred by a model. 
Consists of: + - a data loader that provides batches of images + - a model that is used to infer the results + - a data sampler that converts inferred results to annotations + """ + + def __init__( + self, + model: nn.Module, + data_loader: Iterable[List[torch.Tensor]], + data_sampler: Optional[Callable[[ModelOutput], List[SampledData]]] = None, + data_filter: Optional[Callable[[ModelOutput], ModelOutput]] = None, + shuffle: bool = True, + batch_size: int = 4, + inference_batch_size: int = 4, + drop_last: bool = False, + ): + """ + Constructor + + Args: + model (torch.nn.Module): model used to produce data + data_loader (Iterable[Tensor]): iterable that provides images + to perform inference on + data_sampler (Callable: ModelOutput -> SampledData): functor + that produces annotation data from inference results; + (optional, default: None) + data_filter (Callable: ModelOutput -> ModelOutput): filter + that selects model outputs for for further processing + (optional, default: None) + shuffle (bool): if True, the input images get shuffled + batch_size (int): batch size for the produced annotation data + inference_batch_size (int): batch size for input images + drop_last (bool): if True, drop the last batch if it is undersized + """ + self.model = model + self.model.eval() + self.data_loader = data_loader + self.data_sampler = data_sampler + self.data_filter = data_filter + self.shuffle = shuffle + self.batch_size = batch_size + self.inference_batch_size = inference_batch_size + self.drop_last = drop_last + + def __iter__(self) -> Iterator[List[SampledData]]: + for batch in self.data_loader: + # batch : List[Tensor[N, C, H, W]] + # images_batch : Tensor[N, C, H, W] + # image : Tensor[C, H, W] + images = [image for images_batch in batch for image in images_batch] + if not images: + continue + if self.shuffle: + random.shuffle(images) + yield from self._produce_data(images) + + def _produce_data(self, images: List[torch.Tensor]) -> Iterator[List[SampledData]]: + """ + Produce batches of data from images + + Args: + images (List[Tensor]): list of images to process + + Returns: + Iterator over batches of data sampled from model outputs + """ + data_batches: List[SampledData] = [] + batched_images = _grouper(images, self.inference_batch_size) + for batch in batched_images: + batch = [{"image": img.to(self.model.device)} for img in batch if img is not None] + if not batch: + continue + with torch.no_grad(): + model_output = self.model(batch) + for model_output_i, batch_i in zip(model_output, batch): + model_output_i["image"] = batch_i["image"] + model_output_filtered = ( + model_output if self.data_filter is None else self.data_filter(model_output) + ) + data = ( + model_output_filtered + if self.data_sampler is None + else self.data_sampler(model_output_filtered) + ) + for data_i in data: + if len(data_i["instances"]): + data_batches.append(data_i) + if len(data_batches) >= self.batch_size: + yield data_batches[: self.batch_size] + data_batches = data_batches[self.batch_size :] + if not self.drop_last and data_batches: + yield data_batches diff --git a/projects/DensePose/densepose/data/samplers/__init__.py b/projects/DensePose/densepose/data/samplers/__init__.py new file mode 100644 index 0000000..5a2d5d8 --- /dev/null +++ b/projects/DensePose/densepose/data/samplers/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +from .densepose_uniform import DensePoseUniformSampler +from .densepose_confidence_based import DensePoseConfidenceBasedSampler +from .mask_from_densepose import MaskFromDensePoseSampler, densepose_to_mask +from .prediction_to_gt import PredictionToGroundTruthSampler diff --git a/projects/DensePose/densepose/data/samplers/densepose_base.py b/projects/DensePose/densepose/data/samplers/densepose_base.py new file mode 100644 index 0000000..b51fc1f --- /dev/null +++ b/projects/DensePose/densepose/data/samplers/densepose_base.py @@ -0,0 +1,190 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +from typing import List, Optional +import torch +from torch.nn import functional as F + +from detectron2.structures import BoxMode, Instances + +from ..structures import ( + DensePoseDataRelative, + DensePoseList, + DensePoseOutput, + resample_output_to_bbox, +) + + +class DensePoseBaseSampler: + """ + Base DensePose sampler to produce DensePose data from DensePose predictions. + Samples for each class are drawn according to some distribution over all pixels estimated + to belong to that class. + """ + + def __init__(self, count_per_class: int = 8): + """ + Constructor + + Args: + count_per_class (int): the sampler produces at most `count_per_class` + samples for each category + """ + self.count_per_class = count_per_class + + def __call__(self, instances: Instances) -> DensePoseList: + """ + Convert DensePose predictions (an instance of `DensePoseOutput`) + into DensePose annotations data (an instance of `DensePoseList`) + """ + boxes_xyxy_abs = instances.pred_boxes.tensor.clone().cpu() + boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + dp_datas = [] + for i, box_xywh in enumerate(boxes_xywh_abs): + labels_i, result_i = resample_output_to_bbox( + instances.pred_densepose[i], box_xywh, self._confidence_channels() + ) + annotation_i = self._sample(labels_i.cpu(), result_i.cpu(), box_xywh) + annotation_i[DensePoseDataRelative.S_KEY] = self._resample_mask( + instances.pred_densepose[i] + ) + + dp_datas.append(DensePoseDataRelative(annotation_i)) + # create densepose annotations on CPU + dp_list = DensePoseList(dp_datas, boxes_xyxy_abs, instances.image_size) + return dp_list + + def _sample( + self, labels: torch.Tensor, dp_result: torch.Tensor, bbox_xywh: List[int] + ) -> DensePoseDataRelative: + """ + Sample DensPoseDataRelative from estimation results + """ + annotation = { + DensePoseDataRelative.X_KEY: [], + DensePoseDataRelative.Y_KEY: [], + DensePoseDataRelative.U_KEY: [], + DensePoseDataRelative.V_KEY: [], + DensePoseDataRelative.I_KEY: [], + } + x0, y0, _, _ = bbox_xywh + n, h, w = dp_result.shape + for part_id in range(1, DensePoseDataRelative.N_PART_LABELS + 1): + # indices - tuple of 3 1D tensors of size k + # 0: index along the first dimension N + # 1: index along H dimension + # 2: index along W dimension + indices = torch.nonzero(labels.expand(n, h, w) == part_id, as_tuple=True) + # values - an array of size [n, k] + # n: number of channels (U, V, confidences) + # k: number of points labeled with part_id + values = dp_result[indices].view(n, -1) + k = values.shape[1] + count = min(self.count_per_class, k) + if count <= 0: + continue + index_sample = self._produce_index_sample(values, count) + sampled_values = values[:, index_sample] + sampled_y = indices[1][index_sample] + 0.5 + sampled_x = indices[2][index_sample] + 0.5 + # prepare / normalize data + x = (sampled_x / w * 256.0).cpu().tolist() + y = 
(sampled_y / h * 256.0).cpu().tolist()
+                u = sampled_values[0].clamp(0, 1).cpu().tolist()
+                v = sampled_values[1].clamp(0, 1).cpu().tolist()
+                fine_segm_labels = [part_id] * count
+                # extend annotations
+                annotation[DensePoseDataRelative.X_KEY].extend(x)
+                annotation[DensePoseDataRelative.Y_KEY].extend(y)
+                annotation[DensePoseDataRelative.U_KEY].extend(u)
+                annotation[DensePoseDataRelative.V_KEY].extend(v)
+                annotation[DensePoseDataRelative.I_KEY].extend(fine_segm_labels)
+        return annotation
+
+    def _confidence_channels(self) -> Optional[List[str]]:
+        """
+        Confidence channels to be used for sampling (to be overridden in children)
+        """
+        return None
+
+    def _produce_index_sample(self, values: torch.Tensor, count: int):
+        """
+        Abstract method to produce a sample of indices to select data
+        To be implemented in descendants
+
+        Args:
+            values (torch.Tensor): an array of size [n, k] that contains
+                estimated values (U, V, confidences);
+                n: number of channels (U, V, confidences)
+                k: number of points labeled with part_id
+            count (int): number of samples to produce, should be positive and <= k
+
+        Return:
+            list(int): indices of values (along axis 1) selected as a sample
+        """
+        raise NotImplementedError
+
+    def _resample_mask(self, output: DensePoseOutput) -> torch.Tensor:
+        """
+        Convert output mask tensors into the annotation mask tensor of size
+        (256, 256)
+        """
+        sz = DensePoseDataRelative.MASK_SIZE
+        S = (
+            F.interpolate(output.S, (sz, sz), mode="bilinear", align_corners=False)
+            .argmax(dim=1)
+            .long()
+        )
+        I = (
+            (
+                F.interpolate(output.I, (sz, sz), mode="bilinear", align_corners=False).argmax(
+                    dim=1
+                )
+                * (S > 0).long()
+            )
+            .squeeze()
+            .cpu()
+        )
+        # Map fine segmentation results to coarse segmentation ground truth
+        # TODO: extract this into separate classes
+        # coarse segmentation: 1 = Torso, 2 = Right Hand, 3 = Left Hand,
+        # 4 = Left Foot, 5 = Right Foot, 6 = Upper Leg Right, 7 = Upper Leg Left,
+        # 8 = Lower Leg Right, 9 = Lower Leg Left, 10 = Upper Arm Left,
+        # 11 = Upper Arm Right, 12 = Lower Arm Left, 13 = Lower Arm Right,
+        # 14 = Head
+        # fine segmentation: 1, 2 = Torso, 3 = Right Hand, 4 = Left Hand,
+        # 5 = Left Foot, 6 = Right Foot, 7, 9 = Upper Leg Right,
+        # 8, 10 = Upper Leg Left, 11, 13 = Lower Leg Right,
+        # 12, 14 = Lower Leg Left, 15, 17 = Upper Arm Left,
+        # 16, 18 = Upper Arm Right, 19, 21 = Lower Arm Left,
+        # 20, 22 = Lower Arm Right, 23, 24 = Head
+        FINE_TO_COARSE_SEGMENTATION = {
+            1: 1,
+            2: 1,
+            3: 2,
+            4: 3,
+            5: 4,
+            6: 5,
+            7: 6,
+            8: 7,
+            9: 6,
+            10: 7,
+            11: 8,
+            12: 9,
+            13: 8,
+            14: 9,
+            15: 10,
+            16: 11,
+            17: 10,
+            18: 11,
+            19: 12,
+            20: 13,
+            21: 12,
+            22: 13,
+            23: 14,
+            24: 14,
+        }
+        mask = torch.zeros((sz, sz), dtype=torch.int64, device=torch.device("cpu"))
+        for i in range(DensePoseDataRelative.N_PART_LABELS):
+            mask[I == i + 1] = FINE_TO_COARSE_SEGMENTATION[i + 1]
+        return mask
diff --git a/projects/DensePose/densepose/data/samplers/densepose_confidence_based.py b/projects/DensePose/densepose/data/samplers/densepose_confidence_based.py
new file mode 100644
index 0000000..f0ebb0e
--- /dev/null
+++ b/projects/DensePose/densepose/data/samplers/densepose_confidence_based.py
@@ -0,0 +1,91 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import random
+from typing import List, Optional
+import torch
+
+from .densepose_base import DensePoseBaseSampler
+
+
+class DensePoseConfidenceBasedSampler(DensePoseBaseSampler):
+    """
+    Samples DensePose data from DensePose predictions.
+    Samples for each class are drawn using confidence value estimates.
+    """
+
+    def __init__(
+        self,
+        confidence_channel: str,
+        count_per_class: int = 8,
+        search_count_multiplier: Optional[float] = None,
+        search_proportion: Optional[float] = None,
+    ):
+        """
+        Constructor
+
+        Args:
+            confidence_channel (str): confidence channel to use for sampling;
+                possible values:
+                    "sigma_2": confidences for UV values
+                    "fine_segm_confidence": confidences for fine segmentation
+                    "coarse_segm_confidence": confidences for coarse segmentation
+                (default: "sigma_2")
+            count_per_class (int): the sampler produces at most `count_per_class`
+                samples for each category (default: 8)
+            search_count_multiplier (float or None): if not None, the total number
+                of the most confident estimates of a given class to consider is
+                defined as `min(search_count_multiplier * count_per_class, N)`,
+                where `N` is the total number of estimates of the class; cannot be
+                specified together with `search_proportion` (default: None)
+            search_proportion (float or None): if not None, the total number of the
+                most confident estimates of a given class to consider is
+                defined as `min(max(search_proportion * N, count_per_class), N)`,
+                where `N` is the total number of estimates of the class; cannot be
+                specified together with `search_count_multiplier` (default: None)
+        """
+        super().__init__(count_per_class)
+        self.confidence_channel = confidence_channel
+        self.search_count_multiplier = search_count_multiplier
+        self.search_proportion = search_proportion
+        assert (search_count_multiplier is None) or (search_proportion is None), (
+            f"Cannot specify both search_count_multiplier (={search_count_multiplier}) "
+            f"and search_proportion (={search_proportion})"
+        )
+
+    def _confidence_channels(self) -> Optional[List[str]]:
+        """
+        Confidence channels to be used for sampling
+        """
+        return [self.confidence_channel]
+
+    def _produce_index_sample(self, values: torch.Tensor, count: int):
+        """
+        Produce a sample of indices to select data based on confidences
+
+        Args:
+            values (torch.Tensor): an array of size [n, k] that contains
+                estimated values (U, V, confidences);
+                n: number of channels (U, V, confidences)
+                k: number of points labeled with part_id
+            count (int): number of samples to produce, should be positive and <= k
+
+        Return:
+            list(int): indices of values (along axis 1) selected as a sample
+        """
+        k = values.shape[1]
+        if k == count:
+            index_sample = list(range(k))
+        else:
+            # take the best count * search_count_multiplier pixels,
+            # sample from them uniformly
+            # (here best = smallest variance)
+            _, sorted_confidence_indices = torch.sort(values[2])
+            if self.search_count_multiplier is not None:
+                search_count = min(int(count * self.search_count_multiplier), k)
+            elif self.search_proportion is not None:
+                search_count = min(max(int(k * self.search_proportion), count), k)
+            else:
+                search_count = min(count, k)
+            sample_from_top = random.sample(range(search_count), count)
+            index_sample = sorted_confidence_indices[:search_count][sample_from_top]
+        return index_sample
diff --git a/projects/DensePose/densepose/data/samplers/densepose_uniform.py b/projects/DensePose/densepose/data/samplers/densepose_uniform.py
new file mode 100644
index 0000000..6cf083d
--- /dev/null
+++ b/projects/DensePose/densepose/data/samplers/densepose_uniform.py
@@ -0,0 +1,41 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved + +import random +import torch + +from .densepose_base import DensePoseBaseSampler + + +class DensePoseUniformSampler(DensePoseBaseSampler): + """ + Samples DensePose data from DensePose predictions. + Samples for each class are drawn uniformly over all pixels estimated + to belong to that class. + """ + + def __init__(self, count_per_class: int = 8): + """ + Constructor + + Args: + count_per_class (int): the sampler produces at most `count_per_class` + samples for each category + """ + super().__init__(count_per_class) + + def _produce_index_sample(self, values: torch.Tensor, count: int): + """ + Produce a uniform sample of indices to select data + + Args: + values (torch.Tensor): an array of size [n, k] that contains + estimated values (U, V, confidences); + n: number of channels (U, V, confidences) + k: number of points labeled with part_id + count (int): number of samples to produce, should be positive and <= k + + Return: + list(int): indices of values (along axis 1) selected as a sample + """ + k = values.shape[1] + return random.sample(range(k), count) diff --git a/projects/DensePose/densepose/data/samplers/mask_from_densepose.py b/projects/DensePose/densepose/data/samplers/mask_from_densepose.py new file mode 100644 index 0000000..66bf9c0 --- /dev/null +++ b/projects/DensePose/densepose/data/samplers/mask_from_densepose.py @@ -0,0 +1,59 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import torch + +from detectron2.structures import BitMasks, BoxMode, Instances + +from ..structures import resample_output_to_bbox + + +def densepose_to_mask(instances: Instances) -> BitMasks: + """ + Produce masks from DensePose predictions + DensePose predictions for a given image, stored in `pred_densepose` field, + are instances of DensePoseOutput. This sampler takes + `S` and `I` output tensors (coarse and fine segmentation) and converts + then to a mask tensor, which is a bool tensor of the size of the input + image + + Args: + instances (Instances): predicted results, expected to have `pred_densepose` field + that contains `DensePoseOutput` objects + + Returns: + `BitMasks` instance with boolean tensors of the size of the input image that have non-zero + values at pixels that are estimated to belong to the detected objects + """ + H, W = instances.image_size + boxes_xyxy_abs = instances.pred_boxes.tensor.clone().cpu() + boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + N = len(boxes_xywh_abs) + gt_masks = torch.zeros((N, H, W), dtype=torch.bool, device=torch.device("cpu")) + for i, box_xywh in enumerate(boxes_xywh_abs): + labels_i, _ = resample_output_to_bbox(instances.pred_densepose[i], box_xywh) + x, y, w, h = box_xywh.long().tolist() + gt_masks[i, y : y + h, x : x + w] = labels_i.cpu() > 0 + return BitMasks(gt_masks) + + +class MaskFromDensePoseSampler: + """ + Produce mask GT from DensePose predictions + DensePose prediction is an instance of DensePoseOutput. 
This sampler takes + `S` and `I` output tensors (coarse and fine segmentation) and converts + then to a mask tensor, which is a bool tensor of the size of the input + image + """ + + def __call__(self, instances: Instances) -> BitMasks: + """ + Converts predicted data from `instances` into the GT mask data + + Args: + instances (Instances): predicted results, expected to have `pred_densepose` field + + Returns: + Boolean Tensor of the size of the input image that has non-zero + values at pixels that are estimated to belong to the detected object + """ + return densepose_to_mask(instances) diff --git a/projects/DensePose/densepose/data/samplers/prediction_to_gt.py b/projects/DensePose/densepose/data/samplers/prediction_to_gt.py new file mode 100644 index 0000000..4d7f4b2 --- /dev/null +++ b/projects/DensePose/densepose/data/samplers/prediction_to_gt.py @@ -0,0 +1,80 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +from dataclasses import dataclass +from typing import Any, Callable, Dict, Optional + +from detectron2.structures import Instances + +ModelOutput = Dict[str, Any] +SampledData = Dict[str, Any] + + +@dataclass +class _Sampler: + """ + Sampler registry entry that contains: + - src (str): source field to sample from (deleted after sampling) + - dst (Optional[str]): destination field to sample to, if not None + - func (Optional[Callable: Any -> Any]): function that performs sampling, + if None, reference copy is performed + """ + + src: str + dst: Optional[str] + func: Optional[Callable[[Any], Any]] + + +class PredictionToGroundTruthSampler: + """ + Sampler implementation that converts predictions to GT using registered + samplers for different fields of `Instances`. + """ + + def __init__(self, dataset_name: str = ""): + self.dataset_name = dataset_name + self._samplers = {} + self.register_sampler("pred_boxes", "gt_boxes", None) + self.register_sampler("pred_classes", "gt_classes", None) + self.register_sampler("scores") + + def __call__(self, model_output: ModelOutput) -> SampledData: + """ + Transform model output into ground truth data through sampling + + Args: + model_output (Dict[str, Any]): model output + Returns: + Dict[str, Any]: sampled data + """ + for model_output_i in model_output: + instances: Instances = model_output_i["instances"] + # transform data in each field + for _, sampler in self._samplers.items(): + if not instances.has(sampler.src) or sampler.dst is None: + continue + if sampler.func is None: + instances.set(sampler.dst, instances.get(sampler.src)) + else: + instances.set(sampler.dst, sampler.func(instances)) + # delete model output data that was transformed + for _, sampler in self._samplers.items(): + if sampler.src != sampler.dst and instances.has(sampler.src): + instances.remove(sampler.src) + model_output_i["dataset"] = self.dataset_name + return model_output + + def register_sampler( + self, + prediction_attr: str, + gt_attr: Optional[str] = None, + func: Optional[Callable[[Any], Any]] = None, + ): + """ + Register sampler for a field + + Args: + prediction_attr (str): field to replace with a sampled value + gt_attr (Optional[str]): field to store the sampled value to, if not None + func (Optional[Callable: Any -> Any]): sampler function + """ + self._samplers[prediction_attr] = _Sampler(src=prediction_attr, dst=gt_attr, func=func) diff --git a/projects/DensePose/densepose/data/structures.py b/projects/DensePose/densepose/data/structures.py new file mode 100644 index 0000000..6b4a728 --- /dev/null +++ 
b/projects/DensePose/densepose/data/structures.py @@ -0,0 +1,703 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +import base64 +import numpy as np +from io import BytesIO +from typing import BinaryIO, Dict, List, Optional, Tuple, Union +import torch +from PIL import Image +from torch.nn import functional as F + + +class DensePoseTransformData(object): + + # Horizontal symmetry label transforms used for horizontal flip + MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14] + # fmt: off + POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa + # fmt: on + + def __init__(self, uv_symmetries: Dict[str, torch.Tensor], device: torch.device): + self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES + self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES + self.uv_symmetries = uv_symmetries + self.device = torch.device("cpu") + + def to(self, device: torch.device, copy: bool = False) -> "DensePoseTransformData": + """ + Convert transform data to the specified device + + Args: + device (torch.device): device to convert the data to + copy (bool): flag that specifies whether to copy or to reference the data + in case the device is the same + Return: + An instance of `DensePoseTransformData` with data stored on the specified device + """ + if self.device == device and not copy: + return self + uv_symmetry_map = {} + for key in self.uv_symmetries: + uv_symmetry_map[key] = self.uv_symmetries[key].to(device=device, copy=copy) + return DensePoseTransformData(uv_symmetry_map, device) + + @staticmethod + def load(io: Union[str, BinaryIO]): + """ + Args: + io: (str or binary file-like object): input file to load data from + Returns: + An instance of `DensePoseTransformData` with transforms loaded from the file + """ + import scipy.io + + uv_symmetry_map = scipy.io.loadmat(io) + uv_symmetry_map_torch = {} + for key in ["U_transforms", "V_transforms"]: + uv_symmetry_map_torch[key] = [] + map_src = uv_symmetry_map[key] + map_dst = uv_symmetry_map_torch[key] + for i in range(map_src.shape[1]): + map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float)) + uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0) + transform_data = DensePoseTransformData(uv_symmetry_map_torch, device=torch.device("cpu")) + return transform_data + + +class DensePoseDataRelative(object): + """ + Dense pose relative annotations that can be applied to any bounding box: + x - normalized X coordinates [0, 255] of annotated points + y - normalized Y coordinates [0, 255] of annotated points + i - body part labels 0,...,24 for annotated points + u - body part U coordinates [0, 1] for annotated points + v - body part V coordinates [0, 1] for annotated points + segm - 256x256 segmentation mask with values 0,...,14 + To obtain absolute x and y data wrt some bounding box one needs to first + divide the data by 256, multiply by the respective bounding box size + and add bounding box offset: + x_img = x0 + x_norm * w / 256.0 + y_img = y0 + y_norm * h / 256.0 + Segmentation masks are typically sampled to get image-based masks. 
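+    For example (illustrative numbers), a point with x_norm = 128 inside a box with
+    x0 = 50 and w = 100 maps to x_img = 50 + 128 * 100 / 256.0 = 100.0.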
+ """ + + # Key for normalized X coordinates in annotation dict + X_KEY = "dp_x" + # Key for normalized Y coordinates in annotation dict + Y_KEY = "dp_y" + # Key for U part coordinates in annotation dict + U_KEY = "dp_U" + # Key for V part coordinates in annotation dict + V_KEY = "dp_V" + # Key for I point labels in annotation dict + I_KEY = "dp_I" + # Key for segmentation mask in annotation dict + S_KEY = "dp_masks" + # Number of body parts in segmentation masks + N_BODY_PARTS = 14 + # Number of parts in point labels + N_PART_LABELS = 24 + MASK_SIZE = 256 + + def __init__(self, annotation, cleanup=False): + is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation) + assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid) + self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY]) + self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY]) + self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY]) + self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY]) + self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY]) + self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation) + self.device = torch.device("cpu") + if cleanup: + DensePoseDataRelative.cleanup_annotation(annotation) + + def to(self, device): + if self.device == device: + return self + new_data = DensePoseDataRelative.__new__(DensePoseDataRelative) + new_data.x = self.x + new_data.x = self.x.to(device) + new_data.y = self.y.to(device) + new_data.i = self.i.to(device) + new_data.u = self.u.to(device) + new_data.v = self.v.to(device) + new_data.segm = self.segm.to(device) + new_data.device = device + return new_data + + @staticmethod + def extract_segmentation_mask(annotation): + poly_specs = annotation[DensePoseDataRelative.S_KEY] + if isinstance(poly_specs, torch.Tensor): + # data is already given as mask tensors, no need to decode + return poly_specs + + import pycocotools.mask as mask_utils + + segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32) + for i in range(DensePoseDataRelative.N_BODY_PARTS): + poly_i = poly_specs[i] + if poly_i: + mask_i = mask_utils.decode(poly_i) + segm[mask_i > 0] = i + 1 + return segm + + @staticmethod + def validate_annotation(annotation): + for key in [ + DensePoseDataRelative.X_KEY, + DensePoseDataRelative.Y_KEY, + DensePoseDataRelative.I_KEY, + DensePoseDataRelative.U_KEY, + DensePoseDataRelative.V_KEY, + DensePoseDataRelative.S_KEY, + ]: + if key not in annotation: + return False, "no {key} data in the annotation".format(key=key) + return True, None + + @staticmethod + def cleanup_annotation(annotation): + for key in [ + DensePoseDataRelative.X_KEY, + DensePoseDataRelative.Y_KEY, + DensePoseDataRelative.I_KEY, + DensePoseDataRelative.U_KEY, + DensePoseDataRelative.V_KEY, + DensePoseDataRelative.S_KEY, + ]: + if key in annotation: + del annotation[key] + + def apply_transform(self, transforms, densepose_transform_data): + self._transform_pts(transforms, densepose_transform_data) + self._transform_segm(transforms, densepose_transform_data) + + def _transform_pts(self, transforms, dp_transform_data): + import detectron2.data.transforms as T + + # NOTE: This assumes that HorizFlipTransform is the only one that does flip + do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 + if do_hflip: + self.x = self.segm.size(1) - self.x + self._flip_iuv_semantics(dp_transform_data) + + for t in transforms.transforms: + if isinstance(t, 
T.RotationTransform): + xy_scale = np.array((t.w, t.h)) / DensePoseDataRelative.MASK_SIZE + xy = t.apply_coords(np.stack((self.x, self.y), axis=1) * xy_scale) + self.x, self.y = torch.tensor(xy / xy_scale, dtype=self.x.dtype).T + + def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None: + i_old = self.i.clone() + uv_symmetries = dp_transform_data.uv_symmetries + pt_label_symmetries = dp_transform_data.point_label_symmetries + for i in range(self.N_PART_LABELS): + if i + 1 in i_old: + annot_indices_i = i_old == i + 1 + if pt_label_symmetries[i + 1] != i + 1: + self.i[annot_indices_i] = pt_label_symmetries[i + 1] + u_loc = (self.u[annot_indices_i] * 255).long() + v_loc = (self.v[annot_indices_i] * 255).long() + self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to( + device=self.u.device + ) + self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to( + device=self.v.device + ) + + def _transform_segm(self, transforms, dp_transform_data): + import detectron2.data.transforms as T + + # NOTE: This assumes that HorizFlipTransform is the only one that does flip + do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 + if do_hflip: + self.segm = torch.flip(self.segm, [1]) + self._flip_segm_semantics(dp_transform_data) + + for t in transforms.transforms: + if isinstance(t, T.RotationTransform): + self._transform_segm_rotation(t) + + def _flip_segm_semantics(self, dp_transform_data): + old_segm = self.segm.clone() + mask_label_symmetries = dp_transform_data.mask_label_symmetries + for i in range(self.N_BODY_PARTS): + if mask_label_symmetries[i + 1] != i + 1: + self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1] + + def _transform_segm_rotation(self, rotation): + self.segm = F.interpolate(self.segm[None, None, :], (rotation.h, rotation.w)).numpy() + self.segm = torch.tensor(rotation.apply_segmentation(self.segm[0, 0]))[None, None, :] + self.segm = F.interpolate(self.segm, [DensePoseDataRelative.MASK_SIZE] * 2)[0, 0] + + +def normalized_coords_transform(x0, y0, w, h): + """ + Coordinates transform that maps top left corner to (-1, -1) and bottom + right corner to (1, 1). 
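+    For example (illustrative numbers), with x0=10, y0=20, w=100, h=50 the box center
+    (60, 45) is mapped to (0.0, 0.0):
+
+        f = normalized_coords_transform(10, 20, 100, 50)
+        f((60, 45))  # -> (0.0, 0.0)
+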
Used for torch.grid_sample to initialize the + grid + """ + + def f(p): + return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1) + + return f + + +class DensePoseOutput(object): + def __init__(self, S, I, U, V, confidences): + """ + Args: + S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W) + I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W) + U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W) + V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W) + confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters + """ + self.S = S + self.I = I # noqa: E741 + self.U = U + self.V = V + self.confidences = confidences + self._check_output_dims(S, I, U, V) + + def _check_output_dims(self, S, I, U, V): + assert ( + len(S.size()) == 4 + ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( + S.size() + ) + assert ( + len(I.size()) == 4 + ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( + S.size() + ) + assert ( + len(U.size()) == 4 + ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( + S.size() + ) + assert ( + len(V.size()) == 4 + ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( + S.size() + ) + assert len(S) == len(I), ( + "Number of output segmentation planes {} " + "should be equal to the number of output part index " + "planes {}".format(len(S), len(I)) + ) + assert S.size()[2:] == I.size()[2:], ( + "Output segmentation plane size {} " + "should be equal to the output part index " + "plane size {}".format(S.size()[2:], I.size()[2:]) + ) + assert I.size() == U.size(), ( + "Part index output shape {} " + "should be the same as U coordinates output shape {}".format(I.size(), U.size()) + ) + assert I.size() == V.size(), ( + "Part index output shape {} " + "should be the same as V coordinates output shape {}".format(I.size(), V.size()) + ) + + def resize(self, image_size_hw): + # do nothing - outputs are invariant to resize + pass + + def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh): + """ + Resample S, I, U, V from bbox_old to the cropped bbox_new + """ + x0old, y0old, wold, hold = bbox_old_xywh + x0new, y0new, wnew, hnew = bbox_new_xywh + tr_coords = normalized_coords_transform(x0old, y0old, wold, hold) + topleft = (x0new, y0new) + bottomright = (x0new + wnew, y0new + hnew) + topleft_norm = tr_coords(topleft) + bottomright_norm = tr_coords(bottomright) + hsize = S.size(1) + wsize = S.size(2) + grid = torch.meshgrid( + torch.arange( + topleft_norm[1], + bottomright_norm[1], + (bottomright_norm[1] - topleft_norm[1]) / hsize, + )[:hsize], + torch.arange( + topleft_norm[0], + bottomright_norm[0], + (bottomright_norm[0] - topleft_norm[0]) / wsize, + )[:wsize], + ) + grid = torch.stack(grid, dim=2).to(S.device) + assert ( + grid.size(0) == hsize + ), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0)) + assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format( + wsize, grid.size(1) + ) + S_new = F.grid_sample( + S.unsqueeze(0), + torch.unsqueeze(grid, 0), + mode="bilinear", + padding_mode="border", + align_corners=True, + ).squeeze(0) + I_new = F.grid_sample( + I.unsqueeze(0), + torch.unsqueeze(grid, 0), + mode="bilinear", + padding_mode="border", + align_corners=True, + ).squeeze(0) + U_new = F.grid_sample( + U.unsqueeze(0), + torch.unsqueeze(grid, 0), + 
mode="bilinear", + padding_mode="border", + align_corners=True, + ).squeeze(0) + V_new = F.grid_sample( + V.unsqueeze(0), + torch.unsqueeze(grid, 0), + mode="bilinear", + padding_mode="border", + align_corners=True, + ).squeeze(0) + return S_new, I_new, U_new, V_new + + def crop(self, indices_cropped, bboxes_old, bboxes_new): + """ + Crop outputs for selected bounding boxes to the new bounding boxes. + """ + # VK: cropping is ignored for now + # for i, ic in enumerate(indices_cropped): + # self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \ + # self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic], + # bboxes_old[i], bboxes_new[i]) + pass + + def hflip(self, transform_data: DensePoseTransformData) -> None: + """ + Change S, I, U and V to take into account a Horizontal flip. + """ + if self.I.shape[0] > 0: + for el in "SIUV": + self.__dict__[el] = torch.flip(self.__dict__[el], [3]) + for key in self.confidences: + self.confidences[key] = torch.flip(self.confidences[key], [3]) + self._flip_iuv_semantics_tensor(transform_data) + self._flip_segm_semantics_tensor(transform_data) + + def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None: + point_label_symmetries = dp_transform_data.point_label_symmetries + uv_symmetries = dp_transform_data.uv_symmetries + + N, C, H, W = self.U.shape + u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long() + v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long() + Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand( + N, C - 1, H, W + ) + self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc] + self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc] + + for el in "IUV": + self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :] + + def _flip_segm_semantics_tensor(self, dp_transform_data): + if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1: + self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :] + + def to_result(self, boxes_xywh): + """ + Convert DensePose outputs to results format. 
Results are more compact, + but cannot be resampled any more + """ + result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V) + return result + + def __getitem__(self, item): + if isinstance(item, int): + S_selected = self.S[item].unsqueeze(0) + I_selected = self.I[item].unsqueeze(0) + U_selected = self.U[item].unsqueeze(0) + V_selected = self.V[item].unsqueeze(0) + conf_selected = {} + for key in self.confidences: + conf_selected[key] = self.confidences[key][item].unsqueeze(0) + else: + S_selected = self.S[item] + I_selected = self.I[item] + U_selected = self.U[item] + V_selected = self.V[item] + conf_selected = {} + for key in self.confidences: + conf_selected[key] = self.confidences[key][item] + return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected) + + def __str__(self): + s = "DensePoseOutput S {}, I {}, U {}, V {}".format( + list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size()) + ) + s_conf = "confidences: [{}]".format( + ", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences]) + ) + return ", ".join([s, s_conf]) + + def __len__(self): + return self.S.size(0) + + +def resample_output_to_bbox( + output: DensePoseOutput, bbox_xywh_abs: List[int], confidences: Optional[List[str]] = None +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Convert DensePose output of size [1, C, S, S] into DensePose results [D, H_i, W_i], + where `i` is detection index and `D == 2 + len(confidences)`. This conversion: + - resamples data to the detection bounding box size (H_i, W_i), + - sets label for each pixel of the bounding box as the `argmax` of scores, + - assigns values (U, V, confidences) based on label and resampled data + + Args: + output (DensePoseOutput): outputs of the DensePose model + bbox_xywh_abs (List[int]): bounding box, a list of 4 integer values XYWH + confidences (List[str]): optional list of `str` that specifies confidence + channels to be resampled and added to the results + + Results: + labels (torch.Tensor): tensor [1, H_i, W_i] of `torch.uint8` containing fine + segmentation labels of each pixel + data (torch.Tensor): tensor [D, H_i, W_i] of `torch.float32` containing + for each pixel the estimated U, V coordinates and the requested + confidence values in the order that corresponds to `confidences` + """ + x, y, w, h = bbox_xywh_abs + w = max(int(w), 1) + h = max(int(h), 1) + N_out = 2 if confidences is None else 2 + len(confidences) + device = output.U.device + data = torch.zeros([N_out, h, w], dtype=torch.float32, device=device) + # coarse segmentation + assert ( + len(output.S.size()) == 4 + ), "AnnIndex tensor size should have {} dimensions but has {}".format(4, len(output.S.size())) + s_bbox = F.interpolate(output.S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) + # fine segmentation + assert ( + len(output.I.size()) == 4 + ), "IndexUV tensor size should have {} dimensions but has {}".format(4, len(output.S.size())) + labels = ( + F.interpolate(output.I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) + * (s_bbox > 0).long() + ).squeeze(0) + # U + assert len(output.U.size()) == 4, "U tensor size should have {} dimensions but has {}".format( + 4, len(output.U.size()) + ) + u_bbox = F.interpolate(output.U, (h, w), mode="bilinear", align_corners=False) + # V + assert len(output.V.size()) == 4, "V tensor size should have {} dimensions but has {}".format( + 4, len(output.V.size()) + ) + v_bbox = F.interpolate(output.V, (h, w), mode="bilinear", 
align_corners=False) + # confidences + if confidences is not None: + resampled_confidence = {} + for key in output.confidences: + resampled_confidence[key] = F.interpolate( + output.confidences[key], (h, w), mode="bilinear", align_corners=False + ) + + # assign data from channels that correspond to the labels + for part_id in range(1, u_bbox.size(1)): + data[0][labels == part_id] = u_bbox[0, part_id][labels == part_id] + data[1][labels == part_id] = v_bbox[0, part_id][labels == part_id] + if confidences is None: + continue + for i, key in enumerate(confidences): + if resampled_confidence[key].size(1) != u_bbox.size(1): + # confidence is not part-based, don't try to fill it part by part + continue + data[2 + i][labels == part_id] = resampled_confidence[key][0, part_id][ + labels == part_id + ] + if confidences is not None: + for i, key in enumerate(confidences): + if resampled_confidence[key].size(1) != u_bbox.size(1): + # confidence is not part-based, fill the data with the first channel + # (targeted for segmentation confidences that have only 1 channel) + data[2 + i] = resampled_confidence[key][0, 0] + return labels.unsqueeze(0), data + + +class DensePoseResult(object): + def __init__(self, boxes_xywh, S, I, U, V): + self.results = [] + self.boxes_xywh = boxes_xywh.cpu().tolist() + assert len(boxes_xywh.size()) == 2 + assert boxes_xywh.size(1) == 4 + for i, box_xywh in enumerate(boxes_xywh): + result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]]) + result_numpy_i = result_i.cpu().numpy() + result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i) + result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i) + self.results.append(result_encoded_with_shape_i) + + def __str__(self): + s = "DensePoseResult: N={} [{}]".format( + len(self.results), ", ".join([str(list(r[0])) for r in self.results]) + ) + return s + + def _output_to_result(self, box_xywh, S, I, U, V): + # TODO: reuse resample_output_to_bbox + x, y, w, h = box_xywh + w = max(int(w), 1) + h = max(int(h), 1) + result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device) + assert ( + len(S.size()) == 4 + ), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size())) + s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) + assert ( + len(I.size()) == 4 + ), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(S.size())) + i_bbox = ( + F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) + * (s_bbox > 0).long() + ).squeeze(0) + assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format( + 4, len(U.size()) + ) + u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False) + assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format( + 4, len(V.size()) + ) + v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False) + result[0] = i_bbox + for part_id in range(1, u_bbox.size(1)): + result[1][i_bbox == part_id] = ( + (u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8) + ) + result[2][i_bbox == part_id] = ( + (v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8) + ) + assert ( + result.size(1) == h + ), "Results height {} should be equal" "to bounding box height {}".format(result.size(1), h) + assert ( + result.size(2) == w + ), "Results width {} should be equal" "to bounding box width {}".format(result.size(2), w) + return result + + @staticmethod + def 
encode_png_data(arr): + """ + Encode array data as a PNG image using the highest compression rate + @param arr [in] Data stored in an array of size (3, M, N) of type uint8 + @return Base64-encoded string containing PNG-compressed data + """ + assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format( + len(arr.shape) + ) + assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format( + arr.shape[0] + ) + assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " " got {0}".format( + arr.dtype + ) + data = np.moveaxis(arr, 0, -1) + im = Image.fromarray(data) + fstream = BytesIO() + im.save(fstream, format="png", optimize=True) + s = base64.encodebytes(fstream.getvalue()).decode() + return s + + @staticmethod + def decode_png_data(shape, s): + """ + Decode array data from a string that contains PNG-compressed data + @param Base64-encoded string containing PNG-compressed data + @return Data stored in an array of size (3, M, N) of type uint8 + """ + fstream = BytesIO(base64.decodebytes(s.encode())) + im = Image.open(fstream) + data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0) + return data.reshape(shape) + + def __len__(self): + return len(self.results) + + def __getitem__(self, item): + result_encoded = self.results[item] + bbox_xywh = self.boxes_xywh[item] + return result_encoded, bbox_xywh + + +class DensePoseList(object): + + _TORCH_DEVICE_CPU = torch.device("cpu") + + def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU): + assert len(densepose_datas) == len( + boxes_xyxy_abs + ), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format( + len(densepose_datas), len(boxes_xyxy_abs) + ) + self.densepose_datas = [] + for densepose_data in densepose_datas: + assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, ( + "Attempt to initialize DensePoseList with DensePose datas " + "of type {}, expected DensePoseDataRelative".format(type(densepose_data)) + ) + densepose_data_ondevice = ( + densepose_data.to(device) if densepose_data is not None else None + ) + self.densepose_datas.append(densepose_data_ondevice) + self.boxes_xyxy_abs = boxes_xyxy_abs.to(device) + self.image_size_hw = image_size_hw + self.device = device + + def to(self, device): + if self.device == device: + return self + return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device) + + def __iter__(self): + return iter(self.densepose_datas) + + def __len__(self): + return len(self.densepose_datas) + + def __repr__(self): + s = self.__class__.__name__ + "(" + s += "num_instances={}, ".format(len(self.densepose_datas)) + s += "image_width={}, ".format(self.image_size_hw[1]) + s += "image_height={})".format(self.image_size_hw[0]) + return s + + def __getitem__(self, item): + if isinstance(item, int): + densepose_data_rel = self.densepose_datas[item] + return densepose_data_rel + elif isinstance(item, slice): + densepose_datas_rel = self.densepose_datas[item] + boxes_xyxy_abs = self.boxes_xyxy_abs[item] + return DensePoseList( + densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device + ) + elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool): + densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0] + boxes_xyxy_abs = self.boxes_xyxy_abs[item] + return DensePoseList( + densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device + ) + else: + densepose_datas_rel = 
[self.densepose_datas[i] for i in item] + boxes_xyxy_abs = self.boxes_xyxy_abs[item] + return DensePoseList( + densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device + ) diff --git a/projects/DensePose/densepose/data/transform/__init__.py b/projects/DensePose/densepose/data/transform/__init__.py new file mode 100644 index 0000000..555ee83 --- /dev/null +++ b/projects/DensePose/densepose/data/transform/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +from .image import ImageResizeTransform diff --git a/projects/DensePose/densepose/data/transform/image.py b/projects/DensePose/densepose/data/transform/image.py new file mode 100644 index 0000000..ff9de52 --- /dev/null +++ b/projects/DensePose/densepose/data/transform/image.py @@ -0,0 +1,37 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import torch + + +class ImageResizeTransform: + """ + Transform that converts frames loaded from a dataset + (RGB data in NHWC channel order, typically uint8) to a format ready to be + consumed by DensePose training (BGR float32 data in NCHW channel order) + """ + + def __init__(self, min_size: int = 800, max_size: int = 1333): + self.min_size = min_size + self.max_size = max_size + + def __call__(self, frames: torch.Tensor) -> torch.Tensor: + """ + Args: + frames (torch.Tensor): tensor of size [N, H, W, 3] that contains + RGB data (typically in uint8) + Returns: + frames (torch.Tensor): tensor of size [N, 3, H1, W1] where + H1 and W1 are chosen to respect the specified min and max sizes + and preserve the original aspect ratio, the data channels + follow BGR order and the data type is `torch.float32` + """ + frames = frames[..., [2, 1, 0]] # RGB -> BGR + frames = frames.permute(0, 3, 1, 2).float() # NHWC -> NCHW + # resize with min size + min_size = min(frames.shape[-2:]) + max_size = max(frames.shape[-2:]) + scale = min(self.min_size / min_size, self.max_size / max_size) + frames = torch.nn.functional.interpolate( + frames, scale_factor=scale, mode="bilinear", align_corners=False + ) + return frames diff --git a/projects/DensePose/densepose/data/utils.py b/projects/DensePose/densepose/data/utils.py new file mode 100644 index 0000000..fc46ca7 --- /dev/null +++ b/projects/DensePose/densepose/data/utils.py @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import os +from typing import Optional + + +def is_relative_local_path(path: os.PathLike): + path_str = os.fsdecode(path) + return ("://" not in path_str) and not os.path.isabs(path) + + +def maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike): + """ + Prepends the provided path with a base path prefix if: + 1) base path is not None; + 2) path is a local path + """ + if base_path is None: + return path + if is_relative_local_path(path): + return os.path.join(base_path, path) + return path diff --git a/projects/DensePose/densepose/data/video/__init__.py b/projects/DensePose/densepose/data/video/__init__.py new file mode 100644 index 0000000..13541ce --- /dev/null +++ b/projects/DensePose/densepose/data/video/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +from .frame_selector import ( + FrameSelectionStrategy, + RandomKFramesSelector, + FirstKFramesSelector, + LastKFramesSelector, + FrameTsList, + FrameSelector, +) + +from .video_keyframe_dataset import ( + VideoKeyframeDataset, + video_list_from_file, + list_keyframes, + read_keyframes, +) diff --git a/projects/DensePose/densepose/data/video/frame_selector.py b/projects/DensePose/densepose/data/video/frame_selector.py new file mode 100644 index 0000000..408b877 --- /dev/null +++ b/projects/DensePose/densepose/data/video/frame_selector.py @@ -0,0 +1,87 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import random +from collections.abc import Callable +from enum import Enum +from typing import Callable as TCallable +from typing import List + +FrameTsList = List[int] +FrameSelector = TCallable[[FrameTsList], FrameTsList] + + +class FrameSelectionStrategy(Enum): + """ + Frame selection strategy used with videos: + - "random_k": select k random frames + - "first_k": select k first frames + - "last_k": select k last frames + - "all": select all frames + """ + + # fmt: off + RANDOM_K = "random_k" + FIRST_K = "first_k" + LAST_K = "last_k" + ALL = "all" + # fmt: on + + +class RandomKFramesSelector(Callable): + """ + Selector that retains at most `k` random frames + """ + + def __init__(self, k: int): + self.k = k + + def __call__(self, frame_tss: FrameTsList) -> FrameTsList: + """ + Select `k` random frames + + Args: + frames_tss (List[int]): timestamps of input frames + Returns: + List[int]: timestamps of selected frames + """ + return random.sample(frame_tss, min(self.k, len(frame_tss))) + + +class FirstKFramesSelector(Callable): + """ + Selector that retains at most `k` first frames + """ + + def __init__(self, k: int): + self.k = k + + def __call__(self, frame_tss: FrameTsList) -> FrameTsList: + """ + Select `k` first frames + + Args: + frames_tss (List[int]): timestamps of input frames + Returns: + List[int]: timestamps of selected frames + """ + return frame_tss[: self.k] + + +class LastKFramesSelector(Callable): + """ + Selector that retains at most `k` last frames from video data + """ + + def __init__(self, k: int): + self.k = k + + def __call__(self, frame_tss: FrameTsList) -> FrameTsList: + """ + Select `k` last frames + + Args: + frames_tss (List[int]): timestamps of input frames + Returns: + List[int]: timestamps of selected frames + """ + return frame_tss[-self.k :] diff --git a/projects/DensePose/densepose/data/video/video_keyframe_dataset.py b/projects/DensePose/densepose/data/video/video_keyframe_dataset.py new file mode 100644 index 0000000..8efe575 --- /dev/null +++ b/projects/DensePose/densepose/data/video/video_keyframe_dataset.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import logging +import numpy as np +from typing import Callable, List, Optional +import torch +from fvcore.common.file_io import PathManager +from torch.utils.data.dataset import Dataset + +import av + +from ..utils import maybe_prepend_base_path +from .frame_selector import FrameSelector, FrameTsList + +FrameList = List[av.frame.Frame] +FrameTransform = Callable[[torch.Tensor], torch.Tensor] + + +def list_keyframes(video_fpath: str, video_stream_idx: int = 0) -> FrameTsList: + """ + Traverses all keyframes of a video file. Returns a list of keyframe + timestamps. Timestamps are counts in timebase units. 
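+    Seeking is done keyframe by keyframe; a small number of spurious backward seeks
+    reported by the demuxer is tolerated (see `tolerance_backward_seeks` below) before
+    an empty list is returned.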
+ + Args: + video_fpath (str): Video file path + video_stream_idx (int): Video stream index (default: 0) + Returns: + List[int]: list of keyframe timestaps (timestamp is a count in timebase + units) + """ + try: + with PathManager.open(video_fpath, "rb") as io: + container = av.open(io, mode="r") + stream = container.streams.video[video_stream_idx] + keyframes = [] + pts = -1 + # Note: even though we request forward seeks for keyframes, sometimes + # a keyframe in backwards direction is returned. We introduce tolerance + # as a max count of ignored backward seeks + tolerance_backward_seeks = 2 + while True: + try: + container.seek(pts + 1, backward=False, any_frame=False, stream=stream) + except av.AVError as e: + # the exception occurs when the video length is exceeded, + # we then return whatever data we've already collected + logger = logging.getLogger(__name__) + logger.debug( + f"List keyframes: Error seeking video file {video_fpath}, " + f"video stream {video_stream_idx}, pts {pts + 1}, AV error: {e}" + ) + return keyframes + except OSError as e: + logger = logging.getLogger(__name__) + logger.warning( + f"List keyframes: Error seeking video file {video_fpath}, " + f"video stream {video_stream_idx}, pts {pts + 1}, OS error: {e}" + ) + return [] + packet = next(container.demux(video=video_stream_idx)) + if packet.pts is not None and packet.pts <= pts: + logger = logging.getLogger(__name__) + logger.warning( + f"Video file {video_fpath}, stream {video_stream_idx}: " + f"bad seek for packet {pts + 1} (got packet {packet.pts}), " + f"tolerance {tolerance_backward_seeks}." + ) + tolerance_backward_seeks -= 1 + if tolerance_backward_seeks == 0: + return [] + pts += 1 + continue + tolerance_backward_seeks = 2 + pts = packet.pts + if pts is None: + return keyframes + if packet.is_keyframe: + keyframes.append(pts) + return keyframes + except OSError as e: + logger = logging.getLogger(__name__) + logger.warning( + f"List keyframes: Error opening video file container {video_fpath}, " f"OS error: {e}" + ) + except RuntimeError as e: + logger = logging.getLogger(__name__) + logger.warning( + f"List keyframes: Error opening video file container {video_fpath}, " + f"Runtime error: {e}" + ) + return [] + + +def read_keyframes( + video_fpath: str, keyframes: FrameTsList, video_stream_idx: int = 0 +) -> FrameList: + """ + Reads keyframe data from a video file. 
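A condensed sketch of the enumeration loop in list_keyframes above, with the error handling stripped out to expose the control flow; it assumes the same PyAV container and seek semantics as the full implementation.

def list_keyframes_sketch(container, stream_idx=0):
    """Condensed control flow of list_keyframes(): repeatedly seek forward past the last
    seen pts, demux one packet, and record the pts of keyframe packets. Up to two
    backward seeks are tolerated before giving up. `container` is assumed to be an
    already-open PyAV container, as in the full implementation; error handling omitted."""
    stream = container.streams.video[stream_idx]
    keyframes, pts, tolerance = [], -1, 2
    while True:
        container.seek(pts + 1, backward=False, any_frame=False, stream=stream)
        packet = next(container.demux(video=stream_idx))
        if packet.pts is not None and packet.pts <= pts:
            tolerance -= 1  # the demuxer went backwards despite the forward seek
            if tolerance == 0:
                return []
            pts += 1
            continue
        tolerance = 2
        pts = packet.pts
        if pts is None:
            return keyframes
        if packet.is_keyframe:
            keyframes.append(pts)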
+ + Args: + video_fpath (str): Video file path + keyframes (List[int]): List of keyframe timestamps (as counts in + timebase units to be used in container seek operations) + video_stream_idx (int): Video stream index (default: 0) + Returns: + List[Frame]: list of frames that correspond to the specified timestamps + """ + try: + with PathManager.open(video_fpath, "rb") as io: + container = av.open(io) + stream = container.streams.video[video_stream_idx] + frames = [] + for pts in keyframes: + try: + container.seek(pts, any_frame=False, stream=stream) + frame = next(container.decode(video=0)) + frames.append(frame) + except av.AVError as e: + logger = logging.getLogger(__name__) + logger.warning( + f"Read keyframes: Error seeking video file {video_fpath}, " + f"video stream {video_stream_idx}, pts {pts}, AV error: {e}" + ) + container.close() + return frames + except OSError as e: + logger = logging.getLogger(__name__) + logger.warning( + f"Read keyframes: Error seeking video file {video_fpath}, " + f"video stream {video_stream_idx}, pts {pts}, OS error: {e}" + ) + container.close() + return frames + except StopIteration: + logger = logging.getLogger(__name__) + logger.warning( + f"Read keyframes: Error decoding frame from {video_fpath}, " + f"video stream {video_stream_idx}, pts {pts}" + ) + container.close() + return frames + + container.close() + return frames + except OSError as e: + logger = logging.getLogger(__name__) + logger.warning( + f"Read keyframes: Error opening video file container {video_fpath}, OS error: {e}" + ) + except RuntimeError as e: + logger = logging.getLogger(__name__) + logger.warning( + f"Read keyframes: Error opening video file container {video_fpath}, Runtime error: {e}" + ) + return [] + + +def video_list_from_file(video_list_fpath: str, base_path: Optional[str] = None): + """ + Create a list of paths to video files from a text file. + + Args: + video_list_fpath (str): path to a plain text file with the list of videos + base_path (str): base path for entries from the video list (default: None) + """ + video_list = [] + with PathManager.open(video_list_fpath, "r") as io: + for line in io: + video_list.append(maybe_prepend_base_path(base_path, line.strip())) + return video_list + + +class VideoKeyframeDataset(Dataset): + """ + Dataset that provides keyframes for a set of videos. + """ + + _EMPTY_FRAMES = torch.empty((0, 3, 1, 1)) + + def __init__( + self, + video_list: List[str], + frame_selector: Optional[FrameSelector] = None, + transform: Optional[FrameTransform] = None, + ): + """ + Dataset constructor + + Args: + video_list (List[str]): list of paths to video files + frame_selector (Callable: KeyFrameList -> KeyFrameList): + selects keyframes to process, keyframes are given by + packet timestamps in timebase counts. If None, all keyframes + are selected (default: None) + transform (Callable: torch.Tensor -> torch.Tensor): + transforms a batch of RGB images (tensors of size [B, H, W, 3]), + returns a tensor of the same size. 
If None, no transform is + applied (default: None) + + """ + self.video_list = video_list + self.frame_selector = frame_selector + self.transform = transform + + def __getitem__(self, idx: int) -> torch.Tensor: + """ + Gets selected keyframes from a given video + + Args: + idx (int): video index in the video list file + Returns: + frames (torch.Tensor): tensor of size [N, H, W, 3] or of size + defined by the transform that contains keyframes data + """ + fpath = self.video_list[idx] + keyframes = list_keyframes(fpath) + if not keyframes: + return self._EMPTY_FRAMES + if self.frame_selector is not None: + keyframes = self.frame_selector(keyframes) + frames = read_keyframes(fpath, keyframes) + if not frames: + return self._EMPTY_FRAMES + frames = np.stack([frame.to_rgb().to_ndarray() for frame in frames]) + frames = torch.as_tensor(frames, device=torch.device("cpu")) + if self.transform is not None: + frames = self.transform(frames) + return frames + + def __len__(self): + return len(self.video_list) diff --git a/projects/DensePose/densepose/densepose_coco_evaluation.py b/projects/DensePose/densepose/densepose_coco_evaluation.py new file mode 100644 index 0000000..3faa0e5 --- /dev/null +++ b/projects/DensePose/densepose/densepose_coco_evaluation.py @@ -0,0 +1,1157 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# This is a modified version of cocoeval.py where we also have the densepose evaluation. + +__author__ = "tsungyi" + +import copy +import datetime +import logging +import numpy as np +import pickle +import time +from collections import defaultdict +from enum import Enum +from typing import Any, Dict, Tuple +import scipy.spatial.distance as ssd +from fvcore.common.file_io import PathManager +from pycocotools import mask as maskUtils +from scipy.io import loadmat +from scipy.ndimage import zoom as spzoom + +from .data.structures import DensePoseDataRelative, DensePoseResult + +logger = logging.getLogger(__name__) + + +class DensePoseEvalMode(str, Enum): + # use both masks and geodesic distances (GPS * IOU) to compute scores + GPSM = "gpsm" + # use only geodesic distances (GPS) to compute scores + GPS = "gps" + # use only masks (IOU) to compute scores + IOU = "iou" + + +class DensePoseDataMode(str, Enum): + # use estimated IUV data (default mode) + IUV_DT = "iuvdt" + # use ground truth IUV data + IUV_GT = "iuvgt" + # use ground truth labels I and set UV to 0 + I_GT_UV_0 = "igtuv0" + # use ground truth labels I and estimated UV coordinates + I_GT_UV_DT = "igtuvdt" + # use estimated labels I and set UV to 0 + I_DT_UV_0 = "idtuv0" + + +class DensePoseCocoEval(object): + # Interface for evaluating detection on the Microsoft COCO dataset. + # + # The usage for CocoEval is as follows: + # cocoGt=..., cocoDt=... # load dataset and results + # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object + # E.params.recThrs = ...; # set parameters as desired + # E.evaluate(); # run per image evaluation + # E.accumulate(); # accumulate per image results + # E.summarize(); # display summary metrics of results + # For example usage see evalDemo.m and http://mscoco.org/. 
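The usage outline in the comment above is written against the original COCO/MATLAB API; an equivalent Python sketch for this class, with placeholder ground-truth and result files:

from pycocotools.coco import COCO

from densepose.densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode

# Placeholder files -- substitute your own annotation and result paths.
coco_gt = COCO("annotations/densepose_minival2014.json")
coco_dt = coco_gt.loadRes("results/densepose_results.json")

evaluator = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS)
evaluator.evaluate()    # per-image, per-category matching (computeOgps for densepose)
evaluator.accumulate()  # aggregate matches into precision/recall arrays
evaluator.summarize()   # log AP/AR metrics; the values also land in evaluator.stats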
+ # + # The evaluation parameters are as follows (defaults in brackets): + # imgIds - [all] N img ids to use for evaluation + # catIds - [all] K cat ids to use for evaluation + # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation + # recThrs - [0:.01:1] R=101 recall thresholds for evaluation + # areaRng - [...] A=4 object area ranges for evaluation + # maxDets - [1 10 100] M=3 thresholds on max detections per image + # iouType - ['segm'] set iouType to 'segm', 'bbox', 'keypoints' or 'densepose' + # iouType replaced the now DEPRECATED useSegm parameter. + # useCats - [1] if true use category labels for evaluation + # Note: if useCats=0 category labels are ignored as in proposal scoring. + # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. + # + # evaluate(): evaluates detections on every image and every category and + # concats the results into the "evalImgs" with fields: + # dtIds - [1xD] id for each of the D detections (dt) + # gtIds - [1xG] id for each of the G ground truths (gt) + # dtMatches - [TxD] matching gt id at each IoU or 0 + # gtMatches - [TxG] matching dt id at each IoU or 0 + # dtScores - [1xD] confidence of each dt + # gtIgnore - [1xG] ignore flag for each gt + # dtIgnore - [TxD] ignore flag for each dt at each IoU + # + # accumulate(): accumulates the per-image, per-category evaluation + # results in "evalImgs" into the dictionary "eval" with fields: + # params - parameters used for evaluation + # date - date evaluation was performed + # counts - [T,R,K,A,M] parameter dimensions (see above) + # precision - [TxRxKxAxM] precision for every evaluation setting + # recall - [TxKxAxM] max recall for every evaluation setting + # Note: precision and recall==-1 for settings with no gt objects. + # + # See also coco, mask, pycocoDemo, pycocoEvalDemo + # + # Microsoft COCO Toolbox. version 2.0 + # Data, paper, and tutorials available at: http://mscoco.org/ + # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
+ # Licensed under the Simplified BSD License [see coco/license.txt] + def __init__( + self, + cocoGt=None, + cocoDt=None, + iouType: str = "densepose", + dpEvalMode: DensePoseEvalMode = DensePoseEvalMode.GPS, + dpDataMode: DensePoseDataMode = DensePoseDataMode.IUV_DT, + ): + """ + Initialize CocoEval using coco APIs for gt and dt + :param cocoGt: coco object with ground truth annotations + :param cocoDt: coco object with detection results + :return: None + """ + self.cocoGt = cocoGt # ground truth COCO API + self.cocoDt = cocoDt # detections COCO API + self._dpEvalMode = dpEvalMode + self._dpDataMode = dpDataMode + self.params = {} # evaluation parameters + self.evalImgs = defaultdict(list) # per-image per-category eval results [KxAxI] + self.eval = {} # accumulated evaluation results + self._gts = defaultdict(list) # gt for evaluation + self._dts = defaultdict(list) # dt for evaluation + self.params = Params(iouType=iouType) # parameters + self._paramsEval = {} # parameters for evaluation + self.stats = [] # result summarization + self.ious = {} # ious between all gts and dts + if cocoGt is not None: + self.params.imgIds = sorted(cocoGt.getImgIds()) + self.params.catIds = sorted(cocoGt.getCatIds()) + self.ignoreThrBB = 0.7 + self.ignoreThrUV = 0.9 + + def _loadGEval(self): + smpl_subdiv_fpath = PathManager.get_local_path( + "https://dl.fbaipublicfiles.com/densepose/data/SMPL_subdiv.mat" + ) + pdist_transform_fpath = PathManager.get_local_path( + "https://dl.fbaipublicfiles.com/densepose/data/SMPL_SUBDIV_TRANSFORM.mat" + ) + pdist_matrix_fpath = PathManager.get_local_path( + "https://dl.fbaipublicfiles.com/densepose/data/Pdist_matrix.pkl", timeout_sec=120 + ) + SMPL_subdiv = loadmat(smpl_subdiv_fpath) + self.PDIST_transform = loadmat(pdist_transform_fpath) + self.PDIST_transform = self.PDIST_transform["index"].squeeze() + UV = np.array([SMPL_subdiv["U_subdiv"], SMPL_subdiv["V_subdiv"]]).squeeze() + ClosestVertInds = np.arange(UV.shape[1]) + 1 + self.Part_UVs = [] + self.Part_ClosestVertInds = [] + for i in np.arange(24): + self.Part_UVs.append(UV[:, SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)]) + self.Part_ClosestVertInds.append( + ClosestVertInds[SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)] + ) + + with open(pdist_matrix_fpath, "rb") as hFile: + arrays = pickle.load(hFile, encoding="latin1") + self.Pdist_matrix = arrays["Pdist_matrix"] + self.Part_ids = np.array(SMPL_subdiv["Part_ID_subdiv"].squeeze()) + # Mean geodesic distances for parts. + self.Mean_Distances = np.array([0, 0.351, 0.107, 0.126, 0.237, 0.173, 0.142, 0.128, 0.150]) + # Coarse Part labels. + self.CoarseParts = np.array( + [0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8] + ) + + def _prepare(self): + """ + Prepare ._gts and ._dts for evaluation based on params + :return: None + """ + + def _toMask(anns, coco): + # modify ann['segmentation'] by reference + for ann in anns: + # safeguard for invalid segmentation annotation; + # annotations containing empty lists exist in the posetrack + # dataset. 
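The SMPL arrays loaded in _loadGEval above feed the GPS (Geodesic Point Similarity) score computed later in computeOgps. A compact restatement of that scoring step, assuming geodesic distances and ground-truth vertex indices are already available:

import numpy as np

def gps_score(dists, gt_verts, mean_distances, coarse_parts, part_ids):
    """GPS for one (gt, dt) pair: mean over annotated points of exp(-d^2 / (2 * kappa^2)),
    where kappa is the per-part normalization distance selected through the
    CoarseParts / Part_ids lookup loaded in _loadGEval. `dists` holds the geodesic
    distances for points with valid GT vertices, in the same order as the kappa entries."""
    kappa = mean_distances[coarse_parts[part_ids[gt_verts[gt_verts > 0].astype(int) - 1]]]
    if len(dists) == 0:
        return 0.0
    return float(np.sum(np.exp(-(dists ** 2) / (2 * kappa ** 2))) / len(dists))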
This is not a correct segmentation annotation + # in terms of COCO format; we need to deal with it somehow + segm = ann["segmentation"] + if type(segm) == list and len(segm) == 0: + ann["segmentation"] = None + continue + rle = coco.annToRLE(ann) + ann["segmentation"] = rle + + def _getIgnoreRegion(iid, coco): + img = coco.imgs[iid] + + if "ignore_regions_x" not in img.keys(): + return None + + if len(img["ignore_regions_x"]) == 0: + return None + + rgns_merged = [ + [v for xy in zip(region_x, region_y) for v in xy] + for region_x, region_y in zip(img["ignore_regions_x"], img["ignore_regions_y"]) + ] + rles = maskUtils.frPyObjects(rgns_merged, img["height"], img["width"]) + rle = maskUtils.merge(rles) + return maskUtils.decode(rle) + + def _checkIgnore(dt, iregion): + if iregion is None: + return True + + bb = np.array(dt["bbox"]).astype(np.int) + x1, y1, x2, y2 = bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3] + x2 = min([x2, iregion.shape[1]]) + y2 = min([y2, iregion.shape[0]]) + + if bb[2] * bb[3] == 0: + return False + + crop_iregion = iregion[y1:y2, x1:x2] + + if crop_iregion.sum() == 0: + return True + + if "densepose" not in dt.keys(): # filtering boxes + return crop_iregion.sum() / bb[2] / bb[3] < self.ignoreThrBB + + # filtering UVs + ignoremask = np.require(crop_iregion, requirements=["F"]) + mask = self._extract_mask(dt) + uvmask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"]) + uvmask_ = maskUtils.encode(uvmask) + ignoremask_ = maskUtils.encode(ignoremask) + uviou = maskUtils.iou([uvmask_], [ignoremask_], [1])[0] + return uviou < self.ignoreThrUV + + p = self.params + + if p.useCats: + gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) + dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) + else: + gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) + dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) + + imns = self.cocoGt.loadImgs(p.imgIds) + self.size_mapping = {} + for im in imns: + self.size_mapping[im["id"]] = [im["height"], im["width"]] + + # if iouType == 'uv', add point gt annotations + if p.iouType == "densepose": + self._loadGEval() + + # convert ground truth to mask if iouType == 'segm' + if p.iouType == "segm": + _toMask(gts, self.cocoGt) + _toMask(dts, self.cocoDt) + + # set ignore flag + for gt in gts: + gt["ignore"] = gt["ignore"] if "ignore" in gt else 0 + gt["ignore"] = "iscrowd" in gt and gt["iscrowd"] + if p.iouType == "keypoints": + gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"] + if p.iouType == "densepose": + gt["ignore"] = ("dp_x" in gt) == 0 + if p.iouType == "segm": + gt["ignore"] = gt["segmentation"] is None + + self._gts = defaultdict(list) # gt for evaluation + self._dts = defaultdict(list) # dt for evaluation + self._igrgns = defaultdict(list) + + for gt in gts: + iid = gt["image_id"] + if iid not in self._igrgns.keys(): + self._igrgns[iid] = _getIgnoreRegion(iid, self.cocoGt) + if _checkIgnore(gt, self._igrgns[iid]): + self._gts[iid, gt["category_id"]].append(gt) + for dt in dts: + iid = dt["image_id"] + if (iid not in self._igrgns) or _checkIgnore(dt, self._igrgns[iid]): + self._dts[iid, dt["category_id"]].append(dt) + + self.evalImgs = defaultdict(list) # per-image per-category evaluation results + self.eval = {} # accumulated evaluation results + + def evaluate(self): + """ + Run per image evaluation on given images and store results (a list of dict) in self.evalImgs + :return: None + """ + tic = time.time() + 
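A stripped-down sketch of the box-level ignore test implemented in _checkIgnore above: a detection box is kept only when the fraction of ignore pixels inside it stays below ignoreThrBB. The binary ignore mask (a 2D array) and the XYWH box are assumed inputs.

def keep_box(bbox_xywh, ignore_mask, ignore_thr_bb=0.7):
    """Return True when the detection box overlaps the ignore region by less than the
    threshold (0.7 mirrors self.ignoreThrBB set in the constructor above)."""
    if ignore_mask is None:
        return True
    x, y, w, h = (int(v) for v in bbox_xywh)
    if w * h == 0:
        return False
    x2 = min(x + w, ignore_mask.shape[1])
    y2 = min(y + h, ignore_mask.shape[0])
    crop = ignore_mask[y:y2, x:x2]
    return crop.sum() / (w * h) < ignore_thr_bb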
logger.info("Running per image DensePose evaluation... {}".format(self.params.iouType)) + p = self.params + # add backward compatibility if useSegm is specified in params + if p.useSegm is not None: + p.iouType = "segm" if p.useSegm == 1 else "bbox" + logger.info("useSegm (deprecated) is not None. Running DensePose evaluation") + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType in ["segm", "bbox"]: + computeIoU = self.computeIoU + elif p.iouType == "keypoints": + computeIoU = self.computeOks + elif p.iouType == "densepose": + computeIoU = self.computeOgps + if self._dpEvalMode == DensePoseEvalMode.GPSM: + self.real_ious = { + (imgId, catId): self.computeDPIoU(imgId, catId) + for imgId in p.imgIds + for catId in catIds + } + + self.ious = { + (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds + } + + evaluateImg = self.evaluateImg + maxDet = p.maxDets[-1] + self.evalImgs = [ + evaluateImg(imgId, catId, areaRng, maxDet) + for catId in catIds + for areaRng in p.areaRng + for imgId in p.imgIds + ] + self._paramsEval = copy.deepcopy(self.params) + toc = time.time() + logger.info("DensePose evaluation DONE (t={:0.2f}s).".format(toc - tic)) + + def getDensePoseMask(self, polys): + maskGen = np.zeros([256, 256]) + stop = min(len(polys) + 1, 15) + for i in range(1, stop): + if polys[i - 1]: + currentMask = maskUtils.decode(polys[i - 1]) + maskGen[currentMask > 0] = i + return maskGen + + def _generate_rlemask_on_image(self, mask, imgId, data): + bbox_xywh = np.array(data["bbox"]) + x, y, w, h = bbox_xywh + im_h, im_w = self.size_mapping[imgId] + im_mask = np.zeros((im_h, im_w), dtype=np.uint8) + if mask is not None: + x0 = max(int(x), 0) + x1 = min(int(x + w), im_w, int(x) + mask.shape[1]) + y0 = max(int(y), 0) + y1 = min(int(y + h), im_h, int(y) + mask.shape[0]) + y = int(y) + x = int(x) + im_mask[y0:y1, x0:x1] = mask[y0 - y : y1 - y, x0 - x : x1 - x] + im_mask = np.require(np.asarray(im_mask > 0), dtype=np.uint8, requirements=["F"]) + rle_mask = maskUtils.encode(np.array(im_mask[:, :, np.newaxis], order="F"))[0] + return rle_mask + + def computeDPIoU(self, imgId, catId): + p = self.params + if p.useCats: + gt = self._gts[imgId, catId] + dt = self._dts[imgId, catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] + if len(gt) == 0 and len(dt) == 0: + return [] + inds = np.argsort([-d["score"] for d in dt], kind="mergesort") + dt = [dt[i] for i in inds] + if len(dt) > p.maxDets[-1]: + dt = dt[0 : p.maxDets[-1]] + + gtmasks = [] + for g in gt: + if DensePoseDataRelative.S_KEY in g: + # convert DensePose mask to a binary mask + mask = np.minimum(self.getDensePoseMask(g[DensePoseDataRelative.S_KEY]), 1.0) + _, _, w, h = g["bbox"] + scale_x = float(max(w, 1)) / mask.shape[1] + scale_y = float(max(h, 1)) / mask.shape[0] + mask = spzoom(mask, (scale_y, scale_x), order=1, prefilter=False) + mask = np.array(mask > 0.5, dtype=np.uint8) + rle_mask = self._generate_rlemask_on_image(mask, imgId, g) + elif "segmentation" in g: + segmentation = g["segmentation"] + if isinstance(segmentation, list) and segmentation: + # polygons + im_h, im_w = self.size_mapping[imgId] + rles = maskUtils.frPyObjects(segmentation, im_h, im_w) + rle_mask = maskUtils.merge(rles) + elif 
isinstance(segmentation, dict): + if isinstance(segmentation["counts"], list): + # uncompressed RLE + im_h, im_w = self.size_mapping[imgId] + rle_mask = maskUtils.frPyObjects(segmentation, im_h, im_w) + else: + # compressed RLE + rle_mask = segmentation + else: + rle_mask = self._generate_rlemask_on_image(None, imgId, g) + else: + rle_mask = self._generate_rlemask_on_image(None, imgId, g) + gtmasks.append(rle_mask) + + dtmasks = [] + for d in dt: + mask = self._extract_mask(d) + mask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"]) + rle_mask = self._generate_rlemask_on_image(mask, imgId, d) + dtmasks.append(rle_mask) + + # compute iou between each dt and gt region + iscrowd = [int(o["iscrowd"]) for o in gt] + iousDP = maskUtils.iou(dtmasks, gtmasks, iscrowd) + return iousDP + + def computeIoU(self, imgId, catId): + p = self.params + if p.useCats: + gt = self._gts[imgId, catId] + dt = self._dts[imgId, catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] + if len(gt) == 0 and len(dt) == 0: + return [] + inds = np.argsort([-d["score"] for d in dt], kind="mergesort") + dt = [dt[i] for i in inds] + if len(dt) > p.maxDets[-1]: + dt = dt[0 : p.maxDets[-1]] + + if p.iouType == "segm": + g = [g["segmentation"] for g in gt if g["segmentation"] is not None] + d = [d["segmentation"] for d in dt if d["segmentation"] is not None] + elif p.iouType == "bbox": + g = [g["bbox"] for g in gt] + d = [d["bbox"] for d in dt] + else: + raise Exception("unknown iouType for iou computation") + + # compute iou between each dt and gt region + iscrowd = [int(o["iscrowd"]) for o in gt] + ious = maskUtils.iou(d, g, iscrowd) + return ious + + def computeOks(self, imgId, catId): + p = self.params + # dimension here should be Nxm + gts = self._gts[imgId, catId] + dts = self._dts[imgId, catId] + inds = np.argsort([-d["score"] for d in dts], kind="mergesort") + dts = [dts[i] for i in inds] + if len(dts) > p.maxDets[-1]: + dts = dts[0 : p.maxDets[-1]] + # if len(gts) == 0 and len(dts) == 0: + if len(gts) == 0 or len(dts) == 0: + return [] + ious = np.zeros((len(dts), len(gts))) + sigmas = ( + np.array( + [ + 0.26, + 0.25, + 0.25, + 0.35, + 0.35, + 0.79, + 0.79, + 0.72, + 0.72, + 0.62, + 0.62, + 1.07, + 1.07, + 0.87, + 0.87, + 0.89, + 0.89, + ] + ) + / 10.0 + ) + vars = (sigmas * 2) ** 2 + k = len(sigmas) + # compute oks between each detection and ground truth object + for j, gt in enumerate(gts): + # create bounds for ignore regions(double the gt bbox) + g = np.array(gt["keypoints"]) + xg = g[0::3] + yg = g[1::3] + vg = g[2::3] + k1 = np.count_nonzero(vg > 0) + bb = gt["bbox"] + x0 = bb[0] - bb[2] + x1 = bb[0] + bb[2] * 2 + y0 = bb[1] - bb[3] + y1 = bb[1] + bb[3] * 2 + for i, dt in enumerate(dts): + d = np.array(dt["keypoints"]) + xd = d[0::3] + yd = d[1::3] + if k1 > 0: + # measure the per-keypoint distance if keypoints visible + dx = xd - xg + dy = yd - yg + else: + # measure minimum distance to keypoints in (x0,y0) & (x1,y1) + z = np.zeros(k) + dx = np.max((z, x0 - xd), axis=0) + np.max((z, xd - x1), axis=0) + dy = np.max((z, y0 - yd), axis=0) + np.max((z, yd - y1), axis=0) + e = (dx ** 2 + dy ** 2) / vars / (gt["area"] + np.spacing(1)) / 2 + if k1 > 0: + e = e[vg > 0] + ious[i, j] = np.sum(np.exp(-e)) / e.shape[0] + return ious + + def _extract_mask(self, dt: Dict[str, Any]) -> np.ndarray: + (densepose_shape, densepose_data_encoded), densepose_bbox_xywh = dt["densepose"] + densepose_data = 
DensePoseResult.decode_png_data(densepose_shape, densepose_data_encoded) + return densepose_data[0] + + def _extract_iuv( + self, densepose_data: np.ndarray, py: np.ndarray, px: np.ndarray, gt: Dict[str, Any] + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Extract arrays of I, U and V values at given points as numpy arrays + given the data mode stored in self._dpDataMode + """ + if self._dpDataMode == DensePoseDataMode.IUV_DT: + # estimated labels and UV (default) + ipoints = densepose_data[0, py, px] + upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255. + vpoints = densepose_data[2, py, px] / 255.0 + elif self._dpDataMode == DensePoseDataMode.IUV_GT: + # ground truth + ipoints = np.array(gt["dp_I"]) + upoints = np.array(gt["dp_U"]) + vpoints = np.array(gt["dp_V"]) + elif self._dpDataMode == DensePoseDataMode.I_GT_UV_0: + # ground truth labels, UV = 0 + ipoints = np.array(gt["dp_I"]) + upoints = upoints * 0.0 + vpoints = vpoints * 0.0 + elif self._dpDataMode == DensePoseDataMode.I_GT_UV_DT: + # ground truth labels, estimated UV + ipoints = np.array(gt["dp_I"]) + upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255. + vpoints = densepose_data[2, py, px] / 255.0 + elif self._dpDataMode == DensePoseDataMode.I_DT_UV_0: + # estimated labels, UV = 0 + ipoints = densepose_data[0, py, px] + upoints = upoints * 0.0 + vpoints = vpoints * 0.0 + else: + raise ValueError(f"Unknown data mode: {self._dpDataMode}") + return ipoints, upoints, vpoints + + def computeOgps(self, imgId, catId): + p = self.params + # dimension here should be Nxm + g = self._gts[imgId, catId] + d = self._dts[imgId, catId] + inds = np.argsort([-d_["score"] for d_ in d], kind="mergesort") + d = [d[i] for i in inds] + if len(d) > p.maxDets[-1]: + d = d[0 : p.maxDets[-1]] + # if len(gts) == 0 and len(dts) == 0: + if len(g) == 0 or len(d) == 0: + return [] + ious = np.zeros((len(d), len(g))) + # compute opgs between each detection and ground truth object + # sigma = self.sigma #0.255 # dist = 0.3m corresponds to ogps = 0.5 + # 1 # dist = 0.3m corresponds to ogps = 0.96 + # 1.45 # dist = 1.7m (person height) corresponds to ogps = 0.5) + for j, gt in enumerate(g): + if not gt["ignore"]: + g_ = gt["bbox"] + for i, dt in enumerate(d): + # + dy = int(dt["bbox"][3]) + dx = int(dt["bbox"][2]) + dp_x = np.array(gt["dp_x"]) * g_[2] / 255.0 + dp_y = np.array(gt["dp_y"]) * g_[3] / 255.0 + py = (dp_y + g_[1] - dt["bbox"][1]).astype(np.int) + px = (dp_x + g_[0] - dt["bbox"][0]).astype(np.int) + # + pts = np.zeros(len(px)) + pts[px >= dx] = -1 + pts[py >= dy] = -1 + pts[px < 0] = -1 + pts[py < 0] = -1 + if len(pts) < 1: + ogps = 0.0 + elif np.max(pts) == -1: + ogps = 0.0 + else: + px[pts == -1] = 0 + py[pts == -1] = 0 + (densepose_shape, densepose_data_encoded), densepose_bbox_xywh = dt[ + "densepose" + ] + densepose_data = DensePoseResult.decode_png_data( + densepose_shape, densepose_data_encoded + ) + assert densepose_data.shape[2] == dx, ( + "DensePoseData width {} should be equal to " + "detection bounding box width {}".format(densepose_data.shape[2], dx) + ) + assert densepose_data.shape[1] == dy, ( + "DensePoseData height {} should be equal to " + "detection bounding box height {}".format(densepose_data.shape[1], dy) + ) + ipoints, upoints, vpoints = self._extract_iuv(densepose_data, py, px, gt) + ipoints[pts == -1] = 0 + # Find closest vertices in subsampled mesh. 
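One caveat in _extract_iuv above: the I_GT_UV_0 and I_DT_UV_0 branches compute upoints * 0.0 and vpoints * 0.0 before upoints and vpoints are assigned on those code paths, so selecting either mode fails at runtime. A hedged sketch of the presumed intent (zero-valued UV arrays matching the point labels):

import numpy as np

def zero_uv_like(ipoints):
    """Zero U and V arrays with one entry per annotated point -- the presumed intent of
    the I_GT_UV_0 / I_DT_UV_0 modes, which currently reference upoints/vpoints before
    assignment."""
    ipoints = np.asarray(ipoints)
    return np.zeros(ipoints.shape, dtype=np.float64), np.zeros(ipoints.shape, dtype=np.float64)

# e.g. inside _extract_iuv for the I_GT_UV_0 mode:
#     ipoints = np.array(gt["dp_I"])
#     upoints, vpoints = zero_uv_like(ipoints)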
+ cVerts, cVertsGT = self.findAllClosestVerts(gt, upoints, vpoints, ipoints) + # Get pairwise geodesic distances between gt and estimated mesh points. + dist = self.getDistances(cVertsGT, cVerts) + # Compute the Ogps measure. + # Find the mean geodesic normalization distance for + # each GT point, based on which part it is on. + Current_Mean_Distances = self.Mean_Distances[ + self.CoarseParts[self.Part_ids[cVertsGT[cVertsGT > 0].astype(int) - 1]] + ] + # Compute gps + ogps_values = np.exp(-(dist ** 2) / (2 * (Current_Mean_Distances ** 2))) + # + if len(dist) > 0: + ogps = np.sum(ogps_values) / len(dist) + ious[i, j] = ogps + + gbb = [gt["bbox"] for gt in g] + dbb = [dt["bbox"] for dt in d] + + # compute iou between each dt and gt region + iscrowd = [int(o["iscrowd"]) for o in g] + ious_bb = maskUtils.iou(dbb, gbb, iscrowd) + return ious, ious_bb + + def evaluateImg(self, imgId, catId, aRng, maxDet): + """ + perform evaluation for single category and image + :return: dict (single image results) + """ + + p = self.params + if p.useCats: + gt = self._gts[imgId, catId] + dt = self._dts[imgId, catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] + if len(gt) == 0 and len(dt) == 0: + return None + + for g in gt: + # g['_ignore'] = g['ignore'] + if g["ignore"] or (g["area"] < aRng[0] or g["area"] > aRng[1]): + g["_ignore"] = True + else: + g["_ignore"] = False + + # sort dt highest score first, sort gt ignore last + gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort") + gt = [gt[i] for i in gtind] + dtind = np.argsort([-d["score"] for d in dt], kind="mergesort") + dt = [dt[i] for i in dtind[0:maxDet]] + iscrowd = [int(o["iscrowd"]) for o in gt] + # load computed ious + if p.iouType == "densepose": + # print('Checking the length', len(self.ious[imgId, catId])) + # if len(self.ious[imgId, catId]) == 0: + # print(self.ious[imgId, catId]) + ious = ( + self.ious[imgId, catId][0][:, gtind] + if len(self.ious[imgId, catId]) > 0 + else self.ious[imgId, catId] + ) + ioubs = ( + self.ious[imgId, catId][1][:, gtind] + if len(self.ious[imgId, catId]) > 0 + else self.ious[imgId, catId] + ) + if self._dpEvalMode == DensePoseEvalMode.GPSM: + iousM = ( + self.real_ious[imgId, catId][:, gtind] + if len(self.real_ious[imgId, catId]) > 0 + else self.real_ious[imgId, catId] + ) + else: + ious = ( + self.ious[imgId, catId][:, gtind] + if len(self.ious[imgId, catId]) > 0 + else self.ious[imgId, catId] + ) + + T = len(p.iouThrs) + G = len(gt) + D = len(dt) + gtm = np.zeros((T, G)) + dtm = np.zeros((T, D)) + gtIg = np.array([g["_ignore"] for g in gt]) + dtIg = np.zeros((T, D)) + if np.all(gtIg) and p.iouType == "densepose": + dtIg = np.logical_or(dtIg, True) + + if len(ious) > 0: # and not p.iouType == 'densepose': + for tind, t in enumerate(p.iouThrs): + for dind, d in enumerate(dt): + # information about best match so far (m=-1 -> unmatched) + iou = min([t, 1 - 1e-10]) + m = -1 + for gind, _g in enumerate(gt): + # if this gt already matched, and not a crowd, continue + if gtm[tind, gind] > 0 and not iscrowd[gind]: + continue + # if dt matched to reg gt, and on ignore gt, stop + if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1: + break + if p.iouType == "densepose": + if self._dpEvalMode == DensePoseEvalMode.GPSM: + new_iou = np.sqrt(iousM[dind, gind] * ious[dind, gind]) + elif self._dpEvalMode == DensePoseEvalMode.IOU: + new_iou = iousM[dind, gind] + elif self._dpEvalMode == DensePoseEvalMode.GPS: + new_iou = ious[dind, 
gind] + else: + new_iou = ious[dind, gind] + if new_iou < iou: + continue + if new_iou == 0.0: + continue + # if match successful and best so far, store appropriately + iou = new_iou + m = gind + # if match made store id of match for both dt and gt + if m == -1: + continue + dtIg[tind, dind] = gtIg[m] + dtm[tind, dind] = gt[m]["id"] + gtm[tind, m] = d["id"] + + if p.iouType == "densepose": + if not len(ioubs) == 0: + for dind, d in enumerate(dt): + # information about best match so far (m=-1 -> unmatched) + if dtm[tind, dind] == 0: + ioub = 0.8 + m = -1 + for gind, _g in enumerate(gt): + # if this gt already matched, and not a crowd, continue + if gtm[tind, gind] > 0 and not iscrowd[gind]: + continue + # continue to next gt unless better match made + if ioubs[dind, gind] < ioub: + continue + # if match successful and best so far, store appropriately + ioub = ioubs[dind, gind] + m = gind + # if match made store id of match for both dt and gt + if m > -1: + dtIg[:, dind] = gtIg[m] + if gtIg[m]: + dtm[tind, dind] = gt[m]["id"] + gtm[tind, m] = d["id"] + # set unmatched detections outside of area range to ignore + a = np.array([d["area"] < aRng[0] or d["area"] > aRng[1] for d in dt]).reshape((1, len(dt))) + dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0))) + # store results for given image and category + # print('Done with the function', len(self.ious[imgId, catId])) + return { + "image_id": imgId, + "category_id": catId, + "aRng": aRng, + "maxDet": maxDet, + "dtIds": [d["id"] for d in dt], + "gtIds": [g["id"] for g in gt], + "dtMatches": dtm, + "gtMatches": gtm, + "dtScores": [d["score"] for d in dt], + "gtIgnore": gtIg, + "dtIgnore": dtIg, + } + + def accumulate(self, p=None): + """ + Accumulate per image evaluation results and store the result in self.eval + :param p: input params for evaluation + :return: None + """ + logger.info("Accumulating evaluation results...") + tic = time.time() + if not self.evalImgs: + logger.info("Please run evaluate() first") + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -(np.ones((T, R, K, A, M))) # -1 for the precision of absent categories + recall = -(np.ones((T, K, A, M))) + + # create dictionary for future indexing + logger.info("Categories: {}".format(p.catIds)) + _pe = self._paramsEval + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0 * A0 * I0 + for a, a0 in enumerate(a_list): + Na = a0 * I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if e is not None] + if len(E) == 0: + continue + dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
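accumulate() turns the per-image match matrices into the [T, R, K, A, M] precision array using the standard COCO interpolation: cumulative TP/FP counts give a raw PR curve, precision is forced to be non-increasing in recall, and it is then sampled at the fixed recall thresholds. A minimal sketch of that step for a single IoU threshold, assuming the match flags are already merged and score-sorted:

import numpy as np

def interpolated_precision(tp_cum, fp_cum, n_positives, rec_thrs):
    """COCO-style interpolation for one IoU threshold: raw PR curve from cumulative
    TP/FP counts, right-to-left max envelope, then precision sampled at the fixed
    recall thresholds (0 where a recall level is never reached)."""
    rc = tp_cum / n_positives
    pr = (tp_cum / (tp_cum + fp_cum + np.spacing(1))).tolist()
    for i in range(len(pr) - 1, 0, -1):  # enforce non-increasing precision
        if pr[i] > pr[i - 1]:
            pr[i - 1] = pr[i]
    q = np.zeros(len(rec_thrs))
    inds = np.searchsorted(rc, rec_thrs, side="left")
    for ri, pi in enumerate(inds):
        if pi < len(pr):
            q[ri] = pr[pi]
    return q

# e.g. 5 detections (TP, FP, TP, TP, FP in score order) against 4 GT objects:
# interpolated_precision(np.array([1, 1, 2, 3, 3]), np.array([0, 1, 1, 1, 2]), 4,
#                        np.linspace(0, 1, 101))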
+ inds = np.argsort(-dtScores, kind="mergesort") + + dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e["gtIgnore"] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp + tp + np.spacing(1)) + q = np.zeros((R,)) + + if nd: + recall[t, k, a, m] = rc[-1] + else: + recall[t, k, a, m] = 0 + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist() + q = q.tolist() + + for i in range(nd - 1, 0, -1): + if pr[i] > pr[i - 1]: + pr[i - 1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except Exception: + pass + precision[t, :, k, a, m] = np.array(q) + logger.info( + "Final: max precision {}, min precision {}".format(np.max(precision), np.min(precision)) + ) + self.eval = { + "params": p, + "counts": [T, R, K, A, M], + "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "precision": precision, + "recall": recall, + } + toc = time.time() + logger.info("DONE (t={:0.2f}s).".format(toc - tic)) + + def summarize(self): + """ + Compute and display summary metrics for evaluation results. + Note this function can *only* be applied on the default parameter setting + """ + + def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100): + p = self.params + iStr = " {:<18} {} @[ {}={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}" + titleStr = "Average Precision" if ap == 1 else "Average Recall" + typeStr = "(AP)" if ap == 1 else "(AR)" + measure = "IoU" + if self.params.iouType == "keypoints": + measure = "OKS" + elif self.params.iouType == "densepose": + measure = "OGPS" + iouStr = ( + "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1]) + if iouThr is None + else "{:0.2f}".format(iouThr) + ) + + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] + if ap == 1: + # dimension of precision: [TxRxKxAxM] + s = self.eval["precision"] + # IoU + if iouThr is not None: + t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0] + s = s[t] + s = s[:, :, :, aind, mind] + else: + # dimension of recall: [TxKxAxM] + s = self.eval["recall"] + if iouThr is not None: + t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0] + s = s[t] + s = s[:, :, aind, mind] + if len(s[s > -1]) == 0: + mean_s = -1 + else: + mean_s = np.mean(s[s > -1]) + logger.info(iStr.format(titleStr, typeStr, measure, iouStr, areaRng, maxDets, mean_s)) + return mean_s + + def _summarizeDets(): + stats = np.zeros((12,)) + stats[0] = _summarize(1) + stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2]) + stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2]) + stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2]) + stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2]) + stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2]) + stats[6] = _summarize(0, maxDets=self.params.maxDets[0]) + stats[7] = 
_summarize(0, maxDets=self.params.maxDets[1]) + stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) + stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2]) + stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2]) + stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2]) + return stats + + def _summarizeKps(): + stats = np.zeros((10,)) + stats[0] = _summarize(1, maxDets=20) + stats[1] = _summarize(1, maxDets=20, iouThr=0.5) + stats[2] = _summarize(1, maxDets=20, iouThr=0.75) + stats[3] = _summarize(1, maxDets=20, areaRng="medium") + stats[4] = _summarize(1, maxDets=20, areaRng="large") + stats[5] = _summarize(0, maxDets=20) + stats[6] = _summarize(0, maxDets=20, iouThr=0.5) + stats[7] = _summarize(0, maxDets=20, iouThr=0.75) + stats[8] = _summarize(0, maxDets=20, areaRng="medium") + stats[9] = _summarize(0, maxDets=20, areaRng="large") + return stats + + def _summarizeUvs(): + stats = [_summarize(1, maxDets=self.params.maxDets[0])] + min_threshold = self.params.iouThrs.min() + if min_threshold <= 0.201: + stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.2)] + if min_threshold <= 0.301: + stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.3)] + if min_threshold <= 0.401: + stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.4)] + stats += [ + _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5), + _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75), + _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium"), + _summarize(1, maxDets=self.params.maxDets[0], areaRng="large"), + _summarize(0, maxDets=self.params.maxDets[0]), + _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5), + _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75), + _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium"), + _summarize(0, maxDets=self.params.maxDets[0], areaRng="large"), + ] + return np.array(stats) + + def _summarizeUvsOld(): + stats = np.zeros((18,)) + stats[0] = _summarize(1, maxDets=self.params.maxDets[0]) + stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5) + stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.55) + stats[3] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.60) + stats[4] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.65) + stats[5] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.70) + stats[6] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75) + stats[7] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.80) + stats[8] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.85) + stats[9] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.90) + stats[10] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.95) + stats[11] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium") + stats[12] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large") + stats[13] = _summarize(0, maxDets=self.params.maxDets[0]) + stats[14] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5) + stats[15] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75) + stats[16] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium") + stats[17] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large") + return stats + + if not self.eval: + raise Exception("Please run accumulate() first") + iouType = self.params.iouType + if iouType in ["segm", "bbox"]: + summarize = _summarizeDets + elif iouType in ["keypoints"]: + summarize = _summarizeKps + 
elif iouType in ["densepose"]: + summarize = _summarizeUvs + self.stats = summarize() + + def __str__(self): + self.summarize() + + # ================ functions for dense pose ============================== + def findAllClosestVerts(self, gt, U_points, V_points, Index_points): + # + I_gt = np.array(gt["dp_I"]) + U_gt = np.array(gt["dp_U"]) + V_gt = np.array(gt["dp_V"]) + # + # print(I_gt) + # + ClosestVerts = np.ones(Index_points.shape) * -1 + for i in np.arange(24): + # + if sum(Index_points == (i + 1)) > 0: + UVs = np.array( + [U_points[Index_points == (i + 1)], V_points[Index_points == (i + 1)]] + ) + Current_Part_UVs = self.Part_UVs[i] + Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i] + D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze() + ClosestVerts[Index_points == (i + 1)] = Current_Part_ClosestVertInds[ + np.argmin(D, axis=0) + ] + # + ClosestVertsGT = np.ones(Index_points.shape) * -1 + for i in np.arange(24): + if sum(I_gt == (i + 1)) > 0: + UVs = np.array([U_gt[I_gt == (i + 1)], V_gt[I_gt == (i + 1)]]) + Current_Part_UVs = self.Part_UVs[i] + Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i] + D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze() + ClosestVertsGT[I_gt == (i + 1)] = Current_Part_ClosestVertInds[np.argmin(D, axis=0)] + # + return ClosestVerts, ClosestVertsGT + + def getDistances(self, cVertsGT, cVerts): + + ClosestVertsTransformed = self.PDIST_transform[cVerts.astype(int) - 1] + ClosestVertsGTTransformed = self.PDIST_transform[cVertsGT.astype(int) - 1] + # + ClosestVertsTransformed[cVerts < 0] = 0 + ClosestVertsGTTransformed[cVertsGT < 0] = 0 + # + cVertsGT = ClosestVertsGTTransformed + cVerts = ClosestVertsTransformed + # + n = 27554 + dists = [] + for d in range(len(cVertsGT)): + if cVertsGT[d] > 0: + if cVerts[d] > 0: + i = cVertsGT[d] - 1 + j = cVerts[d] - 1 + if j == i: + dists.append(0) + elif j > i: + ccc = i + i = j + j = ccc + i = n - i - 1 + j = n - j - 1 + k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1 + k = (n * n - n) / 2 - k - 1 + dists.append(self.Pdist_matrix[int(k)][0]) + else: + i = n - i - 1 + j = n - j - 1 + k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1 + k = (n * n - n) / 2 - k - 1 + dists.append(self.Pdist_matrix[int(k)][0]) + else: + dists.append(np.inf) + return np.atleast_1d(np.array(dists).squeeze()) + + +class Params: + """ + Params for coco evaluation api + """ + + def setDetParams(self): + self.imgIds = [] + self.catIds = [] + # np.arange causes trouble. the data point on arange is slightly larger than the true value + self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) + self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) + self.maxDets = [1, 10, 100] + self.areaRng = [ + [0 ** 2, 1e5 ** 2], + [0 ** 2, 32 ** 2], + [32 ** 2, 96 ** 2], + [96 ** 2, 1e5 ** 2], + ] + self.areaRngLbl = ["all", "small", "medium", "large"] + self.useCats = 1 + + def setKpParams(self): + self.imgIds = [] + self.catIds = [] + # np.arange causes trouble. 
the data point on arange is slightly larger than the true value + self.iouThrs = np.linspace(0.5, 0.95, np.round((0.95 - 0.5) / 0.05) + 1, endpoint=True) + self.recThrs = np.linspace(0.0, 1.00, np.round((1.00 - 0.0) / 0.01) + 1, endpoint=True) + self.maxDets = [20] + self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] + self.areaRngLbl = ["all", "medium", "large"] + self.useCats = 1 + + def setUvParams(self): + self.imgIds = [] + self.catIds = [] + self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) + self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) + self.maxDets = [20] + self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] + self.areaRngLbl = ["all", "medium", "large"] + self.useCats = 1 + + def __init__(self, iouType="segm"): + if iouType == "segm" or iouType == "bbox": + self.setDetParams() + elif iouType == "keypoints": + self.setKpParams() + elif iouType == "densepose": + self.setUvParams() + else: + raise Exception("iouType not supported") + self.iouType = iouType + # useSegm is deprecated + self.useSegm = None diff --git a/projects/DensePose/densepose/engine/__init__.py b/projects/DensePose/densepose/engine/__init__.py new file mode 100644 index 0000000..d73edeb --- /dev/null +++ b/projects/DensePose/densepose/engine/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +from .trainer import Trainer diff --git a/projects/DensePose/densepose/engine/trainer.py b/projects/DensePose/densepose/engine/trainer.py new file mode 100644 index 0000000..1d1d270 --- /dev/null +++ b/projects/DensePose/densepose/engine/trainer.py @@ -0,0 +1,118 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
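The threshold grids in Params above are built with np.linspace. Note that setKpParams passes np.round(...) directly as the sample count, whereas setDetParams and setUvParams cast it to int; recent NumPy versions require an integer there. A small sketch of the grid construction with the cast applied:

import numpy as np

def threshold_grid(lo, hi, step):
    """Evenly spaced thresholds from lo to hi inclusive; the int() cast keeps the
    sample count an integer, as np.linspace expects."""
    return np.linspace(lo, hi, int(np.round((hi - lo) / step)) + 1, endpoint=True)

iou_thrs = threshold_grid(0.5, 0.95, 0.05)  # 10 values: 0.50, 0.55, ..., 0.95
rec_thrs = threshold_grid(0.0, 1.00, 0.01)  # 101 values: 0.00, 0.01, ..., 1.00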
All Rights Reserved + +import logging +import os +from collections import OrderedDict + +from detectron2.checkpoint import DetectionCheckpointer +from detectron2.config import CfgNode +from detectron2.engine import DefaultTrainer +from detectron2.evaluation import COCOEvaluator, DatasetEvaluators +from detectron2.utils.events import EventWriter, get_event_storage + +from densepose import ( + DensePoseCOCOEvaluator, + DensePoseDatasetMapperTTA, + DensePoseGeneralizedRCNNWithTTA, + load_from_cfg, +) +from densepose.data import ( + DatasetMapper, + build_combined_loader, + build_detection_test_loader, + build_detection_train_loader, + build_inference_based_loaders, + has_inference_based_loaders, +) + + +class SampleCountingLoader: + def __init__(self, loader): + self.loader = loader + + def __iter__(self): + it = iter(self.loader) + storage = get_event_storage() + while True: + try: + batch = next(it) + num_inst_per_dataset = {} + for data in batch: + dataset_name = data["dataset"] + if dataset_name not in num_inst_per_dataset: + num_inst_per_dataset[dataset_name] = 0 + num_inst = len(data["instances"]) + num_inst_per_dataset[dataset_name] += num_inst + for dataset_name in num_inst_per_dataset: + storage.put_scalar(f"batch/{dataset_name}", num_inst_per_dataset[dataset_name]) + yield batch + except StopIteration: + break + + +class SampleCountMetricPrinter(EventWriter): + def __init__(self): + self.logger = logging.getLogger(__name__) + + def write(self): + storage = get_event_storage() + batch_stats_strs = [] + for key, buf in storage.histories().items(): + if key.startswith("batch/"): + batch_stats_strs.append(f"{key} {buf.avg(20)}") + self.logger.info(", ".join(batch_stats_strs)) + + +class Trainer(DefaultTrainer): + @classmethod + def build_evaluator(cls, cfg: CfgNode, dataset_name, output_folder=None): + if output_folder is None: + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") + evaluators = [COCOEvaluator(dataset_name, cfg, True, output_folder)] + if cfg.MODEL.DENSEPOSE_ON: + evaluators.append(DensePoseCOCOEvaluator(dataset_name, True, output_folder)) + return DatasetEvaluators(evaluators) + + @classmethod + def build_test_loader(cls, cfg: CfgNode, dataset_name): + return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False)) + + @classmethod + def build_train_loader(cls, cfg: CfgNode): + data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True)) + if not has_inference_based_loaders(cfg): + return data_loader + model = cls.build_model(cfg) + model.to(cfg.BOOTSTRAP_MODEL.DEVICE) + DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False) + inference_based_loaders, ratios = build_inference_based_loaders(cfg, model) + loaders = [data_loader] + inference_based_loaders + ratios = [1.0] + ratios + combined_data_loader = build_combined_loader(cfg, loaders, ratios) + sample_counting_loader = SampleCountingLoader(combined_data_loader) + return sample_counting_loader + + def build_writers(self): + writers = super().build_writers() + writers.append(SampleCountMetricPrinter()) + return writers + + @classmethod + def test_with_TTA(cls, cfg: CfgNode, model): + logger = logging.getLogger("detectron2.trainer") + # In the end of training, run an evaluation with TTA + # Only support some R-CNN models. 
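A minimal training-script sketch using the Trainer defined above, following the usual detectron2 DefaultTrainer flow. The config path is a placeholder, and add_densepose_config is assumed to be the config helper exposed by the densepose package (it is not shown in this part of the patch):

from detectron2.config import get_cfg

from densepose import add_densepose_config  # assumed helper registering DensePose config keys
from densepose.engine import Trainer

cfg = get_cfg()
add_densepose_config(cfg)
cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_s1x.yaml")  # placeholder path
cfg.freeze()

trainer = Trainer(cfg)                # builds the model, optimizer and (possibly combined) loader
trainer.resume_or_load(resume=False)  # load cfg.MODEL.WEIGHTS or resume from cfg.OUTPUT_DIR
trainer.train()

When inference-based loaders are configured (has_inference_based_loaders), build_train_loader above combines them with the ground-truth loader under the given ratios, and SampleCountingLoader pushes per-dataset instance counts to the event storage so SampleCountMetricPrinter can log them.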
+ logger.info("Running inference with test-time augmentation ...") + transform_data = load_from_cfg(cfg) + model = DensePoseGeneralizedRCNNWithTTA( + cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg) + ) + evaluators = [ + cls.build_evaluator( + cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") + ) + for name in cfg.DATASETS.TEST + ] + res = cls.test(cfg, model, evaluators) + res = OrderedDict({k + "_TTA": v for k, v in res.items()}) + return res diff --git a/projects/DensePose/densepose/evaluator.py b/projects/DensePose/densepose/evaluator.py new file mode 100644 index 0000000..da78f76 --- /dev/null +++ b/projects/DensePose/densepose/evaluator.py @@ -0,0 +1,224 @@ +# -*- coding: utf-8 -*- +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import contextlib +import copy +import io +import itertools +import logging +import numpy as np +import os +from collections import OrderedDict +import pycocotools.mask as mask_utils +import torch +from fvcore.common.file_io import PathManager +from pycocotools.coco import COCO + +from detectron2.data import MetadataCatalog +from detectron2.evaluation import DatasetEvaluator +from detectron2.structures import BoxMode +from detectron2.utils.comm import all_gather, is_main_process, synchronize +from detectron2.utils.logger import create_small_table + +from .data.samplers import densepose_to_mask +from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode + + +class DensePoseCOCOEvaluator(DatasetEvaluator): + def __init__(self, dataset_name, distributed, output_dir=None): + self._distributed = distributed + self._output_dir = output_dir + + self._cpu_device = torch.device("cpu") + self._logger = logging.getLogger(__name__) + + self._metadata = MetadataCatalog.get(dataset_name) + self._min_threshold = 0.5 + json_file = PathManager.get_local_path(self._metadata.json_file) + with contextlib.redirect_stdout(io.StringIO()): + self._coco_api = COCO(json_file) + + def reset(self): + self._predictions = [] + + def process(self, inputs, outputs): + """ + Args: + inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). + It is a list of dict. Each dict corresponds to an image and + contains keys like "height", "width", "file_name", "image_id". + outputs: the outputs of a COCO model. It is a list of dicts with key + "instances" that contains :class:`Instances`. + The :class:`Instances` object needs to have `densepose` field. + """ + for input, output in zip(inputs, outputs): + instances = output["instances"].to(self._cpu_device) + + json_results = prediction_to_json(instances, input["image_id"]) + self._predictions.extend(json_results) + + def evaluate(self): + if self._distributed: + synchronize() + predictions = all_gather(self._predictions) + predictions = list(itertools.chain(*predictions)) + if not is_main_process(): + return + else: + predictions = self._predictions + + return copy.deepcopy(self._eval_predictions(predictions)) + + def _eval_predictions(self, predictions): + """ + Evaluate predictions on densepose. + Return results with the metrics of the tasks. 
+ """ + self._logger.info("Preparing results for COCO format ...") + + if self._output_dir: + PathManager.mkdirs(self._output_dir) + file_path = os.path.join(self._output_dir, "coco_densepose_predictions.pth") + with PathManager.open(file_path, "wb") as f: + torch.save(predictions, f) + + self._logger.info("Evaluating predictions ...") + res = OrderedDict() + results_gps, results_gpsm, results_segm = _evaluate_predictions_on_coco( + self._coco_api, predictions, min_threshold=self._min_threshold + ) + res["densepose_gps"] = results_gps + res["densepose_gpsm"] = results_gpsm + res["densepose_segm"] = results_segm + return res + + +def prediction_to_json(instances, img_id): + """ + Args: + instances (Instances): the output of the model + img_id (str): the image id in COCO + + Returns: + list[dict]: the results in densepose evaluation format + """ + scores = instances.scores.tolist() + segmentations = densepose_to_mask(instances) + + boxes = instances.pred_boxes.tensor.clone() + boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) + instances.pred_densepose = instances.pred_densepose.to_result(boxes) + + results = [] + for k in range(len(instances)): + densepose = instances.pred_densepose[k] + segmentation = segmentations.tensor[k] + segmentation_encoded = mask_utils.encode( + np.require(segmentation.numpy(), dtype=np.uint8, requirements=["F"]) + ) + segmentation_encoded["counts"] = segmentation_encoded["counts"].decode("utf-8") + result = { + "image_id": img_id, + "category_id": 1, # densepose only has one class + "bbox": densepose[1], + "score": scores[k], + "densepose": densepose, + "segmentation": segmentation_encoded, + } + results.append(result) + return results + + +def _evaluate_predictions_on_coco(coco_gt, coco_results, min_threshold=0.5): + logger = logging.getLogger(__name__) + + segm_metrics = _get_segmentation_metrics() + densepose_metrics = _get_densepose_metrics(min_threshold) + if len(coco_results) == 0: # cocoapi does not handle empty results very well + logger.warn("No predictions from the model! 
Set scores to -1") + results_gps = {metric: -1 for metric in densepose_metrics} + results_gpsm = {metric: -1 for metric in densepose_metrics} + results_segm = {metric: -1 for metric in segm_metrics} + return results_gps, results_gpsm, results_segm + + coco_dt = coco_gt.loadRes(coco_results) + results_segm = _evaluate_predictions_on_coco_segm(coco_gt, coco_dt, segm_metrics, min_threshold) + logger.info("Evaluation results for densepose segm: \n" + create_small_table(results_segm)) + results_gps = _evaluate_predictions_on_coco_gps( + coco_gt, coco_dt, densepose_metrics, min_threshold + ) + logger.info( + "Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps) + ) + results_gpsm = _evaluate_predictions_on_coco_gpsm( + coco_gt, coco_dt, densepose_metrics, min_threshold + ) + logger.info( + "Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm) + ) + return results_gps, results_gpsm, results_segm + + +def _get_densepose_metrics(min_threshold=0.5): + metrics = ["AP"] + if min_threshold <= 0.201: + metrics += ["AP20"] + if min_threshold <= 0.301: + metrics += ["AP30"] + if min_threshold <= 0.401: + metrics += ["AP40"] + metrics.extend(["AP50", "AP75", "APm", "APl", "AR", "AR50", "AR75", "ARm", "ARl"]) + return metrics + + +def _get_segmentation_metrics(): + return [ + "AP", + "AP50", + "AP75", + "APs", + "APm", + "APl", + "AR@1", + "AR@10", + "AR@100", + "ARs", + "ARm", + "ARl", + ] + + +def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics, min_threshold=0.5): + coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS) + coco_eval.params.iouThrs = np.linspace( + min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True + ) + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)} + return results + + +def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics, min_threshold=0.5): + coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM) + coco_eval.params.iouThrs = np.linspace( + min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True + ) + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)} + return results + + +def _evaluate_predictions_on_coco_segm(coco_gt, coco_dt, metrics, min_threshold=0.5): + coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "segm") + coco_eval.params.iouThrs = np.linspace( + min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True + ) + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)} + return results diff --git a/projects/DensePose/densepose/modeling/build.py b/projects/DensePose/densepose/modeling/build.py new file mode 100644 index 0000000..43f3bf2 --- /dev/null +++ b/projects/DensePose/densepose/modeling/build.py @@ -0,0 +1,66 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
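These helpers assume that _get_densepose_metrics and the evaluator's _summarizeUvs emit entries in the same order for a given min_threshold; both prepend AP20/AP30/AP40 only when the threshold grid starts low enough. A small illustration of the name-to-stat pairing for min_threshold=0.5, with made-up stat values:

import numpy as np

# For min_threshold=0.5, _get_densepose_metrics yields 10 names and _summarizeUvs
# produces 10 numbers in the same order; the stat values below are made up.
metrics = ["AP", "AP50", "AP75", "APm", "APl", "AR", "AR50", "AR75", "ARm", "ARl"]
stats = np.array([0.551, 0.879, 0.602, 0.512, 0.563, 0.672, 0.934, 0.742, 0.605, 0.681])
results = {metric: float(value * 100) for metric, value in zip(metrics, stats)}
# {'AP': 55.1, 'AP50': 87.9, ...}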
All Rights Reserved + +from detectron2.config import CfgNode + +from .filter import DensePoseDataFilter +from .losses import DensePoseLosses +from .predictors import DensePoseChartWithConfidencePredictor + + +def build_densepose_predictor(cfg: CfgNode, input_channels: int): + """ + Create an instance of DensePose predictor based on configuration options. + + Args: + cfg (CfgNode): configuration options + input_channels (int): input tensor size along the channel dimension + Return: + An instance of DensePose predictor + """ + predictor = DensePoseChartWithConfidencePredictor(cfg, input_channels) + return predictor + + +def build_densepose_data_filter(cfg: CfgNode): + """ + Build DensePose data filter which selects data for training + + Args: + cfg (CfgNode): configuration options + + Return: + Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances) + An instance of DensePose filter, which takes feature tensors and proposals + as an input and returns filtered features and proposals + """ + dp_filter = DensePoseDataFilter(cfg) + return dp_filter + + +def build_densepose_head(cfg: CfgNode, input_channels: int): + """ + Build DensePose head based on configuration options + + Args: + cfg (CfgNode): configuration options + input_channels (int): input tensor size along the channel dimension + Return: + An instance of DensePose head + """ + from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY + + head_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME + return ROI_DENSEPOSE_HEAD_REGISTRY.get(head_name)(cfg, input_channels) + + +def build_densepose_losses(cfg: CfgNode): + """ + Build DensePose loss based on configuration options + + Args: + cfg (CfgNode): configuration options + Return: + An instance of DensePose loss + """ + losses = DensePoseLosses(cfg) + return losses diff --git a/projects/DensePose/densepose/modeling/confidence.py b/projects/DensePose/densepose/modeling/confidence.py new file mode 100644 index 0000000..5195d20 --- /dev/null +++ b/projects/DensePose/densepose/modeling/confidence.py @@ -0,0 +1,73 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +from dataclasses import dataclass +from enum import Enum + +from detectron2.config import CfgNode + + +class DensePoseUVConfidenceType(Enum): + """ + Statistical model type for confidence learning, possible values: + - "iid_iso": statistically independent identically distributed residuals + with isotropic covariance + - "indep_aniso": statistically independent residuals with anisotropic + covariances + For details, see: + N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning + Dense Correspondences from Noisy Labels", p. 918--926, in Proc.
NIPS 2019 + """ + + # fmt: off + IID_ISO = "iid_iso" + INDEP_ANISO = "indep_aniso" + # fmt: on + + +@dataclass +class DensePoseUVConfidenceConfig: + """ + Configuration options for confidence on UV data + """ + + enabled: bool = False + # lower bound on UV confidences + epsilon: float = 0.01 + type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO + + +@dataclass +class DensePoseSegmConfidenceConfig: + """ + Configuration options for confidence on segmentation + """ + + enabled: bool = False + # lower bound on confidence values + epsilon: float = 0.01 + + +@dataclass +class DensePoseConfidenceModelConfig: + """ + Configuration options for confidence models + """ + + # confidence for U and V values + uv_confidence: DensePoseUVConfidenceConfig + # segmentation confidence + segm_confidence: DensePoseSegmConfidenceConfig + + @staticmethod + def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig": + return DensePoseConfidenceModelConfig( + uv_confidence=DensePoseUVConfidenceConfig( + enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED, + epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON, + type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE), + ), + segm_confidence=DensePoseSegmConfidenceConfig( + enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.ENABLED, + epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON, + ), + ) diff --git a/projects/DensePose/densepose/modeling/densepose_checkpoint.py b/projects/DensePose/densepose/modeling/densepose_checkpoint.py new file mode 100644 index 0000000..d2beed2 --- /dev/null +++ b/projects/DensePose/densepose/modeling/densepose_checkpoint.py @@ -0,0 +1,35 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +from collections import OrderedDict + +from detectron2.checkpoint import DetectionCheckpointer + + +def _rename_HRNet_weights(weights): + # We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are + # common to all HRNet pretrained weights, and should be enough to accurately identify them + if ( + len(weights["model"].keys()) == 1956 + and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716 + ): + hrnet_weights = OrderedDict() + for k in weights["model"].keys(): + hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k] + return {"model": hrnet_weights} + else: + return weights + + +class DensePoseCheckpointer(DetectionCheckpointer): + """ + Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights + """ + + def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): + super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables) + + def _load_file(self, filename: str) -> object: + """ + Adding hrnet support + """ + weights = super()._load_file(filename) + return _rename_HRNet_weights(weights) diff --git a/projects/DensePose/densepose/modeling/filter.py b/projects/DensePose/densepose/modeling/filter.py new file mode 100644 index 0000000..5628d94 --- /dev/null +++ b/projects/DensePose/densepose/modeling/filter.py @@ -0,0 +1,94 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +from typing import List +import torch + +from detectron2.config import CfgNode +from detectron2.structures import Instances +from detectron2.structures.boxes import matched_boxlist_iou + + +class DensePoseDataFilter(object): + def __init__(self, cfg: CfgNode): + self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD + self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS + + @torch.no_grad() + def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]): + """ + Filters proposals with targets to keep only the ones relevant for + DensePose training + + Args: + features (list[Tensor]): input data as a list of features, + each feature is a tensor. Axis 0 represents the number of + images `N` in the input data; axes 1-3 are channels, + height, and width, which may vary between features + (e.g., if a feature pyramid is used). + proposals_with_targets (list[Instances]): length `N` list of + `Instances`. The i-th `Instances` contains instances + (proposals, GT) for the i-th input image, + Returns: + list[Tensor]: filtered features + list[Instances]: filtered proposals + """ + proposals_filtered = [] + # TODO: the commented out code was supposed to correctly deal with situations + # where no valid DensePose GT is available for certain images. The corresponding + # image features were sliced and proposals were filtered. This led to performance + # deterioration, both in terms of runtime and in terms of evaluation results. + # + # feature_mask = torch.ones( + # len(proposals_with_targets), + # dtype=torch.bool, + # device=features[0].device if len(features) > 0 else torch.device("cpu"), + # ) + for i, proposals_per_image in enumerate(proposals_with_targets): + if not proposals_per_image.has("gt_densepose") and ( + not proposals_per_image.has("gt_masks") or not self.keep_masks + ): + # feature_mask[i] = 0 + continue + gt_boxes = proposals_per_image.gt_boxes + est_boxes = proposals_per_image.proposal_boxes + # apply match threshold for densepose head + iou = matched_boxlist_iou(gt_boxes, est_boxes) + iou_select = iou > self.iou_threshold + proposals_per_image = proposals_per_image[iou_select] + + N_gt_boxes = len(proposals_per_image.gt_boxes) + assert N_gt_boxes == len(proposals_per_image.proposal_boxes), ( + f"The number of GT boxes {N_gt_boxes} is different from the " + f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}" + ) + # filter out any target without suitable annotation + if self.keep_masks: + gt_masks = ( + proposals_per_image.gt_masks + if hasattr(proposals_per_image, "gt_masks") + else [None] * N_gt_boxes + ) + else: + gt_masks = [None] * N_gt_boxes + gt_densepose = ( + proposals_per_image.gt_densepose + if hasattr(proposals_per_image, "gt_densepose") + else [None] * N_gt_boxes + ) + assert len(gt_masks) == N_gt_boxes + assert len(gt_densepose) == N_gt_boxes + selected_indices = [ + i + for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks)) + if (dp_target is not None) or (mask_target is not None) + ] + # if not len(selected_indices): + # feature_mask[i] = 0 + # continue + if len(selected_indices) != N_gt_boxes: + proposals_per_image = proposals_per_image[selected_indices] + assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes) + proposals_filtered.append(proposals_per_image) + # features_filtered = [feature[feature_mask] for feature in features] + # return features_filtered, proposals_filtered + return features, proposals_filtered diff 
--git a/projects/DensePose/densepose/modeling/hrfpn.py b/projects/DensePose/densepose/modeling/hrfpn.py new file mode 100644 index 0000000..ddc1c19 --- /dev/null +++ b/projects/DensePose/densepose/modeling/hrfpn.py @@ -0,0 +1,181 @@ +""" +MIT License +Copyright (c) 2019 Microsoft +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from detectron2.layers import ShapeSpec +from detectron2.modeling.backbone import BACKBONE_REGISTRY +from detectron2.modeling.backbone.backbone import Backbone + +from .hrnet import build_pose_hrnet_backbone + + +class HRFPN(Backbone): + """ HRFPN (High Resolution Feature Pyramids) + Transforms outputs of HRNet backbone so they are suitable for the ROI_heads + arXiv: https://arxiv.org/abs/1904.04514 + Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py + Args: + bottom_up: (list) output of HRNet + in_features (list): names of the input features (output of HRNet) + in_channels (list): number of channels for each branch + out_channels (int): output channels of feature pyramids + n_out_features (int): number of output stages + pooling (str): pooling for generating feature pyramids (from {MAX, AVG}) + share_conv (bool): Have one conv per output, or share one with all the outputs + """ + + def __init__( + self, + bottom_up, + in_features, + n_out_features, + in_channels, + out_channels, + pooling="AVG", + share_conv=False, + ): + super(HRFPN, self).__init__() + assert isinstance(in_channels, list) + self.bottom_up = bottom_up + self.in_features = in_features + self.n_out_features = n_out_features + self.in_channels = in_channels + self.out_channels = out_channels + self.num_ins = len(in_channels) + self.share_conv = share_conv + + if self.share_conv: + self.fpn_conv = nn.Conv2d( + in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1 + ) + else: + self.fpn_conv = nn.ModuleList() + for _ in range(self.n_out_features): + self.fpn_conv.append( + nn.Conv2d( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + padding=1, + ) + ) + + # Custom change: Replaces a simple bilinear interpolation + self.interp_conv = nn.ModuleList() + for i in range(len(self.in_features)): + self.interp_conv.append( + nn.Sequential( + nn.ConvTranspose2d( + in_channels=in_channels[i], + out_channels=in_channels[i], + kernel_size=4, + stride=2 ** i, + padding=0, + output_padding=0, + bias=False, + ), + nn.BatchNorm2d(in_channels[i], momentum=0.1), + 
nn.ReLU(inplace=True), + ) + ) + + # Custom change: Replaces a couple (reduction conv + pooling) by one conv + self.reduction_pooling_conv = nn.ModuleList() + for i in range(self.n_out_features): + self.reduction_pooling_conv.append( + nn.Sequential( + nn.Conv2d(sum(in_channels), out_channels, kernel_size=2 ** i, stride=2 ** i), + nn.BatchNorm2d(out_channels, momentum=0.1), + nn.ReLU(inplace=True), + ) + ) + + if pooling == "MAX": + self.pooling = F.max_pool2d + else: + self.pooling = F.avg_pool2d + + self._out_features = [] + self._out_feature_channels = {} + self._out_feature_strides = {} + + for i in range(self.n_out_features): + self._out_features.append("p%d" % (i + 1)) + self._out_feature_channels.update({self._out_features[-1]: self.out_channels}) + self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)}) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, a=1) + nn.init.constant_(m.bias, 0) + + def forward(self, inputs): + bottom_up_features = self.bottom_up(inputs) + assert len(bottom_up_features) == len(self.in_features) + inputs = [bottom_up_features[f] for f in self.in_features] + + outs = [] + for i in range(len(inputs)): + outs.append(self.interp_conv[i](inputs[i])) + shape_2 = min(o.shape[2] for o in outs) + shape_3 = min(o.shape[3] for o in outs) + out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1) + outs = [] + for i in range(self.n_out_features): + outs.append(self.reduction_pooling_conv[i](out)) + for i in range(len(outs)): # Make shapes consistent + outs[-1 - i] = outs[-1 - i][ + :, :, : outs[-1].shape[2] * 2 ** i, : outs[-1].shape[3] * 2 ** i + ] + outputs = [] + for i in range(len(outs)): + if self.share_conv: + outputs.append(self.fpn_conv(outs[i])) + else: + outputs.append(self.fpn_conv[i](outs[i])) + + assert len(self._out_features) == len(outputs) + return dict(zip(self._out_features, outputs)) + + +@BACKBONE_REGISTRY.register() +def build_hrfpn_backbone(cfg, input_shape: ShapeSpec): + + in_channels = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS + in_features = ["p%d" % (i + 1) for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES)] + n_out_features = len(cfg.MODEL.ROI_HEADS.IN_FEATURES) + out_channels = cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS + hrnet = build_pose_hrnet_backbone(cfg, input_shape) + hrfpn = HRFPN( + hrnet, + in_features, + n_out_features, + in_channels, + out_channels, + pooling="AVG", + share_conv=False, + ) + + return hrfpn diff --git a/projects/DensePose/densepose/modeling/hrnet.py b/projects/DensePose/densepose/modeling/hrnet.py new file mode 100644 index 0000000..acaa92d --- /dev/null +++ b/projects/DensePose/densepose/modeling/hrnet.py @@ -0,0 +1,473 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. 
+# Written by Bin Xiao (leoxiaobin@gmail.com) +# Modified by Bowen Cheng (bcheng9@illinois.edu) +# Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import, division, print_function +import logging +import torch.nn as nn + +from detectron2.layers import ShapeSpec +from detectron2.modeling.backbone import BACKBONE_REGISTRY +from detectron2.modeling.backbone.backbone import Backbone + +BN_MOMENTUM = 0.1 +logger = logging.getLogger(__name__) + +__all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"] + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class HighResolutionModule(nn.Module): + """ HighResolutionModule + Building block of the PoseHigherResolutionNet (see lower) + arXiv: https://arxiv.org/abs/1908.10357 + Args: + num_branches (int): number of branches of the modyle + blocks (str): type of block of the module + num_blocks (int): number of blocks of the module + num_inchannels (int): number of input channels of the module + num_channels (list): number of channels of each branch + multi_scale_output (bool): only used by the last module of PoseHigherResolutionNet + """ + + def __init__( + self, + num_branches, + blocks, + num_blocks, + num_inchannels, + num_channels, + multi_scale_output=True, + ): + super(HighResolutionModule, self).__init__() + self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels) + + self.num_inchannels = num_inchannels + self.num_branches = num_branches + + 
self.multi_scale_output = multi_scale_output + + self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(True) + + def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels): + if num_branches != len(num_blocks): + error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format(num_branches, len(num_blocks)) + logger.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format( + num_branches, len(num_channels) + ) + logger.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_inchannels): + error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format( + num_branches, len(num_inchannels) + ) + logger.error(error_msg) + raise ValueError(error_msg) + + def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1): + downsample = None + if ( + stride != 1 + or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion + ): + downsample = nn.Sequential( + nn.Conv2d( + self.num_inchannels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM), + ) + + layers = [] + layers.append( + block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample) + ) + self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion + for _ in range(1, num_blocks[branch_index]): + layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index])) + + return nn.Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + branches = [] + + for i in range(num_branches): + branches.append(self._make_one_branch(i, block, num_blocks, num_channels)) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self): + if self.num_branches == 1: + return None + + num_branches = self.num_branches + num_inchannels = self.num_inchannels + fuse_layers = [] + for i in range(num_branches if self.multi_scale_output else 1): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False), + nn.BatchNorm2d(num_inchannels[i]), + nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"), + ) + ) + elif j == i: + fuse_layer.append(None) + else: + conv3x3s = [] + for k in range(i - j): + if k == i - j - 1: + num_outchannels_conv3x3 = num_inchannels[i] + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_outchannels_conv3x3, + 3, + 2, + 1, + bias=False, + ), + nn.BatchNorm2d(num_outchannels_conv3x3), + ) + ) + else: + num_outchannels_conv3x3 = num_inchannels[j] + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_outchannels_conv3x3, + 3, + 2, + 1, + bias=False, + ), + nn.BatchNorm2d(num_outchannels_conv3x3), + nn.ReLU(True), + ) + ) + fuse_layer.append(nn.Sequential(*conv3x3s)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def get_num_inchannels(self): + return self.num_inchannels + + def forward(self, x): + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + + for i in range(len(self.fuse_layers)): + y = x[0] if i == 0 else 
self.fuse_layers[i][0](x[0]) + for j in range(1, self.num_branches): + if i == j: + y = y + x[j] + else: + z = self.fuse_layers[i][j](x[j])[:, :, : y.shape[2], : y.shape[3]] + y = y + z + x_fuse.append(self.relu(y)) + + return x_fuse + + +blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck} + + +class PoseHigherResolutionNet(Backbone): + """ PoseHigherResolutionNet + Composed of several HighResolutionModule tied together with ConvNets + Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure + arXiv: https://arxiv.org/abs/1908.10357 + """ + + def __init__(self, cfg, **kwargs): + self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES + super(PoseHigherResolutionNet, self).__init__() + + # stem net + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.layer1 = self._make_layer(Bottleneck, 64, 4) + + self.stage2_cfg = cfg.MODEL.HRNET.STAGE2 + num_channels = self.stage2_cfg.NUM_CHANNELS + block = blocks_dict[self.stage2_cfg.BLOCK] + num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition1 = self._make_transition_layer([256], num_channels) + self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels) + + self.stage3_cfg = cfg.MODEL.HRNET.STAGE3 + num_channels = self.stage3_cfg.NUM_CHANNELS + block = blocks_dict[self.stage3_cfg.BLOCK] + num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels) + self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels) + + self.stage4_cfg = cfg.MODEL.HRNET.STAGE4 + num_channels = self.stage4_cfg.NUM_CHANNELS + block = blocks_dict[self.stage4_cfg.BLOCK] + num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels, multi_scale_output=True + ) + + self._out_features = [] + self._out_feature_channels = {} + self._out_feature_strides = {} + + for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES): + self._out_features.append("p%d" % (i + 1)) + self._out_feature_channels.update( + {self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]} + ) + self._out_feature_strides.update({self._out_features[-1]: 1}) + + def _get_deconv_cfg(self, deconv_kernel): + if deconv_kernel == 4: + padding = 1 + output_padding = 0 + elif deconv_kernel == 3: + padding = 1 + output_padding = 1 + elif deconv_kernel == 2: + padding = 0 + output_padding = 0 + + return deconv_kernel, padding, output_padding + + def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + nn.Conv2d( + num_channels_pre_layer[i], + num_channels_cur_layer[i], + 3, + 1, + 1, + bias=False, + ), + nn.BatchNorm2d(num_channels_cur_layer[i]), + nn.ReLU(inplace=True), + ) + ) + else: + transition_layers.append(None) + else: + conv3x3s = 
[] + for j in range(i + 1 - num_branches_pre): + inchannels = num_channels_pre_layer[-1] + outchannels = ( + num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels + ) + conv3x3s.append( + nn.Sequential( + nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False), + nn.BatchNorm2d(outchannels), + nn.ReLU(inplace=True), + ) + ) + transition_layers.append(nn.Sequential(*conv3x3s)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True): + num_modules = layer_config["NUM_MODULES"] + num_branches = layer_config["NUM_BRANCHES"] + num_blocks = layer_config["NUM_BLOCKS"] + num_channels = layer_config["NUM_CHANNELS"] + block = blocks_dict[layer_config["BLOCK"]] + + modules = [] + for i in range(num_modules): + # multi_scale_output is only used last module + if not multi_scale_output and i == num_modules - 1: + reset_multi_scale_output = False + else: + reset_multi_scale_output = True + + modules.append( + HighResolutionModule( + num_branches, + block, + num_blocks, + num_inchannels, + num_channels, + reset_multi_scale_output, + ) + ) + num_inchannels = modules[-1].get_num_inchannels() + + return nn.Sequential(*modules), num_inchannels + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.bn2(x) + x = self.relu(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg.NUM_BRANCHES): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg.NUM_BRANCHES): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg.NUM_BRANCHES): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + assert len(self._out_features) == len(y_list) + return dict(zip(self._out_features, y_list)) # final_outputs + + +@BACKBONE_REGISTRY.register() +def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec): + model = PoseHigherResolutionNet(cfg) + return model diff --git a/projects/DensePose/densepose/modeling/inference.py b/projects/DensePose/densepose/modeling/inference.py new file mode 100644 index 0000000..77f093d --- /dev/null +++ b/projects/DensePose/densepose/modeling/inference.py @@ -0,0 +1,83 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +from typing import List, Tuple +import torch + +from detectron2.structures import Instances + +from ..data.structures import DensePoseOutput + + +def densepose_inference( + densepose_outputs: Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor], + densepose_confidences: Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor], + detections: List[Instances], +): + """ + Infer dense pose estimate based on outputs from the DensePose head + and detections. The estimate for each detection instance is stored in its + "pred_densepose" attribute. + + Args: + densepose_outputs (tuple(`torch.Tensor`)): iterable containing 4 elements: + - s (:obj: `torch.Tensor`): coarse segmentation tensor of size (N, A, H, W), + - i (:obj: `torch.Tensor`): fine segmentation tensor of size (N, C, H, W), + - u (:obj: `torch.Tensor`): U coordinates for each class of size (N, C, H, W), + - v (:obj: `torch.Tensor`): V coordinates for each class of size (N, C, H, W), + where N is the total number of detections in a batch, + A is the number of coarse segmentation labels + (e.g. 15 for coarse body parts + background), + C is the number of fine segmentation labels + (e.g. 25 for fine body parts + background), + W is the resolution along the X axis + H is the resolution along the Y axis + densepose_confidences (tuple(`torch.Tensor`)): iterable containing 6 elements: + - sigma_1 (:obj: `torch.Tensor`): global confidences for UV coordinates + of size (N, C, H, W) + - sigma_2 (:obj: `torch.Tensor`): individual confidences for UV coordinates + of size (N, C, H, W) + - kappa_u (:obj: `torch.Tensor`): first component of confidence direction + vector of size (N, C, H, W) + - kappa_v (:obj: `torch.Tensor`): second component of confidence direction + vector of size (N, C, H, W) + - fine_segm_confidence (:obj: `torch.Tensor`): confidence for fine + segmentation of size (N, 1, H, W) + - coarse_segm_confidence (:obj: `torch.Tensor`): confidence for coarse + segmentation of size (N, 1, H, W) + detections (list[Instances]): A list of N Instances, where N is the number of images + in the batch. Instances are modified by this method: "pred_densepose" attribute + is added to each instance, the attribute contains the corresponding + DensePoseOutput object. + """ + # DensePose outputs: segmentation, body part indices, U, V + s, index_uv, u, v = densepose_outputs + ( + sigma_1, + sigma_2, + kappa_u, + kappa_v, + fine_segm_confidence, + coarse_segm_confidence, + ) = densepose_confidences + k = 0 + for detection in detections: + n_i = len(detection) + s_i = s[k : k + n_i] + index_uv_i = index_uv[k : k + n_i] + u_i = u[k : k + n_i] + v_i = v[k : k + n_i] + _local_vars = locals() + confidences = { + name: _local_vars[name][k : k + n_i] + for name in ( + "sigma_1", + "sigma_2", + "kappa_u", + "kappa_v", + "fine_segm_confidence", + "coarse_segm_confidence", + ) + if _local_vars.get(name) is not None + } + densepose_output_i = DensePoseOutput(s_i, index_uv_i, u_i, v_i, confidences) + detection.pred_densepose = densepose_output_i + k += n_i diff --git a/projects/DensePose/densepose/modeling/losses/__init__.py b/projects/DensePose/densepose/modeling/losses/__init__.py new file mode 100644 index 0000000..47e8298 --- /dev/null +++ b/projects/DensePose/densepose/modeling/losses/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved + +from .densepose_losses import DensePoseLosses diff --git a/projects/DensePose/densepose/modeling/losses/densepose_losses.py b/projects/DensePose/densepose/modeling/losses/densepose_losses.py new file mode 100644 index 0000000..cc205b4 --- /dev/null +++ b/projects/DensePose/densepose/modeling/losses/densepose_losses.py @@ -0,0 +1,729 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +import math +from dataclasses import dataclass +from typing import Iterable, Optional +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.structures import Instances + +from .. import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType + + +def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z): + """ + Computes utility values for linear interpolation at points v. + The points are given as normalized offsets in the source interval + (v0_src, v0_src + size_src), more precisely: + v = v0_src + v_norm * size_src / 256.0 + The computed utilities include lower points v_lo, upper points v_hi, + interpolation weights v_w and flags j_valid indicating whether the + points falls into the destination interval (v0_dst, v0_dst + size_dst). + + Args: + v_norm (:obj: `torch.Tensor`): tensor of size N containing + normalized point offsets + v0_src (:obj: `torch.Tensor`): tensor of size N containing + left bounds of source intervals for normalized points + size_src (:obj: `torch.Tensor`): tensor of size N containing + source interval sizes for normalized points + v0_dst (:obj: `torch.Tensor`): tensor of size N containing + left bounds of destination intervals + size_dst (:obj: `torch.Tensor`): tensor of size N containing + destination interval sizes + size_z (int): interval size for data to be interpolated + + Returns: + v_lo (:obj: `torch.Tensor`): int tensor of size N containing + indices of lower values used for interpolation, all values are + integers from [0, size_z - 1] + v_hi (:obj: `torch.Tensor`): int tensor of size N containing + indices of upper values used for interpolation, all values are + integers from [0, size_z - 1] + v_w (:obj: `torch.Tensor`): float tensor of size N containing + interpolation weights + j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing + 0 for points outside the estimation interval + (v0_est, v0_est + size_est) and 1 otherwise + """ + v = v0_src + v_norm * size_src / 256.0 + j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst) + v_grid = (v - v0_dst) * size_z / size_dst + v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1) + v_hi = (v_lo + 1).clamp(max=size_z - 1) + v_grid = torch.min(v_hi.float(), v_grid) + v_w = v_grid - v_lo.float() + return v_lo, v_hi, v_w, j_valid + + +class SingleTensorsHelper: + def __init__(self, proposals_with_gt): + + with torch.no_grad(): + ( + index_uv_img, + i_with_dp, + bbox_xywh_est, + bbox_xywh_gt, + index_gt_all, + x_norm, + y_norm, + u_gt_all, + v_gt_all, + s_gt, + index_bbox, + ) = _extract_single_tensors_from_matches(proposals_with_gt) + + for k, v in locals().items(): + if k not in ["self", "proposals_with_gt"]: + setattr(self, k, v) + + +class BilinearInterpolationHelper: + """ + Args: + tensors_helper (SingleTensorsHelper) + j_valid (:obj: `torch.Tensor`): uint8 tensor of size M containing + 0 for points to be discarded and 1 for points to be selected + y_lo (:obj: `torch.Tensor`): int tensor of indices of upper values + in z_est for each point + y_hi (:obj: `torch.Tensor`): int tensor of indices of lower 
values + in z_est for each point + x_lo (:obj: `torch.Tensor`): int tensor of indices of left values + in z_est for each point + x_hi (:obj: `torch.Tensor`): int tensor of indices of right values + in z_est for each point + w_ylo_xlo (:obj: `torch.Tensor`): float tensor of size M; + contains upper-left value weight for each point + w_ylo_xhi (:obj: `torch.Tensor`): float tensor of size M; + contains upper-right value weight for each point + w_yhi_xlo (:obj: `torch.Tensor`): float tensor of size M; + contains lower-left value weight for each point + w_yhi_xhi (:obj: `torch.Tensor`): float tensor of size M; + contains lower-right value weight for each point + """ + + def __init__( + self, + tensors_helper, + j_valid, + y_lo, + y_hi, + x_lo, + x_hi, + w_ylo_xlo, + w_ylo_xhi, + w_yhi_xlo, + w_yhi_xhi, + ): + for k, v in locals().items(): + if k != "self": + setattr(self, k, v) + + @staticmethod + def from_matches(tensors_helper, densepose_outputs_size): + + zh, zw = densepose_outputs_size[2], densepose_outputs_size[3] + + x0_gt, y0_gt, w_gt, h_gt = tensors_helper.bbox_xywh_gt[tensors_helper.index_bbox].unbind(1) + x0_est, y0_est, w_est, h_est = tensors_helper.bbox_xywh_est[ + tensors_helper.index_bbox + ].unbind(dim=1) + x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities( + tensors_helper.x_norm, x0_gt, w_gt, x0_est, w_est, zw + ) + y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities( + tensors_helper.y_norm, y0_gt, h_gt, y0_est, h_est, zh + ) + j_valid = jx_valid * jy_valid + + w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w) + w_ylo_xhi = x_w * (1.0 - y_w) + w_yhi_xlo = (1.0 - x_w) * y_w + w_yhi_xhi = x_w * y_w + + return BilinearInterpolationHelper( + tensors_helper, + j_valid, + y_lo, + y_hi, + x_lo, + x_hi, + w_ylo_xlo, + w_ylo_xhi, + w_yhi_xlo, + w_yhi_xhi, + ) + + def extract_at_points( + self, + z_est, + slice_index_uv=None, + w_ylo_xlo=None, + w_ylo_xhi=None, + w_yhi_xlo=None, + w_yhi_xhi=None, + ): + """ + Extract ground truth values z_gt for valid point indices and estimated + values z_est using bilinear interpolation over top-left (y_lo, x_lo), + top-right (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right + (y_hi, x_hi) values in z_est with corresponding weights: + w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi. 
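+ For intuition, for each selected point the sampled estimate is the bilinear combination
+ z_est[index_bbox, slice_index_uv, y_lo, x_lo] * w_ylo_xlo + z_est[index_bbox, slice_index_uv, y_lo, x_hi] * w_ylo_xhi + z_est[index_bbox, slice_index_uv, y_hi, x_lo] * w_yhi_xlo + z_est[index_bbox, slice_index_uv, y_hi, x_hi] * w_yhi_xhi,
+ which is the expression evaluated in the method body below.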
+ Use slice_index_uv to slice dim=1 in z_est + """ + index_gt_all = self.tensors_helper.index_gt_all + slice_index_uv = index_gt_all if slice_index_uv is None else slice_index_uv + w_ylo_xlo = self.w_ylo_xlo if w_ylo_xlo is None else w_ylo_xlo + w_ylo_xhi = self.w_ylo_xhi if w_ylo_xhi is None else w_ylo_xhi + w_yhi_xlo = self.w_yhi_xlo if w_yhi_xlo is None else w_yhi_xlo + w_yhi_xhi = self.w_yhi_xhi if w_yhi_xhi is None else w_yhi_xhi + + index_bbox = self.tensors_helper.index_bbox + z_est_sampled = ( + z_est[index_bbox, slice_index_uv, self.y_lo, self.x_lo] * w_ylo_xlo + + z_est[index_bbox, slice_index_uv, self.y_lo, self.x_hi] * w_ylo_xhi + + z_est[index_bbox, slice_index_uv, self.y_hi, self.x_lo] * w_yhi_xlo + + z_est[index_bbox, slice_index_uv, self.y_hi, self.x_hi] * w_yhi_xhi + ) + return z_est_sampled + + +def _resample_data( + z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode="nearest", padding_mode="zeros" +): + """ + Args: + z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be + resampled + bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing + source bounding boxes in format XYWH + bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing + destination bounding boxes in format XYWH + Return: + zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout) + with resampled values of z, where D is the discretization size + """ + n = bbox_xywh_src.size(0) + assert n == bbox_xywh_dst.size(0), ( + "The number of " + "source ROIs for resampling ({}) should be equal to the number " + "of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0)) + ) + x0src, y0src, wsrc, hsrc = bbox_xywh_src.unbind(dim=1) + x0dst, y0dst, wdst, hdst = bbox_xywh_dst.unbind(dim=1) + x0dst_norm = 2 * (x0dst - x0src) / wsrc - 1 + y0dst_norm = 2 * (y0dst - y0src) / hsrc - 1 + x1dst_norm = 2 * (x0dst + wdst - x0src) / wsrc - 1 + y1dst_norm = 2 * (y0dst + hdst - y0src) / hsrc - 1 + grid_w = torch.arange(wout, device=z.device, dtype=torch.float) / wout + grid_h = torch.arange(hout, device=z.device, dtype=torch.float) / hout + grid_w_expanded = grid_w[None, None, :].expand(n, hout, wout) + grid_h_expanded = grid_h[None, :, None].expand(n, hout, wout) + dx_expanded = (x1dst_norm - x0dst_norm)[:, None, None].expand(n, hout, wout) + dy_expanded = (y1dst_norm - y0dst_norm)[:, None, None].expand(n, hout, wout) + x0_expanded = x0dst_norm[:, None, None].expand(n, hout, wout) + y0_expanded = y0dst_norm[:, None, None].expand(n, hout, wout) + grid_x = grid_w_expanded * dx_expanded + x0_expanded + grid_y = grid_h_expanded * dy_expanded + y0_expanded + grid = torch.stack((grid_x, grid_y), dim=3) + # resample Z from (N, C, H, W) into (N, C, Hout, Wout) + zresampled = F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True) + return zresampled + + +def _extract_single_tensors_from_matches_one_image( + proposals_targets, bbox_with_dp_offset, bbox_global_offset +): + i_gt_all = [] + x_norm_all = [] + y_norm_all = [] + u_gt_all = [] + v_gt_all = [] + s_gt_all = [] + bbox_xywh_gt_all = [] + bbox_xywh_est_all = [] + # Ibbox_all == k should be true for all data that corresponds + # to bbox_xywh_gt[k] and bbox_xywh_est[k] + # index k here is global wrt images + i_bbox_all = [] + # at offset k (k is global) contains index of bounding box data + # within densepose output tensor + i_with_dp = [] + + boxes_xywh_est = proposals_targets.proposal_boxes.clone() + boxes_xywh_gt = proposals_targets.gt_boxes.clone() + n_i = len(boxes_xywh_est) + assert 
n_i == len(boxes_xywh_gt) + + if n_i: + boxes_xywh_est.tensor[:, 2] -= boxes_xywh_est.tensor[:, 0] + boxes_xywh_est.tensor[:, 3] -= boxes_xywh_est.tensor[:, 1] + boxes_xywh_gt.tensor[:, 2] -= boxes_xywh_gt.tensor[:, 0] + boxes_xywh_gt.tensor[:, 3] -= boxes_xywh_gt.tensor[:, 1] + if hasattr(proposals_targets, "gt_densepose"): + densepose_gt = proposals_targets.gt_densepose + for k, box_xywh_est, box_xywh_gt, dp_gt in zip( + range(n_i), boxes_xywh_est.tensor, boxes_xywh_gt.tensor, densepose_gt + ): + if (dp_gt is not None) and (len(dp_gt.x) > 0): + i_gt_all.append(dp_gt.i) + x_norm_all.append(dp_gt.x) + y_norm_all.append(dp_gt.y) + u_gt_all.append(dp_gt.u) + v_gt_all.append(dp_gt.v) + s_gt_all.append(dp_gt.segm.unsqueeze(0)) + bbox_xywh_gt_all.append(box_xywh_gt.view(-1, 4)) + bbox_xywh_est_all.append(box_xywh_est.view(-1, 4)) + i_bbox_k = torch.full_like(dp_gt.i, bbox_with_dp_offset + len(i_with_dp)) + i_bbox_all.append(i_bbox_k) + i_with_dp.append(bbox_global_offset + k) + return ( + i_gt_all, + x_norm_all, + y_norm_all, + u_gt_all, + v_gt_all, + s_gt_all, + bbox_xywh_gt_all, + bbox_xywh_est_all, + i_bbox_all, + i_with_dp, + ) + + +def _extract_single_tensors_from_matches(proposals_with_targets): + i_img = [] + i_gt_all = [] + x_norm_all = [] + y_norm_all = [] + u_gt_all = [] + v_gt_all = [] + s_gt_all = [] + bbox_xywh_gt_all = [] + bbox_xywh_est_all = [] + i_bbox_all = [] + i_with_dp_all = [] + n = 0 + for i, proposals_targets_per_image in enumerate(proposals_with_targets): + n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0) + if not n_i: + continue + ( + i_gt_img, + x_norm_img, + y_norm_img, + u_gt_img, + v_gt_img, + s_gt_img, + bbox_xywh_gt_img, + bbox_xywh_est_img, + i_bbox_img, + i_with_dp_img, + ) = _extract_single_tensors_from_matches_one_image( # noqa + proposals_targets_per_image, len(i_with_dp_all), n + ) + i_gt_all.extend(i_gt_img) + x_norm_all.extend(x_norm_img) + y_norm_all.extend(y_norm_img) + u_gt_all.extend(u_gt_img) + v_gt_all.extend(v_gt_img) + s_gt_all.extend(s_gt_img) + bbox_xywh_gt_all.extend(bbox_xywh_gt_img) + bbox_xywh_est_all.extend(bbox_xywh_est_img) + i_bbox_all.extend(i_bbox_img) + i_with_dp_all.extend(i_with_dp_img) + i_img.extend([i] * len(i_with_dp_img)) + n += n_i + # concatenate all data into a single tensor + if (n > 0) and (len(i_with_dp_all) > 0): + i_gt = torch.cat(i_gt_all, 0).long() + x_norm = torch.cat(x_norm_all, 0) + y_norm = torch.cat(y_norm_all, 0) + u_gt = torch.cat(u_gt_all, 0) + v_gt = torch.cat(v_gt_all, 0) + s_gt = torch.cat(s_gt_all, 0) + bbox_xywh_gt = torch.cat(bbox_xywh_gt_all, 0) + bbox_xywh_est = torch.cat(bbox_xywh_est_all, 0) + i_bbox = torch.cat(i_bbox_all, 0).long() + else: + i_gt = None + x_norm = None + y_norm = None + u_gt = None + v_gt = None + s_gt = None + bbox_xywh_gt = None + bbox_xywh_est = None + i_bbox = None + return ( + i_img, + i_with_dp_all, + bbox_xywh_est, + bbox_xywh_gt, + i_gt, + x_norm, + y_norm, + u_gt, + v_gt, + s_gt, + i_bbox, + ) + + +@dataclass +class DataForMaskLoss: + """ + Contains mask GT and estimated data for proposals from multiple images: + """ + + # tensor of size (K, H, W) containing GT labels + masks_gt: Optional[torch.Tensor] = None + # tensor of size (K, C, H, W) containing estimated scores + masks_est: Optional[torch.Tensor] = None + + +def _extract_data_for_mask_loss_from_matches( + proposals_targets: Iterable[Instances], estimated_segm: torch.Tensor +) -> DataForMaskLoss: + """ + Extract data for mask loss from instances that contain matched GT and + estimated bounding boxes. 
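+ For example, if two images contribute 3 and 2 matched proposals and estimated_segm has
+ spatial size 28 x 28 (the value 28 is purely illustrative), masks_est keeps the (5, C, 28, 28)
+ shape of estimated_segm and masks_gt stacks the cropped and resized GT masks into a
+ (5, 28, 28) tensor.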
+ Args: + proposals_targets: Iterable[Instances] + matched GT and estimated results, each item in the iterable + corresponds to data in 1 image + estimated_segm: torch.Tensor of estimated segmentation scores; + its spatial size defines the size to which GT masks are resized + Return: + masks_est: tensor(K, C, H, W) of float - class scores + masks_gt: tensor(K, H, W) of int64 - labels + """ + data = DataForMaskLoss() + masks_gt = [] + offset = 0 + assert estimated_segm.shape[2] == estimated_segm.shape[3], ( + f"Expected estimated segmentation to have a square shape, " + f"but the actual shape is {estimated_segm.shape[2:]}" + ) + mask_size = estimated_segm.shape[2] + num_proposals = sum(inst.proposal_boxes.tensor.size(0) for inst in proposals_targets) + num_estimated = estimated_segm.shape[0] + assert ( + num_proposals == num_estimated + ), "The number of proposals {} must be equal to the number of estimates {}".format( + num_proposals, num_estimated + ) + + for proposals_targets_per_image in proposals_targets: + n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0) + if not n_i: + continue + gt_masks_per_image = proposals_targets_per_image.gt_masks.crop_and_resize( + proposals_targets_per_image.proposal_boxes.tensor, mask_size + ).to(device=estimated_segm.device) + masks_gt.append(gt_masks_per_image) + offset += n_i + if masks_gt: + data.masks_est = estimated_segm + data.masks_gt = torch.cat(masks_gt, dim=0) + return data + + +class IIDIsotropicGaussianUVLoss(nn.Module): + """ + Loss for the case of iid residuals with isotropic covariance: + $Sigma_i = sigma_i^2 I$ + The loss (negative log likelihood) is then: + $1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$, + where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates + difference between estimated and ground truth UV values + For details, see: + N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning + Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019 + """ + + def __init__(self, sigma_lower_bound: float): + super(IIDIsotropicGaussianUVLoss, self).__init__() + self.sigma_lower_bound = sigma_lower_bound + self.log2pi = math.log(2 * math.pi) + + def forward( + self, + u: torch.Tensor, + v: torch.Tensor, + sigma_u: torch.Tensor, + target_u: torch.Tensor, + target_v: torch.Tensor, + ): + # compute $\sigma_i^2$ + # use sigma_lower_bound to avoid degenerate solution for variance + # (sigma -> 0) + sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound + # compute \|delta_i\|^2 + delta_t_delta = (u - target_u) ** 2 + (v - target_v) ** 2 + # the total loss from the formula above: + loss = 0.5 * (self.log2pi + 2 * torch.log(sigma2) + delta_t_delta / sigma2) + return loss.sum() + + +class IndepAnisotropicGaussianUVLoss(nn.Module): + """ + Loss for the case of independent residuals with anisotropic covariances: + $Sigma_i = sigma_i^2 I + r_i r_i^T$ + The loss (negative log likelihood) is then: + $1/2 sum_{i=1}^n (log(2 pi) + + log sigma_i^2 (sigma_i^2 + ||r_i||^2) + + ||delta_i||^2 / sigma_i^2 + - <delta_i, r_i>^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$, + where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates + difference between estimated and ground truth UV values + For details, see: + N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning + Dense Correspondences from Noisy Labels", p. 918--926, in Proc.
NIPS 2019 + """ + + def __init__(self, sigma_lower_bound: float): + super(IndepAnisotropicGaussianUVLoss, self).__init__() + self.sigma_lower_bound = sigma_lower_bound + self.log2pi = math.log(2 * math.pi) + + def forward( + self, + u: torch.Tensor, + v: torch.Tensor, + sigma_u: torch.Tensor, + kappa_u_est: torch.Tensor, + kappa_v_est: torch.Tensor, + target_u: torch.Tensor, + target_v: torch.Tensor, + ): + # compute $\sigma_i^2$ + sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound + # compute \|r_i\|^2 + r_sqnorm2 = kappa_u_est ** 2 + kappa_v_est ** 2 + delta_u = u - target_u + delta_v = v - target_v + # compute \|delta_i\|^2 + delta_sqnorm = delta_u ** 2 + delta_v ** 2 + delta_u_r_u = delta_u * kappa_u_est + delta_v_r_v = delta_v * kappa_v_est + # compute the scalar product <delta_i, r_i> + delta_r = delta_u_r_u + delta_v_r_v + # compute squared scalar product <delta_i, r_i>^2 + delta_r_sqnorm = delta_r ** 2 + denom2 = sigma2 * (sigma2 + r_sqnorm2) + loss = 0.5 * ( + self.log2pi + torch.log(denom2) + delta_sqnorm / sigma2 - delta_r_sqnorm / denom2 + ) + return loss.sum() + + +class DensePoseLosses(object): + def __init__(self, cfg): + # fmt: off + self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE + self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS + self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS + self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS + self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS + # fmt: on + self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS + self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg) + if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: + self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss( + self.confidence_model_cfg.uv_confidence.epsilon + ) + elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO: + self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss( + self.confidence_model_cfg.uv_confidence.epsilon + ) + + def __call__(self, proposals_with_gt, densepose_outputs, densepose_confidences): + if not self.segm_trained_by_masks: + return self.produce_densepose_losses( + proposals_with_gt, densepose_outputs, densepose_confidences + ) + else: + losses = {} + losses_densepose = self.produce_densepose_losses( + proposals_with_gt, densepose_outputs, densepose_confidences + ) + losses.update(losses_densepose) + losses_mask = self.produce_mask_losses( + proposals_with_gt, densepose_outputs, densepose_confidences + ) + losses.update(losses_mask) + return losses + + def produce_fake_mask_losses(self, densepose_outputs): + losses = {} + segm_scores, _, _, _ = densepose_outputs + losses["loss_densepose_S"] = segm_scores.sum() * 0 + return losses + + def produce_mask_losses(self, proposals_with_gt, densepose_outputs, densepose_confidences): + if not len(proposals_with_gt): + return self.produce_fake_mask_losses(densepose_outputs) + losses = {} + # densepose outputs are computed for all images and all bounding boxes; + # i.e.
if a batch has 4 images with (3, 1, 2, 1) proposals respectively, + # the outputs will have size(0) == 3+1+2+1 == 7 + segm_scores, _, _, _ = densepose_outputs + with torch.no_grad(): + mask_loss_data = _extract_data_for_mask_loss_from_matches( + proposals_with_gt, segm_scores + ) + if (mask_loss_data.masks_gt is None) or (mask_loss_data.masks_est is None): + return self.produce_fake_mask_losses(densepose_outputs) + losses["loss_densepose_S"] = ( + F.cross_entropy(mask_loss_data.masks_est, mask_loss_data.masks_gt.long()) * self.w_segm + ) + return losses + + def produce_fake_densepose_losses(self, densepose_outputs, densepose_confidences): + # we need to keep the same computation graph on all the GPUs to + # perform reduction properly. Hence even if we have no data on one + # of the GPUs, we still need to generate the computation graph. + # Add fake (zero) losses in the form Tensor.sum() * 0 + s, index_uv, u, v = densepose_outputs + conf_type = self.confidence_model_cfg.uv_confidence.type + ( + sigma_1, + sigma_2, + kappa_u, + kappa_v, + fine_segm_confidence, + coarse_segm_confidence, + ) = densepose_confidences + losses = {} + losses["loss_densepose_I"] = index_uv.sum() * 0 + if not self.segm_trained_by_masks: + losses["loss_densepose_S"] = s.sum() * 0 + if self.confidence_model_cfg.uv_confidence.enabled: + losses["loss_densepose_UV"] = (u.sum() + v.sum()) * 0 + if conf_type == DensePoseUVConfidenceType.IID_ISO: + losses["loss_densepose_UV"] += sigma_2.sum() * 0 + elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO: + losses["loss_densepose_UV"] += (sigma_2.sum() + kappa_u.sum() + kappa_v.sum()) * 0 + else: + losses["loss_densepose_U"] = u.sum() * 0 + losses["loss_densepose_V"] = v.sum() * 0 + return losses + + def produce_densepose_losses(self, proposals_with_gt, densepose_outputs, densepose_confidences): + losses = {} + # densepose outputs are computed for all images and all bounding boxes; + # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively, + # the outputs will have size(0) == 3+1+2+1 == 7 + s, index_uv, u, v = densepose_outputs + assert u.size(2) == v.size(2) + assert u.size(3) == v.size(3) + assert u.size(2) == index_uv.size(2) + assert u.size(3) == index_uv.size(3) + densepose_outputs_size = u.size() + + if not len(proposals_with_gt): + return self.produce_fake_densepose_losses(densepose_outputs, densepose_confidences) + ( + sigma_1, + sigma_2, + kappa_u, + kappa_v, + fine_segm_confidence, + coarse_segm_confidence, + ) = densepose_confidences + conf_type = self.confidence_model_cfg.uv_confidence.type + + tensors_helper = SingleTensorsHelper(proposals_with_gt) + n_batch = len(tensors_helper.i_with_dp) + + # NOTE: we need to keep the same computation graph on all the GPUs to + # perform reduction properly. Hence even if we have no data on one + # of the GPUs, we still need to generate the computation graph. 
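+ # (multiplying a sum of predictions by zero, as in produce_fake_densepose_losses, keeps those
+ # tensors in the autograd graph while contributing nothing to the loss value, so every GPU
+ # produces gradients for the same set of parameters)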
+ # Add fake (zero) loss in the form Tensor.sum() * 0 + if not n_batch: + return self.produce_fake_densepose_losses(densepose_outputs, densepose_confidences) + + interpolator = BilinearInterpolationHelper.from_matches( + tensors_helper, densepose_outputs_size + ) + + j_valid_fg = interpolator.j_valid * (tensors_helper.index_gt_all > 0) + + u_gt = tensors_helper.u_gt_all[j_valid_fg] + u_est_all = interpolator.extract_at_points(u[tensors_helper.i_with_dp]) + u_est = u_est_all[j_valid_fg] + + v_gt = tensors_helper.v_gt_all[j_valid_fg] + v_est_all = interpolator.extract_at_points(v[tensors_helper.i_with_dp]) + v_est = v_est_all[j_valid_fg] + + index_uv_gt = tensors_helper.index_gt_all[interpolator.j_valid] + index_uv_est_all = interpolator.extract_at_points( + index_uv[tensors_helper.i_with_dp], + slice_index_uv=slice(None), + w_ylo_xlo=interpolator.w_ylo_xlo[:, None], + w_ylo_xhi=interpolator.w_ylo_xhi[:, None], + w_yhi_xlo=interpolator.w_yhi_xlo[:, None], + w_yhi_xhi=interpolator.w_yhi_xhi[:, None], + ) + index_uv_est = index_uv_est_all[interpolator.j_valid, :] + + if self.confidence_model_cfg.uv_confidence.enabled: + sigma_2_est_all = interpolator.extract_at_points(sigma_2[tensors_helper.i_with_dp]) + sigma_2_est = sigma_2_est_all[j_valid_fg] + if conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]: + kappa_u_est_all = interpolator.extract_at_points(kappa_u[tensors_helper.i_with_dp]) + kappa_u_est = kappa_u_est_all[j_valid_fg] + kappa_v_est_all = interpolator.extract_at_points(kappa_v[tensors_helper.i_with_dp]) + kappa_v_est = kappa_v_est_all[j_valid_fg] + + # Resample everything to the estimated data size, no need to resample + # S_est then: + if not self.segm_trained_by_masks: + s_est = s[tensors_helper.i_with_dp] + with torch.no_grad(): + s_gt = _resample_data( + tensors_helper.s_gt.unsqueeze(1), + tensors_helper.bbox_xywh_gt, + tensors_helper.bbox_xywh_est, + self.heatmap_size, + self.heatmap_size, + mode="nearest", + padding_mode="zeros", + ).squeeze(1) + + # add point-based losses: + if self.confidence_model_cfg.uv_confidence.enabled: + if conf_type == DensePoseUVConfidenceType.IID_ISO: + uv_loss = ( + self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt) + * self.w_points + ) + losses["loss_densepose_UV"] = uv_loss + elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO: + uv_loss = ( + self.uv_loss_with_confidences( + u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt + ) + * self.w_points + ) + losses["loss_densepose_UV"] = uv_loss + else: + raise ValueError(f"Unknown confidence model type: {conf_type}") + else: + u_loss = F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points + losses["loss_densepose_U"] = u_loss + v_loss = F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points + losses["loss_densepose_V"] = v_loss + index_uv_loss = F.cross_entropy(index_uv_est, index_uv_gt.long()) * self.w_part + losses["loss_densepose_I"] = index_uv_loss + + if not self.segm_trained_by_masks: + if self.n_segm_chan == 2: + s_gt = s_gt > 0 + s_loss = F.cross_entropy(s_est, s_gt.long()) * self.w_segm + losses["loss_densepose_S"] = s_loss + return losses diff --git a/projects/DensePose/densepose/modeling/predictors/__init__.py b/projects/DensePose/densepose/modeling/predictors/__init__.py new file mode 100644 index 0000000..18e6ec7 --- /dev/null +++ b/projects/DensePose/densepose/modeling/predictors/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +from .chart import DensePoseChartPredictor +from .chart_confidence import DensePoseChartConfidencePredictorMixin +from .chart_with_confidence import DensePoseChartWithConfidencePredictor diff --git a/projects/DensePose/densepose/modeling/predictors/chart.py b/projects/DensePose/densepose/modeling/predictors/chart.py new file mode 100644 index 0000000..37667e9 --- /dev/null +++ b/projects/DensePose/densepose/modeling/predictors/chart.py @@ -0,0 +1,102 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import torch +from torch import nn + +from detectron2.config import CfgNode +from detectron2.layers import ConvTranspose2d, interpolate + +from ..utils import initialize_module_params + + +class DensePoseChartPredictor(nn.Module): + """ + Predictor (last layers of a DensePose model) that takes DensePose head outputs as an input + and produces 4 tensors which represent DensePose results for predefined body parts + (patches / charts): + - coarse segmentation [N, K, H, W] + - fine segmentation [N, C, H, W] + - U coordinates [N, C, H, W] + - V coordinates [N, C, H, W] + where + - N is the number of instances + - K is the number of coarse segmentation channels ( + 2 = foreground / background, + 15 = one of 14 body parts / background) + - C is the number of fine segmentation channels ( + 24 fine body parts / background) + - H and W are height and width of predictions + """ + + def __init__(self, cfg: CfgNode, input_channels: int): + """ + Initialize predictor using configuration options + + Args: + cfg (CfgNode): configuration options + input_channels (int): input tensor size along the channel dimension + """ + super().__init__() + dim_in = input_channels + n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS + dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1 + kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL + self.ann_index_lowres = ConvTranspose2d( + dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) + ) + self.index_uv_lowres = ConvTranspose2d( + dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) + ) + self.u_lowres = ConvTranspose2d( + dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) + ) + self.v_lowres = ConvTranspose2d( + dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) + ) + self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE + initialize_module_params(self) + + def interp2d(self, tensor_nchw: torch.Tensor): + """ + Bilinear interpolation method to be used for upscaling + + Args: + tensor_nchw (tensor): tensor of shape (N, C, H, W) + Return: + tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed + by applying the scale factor to H and W + """ + return interpolate( + tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False + ) + + def forward(self, head_outputs: torch.Tensor): + """ + Perform forward step on DensePose head outputs + + Args: + head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W] + Return: + - a tuple of 4 tensors containing DensePose predictions for charts: + * coarse segmentation estimate, a tensor of shape [N, K, Hout, Wout] + * fine segmentation estimate, a tensor of shape [N, C, Hout, Wout] + * U coordinates, a tensor of shape [N, C, Hout, Wout] + * V coordinates, a tensor of shape [N, C, Hout, Wout] + - a tuple of 4 tensors containing DensePose predictions for charts at reduced resolution: + * coarse 
segmentation estimate, a tensor of shape [N, K, Hout / 2, Wout / 2] + * fine segmentation estimate, a tensor of shape [N, C, Hout / 2, Wout / 2] + * U coordinates, a tensor of shape [N, C, Hout / 2, Wout / 2] + * V coordinates, a tensor of shape [N, C, Hout / 2, Wout / 2] + """ + coarse_segm_lowres = self.ann_index_lowres(head_outputs) + fine_segm_lowres = self.index_uv_lowres(head_outputs) + u_lowres = self.u_lowres(head_outputs) + v_lowres = self.v_lowres(head_outputs) + + coarse_segm = self.interp2d(coarse_segm_lowres) + fine_segm = self.interp2d(fine_segm_lowres) + u = self.interp2d(u_lowres) + v = self.interp2d(v_lowres) + siuv = (coarse_segm, fine_segm, u, v) + siuv_lowres = (coarse_segm_lowres, fine_segm_lowres, u_lowres, v_lowres) + return siuv, siuv_lowres diff --git a/projects/DensePose/densepose/modeling/predictors/chart_confidence.py b/projects/DensePose/densepose/modeling/predictors/chart_confidence.py new file mode 100644 index 0000000..8770736 --- /dev/null +++ b/projects/DensePose/densepose/modeling/predictors/chart_confidence.py @@ -0,0 +1,176 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +import torch +from torch.nn import functional as F + +from detectron2.config import CfgNode +from detectron2.layers import ConvTranspose2d + +from ..confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType +from ..utils import initialize_module_params + + +class DensePoseChartConfidencePredictorMixin: + """ + Predictor contains the last layers of a DensePose model that take DensePose head + outputs as an input and produce model outputs. Confidence predictor mixin is used + to generate confidences for segmentation and UV tensors estimated by some + base predictor. Several assumptions need to hold for the base predictor: + 1) the `forward` method must return SIUV tuple as the first result ( + S = coarse segmentation, I = fine segmentation, U and V are intrinsic + chart coordinates) + 2) `interp2d` method must be defined to perform bilinear interpolation; + the same method is typically used for SIUV and confidences + Confidence predictor mixin provides confidence estimates, as described in: + N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences + from Noisy Labels, NeurIPS 2019 + A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020 + """ + + def __init__(self, cfg: CfgNode, input_channels: int): + """ + Initialize confidence predictor using configuration options. 
+ + Args: + cfg (CfgNode): configuration options + input_channels (int): number of input channels + """ + # we rely on base predictor to call nn.Module.__init__ + super().__init__(cfg, input_channels) + self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg) + self._initialize_confidence_estimation_layers(cfg, input_channels) + initialize_module_params(self) + + def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int): + """ + Initialize confidence estimation layers based on configuration options + + Args: + cfg (CfgNode): configuration options + dim_in (int): number of input channels + """ + dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1 + kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL + if self.confidence_model_cfg.uv_confidence.enabled: + if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: + self.sigma_2_lowres = ConvTranspose2d( + dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) + ) + elif ( + self.confidence_model_cfg.uv_confidence.type + == DensePoseUVConfidenceType.INDEP_ANISO + ): + self.sigma_2_lowres = ConvTranspose2d( + dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) + ) + self.kappa_u_lowres = ConvTranspose2d( + dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) + ) + self.kappa_v_lowres = ConvTranspose2d( + dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) + ) + else: + raise ValueError( + f"Unknown confidence model type: " + f"{self.confidence_model_cfg.confidence_model_type}" + ) + if self.confidence_model_cfg.segm_confidence.enabled: + self.fine_segm_confidence_lowres = ConvTranspose2d( + dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) + ) + self.coarse_segm_confidence_lowres = ConvTranspose2d( + dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) + ) + + def forward(self, head_outputs: torch.Tensor): + """ + Perform forward operation on head outputs used as inputs for the predictor. + Calls forward method from the base predictor and uses its outputs to compute + confidences. 
+ + Args: + head_outputs (Tensor): head outputs used as predictor inputs + Return: + A tuple containing the following entries: + - SIUV tuple with possibly modified segmentation tensors + - various other outputs from the base predictor + - 6 tensors with estimated confidence model parameters at full resolution + (sigma_1, sigma_2, kappa_u, kappa_v, fine_segm_confidence, coarse_segm_confidence) + - 6 tensors with estimated confidence model parameters at half resolution + (sigma_1, sigma_2, kappa_u, kappa_v, fine_segm_confidence, coarse_segm_confidence) + """ + # assuming base class returns SIUV estimates in its first result + base_predictor_outputs = super().forward(head_outputs) + siuv = ( + base_predictor_outputs[0] + if isinstance(base_predictor_outputs, tuple) + else base_predictor_outputs + ) + coarse_segm, fine_segm, u, v = siuv + + sigma_1, sigma_2, kappa_u, kappa_v = None, None, None, None + sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres = None, None, None, None + fine_segm_confidence_lowres, fine_segm_confidence = None, None + coarse_segm_confidence_lowres, coarse_segm_confidence = None, None + if self.confidence_model_cfg.uv_confidence.enabled: + if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: + sigma_2_lowres = self.sigma_2_lowres(head_outputs) + # assuming base class defines interp2d method for bilinear interpolation + sigma_2 = self.interp2d(sigma_2_lowres) + elif ( + self.confidence_model_cfg.uv_confidence.type + == DensePoseUVConfidenceType.INDEP_ANISO + ): + sigma_2_lowres = self.sigma_2_lowres(head_outputs) + kappa_u_lowres = self.kappa_u_lowres(head_outputs) + kappa_v_lowres = self.kappa_v_lowres(head_outputs) + # assuming base class defines interp2d method for bilinear interpolation + sigma_2 = self.interp2d(sigma_2_lowres) + kappa_u = self.interp2d(kappa_u_lowres) + kappa_v = self.interp2d(kappa_v_lowres) + else: + raise ValueError( + f"Unknown confidence model type: " + f"{self.confidence_model_cfg.confidence_model_type}" + ) + if self.confidence_model_cfg.segm_confidence.enabled: + fine_segm_confidence_lowres = self.fine_segm_confidence_lowres(head_outputs) + # assuming base class defines interp2d method for bilinear interpolation + fine_segm_confidence = self.interp2d(fine_segm_confidence_lowres) + fine_segm_confidence = ( + F.softplus(fine_segm_confidence) + self.confidence_model_cfg.segm_confidence.epsilon + ) + fine_segm = fine_segm * torch.repeat_interleave( + fine_segm_confidence, fine_segm.shape[1], dim=1 + ) + coarse_segm_confidence_lowres = self.coarse_segm_confidence_lowres(head_outputs) + # assuming base class defines interp2d method for bilinear interpolation + coarse_segm_confidence = self.interp2d(coarse_segm_confidence_lowres) + coarse_segm_confidence = ( + F.softplus(coarse_segm_confidence) + + self.confidence_model_cfg.segm_confidence.epsilon + ) + coarse_segm = coarse_segm * torch.repeat_interleave( + coarse_segm_confidence, coarse_segm.shape[1], dim=1 + ) + results = [] + # append SIUV with possibly modified segmentation tensors + results.append((coarse_segm, fine_segm, u, v)) + # append the rest of base predictor outputs + if isinstance(base_predictor_outputs, tuple): + results.extend(base_predictor_outputs[1:]) + # append hi-res confidence estimates + results.append( + (sigma_1, sigma_2, kappa_u, kappa_v, fine_segm_confidence, coarse_segm_confidence) + ) + # append lo-res confidence estimates + results.append( + ( + sigma_1_lowres, + sigma_2_lowres, + kappa_u_lowres, + kappa_v_lowres, + 
fine_segm_confidence_lowres, + coarse_segm_confidence_lowres, + ) + ) + return tuple(results) diff --git a/projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py b/projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py new file mode 100644 index 0000000..96875b8 --- /dev/null +++ b/projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py @@ -0,0 +1,13 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +from . import DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor + + +class DensePoseChartWithConfidencePredictor( + DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor +): + """ + Predictor that combines chart and chart confidence estimation + """ + + pass diff --git a/projects/DensePose/densepose/modeling/roi_heads/deeplab.py b/projects/DensePose/densepose/modeling/roi_heads/deeplab.py new file mode 100644 index 0000000..106dfcf --- /dev/null +++ b/projects/DensePose/densepose/modeling/roi_heads/deeplab.py @@ -0,0 +1,263 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import fvcore.nn.weight_init as weight_init +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import CfgNode +from detectron2.layers import Conv2d + +from .registry import ROI_DENSEPOSE_HEAD_REGISTRY + + +@ROI_DENSEPOSE_HEAD_REGISTRY.register() +class DensePoseDeepLabHead(nn.Module): + """ + DensePose head using DeepLabV3 model from + "Rethinking Atrous Convolution for Semantic Image Segmentation" + . + """ + + def __init__(self, cfg: CfgNode, input_channels: int): + super(DensePoseDeepLabHead, self).__init__() + # fmt: off + hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM + kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL + norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM + self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS + self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON + # fmt: on + pad_size = kernel_size // 2 + n_channels = input_channels + + self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56 + self.add_module("ASPP", self.ASPP) + + if self.use_nonlocal: + self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True) + self.add_module("NLBlock", self.NLBlock) + # weight_init.c2_msra_fill(self.ASPP) + + for i in range(self.n_stacked_convs): + norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None + layer = Conv2d( + n_channels, + hidden_dim, + kernel_size, + stride=1, + padding=pad_size, + bias=not norm, + norm=norm_module, + ) + weight_init.c2_msra_fill(layer) + n_channels = hidden_dim + layer_name = self._get_layer_name(i) + self.add_module(layer_name, layer) + self.n_out_channels = hidden_dim + # initialize_module_params(self) + + def forward(self, features): + x0 = features + x = self.ASPP(x0) + if self.use_nonlocal: + x = self.NLBlock(x) + output = x + for i in range(self.n_stacked_convs): + layer_name = self._get_layer_name(i) + x = getattr(self, layer_name)(x) + x = F.relu(x) + output = x + return output + + def _get_layer_name(self, i: int): + layer_name = "body_conv_fcn{}".format(i + 1) + return layer_name + + +# Copied from +# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py +# See https://arxiv.org/pdf/1706.05587.pdf for details +class ASPPConv(nn.Sequential): + def __init__(self, in_channels, out_channels, dilation): + modules = [ + nn.Conv2d( + in_channels, out_channels, 3, padding=dilation, 
dilation=dilation, bias=False + ), + nn.GroupNorm(32, out_channels), + nn.ReLU(), + ] + super(ASPPConv, self).__init__(*modules) + + +class ASPPPooling(nn.Sequential): + def __init__(self, in_channels, out_channels): + super(ASPPPooling, self).__init__( + nn.AdaptiveAvgPool2d(1), + nn.Conv2d(in_channels, out_channels, 1, bias=False), + nn.GroupNorm(32, out_channels), + nn.ReLU(), + ) + + def forward(self, x): + size = x.shape[-2:] + x = super(ASPPPooling, self).forward(x) + return F.interpolate(x, size=size, mode="bilinear", align_corners=False) + + +class ASPP(nn.Module): + def __init__(self, in_channels, atrous_rates, out_channels): + super(ASPP, self).__init__() + modules = [] + modules.append( + nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, bias=False), + nn.GroupNorm(32, out_channels), + nn.ReLU(), + ) + ) + + rate1, rate2, rate3 = tuple(atrous_rates) + modules.append(ASPPConv(in_channels, out_channels, rate1)) + modules.append(ASPPConv(in_channels, out_channels, rate2)) + modules.append(ASPPConv(in_channels, out_channels, rate3)) + modules.append(ASPPPooling(in_channels, out_channels)) + + self.convs = nn.ModuleList(modules) + + self.project = nn.Sequential( + nn.Conv2d(5 * out_channels, out_channels, 1, bias=False), + # nn.BatchNorm2d(out_channels), + nn.ReLU() + # nn.Dropout(0.5) + ) + + def forward(self, x): + res = [] + for conv in self.convs: + res.append(conv(x)) + res = torch.cat(res, dim=1) + return self.project(res) + + +# copied from +# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py +# See https://arxiv.org/abs/1711.07971 for details +class _NonLocalBlockND(nn.Module): + def __init__( + self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True + ): + super(_NonLocalBlockND, self).__init__() + + assert dimension in [1, 2, 3] + + self.dimension = dimension + self.sub_sample = sub_sample + + self.in_channels = in_channels + self.inter_channels = inter_channels + + if self.inter_channels is None: + self.inter_channels = in_channels // 2 + if self.inter_channels == 0: + self.inter_channels = 1 + + if dimension == 3: + conv_nd = nn.Conv3d + max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) + bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d + elif dimension == 2: + conv_nd = nn.Conv2d + max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) + bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d + else: + conv_nd = nn.Conv1d + max_pool_layer = nn.MaxPool1d(kernel_size=2) + bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d + + self.g = conv_nd( + in_channels=self.in_channels, + out_channels=self.inter_channels, + kernel_size=1, + stride=1, + padding=0, + ) + + if bn_layer: + self.W = nn.Sequential( + conv_nd( + in_channels=self.inter_channels, + out_channels=self.in_channels, + kernel_size=1, + stride=1, + padding=0, + ), + bn(32, self.in_channels), + ) + nn.init.constant_(self.W[1].weight, 0) + nn.init.constant_(self.W[1].bias, 0) + else: + self.W = conv_nd( + in_channels=self.inter_channels, + out_channels=self.in_channels, + kernel_size=1, + stride=1, + padding=0, + ) + nn.init.constant_(self.W.weight, 0) + nn.init.constant_(self.W.bias, 0) + + self.theta = conv_nd( + in_channels=self.in_channels, + out_channels=self.inter_channels, + kernel_size=1, + stride=1, + padding=0, + ) + self.phi = conv_nd( + in_channels=self.in_channels, + out_channels=self.inter_channels, + kernel_size=1, + stride=1, + padding=0, + ) + + if sub_sample: + self.g = nn.Sequential(self.g, max_pool_layer) + self.phi = 
nn.Sequential(self.phi, max_pool_layer) + + def forward(self, x): + """ + :param x: (b, c, t, h, w) + :return: + """ + + batch_size = x.size(0) + + g_x = self.g(x).view(batch_size, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + theta_x = self.theta(x).view(batch_size, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(batch_size, self.inter_channels, -1) + f = torch.matmul(theta_x, phi_x) + f_div_C = F.softmax(f, dim=-1) + + y = torch.matmul(f_div_C, g_x) + y = y.permute(0, 2, 1).contiguous() + y = y.view(batch_size, self.inter_channels, *x.size()[2:]) + W_y = self.W(y) + z = W_y + x + + return z + + +class NONLocalBlock2D(_NonLocalBlockND): + def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True): + super(NONLocalBlock2D, self).__init__( + in_channels, + inter_channels=inter_channels, + dimension=2, + sub_sample=sub_sample, + bn_layer=bn_layer, + ) diff --git a/projects/DensePose/densepose/modeling/roi_heads/registry.py b/projects/DensePose/densepose/modeling/roi_heads/registry.py new file mode 100644 index 0000000..29e58cf --- /dev/null +++ b/projects/DensePose/densepose/modeling/roi_heads/registry.py @@ -0,0 +1,5 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +from detectron2.utils.registry import Registry + +ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD") diff --git a/projects/DensePose/densepose/modeling/roi_heads/roi_head.py b/projects/DensePose/densepose/modeling/roi_heads/roi_head.py new file mode 100644 index 0000000..68a0b3a --- /dev/null +++ b/projects/DensePose/densepose/modeling/roi_heads/roi_head.py @@ -0,0 +1,224 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import numpy as np +from typing import Dict, List, Optional +import fvcore.nn.weight_init as weight_init +import torch +import torch.nn as nn +from torch.nn import functional as F + +from detectron2.layers import Conv2d, ShapeSpec, get_norm +from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads +from detectron2.modeling.poolers import ROIPooler +from detectron2.modeling.roi_heads import select_foreground_proposals +from detectron2.structures import ImageList, Instances + +from .. import ( + build_densepose_data_filter, + build_densepose_head, + build_densepose_losses, + build_densepose_predictor, + densepose_inference, +) + + +class Decoder(nn.Module): + """ + A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper + (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from + all levels of the FPN into single output. 
+ """ + + def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features): + super(Decoder, self).__init__() + + # fmt: off + self.in_features = in_features + feature_strides = {k: v.stride for k, v in input_shape.items()} + feature_channels = {k: v.channels for k, v in input_shape.items()} + num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES + conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS + self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE + norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM + # fmt: on + + self.scale_heads = [] + for in_feature in self.in_features: + head_ops = [] + head_length = max( + 1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride)) + ) + for k in range(head_length): + conv = Conv2d( + feature_channels[in_feature] if k == 0 else conv_dims, + conv_dims, + kernel_size=3, + stride=1, + padding=1, + bias=not norm, + norm=get_norm(norm, conv_dims), + activation=F.relu, + ) + weight_init.c2_msra_fill(conv) + head_ops.append(conv) + if feature_strides[in_feature] != self.common_stride: + head_ops.append( + nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) + ) + self.scale_heads.append(nn.Sequential(*head_ops)) + self.add_module(in_feature, self.scale_heads[-1]) + self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) + weight_init.c2_msra_fill(self.predictor) + + def forward(self, features: List[torch.Tensor]): + for i, _ in enumerate(self.in_features): + if i == 0: + x = self.scale_heads[i](features[i]) + else: + x = x + self.scale_heads[i](features[i]) + x = self.predictor(x) + return x + + +@ROI_HEADS_REGISTRY.register() +class DensePoseROIHeads(StandardROIHeads): + """ + A Standard ROIHeads which contains an addition of DensePose head. + """ + + def __init__(self, cfg, input_shape): + super().__init__(cfg, input_shape) + self._init_densepose_head(cfg, input_shape) + + def _init_densepose_head(self, cfg, input_shape): + # fmt: off + self.densepose_on = cfg.MODEL.DENSEPOSE_ON + if not self.densepose_on: + return + self.densepose_data_filter = build_densepose_data_filter(cfg) + dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION + dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO + dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE + self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON + # fmt: on + if self.use_decoder: + dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,) + else: + dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features) + in_channels = [input_shape[f].channels for f in self.in_features][0] + + if self.use_decoder: + self.decoder = Decoder(cfg, input_shape, self.in_features) + + self.densepose_pooler = ROIPooler( + output_size=dp_pooler_resolution, + scales=dp_pooler_scales, + sampling_ratio=dp_pooler_sampling_ratio, + pooler_type=dp_pooler_type, + ) + self.densepose_head = build_densepose_head(cfg, in_channels) + self.densepose_predictor = build_densepose_predictor( + cfg, self.densepose_head.n_out_channels + ) + self.densepose_losses = build_densepose_losses(cfg) + + def _forward_densepose(self, features: Dict[str, torch.Tensor], instances: List[Instances]): + """ + Forward logic of the densepose prediction branch. + + Args: + features (dict[str, Tensor]): input data as a mapping from feature + map name to tensor. 
Axis 0 represents the number of images `N` in + the input data; axes 1-3 are channels, height, and width, which may + vary between feature maps (e.g., if a feature pyramid is used). + instances (list[Instances]): length `N` list of `Instances`. The i-th + `Instances` contains instances for the i-th input image, + In training, they can be the proposals. + In inference, they can be the predicted boxes. + + Returns: + In training, a dict of losses. + In inference, update `instances` with new fields "densepose" and return it. + """ + if not self.densepose_on: + return {} if self.training else instances + + features = [features[f] for f in self.in_features] + if self.training: + proposals, _ = select_foreground_proposals(instances, self.num_classes) + features, proposals = self.densepose_data_filter(features, proposals) + if len(proposals) > 0: + proposal_boxes = [x.proposal_boxes for x in proposals] + + if self.use_decoder: + features = [self.decoder(features)] + + features_dp = self.densepose_pooler(features, proposal_boxes) + densepose_head_outputs = self.densepose_head(features_dp) + densepose_outputs, _, confidences, _ = self.densepose_predictor( + densepose_head_outputs + ) + densepose_loss_dict = self.densepose_losses( + proposals, densepose_outputs, confidences + ) + return densepose_loss_dict + else: + pred_boxes = [x.pred_boxes for x in instances] + + if self.use_decoder: + features = [self.decoder(features)] + + features_dp = self.densepose_pooler(features, pred_boxes) + if len(features_dp) > 0: + densepose_head_outputs = self.densepose_head(features_dp) + densepose_outputs, _, confidences, _ = self.densepose_predictor( + densepose_head_outputs + ) + else: + # If no detection occurred instances + # set densepose_outputs to empty tensors + empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device) + densepose_outputs = tuple([empty_tensor] * 4) + confidences = tuple([empty_tensor] * 6) + + densepose_inference(densepose_outputs, confidences, instances) + return instances + + def forward( + self, + images: ImageList, + features: Dict[str, torch.Tensor], + proposals: List[Instances], + targets: Optional[List[Instances]] = None, + ): + instances, losses = super().forward(images, features, proposals, targets) + del targets, images + + if self.training: + losses.update(self._forward_densepose(features, instances)) + return instances, losses + + def forward_with_given_boxes( + self, features: Dict[str, torch.Tensor], instances: List[Instances] + ): + """ + Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. + + This is useful for downstream tasks where a box is known, but need to obtain + other attributes (outputs of other heads). + Test-time augmentation also uses this. + + Args: + features: same as in `forward()` + instances (list[Instances]): instances to predict other outputs. Expect the keys + "pred_boxes" and "pred_classes" to exist. + + Returns: + instances (list[Instances]): + the same `Instances` objects, with extra + fields such as `pred_masks` or `pred_keypoints`. + """ + + instances = super().forward_with_given_boxes(features, instances) + instances = self._forward_densepose(features, instances) + return instances diff --git a/projects/DensePose/densepose/modeling/roi_heads/v1convx.py b/projects/DensePose/densepose/modeling/roi_heads/v1convx.py new file mode 100644 index 0000000..ef02b0e --- /dev/null +++ b/projects/DensePose/densepose/modeling/roi_heads/v1convx.py @@ -0,0 +1,64 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved + +import torch +from torch import nn +from torch.nn import functional as F + +from detectron2.config import CfgNode +from detectron2.layers import Conv2d + +from ..utils import initialize_module_params +from .registry import ROI_DENSEPOSE_HEAD_REGISTRY + + +@ROI_DENSEPOSE_HEAD_REGISTRY.register() +class DensePoseV1ConvXHead(nn.Module): + """ + Fully convolutional DensePose head. + """ + + def __init__(self, cfg: CfgNode, input_channels: int): + """ + Initialize DensePose fully convolutional head + + Args: + cfg (CfgNode): configuration options + input_channels (int): number of input channels + """ + super(DensePoseV1ConvXHead, self).__init__() + # fmt: off + hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM + kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL + self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS + # fmt: on + pad_size = kernel_size // 2 + n_channels = input_channels + for i in range(self.n_stacked_convs): + layer = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=pad_size) + layer_name = self._get_layer_name(i) + self.add_module(layer_name, layer) + n_channels = hidden_dim + self.n_out_channels = n_channels + initialize_module_params(self) + + def forward(self, features: torch.Tensor): + """ + Apply DensePose fully convolutional head to the input features + + Args: + features (tensor): input features + Result: + A tensor of DensePose head outputs + """ + x = features + output = x + for i in range(self.n_stacked_convs): + layer_name = self._get_layer_name(i) + x = getattr(self, layer_name)(x) + x = F.relu(x) + output = x + return output + + def _get_layer_name(self, i: int): + layer_name = "body_conv_fcn{}".format(i + 1) + return layer_name diff --git a/projects/DensePose/query_db.py b/projects/DensePose/query_db.py new file mode 100644 index 0000000..6d3ea2f --- /dev/null +++ b/projects/DensePose/query_db.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +import argparse +import logging +import os +import sys +from timeit import default_timer as timer +from typing import Any, ClassVar, Dict, List +import torch +from fvcore.common.file_io import PathManager + +from detectron2.data.catalog import DatasetCatalog +from detectron2.utils.logger import setup_logger + +from densepose.data.structures import DensePoseDataRelative +from densepose.utils.dbhelper import EntrySelector +from densepose.utils.logger import verbosity_to_level +from densepose.vis.base import CompoundVisualizer +from densepose.vis.bounding_box import BoundingBoxVisualizer +from densepose.vis.densepose import ( + DensePoseDataCoarseSegmentationVisualizer, + DensePoseDataPointsIVisualizer, + DensePoseDataPointsUVisualizer, + DensePoseDataPointsVisualizer, + DensePoseDataPointsVVisualizer, +) + +DOC = """Query DB - a tool to print / visualize data from a database +""" + +LOGGER_NAME = "query_db" + +logger = logging.getLogger(LOGGER_NAME) + +_ACTION_REGISTRY: Dict[str, "Action"] = {} + + +class Action(object): + @classmethod + def add_arguments(cls: type, parser: argparse.ArgumentParser): + parser.add_argument( + "-v", + "--verbosity", + action="count", + help="Verbose mode. 
Multiple -v options increase the verbosity.", + ) + + +def register_action(cls: type): + """ + Decorator for action classes to automate action registration + """ + global _ACTION_REGISTRY + _ACTION_REGISTRY[cls.COMMAND] = cls + return cls + + +class EntrywiseAction(Action): + @classmethod + def add_arguments(cls: type, parser: argparse.ArgumentParser): + super(EntrywiseAction, cls).add_arguments(parser) + parser.add_argument( + "dataset", metavar="", help="Dataset name (e.g. densepose_coco_2014_train)" + ) + parser.add_argument( + "selector", + metavar="", + help="Dataset entry selector in the form field1[:type]=value1[," + "field2[:type]=value_min-value_max...] which selects all " + "entries from the dataset that satisfy the constraints", + ) + parser.add_argument( + "--max-entries", metavar="N", help="Maximum number of entries to process", type=int + ) + + @classmethod + def execute(cls: type, args: argparse.Namespace): + dataset = setup_dataset(args.dataset) + entry_selector = EntrySelector.from_string(args.selector) + context = cls.create_context(args) + if args.max_entries is not None: + for _, entry in zip(range(args.max_entries), dataset): + if entry_selector(entry): + cls.execute_on_entry(entry, context) + else: + for entry in dataset: + if entry_selector(entry): + cls.execute_on_entry(entry, context) + + @classmethod + def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: + context = {} + return context + + +@register_action +class PrintAction(EntrywiseAction): + """ + Print action that outputs selected entries to stdout + """ + + COMMAND: ClassVar[str] = "print" + + @classmethod + def add_parser(cls: type, subparsers: argparse._SubParsersAction): + parser = subparsers.add_parser(cls.COMMAND, help="Output selected entries to stdout. 
") + cls.add_arguments(parser) + parser.set_defaults(func=cls.execute) + + @classmethod + def add_arguments(cls: type, parser: argparse.ArgumentParser): + super(PrintAction, cls).add_arguments(parser) + + @classmethod + def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]): + import pprint + + printer = pprint.PrettyPrinter(indent=2, width=200, compact=True) + printer.pprint(entry) + + +@register_action +class ShowAction(EntrywiseAction): + """ + Show action that visualizes selected entries on an image + """ + + COMMAND: ClassVar[str] = "show" + VISUALIZERS: ClassVar[Dict[str, object]] = { + "dp_segm": DensePoseDataCoarseSegmentationVisualizer(), + "dp_i": DensePoseDataPointsIVisualizer(), + "dp_u": DensePoseDataPointsUVisualizer(), + "dp_v": DensePoseDataPointsVVisualizer(), + "dp_pts": DensePoseDataPointsVisualizer(), + "bbox": BoundingBoxVisualizer(), + } + + @classmethod + def add_parser(cls: type, subparsers: argparse._SubParsersAction): + parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries") + cls.add_arguments(parser) + parser.set_defaults(func=cls.execute) + + @classmethod + def add_arguments(cls: type, parser: argparse.ArgumentParser): + super(ShowAction, cls).add_arguments(parser) + parser.add_argument( + "visualizations", + metavar="", + help="Comma separated list of visualizations, possible values: " + "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))), + ) + parser.add_argument( + "--output", + metavar="", + default="output.png", + help="File name to save output to", + ) + + @classmethod + def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]): + import cv2 + import numpy as np + + image_fpath = PathManager.get_local_path(entry["file_name"]) + image = cv2.imread(image_fpath, cv2.IMREAD_GRAYSCALE) + image = np.tile(image[:, :, np.newaxis], [1, 1, 3]) + datas = cls._extract_data_for_visualizers_from_entry(context["vis_specs"], entry) + visualizer = context["visualizer"] + image_vis = visualizer.visualize(image, datas) + entry_idx = context["entry_idx"] + 1 + out_fname = cls._get_out_fname(entry_idx, context["out_fname"]) + cv2.imwrite(out_fname, image_vis) + logger.info(f"Output saved to {out_fname}") + context["entry_idx"] += 1 + + @classmethod + def _get_out_fname(cls: type, entry_idx: int, fname_base: str): + base, ext = os.path.splitext(fname_base) + return base + ".{0:04d}".format(entry_idx) + ext + + @classmethod + def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: + vis_specs = args.visualizations.split(",") + visualizers = [] + for vis_spec in vis_specs: + vis = cls.VISUALIZERS[vis_spec] + visualizers.append(vis) + context = { + "vis_specs": vis_specs, + "visualizer": CompoundVisualizer(visualizers), + "out_fname": args.output, + "entry_idx": 0, + } + return context + + @classmethod + def _extract_data_for_visualizers_from_entry( + cls: type, vis_specs: List[str], entry: Dict[str, Any] + ): + dp_list = [] + bbox_list = [] + for annotation in entry["annotations"]: + is_valid, _ = DensePoseDataRelative.validate_annotation(annotation) + if not is_valid: + continue + bbox = torch.as_tensor(annotation["bbox"]) + bbox_list.append(bbox) + dp_data = DensePoseDataRelative(annotation) + dp_list.append(dp_data) + datas = [] + for vis_spec in vis_specs: + datas.append(bbox_list if "bbox" == vis_spec else (bbox_list, dp_list)) + return datas + + +def setup_dataset(dataset_name): + logger.info("Loading dataset {}".format(dataset_name)) + start = timer() + dataset = 
DatasetCatalog.get(dataset_name) + stop = timer() + logger.info("Loaded dataset {} in {:.3f}s".format(dataset_name, stop - start)) + return dataset + + +def create_argument_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description=DOC, + formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120), + ) + parser.set_defaults(func=lambda _: parser.print_help(sys.stdout)) + subparsers = parser.add_subparsers(title="Actions") + for _, action in _ACTION_REGISTRY.items(): + action.add_parser(subparsers) + return parser + + +def main(): + parser = create_argument_parser() + args = parser.parse_args() + verbosity = args.verbosity if hasattr(args, "verbosity") else None + global logger + logger = setup_logger(name=LOGGER_NAME) + logger.setLevel(verbosity_to_level(verbosity)) + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/projects/DensePose/train_net.py b/projects/DensePose/train_net.py new file mode 100644 index 0000000..72c74c3 --- /dev/null +++ b/projects/DensePose/train_net.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved + +""" +DensePose Training Script. + +This script is similar to the training script in detectron2/tools. + +It is an example of how a user might use detectron2 for a new project. +""" + +from fvcore.common.file_io import PathManager + +import detectron2.utils.comm as comm +from detectron2.config import get_cfg +from detectron2.engine import default_argument_parser, default_setup, hooks, launch +from detectron2.evaluation import verify_results +from detectron2.utils.logger import setup_logger + +from densepose import add_densepose_config +from densepose.engine import Trainer +from densepose.modeling.densepose_checkpoint import DensePoseCheckpointer + + +def setup(args): + cfg = get_cfg() + add_densepose_config(cfg) + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + default_setup(cfg, args) + # Setup logger for "densepose" module + setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="densepose") + return cfg + + +def main(args): + cfg = setup(args) + # disable strict kwargs checking: allow one to specify path handle + # hints through kwargs, like timeout in DP evaluation + PathManager.set_strict_kwargs_checking(False) + + if args.eval_only: + model = Trainer.build_model(cfg) + DensePoseCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( + cfg.MODEL.WEIGHTS, resume=args.resume + ) + res = Trainer.test(cfg, model) + if cfg.TEST.AUG.ENABLED: + res.update(Trainer.test_with_TTA(cfg, model)) + if comm.is_main_process(): + verify_results(cfg, res) + return res + + trainer = Trainer(cfg) + trainer.resume_or_load(resume=args.resume) + if cfg.TEST.AUG.ENABLED: + trainer.register_hooks( + [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] + ) + return trainer.train() + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + )
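As a quick sanity check for the modules added above, the following minimal sketch (not part of the patch itself) wires a DensePose head and predictor together the same way DensePoseROIHeads._init_densepose_head does and unpacks the predictor output the way _forward_densepose does. The channel count (256), the pooled ROI size (28x28), and the assumption that the densepose package is importable from the project root are illustrative, not prescribed by the patch.

import torch

from detectron2.config import get_cfg

from densepose import add_densepose_config
from densepose.modeling import build_densepose_head, build_densepose_predictor

cfg = get_cfg()
add_densepose_config(cfg)

# build the head and the predictor the same way the ROI heads do
head = build_densepose_head(cfg, 256)  # 256 = assumed FPN channel count
predictor = build_densepose_predictor(cfg, head.n_out_channels)

# a dummy batch of 2 pooled ROI feature maps (shape is an assumption)
features_dp = torch.rand(2, 256, 28, 28)
head_outputs = head(features_dp)

# same unpacking as in DensePoseROIHeads._forward_densepose
densepose_outputs, _, confidences, _ = predictor(head_outputs)
coarse_segm, fine_segm, u, v = densepose_outputs
print(coarse_segm.shape, fine_segm.shape, u.shape, v.shape)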