From 28e2825941e3345d61ddf028402962726c4dbac1 Mon Sep 17 00:00:00 2001
From: RE-OWOD <95522332+RE-OWOD@users.noreply.github.com>
Date: Tue, 4 Jan 2022 13:49:38 +0800
Subject: [PATCH] Add files via upload
---
projects/DensePose/README.md | 53 +
projects/DensePose/apply_net.py | 319 +++++
.../configs/Base-DensePose-RCNN-FPN.yaml | 48 +
.../densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml | 16 +
.../densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml | 23 +
.../densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml | 23 +
.../densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml | 18 +
.../densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml | 16 +
.../densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml | 18 +
.../densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml | 16 +
.../densepose_rcnn_R_101_FPN_DL_s1x.yaml | 10 +
.../densepose_rcnn_R_101_FPN_WC1M_s1x.yaml | 18 +
.../densepose_rcnn_R_101_FPN_WC1_s1x.yaml | 16 +
.../densepose_rcnn_R_101_FPN_WC2M_s1x.yaml | 18 +
.../densepose_rcnn_R_101_FPN_WC2_s1x.yaml | 16 +
.../configs/densepose_rcnn_R_101_FPN_s1x.yaml | 8 +
.../densepose_rcnn_R_101_FPN_s1x_legacy.yaml | 17 +
.../densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml | 18 +
.../densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml | 16 +
.../densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml | 18 +
.../densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml | 16 +
.../densepose_rcnn_R_50_FPN_DL_s1x.yaml | 10 +
.../densepose_rcnn_R_50_FPN_WC1M_s1x.yaml | 20 +
.../densepose_rcnn_R_50_FPN_WC1_s1x.yaml | 16 +
.../densepose_rcnn_R_50_FPN_WC2M_s1x.yaml | 18 +
.../densepose_rcnn_R_50_FPN_WC2_s1x.yaml | 16 +
.../configs/densepose_rcnn_R_50_FPN_s1x.yaml | 8 +
.../densepose_rcnn_R_50_FPN_s1x_legacy.yaml | 17 +
.../configs/evolution/Base-RCNN-FPN-MC-B.yaml | 121 ++
.../configs/evolution/Base-RCNN-FPN-MC.yaml | 91 ++
.../densepose_R_101_FPN_1x_Atop10_toP.yaml | 19 +
.../densepose_R_101_FPN_DL_1x_Atop10_toP.yaml | 19 +
...epose_R_101_FPN_DL_WC1M_1x_Atop10_toP.yaml | 29 +
...sepose_R_101_FPN_DL_WC1_1x_Atop10_toP.yaml | 27 +
...ensepose_R_101_FPN_WC1M_1x_Atop10_toP.yaml | 29 +
...densepose_R_101_FPN_WC1_1x_Atop10_toP.yaml | 27 +
.../densepose_R_50_FPN_1x_Atop10_toP.yaml | 19 +
.../densepose_R_50_FPN_DL_1x_Atop10_toP.yaml | 19 +
...sepose_R_50_FPN_DL_WC1M_1x_Atop10_toP.yaml | 29 +
...nsepose_R_50_FPN_DL_WC1_1x_Atop10_toP.yaml | 27 +
...densepose_R_50_FPN_WC1M_1x_Atop10_toP.yaml | 29 +
...nsepose_R_50_FPN_WC1M_1x_Atop10_toP_B.yaml | 30 +
.../densepose_R_50_FPN_WC1_1x_Atop10_toP.yaml | 27 +
.../evolution/faster_rcnn_R_50_FPN_1x_MC.yaml | 7 +
...cnn_HRFPN_HRNet_w32_training_acc_test.yaml | 7 +
...nsepose_rcnn_R_50_FPN_DL_instant_test.yaml | 11 +
..._rcnn_R_50_FPN_TTA_inference_acc_test.yaml | 13 +
...sepose_rcnn_R_50_FPN_WC1_instant_test.yaml | 19 +
...sepose_rcnn_R_50_FPN_WC2_instant_test.yaml | 19 +
...pose_rcnn_R_50_FPN_inference_acc_test.yaml | 8 +
.../densepose_rcnn_R_50_FPN_instant_test.yaml | 9 +
...epose_rcnn_R_50_FPN_training_acc_test.yaml | 18 +
projects/DensePose/densepose/config.py | 171 +++
projects/DensePose/densepose/data/__init__.py | 23 +
projects/DensePose/densepose/data/build.py | 604 +++++++++
.../densepose/data/combined_loader.py | 44 +
.../densepose/data/dataset_mapper.py | 168 +++
.../densepose/data/datasets/__init__.py | 5 +
.../densepose/data/datasets/builtin.py | 13 +
.../densepose/data/datasets/chimpnsee.py | 28 +
.../DensePose/densepose/data/datasets/coco.py | 324 +++++
.../densepose/data/datasets/dataset_type.py | 11 +
.../densepose/data/image_list_dataset.py | 53 +
.../densepose/data/inference_based_loader.py | 146 +++
.../densepose/data/samplers/__init__.py | 6 +
.../densepose/data/samplers/densepose_base.py | 190 +++
.../samplers/densepose_confidence_based.py | 91 ++
.../data/samplers/densepose_uniform.py | 41 +
.../data/samplers/mask_from_densepose.py | 59 +
.../data/samplers/prediction_to_gt.py | 80 ++
.../DensePose/densepose/data/structures.py | 703 ++++++++++
.../densepose/data/transform/__init__.py | 3 +
.../densepose/data/transform/image.py | 37 +
projects/DensePose/densepose/data/utils.py | 22 +
.../densepose/data/video/__init__.py | 17 +
.../densepose/data/video/frame_selector.py | 87 ++
.../data/video/video_keyframe_dataset.py | 232 ++++
.../densepose/densepose_coco_evaluation.py | 1157 +++++++++++++++++
.../DensePose/densepose/engine/__init__.py | 3 +
.../DensePose/densepose/engine/trainer.py | 118 ++
projects/DensePose/densepose/evaluator.py | 224 ++++
.../DensePose/densepose/modeling/build.py | 66 +
.../densepose/modeling/confidence.py | 73 ++
.../modeling/densepose_checkpoint.py | 35 +
.../DensePose/densepose/modeling/filter.py | 94 ++
.../DensePose/densepose/modeling/hrfpn.py | 181 +++
.../DensePose/densepose/modeling/hrnet.py | 473 +++++++
.../DensePose/densepose/modeling/inference.py | 83 ++
.../densepose/modeling/losses/__init__.py | 3 +
.../modeling/losses/densepose_losses.py | 729 +++++++++++
.../densepose/modeling/predictors/__init__.py | 5 +
.../densepose/modeling/predictors/chart.py | 102 ++
.../modeling/predictors/chart_confidence.py | 176 +++
.../predictors/chart_with_confidence.py | 13 +
.../densepose/modeling/roi_heads/deeplab.py | 263 ++++
.../densepose/modeling/roi_heads/registry.py | 5 +
.../densepose/modeling/roi_heads/roi_head.py | 224 ++++
.../densepose/modeling/roi_heads/v1convx.py | 64 +
projects/DensePose/query_db.py | 250 ++++
projects/DensePose/train_net.py | 74 ++
100 files changed, 9052 insertions(+)
create mode 100644 projects/DensePose/apply_net.py
create mode 100644 projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml
create mode 100644 projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml
create mode 100644 projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml
create mode 100644 projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml
create mode 100644 projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml
create mode 100644 projects/DensePose/configs/evolution/Base-RCNN-FPN-MC-B.yaml
create mode 100644 projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1M_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1M_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP_B.yaml
create mode 100644 projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1_1x_Atop10_toP.yaml
create mode 100644 projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml
create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_training_acc_test.yaml
create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml
create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml
create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml
create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml
create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml
create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml
create mode 100644 projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml
create mode 100644 projects/DensePose/densepose/config.py
create mode 100644 projects/DensePose/densepose/data/__init__.py
create mode 100644 projects/DensePose/densepose/data/build.py
create mode 100644 projects/DensePose/densepose/data/combined_loader.py
create mode 100644 projects/DensePose/densepose/data/dataset_mapper.py
create mode 100644 projects/DensePose/densepose/data/datasets/__init__.py
create mode 100644 projects/DensePose/densepose/data/datasets/builtin.py
create mode 100644 projects/DensePose/densepose/data/datasets/chimpnsee.py
create mode 100644 projects/DensePose/densepose/data/datasets/coco.py
create mode 100644 projects/DensePose/densepose/data/datasets/dataset_type.py
create mode 100644 projects/DensePose/densepose/data/image_list_dataset.py
create mode 100644 projects/DensePose/densepose/data/inference_based_loader.py
create mode 100644 projects/DensePose/densepose/data/samplers/__init__.py
create mode 100644 projects/DensePose/densepose/data/samplers/densepose_base.py
create mode 100644 projects/DensePose/densepose/data/samplers/densepose_confidence_based.py
create mode 100644 projects/DensePose/densepose/data/samplers/densepose_uniform.py
create mode 100644 projects/DensePose/densepose/data/samplers/mask_from_densepose.py
create mode 100644 projects/DensePose/densepose/data/samplers/prediction_to_gt.py
create mode 100644 projects/DensePose/densepose/data/structures.py
create mode 100644 projects/DensePose/densepose/data/transform/__init__.py
create mode 100644 projects/DensePose/densepose/data/transform/image.py
create mode 100644 projects/DensePose/densepose/data/utils.py
create mode 100644 projects/DensePose/densepose/data/video/__init__.py
create mode 100644 projects/DensePose/densepose/data/video/frame_selector.py
create mode 100644 projects/DensePose/densepose/data/video/video_keyframe_dataset.py
create mode 100644 projects/DensePose/densepose/densepose_coco_evaluation.py
create mode 100644 projects/DensePose/densepose/engine/__init__.py
create mode 100644 projects/DensePose/densepose/engine/trainer.py
create mode 100644 projects/DensePose/densepose/evaluator.py
create mode 100644 projects/DensePose/densepose/modeling/build.py
create mode 100644 projects/DensePose/densepose/modeling/confidence.py
create mode 100644 projects/DensePose/densepose/modeling/densepose_checkpoint.py
create mode 100644 projects/DensePose/densepose/modeling/filter.py
create mode 100644 projects/DensePose/densepose/modeling/hrfpn.py
create mode 100644 projects/DensePose/densepose/modeling/hrnet.py
create mode 100644 projects/DensePose/densepose/modeling/inference.py
create mode 100644 projects/DensePose/densepose/modeling/losses/__init__.py
create mode 100644 projects/DensePose/densepose/modeling/losses/densepose_losses.py
create mode 100644 projects/DensePose/densepose/modeling/predictors/__init__.py
create mode 100644 projects/DensePose/densepose/modeling/predictors/chart.py
create mode 100644 projects/DensePose/densepose/modeling/predictors/chart_confidence.py
create mode 100644 projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py
create mode 100644 projects/DensePose/densepose/modeling/roi_heads/deeplab.py
create mode 100644 projects/DensePose/densepose/modeling/roi_heads/registry.py
create mode 100644 projects/DensePose/densepose/modeling/roi_heads/roi_head.py
create mode 100644 projects/DensePose/densepose/modeling/roi_heads/v1convx.py
create mode 100644 projects/DensePose/query_db.py
create mode 100644 projects/DensePose/train_net.py
diff --git a/projects/DensePose/README.md b/projects/DensePose/README.md
index 8b13789..fd2f1ee 100644
--- a/projects/DensePose/README.md
+++ b/projects/DensePose/README.md
@@ -1 +1,54 @@
+# DensePose in Detectron2
+**Dense Human Pose Estimation In The Wild**
+
+_Rıza Alp Güler, Natalia Neverova, Iasonas Kokkinos_
+
+[[`densepose.org`](https://densepose.org)] [[`arXiv`](https://arxiv.org/abs/1802.00434)] [[`BibTeX`](#CitingDensePose)]
+
+Dense human pose estimation aims at mapping all human pixels of an RGB image to the 3D surface of the human body.
+
+
+

+
+
+In this repository, we provide the code to train and evaluate DensePose-RCNN. We also provide tools to visualize
+DensePose annotations and results.
+
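+As an illustration, the `apply_net.py` tool added in this patch can visualize predictions for a single
+image (the checkpoint and image names below are placeholders, not files shipped with this patch):
+
+```
+python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml model_final.pkl input.jpg dp_contour,bbox --output image_densepose_contour.png
+```
+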
+# Quick Start
+
+See [Getting Started](doc/GETTING_STARTED.md)
+
+# Model Zoo and Baselines
+
+We provide a number of baseline results and trained models available for download. See [Model Zoo](doc/MODEL_ZOO.md) for details.
+
+# License
+
+Detectron2 is released under the [Apache 2.0 license](../../LICENSE).
+
+## Citing DensePose
+
+If you use DensePose, please cite it using the following BibTeX entries:
+
+For DensePose with estimated confidences:
+
+```
+@InProceedings{Neverova2019DensePoseConfidences,
+ title = {Correlated Uncertainty for Learning Dense Correspondences from Noisy Labels},
+ author = {Neverova, Natalia and Novotny, David and Vedaldi, Andrea},
+ booktitle = {Advances in Neural Information Processing Systems},
+ year = {2019},
+}
+```
+
+For the original DensePose:
+
+```
+@InProceedings{Guler2018DensePose,
+ title={DensePose: Dense Human Pose Estimation In The Wild},
+ author={R{\i}za Alp G\"uler and Natalia Neverova and Iasonas Kokkinos},
+ booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year={2018}
+}
+```
diff --git a/projects/DensePose/apply_net.py b/projects/DensePose/apply_net.py
new file mode 100644
index 0000000..25ccc7d
--- /dev/null
+++ b/projects/DensePose/apply_net.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import argparse
+import glob
+import logging
+import os
+import pickle
+import sys
+from typing import Any, ClassVar, Dict, List
+import torch
+
+from detectron2.config import get_cfg
+from detectron2.data.detection_utils import read_image
+from detectron2.engine.defaults import DefaultPredictor
+from detectron2.structures.boxes import BoxMode
+from detectron2.structures.instances import Instances
+from detectron2.utils.logger import setup_logger
+
+from densepose import add_densepose_config, add_hrnet_config
+from densepose.utils.logger import verbosity_to_level
+from densepose.vis.base import CompoundVisualizer
+from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer
+from densepose.vis.densepose import (
+ DensePoseResultsContourVisualizer,
+ DensePoseResultsFineSegmentationVisualizer,
+ DensePoseResultsUVisualizer,
+ DensePoseResultsVVisualizer,
+)
+from densepose.vis.extractor import CompoundExtractor, create_extractor
+
+DOC = """Apply Net - a tool to print / visualize DensePose results
+"""
+
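+# Illustrative invocations (checkpoint and image paths are placeholders; any config from the
+# configs/ directory added in this patch can be substituted):
+#   python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml model_final.pkl images_dir/ --output results.pkl
+#   python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml model_final.pkl image.jpg dp_segm,bbox --min_score 0.9 --output vis.png
+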
+LOGGER_NAME = "apply_net"
+logger = logging.getLogger(LOGGER_NAME)
+
+_ACTION_REGISTRY: Dict[str, "Action"] = {}
+
+
+class Action(object):
+ @classmethod
+ def add_arguments(cls: type, parser: argparse.ArgumentParser):
+ parser.add_argument(
+ "-v",
+ "--verbosity",
+ action="count",
+ help="Verbose mode. Multiple -v options increase the verbosity.",
+ )
+
+
+def register_action(cls: type):
+ """
+ Decorator for action classes to automate action registration
+ """
+ global _ACTION_REGISTRY
+ _ACTION_REGISTRY[cls.COMMAND] = cls
+ return cls
+
+
+class InferenceAction(Action):
+ @classmethod
+ def add_arguments(cls: type, parser: argparse.ArgumentParser):
+ super(InferenceAction, cls).add_arguments(parser)
+ parser.add_argument("cfg", metavar="", help="Config file")
+ parser.add_argument("model", metavar="", help="Model file")
+ parser.add_argument("input", metavar="", help="Input data")
+ parser.add_argument(
+ "--opts",
+ help="Modify config options using the command-line 'KEY VALUE' pairs",
+ default=[],
+ nargs=argparse.REMAINDER,
+ )
+
+ @classmethod
+ def execute(cls: type, args: argparse.Namespace):
+ logger.info(f"Loading config from {args.cfg}")
+ opts = []
+ cfg = cls.setup_config(args.cfg, args.model, args, opts)
+ logger.info(f"Loading model from {args.model}")
+ predictor = DefaultPredictor(cfg)
+ logger.info(f"Loading data from {args.input}")
+ file_list = cls._get_input_file_list(args.input)
+ if len(file_list) == 0:
+ logger.warning(f"No input images for {args.input}")
+ return
+ context = cls.create_context(args)
+ for file_name in file_list:
+ img = read_image(file_name, format="BGR") # predictor expects BGR image.
+ with torch.no_grad():
+ outputs = predictor(img)["instances"]
+ cls.execute_on_outputs(context, {"file_name": file_name, "image": img}, outputs)
+ cls.postexecute(context)
+
+ @classmethod
+ def setup_config(
+ cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str]
+ ):
+ cfg = get_cfg()
+ add_densepose_config(cfg)
+ add_hrnet_config(cfg)
+ cfg.merge_from_file(config_fpath)
+ cfg.merge_from_list(args.opts)
+ if opts:
+ cfg.merge_from_list(opts)
+ cfg.MODEL.WEIGHTS = model_fpath
+ cfg.freeze()
+ return cfg
+
+ @classmethod
+ def _get_input_file_list(cls: type, input_spec: str):
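+ # input_spec may be a directory (all regular files inside are used), a single file path, or a glob pattern.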
+ if os.path.isdir(input_spec):
+ file_list = [
+ os.path.join(input_spec, fname)
+ for fname in os.listdir(input_spec)
+ if os.path.isfile(os.path.join(input_spec, fname))
+ ]
+ elif os.path.isfile(input_spec):
+ file_list = [input_spec]
+ else:
+ file_list = glob.glob(input_spec)
+ return file_list
+
+
+@register_action
+class DumpAction(InferenceAction):
+ """
+ Dump action that outputs results to a pickle file
+ """
+
+ COMMAND: ClassVar[str] = "dump"
+
+ @classmethod
+ def add_parser(cls: type, subparsers: argparse._SubParsersAction):
+ parser = subparsers.add_parser(cls.COMMAND, help="Dump model outputs to a file.")
+ cls.add_arguments(parser)
+ parser.set_defaults(func=cls.execute)
+
+ @classmethod
+ def add_arguments(cls: type, parser: argparse.ArgumentParser):
+ super(DumpAction, cls).add_arguments(parser)
+ parser.add_argument(
+ "--output",
+ metavar="",
+ default="results.pkl",
+ help="File name to save dump to",
+ )
+
+ @classmethod
+ def execute_on_outputs(
+ cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
+ ):
+ image_fpath = entry["file_name"]
+ logger.info(f"Processing {image_fpath}")
+ result = {"file_name": image_fpath}
+ if outputs.has("scores"):
+ result["scores"] = outputs.get("scores").cpu()
+ if outputs.has("pred_boxes"):
+ result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu()
+ if outputs.has("pred_densepose"):
+ boxes_XYWH = BoxMode.convert(
+ result["pred_boxes_XYXY"], BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
+ )
+ result["pred_densepose"] = outputs.get("pred_densepose").to_result(boxes_XYWH)
+ context["results"].append(result)
+
+ @classmethod
+ def create_context(cls: type, args: argparse.Namespace):
+ context = {"results": [], "out_fname": args.output}
+ return context
+
+ @classmethod
+ def postexecute(cls: type, context: Dict[str, Any]):
+ out_fname = context["out_fname"]
+ out_dir = os.path.dirname(out_fname)
+ if len(out_dir) > 0 and not os.path.exists(out_dir):
+ os.makedirs(out_dir)
+ with open(out_fname, "wb") as hFile:
+ pickle.dump(context["results"], hFile)
+ logger.info(f"Output saved to {out_fname}")
+
+
+@register_action
+class ShowAction(InferenceAction):
+ """
+ Show action that visualizes selected entries on an image
+ """
+
+ COMMAND: ClassVar[str] = "show"
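+ # Maps the names accepted by the comma-separated "visualizations" CLI argument to visualizer classes.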
+ VISUALIZERS: ClassVar[Dict[str, object]] = {
+ "dp_contour": DensePoseResultsContourVisualizer,
+ "dp_segm": DensePoseResultsFineSegmentationVisualizer,
+ "dp_u": DensePoseResultsUVisualizer,
+ "dp_v": DensePoseResultsVVisualizer,
+ "bbox": ScoredBoundingBoxVisualizer,
+ }
+
+ @classmethod
+ def add_parser(cls: type, subparsers: argparse._SubParsersAction):
+ parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries")
+ cls.add_arguments(parser)
+ parser.set_defaults(func=cls.execute)
+
+ @classmethod
+ def add_arguments(cls: type, parser: argparse.ArgumentParser):
+ super(ShowAction, cls).add_arguments(parser)
+ parser.add_argument(
+ "visualizations",
+ metavar="",
+ help="Comma separated list of visualizations, possible values: "
+ "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))),
+ )
+ parser.add_argument(
+ "--min_score",
+ metavar="",
+ default=0.8,
+ type=float,
+ help="Minimum detection score to visualize",
+ )
+ parser.add_argument(
+ "--nms_thresh", metavar="", default=None, type=float, help="NMS threshold"
+ )
+ parser.add_argument(
+ "--output",
+ metavar="",
+ default="outputres.png",
+ help="File name to save output to",
+ )
+
+ @classmethod
+ def setup_config(
+ cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str]
+ ):
+ opts.append("MODEL.ROI_HEADS.SCORE_THRESH_TEST")
+ opts.append(str(args.min_score))
+ if args.nms_thresh is not None:
+ opts.append("MODEL.ROI_HEADS.NMS_THRESH_TEST")
+ opts.append(str(args.nms_thresh))
+ cfg = super(ShowAction, cls).setup_config(config_fpath, model_fpath, args, opts)
+ return cfg
+
+ @classmethod
+ def execute_on_outputs(
+ cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
+ ):
+ import cv2
+ import numpy as np
+
+ visualizer = context["visualizer"]
+ extractor = context["extractor"]
+ image_fpath = entry["file_name"]
+ logger.info(f"Processing {image_fpath}")
+ image = cv2.cvtColor(entry["image"], cv2.COLOR_BGR2GRAY)
+ image = np.tile(image[:, :, np.newaxis], [1, 1, 3])
+ data = extractor(outputs)
+ image_vis = visualizer.visualize(image, data)
+ entry_idx = context["entry_idx"] + 1
+ out_fname = cls._get_out_fname(entry_idx, context["out_fname"])
+ out_dir = os.path.dirname(out_fname)
+ if len(out_dir) > 0 and not os.path.exists(out_dir):
+ os.makedirs(out_dir)
+ cv2.imwrite(out_fname, image_vis)
+ logger.info(f"Output saved to {out_fname}")
+ context["entry_idx"] += 1
+
+ @classmethod
+ def postexecute(cls: type, context: Dict[str, Any]):
+ pass
+
+ @classmethod
+ def _get_out_fname(cls: type, entry_idx: int, fname_base: str):
+ base, ext = os.path.splitext(fname_base)
+ return base + ".{0:04d}".format(entry_idx) + ext
+
+ @classmethod
+ def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
+ vis_specs = args.visualizations.split(",")
+ visualizers = []
+ extractors = []
+ for vis_spec in vis_specs:
+ vis = cls.VISUALIZERS[vis_spec]()
+ visualizers.append(vis)
+ extractor = create_extractor(vis)
+ extractors.append(extractor)
+ visualizer = CompoundVisualizer(visualizers)
+ extractor = CompoundExtractor(extractors)
+ context = {
+ "extractor": extractor,
+ "visualizer": visualizer,
+ "out_fname": args.output,
+ "entry_idx": 0,
+ }
+ return context
+
+
+def create_argument_parser() -> argparse.ArgumentParser:
+ parser = argparse.ArgumentParser(
+ description=DOC,
+ formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120),
+ )
+ parser.set_defaults(func=lambda _: parser.print_help(sys.stdout))
+ subparsers = parser.add_subparsers(title="Actions")
+ for _, action in _ACTION_REGISTRY.items():
+ action.add_parser(subparsers)
+ return parser
+
+
+def main():
+ parser = create_argument_parser()
+ args = parser.parse_args()
+ verbosity = args.verbosity if hasattr(args, "verbosity") else None
+ global logger
+ logger = setup_logger(name=LOGGER_NAME)
+ logger.setLevel(verbosity_to_level(verbosity))
+ args.func(args)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml b/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml
new file mode 100644
index 0000000..1579187
--- /dev/null
+++ b/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml
@@ -0,0 +1,48 @@
+VERSION: 2
+MODEL:
+ META_ARCHITECTURE: "GeneralizedRCNN"
+ BACKBONE:
+ NAME: "build_resnet_fpn_backbone"
+ RESNETS:
+ OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+ FPN:
+ IN_FEATURES: ["res2", "res3", "res4", "res5"]
+ ANCHOR_GENERATOR:
+ SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
+ ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
+ RPN:
+ IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+ PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
+ PRE_NMS_TOPK_TEST: 1000 # Per FPN level
+ # Detectron1 uses 2000 proposals per-batch,
+ # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+ # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+ POST_NMS_TOPK_TRAIN: 1000
+ POST_NMS_TOPK_TEST: 1000
+
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_FC: 2
+ POOLER_RESOLUTION: 7
+ POOLER_SAMPLING_RATIO: 2
+ POOLER_TYPE: "ROIAlign"
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseV1ConvXHead"
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
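+ # 2 coarse segmentation channels correspond to a foreground/background mask; the *_s1x_legacy configs use 15 instead.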
+DATASETS:
+ TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival")
+ TEST: ("densepose_coco_2014_minival",)
+SOLVER:
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.01
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+ WARMUP_FACTOR: 0.1
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
diff --git a/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml
new file mode 100644
index 0000000..36eabfe
--- /dev/null
+++ b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33dYBMemi9xOUFR0w"
+ BACKBONE:
+ NAME: "build_hrfpn_backbone"
+ RPN:
+ IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
+ ROI_HEADS:
+ IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
+SOLVER:
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ CLIP_GRADIENTS:
+ ENABLED: True
+ CLIP_TYPE: "norm"
+ BASE_LR: 0.03
diff --git a/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml
new file mode 100644
index 0000000..0ca8085
--- /dev/null
+++ b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w40_s1x.yaml
@@ -0,0 +1,23 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33ck0gvo5jfoWBOPo"
+ BACKBONE:
+ NAME: "build_hrfpn_backbone"
+ RPN:
+ IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
+ ROI_HEADS:
+ IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
+ HRNET:
+ STAGE2:
+ NUM_CHANNELS: [40, 80]
+ STAGE3:
+ NUM_CHANNELS: [40, 80, 160]
+ STAGE4:
+ NUM_CHANNELS: [40, 80, 160, 320]
+SOLVER:
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ CLIP_GRADIENTS:
+ ENABLED: True
+ CLIP_TYPE: "norm"
+ BASE_LR: 0.03
diff --git a/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml
new file mode 100644
index 0000000..a3f437a
--- /dev/null
+++ b/projects/DensePose/configs/HRNet/densepose_rcnn_HRFPN_HRNet_w48_s1x.yaml
@@ -0,0 +1,23 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33dKvqI6pBZlifgJk"
+ BACKBONE:
+ NAME: "build_hrfpn_backbone"
+ RPN:
+ IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
+ ROI_HEADS:
+ IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
+ HRNET:
+ STAGE2:
+ NUM_CHANNELS: [48, 96]
+ STAGE3:
+ NUM_CHANNELS: [48, 96, 192]
+ STAGE4:
+ NUM_CHANNELS: [48, 96, 192, 384]
+SOLVER:
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ CLIP_GRADIENTS:
+ ENABLED: True
+ CLIP_TYPE: "norm"
+ BASE_LR: 0.03
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml
new file mode 100644
index 0000000..3c16763
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1M_s1x.yaml
@@ -0,0 +1,18 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml
new file mode 100644
index 0000000..15475b1
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml
new file mode 100644
index 0000000..0cbe07f
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2M_s1x.yaml
@@ -0,0 +1,18 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "indep_aniso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml
new file mode 100644
index 0000000..7546b96
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "indep_aniso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml
new file mode 100644
index 0000000..045f7f0
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml
@@ -0,0 +1,10 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+SOLVER:
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml
new file mode 100644
index 0000000..9334e18
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1M_s1x.yaml
@@ -0,0 +1,18 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ ROI_DENSEPOSE_HEAD:
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ WARMUP_FACTOR: 0.025
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml
new file mode 100644
index 0000000..ace6209
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ ROI_DENSEPOSE_HEAD:
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ WARMUP_FACTOR: 0.025
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml
new file mode 100644
index 0000000..90f0be2
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2M_s1x.yaml
@@ -0,0 +1,18 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ ROI_DENSEPOSE_HEAD:
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "indep_aniso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ WARMUP_FACTOR: 0.025
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml
new file mode 100644
index 0000000..766c098
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ ROI_DENSEPOSE_HEAD:
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "indep_aniso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ WARMUP_FACTOR: 0.025
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml
new file mode 100644
index 0000000..af44fb7
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+SOLVER:
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml
new file mode 100644
index 0000000..8e79a1b
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml
@@ -0,0 +1,17 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ ROI_DENSEPOSE_HEAD:
+ NUM_COARSE_SEGM_CHANNELS: 15
+ POOLER_RESOLUTION: 14
+ HEATMAP_SIZE: 56
+ INDEX_WEIGHTS: 2.0
+ PART_WEIGHTS: 0.3
+ POINT_REGRESSION_WEIGHTS: 0.1
+ DECODER_ON: False
+SOLVER:
+ BASE_LR: 0.002
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml
new file mode 100644
index 0000000..18a417a
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1M_s1x.yaml
@@ -0,0 +1,18 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml
new file mode 100644
index 0000000..f3720ef
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml
new file mode 100644
index 0000000..8a413d2
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2M_s1x.yaml
@@ -0,0 +1,18 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "indep_aniso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml
new file mode 100644
index 0000000..5a47cc0
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "indep_aniso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml
new file mode 100644
index 0000000..52a170b
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml
@@ -0,0 +1,10 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+SOLVER:
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml
new file mode 100644
index 0000000..8a81f2a
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1M_s1x.yaml
@@ -0,0 +1,20 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ CLIP_TYPE: norm
+ CLIP_VALUE: 100.0
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ WARMUP_FACTOR: 0.025
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml
new file mode 100644
index 0000000..d36e542
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ WARMUP_FACTOR: 0.025
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml
new file mode 100644
index 0000000..5cf29ea
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2M_s1x.yaml
@@ -0,0 +1,18 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "indep_aniso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ WARMUP_FACTOR: 0.025
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml
new file mode 100644
index 0000000..e880d46
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml
@@ -0,0 +1,16 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "indep_aniso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
+ WARMUP_FACTOR: 0.025
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml
new file mode 100644
index 0000000..d2dd14c
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml
@@ -0,0 +1,8 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+SOLVER:
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml
new file mode 100644
index 0000000..6c5391f
--- /dev/null
+++ b/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml
@@ -0,0 +1,17 @@
+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ NUM_COARSE_SEGM_CHANNELS: 15
+ POOLER_RESOLUTION: 14
+ HEATMAP_SIZE: 56
+ INDEX_WEIGHTS: 2.0
+ PART_WEIGHTS: 0.3
+ POINT_REGRESSION_WEIGHTS: 0.1
+ DECODER_ON: False
+SOLVER:
+ BASE_LR: 0.002
+ MAX_ITER: 130000
+ STEPS: (100000, 120000)
diff --git a/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC-B.yaml b/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC-B.yaml
new file mode 100644
index 0000000..1a2664d
--- /dev/null
+++ b/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC-B.yaml
@@ -0,0 +1,121 @@
+MODEL:
+ META_ARCHITECTURE: "GeneralizedRCNN"
+ BACKBONE:
+ NAME: "build_resnet_fpn_backbone"
+ RESNETS:
+ OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+ FPN:
+ IN_FEATURES: ["res2", "res3", "res4", "res5"]
+ ANCHOR_GENERATOR:
+ SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
+ ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
+ RPN:
+ IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+ PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
+ PRE_NMS_TOPK_TEST: 1000 # Per FPN level
+ # Detectron1 uses 2000 proposals per-batch,
+ # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+ # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+ POST_NMS_TOPK_TRAIN: 1000
+ POST_NMS_TOPK_TEST: 1000
+ ROI_HEADS:
+ NAME: "StandardROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_FC: 2
+ POOLER_RESOLUTION: 7
+ ROI_MASK_HEAD:
+ NAME: "MaskRCNNConvUpsampleHead"
+ NUM_CONV: 4
+ POOLER_RESOLUTION: 14
+DATASETS:
+ TRAIN: ("base_coco_2017_train",)
+ TEST: ("base_coco_2017_val", "densepose_chimps")
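+ # The maps below relabel ten animal categories as class 1 (person) so that, with NUM_CLASSES: 1,
+ # animal instances also supervise the model (cf. the "Atop10_toP" configs in this directory).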
+ CATEGORY_MAPS:
+ "base_coco_2017_train":
+ "16": 1 # bird -> person
+ "17": 1 # cat -> person
+ "18": 1 # dog -> person
+ "19": 1 # horse -> person
+ "20": 1 # sheep -> person
+ "21": 1 # cow -> person
+ "22": 1 # elephant -> person
+ "23": 1 # bear -> person
+ "24": 1 # zebra -> person
+ "25": 1 # girafe -> person
+ "base_coco_2017_val":
+ "16": 1 # bird -> person
+ "17": 1 # cat -> person
+ "18": 1 # dog -> person
+ "19": 1 # horse -> person
+ "20": 1 # sheep -> person
+ "21": 1 # cow -> person
+ "22": 1 # elephant -> person
+ "23": 1 # bear -> person
+ "24": 1 # zebra -> person
+ "25": 1 # girafe -> person
+ WHITELISTED_CATEGORIES:
+ "base_coco_2017_train":
+ - 1 # person
+ - 16 # bird
+ - 17 # cat
+ - 18 # dog
+ - 19 # horse
+ - 20 # sheep
+ - 21 # cow
+ - 22 # elephant
+ - 23 # bear
+ - 24 # zebra
+ - 25 # giraffe
+ "base_coco_2017_val":
+ - 1 # person
+ - 16 # bird
+ - 17 # cat
+ - 18 # dog
+ - 19 # horse
+ - 20 # sheep
+ - 21 # cow
+ - 22 # elephant
+ - 23 # bear
+ - 24 # zebra
+ - 25 # giraffe
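+ # Bootstrapping: sample keyframes from the unlabeled "chimpnsee" videos, pseudo-label them with the
+ # current model (see densepose/data/inference_based_loader.py and densepose/data/samplers/), keep
+ # detections with score >= 0.8, and mix them into training (RATIO: 1.0).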
+BOOTSTRAP_DATASETS:
+ - DATASET: "chimpnsee"
+ RATIO: 1.0
+ IMAGE_LOADER:
+ TYPE: "video_keyframe"
+ SELECT:
+ STRATEGY: "random_k"
+ NUM_IMAGES: 4
+ TRANSFORM:
+ TYPE: "resize"
+ MIN_SIZE: 800
+ MAX_SIZE: 1333
+ BATCH_SIZE: 8
+ NUM_WORKERS: 1
+ INFERENCE:
+ INPUT_BATCH_SIZE: 1
+ OUTPUT_BATCH_SIZE: 1
+ DATA_SAMPLER:
+ # supported types:
+ # densepose_uniform
+ # densepose_UV_confidence
+ # densepose_fine_segm_confidence
+ # densepose_coarse_segm_confidence
+ TYPE: "densepose_uniform"
+ COUNT_PER_CLASS: 8
+ FILTER:
+ TYPE: "detection_score"
+ MIN_VALUE: 0.8
+BOOTSTRAP_MODEL:
+ WEIGHTS: ""
+SOLVER:
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.02
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml b/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml
new file mode 100644
index 0000000..5a20882
--- /dev/null
+++ b/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml
@@ -0,0 +1,91 @@
+MODEL:
+ META_ARCHITECTURE: "GeneralizedRCNN"
+ BACKBONE:
+ NAME: "build_resnet_fpn_backbone"
+ RESNETS:
+ OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+ FPN:
+ IN_FEATURES: ["res2", "res3", "res4", "res5"]
+ ANCHOR_GENERATOR:
+ SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
+ ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
+ RPN:
+ IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+ PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
+ PRE_NMS_TOPK_TEST: 1000 # Per FPN level
+ # Detectron1 uses 2000 proposals per-batch,
+ # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+ # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+ POST_NMS_TOPK_TRAIN: 1000
+ POST_NMS_TOPK_TEST: 1000
+ ROI_HEADS:
+ NAME: "StandardROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_BOX_HEAD:
+ NAME: "FastRCNNConvFCHead"
+ NUM_FC: 2
+ POOLER_RESOLUTION: 7
+ ROI_MASK_HEAD:
+ NAME: "MaskRCNNConvUpsampleHead"
+ NUM_CONV: 4
+ POOLER_RESOLUTION: 14
+DATASETS:
+ TRAIN: ("base_coco_2017_train",)
+ TEST: ("base_coco_2017_val", "densepose_chimps")
+ CATEGORY_MAPS:
+ "base_coco_2017_train":
+ "16": 1 # bird -> person
+ "17": 1 # cat -> person
+ "18": 1 # dog -> person
+ "19": 1 # horse -> person
+ "20": 1 # sheep -> person
+ "21": 1 # cow -> person
+ "22": 1 # elephant -> person
+ "23": 1 # bear -> person
+ "24": 1 # zebra -> person
+ "25": 1 # girafe -> person
+ "base_coco_2017_val":
+ "16": 1 # bird -> person
+ "17": 1 # cat -> person
+ "18": 1 # dog -> person
+ "19": 1 # horse -> person
+ "20": 1 # sheep -> person
+ "21": 1 # cow -> person
+ "22": 1 # elephant -> person
+ "23": 1 # bear -> person
+ "24": 1 # zebra -> person
+ "25": 1 # girafe -> person
+ WHITELISTED_CATEGORIES:
+ "base_coco_2017_train":
+ - 1 # person
+ - 16 # bird
+ - 17 # cat
+ - 18 # dog
+ - 19 # horse
+ - 20 # sheep
+ - 21 # cow
+ - 22 # elephant
+ - 23 # bear
+ - 24 # zebra
+ - 25 # giraffe
+ "base_coco_2017_val":
+ - 1 # person
+ - 16 # bird
+ - 17 # cat
+ - 18 # dog
+ - 19 # horse
+ - 20 # sheep
+ - 21 # cow
+ - 22 # elephant
+ - 23 # bear
+ - 24 # zebra
+ - 25 # giraffe
+SOLVER:
+ IMS_PER_BATCH: 16
+ BASE_LR: 0.02
+ STEPS: (60000, 80000)
+ MAX_ITER: 90000
+INPUT:
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..cf0050e
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_1x_Atop10_toP.yaml
@@ -0,0 +1,19 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseV1ConvXHead"
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..ff151ed
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_1x_Atop10_toP.yaml
@@ -0,0 +1,19 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1M_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1M_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..16762cc
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1M_1x_Atop10_toP.yaml
@@ -0,0 +1,29 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ WARMUP_FACTOR: 0.025
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..45f6ec9
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_DL_WC1_1x_Atop10_toP.yaml
@@ -0,0 +1,27 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ WARMUP_FACTOR: 0.025
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1M_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1M_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..81ffe1f
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1M_1x_Atop10_toP.yaml
@@ -0,0 +1,29 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseV1ConvXHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ WARMUP_FACTOR: 0.025
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..76abe3e
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_101_FPN_WC1_1x_Atop10_toP.yaml
@@ -0,0 +1,27 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+ RESNETS:
+ DEPTH: 101
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseV1ConvXHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ WARMUP_FACTOR: 0.025
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..c827da1
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_1x_Atop10_toP.yaml
@@ -0,0 +1,19 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseV1ConvXHead"
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..174029b
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_1x_Atop10_toP.yaml
@@ -0,0 +1,19 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..ab5bf31
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1M_1x_Atop10_toP.yaml
@@ -0,0 +1,29 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ WARMUP_FACTOR: 0.025
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..9d0ca1e
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_DL_WC1_1x_Atop10_toP.yaml
@@ -0,0 +1,27 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ WARMUP_FACTOR: 0.025
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..35855b7
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP.yaml
@@ -0,0 +1,29 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseV1ConvXHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ WARMUP_FACTOR: 0.025
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP_B.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP_B.yaml
new file mode 100644
index 0000000..74c5476
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1M_1x_Atop10_toP_B.yaml
@@ -0,0 +1,30 @@
+_BASE_: "Base-RCNN-FPN-MC-B.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseV1ConvXHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ SEGM_CONFIDENCE:
+ ENABLED: True
+ POINT_REGRESSION_WEIGHTS: 0.0005
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ CLIP_TYPE: "norm"
+ WARMUP_FACTOR: 0.025
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1_1x_Atop10_toP.yaml b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1_1x_Atop10_toP.yaml
new file mode 100644
index 0000000..683215e
--- /dev/null
+++ b/projects/DensePose/configs/evolution/densepose_R_50_FPN_WC1_1x_Atop10_toP.yaml
@@ -0,0 +1,27 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ DENSEPOSE_ON: True
+ ROI_HEADS:
+ NAME: "DensePoseROIHeads"
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
+ NUM_CLASSES: 1
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseV1ConvXHead"
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+ POOLER_TYPE: "ROIAlign"
+ NUM_COARSE_SEGM_CHANNELS: 2
+ COARSE_SEGM_TRAINED_BY_MASKS: True
+ INDEX_WEIGHTS: 1.0
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ WARMUP_FACTOR: 0.025
+DATASETS:
+ TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
+ TEST: ("densepose_chimps",)
diff --git a/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml b/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml
new file mode 100644
index 0000000..80139ad
--- /dev/null
+++ b/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml
@@ -0,0 +1,7 @@
+_BASE_: "Base-RCNN-FPN-MC.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ MASK_ON: False
+ DENSEPOSE_ON: False
+ RESNETS:
+ DEPTH: 50
diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_training_acc_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_training_acc_test.yaml
new file mode 100644
index 0000000..68a8509
--- /dev/null
+++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_HRFPN_HRNet_w32_training_acc_test.yaml
@@ -0,0 +1,7 @@
+_BASE_: "../HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml"
+DATASETS:
+ TRAIN: ("densepose_coco_2014_minival_100",)
+ TEST: ("densepose_coco_2014_minival_100",)
+SOLVER:
+ MAX_ITER: 40
+ STEPS: (30,)
diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml
new file mode 100644
index 0000000..b90989e
--- /dev/null
+++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml
@@ -0,0 +1,11 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ ROI_DENSEPOSE_HEAD:
+ NAME: "DensePoseDeepLabHead"
+DATASETS:
+ TRAIN: ("densepose_coco_2014_minival_100",)
+ TEST: ("densepose_coco_2014_minival_100",)
+SOLVER:
+ MAX_ITER: 40
+ STEPS: (30,)
diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml
new file mode 100644
index 0000000..7d41274
--- /dev/null
+++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml
@@ -0,0 +1,13 @@
+_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
+MODEL:
+ WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
+DATASETS:
+ TRAIN: ()
+ TEST: ("densepose_coco_2014_minival_100",)
+TEST:
+ AUG:
+ ENABLED: True
+ MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
+ MAX_SIZE: 4000
+ FLIP: True
+ EXPECTED_RESULTS: [["bbox_TTA", "AP", 61.74, 0.03], ["densepose_gps_TTA", "AP", 60.22, 0.03], ["densepose_gpsm_TTA", "AP", 63.85, 0.03]]
diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml
new file mode 100644
index 0000000..f0fe611
--- /dev/null
+++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "iid_iso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+DATASETS:
+ TRAIN: ("densepose_coco_2014_minival_100",)
+ TEST: ("densepose_coco_2014_minival_100",)
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 40
+ STEPS: (30,)
+ WARMUP_FACTOR: 0.025
diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml
new file mode 100644
index 0000000..f0d9358
--- /dev/null
+++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml
@@ -0,0 +1,19 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ RESNETS:
+ DEPTH: 50
+ ROI_DENSEPOSE_HEAD:
+ UV_CONFIDENCE:
+ ENABLED: True
+ TYPE: "indep_aniso"
+ POINT_REGRESSION_WEIGHTS: 0.0005
+DATASETS:
+ TRAIN: ("densepose_coco_2014_minival_100",)
+ TEST: ("densepose_coco_2014_minival_100",)
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ MAX_ITER: 40
+ STEPS: (30,)
+ WARMUP_FACTOR: 0.025
diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml
new file mode 100644
index 0000000..3c5a7d2
--- /dev/null
+++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml
@@ -0,0 +1,8 @@
+_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
+MODEL:
+ WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
+DATASETS:
+ TRAIN: ()
+ TEST: ("densepose_coco_2014_minival_100",)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 59.27, 0.025], ["densepose_gps", "AP", 60.11, 0.02], ["densepose_gpsm", "AP", 64.20, 0.02]]
diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml
new file mode 100644
index 0000000..057c876
--- /dev/null
+++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml
@@ -0,0 +1,9 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+DATASETS:
+ TRAIN: ("densepose_coco_2014_minival_100",)
+ TEST: ("densepose_coco_2014_minival_100",)
+SOLVER:
+ MAX_ITER: 40
+ STEPS: (30,)
diff --git a/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml
new file mode 100644
index 0000000..0053c9d
--- /dev/null
+++ b/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml
@@ -0,0 +1,18 @@
+_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+ ROI_HEADS:
+ NUM_CLASSES: 1
+DATASETS:
+ TRAIN: ("densepose_coco_2014_minival",)
+ TEST: ("densepose_coco_2014_minival",)
+SOLVER:
+ CLIP_GRADIENTS:
+ ENABLED: True
+ CLIP_TYPE: norm
+ CLIP_VALUE: 1.0
+ MAX_ITER: 6000
+ STEPS: (5500, 5800)
+TEST:
+ EXPECTED_RESULTS: [["bbox", "AP", 76.2477, 1.0], ["densepose_gps", "AP", 79.6090, 1.5], ["densepose_gpsm", "AP", 80.0061, 1.5]]
+
diff --git a/projects/DensePose/densepose/config.py b/projects/DensePose/densepose/config.py
new file mode 100644
index 0000000..e69e47e
--- /dev/null
+++ b/projects/DensePose/densepose/config.py
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from detectron2.config import CfgNode as CN
+
+
+def add_dataset_category_config(cfg: CN):
+ """
+ Add config for additional category-related dataset options
+ - category whitelisting
+ - category mapping
+ """
+ _C = cfg
+ _C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
+ _C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)
+
+
+def add_bootstrap_config(cfg: CN):
+ """
+ """
+ _C = cfg
+ _C.BOOTSTRAP_DATASETS = []
+ _C.BOOTSTRAP_MODEL = CN()
+ _C.BOOTSTRAP_MODEL.WEIGHTS = ""
+ _C.BOOTSTRAP_MODEL.DEVICE = "cuda"
+
+
+def get_bootstrap_dataset_config() -> CN:
+ _C = CN()
+ _C.DATASET = ""
+ # ratio used to mix data loaders
+ _C.RATIO = 0.1
+ # image loader
+ _C.IMAGE_LOADER = CN(new_allowed=True)
+ _C.IMAGE_LOADER.TYPE = ""
+ _C.IMAGE_LOADER.BATCH_SIZE = 4
+ _C.IMAGE_LOADER.NUM_WORKERS = 4
+ # inference
+ _C.INFERENCE = CN()
+ # batch size for model inputs
+ _C.INFERENCE.INPUT_BATCH_SIZE = 4
+ # batch size to group model outputs
+ _C.INFERENCE.OUTPUT_BATCH_SIZE = 2
+ # sampled data
+ _C.DATA_SAMPLER = CN(new_allowed=True)
+ _C.DATA_SAMPLER.TYPE = ""
+ # filter
+ _C.FILTER = CN(new_allowed=True)
+ _C.FILTER.TYPE = ""
+ return _C
+
+
+def load_bootstrap_config(cfg: CN):
+ """
+ Bootstrap datasets are given as a list of `dict` that are not automatically
+ converted into CfgNode. This method processes all bootstrap dataset entries
+ and ensures that they are in CfgNode format and comply with the specification
+ """
+ if not cfg.BOOTSTRAP_DATASETS:
+ return
+
+ bootstrap_datasets_cfgnodes = []
+ for dataset_cfg in cfg.BOOTSTRAP_DATASETS:
+ _C = get_bootstrap_dataset_config().clone()
+ _C.merge_from_other_cfg(CN(dataset_cfg))
+ bootstrap_datasets_cfgnodes.append(_C)
+ cfg.BOOTSTRAP_DATASETS = bootstrap_datasets_cfgnodes
+
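+
+# Editor's note: an illustrative sketch, not part of the original patch. An entry of
+# cfg.BOOTSTRAP_DATASETS is a plain dict following the schema returned by
+# `get_bootstrap_dataset_config`; `load_bootstrap_config` converts it to a CfgNode.
+# The dataset name and values below are assumptions chosen for the example, and
+# loader-specific keys (e.g. frame selection settings for video datasets) are omitted.
+_EXAMPLE_BOOTSTRAP_DATASET_SPEC = {
+    "DATASET": "chimpnsee",
+    "RATIO": 0.1,
+    "IMAGE_LOADER": {"TYPE": "video_keyframe", "BATCH_SIZE": 4, "NUM_WORKERS": 4},
+    "INFERENCE": {"INPUT_BATCH_SIZE": 4, "OUTPUT_BATCH_SIZE": 2},
+    "DATA_SAMPLER": {"TYPE": "densepose_uniform", "COUNT_PER_CLASS": 8},
+    "FILTER": {"TYPE": "detection_score", "MIN_VALUE": 0.8},
+}
+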
+
+def add_densepose_head_config(cfg: CN):
+ """
+ Add config for densepose head.
+ """
+ _C = cfg
+
+ _C.MODEL.DENSEPOSE_ON = True
+
+ _C.MODEL.ROI_DENSEPOSE_HEAD = CN()
+ _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
+ _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
+ # Number of parts used for point labels
+ _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
+ _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
+ _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
+ _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
+ _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
+ _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
+ _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2
+ # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
+ _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
+    # Loss weights for annotation masks (14 parts)
+    _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
+    # Loss weights for surface parts (24 parts)
+ _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
+ # Loss weights for UV regression.
+ _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
+ # Coarse segmentation is trained using instance segmentation task data
+ _C.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS = False
+ # For Decoder
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
+ # For DeepLab head
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
+ # Confidences
+ # Enable learning UV confidences (variances) along with the actual values
+ _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
+ # UV confidence lower bound
+ _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
+ # Enable learning segmentation confidences (variances) along with the actual values
+ _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE = CN({"ENABLED": False})
+ # Segmentation confidence lower bound
+ _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON = 0.01
+ # Statistical model type for confidence learning, possible values:
+ # - "iid_iso": statistically independent identically distributed residuals
+ # with isotropic covariance
+ # - "indep_aniso": statistically independent residuals with anisotropic
+ # covariances
+ _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
+ # List of angles for rotation in data augmentation during training
+ _C.INPUT.ROTATION_ANGLES = [0]
+ _C.TEST.AUG.ROTATION_ANGLES = () # Rotation TTA
+
+
+def add_hrnet_config(cfg: CN):
+ """
+ Add config for HRNet backbone.
+ """
+ _C = cfg
+
+ # For HigherHRNet w32
+ _C.MODEL.HRNET = CN()
+ _C.MODEL.HRNET.STEM_INPLANES = 64
+ _C.MODEL.HRNET.STAGE2 = CN()
+ _C.MODEL.HRNET.STAGE2.NUM_MODULES = 1
+ _C.MODEL.HRNET.STAGE2.NUM_BRANCHES = 2
+ _C.MODEL.HRNET.STAGE2.BLOCK = "BASIC"
+ _C.MODEL.HRNET.STAGE2.NUM_BLOCKS = [4, 4]
+ _C.MODEL.HRNET.STAGE2.NUM_CHANNELS = [32, 64]
+ _C.MODEL.HRNET.STAGE2.FUSE_METHOD = "SUM"
+ _C.MODEL.HRNET.STAGE3 = CN()
+ _C.MODEL.HRNET.STAGE3.NUM_MODULES = 4
+ _C.MODEL.HRNET.STAGE3.NUM_BRANCHES = 3
+ _C.MODEL.HRNET.STAGE3.BLOCK = "BASIC"
+ _C.MODEL.HRNET.STAGE3.NUM_BLOCKS = [4, 4, 4]
+ _C.MODEL.HRNET.STAGE3.NUM_CHANNELS = [32, 64, 128]
+ _C.MODEL.HRNET.STAGE3.FUSE_METHOD = "SUM"
+ _C.MODEL.HRNET.STAGE4 = CN()
+ _C.MODEL.HRNET.STAGE4.NUM_MODULES = 3
+ _C.MODEL.HRNET.STAGE4.NUM_BRANCHES = 4
+ _C.MODEL.HRNET.STAGE4.BLOCK = "BASIC"
+ _C.MODEL.HRNET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
+ _C.MODEL.HRNET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
+ _C.MODEL.HRNET.STAGE4.FUSE_METHOD = "SUM"
+
+ _C.MODEL.HRNET.HRFPN = CN()
+ _C.MODEL.HRNET.HRFPN.OUT_CHANNELS = 256
+
+
+def add_densepose_config(cfg: CN):
+ add_densepose_head_config(cfg)
+ add_hrnet_config(cfg)
+ add_bootstrap_config(cfg)
+ add_dataset_category_config(cfg)
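+
+
+def _example_densepose_cfg(config_fpath: str) -> CN:
+    """
+    Editor's note: an illustrative sketch, not part of the original patch. Shows
+    the typical order of operations when using the helpers above -- extend a base
+    detectron2 config with the DensePose options, then merge one of the YAML files
+    added under projects/DensePose/configs/.
+    """
+    from detectron2.config import get_cfg
+
+    cfg = get_cfg()
+    add_densepose_config(cfg)
+    cfg.merge_from_file(config_fpath)
+    return cfg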
diff --git a/projects/DensePose/densepose/data/__init__.py b/projects/DensePose/densepose/data/__init__.py
new file mode 100644
index 0000000..bb7e0e8
--- /dev/null
+++ b/projects/DensePose/densepose/data/__init__.py
@@ -0,0 +1,23 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from .build import (
+ build_detection_test_loader,
+ build_detection_train_loader,
+ build_combined_loader,
+ build_frame_selector,
+ build_inference_based_loaders,
+ has_inference_based_loaders,
+ BootstrapDatasetFactoryCatalog,
+)
+from .combined_loader import CombinedDataLoader
+from .dataset_mapper import DatasetMapper
+from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter
+from .utils import is_relative_local_path, maybe_prepend_base_path
+
+# ensure the builtin datasets are registered
+from . import datasets
+
+# ensure the bootstrap datasets builders are registered
+from . import build
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/projects/DensePose/densepose/data/build.py b/projects/DensePose/densepose/data/build.py
new file mode 100644
index 0000000..26ca84a
--- /dev/null
+++ b/projects/DensePose/densepose/data/build.py
@@ -0,0 +1,604 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import itertools
+import logging
+import numpy as np
+from collections import UserDict
+from typing import Any, Callable, Collection, Dict, Iterable, List, Optional, Sequence
+import torch
+from torch.utils.data.dataset import Dataset
+
+from detectron2.config import CfgNode
+from detectron2.data.build import (
+ build_batch_data_loader,
+ load_proposals_into_dataset,
+ print_instances_class_histogram,
+ trivial_batch_collator,
+)
+from detectron2.data.catalog import DatasetCatalog, Metadata, MetadataCatalog
+from detectron2.data.common import DatasetFromList, MapDataset
+from detectron2.data.samplers import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler
+from detectron2.utils.comm import get_world_size
+
+from densepose.config import get_bootstrap_dataset_config
+
+from .combined_loader import CombinedDataLoader, Loader
+from .dataset_mapper import DatasetMapper
+from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK
+from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY
+from .datasets.dataset_type import DatasetType
+from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter
+from .samplers import (
+ DensePoseConfidenceBasedSampler,
+ DensePoseUniformSampler,
+ MaskFromDensePoseSampler,
+ PredictionToGroundTruthSampler,
+)
+from .transform import ImageResizeTransform
+from .video import (
+ FirstKFramesSelector,
+ FrameSelectionStrategy,
+ LastKFramesSelector,
+ RandomKFramesSelector,
+ VideoKeyframeDataset,
+ video_list_from_file,
+)
+
+__all__ = ["build_detection_train_loader", "build_detection_test_loader"]
+
+
+Instance = Dict[str, Any]
+InstancePredicate = Callable[[Instance], bool]
+
+
+def _compute_num_images_per_worker(cfg: CfgNode):
+ num_workers = get_world_size()
+ images_per_batch = cfg.SOLVER.IMS_PER_BATCH
+ assert (
+ images_per_batch % num_workers == 0
+ ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
+ images_per_batch, num_workers
+ )
+ assert (
+ images_per_batch >= num_workers
+ ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
+ images_per_batch, num_workers
+ )
+ images_per_worker = images_per_batch // num_workers
+ return images_per_worker
+
+
+def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]):
+ meta = MetadataCatalog.get(dataset_name)
+ for dataset_dict in dataset_dicts:
+ for ann in dataset_dict["annotations"]:
+ ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]]
+
+
+def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]):
+ # merge categories for all datasets
+ merged_categories = {}
+ for dataset_name in dataset_names:
+ meta = MetadataCatalog.get(dataset_name)
+ for cat_id, cat_name in meta.categories.items():
+ if cat_id not in merged_categories:
+ merged_categories[cat_id] = (cat_name, dataset_name)
+ continue
+ cat_name_other, dataset_name_other = merged_categories[cat_id]
+ if cat_name_other != cat_name:
+ raise ValueError(
+ f"Incompatible categories for category ID {cat_id}: "
+ f'dataset {dataset_name} value "{cat_name}", '
+ f'dataset {dataset_name_other} value "{cat_name_other}"'
+ )
+
+ merged_cat_id_to_cont_id = {}
+ for i, cat_id in enumerate(sorted(merged_categories.keys())):
+ merged_cat_id_to_cont_id[cat_id] = i
+
+ # add category maps to metadata
+ for dataset_name in dataset_names:
+ meta = MetadataCatalog.get(dataset_name)
+ categories = meta.get("categories")
+ meta.thing_classes = [categories[cat_id] for cat_id in sorted(categories.keys())]
+ meta.thing_dataset_id_to_contiguous_id = {
+ cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted(categories.keys())
+ }
+ meta.thing_contiguous_id_to_dataset_id = {
+ merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted(categories.keys())
+ }
+
+
+def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+ def has_annotations(instance: Instance) -> bool:
+ return "annotations" in instance
+
+    def has_only_crowd_annotations(instance: Instance) -> bool:
+        for ann in instance["annotations"]:
+            # COCO-style annotations store crowd flags under the "iscrowd" key
+            # (see datasets/coco.py), not "is_crowd"
+            if ann.get("iscrowd", 0) == 0:
+                return False
+        return True
+
+    def general_keep_instance_predicate(instance: Instance) -> bool:
+        return has_annotations(instance) and not has_only_crowd_annotations(instance)
+
+ if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
+ return None
+ return general_keep_instance_predicate
+
+
+def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+
+ min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
+
+ def has_sufficient_num_keypoints(instance: Instance) -> bool:
+ num_kpts = sum(
+ (np.array(ann["keypoints"][2::3]) > 0).sum()
+ for ann in instance["annotations"]
+ if "keypoints" in ann
+ )
+ return num_kpts >= min_num_keypoints
+
+ if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0):
+ return has_sufficient_num_keypoints
+ return None
+
+
+def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+ if not cfg.MODEL.MASK_ON:
+ return None
+
+ def has_mask_annotations(instance: Instance) -> bool:
+ return any("segmentation" in ann for ann in instance["annotations"])
+
+ return has_mask_annotations
+
+
+def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+ if not cfg.MODEL.DENSEPOSE_ON:
+ return None
+
+ use_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
+
+ def has_densepose_annotations(instance: Instance) -> bool:
+ for ann in instance["annotations"]:
+ if all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK) and (
+ (DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann)
+ ):
+ return True
+ if use_masks and "segmentation" in ann:
+ return True
+ return False
+
+ return has_densepose_annotations
+
+
+def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+ specific_predicate_creators = [
+ _maybe_create_keypoints_keep_instance_predicate,
+ _maybe_create_mask_keep_instance_predicate,
+ _maybe_create_densepose_keep_instance_predicate,
+ ]
+ predicates = [creator(cfg) for creator in specific_predicate_creators]
+ predicates = [p for p in predicates if p is not None]
+ if not predicates:
+ return None
+
+ def combined_predicate(instance: Instance) -> bool:
+ return any(p(instance) for p in predicates)
+
+ return combined_predicate
+
+
+def _get_train_keep_instance_predicate(cfg: CfgNode):
+ general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
+ combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg)
+
+ def combined_general_specific_keep_predicate(instance: Instance) -> bool:
+ return general_keep_predicate(instance) and combined_specific_keep_predicate(instance)
+
+ if (general_keep_predicate is None) and (combined_specific_keep_predicate is None):
+ return None
+ if general_keep_predicate is None:
+ return combined_specific_keep_predicate
+ if combined_specific_keep_predicate is None:
+ return general_keep_predicate
+ return combined_general_specific_keep_predicate
+
+
+def _get_test_keep_instance_predicate(cfg: CfgNode):
+ general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
+ return general_keep_predicate
+
+
+def _maybe_filter_and_map_categories(
+ dataset_name: str, dataset_dicts: List[Instance]
+) -> List[Instance]:
+ meta = MetadataCatalog.get(dataset_name)
+ whitelisted_categories = meta.get("whitelisted_categories")
+ category_map = meta.get("category_map", {})
+ if whitelisted_categories is None and not category_map:
+ return dataset_dicts
+ filtered_dataset_dicts = []
+ for dataset_dict in dataset_dicts:
+ anns = []
+ for ann in dataset_dict["annotations"]:
+ cat_id = ann["category_id"]
+ if whitelisted_categories is not None and cat_id not in whitelisted_categories:
+ continue
+ ann["category_id"] = category_map.get(cat_id, cat_id)
+ anns.append(ann)
+ dataset_dict["annotations"] = anns
+ filtered_dataset_dicts.append(dataset_dict)
+ return filtered_dataset_dicts
+
+
+def _add_category_whitelists_to_metadata(cfg: CfgNode):
+ for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items():
+ meta = MetadataCatalog.get(dataset_name)
+ meta.whitelisted_categories = whitelisted_cat_ids
+ logger = logging.getLogger(__name__)
+ logger.info(
+ "Whitelisted categories for dataset {}: {}".format(
+ dataset_name, meta.whitelisted_categories
+ )
+ )
+
+
+def _add_category_maps_to_metadata(cfg: CfgNode):
+ for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items():
+ category_map = {
+ int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items()
+ }
+ meta = MetadataCatalog.get(dataset_name)
+ meta.category_map = category_map
+ logger = logging.getLogger(__name__)
+ logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map))
+
+
+def combine_detection_dataset_dicts(
+ dataset_names: Collection[str],
+ keep_instance_predicate: Optional[InstancePredicate] = None,
+ proposal_files: Optional[Collection[str]] = None,
+) -> List[Instance]:
+ """
+ Load and prepare dataset dicts for training / testing
+
+ Args:
+ dataset_names (Collection[str]): a list of dataset names
+ keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
+ applied to instance dicts which defines whether to keep the instance
+ proposal_files (Collection[str]): if given, a list of object proposal files
+ that match each dataset in `dataset_names`.
+ """
+ assert len(dataset_names)
+ if proposal_files is None:
+ proposal_files = [None] * len(dataset_names)
+ assert len(dataset_names) == len(proposal_files)
+ # load annotations and dataset metadata
+ dataset_map = {}
+ for dataset_name in dataset_names:
+ dataset_dicts = DatasetCatalog.get(dataset_name)
+ dataset_map[dataset_name] = dataset_dicts
+ # initialize category maps
+ _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
+ # apply category maps
+ all_datasets_dicts = []
+ for dataset_name, proposal_file in zip(dataset_names, proposal_files):
+ dataset_dicts = dataset_map[dataset_name]
+ assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
+ if proposal_file is not None:
+ dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
+ dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
+ _map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
+ print_instances_class_histogram(
+ dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes
+ )
+ all_datasets_dicts.append(dataset_dicts)
+
+ if keep_instance_predicate is not None:
+ all_datasets_dicts_plain = [
+ d
+ for d in itertools.chain.from_iterable(all_datasets_dicts)
+ if keep_instance_predicate(d)
+ ]
+ else:
+ all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts))
+ return all_datasets_dicts_plain
+
+
+def build_detection_train_loader(cfg: CfgNode, mapper=None):
+ """
+    A data loader is created in a way similar to that of Detectron2.
+    The main difference is that it allows combining datasets with different
+    but compatible object category sets.
+
+ The data loader is created by the following steps:
+ 1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
+ 2. Start workers to work on the dicts. Each worker will:
+ * Map each metadata dict into another format to be consumed by the model.
+ * Batch them by simply putting dicts into a list.
+ The batched ``list[mapped_dict]`` is what this dataloader will return.
+
+ Args:
+ cfg (CfgNode): the config
+ mapper (callable): a callable which takes a sample (dict) from dataset and
+ returns the format to be consumed by the model.
+ By default it will be `DatasetMapper(cfg, True)`.
+
+ Returns:
+ an infinite iterator of training data
+ """
+
+ _add_category_whitelists_to_metadata(cfg)
+ _add_category_maps_to_metadata(cfg)
+ dataset_dicts = combine_detection_dataset_dicts(
+ cfg.DATASETS.TRAIN,
+ keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
+ proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
+ )
+ dataset = DatasetFromList(dataset_dicts, copy=False)
+
+ if mapper is None:
+ mapper = DatasetMapper(cfg, True)
+ dataset = MapDataset(dataset, mapper)
+
+ sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
+ logger = logging.getLogger(__name__)
+ logger.info("Using training sampler {}".format(sampler_name))
+ if sampler_name == "TrainingSampler":
+ sampler = TrainingSampler(len(dataset))
+ elif sampler_name == "RepeatFactorTrainingSampler":
+ repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
+ dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
+ )
+ sampler = RepeatFactorTrainingSampler(repeat_factors)
+ else:
+ raise ValueError("Unknown training sampler: {}".format(sampler_name))
+
+ return build_batch_data_loader(
+ dataset,
+ sampler,
+ cfg.SOLVER.IMS_PER_BATCH,
+ aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
+ num_workers=cfg.DATALOADER.NUM_WORKERS,
+ )
+
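+
+# Editor's note: an illustrative sketch, not part of the original patch. This
+# loader is typically plugged into a detectron2 trainer by overriding its
+# classmethod, e.g.:
+#
+#     from detectron2.engine import DefaultTrainer
+#
+#     class Trainer(DefaultTrainer):
+#         @classmethod
+#         def build_train_loader(cls, cfg):
+#             return build_detection_train_loader(cfg)
+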
+
+def build_detection_test_loader(cfg, dataset_name, mapper=None):
+ """
+    Similar to `build_detection_train_loader`, but uses the given `dataset_name`
+    argument (instead of the names in cfg) and a batch size of 1.
+
+ Args:
+ cfg: a detectron2 CfgNode
+ dataset_name (str): a name of the dataset that's available in the DatasetCatalog
+ mapper (callable): a callable which takes a sample (dict) from dataset
+ and returns the format to be consumed by the model.
+ By default it will be `DatasetMapper(cfg, False)`.
+
+ Returns:
+ DataLoader: a torch DataLoader, that loads the given detection
+ dataset, with test-time transformation and batching.
+ """
+ _add_category_whitelists_to_metadata(cfg)
+ _add_category_maps_to_metadata(cfg)
+ dataset_dicts = combine_detection_dataset_dicts(
+ [dataset_name],
+ keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
+ proposal_files=[
+ cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
+ ]
+ if cfg.MODEL.LOAD_PROPOSALS
+ else None,
+ )
+
+ dataset = DatasetFromList(dataset_dicts)
+ if mapper is None:
+ mapper = DatasetMapper(cfg, False)
+ dataset = MapDataset(dataset, mapper)
+
+ sampler = InferenceSampler(len(dataset))
+ # Always use 1 image per worker during inference since this is the
+ # standard when reporting inference time in papers.
+ batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
+
+ data_loader = torch.utils.data.DataLoader(
+ dataset,
+ num_workers=cfg.DATALOADER.NUM_WORKERS,
+ batch_sampler=batch_sampler,
+ collate_fn=trivial_batch_collator,
+ )
+ return data_loader
+
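+
+# Editor's note: an illustrative sketch, not part of the original patch.
+# Evaluation code typically builds one such loader per test dataset name
+# listed in the config, e.g.:
+#
+#     for dataset_name in cfg.DATASETS.TEST:
+#         data_loader = build_detection_test_loader(cfg, dataset_name)
+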
+
+def build_frame_selector(cfg: CfgNode):
+ strategy = FrameSelectionStrategy(cfg.STRATEGY)
+ if strategy == FrameSelectionStrategy.RANDOM_K:
+ frame_selector = RandomKFramesSelector(cfg.NUM_IMAGES)
+ elif strategy == FrameSelectionStrategy.FIRST_K:
+ frame_selector = FirstKFramesSelector(cfg.NUM_IMAGES)
+ elif strategy == FrameSelectionStrategy.LAST_K:
+ frame_selector = LastKFramesSelector(cfg.NUM_IMAGES)
+ elif strategy == FrameSelectionStrategy.ALL:
+ frame_selector = None
+ return frame_selector
+
+
+def build_transform(cfg: CfgNode, data_type: str):
+ if cfg.TYPE == "resize":
+ if data_type == "image":
+ return ImageResizeTransform(cfg.MIN_SIZE, cfg.MAX_SIZE)
+ raise ValueError(f"Unknown transform {cfg.TYPE} for data type {data_type}")
+
+
+def build_combined_loader(cfg: CfgNode, loaders: Collection[Loader], ratios: Sequence[float]):
+ images_per_worker = _compute_num_images_per_worker(cfg)
+ return CombinedDataLoader(loaders, images_per_worker, ratios)
+
+
+def build_bootstrap_dataset(dataset_name: str, cfg: CfgNode) -> Sequence[torch.Tensor]:
+ """
+ Build dataset that provides data to bootstrap on
+
+ Args:
+ dataset_name (str): Name of the dataset, needs to have associated metadata
+ to load the data
+ cfg (CfgNode): bootstrapping config
+ Returns:
+ Sequence[Tensor] - dataset that provides image batches, Tensors of size
+ [N, C, H, W] of type float32
+ """
+ logger = logging.getLogger(__name__)
+ meta = MetadataCatalog.get(dataset_name)
+ factory = BootstrapDatasetFactoryCatalog.get(meta.dataset_type)
+ dataset = None
+ if factory is not None:
+ dataset = factory(meta, cfg)
+ if dataset is None:
+ logger.warning(f"Failed to create dataset {dataset_name} of type {meta.dataset_type}")
+ return dataset
+
+
+def build_data_sampler(cfg: CfgNode):
+ if cfg.TYPE == "densepose_uniform":
+ data_sampler = PredictionToGroundTruthSampler()
+ # transform densepose pred -> gt
+ data_sampler.register_sampler(
+ "pred_densepose",
+ "gt_densepose",
+ DensePoseUniformSampler(count_per_class=cfg.COUNT_PER_CLASS),
+ )
+ data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+ return data_sampler
+ elif cfg.TYPE == "densepose_UV_confidence":
+ data_sampler = PredictionToGroundTruthSampler()
+ # transform densepose pred -> gt
+ data_sampler.register_sampler(
+ "pred_densepose",
+ "gt_densepose",
+ DensePoseConfidenceBasedSampler(
+ confidence_channel="sigma_2",
+ count_per_class=cfg.COUNT_PER_CLASS,
+ search_proportion=0.5,
+ ),
+ )
+ data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+ return data_sampler
+ elif cfg.TYPE == "densepose_fine_segm_confidence":
+ data_sampler = PredictionToGroundTruthSampler()
+ # transform densepose pred -> gt
+ data_sampler.register_sampler(
+ "pred_densepose",
+ "gt_densepose",
+ DensePoseConfidenceBasedSampler(
+ confidence_channel="fine_segm_confidence",
+ count_per_class=cfg.COUNT_PER_CLASS,
+ search_proportion=0.5,
+ ),
+ )
+ data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+ return data_sampler
+ elif cfg.TYPE == "densepose_coarse_segm_confidence":
+ data_sampler = PredictionToGroundTruthSampler()
+ # transform densepose pred -> gt
+ data_sampler.register_sampler(
+ "pred_densepose",
+ "gt_densepose",
+ DensePoseConfidenceBasedSampler(
+ confidence_channel="coarse_segm_confidence",
+ count_per_class=cfg.COUNT_PER_CLASS,
+ search_proportion=0.5,
+ ),
+ )
+ data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+ return data_sampler
+
+ raise ValueError(f"Unknown data sampler type {cfg.TYPE}")
+
+
+def build_data_filter(cfg: CfgNode):
+ if cfg.TYPE == "detection_score":
+ min_score = cfg.MIN_VALUE
+ return ScoreBasedFilter(min_score=min_score)
+ raise ValueError(f"Unknown data filter type {cfg.TYPE}")
+
+
+def build_inference_based_loader(
+ cfg: CfgNode, dataset_cfg: CfgNode, model: torch.nn.Module
+) -> InferenceBasedLoader:
+ """
+ Constructs data loader based on inference results of a model.
+ """
+ dataset = build_bootstrap_dataset(dataset_cfg.DATASET, dataset_cfg.IMAGE_LOADER)
+ training_sampler = TrainingSampler(len(dataset))
+ data_loader = torch.utils.data.DataLoader(
+ dataset,
+ batch_size=dataset_cfg.IMAGE_LOADER.BATCH_SIZE,
+ sampler=training_sampler,
+ num_workers=dataset_cfg.IMAGE_LOADER.NUM_WORKERS,
+ collate_fn=trivial_batch_collator,
+ )
+ return InferenceBasedLoader(
+ model,
+ data_loader=data_loader,
+ data_sampler=build_data_sampler(dataset_cfg.DATA_SAMPLER),
+ data_filter=build_data_filter(dataset_cfg.FILTER),
+ shuffle=True,
+ batch_size=dataset_cfg.INFERENCE.OUTPUT_BATCH_SIZE,
+ inference_batch_size=dataset_cfg.INFERENCE.INPUT_BATCH_SIZE,
+ )
+
+
+def has_inference_based_loaders(cfg: CfgNode) -> bool:
+ """
+    Returns True if at least one inference-based loader must
+    be instantiated for training.
+ """
+ return len(cfg.BOOTSTRAP_DATASETS) > 0
+
+
+def build_inference_based_loaders(
+ cfg: CfgNode, model: torch.nn.Module
+) -> List[InferenceBasedLoader]:
+ loaders = []
+ ratios = []
+ for dataset_spec in cfg.BOOTSTRAP_DATASETS:
+ dataset_cfg = get_bootstrap_dataset_config().clone()
+ dataset_cfg.merge_from_other_cfg(CfgNode(dataset_spec))
+ loader = build_inference_based_loader(cfg, dataset_cfg, model)
+ loaders.append(loader)
+ ratios.append(dataset_cfg.RATIO)
+ return loaders, ratios
+
+
+def build_video_list_dataset(meta: Metadata, cfg: CfgNode):
+ video_list_fpath = meta.video_list_fpath
+ video_base_path = meta.video_base_path
+ if cfg.TYPE == "video_keyframe":
+ frame_selector = build_frame_selector(cfg.SELECT)
+ transform = build_transform(cfg.TRANSFORM, data_type="image")
+ video_list = video_list_from_file(video_list_fpath, video_base_path)
+ return VideoKeyframeDataset(video_list, frame_selector, transform)
+
+
+class _BootstrapDatasetFactoryCatalog(UserDict):
+ """
+    A global dictionary that stores factory functions used to create bootstrap
+    datasets from metadata and config, keyed by DatasetType
+ """
+
+ def register(self, dataset_type: DatasetType, factory: Callable[[Metadata, CfgNode], Dataset]):
+ """
+ Args:
+ dataset_type (DatasetType): a DatasetType e.g. DatasetType.VIDEO_LIST
+            factory (Callable[[Metadata, CfgNode], Dataset]): a callable which takes Metadata and cfg
+ arguments and returns a dataset object.
+ """
+ assert dataset_type not in self, "Dataset '{}' is already registered!".format(dataset_type)
+ self[dataset_type] = factory
+
+
+BootstrapDatasetFactoryCatalog = _BootstrapDatasetFactoryCatalog()
+BootstrapDatasetFactoryCatalog.register(DatasetType.VIDEO_LIST, build_video_list_dataset)
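+
+# Editor's note: an illustrative sketch, not part of the original patch. A factory
+# for another dataset type would be registered in the same way as the video-list
+# factory above; `DatasetType.IMAGE_LIST` and `build_image_list_dataset` are
+# hypothetical names used only for illustration:
+#
+#     BootstrapDatasetFactoryCatalog.register(
+#         DatasetType.IMAGE_LIST, build_image_list_dataset
+#     )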
diff --git a/projects/DensePose/densepose/data/combined_loader.py b/projects/DensePose/densepose/data/combined_loader.py
new file mode 100644
index 0000000..73278b4
--- /dev/null
+++ b/projects/DensePose/densepose/data/combined_loader.py
@@ -0,0 +1,44 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import random
+from collections import deque
+from typing import Any, Collection, Deque, Iterable, Iterator, List, Sequence
+
+Loader = Iterable[Any]
+
+
+def _pooled_next(iterator: Iterator[Any], pool: Deque[Any]):
+ if not pool:
+ pool.extend(next(iterator))
+ return pool.popleft()
+
+
+class CombinedDataLoader:
+ """
+ Combines data loaders using the provided sampling ratios
+ """
+
+ BATCH_COUNT = 100
+
+ def __init__(self, loaders: Collection[Loader], batch_size: int, ratios: Sequence[float]):
+ self.loaders = loaders
+ self.batch_size = batch_size
+ self.ratios = ratios
+
+ def __iter__(self) -> Iterator[List[Any]]:
+ iters = [iter(loader) for loader in self.loaders]
+ indices = []
+        # one independent buffer per loader ([deque()] * n would alias a single deque)
+        pool = [deque() for _ in iters]
+ # infinite iterator, as in D2
+ while True:
+ if not indices:
+ # just a buffer of indices, its size doesn't matter
+ # as long as it's a multiple of batch_size
+ k = self.batch_size * self.BATCH_COUNT
+ indices = random.choices(range(len(self.loaders)), self.ratios, k=k)
+ try:
+ batch = [_pooled_next(iters[i], pool[i]) for i in indices[: self.batch_size]]
+ except StopIteration:
+ break
+ indices = indices[self.batch_size :]
+ yield batch
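+
+
+# Editor's note: an illustrative sketch, not part of the original patch. Combining
+# two loaders so that roughly 90% of the samples in each batch come from the first
+# one; `loader_a` and `loader_b` stand for any iterables yielding batches of samples:
+#
+#     combined = CombinedDataLoader([loader_a, loader_b], batch_size=4, ratios=(0.9, 0.1))
+#     for batch in combined:  # infinite unless an underlying loader is exhausted
+#         ...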
diff --git a/projects/DensePose/densepose/data/dataset_mapper.py b/projects/DensePose/densepose/data/dataset_mapper.py
new file mode 100644
index 0000000..817fec1
--- /dev/null
+++ b/projects/DensePose/densepose/data/dataset_mapper.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import copy
+import logging
+from typing import Any, Dict, Tuple
+import torch
+from fvcore.common.file_io import PathManager
+
+from detectron2.data import MetadataCatalog
+from detectron2.data import detection_utils as utils
+from detectron2.data import transforms as T
+from detectron2.layers import ROIAlign
+from detectron2.structures import BoxMode
+
+from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
+
+
+def build_augmentation(cfg, is_train):
+ logger = logging.getLogger(__name__)
+ result = utils.build_augmentation(cfg, is_train)
+ if is_train:
+ random_rotation = T.RandomRotation(
+ cfg.INPUT.ROTATION_ANGLES, expand=False, sample_style="choice"
+ )
+ result.append(random_rotation)
+ logger.info("DensePose-specific augmentation used in training: " + str(random_rotation))
+ return result
+
+
+class DatasetMapper:
+ """
+ A customized version of `detectron2.data.DatasetMapper`
+ """
+
+ def __init__(self, cfg, is_train=True):
+ self.augmentation = build_augmentation(cfg, is_train)
+
+ # fmt: off
+ self.img_format = cfg.INPUT.FORMAT
+ self.mask_on = (
+ cfg.MODEL.MASK_ON or (
+ cfg.MODEL.DENSEPOSE_ON
+ and cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS)
+ )
+ self.keypoint_on = cfg.MODEL.KEYPOINT_ON
+ self.densepose_on = cfg.MODEL.DENSEPOSE_ON
+ assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
+ # fmt: on
+ if self.keypoint_on and is_train:
+ # Flip only makes sense in training
+ self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
+ else:
+ self.keypoint_hflip_indices = None
+
+ if self.densepose_on:
+ densepose_transform_srcs = [
+ MetadataCatalog.get(ds).densepose_transform_src
+ for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
+ ]
+ assert len(densepose_transform_srcs) > 0
+ # TODO: check that DensePose transformation data is the same for
+ # all the datasets. Otherwise one would have to pass DB ID with
+ # each entry to select proper transformation data. For now, since
+ # all DensePose annotated data uses the same data semantics, we
+ # omit this check.
+ densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0])
+ self.densepose_transform_data = DensePoseTransformData.load(
+ densepose_transform_data_fpath
+ )
+
+ self.is_train = is_train
+
+ def __call__(self, dataset_dict):
+ """
+ Args:
+ dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
+
+ Returns:
+ dict: a format that builtin models in detectron2 accept
+ """
+ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
+ image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
+ utils.check_image_size(dataset_dict, image)
+
+ image, transforms = T.apply_transform_gens(self.augmentation, image)
+ image_shape = image.shape[:2] # h, w
+ dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
+
+ if not self.is_train:
+ dataset_dict.pop("annotations", None)
+ return dataset_dict
+
+ for anno in dataset_dict["annotations"]:
+ if not self.mask_on:
+ anno.pop("segmentation", None)
+ if not self.keypoint_on:
+ anno.pop("keypoints", None)
+
+ # USER: Implement additional transformations if you have other types of data
+ # USER: Don't call transpose_densepose if you don't need
+ annos = [
+ self._transform_densepose(
+ utils.transform_instance_annotations(
+ obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
+ ),
+ transforms,
+ )
+ for obj in dataset_dict.pop("annotations")
+ if obj.get("iscrowd", 0) == 0
+ ]
+
+ if self.mask_on:
+ self._add_densepose_masks_as_segmentation(annos, image_shape)
+
+ instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask")
+ densepose_annotations = [obj.get("densepose") for obj in annos]
+ if densepose_annotations and not all(v is None for v in densepose_annotations):
+ instances.gt_densepose = DensePoseList(
+ densepose_annotations, instances.gt_boxes, image_shape
+ )
+
+ dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
+ return dataset_dict
+
+ def _transform_densepose(self, annotation, transforms):
+ if not self.densepose_on:
+ return annotation
+
+ # Handle densepose annotations
+ is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
+ if is_valid:
+ densepose_data = DensePoseDataRelative(annotation, cleanup=True)
+ densepose_data.apply_transform(transforms, self.densepose_transform_data)
+ annotation["densepose"] = densepose_data
+ else:
+ # logger = logging.getLogger(__name__)
+ # logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid))
+ DensePoseDataRelative.cleanup_annotation(annotation)
+ # NOTE: annotations for certain instances may be unavailable.
+            # 'None' is accepted by the DensePoseList data structure.
+ annotation["densepose"] = None
+ return annotation
+
+ def _add_densepose_masks_as_segmentation(
+ self, annotations: Dict[str, Any], image_shape_hw: Tuple[int, int]
+ ):
+ for obj in annotations:
+ if ("densepose" not in obj) or ("segmentation" in obj):
+ continue
+ # DP segmentation: torch.Tensor [S, S] of float32, S=256
+ segm_dp = torch.zeros_like(obj["densepose"].segm)
+ segm_dp[obj["densepose"].segm > 0] = 1
+ segm_h, segm_w = segm_dp.shape
+ bbox_segm_dp = torch.tensor((0, 0, segm_h - 1, segm_w - 1), dtype=torch.float32)
+ # image bbox
+ x0, y0, x1, y1 = (
+ v.item() for v in BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
+ )
+ segm_aligned = (
+ ROIAlign((y1 - y0, x1 - x0), 1.0, 0, aligned=True)
+ .forward(segm_dp.view(1, 1, *segm_dp.shape), bbox_segm_dp)
+ .squeeze()
+ )
+ image_mask = torch.zeros(*image_shape_hw, dtype=torch.float32)
+ image_mask[y0:y1, x0:x1] = segm_aligned
+            # segmentation for BitMasks: boolean tensor of shape [H, W]
+ obj["segmentation"] = image_mask >= 0.5
diff --git a/projects/DensePose/densepose/data/datasets/__init__.py b/projects/DensePose/densepose/data/datasets/__init__.py
new file mode 100644
index 0000000..0ea9c2f
--- /dev/null
+++ b/projects/DensePose/densepose/data/datasets/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from . import builtin # ensure the builtin datasets are registered
+
+__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
diff --git a/projects/DensePose/densepose/data/datasets/builtin.py b/projects/DensePose/densepose/data/datasets/builtin.py
new file mode 100644
index 0000000..c788f24
--- /dev/null
+++ b/projects/DensePose/densepose/data/datasets/builtin.py
@@ -0,0 +1,13 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from .chimpnsee import register_dataset as register_chimpnsee_dataset
+from .coco import BASE_DATASETS as BASE_COCO_DATASETS
+from .coco import DATASETS as COCO_DATASETS
+from .coco import register_datasets as register_coco_datasets
+
+DEFAULT_DATASETS_ROOT = "datasets"
+
+
+register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
+register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)
+
+register_chimpnsee_dataset(DEFAULT_DATASETS_ROOT)
diff --git a/projects/DensePose/densepose/data/datasets/chimpnsee.py b/projects/DensePose/densepose/data/datasets/chimpnsee.py
new file mode 100644
index 0000000..7b68bea
--- /dev/null
+++ b/projects/DensePose/densepose/data/datasets/chimpnsee.py
@@ -0,0 +1,28 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import os
+from typing import Optional
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+
+from ..utils import maybe_prepend_base_path
+from .dataset_type import DatasetType
+
+CHIMPNSEE_DATASET_NAME = "chimpnsee"
+
+
+def register_dataset(datasets_root: Optional[os.PathLike] = None):
+ def empty_load_callback():
+ pass
+
+ video_list_fpath = maybe_prepend_base_path(
+ datasets_root, "chimpnsee/cdna.eva.mpg.de/video_list.txt"
+ )
+ video_base_path = maybe_prepend_base_path(datasets_root, "chimpnsee/cdna.eva.mpg.de")
+
+ DatasetCatalog.register(CHIMPNSEE_DATASET_NAME, empty_load_callback)
+ MetadataCatalog.get(CHIMPNSEE_DATASET_NAME).set(
+ dataset_type=DatasetType.VIDEO_LIST,
+ video_list_fpath=video_list_fpath,
+ video_base_path=video_base_path,
+ )
diff --git a/projects/DensePose/densepose/data/datasets/coco.py b/projects/DensePose/densepose/data/datasets/coco.py
new file mode 100644
index 0000000..9b5bdfe
--- /dev/null
+++ b/projects/DensePose/densepose/data/datasets/coco.py
@@ -0,0 +1,324 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import contextlib
+import io
+import logging
+import os
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, List, Optional
+from fvcore.common.file_io import PathManager
+from fvcore.common.timer import Timer
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import BoxMode
+
+from ..utils import maybe_prepend_base_path
+
+DENSEPOSE_MASK_KEY = "dp_masks"
+DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
+DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
+DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"
+
+
+@dataclass
+class CocoDatasetInfo:
+ name: str
+ images_root: str
+ annotations_fpath: str
+
+
+DATASETS = [
+ CocoDatasetInfo(
+ name="densepose_coco_2014_train",
+ images_root="coco/train2014",
+ annotations_fpath="coco/annotations/densepose_train2014.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_coco_2014_minival",
+ images_root="coco/val2014",
+ annotations_fpath="coco/annotations/densepose_minival2014.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_coco_2014_minival_100",
+ images_root="coco/val2014",
+ annotations_fpath="coco/annotations/densepose_minival2014_100.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_coco_2014_valminusminival",
+ images_root="coco/val2014",
+ annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_chimps",
+ images_root="densepose_evolution/densepose_chimps",
+ annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json",
+ ),
+ CocoDatasetInfo(
+ name="posetrack2017_train",
+ images_root="posetrack2017/posetrack_data_2017",
+ annotations_fpath="posetrack2017/densepose_posetrack_train2017.json",
+ ),
+ CocoDatasetInfo(
+ name="posetrack2017_val",
+ images_root="posetrack2017/posetrack_data_2017",
+ annotations_fpath="posetrack2017/densepose_posetrack_val2017.json",
+ ),
+]
+
+
+BASE_DATASETS = [
+ CocoDatasetInfo(
+ name="base_coco_2017_train",
+ images_root="coco/train2017",
+ annotations_fpath="coco/annotations/instances_train2017.json",
+ ),
+ CocoDatasetInfo(
+ name="base_coco_2017_val",
+ images_root="coco/val2017",
+ annotations_fpath="coco/annotations/instances_val2017.json",
+ ),
+ CocoDatasetInfo(
+ name="base_coco_2017_val_100",
+ images_root="coco/val2017",
+ annotations_fpath="coco/annotations/instances_val2017_100.json",
+ ),
+]
+
+
+def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]:
+ """
+ Returns metadata associated with COCO DensePose datasets
+
+ Args:
+ base_path: Optional[os.PathLike]
+ Base path used to load metadata from
+
+ Returns:
+ Dict[str, Any]
+ Metadata in the form of a dictionary
+ """
+ meta = {
+ "densepose_transform_src": maybe_prepend_base_path(base_path, "UV_symmetry_transforms.mat"),
+ "densepose_smpl_subdiv": maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"),
+ "densepose_smpl_subdiv_transform": maybe_prepend_base_path(
+ base_path, "SMPL_SUBDIV_TRANSFORM.mat"
+ ),
+ }
+ return meta
+
+
+def _load_coco_annotations(json_file: str):
+ """
+ Load COCO annotations from a JSON file
+
+ Args:
+ json_file: str
+ Path to the file to load annotations from
+ Returns:
+ Instance of `pycocotools.coco.COCO` that provides access to annotations
+ data
+ """
+ from pycocotools.coco import COCO
+
+ logger = logging.getLogger(__name__)
+ timer = Timer()
+ with contextlib.redirect_stdout(io.StringIO()):
+ coco_api = COCO(json_file)
+ if timer.seconds() > 1:
+ logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
+ return coco_api
+
+
+def _add_categories_metadata(dataset_name: str, categories: Dict[str, Any]):
+ meta = MetadataCatalog.get(dataset_name)
+ meta.categories = {c["id"]: c["name"] for c in categories}
+ logger = logging.getLogger(__name__)
+ logger.info("Dataset {} categories: {}".format(dataset_name, categories))
+
+
+def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
+ if "minival" in json_file:
+ # Skip validation on COCO2014 valminusminival and minival annotations
+ # The ratio of buggy annotations there is tiny and does not affect accuracy
+ # Therefore we explicitly white-list them
+ return
+ ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
+ assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
+ json_file
+ )
+
+
+def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+ if "bbox" not in ann_dict:
+ return
+ obj["bbox"] = ann_dict["bbox"]
+ obj["bbox_mode"] = BoxMode.XYWH_ABS
+
+
+def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+ if "segmentation" not in ann_dict:
+ return
+ segm = ann_dict["segmentation"]
+ if not isinstance(segm, dict):
+ # filter out invalid polygons (< 3 points)
+ segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
+ if len(segm) == 0:
+ return
+ obj["segmentation"] = segm
+
+
+def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+ if "keypoints" not in ann_dict:
+ return
+ keypts = ann_dict["keypoints"] # list[int]
+ for idx, v in enumerate(keypts):
+ if idx % 3 != 2:
+ # COCO's segmentation coordinates are floating points in [0, H or W],
+            # but keypoint coordinates are integers in [0, H-1 or W-1].
+ # Therefore we assume the coordinates are "pixel indices" and
+ # add 0.5 to convert to floating point coordinates.
+ keypts[idx] = v + 0.5
+ obj["keypoints"] = keypts
+
+
+def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+ for key in DENSEPOSE_KEYS:
+ if key in ann_dict:
+ obj[key] = ann_dict[key]
+
+
+def _combine_images_with_annotations(
+ dataset_name: str,
+ image_root: str,
+ img_datas: Iterable[Dict[str, Any]],
+ ann_datas: Iterable[Iterable[Dict[str, Any]]],
+):
+
+ ann_keys = ["iscrowd", "category_id"]
+ dataset_dicts = []
+ contains_video_frame_info = False
+
+ for img_dict, ann_dicts in zip(img_datas, ann_datas):
+ record = {}
+ record["file_name"] = os.path.join(image_root, img_dict["file_name"])
+ record["height"] = img_dict["height"]
+ record["width"] = img_dict["width"]
+ record["image_id"] = img_dict["id"]
+ record["dataset"] = dataset_name
+ if "frame_id" in img_dict:
+ record["frame_id"] = img_dict["frame_id"]
+ record["video_id"] = img_dict.get("vid_id", None)
+ contains_video_frame_info = True
+ objs = []
+ for ann_dict in ann_dicts:
+ assert ann_dict["image_id"] == record["image_id"]
+ assert ann_dict.get("ignore", 0) == 0
+ obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict}
+ _maybe_add_bbox(obj, ann_dict)
+ _maybe_add_segm(obj, ann_dict)
+ _maybe_add_keypoints(obj, ann_dict)
+ _maybe_add_densepose(obj, ann_dict)
+ objs.append(obj)
+ record["annotations"] = objs
+ dataset_dicts.append(record)
+ if contains_video_frame_info:
+ create_video_frame_mapping(dataset_name, dataset_dicts)
+ return dataset_dicts
+
+
+def create_video_frame_mapping(dataset_name, dataset_dicts):
+ mapping = defaultdict(dict)
+ for d in dataset_dicts:
+ video_id = d.get("video_id")
+ if video_id is None:
+ continue
+ mapping[video_id].update({d["frame_id"]: d["file_name"]})
+ MetadataCatalog.get(dataset_name).set(video_frame_mapping=mapping)
+
+
+def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
+ """
+ Loads a JSON file with annotations in COCO instances format.
+ Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata
+ in a more flexible way. Postpones category mapping to a later stage to be
+ able to combine several datasets with different (but coherent) sets of
+ categories.
+
+ Args:
+
+ annotations_json_file: str
+ Path to the JSON file with annotations in COCO instances format.
+ image_root: str
+ directory that contains all the images
+ dataset_name: str
+ the name that identifies a dataset, e.g. "densepose_coco_2014_train"
+    Returns:
+        List[Dict[str, Any]]
+            Dataset records, one per image, with the corresponding annotations
+            combined under the "annotations" key
+ """
+ coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file))
+ _add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds()))
+ # sort indices for reproducible results
+ img_ids = sorted(coco_api.imgs.keys())
+ # imgs is a list of dicts, each looks something like:
+ # {'license': 4,
+ # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
+ # 'file_name': 'COCO_val2014_000000001268.jpg',
+ # 'height': 427,
+ # 'width': 640,
+ # 'date_captured': '2013-11-17 05:57:24',
+ # 'id': 1268}
+ imgs = coco_api.loadImgs(img_ids)
+ logger = logging.getLogger(__name__)
+ logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file))
+ # anns is a list[list[dict]], where each dict is an annotation
+ # record for an object. The inner list enumerates the objects in an image
+ # and the outer list enumerates over images.
+ anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
+ _verify_annotations_have_unique_ids(annotations_json_file, anns)
+ dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns)
+ return dataset_records
+
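+# A minimal usage sketch (illustrative only; the paths and dataset name below are
+# hypothetical placeholders):
+#
+#   records = load_coco_json(
+#       annotations_json_file="datasets/coco/annotations/densepose_train2014.json",
+#       image_root="datasets/coco/train2014",
+#       dataset_name="densepose_coco_2014_train",
+#   )
+#   # each record has "file_name", "height", "width", "image_id" and "annotations"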
+
+def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None):
+ """
+ Registers provided COCO DensePose dataset
+
+ Args:
+ dataset_data: CocoDatasetInfo
+ Dataset data
+ datasets_root: Optional[os.PathLike]
+ Datasets root folder (default: None)
+ """
+ annotations_fpath = maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
+ images_root = maybe_prepend_base_path(datasets_root, dataset_data.images_root)
+
+ def load_annotations():
+ return load_coco_json(
+ annotations_json_file=annotations_fpath,
+ image_root=images_root,
+ dataset_name=dataset_data.name,
+ )
+
+ DatasetCatalog.register(dataset_data.name, load_annotations)
+ MetadataCatalog.get(dataset_data.name).set(
+ json_file=annotations_fpath,
+ image_root=images_root,
+ **get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
+ )
+
+
+def register_datasets(
+ datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None
+):
+ """
+ Registers provided COCO DensePose datasets
+
+ Args:
+ datasets_data: Iterable[CocoDatasetInfo]
+ An iterable of dataset datas
+ datasets_root: Optional[os.PathLike]
+ Datasets root folder (default: None)
+ """
+ for dataset_data in datasets_data:
+ register_dataset(dataset_data, datasets_root)
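+
+# Usage sketch (illustrative only), assuming a `DATASETS` collection of CocoDatasetInfo
+# entries defined in this module and a hypothetical local root folder:
+#
+#   register_datasets(DATASETS, datasets_root="./datasets")
+#   records = DatasetCatalog.get("densepose_coco_2014_train")  # lazily runs load_coco_json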
diff --git a/projects/DensePose/densepose/data/datasets/dataset_type.py b/projects/DensePose/densepose/data/datasets/dataset_type.py
new file mode 100644
index 0000000..30e1c58
--- /dev/null
+++ b/projects/DensePose/densepose/data/datasets/dataset_type.py
@@ -0,0 +1,11 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from enum import Enum
+
+
+class DatasetType(Enum):
+ """
+ Dataset type, mostly used for datasets that contain data to bootstrap models on
+ """
+
+ VIDEO_LIST = "video_list"
diff --git a/projects/DensePose/densepose/data/image_list_dataset.py b/projects/DensePose/densepose/data/image_list_dataset.py
new file mode 100644
index 0000000..7d656f0
--- /dev/null
+++ b/projects/DensePose/densepose/data/image_list_dataset.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import logging
+import numpy as np
+from typing import Callable, List, Optional
+import torch
+from torch.utils.data.dataset import Dataset
+
+from detectron2.data.detection_utils import read_image
+
+ImageTransform = Callable[[torch.Tensor], torch.Tensor]
+
+
+class ImageListDataset(Dataset):
+ """
+ Dataset that provides images from a list.
+ """
+
+ _EMPTY_IMAGE = torch.empty((1, 1, 3))
+
+ def __init__(self, image_list: List[str], transform: Optional[ImageTransform] = None):
+ """
+ Args:
+ image_list (List[str]): list of paths to image files
+ """
+ self.image_list = image_list
+ self.transform = transform
+
+ def __getitem__(self, idx: int) -> torch.Tensor:
+ """
+        Gets the image at the given index in the list
+
+        Args:
+            idx (int): image index in the image list
+        Returns:
+            image (torch.Tensor): tensor of size [H, W, 3]
+ """
+ fpath = self.image_list[idx]
+
+ try:
+ image = torch.from_numpy(np.ascontiguousarray(read_image(fpath, format="BGR")))
+ if self.transform is not None:
+ image = self.transform(image.unsqueeze(0))[0] # Transforms are done on batches
+ return image
+ except (OSError, RuntimeError) as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(f"Error opening image file container {fpath}: {e}")
+
+ return self._EMPTY_IMAGE
+
+ def __len__(self):
+ return len(self.image_list)
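+
+# Usage sketch (illustrative only; the file names are hypothetical). Unreadable files
+# fall back to the (1, 1, 3) empty tensor defined above:
+#
+#   from torch.utils.data import DataLoader
+#   dataset = ImageListDataset(["frames/000.jpg", "frames/001.jpg"])
+#   loader = DataLoader(dataset, batch_size=2, collate_fn=lambda batch: batch)
+#   for images in loader:
+#       print([tuple(img.shape) for img in images])  # e.g. [(480, 640, 3), (1, 1, 3)]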
diff --git a/projects/DensePose/densepose/data/inference_based_loader.py b/projects/DensePose/densepose/data/inference_based_loader.py
new file mode 100644
index 0000000..433c686
--- /dev/null
+++ b/projects/DensePose/densepose/data/inference_based_loader.py
@@ -0,0 +1,146 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import random
+from typing import Any, Callable, Iterable, Iterator, List, Optional, Tuple
+import torch
+from torch import nn
+
+SampledData = Any
+ModelOutput = Any
+
+
+def _grouper(iterable: Iterable[Any], n: int, fillvalue=None) -> Iterator[Tuple[Any]]:
+ """
+ Group elements of an iterable by chunks of size `n`, e.g.
+ grouper(range(9), 4) ->
+ (0, 1, 2, 3), (4, 5, 6, 7), (8, None, None, None)
+ """
+ it = iter(iterable)
+ while True:
+ values = []
+ for _ in range(n):
+ try:
+ value = next(it)
+ except StopIteration:
+ if values:
+ values.extend([fillvalue] * (n - len(values)))
+ yield tuple(values)
+ return
+ values.append(value)
+ yield tuple(values)
+
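+# Quick illustration of the helper above:
+#
+#   list(_grouper(range(5), 2))                # [(0, 1), (2, 3), (4, None)]
+#   list(_grouper(range(5), 2, fillvalue=-1))  # [(0, 1), (2, 3), (4, -1)]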
+
+class ScoreBasedFilter:
+ """
+ Filters entries in model output based on their scores
+ Discards all entries with score less than the specified minimum
+ """
+
+ def __init__(self, min_score: float = 0.8):
+ self.min_score = min_score
+
+ def __call__(self, model_output: ModelOutput) -> ModelOutput:
+ for model_output_i in model_output:
+ instances = model_output_i["instances"]
+ if not instances.has("scores"):
+ continue
+ instances_filtered = instances[instances.scores >= self.min_score]
+ model_output_i["instances"] = instances_filtered
+ return model_output
+
+
+class InferenceBasedLoader:
+ """
+ Data loader based on results inferred by a model. Consists of:
+ - a data loader that provides batches of images
+ - a model that is used to infer the results
+ - a data sampler that converts inferred results to annotations
+ """
+
+ def __init__(
+ self,
+ model: nn.Module,
+ data_loader: Iterable[List[torch.Tensor]],
+ data_sampler: Optional[Callable[[ModelOutput], List[SampledData]]] = None,
+ data_filter: Optional[Callable[[ModelOutput], ModelOutput]] = None,
+ shuffle: bool = True,
+ batch_size: int = 4,
+ inference_batch_size: int = 4,
+ drop_last: bool = False,
+ ):
+ """
+ Constructor
+
+ Args:
+ model (torch.nn.Module): model used to produce data
+            data_loader (Iterable[List[Tensor]]): iterable that provides batches
+                of images to perform inference on
+            data_sampler (Callable: ModelOutput -> SampledData): functor
+                that produces annotation data from inference results;
+                (optional, default: None)
+            data_filter (Callable: ModelOutput -> ModelOutput): filter
+                that selects model outputs for further processing
+                (optional, default: None)
+ shuffle (bool): if True, the input images get shuffled
+ batch_size (int): batch size for the produced annotation data
+ inference_batch_size (int): batch size for input images
+ drop_last (bool): if True, drop the last batch if it is undersized
+ """
+ self.model = model
+ self.model.eval()
+ self.data_loader = data_loader
+ self.data_sampler = data_sampler
+ self.data_filter = data_filter
+ self.shuffle = shuffle
+ self.batch_size = batch_size
+ self.inference_batch_size = inference_batch_size
+ self.drop_last = drop_last
+
+ def __iter__(self) -> Iterator[List[SampledData]]:
+ for batch in self.data_loader:
+ # batch : List[Tensor[N, C, H, W]]
+ # images_batch : Tensor[N, C, H, W]
+ # image : Tensor[C, H, W]
+ images = [image for images_batch in batch for image in images_batch]
+ if not images:
+ continue
+ if self.shuffle:
+ random.shuffle(images)
+ yield from self._produce_data(images)
+
+ def _produce_data(self, images: List[torch.Tensor]) -> Iterator[List[SampledData]]:
+ """
+ Produce batches of data from images
+
+ Args:
+ images (List[Tensor]): list of images to process
+
+ Returns:
+ Iterator over batches of data sampled from model outputs
+ """
+ data_batches: List[SampledData] = []
+ batched_images = _grouper(images, self.inference_batch_size)
+ for batch in batched_images:
+ batch = [{"image": img.to(self.model.device)} for img in batch if img is not None]
+ if not batch:
+ continue
+ with torch.no_grad():
+ model_output = self.model(batch)
+ for model_output_i, batch_i in zip(model_output, batch):
+ model_output_i["image"] = batch_i["image"]
+ model_output_filtered = (
+ model_output if self.data_filter is None else self.data_filter(model_output)
+ )
+ data = (
+ model_output_filtered
+ if self.data_sampler is None
+ else self.data_sampler(model_output_filtered)
+ )
+ for data_i in data:
+ if len(data_i["instances"]):
+ data_batches.append(data_i)
+ if len(data_batches) >= self.batch_size:
+ yield data_batches[: self.batch_size]
+ data_batches = data_batches[self.batch_size :]
+ if not self.drop_last and data_batches:
+ yield data_batches
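+
+# Wiring sketch (illustrative only): `model` and `image_loader` are hypothetical
+# placeholders for an already-built detectron2 model and an iterable of image batches;
+# the import assumes the package is available as `densepose`.
+#
+#   from densepose.data.samplers import PredictionToGroundTruthSampler
+#
+#   loader = InferenceBasedLoader(
+#       model,
+#       data_loader=image_loader,                      # yields List[Tensor[N, C, H, W]]
+#       data_sampler=PredictionToGroundTruthSampler("synthetic_data"),
+#       data_filter=ScoreBasedFilter(min_score=0.8),
+#       batch_size=4,
+#   )
+#   for sampled_batch in loader:                       # List[SampledData]
+#       pass  # feed to a training loop as pseudo ground truth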
diff --git a/projects/DensePose/densepose/data/samplers/__init__.py b/projects/DensePose/densepose/data/samplers/__init__.py
new file mode 100644
index 0000000..5a2d5d8
--- /dev/null
+++ b/projects/DensePose/densepose/data/samplers/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from .densepose_uniform import DensePoseUniformSampler
+from .densepose_confidence_based import DensePoseConfidenceBasedSampler
+from .mask_from_densepose import MaskFromDensePoseSampler, densepose_to_mask
+from .prediction_to_gt import PredictionToGroundTruthSampler
diff --git a/projects/DensePose/densepose/data/samplers/densepose_base.py b/projects/DensePose/densepose/data/samplers/densepose_base.py
new file mode 100644
index 0000000..b51fc1f
--- /dev/null
+++ b/projects/DensePose/densepose/data/samplers/densepose_base.py
@@ -0,0 +1,190 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from typing import List, Optional
+import torch
+from torch.nn import functional as F
+
+from detectron2.structures import BoxMode, Instances
+
+from ..structures import (
+ DensePoseDataRelative,
+ DensePoseList,
+ DensePoseOutput,
+ resample_output_to_bbox,
+)
+
+
+class DensePoseBaseSampler:
+ """
+ Base DensePose sampler to produce DensePose data from DensePose predictions.
+ Samples for each class are drawn according to some distribution over all pixels estimated
+ to belong to that class.
+ """
+
+ def __init__(self, count_per_class: int = 8):
+ """
+ Constructor
+
+ Args:
+ count_per_class (int): the sampler produces at most `count_per_class`
+ samples for each category
+ """
+ self.count_per_class = count_per_class
+
+ def __call__(self, instances: Instances) -> DensePoseList:
+ """
+ Convert DensePose predictions (an instance of `DensePoseOutput`)
+ into DensePose annotations data (an instance of `DensePoseList`)
+ """
+ boxes_xyxy_abs = instances.pred_boxes.tensor.clone().cpu()
+ boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+ dp_datas = []
+ for i, box_xywh in enumerate(boxes_xywh_abs):
+ labels_i, result_i = resample_output_to_bbox(
+ instances.pred_densepose[i], box_xywh, self._confidence_channels()
+ )
+ annotation_i = self._sample(labels_i.cpu(), result_i.cpu(), box_xywh)
+ annotation_i[DensePoseDataRelative.S_KEY] = self._resample_mask(
+ instances.pred_densepose[i]
+ )
+
+ dp_datas.append(DensePoseDataRelative(annotation_i))
+ # create densepose annotations on CPU
+ dp_list = DensePoseList(dp_datas, boxes_xyxy_abs, instances.image_size)
+ return dp_list
+
+ def _sample(
+ self, labels: torch.Tensor, dp_result: torch.Tensor, bbox_xywh: List[int]
+ ) -> DensePoseDataRelative:
+ """
+        Sample DensePoseDataRelative from estimation results
+ """
+ annotation = {
+ DensePoseDataRelative.X_KEY: [],
+ DensePoseDataRelative.Y_KEY: [],
+ DensePoseDataRelative.U_KEY: [],
+ DensePoseDataRelative.V_KEY: [],
+ DensePoseDataRelative.I_KEY: [],
+ }
+ x0, y0, _, _ = bbox_xywh
+ n, h, w = dp_result.shape
+ for part_id in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
+ # indices - tuple of 3 1D tensors of size k
+ # 0: index along the first dimension N
+ # 1: index along H dimension
+ # 2: index along W dimension
+ indices = torch.nonzero(labels.expand(n, h, w) == part_id, as_tuple=True)
+ # values - an array of size [n, k]
+ # n: number of channels (U, V, confidences)
+ # k: number of points labeled with part_id
+ values = dp_result[indices].view(n, -1)
+ k = values.shape[1]
+ count = min(self.count_per_class, k)
+ if count <= 0:
+ continue
+ index_sample = self._produce_index_sample(values, count)
+ sampled_values = values[:, index_sample]
+ sampled_y = indices[1][index_sample] + 0.5
+ sampled_x = indices[2][index_sample] + 0.5
+ # prepare / normalize data
+ x = (sampled_x / w * 256.0).cpu().tolist()
+ y = (sampled_y / h * 256.0).cpu().tolist()
+ u = sampled_values[0].clamp(0, 1).cpu().tolist()
+ v = sampled_values[1].clamp(0, 1).cpu().tolist()
+ fine_segm_labels = [part_id] * count
+ # extend annotations
+ annotation[DensePoseDataRelative.X_KEY].extend(x)
+ annotation[DensePoseDataRelative.Y_KEY].extend(y)
+ annotation[DensePoseDataRelative.U_KEY].extend(u)
+ annotation[DensePoseDataRelative.V_KEY].extend(v)
+ annotation[DensePoseDataRelative.I_KEY].extend(fine_segm_labels)
+ return annotation
+
+ def _confidence_channels(self) -> Optional[List[str]]:
+ """
+        Confidence channels to be used for sampling (to be overridden in children)
+ """
+ return None
+
+ def _produce_index_sample(self, values: torch.Tensor, count: int):
+ """
+ Abstract method to produce a sample of indices to select data
+ To be implemented in descendants
+
+ Args:
+ values (torch.Tensor): an array of size [n, k] that contains
+ estimated values (U, V, confidences);
+ n: number of channels (U, V, confidences)
+ k: number of points labeled with part_id
+ count (int): number of samples to produce, should be positive and <= k
+
+ Return:
+ list(int): indices of values (along axis 1) selected as a sample
+ """
+ raise NotImplementedError
+
+ def _resample_mask(self, output: DensePoseOutput) -> torch.Tensor:
+ """
+ Convert output mask tensors into the annotation mask tensor of size
+ (256, 256)
+ """
+ sz = DensePoseDataRelative.MASK_SIZE
+ S = (
+ F.interpolate(output.S, (sz, sz), mode="bilinear", align_corners=False)
+ .argmax(dim=1)
+ .long()
+ )
+ I = (
+ (
+ F.interpolate(output.I, (sz, sz), mode="bilinear", align_corners=False).argmax(
+ dim=1
+ )
+ * (S > 0).long()
+ )
+ .squeeze()
+ .cpu()
+ )
+ # Map fine segmentation results to coarse segmentation ground truth
+ # TODO: extract this into separate classes
+ # coarse segmentation: 1 = Torso, 2 = Right Hand, 3 = Left Hand,
+ # 4 = Left Foot, 5 = Right Foot, 6 = Upper Leg Right, 7 = Upper Leg Left,
+ # 8 = Lower Leg Right, 9 = Lower Leg Left, 10 = Upper Arm Left,
+ # 11 = Upper Arm Right, 12 = Lower Arm Left, 13 = Lower Arm Right,
+ # 14 = Head
+ # fine segmentation: 1, 2 = Torso, 3 = Right Hand, 4 = Left Hand,
+ # 5 = Left Foot, 6 = Right Foot, 7, 9 = Upper Leg Right,
+ # 8, 10 = Upper Leg Left, 11, 13 = Lower Leg Right,
+ # 12, 14 = Lower Leg Left, 15, 17 = Upper Arm Left,
+ # 16, 18 = Upper Arm Right, 19, 21 = Lower Arm Left,
+ # 20, 22 = Lower Arm Right, 23, 24 = Head
+ FINE_TO_COARSE_SEGMENTATION = {
+ 1: 1,
+ 2: 1,
+ 3: 2,
+ 4: 3,
+ 5: 4,
+ 6: 5,
+ 7: 6,
+ 8: 7,
+ 9: 6,
+ 10: 7,
+ 11: 8,
+ 12: 9,
+ 13: 8,
+ 14: 9,
+ 15: 10,
+ 16: 11,
+ 17: 10,
+ 18: 11,
+ 19: 12,
+ 20: 13,
+ 21: 12,
+ 22: 13,
+ 23: 14,
+ 24: 14,
+ }
+ mask = torch.zeros((sz, sz), dtype=torch.int64, device=torch.device("cpu"))
+ for i in range(DensePoseDataRelative.N_PART_LABELS):
+ mask[I == i + 1] = FINE_TO_COARSE_SEGMENTATION[i + 1]
+ return mask
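+
+# Minimal subclass sketch (illustrative only) showing the `_produce_index_sample` contract:
+# return `count` indices along axis 1 of `values`. The uniform and confidence-based samplers
+# in this package are the real implementations.
+#
+#   class FirstKSampler(DensePoseBaseSampler):   # hypothetical example class
+#       def _produce_index_sample(self, values: torch.Tensor, count: int):
+#           return list(range(count))            # keep the first `count` points of the part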
diff --git a/projects/DensePose/densepose/data/samplers/densepose_confidence_based.py b/projects/DensePose/densepose/data/samplers/densepose_confidence_based.py
new file mode 100644
index 0000000..f0ebb0e
--- /dev/null
+++ b/projects/DensePose/densepose/data/samplers/densepose_confidence_based.py
@@ -0,0 +1,91 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import random
+from typing import List, Optional
+import torch
+
+from .densepose_base import DensePoseBaseSampler
+
+
+class DensePoseConfidenceBasedSampler(DensePoseBaseSampler):
+ """
+ Samples DensePose data from DensePose predictions.
+ Samples for each class are drawn using confidence value estimates.
+ """
+
+ def __init__(
+ self,
+ confidence_channel: str,
+ count_per_class: int = 8,
+ search_count_multiplier: Optional[float] = None,
+ search_proportion: Optional[float] = None,
+ ):
+ """
+ Constructor
+
+ Args:
+            confidence_channel (str): confidence channel to use for sampling;
+                possible values:
+                    "sigma_2": confidences for UV values
+                    "fine_segm_confidence": confidences for fine segmentation
+                    "coarse_segm_confidence": confidences for coarse segmentation
+ count_per_class (int): the sampler produces at most `count_per_class`
+ samples for each category (default: 8)
+ search_count_multiplier (float or None): if not None, the total number
+ of the most confident estimates of a given class to consider is
+ defined as `min(search_count_multiplier * count_per_class, N)`,
+ where `N` is the total number of estimates of the class; cannot be
+ specified together with `search_proportion` (default: None)
+            search_proportion (float or None): if not None, the total number
+                of the most confident estimates of a given class to consider is
+ defined as `min(max(search_proportion * N, count_per_class), N)`,
+ where `N` is the total number of estimates of the class; cannot be
+ specified together with `search_count_multiplier` (default: None)
+ """
+ super().__init__(count_per_class)
+ self.confidence_channel = confidence_channel
+ self.search_count_multiplier = search_count_multiplier
+ self.search_proportion = search_proportion
+ assert (search_count_multiplier is None) or (search_proportion is None), (
+ f"Cannot specify both search_count_multiplier (={search_count_multiplier})"
+ f"and search_proportion (={search_proportion})"
+ )
+
+ def _confidence_channels(self) -> Optional[List[str]]:
+ """
+        Confidence channels to be used for sampling
+ """
+ return [self.confidence_channel]
+
+ def _produce_index_sample(self, values: torch.Tensor, count: int):
+ """
+ Produce a sample of indices to select data based on confidences
+
+ Args:
+ values (torch.Tensor): an array of size [n, k] that contains
+ estimated values (U, V, confidences);
+ n: number of channels (U, V, confidences)
+ k: number of points labeled with part_id
+ count (int): number of samples to produce, should be positive and <= k
+
+ Return:
+ list(int): indices of values (along axis 1) selected as a sample
+ """
+ k = values.shape[1]
+ if k == count:
+ index_sample = list(range(k))
+ else:
+ # take the best count * search_count_multiplier pixels,
+ # sample from them uniformly
+ # (here best = smallest variance)
+ _, sorted_confidence_indices = torch.sort(values[2])
+ if self.search_count_multiplier is not None:
+ search_count = min(int(count * self.search_count_multiplier), k)
+ elif self.search_proportion is not None:
+ search_count = min(max(int(k * self.search_proportion), count), k)
+ else:
+ search_count = min(count, k)
+ sample_from_top = random.sample(range(search_count), count)
+ index_sample = sorted_confidence_indices[:search_count][sample_from_top]
+ return index_sample
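+
+# Construction sketch (illustrative only); the channel name must match a confidence tensor
+# actually produced by the model, e.g. "sigma_2" when UV confidence estimation is enabled:
+#
+#   sampler = DensePoseConfidenceBasedSampler(
+#       confidence_channel="sigma_2",
+#       count_per_class=8,
+#       search_proportion=0.5,   # sample uniformly from the most confident half of each part
+#   )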
diff --git a/projects/DensePose/densepose/data/samplers/densepose_uniform.py b/projects/DensePose/densepose/data/samplers/densepose_uniform.py
new file mode 100644
index 0000000..6cf083d
--- /dev/null
+++ b/projects/DensePose/densepose/data/samplers/densepose_uniform.py
@@ -0,0 +1,41 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import random
+import torch
+
+from .densepose_base import DensePoseBaseSampler
+
+
+class DensePoseUniformSampler(DensePoseBaseSampler):
+ """
+ Samples DensePose data from DensePose predictions.
+ Samples for each class are drawn uniformly over all pixels estimated
+ to belong to that class.
+ """
+
+ def __init__(self, count_per_class: int = 8):
+ """
+ Constructor
+
+ Args:
+ count_per_class (int): the sampler produces at most `count_per_class`
+ samples for each category
+ """
+ super().__init__(count_per_class)
+
+ def _produce_index_sample(self, values: torch.Tensor, count: int):
+ """
+ Produce a uniform sample of indices to select data
+
+ Args:
+ values (torch.Tensor): an array of size [n, k] that contains
+ estimated values (U, V, confidences);
+ n: number of channels (U, V, confidences)
+ k: number of points labeled with part_id
+ count (int): number of samples to produce, should be positive and <= k
+
+ Return:
+ list(int): indices of values (along axis 1) selected as a sample
+ """
+ k = values.shape[1]
+ return random.sample(range(k), count)
diff --git a/projects/DensePose/densepose/data/samplers/mask_from_densepose.py b/projects/DensePose/densepose/data/samplers/mask_from_densepose.py
new file mode 100644
index 0000000..66bf9c0
--- /dev/null
+++ b/projects/DensePose/densepose/data/samplers/mask_from_densepose.py
@@ -0,0 +1,59 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import torch
+
+from detectron2.structures import BitMasks, BoxMode, Instances
+
+from ..structures import resample_output_to_bbox
+
+
+def densepose_to_mask(instances: Instances) -> BitMasks:
+ """
+ Produce masks from DensePose predictions
+    DensePose predictions for a given image, stored in the `pred_densepose` field,
+    are instances of DensePoseOutput. This function takes
+    `S` and `I` output tensors (coarse and fine segmentation) and converts
+    them to a mask tensor, which is a bool tensor of the size of the input
+    image
+
+ Args:
+ instances (Instances): predicted results, expected to have `pred_densepose` field
+ that contains `DensePoseOutput` objects
+
+ Returns:
+ `BitMasks` instance with boolean tensors of the size of the input image that have non-zero
+ values at pixels that are estimated to belong to the detected objects
+ """
+ H, W = instances.image_size
+ boxes_xyxy_abs = instances.pred_boxes.tensor.clone().cpu()
+ boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+ N = len(boxes_xywh_abs)
+ gt_masks = torch.zeros((N, H, W), dtype=torch.bool, device=torch.device("cpu"))
+ for i, box_xywh in enumerate(boxes_xywh_abs):
+ labels_i, _ = resample_output_to_bbox(instances.pred_densepose[i], box_xywh)
+ x, y, w, h = box_xywh.long().tolist()
+ gt_masks[i, y : y + h, x : x + w] = labels_i.cpu() > 0
+ return BitMasks(gt_masks)
+
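+# Usage sketch (illustrative only); `instances` is a detectron2 `Instances` object with
+# `pred_boxes` and a `pred_densepose` field holding DensePoseOutput data:
+#
+#   masks = densepose_to_mask(instances)      # BitMasks with tensor of shape (N, H, W)
+#   areas = masks.tensor.sum(dim=(1, 2))      # per-instance foreground pixel counts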
+
+class MaskFromDensePoseSampler:
+ """
+ Produce mask GT from DensePose predictions
+ DensePose prediction is an instance of DensePoseOutput. This sampler takes
+ `S` and `I` output tensors (coarse and fine segmentation) and converts
+ then to a mask tensor, which is a bool tensor of the size of the input
+    them to a mask tensor, which is a bool tensor of the size of the input
+ """
+
+ def __call__(self, instances: Instances) -> BitMasks:
+ """
+ Converts predicted data from `instances` into the GT mask data
+
+ Args:
+ instances (Instances): predicted results, expected to have `pred_densepose` field
+
+ Returns:
+            `BitMasks` instance with boolean tensors of the size of the input image
+            that have non-zero values at pixels estimated to belong to the detected objects
+ """
+ return densepose_to_mask(instances)
diff --git a/projects/DensePose/densepose/data/samplers/prediction_to_gt.py b/projects/DensePose/densepose/data/samplers/prediction_to_gt.py
new file mode 100644
index 0000000..4d7f4b2
--- /dev/null
+++ b/projects/DensePose/densepose/data/samplers/prediction_to_gt.py
@@ -0,0 +1,80 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, Optional
+
+from detectron2.structures import Instances
+
+ModelOutput = Dict[str, Any]
+SampledData = Dict[str, Any]
+
+
+@dataclass
+class _Sampler:
+ """
+ Sampler registry entry that contains:
+ - src (str): source field to sample from (deleted after sampling)
+ - dst (Optional[str]): destination field to sample to, if not None
+ - func (Optional[Callable: Any -> Any]): function that performs sampling,
+ if None, reference copy is performed
+ """
+
+ src: str
+ dst: Optional[str]
+ func: Optional[Callable[[Any], Any]]
+
+
+class PredictionToGroundTruthSampler:
+ """
+ Sampler implementation that converts predictions to GT using registered
+ samplers for different fields of `Instances`.
+ """
+
+ def __init__(self, dataset_name: str = ""):
+ self.dataset_name = dataset_name
+ self._samplers = {}
+ self.register_sampler("pred_boxes", "gt_boxes", None)
+ self.register_sampler("pred_classes", "gt_classes", None)
+ self.register_sampler("scores")
+
+ def __call__(self, model_output: ModelOutput) -> SampledData:
+ """
+ Transform model output into ground truth data through sampling
+
+ Args:
+ model_output (Dict[str, Any]): model output
+ Returns:
+ Dict[str, Any]: sampled data
+ """
+ for model_output_i in model_output:
+ instances: Instances = model_output_i["instances"]
+ # transform data in each field
+ for _, sampler in self._samplers.items():
+ if not instances.has(sampler.src) or sampler.dst is None:
+ continue
+ if sampler.func is None:
+ instances.set(sampler.dst, instances.get(sampler.src))
+ else:
+ instances.set(sampler.dst, sampler.func(instances))
+ # delete model output data that was transformed
+ for _, sampler in self._samplers.items():
+ if sampler.src != sampler.dst and instances.has(sampler.src):
+ instances.remove(sampler.src)
+ model_output_i["dataset"] = self.dataset_name
+ return model_output
+
+ def register_sampler(
+ self,
+ prediction_attr: str,
+ gt_attr: Optional[str] = None,
+ func: Optional[Callable[[Any], Any]] = None,
+ ):
+ """
+ Register sampler for a field
+
+ Args:
+ prediction_attr (str): field to replace with a sampled value
+ gt_attr (Optional[str]): field to store the sampled value to, if not None
+ func (Optional[Callable: Any -> Any]): sampler function
+ """
+ self._samplers[prediction_attr] = _Sampler(src=prediction_attr, dst=gt_attr, func=func)
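+
+# Sketch of attaching an extra field sampler (illustrative only; the dataset name is
+# hypothetical); MaskFromDensePoseSampler is defined in this package:
+#
+#   from .mask_from_densepose import MaskFromDensePoseSampler
+#
+#   sampler = PredictionToGroundTruthSampler(dataset_name="synthetic_data")
+#   sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+#   # each call now replaces `pred_densepose` on the instances with sampled `gt_masks`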
diff --git a/projects/DensePose/densepose/data/structures.py b/projects/DensePose/densepose/data/structures.py
new file mode 100644
index 0000000..6b4a728
--- /dev/null
+++ b/projects/DensePose/densepose/data/structures.py
@@ -0,0 +1,703 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import base64
+import numpy as np
+from io import BytesIO
+from typing import BinaryIO, Dict, List, Optional, Tuple, Union
+import torch
+from PIL import Image
+from torch.nn import functional as F
+
+
+class DensePoseTransformData(object):
+
+ # Horizontal symmetry label transforms used for horizontal flip
+ MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
+ # fmt: off
+ POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa
+ # fmt: on
+
+ def __init__(self, uv_symmetries: Dict[str, torch.Tensor], device: torch.device):
+ self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
+ self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
+ self.uv_symmetries = uv_symmetries
+        self.device = device
+
+ def to(self, device: torch.device, copy: bool = False) -> "DensePoseTransformData":
+ """
+ Convert transform data to the specified device
+
+ Args:
+ device (torch.device): device to convert the data to
+ copy (bool): flag that specifies whether to copy or to reference the data
+ in case the device is the same
+ Return:
+ An instance of `DensePoseTransformData` with data stored on the specified device
+ """
+ if self.device == device and not copy:
+ return self
+ uv_symmetry_map = {}
+ for key in self.uv_symmetries:
+ uv_symmetry_map[key] = self.uv_symmetries[key].to(device=device, copy=copy)
+ return DensePoseTransformData(uv_symmetry_map, device)
+
+ @staticmethod
+ def load(io: Union[str, BinaryIO]):
+ """
+ Args:
+ io: (str or binary file-like object): input file to load data from
+ Returns:
+ An instance of `DensePoseTransformData` with transforms loaded from the file
+ """
+ import scipy.io
+
+ uv_symmetry_map = scipy.io.loadmat(io)
+ uv_symmetry_map_torch = {}
+ for key in ["U_transforms", "V_transforms"]:
+ uv_symmetry_map_torch[key] = []
+ map_src = uv_symmetry_map[key]
+ map_dst = uv_symmetry_map_torch[key]
+ for i in range(map_src.shape[1]):
+ map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float))
+ uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0)
+ transform_data = DensePoseTransformData(uv_symmetry_map_torch, device=torch.device("cpu"))
+ return transform_data
+
+
+class DensePoseDataRelative(object):
+ """
+ Dense pose relative annotations that can be applied to any bounding box:
+ x - normalized X coordinates [0, 255] of annotated points
+ y - normalized Y coordinates [0, 255] of annotated points
+ i - body part labels 0,...,24 for annotated points
+ u - body part U coordinates [0, 1] for annotated points
+ v - body part V coordinates [0, 1] for annotated points
+ segm - 256x256 segmentation mask with values 0,...,14
+ To obtain absolute x and y data wrt some bounding box one needs to first
+ divide the data by 256, multiply by the respective bounding box size
+ and add bounding box offset:
+ x_img = x0 + x_norm * w / 256.0
+ y_img = y0 + y_norm * h / 256.0
+ Segmentation masks are typically sampled to get image-based masks.
+ """
+
+ # Key for normalized X coordinates in annotation dict
+ X_KEY = "dp_x"
+ # Key for normalized Y coordinates in annotation dict
+ Y_KEY = "dp_y"
+ # Key for U part coordinates in annotation dict
+ U_KEY = "dp_U"
+ # Key for V part coordinates in annotation dict
+ V_KEY = "dp_V"
+ # Key for I point labels in annotation dict
+ I_KEY = "dp_I"
+ # Key for segmentation mask in annotation dict
+ S_KEY = "dp_masks"
+ # Number of body parts in segmentation masks
+ N_BODY_PARTS = 14
+ # Number of parts in point labels
+ N_PART_LABELS = 24
+ MASK_SIZE = 256
+
+ def __init__(self, annotation, cleanup=False):
+ is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
+ assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid)
+ self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY])
+ self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY])
+ self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY])
+ self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY])
+ self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY])
+ self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation)
+ self.device = torch.device("cpu")
+ if cleanup:
+ DensePoseDataRelative.cleanup_annotation(annotation)
+
+ def to(self, device):
+ if self.device == device:
+ return self
+ new_data = DensePoseDataRelative.__new__(DensePoseDataRelative)
+        new_data.x = self.x.to(device)
+ new_data.y = self.y.to(device)
+ new_data.i = self.i.to(device)
+ new_data.u = self.u.to(device)
+ new_data.v = self.v.to(device)
+ new_data.segm = self.segm.to(device)
+ new_data.device = device
+ return new_data
+
+ @staticmethod
+ def extract_segmentation_mask(annotation):
+ poly_specs = annotation[DensePoseDataRelative.S_KEY]
+ if isinstance(poly_specs, torch.Tensor):
+ # data is already given as mask tensors, no need to decode
+ return poly_specs
+
+ import pycocotools.mask as mask_utils
+
+ segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32)
+ for i in range(DensePoseDataRelative.N_BODY_PARTS):
+ poly_i = poly_specs[i]
+ if poly_i:
+ mask_i = mask_utils.decode(poly_i)
+ segm[mask_i > 0] = i + 1
+ return segm
+
+ @staticmethod
+ def validate_annotation(annotation):
+ for key in [
+ DensePoseDataRelative.X_KEY,
+ DensePoseDataRelative.Y_KEY,
+ DensePoseDataRelative.I_KEY,
+ DensePoseDataRelative.U_KEY,
+ DensePoseDataRelative.V_KEY,
+ DensePoseDataRelative.S_KEY,
+ ]:
+ if key not in annotation:
+ return False, "no {key} data in the annotation".format(key=key)
+ return True, None
+
+ @staticmethod
+ def cleanup_annotation(annotation):
+ for key in [
+ DensePoseDataRelative.X_KEY,
+ DensePoseDataRelative.Y_KEY,
+ DensePoseDataRelative.I_KEY,
+ DensePoseDataRelative.U_KEY,
+ DensePoseDataRelative.V_KEY,
+ DensePoseDataRelative.S_KEY,
+ ]:
+ if key in annotation:
+ del annotation[key]
+
+ def apply_transform(self, transforms, densepose_transform_data):
+ self._transform_pts(transforms, densepose_transform_data)
+ self._transform_segm(transforms, densepose_transform_data)
+
+ def _transform_pts(self, transforms, dp_transform_data):
+ import detectron2.data.transforms as T
+
+ # NOTE: This assumes that HorizFlipTransform is the only one that does flip
+ do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
+ if do_hflip:
+ self.x = self.segm.size(1) - self.x
+ self._flip_iuv_semantics(dp_transform_data)
+
+ for t in transforms.transforms:
+ if isinstance(t, T.RotationTransform):
+ xy_scale = np.array((t.w, t.h)) / DensePoseDataRelative.MASK_SIZE
+ xy = t.apply_coords(np.stack((self.x, self.y), axis=1) * xy_scale)
+ self.x, self.y = torch.tensor(xy / xy_scale, dtype=self.x.dtype).T
+
+ def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None:
+ i_old = self.i.clone()
+ uv_symmetries = dp_transform_data.uv_symmetries
+ pt_label_symmetries = dp_transform_data.point_label_symmetries
+ for i in range(self.N_PART_LABELS):
+ if i + 1 in i_old:
+ annot_indices_i = i_old == i + 1
+ if pt_label_symmetries[i + 1] != i + 1:
+ self.i[annot_indices_i] = pt_label_symmetries[i + 1]
+ u_loc = (self.u[annot_indices_i] * 255).long()
+ v_loc = (self.v[annot_indices_i] * 255).long()
+ self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to(
+ device=self.u.device
+ )
+ self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to(
+ device=self.v.device
+ )
+
+ def _transform_segm(self, transforms, dp_transform_data):
+ import detectron2.data.transforms as T
+
+ # NOTE: This assumes that HorizFlipTransform is the only one that does flip
+ do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
+ if do_hflip:
+ self.segm = torch.flip(self.segm, [1])
+ self._flip_segm_semantics(dp_transform_data)
+
+ for t in transforms.transforms:
+ if isinstance(t, T.RotationTransform):
+ self._transform_segm_rotation(t)
+
+ def _flip_segm_semantics(self, dp_transform_data):
+ old_segm = self.segm.clone()
+ mask_label_symmetries = dp_transform_data.mask_label_symmetries
+ for i in range(self.N_BODY_PARTS):
+ if mask_label_symmetries[i + 1] != i + 1:
+ self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1]
+
+ def _transform_segm_rotation(self, rotation):
+ self.segm = F.interpolate(self.segm[None, None, :], (rotation.h, rotation.w)).numpy()
+ self.segm = torch.tensor(rotation.apply_segmentation(self.segm[0, 0]))[None, None, :]
+ self.segm = F.interpolate(self.segm, [DensePoseDataRelative.MASK_SIZE] * 2)[0, 0]
+
+
+def normalized_coords_transform(x0, y0, w, h):
+ """
+ Coordinates transform that maps top left corner to (-1, -1) and bottom
+ right corner to (1, 1). Used for torch.grid_sample to initialize the
+ grid
+ """
+
+ def f(p):
+ return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1)
+
+ return f
+
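+# Quick numeric check of the helper above:
+#
+#   f = normalized_coords_transform(x0=10, y0=20, w=100, h=50)
+#   f((10, 20))    # (-1.0, -1.0) -- top-left corner of the box
+#   f((110, 70))   # (1.0, 1.0)   -- bottom-right corner
+#   f((60, 45))    # (0.0, 0.0)   -- box center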
+
+class DensePoseOutput(object):
+ def __init__(self, S, I, U, V, confidences):
+ """
+ Args:
+ S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W)
+ I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W)
+ U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W)
+ V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W)
+ confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters
+ """
+ self.S = S
+ self.I = I # noqa: E741
+ self.U = U
+ self.V = V
+ self.confidences = confidences
+ self._check_output_dims(S, I, U, V)
+
+ def _check_output_dims(self, S, I, U, V):
+ assert (
+ len(S.size()) == 4
+ ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
+ S.size()
+ )
+        assert (
+            len(I.size()) == 4
+        ), "Part index output should have 4 " "dimensions (NCHW), but has size {}".format(
+            I.size()
+        )
+        assert (
+            len(U.size()) == 4
+        ), "U coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
+            U.size()
+        )
+        assert (
+            len(V.size()) == 4
+        ), "V coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
+            V.size()
+        )
+ assert len(S) == len(I), (
+ "Number of output segmentation planes {} "
+ "should be equal to the number of output part index "
+ "planes {}".format(len(S), len(I))
+ )
+ assert S.size()[2:] == I.size()[2:], (
+ "Output segmentation plane size {} "
+ "should be equal to the output part index "
+ "plane size {}".format(S.size()[2:], I.size()[2:])
+ )
+ assert I.size() == U.size(), (
+ "Part index output shape {} "
+ "should be the same as U coordinates output shape {}".format(I.size(), U.size())
+ )
+ assert I.size() == V.size(), (
+ "Part index output shape {} "
+ "should be the same as V coordinates output shape {}".format(I.size(), V.size())
+ )
+
+ def resize(self, image_size_hw):
+ # do nothing - outputs are invariant to resize
+ pass
+
+ def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh):
+ """
+ Resample S, I, U, V from bbox_old to the cropped bbox_new
+ """
+ x0old, y0old, wold, hold = bbox_old_xywh
+ x0new, y0new, wnew, hnew = bbox_new_xywh
+ tr_coords = normalized_coords_transform(x0old, y0old, wold, hold)
+ topleft = (x0new, y0new)
+ bottomright = (x0new + wnew, y0new + hnew)
+ topleft_norm = tr_coords(topleft)
+ bottomright_norm = tr_coords(bottomright)
+ hsize = S.size(1)
+ wsize = S.size(2)
+ grid = torch.meshgrid(
+ torch.arange(
+ topleft_norm[1],
+ bottomright_norm[1],
+ (bottomright_norm[1] - topleft_norm[1]) / hsize,
+ )[:hsize],
+ torch.arange(
+ topleft_norm[0],
+ bottomright_norm[0],
+ (bottomright_norm[0] - topleft_norm[0]) / wsize,
+ )[:wsize],
+ )
+ grid = torch.stack(grid, dim=2).to(S.device)
+ assert (
+ grid.size(0) == hsize
+ ), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0))
+ assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format(
+ wsize, grid.size(1)
+ )
+ S_new = F.grid_sample(
+ S.unsqueeze(0),
+ torch.unsqueeze(grid, 0),
+ mode="bilinear",
+ padding_mode="border",
+ align_corners=True,
+ ).squeeze(0)
+ I_new = F.grid_sample(
+ I.unsqueeze(0),
+ torch.unsqueeze(grid, 0),
+ mode="bilinear",
+ padding_mode="border",
+ align_corners=True,
+ ).squeeze(0)
+ U_new = F.grid_sample(
+ U.unsqueeze(0),
+ torch.unsqueeze(grid, 0),
+ mode="bilinear",
+ padding_mode="border",
+ align_corners=True,
+ ).squeeze(0)
+ V_new = F.grid_sample(
+ V.unsqueeze(0),
+ torch.unsqueeze(grid, 0),
+ mode="bilinear",
+ padding_mode="border",
+ align_corners=True,
+ ).squeeze(0)
+ return S_new, I_new, U_new, V_new
+
+ def crop(self, indices_cropped, bboxes_old, bboxes_new):
+ """
+ Crop outputs for selected bounding boxes to the new bounding boxes.
+ """
+ # VK: cropping is ignored for now
+ # for i, ic in enumerate(indices_cropped):
+ # self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \
+ # self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic],
+ # bboxes_old[i], bboxes_new[i])
+ pass
+
+ def hflip(self, transform_data: DensePoseTransformData) -> None:
+ """
+ Change S, I, U and V to take into account a Horizontal flip.
+ """
+ if self.I.shape[0] > 0:
+ for el in "SIUV":
+ self.__dict__[el] = torch.flip(self.__dict__[el], [3])
+ for key in self.confidences:
+ self.confidences[key] = torch.flip(self.confidences[key], [3])
+ self._flip_iuv_semantics_tensor(transform_data)
+ self._flip_segm_semantics_tensor(transform_data)
+
+ def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None:
+ point_label_symmetries = dp_transform_data.point_label_symmetries
+ uv_symmetries = dp_transform_data.uv_symmetries
+
+ N, C, H, W = self.U.shape
+ u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long()
+ v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long()
+ Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand(
+ N, C - 1, H, W
+ )
+ self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc]
+ self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc]
+
+ for el in "IUV":
+ self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :]
+
+ def _flip_segm_semantics_tensor(self, dp_transform_data):
+ if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1:
+ self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :]
+
+ def to_result(self, boxes_xywh):
+ """
+ Convert DensePose outputs to results format. Results are more compact,
+ but cannot be resampled any more
+ """
+ result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V)
+ return result
+
+ def __getitem__(self, item):
+ if isinstance(item, int):
+ S_selected = self.S[item].unsqueeze(0)
+ I_selected = self.I[item].unsqueeze(0)
+ U_selected = self.U[item].unsqueeze(0)
+ V_selected = self.V[item].unsqueeze(0)
+ conf_selected = {}
+ for key in self.confidences:
+ conf_selected[key] = self.confidences[key][item].unsqueeze(0)
+ else:
+ S_selected = self.S[item]
+ I_selected = self.I[item]
+ U_selected = self.U[item]
+ V_selected = self.V[item]
+ conf_selected = {}
+ for key in self.confidences:
+ conf_selected[key] = self.confidences[key][item]
+ return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected)
+
+ def __str__(self):
+ s = "DensePoseOutput S {}, I {}, U {}, V {}".format(
+ list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size())
+ )
+ s_conf = "confidences: [{}]".format(
+ ", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences])
+ )
+ return ", ".join([s, s_conf])
+
+ def __len__(self):
+ return self.S.size(0)
+
+
+def resample_output_to_bbox(
+ output: DensePoseOutput, bbox_xywh_abs: List[int], confidences: Optional[List[str]] = None
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ """
+ Convert DensePose output of size [1, C, S, S] into DensePose results [D, H_i, W_i],
+ where `i` is detection index and `D == 2 + len(confidences)`. This conversion:
+ - resamples data to the detection bounding box size (H_i, W_i),
+ - sets label for each pixel of the bounding box as the `argmax` of scores,
+ - assigns values (U, V, confidences) based on label and resampled data
+
+ Args:
+ output (DensePoseOutput): outputs of the DensePose model
+ bbox_xywh_abs (List[int]): bounding box, a list of 4 integer values XYWH
+ confidences (List[str]): optional list of `str` that specifies confidence
+ channels to be resampled and added to the results
+
+ Results:
+ labels (torch.Tensor): tensor [1, H_i, W_i] of `torch.uint8` containing fine
+ segmentation labels of each pixel
+ data (torch.Tensor): tensor [D, H_i, W_i] of `torch.float32` containing
+ for each pixel the estimated U, V coordinates and the requested
+ confidence values in the order that corresponds to `confidences`
+ """
+ x, y, w, h = bbox_xywh_abs
+ w = max(int(w), 1)
+ h = max(int(h), 1)
+ N_out = 2 if confidences is None else 2 + len(confidences)
+ device = output.U.device
+ data = torch.zeros([N_out, h, w], dtype=torch.float32, device=device)
+ # coarse segmentation
+ assert (
+ len(output.S.size()) == 4
+ ), "AnnIndex tensor size should have {} dimensions but has {}".format(4, len(output.S.size()))
+ s_bbox = F.interpolate(output.S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
+ # fine segmentation
+ assert (
+ len(output.I.size()) == 4
+    ), "IndexUV tensor size should have {} dimensions but has {}".format(4, len(output.I.size()))
+ labels = (
+ F.interpolate(output.I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
+ * (s_bbox > 0).long()
+ ).squeeze(0)
+ # U
+ assert len(output.U.size()) == 4, "U tensor size should have {} dimensions but has {}".format(
+ 4, len(output.U.size())
+ )
+ u_bbox = F.interpolate(output.U, (h, w), mode="bilinear", align_corners=False)
+ # V
+ assert len(output.V.size()) == 4, "V tensor size should have {} dimensions but has {}".format(
+ 4, len(output.V.size())
+ )
+ v_bbox = F.interpolate(output.V, (h, w), mode="bilinear", align_corners=False)
+ # confidences
+ if confidences is not None:
+ resampled_confidence = {}
+ for key in output.confidences:
+ resampled_confidence[key] = F.interpolate(
+ output.confidences[key], (h, w), mode="bilinear", align_corners=False
+ )
+
+ # assign data from channels that correspond to the labels
+ for part_id in range(1, u_bbox.size(1)):
+ data[0][labels == part_id] = u_bbox[0, part_id][labels == part_id]
+ data[1][labels == part_id] = v_bbox[0, part_id][labels == part_id]
+ if confidences is None:
+ continue
+ for i, key in enumerate(confidences):
+ if resampled_confidence[key].size(1) != u_bbox.size(1):
+ # confidence is not part-based, don't try to fill it part by part
+ continue
+ data[2 + i][labels == part_id] = resampled_confidence[key][0, part_id][
+ labels == part_id
+ ]
+ if confidences is not None:
+ for i, key in enumerate(confidences):
+ if resampled_confidence[key].size(1) != u_bbox.size(1):
+ # confidence is not part-based, fill the data with the first channel
+ # (targeted for segmentation confidences that have only 1 channel)
+ data[2 + i] = resampled_confidence[key][0, 0]
+ return labels.unsqueeze(0), data
+
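+# Usage sketch (illustrative only); `output` is assumed to be a DensePoseOutput for a
+# single detection whose confidences include "sigma_2":
+#
+#   labels, data = resample_output_to_bbox(output, [15, 30, 120, 200], ["sigma_2"])
+#   labels.shape   # torch.Size([1, 200, 120]) -- fine segmentation label per pixel
+#   data.shape     # torch.Size([3, 200, 120]) -- U, V and sigma_2 channels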
+
+class DensePoseResult(object):
+ def __init__(self, boxes_xywh, S, I, U, V):
+ self.results = []
+ self.boxes_xywh = boxes_xywh.cpu().tolist()
+ assert len(boxes_xywh.size()) == 2
+ assert boxes_xywh.size(1) == 4
+ for i, box_xywh in enumerate(boxes_xywh):
+ result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]])
+ result_numpy_i = result_i.cpu().numpy()
+ result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i)
+ result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i)
+ self.results.append(result_encoded_with_shape_i)
+
+ def __str__(self):
+ s = "DensePoseResult: N={} [{}]".format(
+ len(self.results), ", ".join([str(list(r[0])) for r in self.results])
+ )
+ return s
+
+ def _output_to_result(self, box_xywh, S, I, U, V):
+ # TODO: reuse resample_output_to_bbox
+ x, y, w, h = box_xywh
+ w = max(int(w), 1)
+ h = max(int(h), 1)
+ result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device)
+ assert (
+ len(S.size()) == 4
+ ), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
+ s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
+ assert (
+ len(I.size()) == 4
+        ), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(I.size()))
+ i_bbox = (
+ F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
+ * (s_bbox > 0).long()
+ ).squeeze(0)
+ assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format(
+ 4, len(U.size())
+ )
+ u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False)
+ assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format(
+ 4, len(V.size())
+ )
+ v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False)
+ result[0] = i_bbox
+ for part_id in range(1, u_bbox.size(1)):
+ result[1][i_bbox == part_id] = (
+ (u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
+ )
+ result[2][i_bbox == part_id] = (
+ (v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
+ )
+ assert (
+ result.size(1) == h
+        ), "Results height {} should be equal " "to bounding box height {}".format(result.size(1), h)
+ assert (
+ result.size(2) == w
+        ), "Results width {} should be equal " "to bounding box width {}".format(result.size(2), w)
+ return result
+
+ @staticmethod
+ def encode_png_data(arr):
+ """
+ Encode array data as a PNG image using the highest compression rate
+ @param arr [in] Data stored in an array of size (3, M, N) of type uint8
+ @return Base64-encoded string containing PNG-compressed data
+ """
+ assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format(
+ len(arr.shape)
+ )
+ assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format(
+ arr.shape[0]
+ )
+        assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " "got {0}".format(
+ arr.dtype
+ )
+ data = np.moveaxis(arr, 0, -1)
+ im = Image.fromarray(data)
+ fstream = BytesIO()
+ im.save(fstream, format="png", optimize=True)
+ s = base64.encodebytes(fstream.getvalue()).decode()
+ return s
+
+ @staticmethod
+ def decode_png_data(shape, s):
+ """
+ Decode array data from a string that contains PNG-compressed data
+        @param shape [in] Shape of the output array, (3, M, N)
+        @param s [in] Base64-encoded string containing PNG-compressed data
+ @return Data stored in an array of size (3, M, N) of type uint8
+ """
+ fstream = BytesIO(base64.decodebytes(s.encode()))
+ im = Image.open(fstream)
+ data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0)
+ return data.reshape(shape)
+
+ def __len__(self):
+ return len(self.results)
+
+ def __getitem__(self, item):
+ result_encoded = self.results[item]
+ bbox_xywh = self.boxes_xywh[item]
+ return result_encoded, bbox_xywh
+
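+# Round-trip sketch of the PNG codec above:
+#
+#   arr = np.random.randint(0, 255, size=(3, 32, 24), dtype=np.uint8)
+#   s = DensePoseResult.encode_png_data(arr)
+#   arr_back = DensePoseResult.decode_png_data(arr.shape, s)
+#   assert np.array_equal(arr, arr_back)   # PNG compression is lossless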
+
+class DensePoseList(object):
+
+ _TORCH_DEVICE_CPU = torch.device("cpu")
+
+ def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU):
+ assert len(densepose_datas) == len(
+ boxes_xyxy_abs
+ ), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format(
+ len(densepose_datas), len(boxes_xyxy_abs)
+ )
+ self.densepose_datas = []
+ for densepose_data in densepose_datas:
+ assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, (
+ "Attempt to initialize DensePoseList with DensePose datas "
+ "of type {}, expected DensePoseDataRelative".format(type(densepose_data))
+ )
+ densepose_data_ondevice = (
+ densepose_data.to(device) if densepose_data is not None else None
+ )
+ self.densepose_datas.append(densepose_data_ondevice)
+ self.boxes_xyxy_abs = boxes_xyxy_abs.to(device)
+ self.image_size_hw = image_size_hw
+ self.device = device
+
+ def to(self, device):
+ if self.device == device:
+ return self
+ return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device)
+
+ def __iter__(self):
+ return iter(self.densepose_datas)
+
+ def __len__(self):
+ return len(self.densepose_datas)
+
+ def __repr__(self):
+ s = self.__class__.__name__ + "("
+ s += "num_instances={}, ".format(len(self.densepose_datas))
+ s += "image_width={}, ".format(self.image_size_hw[1])
+ s += "image_height={})".format(self.image_size_hw[0])
+ return s
+
+ def __getitem__(self, item):
+ if isinstance(item, int):
+ densepose_data_rel = self.densepose_datas[item]
+ return densepose_data_rel
+ elif isinstance(item, slice):
+ densepose_datas_rel = self.densepose_datas[item]
+ boxes_xyxy_abs = self.boxes_xyxy_abs[item]
+ return DensePoseList(
+ densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
+ )
+ elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool):
+ densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0]
+ boxes_xyxy_abs = self.boxes_xyxy_abs[item]
+ return DensePoseList(
+ densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
+ )
+ else:
+ densepose_datas_rel = [self.densepose_datas[i] for i in item]
+ boxes_xyxy_abs = self.boxes_xyxy_abs[item]
+ return DensePoseList(
+ densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
+ )
diff --git a/projects/DensePose/densepose/data/transform/__init__.py b/projects/DensePose/densepose/data/transform/__init__.py
new file mode 100644
index 0000000..555ee83
--- /dev/null
+++ b/projects/DensePose/densepose/data/transform/__init__.py
@@ -0,0 +1,3 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from .image import ImageResizeTransform
diff --git a/projects/DensePose/densepose/data/transform/image.py b/projects/DensePose/densepose/data/transform/image.py
new file mode 100644
index 0000000..ff9de52
--- /dev/null
+++ b/projects/DensePose/densepose/data/transform/image.py
@@ -0,0 +1,37 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import torch
+
+
+class ImageResizeTransform:
+ """
+ Transform that converts frames loaded from a dataset
+ (RGB data in NHWC channel order, typically uint8) to a format ready to be
+ consumed by DensePose training (BGR float32 data in NCHW channel order)
+ """
+
+ def __init__(self, min_size: int = 800, max_size: int = 1333):
+ self.min_size = min_size
+ self.max_size = max_size
+
+ def __call__(self, frames: torch.Tensor) -> torch.Tensor:
+ """
+ Args:
+ frames (torch.Tensor): tensor of size [N, H, W, 3] that contains
+ RGB data (typically in uint8)
+ Returns:
+ frames (torch.Tensor): tensor of size [N, 3, H1, W1] where
+ H1 and W1 are chosen to respect the specified min and max sizes
+ and preserve the original aspect ratio, the data channels
+ follow BGR order and the data type is `torch.float32`
+ """
+ frames = frames[..., [2, 1, 0]] # RGB -> BGR
+ frames = frames.permute(0, 3, 1, 2).float() # NHWC -> NCHW
+ # resize with min size
+ min_size = min(frames.shape[-2:])
+ max_size = max(frames.shape[-2:])
+ scale = min(self.min_size / min_size, self.max_size / max_size)
+ frames = torch.nn.functional.interpolate(
+ frames, scale_factor=scale, mode="bilinear", align_corners=False
+ )
+ return frames
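+
+# Usage sketch (illustrative only); the input is a batch of uint8 RGB frames in NHWC order,
+# as produced by the datasets in this package:
+#
+#   transform = ImageResizeTransform(min_size=800, max_size=1333)
+#   frames = torch.randint(0, 256, (2, 480, 640, 3), dtype=torch.uint8)
+#   out = transform(frames)   # float32, NCHW, BGR
+#   out.shape                 # torch.Size([2, 3, 800, 1066]) for 480x640 inputs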
diff --git a/projects/DensePose/densepose/data/utils.py b/projects/DensePose/densepose/data/utils.py
new file mode 100644
index 0000000..fc46ca7
--- /dev/null
+++ b/projects/DensePose/densepose/data/utils.py
@@ -0,0 +1,22 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import os
+from typing import Optional
+
+
+def is_relative_local_path(path: os.PathLike):
+ path_str = os.fsdecode(path)
+ return ("://" not in path_str) and not os.path.isabs(path)
+
+
+def maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike):
+ """
+ Prepends the provided path with a base path prefix if:
+ 1) base path is not None;
+ 2) path is a local path
+ """
+ if base_path is None:
+ return path
+ if is_relative_local_path(path):
+ return os.path.join(base_path, path)
+ return path
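+
+# Quick examples of the helpers above:
+#
+#   maybe_prepend_base_path("datasets", "coco/train2014")      # 'datasets/coco/train2014'
+#   maybe_prepend_base_path("datasets", "/abs/path/img.jpg")   # absolute path kept as is
+#   maybe_prepend_base_path(None, "coco/train2014")            # 'coco/train2014'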
diff --git a/projects/DensePose/densepose/data/video/__init__.py b/projects/DensePose/densepose/data/video/__init__.py
new file mode 100644
index 0000000..13541ce
--- /dev/null
+++ b/projects/DensePose/densepose/data/video/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from .frame_selector import (
+ FrameSelectionStrategy,
+ RandomKFramesSelector,
+ FirstKFramesSelector,
+ LastKFramesSelector,
+ FrameTsList,
+ FrameSelector,
+)
+
+from .video_keyframe_dataset import (
+ VideoKeyframeDataset,
+ video_list_from_file,
+ list_keyframes,
+ read_keyframes,
+)
diff --git a/projects/DensePose/densepose/data/video/frame_selector.py b/projects/DensePose/densepose/data/video/frame_selector.py
new file mode 100644
index 0000000..408b877
--- /dev/null
+++ b/projects/DensePose/densepose/data/video/frame_selector.py
@@ -0,0 +1,87 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import random
+from collections.abc import Callable
+from enum import Enum
+from typing import Callable as TCallable
+from typing import List
+
+FrameTsList = List[int]
+FrameSelector = TCallable[[FrameTsList], FrameTsList]
+
+
+class FrameSelectionStrategy(Enum):
+ """
+ Frame selection strategy used with videos:
+ - "random_k": select k random frames
+ - "first_k": select k first frames
+ - "last_k": select k last frames
+ - "all": select all frames
+ """
+
+ # fmt: off
+ RANDOM_K = "random_k"
+ FIRST_K = "first_k"
+ LAST_K = "last_k"
+ ALL = "all"
+ # fmt: on
+
+
+class RandomKFramesSelector(Callable):
+ """
+ Selector that retains at most `k` random frames
+ """
+
+ def __init__(self, k: int):
+ self.k = k
+
+ def __call__(self, frame_tss: FrameTsList) -> FrameTsList:
+ """
+        Select at most `k` random frames
+
+        Args:
+            frame_tss (List[int]): timestamps of input frames
+ Returns:
+ List[int]: timestamps of selected frames
+ """
+ return random.sample(frame_tss, min(self.k, len(frame_tss)))
+
+
+class FirstKFramesSelector(Callable):
+ """
+ Selector that retains at most `k` first frames
+ """
+
+ def __init__(self, k: int):
+ self.k = k
+
+ def __call__(self, frame_tss: FrameTsList) -> FrameTsList:
+ """
+        Select up to the first `k` frames
+
+        Args:
+            frame_tss (List[int]): timestamps of input frames
+ Returns:
+ List[int]: timestamps of selected frames
+ """
+ return frame_tss[: self.k]
+
+
+class LastKFramesSelector(Callable):
+ """
+ Selector that retains at most `k` last frames from video data
+ """
+
+ def __init__(self, k: int):
+ self.k = k
+
+ def __call__(self, frame_tss: FrameTsList) -> FrameTsList:
+ """
+        Select up to the last `k` frames
+
+        Args:
+            frame_tss (List[int]): timestamps of input frames
+ Returns:
+ List[int]: timestamps of selected frames
+ """
+ return frame_tss[-self.k :]
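+
+
+# Illustrative usage sketch (the timestamps below are made up):
+#
+#   selector: FrameSelector = FirstKFramesSelector(2)
+#   selector([10, 20, 30, 40])                   # -> [10, 20]
+#   LastKFramesSelector(2)([10, 20, 30, 40])     # -> [30, 40]
+#   RandomKFramesSelector(2)([10, 20, 30, 40])   # -> 2 timestamps chosen at random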
diff --git a/projects/DensePose/densepose/data/video/video_keyframe_dataset.py b/projects/DensePose/densepose/data/video/video_keyframe_dataset.py
new file mode 100644
index 0000000..8efe575
--- /dev/null
+++ b/projects/DensePose/densepose/data/video/video_keyframe_dataset.py
@@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import logging
+import numpy as np
+from typing import Callable, List, Optional
+import torch
+from fvcore.common.file_io import PathManager
+from torch.utils.data.dataset import Dataset
+
+import av
+
+from ..utils import maybe_prepend_base_path
+from .frame_selector import FrameSelector, FrameTsList
+
+FrameList = List[av.frame.Frame]
+FrameTransform = Callable[[torch.Tensor], torch.Tensor]
+
+
+def list_keyframes(video_fpath: str, video_stream_idx: int = 0) -> FrameTsList:
+ """
+ Traverses all keyframes of a video file. Returns a list of keyframe
+ timestamps. Timestamps are counts in timebase units.
+
+ Args:
+ video_fpath (str): Video file path
+ video_stream_idx (int): Video stream index (default: 0)
+ Returns:
+        List[int]: list of keyframe timestamps (a timestamp is a count in timebase
+ units)
+ """
+ try:
+ with PathManager.open(video_fpath, "rb") as io:
+ container = av.open(io, mode="r")
+ stream = container.streams.video[video_stream_idx]
+ keyframes = []
+ pts = -1
+            # Note: even though we request forward seeks for keyframes, sometimes
+            # a keyframe in the backward direction is returned. We introduce a
+            # tolerance as the maximum number of backward seeks to ignore.
+ tolerance_backward_seeks = 2
+ while True:
+ try:
+ container.seek(pts + 1, backward=False, any_frame=False, stream=stream)
+ except av.AVError as e:
+ # the exception occurs when the video length is exceeded,
+ # we then return whatever data we've already collected
+ logger = logging.getLogger(__name__)
+ logger.debug(
+ f"List keyframes: Error seeking video file {video_fpath}, "
+ f"video stream {video_stream_idx}, pts {pts + 1}, AV error: {e}"
+ )
+ return keyframes
+ except OSError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"List keyframes: Error seeking video file {video_fpath}, "
+ f"video stream {video_stream_idx}, pts {pts + 1}, OS error: {e}"
+ )
+ return []
+ packet = next(container.demux(video=video_stream_idx))
+ if packet.pts is not None and packet.pts <= pts:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Video file {video_fpath}, stream {video_stream_idx}: "
+ f"bad seek for packet {pts + 1} (got packet {packet.pts}), "
+ f"tolerance {tolerance_backward_seeks}."
+ )
+ tolerance_backward_seeks -= 1
+ if tolerance_backward_seeks == 0:
+ return []
+ pts += 1
+ continue
+ tolerance_backward_seeks = 2
+ pts = packet.pts
+ if pts is None:
+ return keyframes
+ if packet.is_keyframe:
+ keyframes.append(pts)
+ return keyframes
+ except OSError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+            f"List keyframes: Error opening video file container {video_fpath}, OS error: {e}"
+ )
+ except RuntimeError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"List keyframes: Error opening video file container {video_fpath}, "
+ f"Runtime error: {e}"
+ )
+ return []
+
+
+def read_keyframes(
+ video_fpath: str, keyframes: FrameTsList, video_stream_idx: int = 0
+) -> FrameList:
+ """
+ Reads keyframe data from a video file.
+
+ Args:
+ video_fpath (str): Video file path
+ keyframes (List[int]): List of keyframe timestamps (as counts in
+ timebase units to be used in container seek operations)
+ video_stream_idx (int): Video stream index (default: 0)
+ Returns:
+ List[Frame]: list of frames that correspond to the specified timestamps
+ """
+ try:
+ with PathManager.open(video_fpath, "rb") as io:
+ container = av.open(io)
+ stream = container.streams.video[video_stream_idx]
+ frames = []
+ for pts in keyframes:
+ try:
+ container.seek(pts, any_frame=False, stream=stream)
+ frame = next(container.decode(video=0))
+ frames.append(frame)
+ except av.AVError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Read keyframes: Error seeking video file {video_fpath}, "
+ f"video stream {video_stream_idx}, pts {pts}, AV error: {e}"
+ )
+ container.close()
+ return frames
+ except OSError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Read keyframes: Error seeking video file {video_fpath}, "
+ f"video stream {video_stream_idx}, pts {pts}, OS error: {e}"
+ )
+ container.close()
+ return frames
+ except StopIteration:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Read keyframes: Error decoding frame from {video_fpath}, "
+ f"video stream {video_stream_idx}, pts {pts}"
+ )
+ container.close()
+ return frames
+
+ container.close()
+ return frames
+ except OSError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Read keyframes: Error opening video file container {video_fpath}, OS error: {e}"
+ )
+ except RuntimeError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Read keyframes: Error opening video file container {video_fpath}, Runtime error: {e}"
+ )
+ return []
+
+
+def video_list_from_file(video_list_fpath: str, base_path: Optional[str] = None):
+ """
+ Create a list of paths to video files from a text file.
+
+ Args:
+ video_list_fpath (str): path to a plain text file with the list of videos
+        base_path (str): base path for entries from the video list (default: None)
+    Returns:
+        List[str]: video file paths, each prefixed with `base_path` when it is
+            a relative local path
+    """
+ video_list = []
+ with PathManager.open(video_list_fpath, "r") as io:
+ for line in io:
+ video_list.append(maybe_prepend_base_path(base_path, line.strip()))
+ return video_list
+
+
+class VideoKeyframeDataset(Dataset):
+ """
+ Dataset that provides keyframes for a set of videos.
+ """
+
+ _EMPTY_FRAMES = torch.empty((0, 3, 1, 1))
+
+ def __init__(
+ self,
+ video_list: List[str],
+ frame_selector: Optional[FrameSelector] = None,
+ transform: Optional[FrameTransform] = None,
+ ):
+ """
+ Dataset constructor
+
+ Args:
+ video_list (List[str]): list of paths to video files
+ frame_selector (Callable: KeyFrameList -> KeyFrameList):
+                selects keyframes to process; keyframes are given by
+ packet timestamps in timebase counts. If None, all keyframes
+ are selected (default: None)
+ transform (Callable: torch.Tensor -> torch.Tensor):
+ transforms a batch of RGB images (tensors of size [B, H, W, 3]),
+ returns a tensor of the same size. If None, no transform is
+ applied (default: None)
+
+ """
+ self.video_list = video_list
+ self.frame_selector = frame_selector
+ self.transform = transform
+
+ def __getitem__(self, idx: int) -> torch.Tensor:
+ """
+ Gets selected keyframes from a given video
+
+ Args:
+ idx (int): video index in the video list file
+ Returns:
+            frames (torch.Tensor): tensor of size [N, H, W, 3] (or of the size
+                produced by the transform) containing keyframe data
+ """
+ fpath = self.video_list[idx]
+ keyframes = list_keyframes(fpath)
+ if not keyframes:
+ return self._EMPTY_FRAMES
+ if self.frame_selector is not None:
+ keyframes = self.frame_selector(keyframes)
+ frames = read_keyframes(fpath, keyframes)
+ if not frames:
+ return self._EMPTY_FRAMES
+ frames = np.stack([frame.to_rgb().to_ndarray() for frame in frames])
+ frames = torch.as_tensor(frames, device=torch.device("cpu"))
+ if self.transform is not None:
+ frames = self.transform(frames)
+ return frames
+
+ def __len__(self):
+ return len(self.video_list)
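+
+
+# Illustrative usage sketch (file names are hypothetical; the selector and
+# transform come from the sibling frame_selector and transform modules):
+#
+#   video_list = video_list_from_file("video_list.txt", base_path="/data")
+#   dataset = VideoKeyframeDataset(
+#       video_list,
+#       frame_selector=RandomKFramesSelector(4),
+#       transform=ImageResizeTransform(),
+#   )
+#   frames = dataset[0]  # keyframes of the first video, or an empty tensor on failure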
diff --git a/projects/DensePose/densepose/densepose_coco_evaluation.py b/projects/DensePose/densepose/densepose_coco_evaluation.py
new file mode 100644
index 0000000..3faa0e5
--- /dev/null
+++ b/projects/DensePose/densepose/densepose_coco_evaluation.py
@@ -0,0 +1,1157 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+# This is a modified version of cocoeval.py that also supports DensePose evaluation.
+
+__author__ = "tsungyi"
+
+import copy
+import datetime
+import logging
+import numpy as np
+import pickle
+import time
+from collections import defaultdict
+from enum import Enum
+from typing import Any, Dict, Tuple
+import scipy.spatial.distance as ssd
+from fvcore.common.file_io import PathManager
+from pycocotools import mask as maskUtils
+from scipy.io import loadmat
+from scipy.ndimage import zoom as spzoom
+
+from .data.structures import DensePoseDataRelative, DensePoseResult
+
+logger = logging.getLogger(__name__)
+
+
+class DensePoseEvalMode(str, Enum):
+ # use both masks and geodesic distances (GPS * IOU) to compute scores
+ GPSM = "gpsm"
+ # use only geodesic distances (GPS) to compute scores
+ GPS = "gps"
+ # use only masks (IOU) to compute scores
+ IOU = "iou"
+
+
+class DensePoseDataMode(str, Enum):
+ # use estimated IUV data (default mode)
+ IUV_DT = "iuvdt"
+ # use ground truth IUV data
+ IUV_GT = "iuvgt"
+ # use ground truth labels I and set UV to 0
+ I_GT_UV_0 = "igtuv0"
+ # use ground truth labels I and estimated UV coordinates
+ I_GT_UV_DT = "igtuvdt"
+ # use estimated labels I and set UV to 0
+ I_DT_UV_0 = "idtuv0"
+
+
+class DensePoseCocoEval(object):
+ # Interface for evaluating detection on the Microsoft COCO dataset.
+ #
+ # The usage for CocoEval is as follows:
+ # cocoGt=..., cocoDt=... # load dataset and results
+ # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
+ # E.params.recThrs = ...; # set parameters as desired
+ # E.evaluate(); # run per image evaluation
+ # E.accumulate(); # accumulate per image results
+ # E.summarize(); # display summary metrics of results
+ # For example usage see evalDemo.m and http://mscoco.org/.
+ #
+ # The evaluation parameters are as follows (defaults in brackets):
+ # imgIds - [all] N img ids to use for evaluation
+ # catIds - [all] K cat ids to use for evaluation
+ # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation
+ # recThrs - [0:.01:1] R=101 recall thresholds for evaluation
+ # areaRng - [...] A=4 object area ranges for evaluation
+ # maxDets - [1 10 100] M=3 thresholds on max detections per image
+ # iouType - ['segm'] set iouType to 'segm', 'bbox', 'keypoints' or 'densepose'
+ # iouType replaced the now DEPRECATED useSegm parameter.
+ # useCats - [1] if true use category labels for evaluation
+ # Note: if useCats=0 category labels are ignored as in proposal scoring.
+ # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
+ #
+ # evaluate(): evaluates detections on every image and every category and
+ # concats the results into the "evalImgs" with fields:
+ # dtIds - [1xD] id for each of the D detections (dt)
+ # gtIds - [1xG] id for each of the G ground truths (gt)
+ # dtMatches - [TxD] matching gt id at each IoU or 0
+ # gtMatches - [TxG] matching dt id at each IoU or 0
+ # dtScores - [1xD] confidence of each dt
+ # gtIgnore - [1xG] ignore flag for each gt
+ # dtIgnore - [TxD] ignore flag for each dt at each IoU
+ #
+ # accumulate(): accumulates the per-image, per-category evaluation
+ # results in "evalImgs" into the dictionary "eval" with fields:
+ # params - parameters used for evaluation
+ # date - date evaluation was performed
+ # counts - [T,R,K,A,M] parameter dimensions (see above)
+ # precision - [TxRxKxAxM] precision for every evaluation setting
+ # recall - [TxKxAxM] max recall for every evaluation setting
+ # Note: precision and recall==-1 for settings with no gt objects.
+ #
+ # See also coco, mask, pycocoDemo, pycocoEvalDemo
+ #
+ # Microsoft COCO Toolbox. version 2.0
+ # Data, paper, and tutorials available at: http://mscoco.org/
+ # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
+ # Licensed under the Simplified BSD License [see coco/license.txt]
+ def __init__(
+ self,
+ cocoGt=None,
+ cocoDt=None,
+ iouType: str = "densepose",
+ dpEvalMode: DensePoseEvalMode = DensePoseEvalMode.GPS,
+ dpDataMode: DensePoseDataMode = DensePoseDataMode.IUV_DT,
+ ):
+ """
+ Initialize CocoEval using coco APIs for gt and dt
+ :param cocoGt: coco object with ground truth annotations
+ :param cocoDt: coco object with detection results
+ :return: None
+ """
+ self.cocoGt = cocoGt # ground truth COCO API
+ self.cocoDt = cocoDt # detections COCO API
+ self._dpEvalMode = dpEvalMode
+ self._dpDataMode = dpDataMode
+ self.params = {} # evaluation parameters
+ self.evalImgs = defaultdict(list) # per-image per-category eval results [KxAxI]
+ self.eval = {} # accumulated evaluation results
+ self._gts = defaultdict(list) # gt for evaluation
+ self._dts = defaultdict(list) # dt for evaluation
+ self.params = Params(iouType=iouType) # parameters
+ self._paramsEval = {} # parameters for evaluation
+ self.stats = [] # result summarization
+ self.ious = {} # ious between all gts and dts
+ if cocoGt is not None:
+ self.params.imgIds = sorted(cocoGt.getImgIds())
+ self.params.catIds = sorted(cocoGt.getCatIds())
+ self.ignoreThrBB = 0.7
+ self.ignoreThrUV = 0.9
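+        # fraction of overlap with an image "ignore region" at which an
+        # annotation or detection is excluded from evaluation: ignoreThrBB is
+        # used for plain boxes, ignoreThrUV for DensePose predictions
+        # (see _checkIgnore)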
+
+ def _loadGEval(self):
+ smpl_subdiv_fpath = PathManager.get_local_path(
+ "https://dl.fbaipublicfiles.com/densepose/data/SMPL_subdiv.mat"
+ )
+ pdist_transform_fpath = PathManager.get_local_path(
+ "https://dl.fbaipublicfiles.com/densepose/data/SMPL_SUBDIV_TRANSFORM.mat"
+ )
+ pdist_matrix_fpath = PathManager.get_local_path(
+ "https://dl.fbaipublicfiles.com/densepose/data/Pdist_matrix.pkl", timeout_sec=120
+ )
+ SMPL_subdiv = loadmat(smpl_subdiv_fpath)
+ self.PDIST_transform = loadmat(pdist_transform_fpath)
+ self.PDIST_transform = self.PDIST_transform["index"].squeeze()
+ UV = np.array([SMPL_subdiv["U_subdiv"], SMPL_subdiv["V_subdiv"]]).squeeze()
+ ClosestVertInds = np.arange(UV.shape[1]) + 1
+ self.Part_UVs = []
+ self.Part_ClosestVertInds = []
+ for i in np.arange(24):
+ self.Part_UVs.append(UV[:, SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)])
+ self.Part_ClosestVertInds.append(
+ ClosestVertInds[SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)]
+ )
+
+ with open(pdist_matrix_fpath, "rb") as hFile:
+ arrays = pickle.load(hFile, encoding="latin1")
+ self.Pdist_matrix = arrays["Pdist_matrix"]
+ self.Part_ids = np.array(SMPL_subdiv["Part_ID_subdiv"].squeeze())
+ # Mean geodesic distances for parts.
+ self.Mean_Distances = np.array([0, 0.351, 0.107, 0.126, 0.237, 0.173, 0.142, 0.128, 0.150])
+ # Coarse Part labels.
+ self.CoarseParts = np.array(
+ [0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8]
+ )
+
+ def _prepare(self):
+ """
+ Prepare ._gts and ._dts for evaluation based on params
+ :return: None
+ """
+
+ def _toMask(anns, coco):
+ # modify ann['segmentation'] by reference
+ for ann in anns:
+ # safeguard for invalid segmentation annotation;
+ # annotations containing empty lists exist in the posetrack
+ # dataset. This is not a correct segmentation annotation
+ # in terms of COCO format; we need to deal with it somehow
+ segm = ann["segmentation"]
+ if type(segm) == list and len(segm) == 0:
+ ann["segmentation"] = None
+ continue
+ rle = coco.annToRLE(ann)
+ ann["segmentation"] = rle
+
+ def _getIgnoreRegion(iid, coco):
+ img = coco.imgs[iid]
+
+ if "ignore_regions_x" not in img.keys():
+ return None
+
+ if len(img["ignore_regions_x"]) == 0:
+ return None
+
+ rgns_merged = [
+ [v for xy in zip(region_x, region_y) for v in xy]
+ for region_x, region_y in zip(img["ignore_regions_x"], img["ignore_regions_y"])
+ ]
+ rles = maskUtils.frPyObjects(rgns_merged, img["height"], img["width"])
+ rle = maskUtils.merge(rles)
+ return maskUtils.decode(rle)
+
+ def _checkIgnore(dt, iregion):
+ if iregion is None:
+ return True
+
+ bb = np.array(dt["bbox"]).astype(np.int)
+ x1, y1, x2, y2 = bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3]
+ x2 = min([x2, iregion.shape[1]])
+ y2 = min([y2, iregion.shape[0]])
+
+ if bb[2] * bb[3] == 0:
+ return False
+
+ crop_iregion = iregion[y1:y2, x1:x2]
+
+ if crop_iregion.sum() == 0:
+ return True
+
+ if "densepose" not in dt.keys(): # filtering boxes
+ return crop_iregion.sum() / bb[2] / bb[3] < self.ignoreThrBB
+
+ # filtering UVs
+ ignoremask = np.require(crop_iregion, requirements=["F"])
+ mask = self._extract_mask(dt)
+ uvmask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"])
+ uvmask_ = maskUtils.encode(uvmask)
+ ignoremask_ = maskUtils.encode(ignoremask)
+ uviou = maskUtils.iou([uvmask_], [ignoremask_], [1])[0]
+ return uviou < self.ignoreThrUV
+
+ p = self.params
+
+ if p.useCats:
+ gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
+ dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
+ else:
+ gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
+ dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
+
+ imns = self.cocoGt.loadImgs(p.imgIds)
+ self.size_mapping = {}
+ for im in imns:
+ self.size_mapping[im["id"]] = [im["height"], im["width"]]
+
+ # if iouType == 'uv', add point gt annotations
+ if p.iouType == "densepose":
+ self._loadGEval()
+
+ # convert ground truth to mask if iouType == 'segm'
+ if p.iouType == "segm":
+ _toMask(gts, self.cocoGt)
+ _toMask(dts, self.cocoDt)
+
+ # set ignore flag
+ for gt in gts:
+ gt["ignore"] = gt["ignore"] if "ignore" in gt else 0
+ gt["ignore"] = "iscrowd" in gt and gt["iscrowd"]
+ if p.iouType == "keypoints":
+ gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"]
+ if p.iouType == "densepose":
+ gt["ignore"] = ("dp_x" in gt) == 0
+ if p.iouType == "segm":
+ gt["ignore"] = gt["segmentation"] is None
+
+ self._gts = defaultdict(list) # gt for evaluation
+ self._dts = defaultdict(list) # dt for evaluation
+ self._igrgns = defaultdict(list)
+
+ for gt in gts:
+ iid = gt["image_id"]
+ if iid not in self._igrgns.keys():
+ self._igrgns[iid] = _getIgnoreRegion(iid, self.cocoGt)
+ if _checkIgnore(gt, self._igrgns[iid]):
+ self._gts[iid, gt["category_id"]].append(gt)
+ for dt in dts:
+ iid = dt["image_id"]
+ if (iid not in self._igrgns) or _checkIgnore(dt, self._igrgns[iid]):
+ self._dts[iid, dt["category_id"]].append(dt)
+
+ self.evalImgs = defaultdict(list) # per-image per-category evaluation results
+ self.eval = {} # accumulated evaluation results
+
+ def evaluate(self):
+ """
+ Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
+ :return: None
+ """
+ tic = time.time()
+ logger.info("Running per image DensePose evaluation... {}".format(self.params.iouType))
+ p = self.params
+ # add backward compatibility if useSegm is specified in params
+ if p.useSegm is not None:
+ p.iouType = "segm" if p.useSegm == 1 else "bbox"
+ logger.info("useSegm (deprecated) is not None. Running DensePose evaluation")
+ p.imgIds = list(np.unique(p.imgIds))
+ if p.useCats:
+ p.catIds = list(np.unique(p.catIds))
+ p.maxDets = sorted(p.maxDets)
+ self.params = p
+
+ self._prepare()
+ # loop through images, area range, max detection number
+ catIds = p.catIds if p.useCats else [-1]
+
+ if p.iouType in ["segm", "bbox"]:
+ computeIoU = self.computeIoU
+ elif p.iouType == "keypoints":
+ computeIoU = self.computeOks
+ elif p.iouType == "densepose":
+ computeIoU = self.computeOgps
+ if self._dpEvalMode == DensePoseEvalMode.GPSM:
+ self.real_ious = {
+ (imgId, catId): self.computeDPIoU(imgId, catId)
+ for imgId in p.imgIds
+ for catId in catIds
+ }
+
+ self.ious = {
+ (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds
+ }
+
+ evaluateImg = self.evaluateImg
+ maxDet = p.maxDets[-1]
+ self.evalImgs = [
+ evaluateImg(imgId, catId, areaRng, maxDet)
+ for catId in catIds
+ for areaRng in p.areaRng
+ for imgId in p.imgIds
+ ]
+ self._paramsEval = copy.deepcopy(self.params)
+ toc = time.time()
+ logger.info("DensePose evaluation DONE (t={:0.2f}s).".format(toc - tic))
+
+ def getDensePoseMask(self, polys):
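+        # `polys` holds the per-part RLE masks stored in DensePose annotations
+        # (up to 14 body parts on a 256x256 grid); the result is a single
+        # 256x256 label map with values 1..14 and 0 for background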
+ maskGen = np.zeros([256, 256])
+ stop = min(len(polys) + 1, 15)
+ for i in range(1, stop):
+ if polys[i - 1]:
+ currentMask = maskUtils.decode(polys[i - 1])
+ maskGen[currentMask > 0] = i
+ return maskGen
+
+ def _generate_rlemask_on_image(self, mask, imgId, data):
+ bbox_xywh = np.array(data["bbox"])
+ x, y, w, h = bbox_xywh
+ im_h, im_w = self.size_mapping[imgId]
+ im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
+ if mask is not None:
+ x0 = max(int(x), 0)
+ x1 = min(int(x + w), im_w, int(x) + mask.shape[1])
+ y0 = max(int(y), 0)
+ y1 = min(int(y + h), im_h, int(y) + mask.shape[0])
+ y = int(y)
+ x = int(x)
+ im_mask[y0:y1, x0:x1] = mask[y0 - y : y1 - y, x0 - x : x1 - x]
+ im_mask = np.require(np.asarray(im_mask > 0), dtype=np.uint8, requirements=["F"])
+ rle_mask = maskUtils.encode(np.array(im_mask[:, :, np.newaxis], order="F"))[0]
+ return rle_mask
+
+ def computeDPIoU(self, imgId, catId):
+ p = self.params
+ if p.useCats:
+ gt = self._gts[imgId, catId]
+ dt = self._dts[imgId, catId]
+ else:
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ if len(gt) == 0 and len(dt) == 0:
+ return []
+ inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
+ dt = [dt[i] for i in inds]
+ if len(dt) > p.maxDets[-1]:
+ dt = dt[0 : p.maxDets[-1]]
+
+ gtmasks = []
+ for g in gt:
+ if DensePoseDataRelative.S_KEY in g:
+ # convert DensePose mask to a binary mask
+ mask = np.minimum(self.getDensePoseMask(g[DensePoseDataRelative.S_KEY]), 1.0)
+ _, _, w, h = g["bbox"]
+ scale_x = float(max(w, 1)) / mask.shape[1]
+ scale_y = float(max(h, 1)) / mask.shape[0]
+ mask = spzoom(mask, (scale_y, scale_x), order=1, prefilter=False)
+ mask = np.array(mask > 0.5, dtype=np.uint8)
+ rle_mask = self._generate_rlemask_on_image(mask, imgId, g)
+ elif "segmentation" in g:
+ segmentation = g["segmentation"]
+ if isinstance(segmentation, list) and segmentation:
+ # polygons
+ im_h, im_w = self.size_mapping[imgId]
+ rles = maskUtils.frPyObjects(segmentation, im_h, im_w)
+ rle_mask = maskUtils.merge(rles)
+ elif isinstance(segmentation, dict):
+ if isinstance(segmentation["counts"], list):
+ # uncompressed RLE
+ im_h, im_w = self.size_mapping[imgId]
+ rle_mask = maskUtils.frPyObjects(segmentation, im_h, im_w)
+ else:
+ # compressed RLE
+ rle_mask = segmentation
+ else:
+ rle_mask = self._generate_rlemask_on_image(None, imgId, g)
+ else:
+ rle_mask = self._generate_rlemask_on_image(None, imgId, g)
+ gtmasks.append(rle_mask)
+
+ dtmasks = []
+ for d in dt:
+ mask = self._extract_mask(d)
+ mask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"])
+ rle_mask = self._generate_rlemask_on_image(mask, imgId, d)
+ dtmasks.append(rle_mask)
+
+ # compute iou between each dt and gt region
+ iscrowd = [int(o["iscrowd"]) for o in gt]
+ iousDP = maskUtils.iou(dtmasks, gtmasks, iscrowd)
+ return iousDP
+
+ def computeIoU(self, imgId, catId):
+ p = self.params
+ if p.useCats:
+ gt = self._gts[imgId, catId]
+ dt = self._dts[imgId, catId]
+ else:
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ if len(gt) == 0 and len(dt) == 0:
+ return []
+ inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
+ dt = [dt[i] for i in inds]
+ if len(dt) > p.maxDets[-1]:
+ dt = dt[0 : p.maxDets[-1]]
+
+ if p.iouType == "segm":
+ g = [g["segmentation"] for g in gt if g["segmentation"] is not None]
+ d = [d["segmentation"] for d in dt if d["segmentation"] is not None]
+ elif p.iouType == "bbox":
+ g = [g["bbox"] for g in gt]
+ d = [d["bbox"] for d in dt]
+ else:
+ raise Exception("unknown iouType for iou computation")
+
+ # compute iou between each dt and gt region
+ iscrowd = [int(o["iscrowd"]) for o in gt]
+ ious = maskUtils.iou(d, g, iscrowd)
+ return ious
+
+ def computeOks(self, imgId, catId):
+ p = self.params
+ # dimension here should be Nxm
+ gts = self._gts[imgId, catId]
+ dts = self._dts[imgId, catId]
+ inds = np.argsort([-d["score"] for d in dts], kind="mergesort")
+ dts = [dts[i] for i in inds]
+ if len(dts) > p.maxDets[-1]:
+ dts = dts[0 : p.maxDets[-1]]
+ # if len(gts) == 0 and len(dts) == 0:
+ if len(gts) == 0 or len(dts) == 0:
+ return []
+ ious = np.zeros((len(dts), len(gts)))
+ sigmas = (
+ np.array(
+ [
+ 0.26,
+ 0.25,
+ 0.25,
+ 0.35,
+ 0.35,
+ 0.79,
+ 0.79,
+ 0.72,
+ 0.72,
+ 0.62,
+ 0.62,
+ 1.07,
+ 1.07,
+ 0.87,
+ 0.87,
+ 0.89,
+ 0.89,
+ ]
+ )
+ / 10.0
+ )
+ vars = (sigmas * 2) ** 2
+ k = len(sigmas)
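+        # OKS between a detection and a gt instance, as in COCO keypoint eval:
+        #   OKS = sum_i exp(-d_i^2 / (2 * s^2 * kappa_i^2)) / num_visible,
+        # where d_i is the i-th keypoint distance, s^2 the gt area and
+        # kappa_i = 2 * sigma_i (the `vars` above); the loop below computes
+        # the exponent `e` per keypoint and averages exp(-e)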
+ # compute oks between each detection and ground truth object
+ for j, gt in enumerate(gts):
+            # create bounds for ignore regions (double the gt bbox)
+ g = np.array(gt["keypoints"])
+ xg = g[0::3]
+ yg = g[1::3]
+ vg = g[2::3]
+ k1 = np.count_nonzero(vg > 0)
+ bb = gt["bbox"]
+ x0 = bb[0] - bb[2]
+ x1 = bb[0] + bb[2] * 2
+ y0 = bb[1] - bb[3]
+ y1 = bb[1] + bb[3] * 2
+ for i, dt in enumerate(dts):
+ d = np.array(dt["keypoints"])
+ xd = d[0::3]
+ yd = d[1::3]
+ if k1 > 0:
+ # measure the per-keypoint distance if keypoints visible
+ dx = xd - xg
+ dy = yd - yg
+ else:
+ # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
+ z = np.zeros(k)
+ dx = np.max((z, x0 - xd), axis=0) + np.max((z, xd - x1), axis=0)
+ dy = np.max((z, y0 - yd), axis=0) + np.max((z, yd - y1), axis=0)
+ e = (dx ** 2 + dy ** 2) / vars / (gt["area"] + np.spacing(1)) / 2
+ if k1 > 0:
+ e = e[vg > 0]
+ ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
+ return ious
+
+ def _extract_mask(self, dt: Dict[str, Any]) -> np.ndarray:
+ (densepose_shape, densepose_data_encoded), densepose_bbox_xywh = dt["densepose"]
+ densepose_data = DensePoseResult.decode_png_data(densepose_shape, densepose_data_encoded)
+ return densepose_data[0]
+
+ def _extract_iuv(
+ self, densepose_data: np.ndarray, py: np.ndarray, px: np.ndarray, gt: Dict[str, Any]
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+ """
+ Extract arrays of I, U and V values at given points as numpy arrays
+ given the data mode stored in self._dpDataMode
+ """
+ if self._dpDataMode == DensePoseDataMode.IUV_DT:
+ # estimated labels and UV (default)
+ ipoints = densepose_data[0, py, px]
+ upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255.
+ vpoints = densepose_data[2, py, px] / 255.0
+ elif self._dpDataMode == DensePoseDataMode.IUV_GT:
+ # ground truth
+ ipoints = np.array(gt["dp_I"])
+ upoints = np.array(gt["dp_U"])
+ vpoints = np.array(gt["dp_V"])
+ elif self._dpDataMode == DensePoseDataMode.I_GT_UV_0:
+ # ground truth labels, UV = 0
+ ipoints = np.array(gt["dp_I"])
+            upoints = np.zeros_like(ipoints, dtype=float)
+            vpoints = np.zeros_like(ipoints, dtype=float)
+ elif self._dpDataMode == DensePoseDataMode.I_GT_UV_DT:
+ # ground truth labels, estimated UV
+ ipoints = np.array(gt["dp_I"])
+ upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255.
+ vpoints = densepose_data[2, py, px] / 255.0
+ elif self._dpDataMode == DensePoseDataMode.I_DT_UV_0:
+ # estimated labels, UV = 0
+ ipoints = densepose_data[0, py, px]
+            upoints = np.zeros_like(ipoints, dtype=float)
+            vpoints = np.zeros_like(ipoints, dtype=float)
+ else:
+ raise ValueError(f"Unknown data mode: {self._dpDataMode}")
+ return ipoints, upoints, vpoints
+
+ def computeOgps(self, imgId, catId):
+ p = self.params
+ # dimension here should be Nxm
+ g = self._gts[imgId, catId]
+ d = self._dts[imgId, catId]
+ inds = np.argsort([-d_["score"] for d_ in d], kind="mergesort")
+ d = [d[i] for i in inds]
+ if len(d) > p.maxDets[-1]:
+ d = d[0 : p.maxDets[-1]]
+ # if len(gts) == 0 and len(dts) == 0:
+ if len(g) == 0 or len(d) == 0:
+ return []
+ ious = np.zeros((len(d), len(g)))
+ # compute opgs between each detection and ground truth object
+ # sigma = self.sigma #0.255 # dist = 0.3m corresponds to ogps = 0.5
+ # 1 # dist = 0.3m corresponds to ogps = 0.96
+ # 1.45 # dist = 1.7m (person height) corresponds to ogps = 0.5)
+ for j, gt in enumerate(g):
+ if not gt["ignore"]:
+ g_ = gt["bbox"]
+ for i, dt in enumerate(d):
+ #
+ dy = int(dt["bbox"][3])
+ dx = int(dt["bbox"][2])
+ dp_x = np.array(gt["dp_x"]) * g_[2] / 255.0
+ dp_y = np.array(gt["dp_y"]) * g_[3] / 255.0
+ py = (dp_y + g_[1] - dt["bbox"][1]).astype(np.int)
+ px = (dp_x + g_[0] - dt["bbox"][0]).astype(np.int)
+ #
+ pts = np.zeros(len(px))
+ pts[px >= dx] = -1
+ pts[py >= dy] = -1
+ pts[px < 0] = -1
+ pts[py < 0] = -1
+ if len(pts) < 1:
+ ogps = 0.0
+ elif np.max(pts) == -1:
+ ogps = 0.0
+ else:
+ px[pts == -1] = 0
+ py[pts == -1] = 0
+ (densepose_shape, densepose_data_encoded), densepose_bbox_xywh = dt[
+ "densepose"
+ ]
+ densepose_data = DensePoseResult.decode_png_data(
+ densepose_shape, densepose_data_encoded
+ )
+ assert densepose_data.shape[2] == dx, (
+ "DensePoseData width {} should be equal to "
+ "detection bounding box width {}".format(densepose_data.shape[2], dx)
+ )
+ assert densepose_data.shape[1] == dy, (
+ "DensePoseData height {} should be equal to "
+ "detection bounding box height {}".format(densepose_data.shape[1], dy)
+ )
+ ipoints, upoints, vpoints = self._extract_iuv(densepose_data, py, px, gt)
+ ipoints[pts == -1] = 0
+ # Find closest vertices in subsampled mesh.
+ cVerts, cVertsGT = self.findAllClosestVerts(gt, upoints, vpoints, ipoints)
+ # Get pairwise geodesic distances between gt and estimated mesh points.
+ dist = self.getDistances(cVertsGT, cVerts)
+ # Compute the Ogps measure.
+ # Find the mean geodesic normalization distance for
+ # each GT point, based on which part it is on.
+ Current_Mean_Distances = self.Mean_Distances[
+ self.CoarseParts[self.Part_ids[cVertsGT[cVertsGT > 0].astype(int) - 1]]
+ ]
+ # Compute gps
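+                        # Geodesic Point Similarity per annotated point:
+                        #   gps_p = exp(-g(p)^2 / (2 * kappa(p)^2)),
+                        # where g(p) is the geodesic distance between predicted
+                        # and gt vertices and kappa(p) is the per-part mean
+                        # distance (Current_Mean_Distances); ogps averages gps_p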
+ ogps_values = np.exp(-(dist ** 2) / (2 * (Current_Mean_Distances ** 2)))
+ #
+ if len(dist) > 0:
+ ogps = np.sum(ogps_values) / len(dist)
+ ious[i, j] = ogps
+
+ gbb = [gt["bbox"] for gt in g]
+ dbb = [dt["bbox"] for dt in d]
+
+ # compute iou between each dt and gt region
+ iscrowd = [int(o["iscrowd"]) for o in g]
+ ious_bb = maskUtils.iou(dbb, gbb, iscrowd)
+ return ious, ious_bb
+
+ def evaluateImg(self, imgId, catId, aRng, maxDet):
+ """
+ perform evaluation for single category and image
+ :return: dict (single image results)
+ """
+
+ p = self.params
+ if p.useCats:
+ gt = self._gts[imgId, catId]
+ dt = self._dts[imgId, catId]
+ else:
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ if len(gt) == 0 and len(dt) == 0:
+ return None
+
+ for g in gt:
+ # g['_ignore'] = g['ignore']
+ if g["ignore"] or (g["area"] < aRng[0] or g["area"] > aRng[1]):
+ g["_ignore"] = True
+ else:
+ g["_ignore"] = False
+
+ # sort dt highest score first, sort gt ignore last
+ gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort")
+ gt = [gt[i] for i in gtind]
+ dtind = np.argsort([-d["score"] for d in dt], kind="mergesort")
+ dt = [dt[i] for i in dtind[0:maxDet]]
+ iscrowd = [int(o["iscrowd"]) for o in gt]
+ # load computed ious
+ if p.iouType == "densepose":
+ # print('Checking the length', len(self.ious[imgId, catId]))
+ # if len(self.ious[imgId, catId]) == 0:
+ # print(self.ious[imgId, catId])
+ ious = (
+ self.ious[imgId, catId][0][:, gtind]
+ if len(self.ious[imgId, catId]) > 0
+ else self.ious[imgId, catId]
+ )
+ ioubs = (
+ self.ious[imgId, catId][1][:, gtind]
+ if len(self.ious[imgId, catId]) > 0
+ else self.ious[imgId, catId]
+ )
+ if self._dpEvalMode == DensePoseEvalMode.GPSM:
+ iousM = (
+ self.real_ious[imgId, catId][:, gtind]
+ if len(self.real_ious[imgId, catId]) > 0
+ else self.real_ious[imgId, catId]
+ )
+ else:
+ ious = (
+ self.ious[imgId, catId][:, gtind]
+ if len(self.ious[imgId, catId]) > 0
+ else self.ious[imgId, catId]
+ )
+
+ T = len(p.iouThrs)
+ G = len(gt)
+ D = len(dt)
+ gtm = np.zeros((T, G))
+ dtm = np.zeros((T, D))
+ gtIg = np.array([g["_ignore"] for g in gt])
+ dtIg = np.zeros((T, D))
+ if np.all(gtIg) and p.iouType == "densepose":
+ dtIg = np.logical_or(dtIg, True)
+
+ if len(ious) > 0: # and not p.iouType == 'densepose':
+ for tind, t in enumerate(p.iouThrs):
+ for dind, d in enumerate(dt):
+ # information about best match so far (m=-1 -> unmatched)
+ iou = min([t, 1 - 1e-10])
+ m = -1
+ for gind, _g in enumerate(gt):
+ # if this gt already matched, and not a crowd, continue
+ if gtm[tind, gind] > 0 and not iscrowd[gind]:
+ continue
+ # if dt matched to reg gt, and on ignore gt, stop
+ if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
+ break
+ if p.iouType == "densepose":
+ if self._dpEvalMode == DensePoseEvalMode.GPSM:
+ new_iou = np.sqrt(iousM[dind, gind] * ious[dind, gind])
+ elif self._dpEvalMode == DensePoseEvalMode.IOU:
+ new_iou = iousM[dind, gind]
+ elif self._dpEvalMode == DensePoseEvalMode.GPS:
+ new_iou = ious[dind, gind]
+ else:
+ new_iou = ious[dind, gind]
+ if new_iou < iou:
+ continue
+ if new_iou == 0.0:
+ continue
+ # if match successful and best so far, store appropriately
+ iou = new_iou
+ m = gind
+ # if match made store id of match for both dt and gt
+ if m == -1:
+ continue
+ dtIg[tind, dind] = gtIg[m]
+ dtm[tind, dind] = gt[m]["id"]
+ gtm[tind, m] = d["id"]
+
+ if p.iouType == "densepose":
+ if not len(ioubs) == 0:
+ for dind, d in enumerate(dt):
+ # information about best match so far (m=-1 -> unmatched)
+ if dtm[tind, dind] == 0:
+ ioub = 0.8
+ m = -1
+ for gind, _g in enumerate(gt):
+ # if this gt already matched, and not a crowd, continue
+ if gtm[tind, gind] > 0 and not iscrowd[gind]:
+ continue
+ # continue to next gt unless better match made
+ if ioubs[dind, gind] < ioub:
+ continue
+ # if match successful and best so far, store appropriately
+ ioub = ioubs[dind, gind]
+ m = gind
+ # if match made store id of match for both dt and gt
+ if m > -1:
+ dtIg[:, dind] = gtIg[m]
+ if gtIg[m]:
+ dtm[tind, dind] = gt[m]["id"]
+ gtm[tind, m] = d["id"]
+ # set unmatched detections outside of area range to ignore
+ a = np.array([d["area"] < aRng[0] or d["area"] > aRng[1] for d in dt]).reshape((1, len(dt)))
+ dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0)))
+ # store results for given image and category
+ # print('Done with the function', len(self.ious[imgId, catId]))
+ return {
+ "image_id": imgId,
+ "category_id": catId,
+ "aRng": aRng,
+ "maxDet": maxDet,
+ "dtIds": [d["id"] for d in dt],
+ "gtIds": [g["id"] for g in gt],
+ "dtMatches": dtm,
+ "gtMatches": gtm,
+ "dtScores": [d["score"] for d in dt],
+ "gtIgnore": gtIg,
+ "dtIgnore": dtIg,
+ }
+
+ def accumulate(self, p=None):
+ """
+ Accumulate per image evaluation results and store the result in self.eval
+ :param p: input params for evaluation
+ :return: None
+ """
+ logger.info("Accumulating evaluation results...")
+ tic = time.time()
+ if not self.evalImgs:
+ logger.info("Please run evaluate() first")
+ # allows input customized parameters
+ if p is None:
+ p = self.params
+ p.catIds = p.catIds if p.useCats == 1 else [-1]
+ T = len(p.iouThrs)
+ R = len(p.recThrs)
+ K = len(p.catIds) if p.useCats else 1
+ A = len(p.areaRng)
+ M = len(p.maxDets)
+ precision = -(np.ones((T, R, K, A, M))) # -1 for the precision of absent categories
+ recall = -(np.ones((T, K, A, M)))
+
+ # create dictionary for future indexing
+ logger.info("Categories: {}".format(p.catIds))
+ _pe = self._paramsEval
+ catIds = _pe.catIds if _pe.useCats else [-1]
+ setK = set(catIds)
+ setA = set(map(tuple, _pe.areaRng))
+ setM = set(_pe.maxDets)
+ setI = set(_pe.imgIds)
+ # get inds to evaluate
+ k_list = [n for n, k in enumerate(p.catIds) if k in setK]
+ m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
+ a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
+ i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
+ I0 = len(_pe.imgIds)
+ A0 = len(_pe.areaRng)
+ # retrieve E at each category, area range, and max number of detections
+ for k, k0 in enumerate(k_list):
+ Nk = k0 * A0 * I0
+ for a, a0 in enumerate(a_list):
+ Na = a0 * I0
+ for m, maxDet in enumerate(m_list):
+ E = [self.evalImgs[Nk + Na + i] for i in i_list]
+ E = [e for e in E if e is not None]
+ if len(E) == 0:
+ continue
+ dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E])
+
+                    # a different sorting method generates slightly different results;
+                    # mergesort is used to be consistent with the Matlab implementation.
+ inds = np.argsort(-dtScores, kind="mergesort")
+
+ dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds]
+ dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds]
+ gtIg = np.concatenate([e["gtIgnore"] for e in E])
+ npig = np.count_nonzero(gtIg == 0)
+ if npig == 0:
+ continue
+ tps = np.logical_and(dtm, np.logical_not(dtIg))
+ fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))
+ tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
+ fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
+ for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
+ tp = np.array(tp)
+ fp = np.array(fp)
+ nd = len(tp)
+ rc = tp / npig
+ pr = tp / (fp + tp + np.spacing(1))
+ q = np.zeros((R,))
+
+ if nd:
+ recall[t, k, a, m] = rc[-1]
+ else:
+ recall[t, k, a, m] = 0
+
+                        # numpy is slow without cython optimization when accessing elements;
+                        # using python lists gives a significant speed improvement
+ pr = pr.tolist()
+ q = q.tolist()
+
+ for i in range(nd - 1, 0, -1):
+ if pr[i] > pr[i - 1]:
+ pr[i - 1] = pr[i]
+
+ inds = np.searchsorted(rc, p.recThrs, side="left")
+ try:
+ for ri, pi in enumerate(inds):
+ q[ri] = pr[pi]
+ except Exception:
+ pass
+ precision[t, :, k, a, m] = np.array(q)
+ logger.info(
+ "Final: max precision {}, min precision {}".format(np.max(precision), np.min(precision))
+ )
+ self.eval = {
+ "params": p,
+ "counts": [T, R, K, A, M],
+ "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+ "precision": precision,
+ "recall": recall,
+ }
+ toc = time.time()
+ logger.info("DONE (t={:0.2f}s).".format(toc - tic))
+
+ def summarize(self):
+ """
+ Compute and display summary metrics for evaluation results.
+ Note this function can *only* be applied on the default parameter setting
+ """
+
+ def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100):
+ p = self.params
+ iStr = " {:<18} {} @[ {}={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
+ titleStr = "Average Precision" if ap == 1 else "Average Recall"
+ typeStr = "(AP)" if ap == 1 else "(AR)"
+ measure = "IoU"
+ if self.params.iouType == "keypoints":
+ measure = "OKS"
+ elif self.params.iouType == "densepose":
+ measure = "OGPS"
+ iouStr = (
+ "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
+ if iouThr is None
+ else "{:0.2f}".format(iouThr)
+ )
+
+ aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
+ mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+ if ap == 1:
+ # dimension of precision: [TxRxKxAxM]
+ s = self.eval["precision"]
+ # IoU
+ if iouThr is not None:
+ t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0]
+ s = s[t]
+ s = s[:, :, :, aind, mind]
+ else:
+ # dimension of recall: [TxKxAxM]
+ s = self.eval["recall"]
+ if iouThr is not None:
+ t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0]
+ s = s[t]
+ s = s[:, :, aind, mind]
+ if len(s[s > -1]) == 0:
+ mean_s = -1
+ else:
+ mean_s = np.mean(s[s > -1])
+ logger.info(iStr.format(titleStr, typeStr, measure, iouStr, areaRng, maxDets, mean_s))
+ return mean_s
+
+ def _summarizeDets():
+ stats = np.zeros((12,))
+ stats[0] = _summarize(1)
+ stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2])
+ stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2])
+ stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2])
+ stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2])
+ stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2])
+ stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
+ stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
+ stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
+ stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2])
+ stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2])
+ stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2])
+ return stats
+
+ def _summarizeKps():
+ stats = np.zeros((10,))
+ stats[0] = _summarize(1, maxDets=20)
+ stats[1] = _summarize(1, maxDets=20, iouThr=0.5)
+ stats[2] = _summarize(1, maxDets=20, iouThr=0.75)
+ stats[3] = _summarize(1, maxDets=20, areaRng="medium")
+ stats[4] = _summarize(1, maxDets=20, areaRng="large")
+ stats[5] = _summarize(0, maxDets=20)
+ stats[6] = _summarize(0, maxDets=20, iouThr=0.5)
+ stats[7] = _summarize(0, maxDets=20, iouThr=0.75)
+ stats[8] = _summarize(0, maxDets=20, areaRng="medium")
+ stats[9] = _summarize(0, maxDets=20, areaRng="large")
+ return stats
+
+ def _summarizeUvs():
+ stats = [_summarize(1, maxDets=self.params.maxDets[0])]
+ min_threshold = self.params.iouThrs.min()
+ if min_threshold <= 0.201:
+ stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.2)]
+ if min_threshold <= 0.301:
+ stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.3)]
+ if min_threshold <= 0.401:
+ stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.4)]
+ stats += [
+ _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5),
+ _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75),
+ _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium"),
+ _summarize(1, maxDets=self.params.maxDets[0], areaRng="large"),
+ _summarize(0, maxDets=self.params.maxDets[0]),
+ _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5),
+ _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75),
+ _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium"),
+ _summarize(0, maxDets=self.params.maxDets[0], areaRng="large"),
+ ]
+ return np.array(stats)
+
+ def _summarizeUvsOld():
+ stats = np.zeros((18,))
+ stats[0] = _summarize(1, maxDets=self.params.maxDets[0])
+ stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5)
+ stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.55)
+ stats[3] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.60)
+ stats[4] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.65)
+ stats[5] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.70)
+ stats[6] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75)
+ stats[7] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.80)
+ stats[8] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.85)
+ stats[9] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.90)
+ stats[10] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.95)
+ stats[11] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium")
+ stats[12] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large")
+ stats[13] = _summarize(0, maxDets=self.params.maxDets[0])
+ stats[14] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5)
+ stats[15] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75)
+ stats[16] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium")
+ stats[17] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large")
+ return stats
+
+ if not self.eval:
+ raise Exception("Please run accumulate() first")
+ iouType = self.params.iouType
+ if iouType in ["segm", "bbox"]:
+ summarize = _summarizeDets
+ elif iouType in ["keypoints"]:
+ summarize = _summarizeKps
+ elif iouType in ["densepose"]:
+ summarize = _summarizeUvs
+ self.stats = summarize()
+
+ def __str__(self):
+ self.summarize()
+
+ # ================ functions for dense pose ==============================
+ def findAllClosestVerts(self, gt, U_points, V_points, Index_points):
+ #
+ I_gt = np.array(gt["dp_I"])
+ U_gt = np.array(gt["dp_U"])
+ V_gt = np.array(gt["dp_V"])
+ #
+ # print(I_gt)
+ #
+ ClosestVerts = np.ones(Index_points.shape) * -1
+ for i in np.arange(24):
+ #
+ if sum(Index_points == (i + 1)) > 0:
+ UVs = np.array(
+ [U_points[Index_points == (i + 1)], V_points[Index_points == (i + 1)]]
+ )
+ Current_Part_UVs = self.Part_UVs[i]
+ Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i]
+ D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze()
+ ClosestVerts[Index_points == (i + 1)] = Current_Part_ClosestVertInds[
+ np.argmin(D, axis=0)
+ ]
+ #
+ ClosestVertsGT = np.ones(Index_points.shape) * -1
+ for i in np.arange(24):
+ if sum(I_gt == (i + 1)) > 0:
+ UVs = np.array([U_gt[I_gt == (i + 1)], V_gt[I_gt == (i + 1)]])
+ Current_Part_UVs = self.Part_UVs[i]
+ Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i]
+ D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze()
+ ClosestVertsGT[I_gt == (i + 1)] = Current_Part_ClosestVertInds[np.argmin(D, axis=0)]
+ #
+ return ClosestVerts, ClosestVertsGT
+
+ def getDistances(self, cVertsGT, cVerts):
+
+ ClosestVertsTransformed = self.PDIST_transform[cVerts.astype(int) - 1]
+ ClosestVertsGTTransformed = self.PDIST_transform[cVertsGT.astype(int) - 1]
+ #
+ ClosestVertsTransformed[cVerts < 0] = 0
+ ClosestVertsGTTransformed[cVertsGT < 0] = 0
+ #
+ cVertsGT = ClosestVertsGTTransformed
+ cVerts = ClosestVertsTransformed
+ #
+ n = 27554
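+        # n is the number of vertices of the subsampled SMPL mesh; the geodesic
+        # distances in Pdist_matrix are stored in condensed (flattened upper
+        # triangle) form, and the index arithmetic below maps a vertex pair
+        # (i, j) to its position in that flattened array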
+ dists = []
+ for d in range(len(cVertsGT)):
+ if cVertsGT[d] > 0:
+ if cVerts[d] > 0:
+ i = cVertsGT[d] - 1
+ j = cVerts[d] - 1
+ if j == i:
+ dists.append(0)
+ elif j > i:
+ ccc = i
+ i = j
+ j = ccc
+ i = n - i - 1
+ j = n - j - 1
+ k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1
+ k = (n * n - n) / 2 - k - 1
+ dists.append(self.Pdist_matrix[int(k)][0])
+ else:
+ i = n - i - 1
+ j = n - j - 1
+ k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1
+ k = (n * n - n) / 2 - k - 1
+ dists.append(self.Pdist_matrix[int(k)][0])
+ else:
+ dists.append(np.inf)
+ return np.atleast_1d(np.array(dists).squeeze())
+
+
+class Params:
+ """
+ Params for coco evaluation api
+ """
+
+ def setDetParams(self):
+ self.imgIds = []
+ self.catIds = []
+        # np.arange causes trouble: the data points it generates can be slightly larger than the true values
+ self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
+ self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
+ self.maxDets = [1, 10, 100]
+ self.areaRng = [
+ [0 ** 2, 1e5 ** 2],
+ [0 ** 2, 32 ** 2],
+ [32 ** 2, 96 ** 2],
+ [96 ** 2, 1e5 ** 2],
+ ]
+ self.areaRngLbl = ["all", "small", "medium", "large"]
+ self.useCats = 1
+
+ def setKpParams(self):
+ self.imgIds = []
+ self.catIds = []
+        # np.arange causes trouble: the data points it generates can be slightly larger than the true values
+        self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
+        self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
+ self.maxDets = [20]
+ self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
+ self.areaRngLbl = ["all", "medium", "large"]
+ self.useCats = 1
+
+ def setUvParams(self):
+ self.imgIds = []
+ self.catIds = []
+ self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
+ self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
+ self.maxDets = [20]
+ self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
+ self.areaRngLbl = ["all", "medium", "large"]
+ self.useCats = 1
+
+ def __init__(self, iouType="segm"):
+ if iouType == "segm" or iouType == "bbox":
+ self.setDetParams()
+ elif iouType == "keypoints":
+ self.setKpParams()
+ elif iouType == "densepose":
+ self.setUvParams()
+ else:
+ raise Exception("iouType not supported")
+ self.iouType = iouType
+ # useSegm is deprecated
+ self.useSegm = None
diff --git a/projects/DensePose/densepose/engine/__init__.py b/projects/DensePose/densepose/engine/__init__.py
new file mode 100644
index 0000000..d73edeb
--- /dev/null
+++ b/projects/DensePose/densepose/engine/__init__.py
@@ -0,0 +1,3 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from .trainer import Trainer
diff --git a/projects/DensePose/densepose/engine/trainer.py b/projects/DensePose/densepose/engine/trainer.py
new file mode 100644
index 0000000..1d1d270
--- /dev/null
+++ b/projects/DensePose/densepose/engine/trainer.py
@@ -0,0 +1,118 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import logging
+import os
+from collections import OrderedDict
+
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import CfgNode
+from detectron2.engine import DefaultTrainer
+from detectron2.evaluation import COCOEvaluator, DatasetEvaluators
+from detectron2.utils.events import EventWriter, get_event_storage
+
+from densepose import (
+ DensePoseCOCOEvaluator,
+ DensePoseDatasetMapperTTA,
+ DensePoseGeneralizedRCNNWithTTA,
+ load_from_cfg,
+)
+from densepose.data import (
+ DatasetMapper,
+ build_combined_loader,
+ build_detection_test_loader,
+ build_detection_train_loader,
+ build_inference_based_loaders,
+ has_inference_based_loaders,
+)
+
+
+class SampleCountingLoader:
+ def __init__(self, loader):
+ self.loader = loader
+
+ def __iter__(self):
+ it = iter(self.loader)
+ storage = get_event_storage()
+ while True:
+ try:
+ batch = next(it)
+ num_inst_per_dataset = {}
+ for data in batch:
+ dataset_name = data["dataset"]
+ if dataset_name not in num_inst_per_dataset:
+ num_inst_per_dataset[dataset_name] = 0
+ num_inst = len(data["instances"])
+ num_inst_per_dataset[dataset_name] += num_inst
+ for dataset_name in num_inst_per_dataset:
+ storage.put_scalar(f"batch/{dataset_name}", num_inst_per_dataset[dataset_name])
+ yield batch
+ except StopIteration:
+ break
+
+
+class SampleCountMetricPrinter(EventWriter):
+ def __init__(self):
+ self.logger = logging.getLogger(__name__)
+
+ def write(self):
+ storage = get_event_storage()
+ batch_stats_strs = []
+ for key, buf in storage.histories().items():
+ if key.startswith("batch/"):
+ batch_stats_strs.append(f"{key} {buf.avg(20)}")
+ self.logger.info(", ".join(batch_stats_strs))
+
+
+class Trainer(DefaultTrainer):
+ @classmethod
+ def build_evaluator(cls, cfg: CfgNode, dataset_name, output_folder=None):
+ if output_folder is None:
+ output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
+ evaluators = [COCOEvaluator(dataset_name, cfg, True, output_folder)]
+ if cfg.MODEL.DENSEPOSE_ON:
+ evaluators.append(DensePoseCOCOEvaluator(dataset_name, True, output_folder))
+ return DatasetEvaluators(evaluators)
+
+ @classmethod
+ def build_test_loader(cls, cfg: CfgNode, dataset_name):
+ return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))
+
+ @classmethod
+ def build_train_loader(cls, cfg: CfgNode):
+ data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
+ if not has_inference_based_loaders(cfg):
+ return data_loader
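+        # bootstrap path: build a second model from cfg.BOOTSTRAP_MODEL, derive
+        # loaders whose samples are produced by running that model, and mix them
+        # with the standard loader using the configured ratios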
+ model = cls.build_model(cfg)
+ model.to(cfg.BOOTSTRAP_MODEL.DEVICE)
+ DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False)
+ inference_based_loaders, ratios = build_inference_based_loaders(cfg, model)
+ loaders = [data_loader] + inference_based_loaders
+ ratios = [1.0] + ratios
+ combined_data_loader = build_combined_loader(cfg, loaders, ratios)
+ sample_counting_loader = SampleCountingLoader(combined_data_loader)
+ return sample_counting_loader
+
+ def build_writers(self):
+ writers = super().build_writers()
+ writers.append(SampleCountMetricPrinter())
+ return writers
+
+ @classmethod
+ def test_with_TTA(cls, cfg: CfgNode, model):
+ logger = logging.getLogger("detectron2.trainer")
+        # At the end of training, run an evaluation with TTA.
+        # Only some R-CNN models are supported.
+ logger.info("Running inference with test-time augmentation ...")
+ transform_data = load_from_cfg(cfg)
+ model = DensePoseGeneralizedRCNNWithTTA(
+ cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg)
+ )
+ evaluators = [
+ cls.build_evaluator(
+ cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
+ )
+ for name in cfg.DATASETS.TEST
+ ]
+ res = cls.test(cfg, model, evaluators)
+ res = OrderedDict({k + "_TTA": v for k, v in res.items()})
+ return res
diff --git a/projects/DensePose/densepose/evaluator.py b/projects/DensePose/densepose/evaluator.py
new file mode 100644
index 0000000..da78f76
--- /dev/null
+++ b/projects/DensePose/densepose/evaluator.py
@@ -0,0 +1,224 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import contextlib
+import copy
+import io
+import itertools
+import logging
+import numpy as np
+import os
+from collections import OrderedDict
+import pycocotools.mask as mask_utils
+import torch
+from fvcore.common.file_io import PathManager
+from pycocotools.coco import COCO
+
+from detectron2.data import MetadataCatalog
+from detectron2.evaluation import DatasetEvaluator
+from detectron2.structures import BoxMode
+from detectron2.utils.comm import all_gather, is_main_process, synchronize
+from detectron2.utils.logger import create_small_table
+
+from .data.samplers import densepose_to_mask
+from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
+
+
+class DensePoseCOCOEvaluator(DatasetEvaluator):
+ def __init__(self, dataset_name, distributed, output_dir=None):
+ self._distributed = distributed
+ self._output_dir = output_dir
+
+ self._cpu_device = torch.device("cpu")
+ self._logger = logging.getLogger(__name__)
+
+ self._metadata = MetadataCatalog.get(dataset_name)
+ self._min_threshold = 0.5
+ json_file = PathManager.get_local_path(self._metadata.json_file)
+ with contextlib.redirect_stdout(io.StringIO()):
+ self._coco_api = COCO(json_file)
+
+ def reset(self):
+ self._predictions = []
+
+ def process(self, inputs, outputs):
+ """
+ Args:
+ inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
+ It is a list of dict. Each dict corresponds to an image and
+ contains keys like "height", "width", "file_name", "image_id".
+ outputs: the outputs of a COCO model. It is a list of dicts with key
+ "instances" that contains :class:`Instances`.
+ The :class:`Instances` object needs to have `densepose` field.
+ """
+ for input, output in zip(inputs, outputs):
+ instances = output["instances"].to(self._cpu_device)
+
+ json_results = prediction_to_json(instances, input["image_id"])
+ self._predictions.extend(json_results)
+
+ def evaluate(self):
+ if self._distributed:
+ synchronize()
+ predictions = all_gather(self._predictions)
+ predictions = list(itertools.chain(*predictions))
+ if not is_main_process():
+ return
+ else:
+ predictions = self._predictions
+
+ return copy.deepcopy(self._eval_predictions(predictions))
+
+ def _eval_predictions(self, predictions):
+ """
+ Evaluate predictions on densepose.
+ Return results with the metrics of the tasks.
+ """
+ self._logger.info("Preparing results for COCO format ...")
+
+ if self._output_dir:
+ PathManager.mkdirs(self._output_dir)
+ file_path = os.path.join(self._output_dir, "coco_densepose_predictions.pth")
+ with PathManager.open(file_path, "wb") as f:
+ torch.save(predictions, f)
+
+ self._logger.info("Evaluating predictions ...")
+ res = OrderedDict()
+ results_gps, results_gpsm, results_segm = _evaluate_predictions_on_coco(
+ self._coco_api, predictions, min_threshold=self._min_threshold
+ )
+ res["densepose_gps"] = results_gps
+ res["densepose_gpsm"] = results_gpsm
+ res["densepose_segm"] = results_segm
+ return res
+
+
+def prediction_to_json(instances, img_id):
+ """
+ Args:
+ instances (Instances): the output of the model
+ img_id (str): the image id in COCO
+
+ Returns:
+ list[dict]: the results in densepose evaluation format
+ """
+ scores = instances.scores.tolist()
+ segmentations = densepose_to_mask(instances)
+
+ boxes = instances.pred_boxes.tensor.clone()
+ boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+ instances.pred_densepose = instances.pred_densepose.to_result(boxes)
+
+ results = []
+ for k in range(len(instances)):
+ densepose = instances.pred_densepose[k]
+ segmentation = segmentations.tensor[k]
+ segmentation_encoded = mask_utils.encode(
+ np.require(segmentation.numpy(), dtype=np.uint8, requirements=["F"])
+ )
+ segmentation_encoded["counts"] = segmentation_encoded["counts"].decode("utf-8")
+ result = {
+ "image_id": img_id,
+ "category_id": 1, # densepose only has one class
+ "bbox": densepose[1],
+ "score": scores[k],
+ "densepose": densepose,
+ "segmentation": segmentation_encoded,
+ }
+ results.append(result)
+ return results
+
+
+def _evaluate_predictions_on_coco(coco_gt, coco_results, min_threshold=0.5):
+ logger = logging.getLogger(__name__)
+
+ segm_metrics = _get_segmentation_metrics()
+ densepose_metrics = _get_densepose_metrics(min_threshold)
+ if len(coco_results) == 0: # cocoapi does not handle empty results very well
+ logger.warning("No predictions from the model! Setting scores to -1")
+ results_gps = {metric: -1 for metric in densepose_metrics}
+ results_gpsm = {metric: -1 for metric in densepose_metrics}
+ results_segm = {metric: -1 for metric in segm_metrics}
+ return results_gps, results_gpsm, results_segm
+
+ coco_dt = coco_gt.loadRes(coco_results)
+ results_segm = _evaluate_predictions_on_coco_segm(coco_gt, coco_dt, segm_metrics, min_threshold)
+ logger.info("Evaluation results for densepose segm: \n" + create_small_table(results_segm))
+ results_gps = _evaluate_predictions_on_coco_gps(
+ coco_gt, coco_dt, densepose_metrics, min_threshold
+ )
+ logger.info(
+ "Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps)
+ )
+ results_gpsm = _evaluate_predictions_on_coco_gpsm(
+ coco_gt, coco_dt, densepose_metrics, min_threshold
+ )
+ logger.info(
+ "Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm)
+ )
+ return results_gps, results_gpsm, results_segm
+
+
+def _get_densepose_metrics(min_threshold=0.5):
+ metrics = ["AP"]
+ if min_threshold <= 0.201:
+ metrics += ["AP20"]
+ if min_threshold <= 0.301:
+ metrics += ["AP30"]
+ if min_threshold <= 0.401:
+ metrics += ["AP40"]
+ metrics.extend(["AP50", "AP75", "APm", "APl", "AR", "AR50", "AR75", "ARm", "ARl"])
+ return metrics
+
+
+def _get_segmentation_metrics():
+ return [
+ "AP",
+ "AP50",
+ "AP75",
+ "APs",
+ "APm",
+ "APl",
+ "AR@1",
+ "AR@10",
+ "AR@100",
+ "ARs",
+ "ARm",
+ "ARl",
+ ]
+
+
+def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics, min_threshold=0.5):
+ coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS)
+ coco_eval.params.iouThrs = np.linspace(
+ min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True
+ )
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+ results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
+ return results
+
+
+def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics, min_threshold=0.5):
+ coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM)
+ coco_eval.params.iouThrs = np.linspace(
+ min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True
+ )
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+ results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
+ return results
+
+
+def _evaluate_predictions_on_coco_segm(coco_gt, coco_dt, metrics, min_threshold=0.5):
+ coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "segm")
+ coco_eval.params.iouThrs = np.linspace(
+ min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True
+ )
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+ results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
+ return results
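The three `_evaluate_predictions_on_coco_*` helpers above share the same IoU-threshold grid: 0.05-spaced values from `min_threshold` up to 0.95 inclusive, which is also what enables the optional AP20/AP30/AP40 metrics. A minimal standalone sketch of that computation, using only NumPy:

```python
import numpy as np

def densepose_iou_thresholds(min_threshold=0.5):
    # Same linspace as used by the evaluation helpers above:
    # 0.05-spaced thresholds from min_threshold to 0.95 inclusive.
    n_steps = int(np.round((0.95 - min_threshold) / 0.05)) + 1
    return np.linspace(min_threshold, 0.95, n_steps, endpoint=True)

print(densepose_iou_thresholds(0.5))  # [0.5  0.55 ... 0.95], 10 thresholds
print(densepose_iou_thresholds(0.2))  # 16 thresholds, enabling AP20/AP30/AP40
```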
diff --git a/projects/DensePose/densepose/modeling/build.py b/projects/DensePose/densepose/modeling/build.py
new file mode 100644
index 0000000..43f3bf2
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/build.py
@@ -0,0 +1,66 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from detectron2.config import CfgNode
+
+from .filter import DensePoseDataFilter
+from .losses import DensePoseLosses
+from .predictors import DensePoseChartWithConfidencePredictor
+
+
+def build_densepose_predictor(cfg: CfgNode, input_channels: int):
+ """
+ Create an instance of DensePose predictor based on configuration options.
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): input tensor size along the channel dimension
+ Return:
+ An instance of DensePose predictor
+ """
+ predictor = DensePoseChartWithConfidencePredictor(cfg, input_channels)
+ return predictor
+
+
+def build_densepose_data_filter(cfg: CfgNode):
+ """
+ Build DensePose data filter which selects data for training
+
+ Args:
+ cfg (CfgNode): configuration options
+
+ Return:
+ Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances)
+ An instance of DensePose filter, which takes feature tensors and proposals
+ as an input and returns filtered features and proposals
+ """
+ dp_filter = DensePoseDataFilter(cfg)
+ return dp_filter
+
+
+def build_densepose_head(cfg: CfgNode, input_channels: int):
+ """
+ Build DensePose head based on configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): input tensor size along the channel dimension
+ Return:
+ An instance of DensePose head
+ """
+ from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY
+
+ head_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME
+ return ROI_DENSEPOSE_HEAD_REGISTRY.get(head_name)(cfg, input_channels)
+
+
+def build_densepose_losses(cfg: CfgNode):
+ """
+ Build DensePose loss based on configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ Return:
+ An instance of DensePose loss
+ """
+ losses = DensePoseLosses(cfg)
+ return losses
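As a sketch of how `build_densepose_head` resolves the head class, the snippet below registers a hypothetical head in `ROI_DENSEPOSE_HEAD_REGISTRY` (imported under the path this file uses; the class name and its single conv layer are made up for illustration). Selecting it then only requires setting `cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME`.

```python
import torch.nn as nn

# Assumption: the registry is importable from where this file imports it.
from densepose.modeling.roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY

@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class TrivialDensePoseHead(nn.Module):  # hypothetical head, illustration only
    def __init__(self, cfg, input_channels: int):
        super().__init__()
        self.conv = nn.Conv2d(input_channels, input_channels, kernel_size=3, padding=1)

    def forward(self, features):
        return self.conv(features)

# With cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME = "TrivialDensePoseHead",
# build_densepose_head(cfg, input_channels) returns an instance of this class.
```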
diff --git a/projects/DensePose/densepose/modeling/confidence.py b/projects/DensePose/densepose/modeling/confidence.py
new file mode 100644
index 0000000..5195d20
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/confidence.py
@@ -0,0 +1,73 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from dataclasses import dataclass
+from enum import Enum
+
+from detectron2.config import CfgNode
+
+
+class DensePoseUVConfidenceType(Enum):
+ """
+ Statistical model type for confidence learning, possible values:
+ - "iid_iso": statistically independent identically distributed residuals
+ with isotropic covariance
+ - "indep_aniso": statistically independent residuals with anisotropic
+ covariances
+ For details, see:
+ N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
+ Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
+ """
+
+ # fmt: off
+ IID_ISO = "iid_iso"
+ INDEP_ANISO = "indep_aniso"
+ # fmt: on
+
+
+@dataclass
+class DensePoseUVConfidenceConfig:
+ """
+ Configuration options for confidence on UV data
+ """
+
+ enabled: bool = False
+ # lower bound on UV confidences
+ epsilon: float = 0.01
+ type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO
+
+
+@dataclass
+class DensePoseSegmConfidenceConfig:
+ """
+ Configuration options for confidence on segmentation
+ """
+
+ enabled: bool = False
+ # lower bound on confidence values
+ epsilon: float = 0.01
+
+
+@dataclass
+class DensePoseConfidenceModelConfig:
+ """
+ Configuration options for confidence models
+ """
+
+ # confidence for U and V values
+ uv_confidence: DensePoseUVConfidenceConfig
+ # segmentation confidence
+ segm_confidence: DensePoseSegmConfidenceConfig
+
+ @staticmethod
+ def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
+ return DensePoseConfidenceModelConfig(
+ uv_confidence=DensePoseUVConfidenceConfig(
+ enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED,
+ epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON,
+ type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE),
+ ),
+ segm_confidence=DensePoseSegmConfidenceConfig(
+ enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.ENABLED,
+ epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON,
+ ),
+ )
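A minimal sketch of building the confidence configuration directly from the dataclasses above (the values are illustrative, not project defaults beyond what the dataclasses declare); `from_cfg` produces the same structure from the project config node:

```python
conf = DensePoseConfidenceModelConfig(
    uv_confidence=DensePoseUVConfidenceConfig(
        enabled=True,
        epsilon=0.01,
        type=DensePoseUVConfidenceType.IID_ISO,
    ),
    segm_confidence=DensePoseSegmConfidenceConfig(enabled=False),
)
assert conf.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO
```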
diff --git a/projects/DensePose/densepose/modeling/densepose_checkpoint.py b/projects/DensePose/densepose/modeling/densepose_checkpoint.py
new file mode 100644
index 0000000..d2beed2
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/densepose_checkpoint.py
@@ -0,0 +1,35 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from collections import OrderedDict
+
+from detectron2.checkpoint import DetectionCheckpointer
+
+
+def _rename_HRNet_weights(weights):
+ # We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are
+ # common to all HRNet pretrained weights, and should be enough to accurately identify them
+ if (
+ len(weights["model"].keys()) == 1956
+ and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716
+ ):
+ hrnet_weights = OrderedDict()
+ for k in weights["model"].keys():
+ hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k]
+ return {"model": hrnet_weights}
+ else:
+ return weights
+
+
+class DensePoseCheckpointer(DetectionCheckpointer):
+ """
+ Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights
+ """
+
+ def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
+ super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables)
+
+ def _load_file(self, filename: str) -> object:
+ """
+ Adding hrnet support
+ """
+ weights = super()._load_file(filename)
+ return _rename_HRNet_weights(weights)
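To illustrate only the renaming step performed by `_rename_HRNet_weights` (the real function first checks the 1956/1716 key counts to detect HRNet checkpoints; the toy dict below skips that detection and uses a made-up parameter name):

```python
toy_weights = {"model": {"stage2.0.branches.0.0.conv1.weight": None}}
renamed = {"backbone.bottom_up." + k: v for k, v in toy_weights["model"].items()}
print(list(renamed))
# ['backbone.bottom_up.stage2.0.branches.0.0.conv1.weight']
```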
diff --git a/projects/DensePose/densepose/modeling/filter.py b/projects/DensePose/densepose/modeling/filter.py
new file mode 100644
index 0000000..5628d94
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/filter.py
@@ -0,0 +1,94 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from typing import List
+import torch
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+from detectron2.structures.boxes import matched_boxlist_iou
+
+
+class DensePoseDataFilter(object):
+ def __init__(self, cfg: CfgNode):
+ self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
+ self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
+
+ @torch.no_grad()
+ def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]):
+ """
+ Filters proposals with targets to keep only the ones relevant for
+ DensePose training
+
+ Args:
+ features (list[Tensor]): input data as a list of features,
+ each feature is a tensor. Axis 0 represents the number of
+ images `N` in the input data; axes 1-3 are channels,
+ height, and width, which may vary between features
+ (e.g., if a feature pyramid is used).
+ proposals_with_targets (list[Instances]): length `N` list of
+ `Instances`. The i-th `Instances` contains instances
+ (proposals, GT) for the i-th input image.
+ Returns:
+ list[Tensor]: filtered features
+ list[Instances]: filtered proposals
+ """
+ proposals_filtered = []
+ # TODO: the commented out code was supposed to correctly deal with situations
+ # where no valid DensePose GT is available for certain images. The corresponding
+ # image features were sliced and proposals were filtered. This led to performance
+ # deterioration, both in terms of runtime and in terms of evaluation results.
+ #
+ # feature_mask = torch.ones(
+ # len(proposals_with_targets),
+ # dtype=torch.bool,
+ # device=features[0].device if len(features) > 0 else torch.device("cpu"),
+ # )
+ for i, proposals_per_image in enumerate(proposals_with_targets):
+ if not proposals_per_image.has("gt_densepose") and (
+ not proposals_per_image.has("gt_masks") or not self.keep_masks
+ ):
+ # feature_mask[i] = 0
+ continue
+ gt_boxes = proposals_per_image.gt_boxes
+ est_boxes = proposals_per_image.proposal_boxes
+ # apply match threshold for densepose head
+ iou = matched_boxlist_iou(gt_boxes, est_boxes)
+ iou_select = iou > self.iou_threshold
+ proposals_per_image = proposals_per_image[iou_select]
+
+ N_gt_boxes = len(proposals_per_image.gt_boxes)
+ assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
+ f"The number of GT boxes {N_gt_boxes} is different from the "
+ f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
+ )
+ # filter out any target without suitable annotation
+ if self.keep_masks:
+ gt_masks = (
+ proposals_per_image.gt_masks
+ if hasattr(proposals_per_image, "gt_masks")
+ else [None] * N_gt_boxes
+ )
+ else:
+ gt_masks = [None] * N_gt_boxes
+ gt_densepose = (
+ proposals_per_image.gt_densepose
+ if hasattr(proposals_per_image, "gt_densepose")
+ else [None] * N_gt_boxes
+ )
+ assert len(gt_masks) == N_gt_boxes
+ assert len(gt_densepose) == N_gt_boxes
+ selected_indices = [
+ i
+ for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
+ if (dp_target is not None) or (mask_target is not None)
+ ]
+ # if not len(selected_indices):
+ # feature_mask[i] = 0
+ # continue
+ if len(selected_indices) != N_gt_boxes:
+ proposals_per_image = proposals_per_image[selected_indices]
+ assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
+ proposals_filtered.append(proposals_per_image)
+ # features_filtered = [feature[feature_mask] for feature in features]
+ # return features_filtered, proposals_filtered
+ return features, proposals_filtered
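A small, self-contained sketch of the IoU-threshold selection performed in `__call__` above, using the same `matched_boxlist_iou` helper imported at the top of this file (the box coordinates and threshold are made up):

```python
import torch
from detectron2.structures import Boxes
from detectron2.structures.boxes import matched_boxlist_iou

gt_boxes = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0], [0.0, 0.0, 10.0, 10.0]]))
est_boxes = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 30.0, 30.0]]))
iou = matched_boxlist_iou(gt_boxes, est_boxes)  # element-wise IoU of matched pairs
keep = iou > 0.7                                # e.g. FG_IOU_THRESHOLD = 0.7
print(iou.tolist(), keep.tolist())              # [1.0, 0.0] [True, False]
```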
diff --git a/projects/DensePose/densepose/modeling/hrfpn.py b/projects/DensePose/densepose/modeling/hrfpn.py
new file mode 100644
index 0000000..ddc1c19
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/hrfpn.py
@@ -0,0 +1,181 @@
+"""
+MIT License
+Copyright (c) 2019 Microsoft
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.backbone import BACKBONE_REGISTRY
+from detectron2.modeling.backbone.backbone import Backbone
+
+from .hrnet import build_pose_hrnet_backbone
+
+
+class HRFPN(Backbone):
+ """ HRFPN (High Resolution Feature Pyramids)
+ Transforms outputs of HRNet backbone so they are suitable for the ROI_heads
+ arXiv: https://arxiv.org/abs/1904.04514
+ Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py
+ Args:
+ bottom_up (Backbone): HRNet backbone providing the input feature maps
+ in_features (list): names of the input features (output of HRNet)
+ in_channels (list): number of channels for each branch
+ out_channels (int): output channels of feature pyramids
+ n_out_features (int): number of output stages
+ pooling (str): pooling for generating feature pyramids (from {MAX, AVG})
+ share_conv (bool): Have one conv per output, or share one with all the outputs
+ """
+
+ def __init__(
+ self,
+ bottom_up,
+ in_features,
+ n_out_features,
+ in_channels,
+ out_channels,
+ pooling="AVG",
+ share_conv=False,
+ ):
+ super(HRFPN, self).__init__()
+ assert isinstance(in_channels, list)
+ self.bottom_up = bottom_up
+ self.in_features = in_features
+ self.n_out_features = n_out_features
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.num_ins = len(in_channels)
+ self.share_conv = share_conv
+
+ if self.share_conv:
+ self.fpn_conv = nn.Conv2d(
+ in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1
+ )
+ else:
+ self.fpn_conv = nn.ModuleList()
+ for _ in range(self.n_out_features):
+ self.fpn_conv.append(
+ nn.Conv2d(
+ in_channels=out_channels,
+ out_channels=out_channels,
+ kernel_size=3,
+ padding=1,
+ )
+ )
+
+ # Custom change: Replaces a simple bilinear interpolation
+ self.interp_conv = nn.ModuleList()
+ for i in range(len(self.in_features)):
+ self.interp_conv.append(
+ nn.Sequential(
+ nn.ConvTranspose2d(
+ in_channels=in_channels[i],
+ out_channels=in_channels[i],
+ kernel_size=4,
+ stride=2 ** i,
+ padding=0,
+ output_padding=0,
+ bias=False,
+ ),
+ nn.BatchNorm2d(in_channels[i], momentum=0.1),
+ nn.ReLU(inplace=True),
+ )
+ )
+
+ # Custom change: Replaces a couple (reduction conv + pooling) by one conv
+ self.reduction_pooling_conv = nn.ModuleList()
+ for i in range(self.n_out_features):
+ self.reduction_pooling_conv.append(
+ nn.Sequential(
+ nn.Conv2d(sum(in_channels), out_channels, kernel_size=2 ** i, stride=2 ** i),
+ nn.BatchNorm2d(out_channels, momentum=0.1),
+ nn.ReLU(inplace=True),
+ )
+ )
+
+ if pooling == "MAX":
+ self.pooling = F.max_pool2d
+ else:
+ self.pooling = F.avg_pool2d
+
+ self._out_features = []
+ self._out_feature_channels = {}
+ self._out_feature_strides = {}
+
+ for i in range(self.n_out_features):
+ self._out_features.append("p%d" % (i + 1))
+ self._out_feature_channels.update({self._out_features[-1]: self.out_channels})
+ self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)})
+
+ # default init_weights for conv(msra) and norm in ConvModule
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_normal_(m.weight, a=1)
+ nn.init.constant_(m.bias, 0)
+
+ def forward(self, inputs):
+ bottom_up_features = self.bottom_up(inputs)
+ assert len(bottom_up_features) == len(self.in_features)
+ inputs = [bottom_up_features[f] for f in self.in_features]
+
+ outs = []
+ for i in range(len(inputs)):
+ outs.append(self.interp_conv[i](inputs[i]))
+ shape_2 = min(o.shape[2] for o in outs)
+ shape_3 = min(o.shape[3] for o in outs)
+ out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1)
+ outs = []
+ for i in range(self.n_out_features):
+ outs.append(self.reduction_pooling_conv[i](out))
+ for i in range(len(outs)): # Make shapes consistent
+ outs[-1 - i] = outs[-1 - i][
+ :, :, : outs[-1].shape[2] * 2 ** i, : outs[-1].shape[3] * 2 ** i
+ ]
+ outputs = []
+ for i in range(len(outs)):
+ if self.share_conv:
+ outputs.append(self.fpn_conv(outs[i]))
+ else:
+ outputs.append(self.fpn_conv[i](outs[i]))
+
+ assert len(self._out_features) == len(outputs)
+ return dict(zip(self._out_features, outputs))
+
+
+@BACKBONE_REGISTRY.register()
+def build_hrfpn_backbone(cfg, input_shape: ShapeSpec):
+
+ in_channels = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS
+ in_features = ["p%d" % (i + 1) for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES)]
+ n_out_features = len(cfg.MODEL.ROI_HEADS.IN_FEATURES)
+ out_channels = cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS
+ hrnet = build_pose_hrnet_backbone(cfg, input_shape)
+ hrfpn = HRFPN(
+ hrnet,
+ in_features,
+ n_out_features,
+ in_channels,
+ out_channels,
+ pooling="AVG",
+ share_conv=False,
+ )
+
+ return hrfpn
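The naming and stride convention set up in `HRFPN.__init__` mirrors standard FPN levels: output `p{i+1}` carries `out_channels` channels at stride `2 ** (i + 2)`. A tiny standalone illustration (the channel count below is an example, not read from a config):

```python
n_out_features, out_channels = 5, 256  # example values
pyramid = {
    "p%d" % (i + 1): {"channels": out_channels, "stride": 2 ** (i + 2)}
    for i in range(n_out_features)
}
print(pyramid["p1"]["stride"], pyramid["p5"]["stride"])  # 4 64
```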
diff --git a/projects/DensePose/densepose/modeling/hrnet.py b/projects/DensePose/densepose/modeling/hrnet.py
new file mode 100644
index 0000000..acaa92d
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/hrnet.py
@@ -0,0 +1,473 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) Microsoft
+# Licensed under the MIT License.
+# Written by Bin Xiao (leoxiaobin@gmail.com)
+# Modified by Bowen Cheng (bcheng9@illinois.edu)
+# Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa
+# ------------------------------------------------------------------------------
+
+from __future__ import absolute_import, division, print_function
+import logging
+import torch.nn as nn
+
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.backbone import BACKBONE_REGISTRY
+from detectron2.modeling.backbone.backbone import Backbone
+
+BN_MOMENTUM = 0.1
+logger = logging.getLogger(__name__)
+
+__all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"]
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+ """3x3 convolution with padding"""
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
+
+
+class BasicBlock(nn.Module):
+ expansion = 1
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
+ super(BasicBlock, self).__init__()
+ self.conv1 = conv3x3(inplanes, planes, stride)
+ self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+ self.relu = nn.ReLU(inplace=True)
+ self.conv2 = conv3x3(planes, planes)
+ self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class Bottleneck(nn.Module):
+ expansion = 4
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
+ super(Bottleneck, self).__init__()
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+ self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
+ self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class HighResolutionModule(nn.Module):
+ """ HighResolutionModule
+ Building block of the PoseHigherResolutionNet (see below)
+ arXiv: https://arxiv.org/abs/1908.10357
+ Args:
+ num_branches (int): number of branches of the module
+ blocks (str): type of block of the module
+ num_blocks (int): number of blocks of the module
+ num_inchannels (list): number of input channels of each branch
+ num_channels (list): number of channels of each branch
+ multi_scale_output (bool): only used by the last module of PoseHigherResolutionNet
+ """
+
+ def __init__(
+ self,
+ num_branches,
+ blocks,
+ num_blocks,
+ num_inchannels,
+ num_channels,
+ multi_scale_output=True,
+ ):
+ super(HighResolutionModule, self).__init__()
+ self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels)
+
+ self.num_inchannels = num_inchannels
+ self.num_branches = num_branches
+
+ self.multi_scale_output = multi_scale_output
+
+ self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
+ self.fuse_layers = self._make_fuse_layers()
+ self.relu = nn.ReLU(True)
+
+ def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels):
+ if num_branches != len(num_blocks):
+ error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format(num_branches, len(num_blocks))
+ logger.error(error_msg)
+ raise ValueError(error_msg)
+
+ if num_branches != len(num_channels):
+ error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format(
+ num_branches, len(num_channels)
+ )
+ logger.error(error_msg)
+ raise ValueError(error_msg)
+
+ if num_branches != len(num_inchannels):
+ error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format(
+ num_branches, len(num_inchannels)
+ )
+ logger.error(error_msg)
+ raise ValueError(error_msg)
+
+ def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
+ downsample = None
+ if (
+ stride != 1
+ or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion
+ ):
+ downsample = nn.Sequential(
+ nn.Conv2d(
+ self.num_inchannels[branch_index],
+ num_channels[branch_index] * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False,
+ ),
+ nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM),
+ )
+
+ layers = []
+ layers.append(
+ block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)
+ )
+ self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion
+ for _ in range(1, num_blocks[branch_index]):
+ layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index]))
+
+ return nn.Sequential(*layers)
+
+ def _make_branches(self, num_branches, block, num_blocks, num_channels):
+ branches = []
+
+ for i in range(num_branches):
+ branches.append(self._make_one_branch(i, block, num_blocks, num_channels))
+
+ return nn.ModuleList(branches)
+
+ def _make_fuse_layers(self):
+ if self.num_branches == 1:
+ return None
+
+ num_branches = self.num_branches
+ num_inchannels = self.num_inchannels
+ fuse_layers = []
+ for i in range(num_branches if self.multi_scale_output else 1):
+ fuse_layer = []
+ for j in range(num_branches):
+ if j > i:
+ fuse_layer.append(
+ nn.Sequential(
+ nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False),
+ nn.BatchNorm2d(num_inchannels[i]),
+ nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"),
+ )
+ )
+ elif j == i:
+ fuse_layer.append(None)
+ else:
+ conv3x3s = []
+ for k in range(i - j):
+ if k == i - j - 1:
+ num_outchannels_conv3x3 = num_inchannels[i]
+ conv3x3s.append(
+ nn.Sequential(
+ nn.Conv2d(
+ num_inchannels[j],
+ num_outchannels_conv3x3,
+ 3,
+ 2,
+ 1,
+ bias=False,
+ ),
+ nn.BatchNorm2d(num_outchannels_conv3x3),
+ )
+ )
+ else:
+ num_outchannels_conv3x3 = num_inchannels[j]
+ conv3x3s.append(
+ nn.Sequential(
+ nn.Conv2d(
+ num_inchannels[j],
+ num_outchannels_conv3x3,
+ 3,
+ 2,
+ 1,
+ bias=False,
+ ),
+ nn.BatchNorm2d(num_outchannels_conv3x3),
+ nn.ReLU(True),
+ )
+ )
+ fuse_layer.append(nn.Sequential(*conv3x3s))
+ fuse_layers.append(nn.ModuleList(fuse_layer))
+
+ return nn.ModuleList(fuse_layers)
+
+ def get_num_inchannels(self):
+ return self.num_inchannels
+
+ def forward(self, x):
+ if self.num_branches == 1:
+ return [self.branches[0](x[0])]
+
+ for i in range(self.num_branches):
+ x[i] = self.branches[i](x[i])
+
+ x_fuse = []
+
+ for i in range(len(self.fuse_layers)):
+ y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
+ for j in range(1, self.num_branches):
+ if i == j:
+ y = y + x[j]
+ else:
+ z = self.fuse_layers[i][j](x[j])[:, :, : y.shape[2], : y.shape[3]]
+ y = y + z
+ x_fuse.append(self.relu(y))
+
+ return x_fuse
+
+
+blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
+
+
+class PoseHigherResolutionNet(Backbone):
+ """ PoseHigherResolutionNet
+ Composed of several HighResolutionModule tied together with ConvNets
+ Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure
+ arXiv: https://arxiv.org/abs/1908.10357
+ """
+
+ def __init__(self, cfg, **kwargs):
+ self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES
+ super(PoseHigherResolutionNet, self).__init__()
+
+ # stem net
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
+ self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
+ self.relu = nn.ReLU(inplace=True)
+ self.layer1 = self._make_layer(Bottleneck, 64, 4)
+
+ self.stage2_cfg = cfg.MODEL.HRNET.STAGE2
+ num_channels = self.stage2_cfg.NUM_CHANNELS
+ block = blocks_dict[self.stage2_cfg.BLOCK]
+ num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
+ self.transition1 = self._make_transition_layer([256], num_channels)
+ self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)
+
+ self.stage3_cfg = cfg.MODEL.HRNET.STAGE3
+ num_channels = self.stage3_cfg.NUM_CHANNELS
+ block = blocks_dict[self.stage3_cfg.BLOCK]
+ num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
+ self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
+ self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)
+
+ self.stage4_cfg = cfg.MODEL.HRNET.STAGE4
+ num_channels = self.stage4_cfg.NUM_CHANNELS
+ block = blocks_dict[self.stage4_cfg.BLOCK]
+ num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
+ self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
+ self.stage4, pre_stage_channels = self._make_stage(
+ self.stage4_cfg, num_channels, multi_scale_output=True
+ )
+
+ self._out_features = []
+ self._out_feature_channels = {}
+ self._out_feature_strides = {}
+
+ for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES):
+ self._out_features.append("p%d" % (i + 1))
+ self._out_feature_channels.update(
+ {self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]}
+ )
+ self._out_feature_strides.update({self._out_features[-1]: 1})
+
+ def _get_deconv_cfg(self, deconv_kernel):
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+
+ return deconv_kernel, padding, output_padding
+
+ def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer):
+ num_branches_cur = len(num_channels_cur_layer)
+ num_branches_pre = len(num_channels_pre_layer)
+
+ transition_layers = []
+ for i in range(num_branches_cur):
+ if i < num_branches_pre:
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+ transition_layers.append(
+ nn.Sequential(
+ nn.Conv2d(
+ num_channels_pre_layer[i],
+ num_channels_cur_layer[i],
+ 3,
+ 1,
+ 1,
+ bias=False,
+ ),
+ nn.BatchNorm2d(num_channels_cur_layer[i]),
+ nn.ReLU(inplace=True),
+ )
+ )
+ else:
+ transition_layers.append(None)
+ else:
+ conv3x3s = []
+ for j in range(i + 1 - num_branches_pre):
+ inchannels = num_channels_pre_layer[-1]
+ outchannels = (
+ num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels
+ )
+ conv3x3s.append(
+ nn.Sequential(
+ nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
+ nn.BatchNorm2d(outchannels),
+ nn.ReLU(inplace=True),
+ )
+ )
+ transition_layers.append(nn.Sequential(*conv3x3s))
+
+ return nn.ModuleList(transition_layers)
+
+ def _make_layer(self, block, planes, blocks, stride=1):
+ downsample = None
+ if stride != 1 or self.inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ nn.Conv2d(
+ self.inplanes,
+ planes * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False,
+ ),
+ nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
+ )
+
+ layers = []
+ layers.append(block(self.inplanes, planes, stride, downsample))
+ self.inplanes = planes * block.expansion
+ for _ in range(1, blocks):
+ layers.append(block(self.inplanes, planes))
+
+ return nn.Sequential(*layers)
+
+ def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
+ num_modules = layer_config["NUM_MODULES"]
+ num_branches = layer_config["NUM_BRANCHES"]
+ num_blocks = layer_config["NUM_BLOCKS"]
+ num_channels = layer_config["NUM_CHANNELS"]
+ block = blocks_dict[layer_config["BLOCK"]]
+
+ modules = []
+ for i in range(num_modules):
+ # multi_scale_output is only used by the last module
+ if not multi_scale_output and i == num_modules - 1:
+ reset_multi_scale_output = False
+ else:
+ reset_multi_scale_output = True
+
+ modules.append(
+ HighResolutionModule(
+ num_branches,
+ block,
+ num_blocks,
+ num_inchannels,
+ num_channels,
+ reset_multi_scale_output,
+ )
+ )
+ num_inchannels = modules[-1].get_num_inchannels()
+
+ return nn.Sequential(*modules), num_inchannels
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.bn1(x)
+ x = self.relu(x)
+ x = self.conv2(x)
+ x = self.bn2(x)
+ x = self.relu(x)
+ x = self.layer1(x)
+
+ x_list = []
+ for i in range(self.stage2_cfg.NUM_BRANCHES):
+ if self.transition1[i] is not None:
+ x_list.append(self.transition1[i](x))
+ else:
+ x_list.append(x)
+ y_list = self.stage2(x_list)
+
+ x_list = []
+ for i in range(self.stage3_cfg.NUM_BRANCHES):
+ if self.transition2[i] is not None:
+ x_list.append(self.transition2[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage3(x_list)
+
+ x_list = []
+ for i in range(self.stage4_cfg.NUM_BRANCHES):
+ if self.transition3[i] is not None:
+ x_list.append(self.transition3[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage4(x_list)
+
+ assert len(self._out_features) == len(y_list)
+ return dict(zip(self._out_features, y_list)) # final_outputs
+
+
+@BACKBONE_REGISTRY.register()
+def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec):
+ model = PoseHigherResolutionNet(cfg)
+ return model
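The stage widths used when assembling the transitions above are the configured `NUM_CHANNELS` scaled by the block's `expansion` attribute (1 for `BASIC`, 4 for `BOTTLENECK`). A quick illustration, assuming `blocks_dict` from this file is in scope and using example widths rather than a real config:

```python
num_channels = [32, 64, 128, 256]  # e.g. HRNet-W32 stage-4 widths (illustrative)
basic = blocks_dict["BASIC"]
bottleneck = blocks_dict["BOTTLENECK"]
print([c * basic.expansion for c in num_channels])       # [32, 64, 128, 256]
print([c * bottleneck.expansion for c in num_channels])  # [128, 256, 512, 1024]
```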
diff --git a/projects/DensePose/densepose/modeling/inference.py b/projects/DensePose/densepose/modeling/inference.py
new file mode 100644
index 0000000..77f093d
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/inference.py
@@ -0,0 +1,83 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from typing import List, Tuple
+import torch
+
+from detectron2.structures import Instances
+
+from ..data.structures import DensePoseOutput
+
+
+def densepose_inference(
+ densepose_outputs: Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
+ densepose_confidences: Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
+ detections: List[Instances],
+):
+ """
+ Infer dense pose estimate based on outputs from the DensePose head
+ and detections. The estimate for each detection instance is stored in its
+ "pred_densepose" attribute.
+
+ Args:
+ densepose_outputs (tuple(`torch.Tensor`)): iterable containing 4 elements:
+ - s (:obj: `torch.Tensor`): coarse segmentation tensor of size (N, A, H, W),
+ - i (:obj: `torch.Tensor`): fine segmentation tensor of size (N, C, H, W),
+ - u (:obj: `torch.Tensor`): U coordinates for each class of size (N, C, H, W),
+ - v (:obj: `torch.Tensor`): V coordinates for each class of size (N, C, H, W),
+ where N is the total number of detections in a batch,
+ A is the number of coarse segmentation labels
+ (e.g. 15 for coarse body parts + background),
+ C is the number of fine segmentation labels
+ (e.g. 25 for fine body parts + background),
+ W is the resolution along the X axis,
+ H is the resolution along the Y axis
+ densepose_confidences (tuple(`torch.Tensor`)): iterable containing 4 elements:
+ - sigma_1 (:obj: `torch.Tensor`): global confidences for UV coordinates
+ of size (N, C, H, W)
+ - sigma_2 (:obj: `torch.Tensor`): individual confidences for UV coordinates
+ of size (N, C, H, W)
+ - kappa_u (:obj: `torch.Tensor`): first component of confidence direction
+ vector of size (N, C, H, W)
+ - kappa_v (:obj: `torch.Tensor`): second component of confidence direction
+ vector of size (N, C, H, W)
+ - fine_segm_confidence (:obj: `torch.Tensor`): confidence for fine
+ segmentation of size (N, 1, H, W)
+ - coarse_segm_confidence (:obj: `torch.Tensor`): confidence for coarse
+ segmentation of size (N, 1, H, W)
+ detections (list[Instances]): A list of N Instances, where N is the number of images
+ in the batch. Instances are modified by this method: "pred_densepose" attribute
+ is added to each instance, the attribute contains the corresponding
+ DensePoseOutput object.
+ """
+ # DensePose outputs: segmentation, body part indices, U, V
+ s, index_uv, u, v = densepose_outputs
+ (
+ sigma_1,
+ sigma_2,
+ kappa_u,
+ kappa_v,
+ fine_segm_confidence,
+ coarse_segm_confidence,
+ ) = densepose_confidences
+ k = 0
+ for detection in detections:
+ n_i = len(detection)
+ s_i = s[k : k + n_i]
+ index_uv_i = index_uv[k : k + n_i]
+ u_i = u[k : k + n_i]
+ v_i = v[k : k + n_i]
+ _local_vars = locals()
+ confidences = {
+ name: _local_vars[name][k : k + n_i]
+ for name in (
+ "sigma_1",
+ "sigma_2",
+ "kappa_u",
+ "kappa_v",
+ "fine_segm_confidence",
+ "coarse_segm_confidence",
+ )
+ if _local_vars.get(name) is not None
+ }
+ densepose_output_i = DensePoseOutput(s_i, index_uv_i, u_i, v_i, confidences)
+ detection.pred_densepose = densepose_output_i
+ k += n_i
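The loop in `densepose_inference` assumes the head outputs are batched over all detections of all images, each image owning a contiguous chunk of `len(detection)` rows. A minimal standalone sketch of that slicing (the tensor sizes and per-image counts are made up):

```python
import torch

n_per_image = [3, 1, 2]                          # hypothetical detections per image
s = torch.randn(sum(n_per_image), 15, 112, 112)  # batched coarse segmentation scores
chunks, k = [], 0
for n_i in n_per_image:
    chunks.append(s[k : k + n_i])
    k += n_i
print([c.shape[0] for c in chunks])              # [3, 1, 2]
```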
diff --git a/projects/DensePose/densepose/modeling/losses/__init__.py b/projects/DensePose/densepose/modeling/losses/__init__.py
new file mode 100644
index 0000000..47e8298
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/losses/__init__.py
@@ -0,0 +1,3 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from .densepose_losses import DensePoseLosses
diff --git a/projects/DensePose/densepose/modeling/losses/densepose_losses.py b/projects/DensePose/densepose/modeling/losses/densepose_losses.py
new file mode 100644
index 0000000..cc205b4
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/losses/densepose_losses.py
@@ -0,0 +1,729 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import math
+from dataclasses import dataclass
+from typing import Iterable, Optional
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.structures import Instances
+
+from .. import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
+
+
+def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z):
+ """
+ Computes utility values for linear interpolation at points v.
+ The points are given as normalized offsets in the source interval
+ (v0_src, v0_src + size_src), more precisely:
+ v = v0_src + v_norm * size_src / 256.0
+ The computed utilities include lower points v_lo, upper points v_hi,
+ interpolation weights v_w and flags j_valid indicating whether the
+ points fall into the destination interval (v0_dst, v0_dst + size_dst).
+
+ Args:
+ v_norm (:obj: `torch.Tensor`): tensor of size N containing
+ normalized point offsets
+ v0_src (:obj: `torch.Tensor`): tensor of size N containing
+ left bounds of source intervals for normalized points
+ size_src (:obj: `torch.Tensor`): tensor of size N containing
+ source interval sizes for normalized points
+ v0_dst (:obj: `torch.Tensor`): tensor of size N containing
+ left bounds of destination intervals
+ size_dst (:obj: `torch.Tensor`): tensor of size N containing
+ destination interval sizes
+ size_z (int): interval size for data to be interpolated
+
+ Returns:
+ v_lo (:obj: `torch.Tensor`): int tensor of size N containing
+ indices of lower values used for interpolation, all values are
+ integers from [0, size_z - 1]
+ v_hi (:obj: `torch.Tensor`): int tensor of size N containing
+ indices of upper values used for interpolation, all values are
+ integers from [0, size_z - 1]
+ v_w (:obj: `torch.Tensor`): float tensor of size N containing
+ interpolation weights
+ j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing
+ 0 for points outside the destination interval
+ (v0_dst, v0_dst + size_dst) and 1 otherwise
+ """
+ v = v0_src + v_norm * size_src / 256.0
+ j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst)
+ v_grid = (v - v0_dst) * size_z / size_dst
+ v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1)
+ v_hi = (v_lo + 1).clamp(max=size_z - 1)
+ v_grid = torch.min(v_hi.float(), v_grid)
+ v_w = v_grid - v_lo.float()
+ return v_lo, v_hi, v_w, j_valid
+
+
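A tiny numeric check of `_linear_interpolation_utilities` (assuming the function above is importable as-is): a point given as a normalized offset of 128/256 across a GT box of width 100 starting at x=10 maps to x=60; relative to a proposal box (x0=20, w=80) discretized into 112 cells, that is cell 56 with zero fractional weight.

```python
import torch

v_norm = torch.tensor([128.0])                                  # offsets are in [0, 256]
v0_src, size_src = torch.tensor([10.0]), torch.tensor([100.0])  # GT box x0, w
v0_dst, size_dst = torch.tensor([20.0]), torch.tensor([80.0])   # proposal box x0, w
v_lo, v_hi, v_w, j_valid = _linear_interpolation_utilities(
    v_norm, v0_src, size_src, v0_dst, size_dst, size_z=112
)
print(v_lo.item(), v_hi.item(), v_w.item(), bool(j_valid.item()))  # 56 57 0.0 True
```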
+class SingleTensorsHelper:
+ def __init__(self, proposals_with_gt):
+
+ with torch.no_grad():
+ (
+ index_uv_img,
+ i_with_dp,
+ bbox_xywh_est,
+ bbox_xywh_gt,
+ index_gt_all,
+ x_norm,
+ y_norm,
+ u_gt_all,
+ v_gt_all,
+ s_gt,
+ index_bbox,
+ ) = _extract_single_tensors_from_matches(proposals_with_gt)
+
+ for k, v in locals().items():
+ if k not in ["self", "proposals_with_gt"]:
+ setattr(self, k, v)
+
+
+class BilinearInterpolationHelper:
+ """
+ Args:
+ tensors_helper (SingleTensorsHelper)
+ j_valid (:obj: `torch.Tensor`): uint8 tensor of size M containing
+ 0 for points to be discarded and 1 for points to be selected
+ y_lo (:obj: `torch.Tensor`): int tensor of indices of upper values
+ in z_est for each point
+ y_hi (:obj: `torch.Tensor`): int tensor of indices of lower values
+ in z_est for each point
+ x_lo (:obj: `torch.Tensor`): int tensor of indices of left values
+ in z_est for each point
+ x_hi (:obj: `torch.Tensor`): int tensor of indices of right values
+ in z_est for each point
+ w_ylo_xlo (:obj: `torch.Tensor`): float tensor of size M;
+ contains upper-left value weight for each point
+ w_ylo_xhi (:obj: `torch.Tensor`): float tensor of size M;
+ contains upper-right value weight for each point
+ w_yhi_xlo (:obj: `torch.Tensor`): float tensor of size M;
+ contains lower-left value weight for each point
+ w_yhi_xhi (:obj: `torch.Tensor`): float tensor of size M;
+ contains lower-right value weight for each point
+ """
+
+ def __init__(
+ self,
+ tensors_helper,
+ j_valid,
+ y_lo,
+ y_hi,
+ x_lo,
+ x_hi,
+ w_ylo_xlo,
+ w_ylo_xhi,
+ w_yhi_xlo,
+ w_yhi_xhi,
+ ):
+ for k, v in locals().items():
+ if k != "self":
+ setattr(self, k, v)
+
+ @staticmethod
+ def from_matches(tensors_helper, densepose_outputs_size):
+
+ zh, zw = densepose_outputs_size[2], densepose_outputs_size[3]
+
+ x0_gt, y0_gt, w_gt, h_gt = tensors_helper.bbox_xywh_gt[tensors_helper.index_bbox].unbind(1)
+ x0_est, y0_est, w_est, h_est = tensors_helper.bbox_xywh_est[
+ tensors_helper.index_bbox
+ ].unbind(dim=1)
+ x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities(
+ tensors_helper.x_norm, x0_gt, w_gt, x0_est, w_est, zw
+ )
+ y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities(
+ tensors_helper.y_norm, y0_gt, h_gt, y0_est, h_est, zh
+ )
+ j_valid = jx_valid * jy_valid
+
+ w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w)
+ w_ylo_xhi = x_w * (1.0 - y_w)
+ w_yhi_xlo = (1.0 - x_w) * y_w
+ w_yhi_xhi = x_w * y_w
+
+ return BilinearInterpolationHelper(
+ tensors_helper,
+ j_valid,
+ y_lo,
+ y_hi,
+ x_lo,
+ x_hi,
+ w_ylo_xlo,
+ w_ylo_xhi,
+ w_yhi_xlo,
+ w_yhi_xhi,
+ )
+
+ def extract_at_points(
+ self,
+ z_est,
+ slice_index_uv=None,
+ w_ylo_xlo=None,
+ w_ylo_xhi=None,
+ w_yhi_xlo=None,
+ w_yhi_xhi=None,
+ ):
+ """
+ Extract ground truth values z_gt for valid point indices and estimated
+ values z_est using bilinear interpolation over top-left (y_lo, x_lo),
+ top-right (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right
+ (y_hi, x_hi) values in z_est with corresponding weights:
+ w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi.
+ Use slice_index_uv to slice dim=1 in z_est
+ """
+ index_gt_all = self.tensors_helper.index_gt_all
+ slice_index_uv = index_gt_all if slice_index_uv is None else slice_index_uv
+ w_ylo_xlo = self.w_ylo_xlo if w_ylo_xlo is None else w_ylo_xlo
+ w_ylo_xhi = self.w_ylo_xhi if w_ylo_xhi is None else w_ylo_xhi
+ w_yhi_xlo = self.w_yhi_xlo if w_yhi_xlo is None else w_yhi_xlo
+ w_yhi_xhi = self.w_yhi_xhi if w_yhi_xhi is None else w_yhi_xhi
+
+ index_bbox = self.tensors_helper.index_bbox
+ z_est_sampled = (
+ z_est[index_bbox, slice_index_uv, self.y_lo, self.x_lo] * w_ylo_xlo
+ + z_est[index_bbox, slice_index_uv, self.y_lo, self.x_hi] * w_ylo_xhi
+ + z_est[index_bbox, slice_index_uv, self.y_hi, self.x_lo] * w_yhi_xlo
+ + z_est[index_bbox, slice_index_uv, self.y_hi, self.x_hi] * w_yhi_xhi
+ )
+ return z_est_sampled
+
+
+def _resample_data(
+ z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode="nearest", padding_mode="zeros"
+):
+ """
+ Args:
+ z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be
+ resampled
+ bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing
+ source bounding boxes in format XYWH
+ bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing
+ destination bounding boxes in format XYWH
+ Return:
+ zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout)
+ with values of z resampled onto the destination bounding boxes
+ """
+ n = bbox_xywh_src.size(0)
+ assert n == bbox_xywh_dst.size(0), (
+ "The number of "
+ "source ROIs for resampling ({}) should be equal to the number "
+ "of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0))
+ )
+ x0src, y0src, wsrc, hsrc = bbox_xywh_src.unbind(dim=1)
+ x0dst, y0dst, wdst, hdst = bbox_xywh_dst.unbind(dim=1)
+ x0dst_norm = 2 * (x0dst - x0src) / wsrc - 1
+ y0dst_norm = 2 * (y0dst - y0src) / hsrc - 1
+ x1dst_norm = 2 * (x0dst + wdst - x0src) / wsrc - 1
+ y1dst_norm = 2 * (y0dst + hdst - y0src) / hsrc - 1
+ grid_w = torch.arange(wout, device=z.device, dtype=torch.float) / wout
+ grid_h = torch.arange(hout, device=z.device, dtype=torch.float) / hout
+ grid_w_expanded = grid_w[None, None, :].expand(n, hout, wout)
+ grid_h_expanded = grid_h[None, :, None].expand(n, hout, wout)
+ dx_expanded = (x1dst_norm - x0dst_norm)[:, None, None].expand(n, hout, wout)
+ dy_expanded = (y1dst_norm - y0dst_norm)[:, None, None].expand(n, hout, wout)
+ x0_expanded = x0dst_norm[:, None, None].expand(n, hout, wout)
+ y0_expanded = y0dst_norm[:, None, None].expand(n, hout, wout)
+ grid_x = grid_w_expanded * dx_expanded + x0_expanded
+ grid_y = grid_h_expanded * dy_expanded + y0_expanded
+ grid = torch.stack((grid_x, grid_y), dim=3)
+ # resample Z from (N, C, H, W) into (N, C, Hout, Wout)
+ zresampled = F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True)
+ return zresampled
+
+
+def _extract_single_tensors_from_matches_one_image(
+ proposals_targets, bbox_with_dp_offset, bbox_global_offset
+):
+ i_gt_all = []
+ x_norm_all = []
+ y_norm_all = []
+ u_gt_all = []
+ v_gt_all = []
+ s_gt_all = []
+ bbox_xywh_gt_all = []
+ bbox_xywh_est_all = []
+ # i_bbox_all == k should be true for all data that corresponds
+ # to bbox_xywh_gt[k] and bbox_xywh_est[k]
+ # index k here is global wrt images
+ i_bbox_all = []
+ # at offset k (k is global) contains index of bounding box data
+ # within densepose output tensor
+ i_with_dp = []
+
+ boxes_xywh_est = proposals_targets.proposal_boxes.clone()
+ boxes_xywh_gt = proposals_targets.gt_boxes.clone()
+ n_i = len(boxes_xywh_est)
+ assert n_i == len(boxes_xywh_gt)
+
+ if n_i:
+ boxes_xywh_est.tensor[:, 2] -= boxes_xywh_est.tensor[:, 0]
+ boxes_xywh_est.tensor[:, 3] -= boxes_xywh_est.tensor[:, 1]
+ boxes_xywh_gt.tensor[:, 2] -= boxes_xywh_gt.tensor[:, 0]
+ boxes_xywh_gt.tensor[:, 3] -= boxes_xywh_gt.tensor[:, 1]
+ if hasattr(proposals_targets, "gt_densepose"):
+ densepose_gt = proposals_targets.gt_densepose
+ for k, box_xywh_est, box_xywh_gt, dp_gt in zip(
+ range(n_i), boxes_xywh_est.tensor, boxes_xywh_gt.tensor, densepose_gt
+ ):
+ if (dp_gt is not None) and (len(dp_gt.x) > 0):
+ i_gt_all.append(dp_gt.i)
+ x_norm_all.append(dp_gt.x)
+ y_norm_all.append(dp_gt.y)
+ u_gt_all.append(dp_gt.u)
+ v_gt_all.append(dp_gt.v)
+ s_gt_all.append(dp_gt.segm.unsqueeze(0))
+ bbox_xywh_gt_all.append(box_xywh_gt.view(-1, 4))
+ bbox_xywh_est_all.append(box_xywh_est.view(-1, 4))
+ i_bbox_k = torch.full_like(dp_gt.i, bbox_with_dp_offset + len(i_with_dp))
+ i_bbox_all.append(i_bbox_k)
+ i_with_dp.append(bbox_global_offset + k)
+ return (
+ i_gt_all,
+ x_norm_all,
+ y_norm_all,
+ u_gt_all,
+ v_gt_all,
+ s_gt_all,
+ bbox_xywh_gt_all,
+ bbox_xywh_est_all,
+ i_bbox_all,
+ i_with_dp,
+ )
+
+
+def _extract_single_tensors_from_matches(proposals_with_targets):
+ i_img = []
+ i_gt_all = []
+ x_norm_all = []
+ y_norm_all = []
+ u_gt_all = []
+ v_gt_all = []
+ s_gt_all = []
+ bbox_xywh_gt_all = []
+ bbox_xywh_est_all = []
+ i_bbox_all = []
+ i_with_dp_all = []
+ n = 0
+ for i, proposals_targets_per_image in enumerate(proposals_with_targets):
+ n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0)
+ if not n_i:
+ continue
+ (
+ i_gt_img,
+ x_norm_img,
+ y_norm_img,
+ u_gt_img,
+ v_gt_img,
+ s_gt_img,
+ bbox_xywh_gt_img,
+ bbox_xywh_est_img,
+ i_bbox_img,
+ i_with_dp_img,
+ ) = _extract_single_tensors_from_matches_one_image( # noqa
+ proposals_targets_per_image, len(i_with_dp_all), n
+ )
+ i_gt_all.extend(i_gt_img)
+ x_norm_all.extend(x_norm_img)
+ y_norm_all.extend(y_norm_img)
+ u_gt_all.extend(u_gt_img)
+ v_gt_all.extend(v_gt_img)
+ s_gt_all.extend(s_gt_img)
+ bbox_xywh_gt_all.extend(bbox_xywh_gt_img)
+ bbox_xywh_est_all.extend(bbox_xywh_est_img)
+ i_bbox_all.extend(i_bbox_img)
+ i_with_dp_all.extend(i_with_dp_img)
+ i_img.extend([i] * len(i_with_dp_img))
+ n += n_i
+ # concatenate all data into a single tensor
+ if (n > 0) and (len(i_with_dp_all) > 0):
+ i_gt = torch.cat(i_gt_all, 0).long()
+ x_norm = torch.cat(x_norm_all, 0)
+ y_norm = torch.cat(y_norm_all, 0)
+ u_gt = torch.cat(u_gt_all, 0)
+ v_gt = torch.cat(v_gt_all, 0)
+ s_gt = torch.cat(s_gt_all, 0)
+ bbox_xywh_gt = torch.cat(bbox_xywh_gt_all, 0)
+ bbox_xywh_est = torch.cat(bbox_xywh_est_all, 0)
+ i_bbox = torch.cat(i_bbox_all, 0).long()
+ else:
+ i_gt = None
+ x_norm = None
+ y_norm = None
+ u_gt = None
+ v_gt = None
+ s_gt = None
+ bbox_xywh_gt = None
+ bbox_xywh_est = None
+ i_bbox = None
+ return (
+ i_img,
+ i_with_dp_all,
+ bbox_xywh_est,
+ bbox_xywh_gt,
+ i_gt,
+ x_norm,
+ y_norm,
+ u_gt,
+ v_gt,
+ s_gt,
+ i_bbox,
+ )
+
+
+@dataclass
+class DataForMaskLoss:
+ """
+ Contains mask GT and estimated data for proposals from multiple images:
+ """
+
+ # tensor of size (K, H, W) containing GT labels
+ masks_gt: Optional[torch.Tensor] = None
+ # tensor of size (K, C, H, W) containing estimated scores
+ masks_est: Optional[torch.Tensor] = None
+
+
+def _extract_data_for_mask_loss_from_matches(
+ proposals_targets: Iterable[Instances], estimated_segm: torch.Tensor
+) -> DataForMaskLoss:
+ """
+ Extract data for mask loss from instances that contain matched GT and
+ estimated bounding boxes.
+ Args:
+ proposals_targets: Iterable[Instances]
+ matched GT and estimated results, each item in the iterable
+ corresponds to data in 1 image
+ estimated_segm: torch.Tensor of size (K, C, S, S) with estimated
+ segmentation scores; S is the spatial size to which GT masks are resized
+ Return:
+ masks_est: tensor(K, C, H, W) of float - class scores
+ masks_gt: tensor(K, H, W) of int64 - labels
+ """
+ data = DataForMaskLoss()
+ masks_gt = []
+ offset = 0
+ assert estimated_segm.shape[2] == estimated_segm.shape[3], (
+ f"Expected estimated segmentation to have a square shape, "
+ f"but the actual shape is {estimated_segm.shape[2:]}"
+ )
+ mask_size = estimated_segm.shape[2]
+ num_proposals = sum(inst.proposal_boxes.tensor.size(0) for inst in proposals_targets)
+ num_estimated = estimated_segm.shape[0]
+ assert (
+ num_proposals == num_estimated
+ ), "The number of proposals {} must be equal to the number of estimates {}".format(
+ num_proposals, num_estimated
+ )
+
+ for proposals_targets_per_image in proposals_targets:
+ n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0)
+ if not n_i:
+ continue
+ gt_masks_per_image = proposals_targets_per_image.gt_masks.crop_and_resize(
+ proposals_targets_per_image.proposal_boxes.tensor, mask_size
+ ).to(device=estimated_segm.device)
+ masks_gt.append(gt_masks_per_image)
+ offset += n_i
+ if masks_gt:
+ data.masks_est = estimated_segm
+ data.masks_gt = torch.cat(masks_gt, dim=0)
+ return data
+
+
+class IIDIsotropicGaussianUVLoss(nn.Module):
+ """
+ Loss for the case of iid residuals with isotropic covariance:
+ $Sigma_i = sigma_i^2 I$
+ The loss (negative log likelihood) is then:
+ $1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$,
+ where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates
+ difference between estimated and ground truth UV values
+ For details, see:
+ N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
+ Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
+ """
+
+ def __init__(self, sigma_lower_bound: float):
+ super(IIDIsotropicGaussianUVLoss, self).__init__()
+ self.sigma_lower_bound = sigma_lower_bound
+ self.log2pi = math.log(2 * math.pi)
+
+ def forward(
+ self,
+ u: torch.Tensor,
+ v: torch.Tensor,
+ sigma_u: torch.Tensor,
+ target_u: torch.Tensor,
+ target_v: torch.Tensor,
+ ):
+ # compute $\sigma_i^2$
+ # use sigma_lower_bound to avoid degenerate solution for variance
+ # (sigma -> 0)
+ sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
+ # compute \|delta_i\|^2
+ delta_t_delta = (u - target_u) ** 2 + (v - target_v) ** 2
+ # the total loss from the formula above:
+ loss = 0.5 * (self.log2pi + 2 * torch.log(sigma2) + delta_t_delta / sigma2)
+ return loss.sum()
+
+
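A sanity check of `IIDIsotropicGaussianUVLoss` (assuming the class above is importable): with `sigma_u` chosen so that `softplus(sigma_u) + lower_bound == 1` and a perfect prediction (zero UV residual), the per-point loss reduces to `0.5 * log(2*pi) ≈ 0.9189`.

```python
import math
import torch

loss_fn = IIDIsotropicGaussianUVLoss(sigma_lower_bound=0.01)
u = v = target_u = target_v = torch.zeros(1)
sigma_u = torch.full((1,), math.log(math.exp(0.99) - 1.0))  # softplus(sigma_u) == 0.99
loss = loss_fn(u, v, sigma_u, target_u, target_v)
print(round(loss.item(), 4))  # 0.9189 == 0.5 * log(2 * pi)
```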
+class IndepAnisotropicGaussianUVLoss(nn.Module):
+ """
+ Loss for the case of independent residuals with anisotropic covariances:
+ $Sigma_i = sigma_i^2 I + r_i r_i^T$
+ The loss (negative log likelihood) is then:
+ $1/2 sum_{i=1}^n (log(2 pi)
+ + log sigma_i^2 (sigma_i^2 + ||r_i||^2)
+ + ||delta_i||^2 / sigma_i^2
+ - <delta_i, r_i>^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$,
+ where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates
+ difference between estimated and ground truth UV values
+ For details, see:
+ N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
+ Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
+ """
+
+ def __init__(self, sigma_lower_bound: float):
+ super(IndepAnisotropicGaussianUVLoss, self).__init__()
+ self.sigma_lower_bound = sigma_lower_bound
+ self.log2pi = math.log(2 * math.pi)
+
+ def forward(
+ self,
+ u: torch.Tensor,
+ v: torch.Tensor,
+ sigma_u: torch.Tensor,
+ kappa_u_est: torch.Tensor,
+ kappa_v_est: torch.Tensor,
+ target_u: torch.Tensor,
+ target_v: torch.Tensor,
+ ):
+ # compute $\sigma_i^2$
+ sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
+ # compute \|r_i\|^2
+ r_sqnorm2 = kappa_u_est ** 2 + kappa_v_est ** 2
+ delta_u = u - target_u
+ delta_v = v - target_v
+ # compute \|delta_i\|^2
+ delta_sqnorm = delta_u ** 2 + delta_v ** 2
+ delta_u_r_u = delta_u * kappa_u_est
+ delta_v_r_v = delta_v * kappa_v_est
+ # compute the scalar product <delta, r>
+ delta_r = delta_u_r_u + delta_v_r_v
+ # compute squared scalar product <delta, r>^2
+ delta_r_sqnorm = delta_r ** 2
+ denom2 = sigma2 * (sigma2 + r_sqnorm2)
+ loss = 0.5 * (
+ self.log2pi + torch.log(denom2) + delta_sqnorm / sigma2 - delta_r_sqnorm / denom2
+ )
+ return loss.sum()
+
+
+class DensePoseLosses(object):
+ def __init__(self, cfg):
+ # fmt: off
+ self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
+ self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS
+ self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS
+ self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
+ self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
+ # fmt: on
+ self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
+ self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
+ if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
+ self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss(
+ self.confidence_model_cfg.uv_confidence.epsilon
+ )
+ elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO:
+ self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss(
+ self.confidence_model_cfg.uv_confidence.epsilon
+ )
+
+ def __call__(self, proposals_with_gt, densepose_outputs, densepose_confidences):
+ if not self.segm_trained_by_masks:
+ return self.produce_densepose_losses(
+ proposals_with_gt, densepose_outputs, densepose_confidences
+ )
+ else:
+ losses = {}
+ losses_densepose = self.produce_densepose_losses(
+ proposals_with_gt, densepose_outputs, densepose_confidences
+ )
+ losses.update(losses_densepose)
+ losses_mask = self.produce_mask_losses(
+ proposals_with_gt, densepose_outputs, densepose_confidences
+ )
+ losses.update(losses_mask)
+ return losses
+
+ def produce_fake_mask_losses(self, densepose_outputs):
+ losses = {}
+ segm_scores, _, _, _ = densepose_outputs
+ losses["loss_densepose_S"] = segm_scores.sum() * 0
+ return losses
+
+ def produce_mask_losses(self, proposals_with_gt, densepose_outputs, densepose_confidences):
+ if not len(proposals_with_gt):
+ return self.produce_fake_mask_losses(densepose_outputs)
+ losses = {}
+ # densepose outputs are computed for all images and all bounding boxes;
+ # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
+ # the outputs will have size(0) == 3+1+2+1 == 7
+ segm_scores, _, _, _ = densepose_outputs
+ with torch.no_grad():
+ mask_loss_data = _extract_data_for_mask_loss_from_matches(
+ proposals_with_gt, segm_scores
+ )
+ if (mask_loss_data.masks_gt is None) or (mask_loss_data.masks_est is None):
+ return self.produce_fake_mask_losses(densepose_outputs)
+ losses["loss_densepose_S"] = (
+ F.cross_entropy(mask_loss_data.masks_est, mask_loss_data.masks_gt.long()) * self.w_segm
+ )
+ return losses
+
+ def produce_fake_densepose_losses(self, densepose_outputs, densepose_confidences):
+ # we need to keep the same computation graph on all the GPUs to
+ # perform reduction properly. Hence even if we have no data on one
+ # of the GPUs, we still need to generate the computation graph.
+ # Add fake (zero) losses in the form Tensor.sum() * 0
+ s, index_uv, u, v = densepose_outputs
+ conf_type = self.confidence_model_cfg.uv_confidence.type
+ (
+ sigma_1,
+ sigma_2,
+ kappa_u,
+ kappa_v,
+ fine_segm_confidence,
+ coarse_segm_confidence,
+ ) = densepose_confidences
+ losses = {}
+ losses["loss_densepose_I"] = index_uv.sum() * 0
+ if not self.segm_trained_by_masks:
+ losses["loss_densepose_S"] = s.sum() * 0
+ if self.confidence_model_cfg.uv_confidence.enabled:
+ losses["loss_densepose_UV"] = (u.sum() + v.sum()) * 0
+ if conf_type == DensePoseUVConfidenceType.IID_ISO:
+ losses["loss_densepose_UV"] += sigma_2.sum() * 0
+ elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
+ losses["loss_densepose_UV"] += (sigma_2.sum() + kappa_u.sum() + kappa_v.sum()) * 0
+ else:
+ losses["loss_densepose_U"] = u.sum() * 0
+ losses["loss_densepose_V"] = v.sum() * 0
+ return losses
+
+ def produce_densepose_losses(self, proposals_with_gt, densepose_outputs, densepose_confidences):
+ losses = {}
+ # densepose outputs are computed for all images and all bounding boxes;
+ # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
+ # the outputs will have size(0) == 3+1+2+1 == 7
+ s, index_uv, u, v = densepose_outputs
+ assert u.size(2) == v.size(2)
+ assert u.size(3) == v.size(3)
+ assert u.size(2) == index_uv.size(2)
+ assert u.size(3) == index_uv.size(3)
+ densepose_outputs_size = u.size()
+
+ if not len(proposals_with_gt):
+ return self.produce_fake_densepose_losses(densepose_outputs, densepose_confidences)
+ (
+ sigma_1,
+ sigma_2,
+ kappa_u,
+ kappa_v,
+ fine_segm_confidence,
+ coarse_segm_confidence,
+ ) = densepose_confidences
+ conf_type = self.confidence_model_cfg.uv_confidence.type
+
+ tensors_helper = SingleTensorsHelper(proposals_with_gt)
+ n_batch = len(tensors_helper.i_with_dp)
+
+ # NOTE: we need to keep the same computation graph on all the GPUs to
+ # perform reduction properly. Hence even if we have no data on one
+ # of the GPUs, we still need to generate the computation graph.
+ # Add fake (zero) loss in the form Tensor.sum() * 0
+ if not n_batch:
+ return self.produce_fake_densepose_losses(densepose_outputs, densepose_confidences)
+
+ interpolator = BilinearInterpolationHelper.from_matches(
+ tensors_helper, densepose_outputs_size
+ )
+
+ j_valid_fg = interpolator.j_valid * (tensors_helper.index_gt_all > 0)
+
+ u_gt = tensors_helper.u_gt_all[j_valid_fg]
+ u_est_all = interpolator.extract_at_points(u[tensors_helper.i_with_dp])
+ u_est = u_est_all[j_valid_fg]
+
+ v_gt = tensors_helper.v_gt_all[j_valid_fg]
+ v_est_all = interpolator.extract_at_points(v[tensors_helper.i_with_dp])
+ v_est = v_est_all[j_valid_fg]
+
+ index_uv_gt = tensors_helper.index_gt_all[interpolator.j_valid]
+ index_uv_est_all = interpolator.extract_at_points(
+ index_uv[tensors_helper.i_with_dp],
+ slice_index_uv=slice(None),
+ w_ylo_xlo=interpolator.w_ylo_xlo[:, None],
+ w_ylo_xhi=interpolator.w_ylo_xhi[:, None],
+ w_yhi_xlo=interpolator.w_yhi_xlo[:, None],
+ w_yhi_xhi=interpolator.w_yhi_xhi[:, None],
+ )
+ index_uv_est = index_uv_est_all[interpolator.j_valid, :]
+
+ if self.confidence_model_cfg.uv_confidence.enabled:
+ sigma_2_est_all = interpolator.extract_at_points(sigma_2[tensors_helper.i_with_dp])
+ sigma_2_est = sigma_2_est_all[j_valid_fg]
+ if conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]:
+ kappa_u_est_all = interpolator.extract_at_points(kappa_u[tensors_helper.i_with_dp])
+ kappa_u_est = kappa_u_est_all[j_valid_fg]
+ kappa_v_est_all = interpolator.extract_at_points(kappa_v[tensors_helper.i_with_dp])
+ kappa_v_est = kappa_v_est_all[j_valid_fg]
+
+ # Resample everything to the estimated data size, no need to resample
+ # S_est then:
+ if not self.segm_trained_by_masks:
+ s_est = s[tensors_helper.i_with_dp]
+ with torch.no_grad():
+ s_gt = _resample_data(
+ tensors_helper.s_gt.unsqueeze(1),
+ tensors_helper.bbox_xywh_gt,
+ tensors_helper.bbox_xywh_est,
+ self.heatmap_size,
+ self.heatmap_size,
+ mode="nearest",
+ padding_mode="zeros",
+ ).squeeze(1)
+
+ # add point-based losses:
+ if self.confidence_model_cfg.uv_confidence.enabled:
+ if conf_type == DensePoseUVConfidenceType.IID_ISO:
+ uv_loss = (
+ self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt)
+ * self.w_points
+ )
+ losses["loss_densepose_UV"] = uv_loss
+ elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
+ uv_loss = (
+ self.uv_loss_with_confidences(
+ u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt
+ )
+ * self.w_points
+ )
+ losses["loss_densepose_UV"] = uv_loss
+ else:
+ raise ValueError(f"Unknown confidence model type: {conf_type}")
+ else:
+ u_loss = F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points
+ losses["loss_densepose_U"] = u_loss
+ v_loss = F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points
+ losses["loss_densepose_V"] = v_loss
+ index_uv_loss = F.cross_entropy(index_uv_est, index_uv_gt.long()) * self.w_part
+ losses["loss_densepose_I"] = index_uv_loss
+
+ if not self.segm_trained_by_masks:
+ if self.n_segm_chan == 2:
+ s_gt = s_gt > 0
+ s_loss = F.cross_entropy(s_est, s_gt.long()) * self.w_segm
+ losses["loss_densepose_S"] = s_loss
+ return losses
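+
+
+# The "fake loss" pattern above (Tensor.sum() * 0) keeps a differentiable node in
+# the computation graph, so distributed gradient reduction sees the same set of
+# parameters on every GPU even when a worker has no DensePose ground truth.
+# A self-contained sketch of the idea (illustrative only, not used by the pipeline):
+if __name__ == "__main__":
+    _p = torch.nn.Parameter(torch.randn(3, 4))
+    _fake_loss = (_p * 2).sum() * 0
+    # the loss value is zero, but it still has a grad_fn, so backward() produces
+    # (zero) gradients for _p and keeps gradient reduction in sync across workers
+    _fake_loss.backward()
+    assert _p.grad is not None and _p.grad.abs().sum().item() == 0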
diff --git a/projects/DensePose/densepose/modeling/predictors/__init__.py b/projects/DensePose/densepose/modeling/predictors/__init__.py
new file mode 100644
index 0000000..18e6ec7
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/predictors/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from .chart import DensePoseChartPredictor
+from .chart_confidence import DensePoseChartConfidencePredictorMixin
+from .chart_with_confidence import DensePoseChartWithConfidencePredictor
diff --git a/projects/DensePose/densepose/modeling/predictors/chart.py b/projects/DensePose/densepose/modeling/predictors/chart.py
new file mode 100644
index 0000000..37667e9
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/predictors/chart.py
@@ -0,0 +1,102 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import torch
+from torch import nn
+
+from detectron2.config import CfgNode
+from detectron2.layers import ConvTranspose2d, interpolate
+
+from ..utils import initialize_module_params
+
+
+class DensePoseChartPredictor(nn.Module):
+ """
+ Predictor (last layers of a DensePose model) that takes DensePose head outputs as an input
+ and produces 4 tensors which represent DensePose results for predefined body parts
+ (patches / charts):
+ - coarse segmentation [N, K, H, W]
+ - fine segmentation [N, C, H, W]
+ - U coordinates [N, C, H, W]
+ - V coordinates [N, C, H, W]
+ where
+ - N is the number of instances
+ - K is the number of coarse segmentation channels (
+ 2 = foreground / background,
+ 15 = one of 14 body parts / background)
+ - C is the number of fine segmentation channels (
+ 24 fine body parts / background)
+ - H and W are height and width of predictions
+ """
+
+ def __init__(self, cfg: CfgNode, input_channels: int):
+ """
+ Initialize predictor using configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): input tensor size along the channel dimension
+ """
+ super().__init__()
+ dim_in = input_channels
+ n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
+ dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
+ self.ann_index_lowres = ConvTranspose2d(
+ dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.index_uv_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.u_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.v_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
+ initialize_module_params(self)
+
+ def interp2d(self, tensor_nchw: torch.Tensor):
+ """
+ Bilinear interpolation method to be used for upscaling
+
+ Args:
+ tensor_nchw (tensor): tensor of shape (N, C, H, W)
+ Return:
+ tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
+ by applying the scale factor to H and W
+ """
+ return interpolate(
+ tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
+ )
+
+ def forward(self, head_outputs: torch.Tensor):
+ """
+ Perform forward step on DensePose head outputs
+
+ Args:
+ head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
+ Return:
+ - a tuple of 4 tensors containing DensePose predictions for charts:
+ * coarse segmentation estimate, a tensor of shape [N, K, Hout, Wout]
+ * fine segmentation estimate, a tensor of shape [N, C, Hout, Wout]
+ * U coordinates, a tensor of shape [N, C, Hout, Wout]
+ * V coordinates, a tensor of shape [N, C, Hout, Wout]
+ - a tuple of 4 tensors containing DensePose predictions for charts at reduced resolution:
+ * coarse segmentation estimate, a tensor of shape [N, K, Hout / 2, Wout / 2]
+ * fine segmentation estimate, a tensor of shape [N, C, Hout / 2, Wout / 2]
+ * U coordinates, a tensor of shape [N, C, Hout / 2, Wout / 2]
+ * V coordinates, a tensor of shape [N, C, Hout / 2, Wout / 2]
+ """
+ coarse_segm_lowres = self.ann_index_lowres(head_outputs)
+ fine_segm_lowres = self.index_uv_lowres(head_outputs)
+ u_lowres = self.u_lowres(head_outputs)
+ v_lowres = self.v_lowres(head_outputs)
+
+ coarse_segm = self.interp2d(coarse_segm_lowres)
+ fine_segm = self.interp2d(fine_segm_lowres)
+ u = self.interp2d(u_lowres)
+ v = self.interp2d(v_lowres)
+ siuv = (coarse_segm, fine_segm, u, v)
+ siuv_lowres = (coarse_segm_lowres, fine_segm_lowres, u_lowres, v_lowres)
+ return siuv, siuv_lowres
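+
+
+# A minimal sketch of exercising the predictor in isolation with a hand-built
+# config (the option values below are illustrative, not the project defaults):
+if __name__ == "__main__":
+    _cfg = CfgNode()
+    _cfg.MODEL = CfgNode()
+    _cfg.MODEL.ROI_DENSEPOSE_HEAD = CfgNode()
+    _cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2
+    _cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
+    _cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
+    _cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
+    _predictor = DensePoseChartPredictor(_cfg, input_channels=512)
+    _siuv, _siuv_lowres = _predictor(torch.randn(2, 512, 14, 14))
+    _coarse_segm, _fine_segm, _u, _v = _siuv
+    # deconv (stride 2) followed by 2x bilinear upscaling: 14 -> 28 -> 56
+    assert _coarse_segm.shape == (2, 2, 56, 56) and _u.shape == (2, 25, 56, 56)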
diff --git a/projects/DensePose/densepose/modeling/predictors/chart_confidence.py b/projects/DensePose/densepose/modeling/predictors/chart_confidence.py
new file mode 100644
index 0000000..8770736
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/predictors/chart_confidence.py
@@ -0,0 +1,176 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import torch
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.layers import ConvTranspose2d
+
+from ..confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
+from ..utils import initialize_module_params
+
+
+class DensePoseChartConfidencePredictorMixin:
+ """
+ Predictor contains the last layers of a DensePose model that take DensePose head
+ outputs as an input and produce model outputs. Confidence predictor mixin is used
+ to generate confidences for segmentation and UV tensors estimated by some
+ base predictor. Several assumptions need to hold for the base predictor:
+ 1) the `forward` method must return SIUV tuple as the first result (
+ S = coarse segmentation, I = fine segmentation, U and V are intrinsic
+ chart coordinates)
+ 2) `interp2d` method must be defined to perform bilinear interpolation;
+ the same method is typically used for SIUV and confidences
+ Confidence predictor mixin provides confidence estimates, as described in:
+ N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
+ from Noisy Labels, NeurIPS 2019
+ A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
+ """
+
+ def __init__(self, cfg: CfgNode, input_channels: int):
+ """
+ Initialize confidence predictor using configuration options.
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): number of input channels
+ """
+ # we rely on base predictor to call nn.Module.__init__
+ super().__init__(cfg, input_channels)
+ self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
+ self._initialize_confidence_estimation_layers(cfg, input_channels)
+ initialize_module_params(self)
+
+ def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
+ """
+ Initialize confidence estimation layers based on configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ dim_in (int): number of input channels
+ """
+ dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
+ if self.confidence_model_cfg.uv_confidence.enabled:
+ if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
+ self.sigma_2_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ elif (
+ self.confidence_model_cfg.uv_confidence.type
+ == DensePoseUVConfidenceType.INDEP_ANISO
+ ):
+ self.sigma_2_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.kappa_u_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.kappa_v_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ else:
+ raise ValueError(
+ f"Unknown confidence model type: "
+ f"{self.confidence_model_cfg.confidence_model_type}"
+ )
+ if self.confidence_model_cfg.segm_confidence.enabled:
+ self.fine_segm_confidence_lowres = ConvTranspose2d(
+ dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.coarse_segm_confidence_lowres = ConvTranspose2d(
+ dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+
+ def forward(self, head_outputs: torch.Tensor):
+ """
+ Perform forward operation on head outputs used as inputs for the predictor.
+ Calls forward method from the base predictor and uses its outputs to compute
+ confidences.
+
+ Args:
+ head_outputs (Tensor): head outputs used as predictor inputs
+ Return:
+ A tuple containing the following entries:
+ - SIUV tuple with possibly modified segmentation tensors
+ - various other outputs from the base predictor
+ - 6 tensors with estimated confidence model parameters at full resolution
+ (sigma_1, sigma_2, kappa_u, kappa_v, fine_segm_confidence, coarse_segm_confidence)
+ - 6 tensors with estimated confidence model parameters at half resolution
+ (sigma_1, sigma_2, kappa_u, kappa_v, fine_segm_confidence, coarse_segm_confidence)
+ """
+ # assuming base class returns SIUV estimates in its first result
+ base_predictor_outputs = super().forward(head_outputs)
+ siuv = (
+ base_predictor_outputs[0]
+ if isinstance(base_predictor_outputs, tuple)
+ else base_predictor_outputs
+ )
+ coarse_segm, fine_segm, u, v = siuv
+
+ sigma_1, sigma_2, kappa_u, kappa_v = None, None, None, None
+ sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres = None, None, None, None
+ fine_segm_confidence_lowres, fine_segm_confidence = None, None
+ coarse_segm_confidence_lowres, coarse_segm_confidence = None, None
+ if self.confidence_model_cfg.uv_confidence.enabled:
+ if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
+ sigma_2_lowres = self.sigma_2_lowres(head_outputs)
+ # assuming base class defines interp2d method for bilinear interpolation
+ sigma_2 = self.interp2d(sigma_2_lowres)
+ elif (
+ self.confidence_model_cfg.uv_confidence.type
+ == DensePoseUVConfidenceType.INDEP_ANISO
+ ):
+ sigma_2_lowres = self.sigma_2_lowres(head_outputs)
+ kappa_u_lowres = self.kappa_u_lowres(head_outputs)
+ kappa_v_lowres = self.kappa_v_lowres(head_outputs)
+ # assuming base class defines interp2d method for bilinear interpolation
+ sigma_2 = self.interp2d(sigma_2_lowres)
+ kappa_u = self.interp2d(kappa_u_lowres)
+ kappa_v = self.interp2d(kappa_v_lowres)
+ else:
+ raise ValueError(
+ f"Unknown confidence model type: "
+ f"{self.confidence_model_cfg.confidence_model_type}"
+ )
+ if self.confidence_model_cfg.segm_confidence.enabled:
+ fine_segm_confidence_lowres = self.fine_segm_confidence_lowres(head_outputs)
+ # assuming base class defines interp2d method for bilinear interpolation
+ fine_segm_confidence = self.interp2d(fine_segm_confidence_lowres)
+ fine_segm_confidence = (
+ F.softplus(fine_segm_confidence) + self.confidence_model_cfg.segm_confidence.epsilon
+ )
+ fine_segm = fine_segm * torch.repeat_interleave(
+ fine_segm_confidence, fine_segm.shape[1], dim=1
+ )
+ coarse_segm_confidence_lowres = self.coarse_segm_confidence_lowres(head_outputs)
+ # assuming base class defines interp2d method for bilinear interpolation
+ coarse_segm_confidence = self.interp2d(coarse_segm_confidence_lowres)
+ coarse_segm_confidence = (
+ F.softplus(coarse_segm_confidence)
+ + self.confidence_model_cfg.segm_confidence.epsilon
+ )
+ coarse_segm = coarse_segm * torch.repeat_interleave(
+ coarse_segm_confidence, coarse_segm.shape[1], dim=1
+ )
+ results = []
+ # append SIUV with possibly modified segmentation tensors
+ results.append((coarse_segm, fine_segm, u, v))
+ # append the rest of base predictor outputs
+ if isinstance(base_predictor_outputs, tuple):
+ results.extend(base_predictor_outputs[1:])
+ # append hi-res confidence estimates
+ results.append(
+ (sigma_1, sigma_2, kappa_u, kappa_v, fine_segm_confidence, coarse_segm_confidence)
+ )
+ # append lo-res confidence estimates
+ results.append(
+ (
+ sigma_1_lowres,
+ sigma_2_lowres,
+ kappa_u_lowres,
+ kappa_v_lowres,
+ fine_segm_confidence_lowres,
+ coarse_segm_confidence_lowres,
+ )
+ )
+ return tuple(results)
diff --git a/projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py b/projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py
new file mode 100644
index 0000000..96875b8
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/predictors/chart_with_confidence.py
@@ -0,0 +1,13 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from . import DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
+
+
+class DensePoseChartWithConfidencePredictor(
+ DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
+):
+ """
+ Predictor that combines chart and chart confidence estimation
+ """
+
+ pass
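+
+
+# Method resolution order is what makes this composition work: with the mixin
+# listed first, super() calls inside the mixin resolve to DensePoseChartPredictor,
+# so the mixin receives the base SIUV outputs and reuses its interp2d method.
+# A toy sketch of the same pattern (illustrative only):
+if __name__ == "__main__":
+    class _Base:
+        def forward(self, x):
+            return ("siuv", x)
+
+    class _Mixin:
+        def forward(self, x):
+            base_out = super().forward(x)  # resolves to _Base.forward here
+            return base_out + ("confidences",)
+
+    class _Combined(_Mixin, _Base):
+        pass
+
+    assert _Combined().forward(1) == ("siuv", 1, "confidences")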
diff --git a/projects/DensePose/densepose/modeling/roi_heads/deeplab.py b/projects/DensePose/densepose/modeling/roi_heads/deeplab.py
new file mode 100644
index 0000000..106dfcf
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/roi_heads/deeplab.py
@@ -0,0 +1,263 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import fvcore.nn.weight_init as weight_init
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.layers import Conv2d
+
+from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
+
+
+@ROI_DENSEPOSE_HEAD_REGISTRY.register()
+class DensePoseDeepLabHead(nn.Module):
+ """
+ DensePose head using DeepLabV3 model from
+ "Rethinking Atrous Convolution for Semantic Image Segmentation"
+ (https://arxiv.org/abs/1706.05587).
+ """
+
+ def __init__(self, cfg: CfgNode, input_channels: int):
+ super(DensePoseDeepLabHead, self).__init__()
+ # fmt: off
+ hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
+ norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM
+ self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
+ self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON
+ # fmt: on
+ pad_size = kernel_size // 2
+ n_channels = input_channels
+
+ self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56
+ self.add_module("ASPP", self.ASPP)
+
+ if self.use_nonlocal:
+ self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True)
+ self.add_module("NLBlock", self.NLBlock)
+ # weight_init.c2_msra_fill(self.ASPP)
+
+ for i in range(self.n_stacked_convs):
+ norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None
+ layer = Conv2d(
+ n_channels,
+ hidden_dim,
+ kernel_size,
+ stride=1,
+ padding=pad_size,
+ bias=not norm,
+ norm=norm_module,
+ )
+ weight_init.c2_msra_fill(layer)
+ n_channels = hidden_dim
+ layer_name = self._get_layer_name(i)
+ self.add_module(layer_name, layer)
+ self.n_out_channels = hidden_dim
+ # initialize_module_params(self)
+
+ def forward(self, features):
+ x0 = features
+ x = self.ASPP(x0)
+ if self.use_nonlocal:
+ x = self.NLBlock(x)
+ output = x
+ for i in range(self.n_stacked_convs):
+ layer_name = self._get_layer_name(i)
+ x = getattr(self, layer_name)(x)
+ x = F.relu(x)
+ output = x
+ return output
+
+ def _get_layer_name(self, i: int):
+ layer_name = "body_conv_fcn{}".format(i + 1)
+ return layer_name
+
+
+# Copied from
+# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
+# See https://arxiv.org/pdf/1706.05587.pdf for details
+class ASPPConv(nn.Sequential):
+ def __init__(self, in_channels, out_channels, dilation):
+ modules = [
+ nn.Conv2d(
+ in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False
+ ),
+ nn.GroupNorm(32, out_channels),
+ nn.ReLU(),
+ ]
+ super(ASPPConv, self).__init__(*modules)
+
+
+class ASPPPooling(nn.Sequential):
+ def __init__(self, in_channels, out_channels):
+ super(ASPPPooling, self).__init__(
+ nn.AdaptiveAvgPool2d(1),
+ nn.Conv2d(in_channels, out_channels, 1, bias=False),
+ nn.GroupNorm(32, out_channels),
+ nn.ReLU(),
+ )
+
+ def forward(self, x):
+ size = x.shape[-2:]
+ x = super(ASPPPooling, self).forward(x)
+ return F.interpolate(x, size=size, mode="bilinear", align_corners=False)
+
+
+class ASPP(nn.Module):
+ def __init__(self, in_channels, atrous_rates, out_channels):
+ super(ASPP, self).__init__()
+ modules = []
+ modules.append(
+ nn.Sequential(
+ nn.Conv2d(in_channels, out_channels, 1, bias=False),
+ nn.GroupNorm(32, out_channels),
+ nn.ReLU(),
+ )
+ )
+
+ rate1, rate2, rate3 = tuple(atrous_rates)
+ modules.append(ASPPConv(in_channels, out_channels, rate1))
+ modules.append(ASPPConv(in_channels, out_channels, rate2))
+ modules.append(ASPPConv(in_channels, out_channels, rate3))
+ modules.append(ASPPPooling(in_channels, out_channels))
+
+ self.convs = nn.ModuleList(modules)
+
+ self.project = nn.Sequential(
+ nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
+ # nn.BatchNorm2d(out_channels),
+ nn.ReLU()
+ # nn.Dropout(0.5)
+ )
+
+ def forward(self, x):
+ res = []
+ for conv in self.convs:
+ res.append(conv(x))
+ res = torch.cat(res, dim=1)
+ return self.project(res)
+
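+# A small usage sketch for the ASPP block above (shapes are illustrative):
+#
+#   aspp = ASPP(in_channels=512, atrous_rates=[6, 12, 56], out_channels=256)
+#   y = aspp(torch.randn(1, 512, 28, 28))  # spatial size preserved: (1, 256, 28, 28)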
+
+# copied from
+# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py
+# See https://arxiv.org/abs/1711.07971 for details
+class _NonLocalBlockND(nn.Module):
+ def __init__(
+ self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True
+ ):
+ super(_NonLocalBlockND, self).__init__()
+
+ assert dimension in [1, 2, 3]
+
+ self.dimension = dimension
+ self.sub_sample = sub_sample
+
+ self.in_channels = in_channels
+ self.inter_channels = inter_channels
+
+ if self.inter_channels is None:
+ self.inter_channels = in_channels // 2
+ if self.inter_channels == 0:
+ self.inter_channels = 1
+
+ if dimension == 3:
+ conv_nd = nn.Conv3d
+ max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
+ bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d
+ elif dimension == 2:
+ conv_nd = nn.Conv2d
+ max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
+ bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d
+ else:
+ conv_nd = nn.Conv1d
+ max_pool_layer = nn.MaxPool1d(kernel_size=2)
+ bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d
+
+ self.g = conv_nd(
+ in_channels=self.in_channels,
+ out_channels=self.inter_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+
+ if bn_layer:
+ self.W = nn.Sequential(
+ conv_nd(
+ in_channels=self.inter_channels,
+ out_channels=self.in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ ),
+ bn(32, self.in_channels),
+ )
+ nn.init.constant_(self.W[1].weight, 0)
+ nn.init.constant_(self.W[1].bias, 0)
+ else:
+ self.W = conv_nd(
+ in_channels=self.inter_channels,
+ out_channels=self.in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+ nn.init.constant_(self.W.weight, 0)
+ nn.init.constant_(self.W.bias, 0)
+
+ self.theta = conv_nd(
+ in_channels=self.in_channels,
+ out_channels=self.inter_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+ self.phi = conv_nd(
+ in_channels=self.in_channels,
+ out_channels=self.inter_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+
+ if sub_sample:
+ self.g = nn.Sequential(self.g, max_pool_layer)
+ self.phi = nn.Sequential(self.phi, max_pool_layer)
+
+ def forward(self, x):
+ """
+ :param x: (b, c, t, h, w)
+ :return: a tensor of the same shape as x (residual output z = W_y + x)
+ """
+
+ batch_size = x.size(0)
+
+ g_x = self.g(x).view(batch_size, self.inter_channels, -1)
+ g_x = g_x.permute(0, 2, 1)
+
+ theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
+ theta_x = theta_x.permute(0, 2, 1)
+ phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
+ f = torch.matmul(theta_x, phi_x)
+ f_div_C = F.softmax(f, dim=-1)
+
+ y = torch.matmul(f_div_C, g_x)
+ y = y.permute(0, 2, 1).contiguous()
+ y = y.view(batch_size, self.inter_channels, *x.size()[2:])
+ W_y = self.W(y)
+ z = W_y + x
+
+ return z
+
+
+class NONLocalBlock2D(_NonLocalBlockND):
+ def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
+ super(NONLocalBlock2D, self).__init__(
+ in_channels,
+ inter_channels=inter_channels,
+ dimension=2,
+ sub_sample=sub_sample,
+ bn_layer=bn_layer,
+ )
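+
+
+# A minimal sketch exercising the non-local block in isolation (illustrative only;
+# the channel count must be divisible by 32 because of the GroupNorm used above):
+if __name__ == "__main__":
+    block = NONLocalBlock2D(in_channels=256, sub_sample=True, bn_layer=True)
+    x = torch.randn(2, 256, 14, 14)
+    z = block(x)
+    # the residual connection (z = W_y + x) preserves the input shape
+    assert z.shape == x.shape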
diff --git a/projects/DensePose/densepose/modeling/roi_heads/registry.py b/projects/DensePose/densepose/modeling/roi_heads/registry.py
new file mode 100644
index 0000000..29e58cf
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/roi_heads/registry.py
@@ -0,0 +1,5 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+from detectron2.utils.registry import Registry
+
+ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD")
diff --git a/projects/DensePose/densepose/modeling/roi_heads/roi_head.py b/projects/DensePose/densepose/modeling/roi_heads/roi_head.py
new file mode 100644
index 0000000..68a0b3a
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/roi_heads/roi_head.py
@@ -0,0 +1,224 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import numpy as np
+from typing import Dict, List, Optional
+import fvcore.nn.weight_init as weight_init
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+
+from detectron2.layers import Conv2d, ShapeSpec, get_norm
+from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.roi_heads import select_foreground_proposals
+from detectron2.structures import ImageList, Instances
+
+from .. import (
+ build_densepose_data_filter,
+ build_densepose_head,
+ build_densepose_losses,
+ build_densepose_predictor,
+ densepose_inference,
+)
+
+
+class Decoder(nn.Module):
+ """
+ A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
+ (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
+ all levels of the FPN into a single output.
+ """
+
+ def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
+ super(Decoder, self).__init__()
+
+ # fmt: off
+ self.in_features = in_features
+ feature_strides = {k: v.stride for k, v in input_shape.items()}
+ feature_channels = {k: v.channels for k, v in input_shape.items()}
+ num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
+ conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
+ self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
+ norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
+ # fmt: on
+
+ self.scale_heads = []
+ for in_feature in self.in_features:
+ head_ops = []
+ head_length = max(
+ 1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
+ )
+ for k in range(head_length):
+ conv = Conv2d(
+ feature_channels[in_feature] if k == 0 else conv_dims,
+ conv_dims,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=not norm,
+ norm=get_norm(norm, conv_dims),
+ activation=F.relu,
+ )
+ weight_init.c2_msra_fill(conv)
+ head_ops.append(conv)
+ if feature_strides[in_feature] != self.common_stride:
+ head_ops.append(
+ nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
+ )
+ self.scale_heads.append(nn.Sequential(*head_ops))
+ self.add_module(in_feature, self.scale_heads[-1])
+ self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
+ weight_init.c2_msra_fill(self.predictor)
+
+ def forward(self, features: List[torch.Tensor]):
+ for i, _ in enumerate(self.in_features):
+ if i == 0:
+ x = self.scale_heads[i](features[i])
+ else:
+ x = x + self.scale_heads[i](features[i])
+ x = self.predictor(x)
+ return x
+
+
+@ROI_HEADS_REGISTRY.register()
+class DensePoseROIHeads(StandardROIHeads):
+ """
+ A Standard ROIHeads which additionally contains a DensePose head.
+ """
+
+ def __init__(self, cfg, input_shape):
+ super().__init__(cfg, input_shape)
+ self._init_densepose_head(cfg, input_shape)
+
+ def _init_densepose_head(self, cfg, input_shape):
+ # fmt: off
+ self.densepose_on = cfg.MODEL.DENSEPOSE_ON
+ if not self.densepose_on:
+ return
+ self.densepose_data_filter = build_densepose_data_filter(cfg)
+ dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
+ dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
+ dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
+ self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
+ # fmt: on
+ if self.use_decoder:
+ dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
+ else:
+ dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
+ in_channels = [input_shape[f].channels for f in self.in_features][0]
+
+ if self.use_decoder:
+ self.decoder = Decoder(cfg, input_shape, self.in_features)
+
+ self.densepose_pooler = ROIPooler(
+ output_size=dp_pooler_resolution,
+ scales=dp_pooler_scales,
+ sampling_ratio=dp_pooler_sampling_ratio,
+ pooler_type=dp_pooler_type,
+ )
+ self.densepose_head = build_densepose_head(cfg, in_channels)
+ self.densepose_predictor = build_densepose_predictor(
+ cfg, self.densepose_head.n_out_channels
+ )
+ self.densepose_losses = build_densepose_losses(cfg)
+
+ def _forward_densepose(self, features: Dict[str, torch.Tensor], instances: List[Instances]):
+ """
+ Forward logic of the densepose prediction branch.
+
+ Args:
+ features (dict[str, Tensor]): input data as a mapping from feature
+ map name to tensor. Axis 0 represents the number of images `N` in
+ the input data; axes 1-3 are channels, height, and width, which may
+ vary between feature maps (e.g., if a feature pyramid is used).
+ instances (list[Instances]): length `N` list of `Instances`. The i-th
+ `Instances` contains instances for the i-th input image,
+ In training, they can be the proposals.
+ In inference, they can be the predicted boxes.
+
+ Returns:
+ In training, a dict of losses.
+ In inference, update `instances` with new fields "densepose" and return it.
+ """
+ if not self.densepose_on:
+ return {} if self.training else instances
+
+ features = [features[f] for f in self.in_features]
+ if self.training:
+ proposals, _ = select_foreground_proposals(instances, self.num_classes)
+ features, proposals = self.densepose_data_filter(features, proposals)
+ if len(proposals) > 0:
+ proposal_boxes = [x.proposal_boxes for x in proposals]
+
+ if self.use_decoder:
+ features = [self.decoder(features)]
+
+ features_dp = self.densepose_pooler(features, proposal_boxes)
+ densepose_head_outputs = self.densepose_head(features_dp)
+ densepose_outputs, _, confidences, _ = self.densepose_predictor(
+ densepose_head_outputs
+ )
+ densepose_loss_dict = self.densepose_losses(
+ proposals, densepose_outputs, confidences
+ )
+ return densepose_loss_dict
+ else:
+ pred_boxes = [x.pred_boxes for x in instances]
+
+ if self.use_decoder:
+ features = [self.decoder(features)]
+
+ features_dp = self.densepose_pooler(features, pred_boxes)
+ if len(features_dp) > 0:
+ densepose_head_outputs = self.densepose_head(features_dp)
+ densepose_outputs, _, confidences, _ = self.densepose_predictor(
+ densepose_head_outputs
+ )
+ else:
+ # If no detections occurred in the instances,
+ # set densepose_outputs to empty tensors
+ empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device)
+ densepose_outputs = tuple([empty_tensor] * 4)
+ confidences = tuple([empty_tensor] * 6)
+
+ densepose_inference(densepose_outputs, confidences, instances)
+ return instances
+
+ def forward(
+ self,
+ images: ImageList,
+ features: Dict[str, torch.Tensor],
+ proposals: List[Instances],
+ targets: Optional[List[Instances]] = None,
+ ):
+ instances, losses = super().forward(images, features, proposals, targets)
+ del targets, images
+
+ if self.training:
+ losses.update(self._forward_densepose(features, instances))
+ return instances, losses
+
+ def forward_with_given_boxes(
+ self, features: Dict[str, torch.Tensor], instances: List[Instances]
+ ):
+ """
+ Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
+
+ This is useful for downstream tasks where a box is known, but need to obtain
+ other attributes (outputs of other heads).
+ Test-time augmentation also uses this.
+
+ Args:
+ features: same as in `forward()`
+ instances (list[Instances]): instances to predict other outputs. Expect the keys
+ "pred_boxes" and "pred_classes" to exist.
+
+ Returns:
+ instances (list[Instances]):
+ the same `Instances` objects, with extra
+ fields such as `pred_masks` or `pred_keypoints`.
+ """
+
+ instances = super().forward_with_given_boxes(features, instances)
+ instances = self._forward_densepose(features, instances)
+ return instances
diff --git a/projects/DensePose/densepose/modeling/roi_heads/v1convx.py b/projects/DensePose/densepose/modeling/roi_heads/v1convx.py
new file mode 100644
index 0000000..ef02b0e
--- /dev/null
+++ b/projects/DensePose/densepose/modeling/roi_heads/v1convx.py
@@ -0,0 +1,64 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.layers import Conv2d
+
+from ..utils import initialize_module_params
+from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
+
+
+@ROI_DENSEPOSE_HEAD_REGISTRY.register()
+class DensePoseV1ConvXHead(nn.Module):
+ """
+ Fully convolutional DensePose head.
+ """
+
+ def __init__(self, cfg: CfgNode, input_channels: int):
+ """
+ Initialize DensePose fully convolutional head
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): number of input channels
+ """
+ super(DensePoseV1ConvXHead, self).__init__()
+ # fmt: off
+ hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
+ self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
+ # fmt: on
+ pad_size = kernel_size // 2
+ n_channels = input_channels
+ for i in range(self.n_stacked_convs):
+ layer = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=pad_size)
+ layer_name = self._get_layer_name(i)
+ self.add_module(layer_name, layer)
+ n_channels = hidden_dim
+ self.n_out_channels = n_channels
+ initialize_module_params(self)
+
+ def forward(self, features: torch.Tensor):
+ """
+ Apply DensePose fully convolutional head to the input features
+
+ Args:
+ features (tensor): input features
+ Result:
+ A tensor of DensePose head outputs
+ """
+ x = features
+ output = x
+ for i in range(self.n_stacked_convs):
+ layer_name = self._get_layer_name(i)
+ x = getattr(self, layer_name)(x)
+ x = F.relu(x)
+ output = x
+ return output
+
+ def _get_layer_name(self, i: int):
+ layer_name = "body_conv_fcn{}".format(i + 1)
+ return layer_name
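+
+
+# A minimal sketch of running the head on a dummy feature map with a hand-built
+# config (the option values below are illustrative, not the project defaults):
+if __name__ == "__main__":
+    cfg = CfgNode()
+    cfg.MODEL = CfgNode()
+    cfg.MODEL.ROI_DENSEPOSE_HEAD = CfgNode()
+    cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
+    cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
+    cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
+    head = DensePoseV1ConvXHead(cfg, input_channels=256)
+    out = head(torch.randn(2, 256, 28, 28))
+    # stride-1 convolutions with "same" padding keep the spatial size
+    assert out.shape == (2, head.n_out_channels, 28, 28)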
diff --git a/projects/DensePose/query_db.py b/projects/DensePose/query_db.py
new file mode 100644
index 0000000..6d3ea2f
--- /dev/null
+++ b/projects/DensePose/query_db.py
@@ -0,0 +1,250 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import argparse
+import logging
+import os
+import sys
+from timeit import default_timer as timer
+from typing import Any, ClassVar, Dict, List
+import torch
+from fvcore.common.file_io import PathManager
+
+from detectron2.data.catalog import DatasetCatalog
+from detectron2.utils.logger import setup_logger
+
+from densepose.data.structures import DensePoseDataRelative
+from densepose.utils.dbhelper import EntrySelector
+from densepose.utils.logger import verbosity_to_level
+from densepose.vis.base import CompoundVisualizer
+from densepose.vis.bounding_box import BoundingBoxVisualizer
+from densepose.vis.densepose import (
+ DensePoseDataCoarseSegmentationVisualizer,
+ DensePoseDataPointsIVisualizer,
+ DensePoseDataPointsUVisualizer,
+ DensePoseDataPointsVisualizer,
+ DensePoseDataPointsVVisualizer,
+)
+
+DOC = """Query DB - a tool to print / visualize data from a database
+"""
+
+LOGGER_NAME = "query_db"
+
+logger = logging.getLogger(LOGGER_NAME)
+
+_ACTION_REGISTRY: Dict[str, "Action"] = {}
+
+
+class Action(object):
+ @classmethod
+ def add_arguments(cls: type, parser: argparse.ArgumentParser):
+ parser.add_argument(
+ "-v",
+ "--verbosity",
+ action="count",
+ help="Verbose mode. Multiple -v options increase the verbosity.",
+ )
+
+
+def register_action(cls: type):
+ """
+ Decorator for action classes to automate action registration
+ """
+ global _ACTION_REGISTRY
+ _ACTION_REGISTRY[cls.COMMAND] = cls
+ return cls
+
+
+class EntrywiseAction(Action):
+ @classmethod
+ def add_arguments(cls: type, parser: argparse.ArgumentParser):
+ super(EntrywiseAction, cls).add_arguments(parser)
+ parser.add_argument(
+ "dataset", metavar="", help="Dataset name (e.g. densepose_coco_2014_train)"
+ )
+ parser.add_argument(
+ "selector",
+ metavar="",
+ help="Dataset entry selector in the form field1[:type]=value1[,"
+ "field2[:type]=value_min-value_max...] which selects all "
+ "entries from the dataset that satisfy the constraints",
+ )
+ parser.add_argument(
+ "--max-entries", metavar="N", help="Maximum number of entries to process", type=int
+ )
+
+ @classmethod
+ def execute(cls: type, args: argparse.Namespace):
+ dataset = setup_dataset(args.dataset)
+ entry_selector = EntrySelector.from_string(args.selector)
+ context = cls.create_context(args)
+ if args.max_entries is not None:
+ for _, entry in zip(range(args.max_entries), dataset):
+ if entry_selector(entry):
+ cls.execute_on_entry(entry, context)
+ else:
+ for entry in dataset:
+ if entry_selector(entry):
+ cls.execute_on_entry(entry, context)
+
+ @classmethod
+ def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
+ context = {}
+ return context
+
+
+@register_action
+class PrintAction(EntrywiseAction):
+ """
+ Print action that outputs selected entries to stdout
+ """
+
+ COMMAND: ClassVar[str] = "print"
+
+ @classmethod
+ def add_parser(cls: type, subparsers: argparse._SubParsersAction):
+ parser = subparsers.add_parser(cls.COMMAND, help="Output selected entries to stdout. ")
+ cls.add_arguments(parser)
+ parser.set_defaults(func=cls.execute)
+
+ @classmethod
+ def add_arguments(cls: type, parser: argparse.ArgumentParser):
+ super(PrintAction, cls).add_arguments(parser)
+
+ @classmethod
+ def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]):
+ import pprint
+
+ printer = pprint.PrettyPrinter(indent=2, width=200, compact=True)
+ printer.pprint(entry)
+
+
+@register_action
+class ShowAction(EntrywiseAction):
+ """
+ Show action that visualizes selected entries on an image
+ """
+
+ COMMAND: ClassVar[str] = "show"
+ VISUALIZERS: ClassVar[Dict[str, object]] = {
+ "dp_segm": DensePoseDataCoarseSegmentationVisualizer(),
+ "dp_i": DensePoseDataPointsIVisualizer(),
+ "dp_u": DensePoseDataPointsUVisualizer(),
+ "dp_v": DensePoseDataPointsVVisualizer(),
+ "dp_pts": DensePoseDataPointsVisualizer(),
+ "bbox": BoundingBoxVisualizer(),
+ }
+
+ @classmethod
+ def add_parser(cls: type, subparsers: argparse._SubParsersAction):
+ parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries")
+ cls.add_arguments(parser)
+ parser.set_defaults(func=cls.execute)
+
+ @classmethod
+ def add_arguments(cls: type, parser: argparse.ArgumentParser):
+ super(ShowAction, cls).add_arguments(parser)
+ parser.add_argument(
+ "visualizations",
+ metavar="",
+ help="Comma separated list of visualizations, possible values: "
+ "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))),
+ )
+ parser.add_argument(
+ "--output",
+ metavar="",
+ default="output.png",
+ help="File name to save output to",
+ )
+
+ @classmethod
+ def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]):
+ import cv2
+ import numpy as np
+
+ image_fpath = PathManager.get_local_path(entry["file_name"])
+ image = cv2.imread(image_fpath, cv2.IMREAD_GRAYSCALE)
+ image = np.tile(image[:, :, np.newaxis], [1, 1, 3])
+ datas = cls._extract_data_for_visualizers_from_entry(context["vis_specs"], entry)
+ visualizer = context["visualizer"]
+ image_vis = visualizer.visualize(image, datas)
+ entry_idx = context["entry_idx"] + 1
+ out_fname = cls._get_out_fname(entry_idx, context["out_fname"])
+ cv2.imwrite(out_fname, image_vis)
+ logger.info(f"Output saved to {out_fname}")
+ context["entry_idx"] += 1
+
+ @classmethod
+ def _get_out_fname(cls: type, entry_idx: int, fname_base: str):
+ base, ext = os.path.splitext(fname_base)
+ return base + ".{0:04d}".format(entry_idx) + ext
+
+ @classmethod
+ def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
+ vis_specs = args.visualizations.split(",")
+ visualizers = []
+ for vis_spec in vis_specs:
+ vis = cls.VISUALIZERS[vis_spec]
+ visualizers.append(vis)
+ context = {
+ "vis_specs": vis_specs,
+ "visualizer": CompoundVisualizer(visualizers),
+ "out_fname": args.output,
+ "entry_idx": 0,
+ }
+ return context
+
+ @classmethod
+ def _extract_data_for_visualizers_from_entry(
+ cls: type, vis_specs: List[str], entry: Dict[str, Any]
+ ):
+ dp_list = []
+ bbox_list = []
+ for annotation in entry["annotations"]:
+ is_valid, _ = DensePoseDataRelative.validate_annotation(annotation)
+ if not is_valid:
+ continue
+ bbox = torch.as_tensor(annotation["bbox"])
+ bbox_list.append(bbox)
+ dp_data = DensePoseDataRelative(annotation)
+ dp_list.append(dp_data)
+ datas = []
+ for vis_spec in vis_specs:
+ datas.append(bbox_list if "bbox" == vis_spec else (bbox_list, dp_list))
+ return datas
+
+
+def setup_dataset(dataset_name):
+ logger.info("Loading dataset {}".format(dataset_name))
+ start = timer()
+ dataset = DatasetCatalog.get(dataset_name)
+ stop = timer()
+ logger.info("Loaded dataset {} in {:.3f}s".format(dataset_name, stop - start))
+ return dataset
+
+
+def create_argument_parser() -> argparse.ArgumentParser:
+ parser = argparse.ArgumentParser(
+ description=DOC,
+ formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120),
+ )
+ parser.set_defaults(func=lambda _: parser.print_help(sys.stdout))
+ subparsers = parser.add_subparsers(title="Actions")
+ for _, action in _ACTION_REGISTRY.items():
+ action.add_parser(subparsers)
+ return parser
+
+
+def main():
+ parser = create_argument_parser()
+ args = parser.parse_args()
+ verbosity = args.verbosity if hasattr(args, "verbosity") else None
+ global logger
+ logger = setup_logger(name=LOGGER_NAME)
+ logger.setLevel(verbosity_to_level(verbosity))
+ args.func(args)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/projects/DensePose/train_net.py b/projects/DensePose/train_net.py
new file mode 100644
index 0000000..72c74c3
--- /dev/null
+++ b/projects/DensePose/train_net.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+"""
+DensePose Training Script.
+
+This script is similar to the training script in detectron2/tools.
+
+It is an example of how a user might use detectron2 for a new project.
+"""
+
+from fvcore.common.file_io import PathManager
+
+import detectron2.utils.comm as comm
+from detectron2.config import get_cfg
+from detectron2.engine import default_argument_parser, default_setup, hooks, launch
+from detectron2.evaluation import verify_results
+from detectron2.utils.logger import setup_logger
+
+from densepose import add_densepose_config
+from densepose.engine import Trainer
+from densepose.modeling.densepose_checkpoint import DensePoseCheckpointer
+
+
+def setup(args):
+ cfg = get_cfg()
+ add_densepose_config(cfg)
+ cfg.merge_from_file(args.config_file)
+ cfg.merge_from_list(args.opts)
+ cfg.freeze()
+ default_setup(cfg, args)
+ # Setup logger for "densepose" module
+ setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="densepose")
+ return cfg
+
+
+def main(args):
+ cfg = setup(args)
+ # disable strict kwargs checking: allow one to specify path handle
+ # hints through kwargs, like timeout in DP evaluation
+ PathManager.set_strict_kwargs_checking(False)
+
+ if args.eval_only:
+ model = Trainer.build_model(cfg)
+ DensePoseCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
+ cfg.MODEL.WEIGHTS, resume=args.resume
+ )
+ res = Trainer.test(cfg, model)
+ if cfg.TEST.AUG.ENABLED:
+ res.update(Trainer.test_with_TTA(cfg, model))
+ if comm.is_main_process():
+ verify_results(cfg, res)
+ return res
+
+ trainer = Trainer(cfg)
+ trainer.resume_or_load(resume=args.resume)
+ if cfg.TEST.AUG.ENABLED:
+ trainer.register_hooks(
+ [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))]
+ )
+ return trainer.train()
+
+
+if __name__ == "__main__":
+ args = default_argument_parser().parse_args()
+ print("Command Line Args:", args)
+ launch(
+ main,
+ args.num_gpus,
+ num_machines=args.num_machines,
+ machine_rank=args.machine_rank,
+ dist_url=args.dist_url,
+ args=(args,),
+ )