Add files via upload

main
RE-OWOD 2022-01-04 13:49:38 +08:00 committed by GitHub
parent 3c9dcce2c7
commit 28e2825941
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
100 changed files with 9052 additions and 0 deletions

View File

@ -1 +1,54 @@
# DensePose in Detectron2
**Dense Human Pose Estimation In The Wild**
_Rıza Alp Güler, Natalia Neverova, Iasonas Kokkinos_
[[`densepose.org`](https://densepose.org)] [[`arXiv`](https://arxiv.org/abs/1802.00434)] [[`BibTeX`](#CitingDensePose)]
Dense human pose estimation aims at mapping all human pixels of an RGB image to the 3D surface of the human body.
<div align="center">
<img src="https://drive.google.com/uc?export=view&id=1qfSOkpueo1kVZbXOuQJJhyagKjMgepsz" width="700px" />
</div>
In this repository, we provide the code to train and evaluate DensePose-RCNN. We also provide tools to visualize
DensePose annotations and results.
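For example, a trained model can be run programmatically with Detectron2's `DefaultPredictor`; the sketch below is illustrative only, and the config and checkpoint paths are placeholders:
```
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.engine.defaults import DefaultPredictor

from densepose import add_densepose_config

# Compose a DensePose config on top of the Detectron2 defaults.
cfg = get_cfg()
add_densepose_config(cfg)
cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_s1x.yaml")  # placeholder config
cfg.MODEL.WEIGHTS = "model_final.pkl"                            # placeholder checkpoint
cfg.freeze()

# Run the model on a single image (the predictor expects BGR input).
predictor = DefaultPredictor(cfg)
img = read_image("image.jpg", format="BGR")
instances = predictor(img)["instances"]
if instances.has("pred_densepose"):
    print(instances.get("scores").cpu())
```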
# Quick Start
See [Getting Started](doc/GETTING_STARTED.md).
# Model Zoo and Baselines
We provide a number of baseline results and trained models available for download. See [Model Zoo](doc/MODEL_ZOO.md) for details.
# License
Detectron2 is released under the [Apache 2.0 license](../../LICENSE)
## <a name="CitingDensePose"></a>Citing DensePose
If you use DensePose, please cite it using the following BibTeX entries:
For DensePose with estimated confidences:
```
@InProceedings{Neverova2019DensePoseConfidences,
title = {Correlated Uncertainty for Learning Dense Correspondences from Noisy Labels},
author = {Neverova, Natalia and Novotny, David and Vedaldi, Andrea},
    booktitle = {Advances in Neural Information Processing Systems},
year = {2019},
}
```
For the original DensePose:
```
@InProceedings{Guler2018DensePose,
title={DensePose: Dense Human Pose Estimation In The Wild},
  author={R{\i}za Alp G\"uler and Natalia Neverova and Iasonas Kokkinos},
  booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2018}
}
```

View File

@ -0,0 +1,319 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import argparse
import glob
import logging
import os
import pickle
import sys
from typing import Any, ClassVar, Dict, List
import torch
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.engine.defaults import DefaultPredictor
from detectron2.structures.boxes import BoxMode
from detectron2.structures.instances import Instances
from detectron2.utils.logger import setup_logger
from densepose import add_densepose_config, add_hrnet_config
from densepose.utils.logger import verbosity_to_level
from densepose.vis.base import CompoundVisualizer
from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer
from densepose.vis.densepose import (
DensePoseResultsContourVisualizer,
DensePoseResultsFineSegmentationVisualizer,
DensePoseResultsUVisualizer,
DensePoseResultsVVisualizer,
)
from densepose.vis.extractor import CompoundExtractor, create_extractor
DOC = """Apply Net - a tool to print / visualize DensePose results
"""
LOGGER_NAME = "apply_net"
logger = logging.getLogger(LOGGER_NAME)
_ACTION_REGISTRY: Dict[str, "Action"] = {}
class Action(object):
@classmethod
def add_arguments(cls: type, parser: argparse.ArgumentParser):
parser.add_argument(
"-v",
"--verbosity",
action="count",
help="Verbose mode. Multiple -v options increase the verbosity.",
)
def register_action(cls: type):
"""
Decorator for action classes to automate action registration
"""
global _ACTION_REGISTRY
_ACTION_REGISTRY[cls.COMMAND] = cls
return cls
class InferenceAction(Action):
@classmethod
def add_arguments(cls: type, parser: argparse.ArgumentParser):
super(InferenceAction, cls).add_arguments(parser)
parser.add_argument("cfg", metavar="<config>", help="Config file")
parser.add_argument("model", metavar="<model>", help="Model file")
parser.add_argument("input", metavar="<input>", help="Input data")
parser.add_argument(
"--opts",
help="Modify config options using the command-line 'KEY VALUE' pairs",
default=[],
nargs=argparse.REMAINDER,
)
@classmethod
def execute(cls: type, args: argparse.Namespace):
logger.info(f"Loading config from {args.cfg}")
opts = []
cfg = cls.setup_config(args.cfg, args.model, args, opts)
logger.info(f"Loading model from {args.model}")
predictor = DefaultPredictor(cfg)
logger.info(f"Loading data from {args.input}")
file_list = cls._get_input_file_list(args.input)
if len(file_list) == 0:
logger.warning(f"No input images for {args.input}")
return
context = cls.create_context(args)
for file_name in file_list:
img = read_image(file_name, format="BGR") # predictor expects BGR image.
with torch.no_grad():
outputs = predictor(img)["instances"]
cls.execute_on_outputs(context, {"file_name": file_name, "image": img}, outputs)
cls.postexecute(context)
@classmethod
def setup_config(
cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str]
):
cfg = get_cfg()
add_densepose_config(cfg)
add_hrnet_config(cfg)
cfg.merge_from_file(config_fpath)
cfg.merge_from_list(args.opts)
if opts:
cfg.merge_from_list(opts)
cfg.MODEL.WEIGHTS = model_fpath
cfg.freeze()
return cfg
@classmethod
def _get_input_file_list(cls: type, input_spec: str):
if os.path.isdir(input_spec):
file_list = [
os.path.join(input_spec, fname)
for fname in os.listdir(input_spec)
if os.path.isfile(os.path.join(input_spec, fname))
]
elif os.path.isfile(input_spec):
file_list = [input_spec]
else:
file_list = glob.glob(input_spec)
return file_list
@register_action
class DumpAction(InferenceAction):
"""
Dump action that outputs results to a pickle file
"""
COMMAND: ClassVar[str] = "dump"
@classmethod
def add_parser(cls: type, subparsers: argparse._SubParsersAction):
parser = subparsers.add_parser(cls.COMMAND, help="Dump model outputs to a file.")
cls.add_arguments(parser)
parser.set_defaults(func=cls.execute)
@classmethod
def add_arguments(cls: type, parser: argparse.ArgumentParser):
super(DumpAction, cls).add_arguments(parser)
parser.add_argument(
"--output",
metavar="<dump_file>",
default="results.pkl",
help="File name to save dump to",
)
@classmethod
def execute_on_outputs(
cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
):
image_fpath = entry["file_name"]
logger.info(f"Processing {image_fpath}")
result = {"file_name": image_fpath}
if outputs.has("scores"):
result["scores"] = outputs.get("scores").cpu()
if outputs.has("pred_boxes"):
result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu()
if outputs.has("pred_densepose"):
boxes_XYWH = BoxMode.convert(
result["pred_boxes_XYXY"], BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
)
result["pred_densepose"] = outputs.get("pred_densepose").to_result(boxes_XYWH)
context["results"].append(result)
@classmethod
def create_context(cls: type, args: argparse.Namespace):
context = {"results": [], "out_fname": args.output}
return context
@classmethod
def postexecute(cls: type, context: Dict[str, Any]):
out_fname = context["out_fname"]
out_dir = os.path.dirname(out_fname)
if len(out_dir) > 0 and not os.path.exists(out_dir):
os.makedirs(out_dir)
with open(out_fname, "wb") as hFile:
pickle.dump(context["results"], hFile)
logger.info(f"Output saved to {out_fname}")
@register_action
class ShowAction(InferenceAction):
"""
Show action that visualizes selected entries on an image
"""
COMMAND: ClassVar[str] = "show"
VISUALIZERS: ClassVar[Dict[str, object]] = {
"dp_contour": DensePoseResultsContourVisualizer,
"dp_segm": DensePoseResultsFineSegmentationVisualizer,
"dp_u": DensePoseResultsUVisualizer,
"dp_v": DensePoseResultsVVisualizer,
"bbox": ScoredBoundingBoxVisualizer,
}
@classmethod
def add_parser(cls: type, subparsers: argparse._SubParsersAction):
parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries")
cls.add_arguments(parser)
parser.set_defaults(func=cls.execute)
@classmethod
def add_arguments(cls: type, parser: argparse.ArgumentParser):
super(ShowAction, cls).add_arguments(parser)
parser.add_argument(
"visualizations",
metavar="<visualizations>",
help="Comma separated list of visualizations, possible values: "
"[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))),
)
parser.add_argument(
"--min_score",
metavar="<score>",
default=0.8,
type=float,
help="Minimum detection score to visualize",
)
parser.add_argument(
"--nms_thresh", metavar="<threshold>", default=None, type=float, help="NMS threshold"
)
parser.add_argument(
"--output",
metavar="<image_file>",
default="outputres.png",
help="File name to save output to",
)
@classmethod
def setup_config(
cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str]
):
opts.append("MODEL.ROI_HEADS.SCORE_THRESH_TEST")
opts.append(str(args.min_score))
if args.nms_thresh is not None:
opts.append("MODEL.ROI_HEADS.NMS_THRESH_TEST")
opts.append(str(args.nms_thresh))
cfg = super(ShowAction, cls).setup_config(config_fpath, model_fpath, args, opts)
return cfg
@classmethod
def execute_on_outputs(
cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
):
import cv2
import numpy as np
visualizer = context["visualizer"]
extractor = context["extractor"]
image_fpath = entry["file_name"]
logger.info(f"Processing {image_fpath}")
image = cv2.cvtColor(entry["image"], cv2.COLOR_BGR2GRAY)
image = np.tile(image[:, :, np.newaxis], [1, 1, 3])
data = extractor(outputs)
image_vis = visualizer.visualize(image, data)
entry_idx = context["entry_idx"] + 1
out_fname = cls._get_out_fname(entry_idx, context["out_fname"])
out_dir = os.path.dirname(out_fname)
if len(out_dir) > 0 and not os.path.exists(out_dir):
os.makedirs(out_dir)
cv2.imwrite(out_fname, image_vis)
logger.info(f"Output saved to {out_fname}")
context["entry_idx"] += 1
@classmethod
def postexecute(cls: type, context: Dict[str, Any]):
pass
@classmethod
def _get_out_fname(cls: type, entry_idx: int, fname_base: str):
base, ext = os.path.splitext(fname_base)
return base + ".{0:04d}".format(entry_idx) + ext
@classmethod
def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
vis_specs = args.visualizations.split(",")
visualizers = []
extractors = []
for vis_spec in vis_specs:
vis = cls.VISUALIZERS[vis_spec]()
visualizers.append(vis)
extractor = create_extractor(vis)
extractors.append(extractor)
visualizer = CompoundVisualizer(visualizers)
extractor = CompoundExtractor(extractors)
context = {
"extractor": extractor,
"visualizer": visualizer,
"out_fname": args.output,
"entry_idx": 0,
}
return context
def create_argument_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
description=DOC,
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120),
)
parser.set_defaults(func=lambda _: parser.print_help(sys.stdout))
subparsers = parser.add_subparsers(title="Actions")
for _, action in _ACTION_REGISTRY.items():
action.add_parser(subparsers)
return parser
def main():
parser = create_argument_parser()
args = parser.parse_args()
verbosity = args.verbosity if hasattr(args, "verbosity") else None
global logger
logger = setup_logger(name=LOGGER_NAME)
logger.setLevel(verbosity_to_level(verbosity))
args.func(args)
if __name__ == "__main__":
main()
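For reference, the parser defined above can also be driven programmatically. A minimal sketch, assuming the script is saved as `apply_net.py` and importable from the working directory; every path below is a placeholder:
```
from apply_net import create_argument_parser

# Build the CLI parser and invoke the "show" action without a shell.
parser = create_argument_parser()
args = parser.parse_args([
    "show",
    "configs/densepose_rcnn_R_50_FPN_s1x.yaml",  # <config>   (placeholder)
    "model_final.pkl",                           # <model>    (placeholder)
    "images",                                    # <input>: file, directory or glob
    "dp_contour,bbox",                           # visualizations to overlay
    "--min_score", "0.9",
    "--output", "image_densepose_contour.png",
])
args.func(args)  # dispatches to ShowAction.execute via set_defaults(func=...)
```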

View File

@ -0,0 +1,48 @@
VERSION: 2
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
NAME: "build_resnet_fpn_backbone"
RESNETS:
OUT_FEATURES: ["res2", "res3", "res4", "res5"]
FPN:
IN_FEATURES: ["res2", "res3", "res4", "res5"]
ANCHOR_GENERATOR:
SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
RPN:
IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
PRE_NMS_TOPK_TEST: 1000 # Per FPN level
# Detectron1 uses 2000 proposals per-batch,
# (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
# which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
POST_NMS_TOPK_TRAIN: 1000
POST_NMS_TOPK_TEST: 1000
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_FC: 2
POOLER_RESOLUTION: 7
POOLER_SAMPLING_RATIO: 2
POOLER_TYPE: "ROIAlign"
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseV1ConvXHead"
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
DATASETS:
TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival")
TEST: ("densepose_coco_2014_minival",)
SOLVER:
IMS_PER_BATCH: 16
BASE_LR: 0.01
STEPS: (60000, 80000)
MAX_ITER: 90000
WARMUP_FACTOR: 0.1
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)

View File

@ -0,0 +1,16 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33dYBMemi9xOUFR0w"
BACKBONE:
NAME: "build_hrfpn_backbone"
RPN:
IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
ROI_HEADS:
IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)
CLIP_GRADIENTS:
ENABLED: True
CLIP_TYPE: "norm"
BASE_LR: 0.03

View File

@ -0,0 +1,23 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33ck0gvo5jfoWBOPo"
BACKBONE:
NAME: "build_hrfpn_backbone"
RPN:
IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
ROI_HEADS:
IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
HRNET:
STAGE2:
NUM_CHANNELS: [40, 80]
STAGE3:
NUM_CHANNELS: [40, 80, 160]
STAGE4:
NUM_CHANNELS: [40, 80, 160, 320]
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)
CLIP_GRADIENTS:
ENABLED: True
CLIP_TYPE: "norm"
BASE_LR: 0.03

View File

@ -0,0 +1,23 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "https://1drv.ms/u/s!Aus8VCZ_C_33dKvqI6pBZlifgJk"
BACKBONE:
NAME: "build_hrfpn_backbone"
RPN:
IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
ROI_HEADS:
IN_FEATURES: ['p1', 'p2', 'p3', 'p4', 'p5']
HRNET:
STAGE2:
NUM_CHANNELS: [48, 96]
STAGE3:
NUM_CHANNELS: [48, 96, 192]
STAGE4:
NUM_CHANNELS: [48, 96, 192, 384]
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)
CLIP_GRADIENTS:
ENABLED: True
CLIP_TYPE: "norm"
BASE_LR: 0.03

View File

@ -0,0 +1,18 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,18 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,10 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,18 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@ -0,0 +1,18 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@ -0,0 +1,8 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,17 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
ROI_DENSEPOSE_HEAD:
NUM_COARSE_SEGM_CHANNELS: 15
POOLER_RESOLUTION: 14
HEATMAP_SIZE: 56
INDEX_WEIGHTS: 2.0
PART_WEIGHTS: 0.3
POINT_REGRESSION_WEIGHTS: 0.1
DECODER_ON: False
SOLVER:
BASE_LR: 0.002
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,18 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,18 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,10 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,20 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
CLIP_TYPE: norm
CLIP_VALUE: 100.0
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@ -0,0 +1,18 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@ -0,0 +1,16 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
POINT_REGRESSION_WEIGHTS: 0.0005
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 130000
STEPS: (100000, 120000)
WARMUP_FACTOR: 0.025

View File

@ -0,0 +1,8 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
SOLVER:
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,17 @@
_BASE_: "Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
NUM_COARSE_SEGM_CHANNELS: 15
POOLER_RESOLUTION: 14
HEATMAP_SIZE: 56
INDEX_WEIGHTS: 2.0
PART_WEIGHTS: 0.3
POINT_REGRESSION_WEIGHTS: 0.1
DECODER_ON: False
SOLVER:
BASE_LR: 0.002
MAX_ITER: 130000
STEPS: (100000, 120000)

View File

@ -0,0 +1,121 @@
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
NAME: "build_resnet_fpn_backbone"
RESNETS:
OUT_FEATURES: ["res2", "res3", "res4", "res5"]
FPN:
IN_FEATURES: ["res2", "res3", "res4", "res5"]
ANCHOR_GENERATOR:
SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
RPN:
IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
PRE_NMS_TOPK_TEST: 1000 # Per FPN level
# Detectron1 uses 2000 proposals per-batch,
# (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
# which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
POST_NMS_TOPK_TRAIN: 1000
POST_NMS_TOPK_TEST: 1000
ROI_HEADS:
NAME: "StandardROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_FC: 2
POOLER_RESOLUTION: 7
ROI_MASK_HEAD:
NAME: "MaskRCNNConvUpsampleHead"
NUM_CONV: 4
POOLER_RESOLUTION: 14
DATASETS:
TRAIN: ("base_coco_2017_train",)
TEST: ("base_coco_2017_val", "densepose_chimps")
CATEGORY_MAPS:
"base_coco_2017_train":
"16": 1 # bird -> person
"17": 1 # cat -> person
"18": 1 # dog -> person
"19": 1 # horse -> person
"20": 1 # sheep -> person
"21": 1 # cow -> person
"22": 1 # elephant -> person
"23": 1 # bear -> person
"24": 1 # zebra -> person
"25": 1 # girafe -> person
"base_coco_2017_val":
"16": 1 # bird -> person
"17": 1 # cat -> person
"18": 1 # dog -> person
"19": 1 # horse -> person
"20": 1 # sheep -> person
"21": 1 # cow -> person
"22": 1 # elephant -> person
"23": 1 # bear -> person
"24": 1 # zebra -> person
"25": 1 # girafe -> person
WHITELISTED_CATEGORIES:
"base_coco_2017_train":
- 1 # person
- 16 # bird
- 17 # cat
- 18 # dog
- 19 # horse
- 20 # sheep
- 21 # cow
- 22 # elephant
- 23 # bear
- 24 # zebra
      - 25 # giraffe
"base_coco_2017_val":
- 1 # person
- 16 # bird
- 17 # cat
- 18 # dog
- 19 # horse
- 20 # sheep
- 21 # cow
- 22 # elephant
- 23 # bear
- 24 # zebra
      - 25 # giraffe
BOOTSTRAP_DATASETS:
- DATASET: "chimpnsee"
RATIO: 1.0
IMAGE_LOADER:
TYPE: "video_keyframe"
SELECT:
STRATEGY: "random_k"
NUM_IMAGES: 4
TRANSFORM:
TYPE: "resize"
MIN_SIZE: 800
MAX_SIZE: 1333
BATCH_SIZE: 8
NUM_WORKERS: 1
INFERENCE:
INPUT_BATCH_SIZE: 1
OUTPUT_BATCH_SIZE: 1
DATA_SAMPLER:
# supported types:
# densepose_uniform
# densepose_UV_confidence
# densepose_fine_segm_confidence
# densepose_coarse_segm_confidence
TYPE: "densepose_uniform"
COUNT_PER_CLASS: 8
FILTER:
TYPE: "detection_score"
MIN_VALUE: 0.8
BOOTSTRAP_MODEL:
WEIGHTS: ""
SOLVER:
IMS_PER_BATCH: 16
BASE_LR: 0.02
STEPS: (60000, 80000)
MAX_ITER: 90000
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2

View File

@ -0,0 +1,91 @@
MODEL:
META_ARCHITECTURE: "GeneralizedRCNN"
BACKBONE:
NAME: "build_resnet_fpn_backbone"
RESNETS:
OUT_FEATURES: ["res2", "res3", "res4", "res5"]
FPN:
IN_FEATURES: ["res2", "res3", "res4", "res5"]
ANCHOR_GENERATOR:
SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
RPN:
IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
PRE_NMS_TOPK_TEST: 1000 # Per FPN level
# Detectron1 uses 2000 proposals per-batch,
# (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
# which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
POST_NMS_TOPK_TRAIN: 1000
POST_NMS_TOPK_TEST: 1000
ROI_HEADS:
NAME: "StandardROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_BOX_HEAD:
NAME: "FastRCNNConvFCHead"
NUM_FC: 2
POOLER_RESOLUTION: 7
ROI_MASK_HEAD:
NAME: "MaskRCNNConvUpsampleHead"
NUM_CONV: 4
POOLER_RESOLUTION: 14
DATASETS:
TRAIN: ("base_coco_2017_train",)
TEST: ("base_coco_2017_val", "densepose_chimps")
CATEGORY_MAPS:
"base_coco_2017_train":
"16": 1 # bird -> person
"17": 1 # cat -> person
"18": 1 # dog -> person
"19": 1 # horse -> person
"20": 1 # sheep -> person
"21": 1 # cow -> person
"22": 1 # elephant -> person
"23": 1 # bear -> person
"24": 1 # zebra -> person
"25": 1 # girafe -> person
"base_coco_2017_val":
"16": 1 # bird -> person
"17": 1 # cat -> person
"18": 1 # dog -> person
"19": 1 # horse -> person
"20": 1 # sheep -> person
"21": 1 # cow -> person
"22": 1 # elephant -> person
"23": 1 # bear -> person
"24": 1 # zebra -> person
"25": 1 # girafe -> person
WHITELISTED_CATEGORIES:
"base_coco_2017_train":
- 1 # person
- 16 # bird
- 17 # cat
- 18 # dog
- 19 # horse
- 20 # sheep
- 21 # cow
- 22 # elephant
- 23 # bear
- 24 # zebra
      - 25 # giraffe
"base_coco_2017_val":
- 1 # person
- 16 # bird
- 17 # cat
- 18 # dog
- 19 # horse
- 20 # sheep
- 21 # cow
- 22 # elephant
- 23 # bear
- 24 # zebra
      - 25 # giraffe
SOLVER:
IMS_PER_BATCH: 16
BASE_LR: 0.02
STEPS: (60000, 80000)
MAX_ITER: 90000
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
VERSION: 2

View File

@ -0,0 +1,19 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseV1ConvXHead"
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,19 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,29 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
WARMUP_FACTOR: 0.025
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,27 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
WARMUP_FACTOR: 0.025
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,29 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseV1ConvXHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
WARMUP_FACTOR: 0.025
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,27 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
RESNETS:
DEPTH: 101
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseV1ConvXHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
WARMUP_FACTOR: 0.025
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,19 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseV1ConvXHead"
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,19 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,29 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
WARMUP_FACTOR: 0.025
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,27 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
WARMUP_FACTOR: 0.025
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,29 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseV1ConvXHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
WARMUP_FACTOR: 0.025
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,30 @@
_BASE_: "Base-RCNN-FPN-MC-B.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseV1ConvXHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
SEGM_CONFIDENCE:
ENABLED: True
POINT_REGRESSION_WEIGHTS: 0.0005
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
CLIP_TYPE: "norm"
WARMUP_FACTOR: 0.025
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,27 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DENSEPOSE_ON: True
ROI_HEADS:
NAME: "DensePoseROIHeads"
IN_FEATURES: ["p2", "p3", "p4", "p5"]
NUM_CLASSES: 1
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseV1ConvXHead"
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
POOLER_TYPE: "ROIAlign"
NUM_COARSE_SEGM_CHANNELS: 2
COARSE_SEGM_TRAINED_BY_MASKS: True
INDEX_WEIGHTS: 1.0
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
WARMUP_FACTOR: 0.025
DATASETS:
TRAIN: ("base_coco_2017_train", "densepose_coco_2014_train")
TEST: ("densepose_chimps",)

View File

@ -0,0 +1,7 @@
_BASE_: "Base-RCNN-FPN-MC.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: False
DENSEPOSE_ON: False
RESNETS:
DEPTH: 50

View File

@ -0,0 +1,7 @@
_BASE_: "../HRNet/densepose_rcnn_HRFPN_HRNet_w32_s1x.yaml"
DATASETS:
TRAIN: ("densepose_coco_2014_minival_100",)
TEST: ("densepose_coco_2014_minival_100",)
SOLVER:
MAX_ITER: 40
STEPS: (30,)

View File

@ -0,0 +1,11 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
ROI_DENSEPOSE_HEAD:
NAME: "DensePoseDeepLabHead"
DATASETS:
TRAIN: ("densepose_coco_2014_minival_100",)
TEST: ("densepose_coco_2014_minival_100",)
SOLVER:
MAX_ITER: 40
STEPS: (30,)

View File

@ -0,0 +1,13 @@
_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
MODEL:
WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
DATASETS:
TRAIN: ()
TEST: ("densepose_coco_2014_minival_100",)
TEST:
AUG:
ENABLED: True
MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200)
MAX_SIZE: 4000
FLIP: True
EXPECTED_RESULTS: [["bbox_TTA", "AP", 61.74, 0.03], ["densepose_gps_TTA", "AP", 60.22, 0.03], ["densepose_gpsm_TTA", "AP", 63.85, 0.03]]

View File

@ -0,0 +1,19 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "iid_iso"
POINT_REGRESSION_WEIGHTS: 0.0005
DATASETS:
TRAIN: ("densepose_coco_2014_minival_100",)
TEST: ("densepose_coco_2014_minival_100",)
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 40
STEPS: (30,)
WARMUP_FACTOR: 0.025

View File

@ -0,0 +1,19 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
ROI_DENSEPOSE_HEAD:
UV_CONFIDENCE:
ENABLED: True
TYPE: "indep_aniso"
POINT_REGRESSION_WEIGHTS: 0.0005
DATASETS:
TRAIN: ("densepose_coco_2014_minival_100",)
TEST: ("densepose_coco_2014_minival_100",)
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
MAX_ITER: 40
STEPS: (30,)
WARMUP_FACTOR: 0.025

View File

@ -0,0 +1,8 @@
_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml"
MODEL:
WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl"
DATASETS:
TRAIN: ()
TEST: ("densepose_coco_2014_minival_100",)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 59.27, 0.025], ["densepose_gps", "AP", 60.11, 0.02], ["densepose_gpsm", "AP", 64.20, 0.02]]

View File

@ -0,0 +1,9 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
DATASETS:
TRAIN: ("densepose_coco_2014_minival_100",)
TEST: ("densepose_coco_2014_minival_100",)
SOLVER:
MAX_ITER: 40
STEPS: (30,)

View File

@ -0,0 +1,18 @@
_BASE_: "../Base-DensePose-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
ROI_HEADS:
NUM_CLASSES: 1
DATASETS:
TRAIN: ("densepose_coco_2014_minival",)
TEST: ("densepose_coco_2014_minival",)
SOLVER:
CLIP_GRADIENTS:
ENABLED: True
CLIP_TYPE: norm
CLIP_VALUE: 1.0
MAX_ITER: 6000
STEPS: (5500, 5800)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 76.2477, 1.0], ["densepose_gps", "AP", 79.6090, 1.5], ["densepose_gpsm", "AP", 80.0061, 1.5]]

View File

@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.config import CfgNode as CN
def add_dataset_category_config(cfg: CN):
"""
Add config for additional category-related dataset options
- category whitelisting
- category mapping
"""
_C = cfg
_C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
_C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)
def add_bootstrap_config(cfg: CN):
"""
"""
_C = cfg
_C.BOOTSTRAP_DATASETS = []
_C.BOOTSTRAP_MODEL = CN()
_C.BOOTSTRAP_MODEL.WEIGHTS = ""
_C.BOOTSTRAP_MODEL.DEVICE = "cuda"
def get_bootstrap_dataset_config() -> CN:
_C = CN()
_C.DATASET = ""
# ratio used to mix data loaders
_C.RATIO = 0.1
# image loader
_C.IMAGE_LOADER = CN(new_allowed=True)
_C.IMAGE_LOADER.TYPE = ""
_C.IMAGE_LOADER.BATCH_SIZE = 4
_C.IMAGE_LOADER.NUM_WORKERS = 4
# inference
_C.INFERENCE = CN()
# batch size for model inputs
_C.INFERENCE.INPUT_BATCH_SIZE = 4
# batch size to group model outputs
_C.INFERENCE.OUTPUT_BATCH_SIZE = 2
# sampled data
_C.DATA_SAMPLER = CN(new_allowed=True)
_C.DATA_SAMPLER.TYPE = ""
# filter
_C.FILTER = CN(new_allowed=True)
_C.FILTER.TYPE = ""
return _C
def load_bootstrap_config(cfg: CN):
"""
Bootstrap datasets are given as a list of `dict` that are not automatically
converted into CfgNode. This method processes all bootstrap dataset entries
and ensures that they are in CfgNode format and comply with the specification
"""
if not cfg.BOOTSTRAP_DATASETS:
return
bootstrap_datasets_cfgnodes = []
for dataset_cfg in cfg.BOOTSTRAP_DATASETS:
_C = get_bootstrap_dataset_config().clone()
_C.merge_from_other_cfg(CN(dataset_cfg))
bootstrap_datasets_cfgnodes.append(_C)
cfg.BOOTSTRAP_DATASETS = bootstrap_datasets_cfgnodes
def add_densepose_head_config(cfg: CN):
"""
Add config for densepose head.
"""
_C = cfg
_C.MODEL.DENSEPOSE_ON = True
_C.MODEL.ROI_DENSEPOSE_HEAD = CN()
_C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
# Number of parts used for point labels
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
_C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
_C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
_C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
_C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
_C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
_C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2
# Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
_C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
    # Loss weights for annotation masks (14 parts)
_C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
# Loss weights for surface parts. (24 Parts)
_C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
# Loss weights for UV regression.
_C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
# Coarse segmentation is trained using instance segmentation task data
_C.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS = False
# For Decoder
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
_C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
# For DeepLab head
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
_C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
# Confidences
# Enable learning UV confidences (variances) along with the actual values
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
# UV confidence lower bound
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
# Enable learning segmentation confidences (variances) along with the actual values
_C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE = CN({"ENABLED": False})
# Segmentation confidence lower bound
_C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON = 0.01
# Statistical model type for confidence learning, possible values:
# - "iid_iso": statistically independent identically distributed residuals
# with isotropic covariance
# - "indep_aniso": statistically independent residuals with anisotropic
# covariances
_C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
# List of angles for rotation in data augmentation during training
_C.INPUT.ROTATION_ANGLES = [0]
_C.TEST.AUG.ROTATION_ANGLES = () # Rotation TTA
def add_hrnet_config(cfg: CN):
"""
Add config for HRNet backbone.
"""
_C = cfg
# For HigherHRNet w32
_C.MODEL.HRNET = CN()
_C.MODEL.HRNET.STEM_INPLANES = 64
_C.MODEL.HRNET.STAGE2 = CN()
_C.MODEL.HRNET.STAGE2.NUM_MODULES = 1
_C.MODEL.HRNET.STAGE2.NUM_BRANCHES = 2
_C.MODEL.HRNET.STAGE2.BLOCK = "BASIC"
_C.MODEL.HRNET.STAGE2.NUM_BLOCKS = [4, 4]
_C.MODEL.HRNET.STAGE2.NUM_CHANNELS = [32, 64]
_C.MODEL.HRNET.STAGE2.FUSE_METHOD = "SUM"
_C.MODEL.HRNET.STAGE3 = CN()
_C.MODEL.HRNET.STAGE3.NUM_MODULES = 4
_C.MODEL.HRNET.STAGE3.NUM_BRANCHES = 3
_C.MODEL.HRNET.STAGE3.BLOCK = "BASIC"
_C.MODEL.HRNET.STAGE3.NUM_BLOCKS = [4, 4, 4]
_C.MODEL.HRNET.STAGE3.NUM_CHANNELS = [32, 64, 128]
_C.MODEL.HRNET.STAGE3.FUSE_METHOD = "SUM"
_C.MODEL.HRNET.STAGE4 = CN()
_C.MODEL.HRNET.STAGE4.NUM_MODULES = 3
_C.MODEL.HRNET.STAGE4.NUM_BRANCHES = 4
_C.MODEL.HRNET.STAGE4.BLOCK = "BASIC"
_C.MODEL.HRNET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
_C.MODEL.HRNET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
_C.MODEL.HRNET.STAGE4.FUSE_METHOD = "SUM"
_C.MODEL.HRNET.HRFPN = CN()
_C.MODEL.HRNET.HRFPN.OUT_CHANNELS = 256
def add_densepose_config(cfg: CN):
add_densepose_head_config(cfg)
add_hrnet_config(cfg)
add_bootstrap_config(cfg)
add_dataset_category_config(cfg)
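As a usage note, a minimal sketch of how the options defined above are composed and read back; the YAML path is a placeholder for any config that defines `BOOTSTRAP_DATASETS`:
```
from detectron2.config import get_cfg

from densepose.config import add_densepose_config, load_bootstrap_config

cfg = get_cfg()
add_densepose_config(cfg)  # registers ROI_DENSEPOSE_HEAD, HRNET, BOOTSTRAP_* and category options
cfg.merge_from_file("path/to/bootstrap_config.yaml")  # placeholder path
load_bootstrap_config(cfg)  # turns plain-dict BOOTSTRAP_DATASETS entries into CfgNode objects

for dataset_cfg in cfg.BOOTSTRAP_DATASETS:
    print(dataset_cfg.DATASET, dataset_cfg.IMAGE_LOADER.TYPE, dataset_cfg.DATA_SAMPLER.TYPE)
```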

View File

@ -0,0 +1,23 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .build import (
build_detection_test_loader,
build_detection_train_loader,
build_combined_loader,
build_frame_selector,
build_inference_based_loaders,
has_inference_based_loaders,
BootstrapDatasetFactoryCatalog,
)
from .combined_loader import CombinedDataLoader
from .dataset_mapper import DatasetMapper
from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter
from .utils import is_relative_local_path, maybe_prepend_base_path
# ensure the builtin datasets are registered
from . import datasets
# ensure the bootstrap datasets builders are registered
from . import build
__all__ = [k for k in globals().keys() if not k.startswith("_")]
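A small, hedged sketch of one of the helpers re-exported here, `build_frame_selector`; the package path `densepose.data` is assumed, and the values mirror the `IMAGE_LOADER.SELECT` options used in the bootstrap configs:
```
from detectron2.config import CfgNode as CN

from densepose.data import build_frame_selector

# SELECT-style node, same shape as IMAGE_LOADER.SELECT in the bootstrap configs.
select_cfg = CN({"STRATEGY": "random_k", "NUM_IMAGES": 4})
selector = build_frame_selector(select_cfg)
print(type(selector).__name__)  # expected: RandomKFramesSelector
```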

View File

@ -0,0 +1,604 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import itertools
import logging
import numpy as np
from collections import UserDict
from typing import Any, Callable, Collection, Dict, Iterable, List, Optional, Sequence
import torch
from torch.utils.data.dataset import Dataset
from detectron2.config import CfgNode
from detectron2.data.build import (
build_batch_data_loader,
load_proposals_into_dataset,
print_instances_class_histogram,
trivial_batch_collator,
)
from detectron2.data.catalog import DatasetCatalog, Metadata, MetadataCatalog
from detectron2.data.common import DatasetFromList, MapDataset
from detectron2.data.samplers import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler
from detectron2.utils.comm import get_world_size
from densepose.config import get_bootstrap_dataset_config
from .combined_loader import CombinedDataLoader, Loader
from .dataset_mapper import DatasetMapper
from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK
from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY
from .datasets.dataset_type import DatasetType
from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter
from .samplers import (
DensePoseConfidenceBasedSampler,
DensePoseUniformSampler,
MaskFromDensePoseSampler,
PredictionToGroundTruthSampler,
)
from .transform import ImageResizeTransform
from .video import (
FirstKFramesSelector,
FrameSelectionStrategy,
LastKFramesSelector,
RandomKFramesSelector,
VideoKeyframeDataset,
video_list_from_file,
)
__all__ = ["build_detection_train_loader", "build_detection_test_loader"]
Instance = Dict[str, Any]
InstancePredicate = Callable[[Instance], bool]
def _compute_num_images_per_worker(cfg: CfgNode):
num_workers = get_world_size()
images_per_batch = cfg.SOLVER.IMS_PER_BATCH
assert (
images_per_batch % num_workers == 0
), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
images_per_batch, num_workers
)
assert (
images_per_batch >= num_workers
), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
images_per_batch, num_workers
)
images_per_worker = images_per_batch // num_workers
return images_per_worker
def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]):
meta = MetadataCatalog.get(dataset_name)
for dataset_dict in dataset_dicts:
for ann in dataset_dict["annotations"]:
ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]]
def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]):
# merge categories for all datasets
merged_categories = {}
for dataset_name in dataset_names:
meta = MetadataCatalog.get(dataset_name)
for cat_id, cat_name in meta.categories.items():
if cat_id not in merged_categories:
merged_categories[cat_id] = (cat_name, dataset_name)
continue
cat_name_other, dataset_name_other = merged_categories[cat_id]
if cat_name_other != cat_name:
raise ValueError(
f"Incompatible categories for category ID {cat_id}: "
f'dataset {dataset_name} value "{cat_name}", '
f'dataset {dataset_name_other} value "{cat_name_other}"'
)
merged_cat_id_to_cont_id = {}
for i, cat_id in enumerate(sorted(merged_categories.keys())):
merged_cat_id_to_cont_id[cat_id] = i
# add category maps to metadata
for dataset_name in dataset_names:
meta = MetadataCatalog.get(dataset_name)
categories = meta.get("categories")
meta.thing_classes = [categories[cat_id] for cat_id in sorted(categories.keys())]
meta.thing_dataset_id_to_contiguous_id = {
cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted(categories.keys())
}
meta.thing_contiguous_id_to_dataset_id = {
merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted(categories.keys())
}
def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
def has_annotations(instance: Instance) -> bool:
return "annotations" in instance
    def has_only_crowd_annotations(instance: Instance) -> bool:
for ann in instance["annotations"]:
if ann.get("is_crowd", 0) == 0:
return False
return True
def general_keep_instance_predicate(instance: Instance) -> bool:
        return has_annotations(instance) and not has_only_crowd_annotations(instance)
if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
return None
return general_keep_instance_predicate
def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
def has_sufficient_num_keypoints(instance: Instance) -> bool:
num_kpts = sum(
(np.array(ann["keypoints"][2::3]) > 0).sum()
for ann in instance["annotations"]
if "keypoints" in ann
)
return num_kpts >= min_num_keypoints
if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0):
return has_sufficient_num_keypoints
return None
def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
if not cfg.MODEL.MASK_ON:
return None
def has_mask_annotations(instance: Instance) -> bool:
return any("segmentation" in ann for ann in instance["annotations"])
return has_mask_annotations
def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
if not cfg.MODEL.DENSEPOSE_ON:
return None
use_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
def has_densepose_annotations(instance: Instance) -> bool:
for ann in instance["annotations"]:
if all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK) and (
(DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann)
):
return True
if use_masks and "segmentation" in ann:
return True
return False
return has_densepose_annotations
def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
specific_predicate_creators = [
_maybe_create_keypoints_keep_instance_predicate,
_maybe_create_mask_keep_instance_predicate,
_maybe_create_densepose_keep_instance_predicate,
]
predicates = [creator(cfg) for creator in specific_predicate_creators]
predicates = [p for p in predicates if p is not None]
if not predicates:
return None
def combined_predicate(instance: Instance) -> bool:
return any(p(instance) for p in predicates)
return combined_predicate
def _get_train_keep_instance_predicate(cfg: CfgNode):
general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg)
def combined_general_specific_keep_predicate(instance: Instance) -> bool:
return general_keep_predicate(instance) and combined_specific_keep_predicate(instance)
if (general_keep_predicate is None) and (combined_specific_keep_predicate is None):
return None
if general_keep_predicate is None:
return combined_specific_keep_predicate
if combined_specific_keep_predicate is None:
return general_keep_predicate
return combined_general_specific_keep_predicate
def _get_test_keep_instance_predicate(cfg: CfgNode):
general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
return general_keep_predicate
def _maybe_filter_and_map_categories(
dataset_name: str, dataset_dicts: List[Instance]
) -> List[Instance]:
meta = MetadataCatalog.get(dataset_name)
whitelisted_categories = meta.get("whitelisted_categories")
category_map = meta.get("category_map", {})
if whitelisted_categories is None and not category_map:
return dataset_dicts
filtered_dataset_dicts = []
for dataset_dict in dataset_dicts:
anns = []
for ann in dataset_dict["annotations"]:
cat_id = ann["category_id"]
if whitelisted_categories is not None and cat_id not in whitelisted_categories:
continue
ann["category_id"] = category_map.get(cat_id, cat_id)
anns.append(ann)
dataset_dict["annotations"] = anns
filtered_dataset_dicts.append(dataset_dict)
return filtered_dataset_dicts
def _add_category_whitelists_to_metadata(cfg: CfgNode):
for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items():
meta = MetadataCatalog.get(dataset_name)
meta.whitelisted_categories = whitelisted_cat_ids
logger = logging.getLogger(__name__)
logger.info(
"Whitelisted categories for dataset {}: {}".format(
dataset_name, meta.whitelisted_categories
)
)
def _add_category_maps_to_metadata(cfg: CfgNode):
for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items():
category_map = {
int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items()
}
meta = MetadataCatalog.get(dataset_name)
meta.category_map = category_map
logger = logging.getLogger(__name__)
logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map))
def combine_detection_dataset_dicts(
dataset_names: Collection[str],
keep_instance_predicate: Optional[InstancePredicate] = None,
proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
"""
Load and prepare dataset dicts for training / testing
Args:
dataset_names (Collection[str]): a list of dataset names
keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
applied to instance dicts which defines whether to keep the instance
proposal_files (Collection[str]): if given, a list of object proposal files
that match each dataset in `dataset_names`.
"""
assert len(dataset_names)
if proposal_files is None:
proposal_files = [None] * len(dataset_names)
assert len(dataset_names) == len(proposal_files)
# load annotations and dataset metadata
dataset_map = {}
for dataset_name in dataset_names:
dataset_dicts = DatasetCatalog.get(dataset_name)
dataset_map[dataset_name] = dataset_dicts
# initialize category maps
_add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
# apply category maps
all_datasets_dicts = []
for dataset_name, proposal_file in zip(dataset_names, proposal_files):
dataset_dicts = dataset_map[dataset_name]
assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
if proposal_file is not None:
dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
_map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
print_instances_class_histogram(
dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes
)
all_datasets_dicts.append(dataset_dicts)
if keep_instance_predicate is not None:
all_datasets_dicts_plain = [
d
for d in itertools.chain.from_iterable(all_datasets_dicts)
if keep_instance_predicate(d)
]
else:
all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts))
return all_datasets_dicts_plain
def build_detection_train_loader(cfg: CfgNode, mapper=None):
"""
A data loader is created in a way similar to that of Detectron2.
The main differences are:
    - it allows combining datasets with different but compatible object category sets
The data loader is created by the following steps:
1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
2. Start workers to work on the dicts. Each worker will:
* Map each metadata dict into another format to be consumed by the model.
* Batch them by simply putting dicts into a list.
The batched ``list[mapped_dict]`` is what this dataloader will return.
Args:
cfg (CfgNode): the config
mapper (callable): a callable which takes a sample (dict) from dataset and
returns the format to be consumed by the model.
By default it will be `DatasetMapper(cfg, True)`.
Returns:
an infinite iterator of training data
"""
_add_category_whitelists_to_metadata(cfg)
_add_category_maps_to_metadata(cfg)
dataset_dicts = combine_detection_dataset_dicts(
cfg.DATASETS.TRAIN,
keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
)
dataset = DatasetFromList(dataset_dicts, copy=False)
if mapper is None:
mapper = DatasetMapper(cfg, True)
dataset = MapDataset(dataset, mapper)
sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
logger = logging.getLogger(__name__)
logger.info("Using training sampler {}".format(sampler_name))
if sampler_name == "TrainingSampler":
sampler = TrainingSampler(len(dataset))
elif sampler_name == "RepeatFactorTrainingSampler":
repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
)
sampler = RepeatFactorTrainingSampler(repeat_factors)
else:
raise ValueError("Unknown training sampler: {}".format(sampler_name))
return build_batch_data_loader(
dataset,
sampler,
cfg.SOLVER.IMS_PER_BATCH,
aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
num_workers=cfg.DATALOADER.NUM_WORKERS,
)
def build_detection_test_loader(cfg, dataset_name, mapper=None):
"""
Similar to `build_detection_train_loader`, but this function uses the given
`dataset_name` argument (instead of the names in cfg) and uses batch size 1.
Args:
cfg: a detectron2 CfgNode
dataset_name (str): a name of the dataset that's available in the DatasetCatalog
mapper (callable): a callable which takes a sample (dict) from dataset
and returns the format to be consumed by the model.
By default it will be `DatasetMapper(cfg, False)`.
Returns:
DataLoader: a torch DataLoader, that loads the given detection
dataset, with test-time transformation and batching.
"""
_add_category_whitelists_to_metadata(cfg)
_add_category_maps_to_metadata(cfg)
dataset_dicts = combine_detection_dataset_dicts(
[dataset_name],
keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
proposal_files=[
cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
]
if cfg.MODEL.LOAD_PROPOSALS
else None,
)
dataset = DatasetFromList(dataset_dicts)
if mapper is None:
mapper = DatasetMapper(cfg, False)
dataset = MapDataset(dataset, mapper)
sampler = InferenceSampler(len(dataset))
# Always use 1 image per worker during inference since this is the
# standard when reporting inference time in papers.
batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
data_loader = torch.utils.data.DataLoader(
dataset,
num_workers=cfg.DATALOADER.NUM_WORKERS,
batch_sampler=batch_sampler,
collate_fn=trivial_batch_collator,
)
return data_loader
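# Illustrative usage sketch (hypothetical helper): evaluating on one registered
# dataset; the dataset name is an assumption taken from the registrations below.
def _example_test_loader_usage(cfg: CfgNode):
    loader = build_detection_test_loader(cfg, "densepose_coco_2014_minival")
    # Batches of size 1, with test-time transforms applied and no annotations.
    return next(iter(loader))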
def build_frame_selector(cfg: CfgNode):
strategy = FrameSelectionStrategy(cfg.STRATEGY)
if strategy == FrameSelectionStrategy.RANDOM_K:
frame_selector = RandomKFramesSelector(cfg.NUM_IMAGES)
elif strategy == FrameSelectionStrategy.FIRST_K:
frame_selector = FirstKFramesSelector(cfg.NUM_IMAGES)
elif strategy == FrameSelectionStrategy.LAST_K:
frame_selector = LastKFramesSelector(cfg.NUM_IMAGES)
elif strategy == FrameSelectionStrategy.ALL:
frame_selector = None
return frame_selector
def build_transform(cfg: CfgNode, data_type: str):
if cfg.TYPE == "resize":
if data_type == "image":
return ImageResizeTransform(cfg.MIN_SIZE, cfg.MAX_SIZE)
raise ValueError(f"Unknown transform {cfg.TYPE} for data type {data_type}")
def build_combined_loader(cfg: CfgNode, loaders: Collection[Loader], ratios: Sequence[float]):
images_per_worker = _compute_num_images_per_worker(cfg)
return CombinedDataLoader(loaders, images_per_worker, ratios)
def build_bootstrap_dataset(dataset_name: str, cfg: CfgNode) -> Sequence[torch.Tensor]:
"""
Build dataset that provides data to bootstrap on
Args:
dataset_name (str): Name of the dataset, needs to have associated metadata
to load the data
cfg (CfgNode): bootstrapping config
Returns:
Sequence[Tensor] - dataset that provides image batches, Tensors of size
[N, C, H, W] of type float32
"""
logger = logging.getLogger(__name__)
meta = MetadataCatalog.get(dataset_name)
factory = BootstrapDatasetFactoryCatalog.get(meta.dataset_type)
dataset = None
if factory is not None:
dataset = factory(meta, cfg)
if dataset is None:
logger.warning(f"Failed to create dataset {dataset_name} of type {meta.dataset_type}")
return dataset
def build_data_sampler(cfg: CfgNode):
if cfg.TYPE == "densepose_uniform":
data_sampler = PredictionToGroundTruthSampler()
# transform densepose pred -> gt
data_sampler.register_sampler(
"pred_densepose",
"gt_densepose",
DensePoseUniformSampler(count_per_class=cfg.COUNT_PER_CLASS),
)
data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
return data_sampler
elif cfg.TYPE == "densepose_UV_confidence":
data_sampler = PredictionToGroundTruthSampler()
# transform densepose pred -> gt
data_sampler.register_sampler(
"pred_densepose",
"gt_densepose",
DensePoseConfidenceBasedSampler(
confidence_channel="sigma_2",
count_per_class=cfg.COUNT_PER_CLASS,
search_proportion=0.5,
),
)
data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
return data_sampler
elif cfg.TYPE == "densepose_fine_segm_confidence":
data_sampler = PredictionToGroundTruthSampler()
# transform densepose pred -> gt
data_sampler.register_sampler(
"pred_densepose",
"gt_densepose",
DensePoseConfidenceBasedSampler(
confidence_channel="fine_segm_confidence",
count_per_class=cfg.COUNT_PER_CLASS,
search_proportion=0.5,
),
)
data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
return data_sampler
elif cfg.TYPE == "densepose_coarse_segm_confidence":
data_sampler = PredictionToGroundTruthSampler()
# transform densepose pred -> gt
data_sampler.register_sampler(
"pred_densepose",
"gt_densepose",
DensePoseConfidenceBasedSampler(
confidence_channel="coarse_segm_confidence",
count_per_class=cfg.COUNT_PER_CLASS,
search_proportion=0.5,
),
)
data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
return data_sampler
raise ValueError(f"Unknown data sampler type {cfg.TYPE}")
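# Illustrative sketch (hypothetical helper): a minimal DATA_SAMPLER config that
# the dispatcher above accepts; the values are arbitrary assumptions.
def _example_data_sampler_from_cfg():
    sampler_cfg = CfgNode({"TYPE": "densepose_uniform", "COUNT_PER_CLASS": 8})
    return build_data_sampler(sampler_cfg)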
def build_data_filter(cfg: CfgNode):
if cfg.TYPE == "detection_score":
min_score = cfg.MIN_VALUE
return ScoreBasedFilter(min_score=min_score)
raise ValueError(f"Unknown data filter type {cfg.TYPE}")
def build_inference_based_loader(
cfg: CfgNode, dataset_cfg: CfgNode, model: torch.nn.Module
) -> InferenceBasedLoader:
"""
Constructs data loader based on inference results of a model.
"""
dataset = build_bootstrap_dataset(dataset_cfg.DATASET, dataset_cfg.IMAGE_LOADER)
training_sampler = TrainingSampler(len(dataset))
data_loader = torch.utils.data.DataLoader(
dataset,
batch_size=dataset_cfg.IMAGE_LOADER.BATCH_SIZE,
sampler=training_sampler,
num_workers=dataset_cfg.IMAGE_LOADER.NUM_WORKERS,
collate_fn=trivial_batch_collator,
)
return InferenceBasedLoader(
model,
data_loader=data_loader,
data_sampler=build_data_sampler(dataset_cfg.DATA_SAMPLER),
data_filter=build_data_filter(dataset_cfg.FILTER),
shuffle=True,
batch_size=dataset_cfg.INFERENCE.OUTPUT_BATCH_SIZE,
inference_batch_size=dataset_cfg.INFERENCE.INPUT_BATCH_SIZE,
)
def has_inference_based_loaders(cfg: CfgNode) -> bool:
"""
Returns True if at least one inference-based loader must
be instantiated for training
"""
return len(cfg.BOOTSTRAP_DATASETS) > 0
def build_inference_based_loaders(
cfg: CfgNode, model: torch.nn.Module
) -> List[InferenceBasedLoader]:
loaders = []
ratios = []
for dataset_spec in cfg.BOOTSTRAP_DATASETS:
dataset_cfg = get_bootstrap_dataset_config().clone()
dataset_cfg.merge_from_other_cfg(CfgNode(dataset_spec))
loader = build_inference_based_loader(cfg, dataset_cfg, model)
loaders.append(loader)
ratios.append(dataset_cfg.RATIO)
return loaders, ratios
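# Illustrative sketch (hypothetical helper): how the bootstrap pieces are wired
# together, assuming `cfg` has BOOTSTRAP_DATASETS configured and `model` is a
# detection model in eval mode.
def _example_bootstrap_combined_loader(cfg: CfgNode, model: torch.nn.Module):
    loaders, ratios = build_inference_based_loaders(cfg, model)
    return build_combined_loader(cfg, loaders, ratios)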
def build_video_list_dataset(meta: Metadata, cfg: CfgNode):
video_list_fpath = meta.video_list_fpath
video_base_path = meta.video_base_path
if cfg.TYPE == "video_keyframe":
frame_selector = build_frame_selector(cfg.SELECT)
transform = build_transform(cfg.TRANSFORM, data_type="image")
video_list = video_list_from_file(video_list_fpath, video_base_path)
return VideoKeyframeDataset(video_list, frame_selector, transform)
class _BootstrapDatasetFactoryCatalog(UserDict):
"""
A global dictionary that stores dataset creation functions used to build
bootstrap datasets from metadata and config, keyed by DatasetType
"""
def register(self, dataset_type: DatasetType, factory: Callable[[Metadata, CfgNode], Dataset]):
"""
Args:
dataset_type (DatasetType): a DatasetType e.g. DatasetType.VIDEO_LIST
factory (Callable[Metadata, CfgNode]): a callable which takes Metadata and cfg
arguments and returns a dataset object.
"""
assert dataset_type not in self, "Dataset '{}' is already registered!".format(dataset_type)
self[dataset_type] = factory
BootstrapDatasetFactoryCatalog = _BootstrapDatasetFactoryCatalog()
BootstrapDatasetFactoryCatalog.register(DatasetType.VIDEO_LIST, build_video_list_dataset)

View File

@ -0,0 +1,44 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import random
from collections import deque
from typing import Any, Collection, Deque, Iterable, Iterator, List, Sequence
Loader = Iterable[Any]
def _pooled_next(iterator: Iterator[Any], pool: Deque[Any]):
if not pool:
pool.extend(next(iterator))
return pool.popleft()
class CombinedDataLoader:
"""
Combines data loaders using the provided sampling ratios
"""
BATCH_COUNT = 100
def __init__(self, loaders: Collection[Loader], batch_size: int, ratios: Sequence[float]):
self.loaders = loaders
self.batch_size = batch_size
self.ratios = ratios
def __iter__(self) -> Iterator[List[Any]]:
iters = [iter(loader) for loader in self.loaders]
indices = []
pool = [deque() for _ in iters]  # a separate buffer per loader (avoid sharing one deque)
# infinite iterator, as in D2
while True:
if not indices:
# just a buffer of indices, its size doesn't matter
# as long as it's a multiple of batch_size
k = self.batch_size * self.BATCH_COUNT
indices = random.choices(range(len(self.loaders)), self.ratios, k=k)
try:
batch = [_pooled_next(iters[i], pool[i]) for i in indices[: self.batch_size]]
except StopIteration:
break
indices = indices[self.batch_size :]
yield batch
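# Minimal self-contained sketch of the mixing behaviour (toy infinite "loaders";
# the 3:1 ratio and batch size are arbitrary assumptions).
def _example_combined_loader():
    import itertools

    loader_a = itertools.repeat(["a1", "a2"])  # each yielded item is a batch (a list)
    loader_b = itertools.repeat(["b1", "b2"])
    combined = CombinedDataLoader([loader_a, loader_b], batch_size=4, ratios=[0.75, 0.25])
    # A combined batch is a list of 4 samples drawn roughly 3:1 from loader_a vs loader_b.
    return next(iter(combined))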

View File

@ -0,0 +1,168 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import logging
from typing import Any, Dict, Tuple
import torch
from fvcore.common.file_io import PathManager
from detectron2.data import MetadataCatalog
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.layers import ROIAlign
from detectron2.structures import BoxMode
from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
def build_augmentation(cfg, is_train):
logger = logging.getLogger(__name__)
result = utils.build_augmentation(cfg, is_train)
if is_train:
random_rotation = T.RandomRotation(
cfg.INPUT.ROTATION_ANGLES, expand=False, sample_style="choice"
)
result.append(random_rotation)
logger.info("DensePose-specific augmentation used in training: " + str(random_rotation))
return result
class DatasetMapper:
"""
A customized version of `detectron2.data.DatasetMapper`
"""
def __init__(self, cfg, is_train=True):
self.augmentation = build_augmentation(cfg, is_train)
# fmt: off
self.img_format = cfg.INPUT.FORMAT
self.mask_on = (
cfg.MODEL.MASK_ON or (
cfg.MODEL.DENSEPOSE_ON
and cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS)
)
self.keypoint_on = cfg.MODEL.KEYPOINT_ON
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
# fmt: on
if self.keypoint_on and is_train:
# Flip only makes sense in training
self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
else:
self.keypoint_hflip_indices = None
if self.densepose_on:
densepose_transform_srcs = [
MetadataCatalog.get(ds).densepose_transform_src
for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
]
assert len(densepose_transform_srcs) > 0
# TODO: check that DensePose transformation data is the same for
# all the datasets. Otherwise one would have to pass DB ID with
# each entry to select proper transformation data. For now, since
# all DensePose annotated data uses the same data semantics, we
# omit this check.
densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0])
self.densepose_transform_data = DensePoseTransformData.load(
densepose_transform_data_fpath
)
self.is_train = is_train
def __call__(self, dataset_dict):
"""
Args:
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
Returns:
dict: a format that builtin models in detectron2 accept
"""
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
utils.check_image_size(dataset_dict, image)
image, transforms = T.apply_transform_gens(self.augmentation, image)
image_shape = image.shape[:2] # h, w
dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
if not self.is_train:
dataset_dict.pop("annotations", None)
return dataset_dict
for anno in dataset_dict["annotations"]:
if not self.mask_on:
anno.pop("segmentation", None)
if not self.keypoint_on:
anno.pop("keypoints", None)
# USER: Implement additional transformations if you have other types of data
# USER: Don't call transpose_densepose if you don't need
annos = [
self._transform_densepose(
utils.transform_instance_annotations(
obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
),
transforms,
)
for obj in dataset_dict.pop("annotations")
if obj.get("iscrowd", 0) == 0
]
if self.mask_on:
self._add_densepose_masks_as_segmentation(annos, image_shape)
instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask")
densepose_annotations = [obj.get("densepose") for obj in annos]
if densepose_annotations and not all(v is None for v in densepose_annotations):
instances.gt_densepose = DensePoseList(
densepose_annotations, instances.gt_boxes, image_shape
)
dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
return dataset_dict
def _transform_densepose(self, annotation, transforms):
if not self.densepose_on:
return annotation
# Handle densepose annotations
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
if is_valid:
densepose_data = DensePoseDataRelative(annotation, cleanup=True)
densepose_data.apply_transform(transforms, self.densepose_transform_data)
annotation["densepose"] = densepose_data
else:
# logger = logging.getLogger(__name__)
# logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid))
DensePoseDataRelative.cleanup_annotation(annotation)
# NOTE: annotations for certain instances may be unavailable.
# 'None' is accepted by the DensePoseList data structure.
annotation["densepose"] = None
return annotation
def _add_densepose_masks_as_segmentation(
self, annotations: Dict[str, Any], image_shape_hw: Tuple[int, int]
):
for obj in annotations:
if ("densepose" not in obj) or ("segmentation" in obj):
continue
# DP segmentation: torch.Tensor [S, S] of float32, S=256
segm_dp = torch.zeros_like(obj["densepose"].segm)
segm_dp[obj["densepose"].segm > 0] = 1
segm_h, segm_w = segm_dp.shape
bbox_segm_dp = torch.tensor((0, 0, segm_h - 1, segm_w - 1), dtype=torch.float32)
# image bbox
x0, y0, x1, y1 = (
v.item() for v in BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
)
segm_aligned = (
ROIAlign((y1 - y0, x1 - x0), 1.0, 0, aligned=True)
.forward(segm_dp.view(1, 1, *segm_dp.shape), bbox_segm_dp)
.squeeze()
)
image_mask = torch.zeros(*image_shape_hw, dtype=torch.float32)
image_mask[y0:y1, x0:x1] = segm_aligned
# segmentation for BitMask: np.array [H, W] of np.bool
obj["segmentation"] = image_mask >= 0.5
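# Illustrative sketch (hypothetical helper): mapping one raw dataset dict into
# the format consumed by the model, assuming `cfg` is a composed DensePose
# config and `dataset_dicts` came from combine_detection_dataset_dicts above.
def _example_map_one_record(cfg, dataset_dicts):
    mapper = DatasetMapper(cfg, is_train=True)
    mapped = mapper(dataset_dicts[0])
    # "image" is a CHW float32 tensor; "instances" carries boxes, masks,
    # keypoints and DensePose annotations where available.
    return mapped["image"].shape, mapped["instances"]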

View File

@ -0,0 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from . import builtin # ensure the builtin datasets are registered
__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]

View File

@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .chimpnsee import register_dataset as register_chimpnsee_dataset
from .coco import BASE_DATASETS as BASE_COCO_DATASETS
from .coco import DATASETS as COCO_DATASETS
from .coco import register_datasets as register_coco_datasets
DEFAULT_DATASETS_ROOT = "datasets"
register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)
register_chimpnsee_dataset(DEFAULT_DATASETS_ROOT)

View File

@ -0,0 +1,28 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import os
from typing import Optional
from detectron2.data import DatasetCatalog, MetadataCatalog
from ..utils import maybe_prepend_base_path
from .dataset_type import DatasetType
CHIMPNSEE_DATASET_NAME = "chimpnsee"
def register_dataset(datasets_root: Optional[os.PathLike] = None):
def empty_load_callback():
pass
video_list_fpath = maybe_prepend_base_path(
datasets_root, "chimpnsee/cdna.eva.mpg.de/video_list.txt"
)
video_base_path = maybe_prepend_base_path(datasets_root, "chimpnsee/cdna.eva.mpg.de")
DatasetCatalog.register(CHIMPNSEE_DATASET_NAME, empty_load_callback)
MetadataCatalog.get(CHIMPNSEE_DATASET_NAME).set(
dataset_type=DatasetType.VIDEO_LIST,
video_list_fpath=video_list_fpath,
video_base_path=video_base_path,
)

View File

@ -0,0 +1,324 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import io
import logging
import os
from collections import defaultdict
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional
from fvcore.common.file_io import PathManager
from fvcore.common.timer import Timer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from ..utils import maybe_prepend_base_path
DENSEPOSE_MASK_KEY = "dp_masks"
DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"
@dataclass
class CocoDatasetInfo:
name: str
images_root: str
annotations_fpath: str
DATASETS = [
CocoDatasetInfo(
name="densepose_coco_2014_train",
images_root="coco/train2014",
annotations_fpath="coco/annotations/densepose_train2014.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_minival",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_minival2014.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_minival_100",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_minival2014_100.json",
),
CocoDatasetInfo(
name="densepose_coco_2014_valminusminival",
images_root="coco/val2014",
annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
),
CocoDatasetInfo(
name="densepose_chimps",
images_root="densepose_evolution/densepose_chimps",
annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json",
),
CocoDatasetInfo(
name="posetrack2017_train",
images_root="posetrack2017/posetrack_data_2017",
annotations_fpath="posetrack2017/densepose_posetrack_train2017.json",
),
CocoDatasetInfo(
name="posetrack2017_val",
images_root="posetrack2017/posetrack_data_2017",
annotations_fpath="posetrack2017/densepose_posetrack_val2017.json",
),
]
BASE_DATASETS = [
CocoDatasetInfo(
name="base_coco_2017_train",
images_root="coco/train2017",
annotations_fpath="coco/annotations/instances_train2017.json",
),
CocoDatasetInfo(
name="base_coco_2017_val",
images_root="coco/val2017",
annotations_fpath="coco/annotations/instances_val2017.json",
),
CocoDatasetInfo(
name="base_coco_2017_val_100",
images_root="coco/val2017",
annotations_fpath="coco/annotations/instances_val2017_100.json",
),
]
def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]:
"""
Returns metadata associated with COCO DensePose datasets
Args:
base_path: Optional[os.PathLike]
Base path used to load metadata from
Returns:
Dict[str, Any]
Metadata in the form of a dictionary
"""
meta = {
"densepose_transform_src": maybe_prepend_base_path(base_path, "UV_symmetry_transforms.mat"),
"densepose_smpl_subdiv": maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"),
"densepose_smpl_subdiv_transform": maybe_prepend_base_path(
base_path, "SMPL_SUBDIV_TRANSFORM.mat"
),
}
return meta
def _load_coco_annotations(json_file: str):
"""
Load COCO annotations from a JSON file
Args:
json_file: str
Path to the file to load annotations from
Returns:
Instance of `pycocotools.coco.COCO` that provides access to annotations
data
"""
from pycocotools.coco import COCO
logger = logging.getLogger(__name__)
timer = Timer()
with contextlib.redirect_stdout(io.StringIO()):
coco_api = COCO(json_file)
if timer.seconds() > 1:
logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
return coco_api
def _add_categories_metadata(dataset_name: str, categories: Dict[str, Any]):
meta = MetadataCatalog.get(dataset_name)
meta.categories = {c["id"]: c["name"] for c in categories}
logger = logging.getLogger(__name__)
logger.info("Dataset {} categories: {}".format(dataset_name, categories))
def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
if "minival" in json_file:
# Skip validation on COCO2014 valminusminival and minival annotations
# The ratio of buggy annotations there is tiny and does not affect accuracy
# Therefore we explicitly white-list them
return
ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
json_file
)
def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "bbox" not in ann_dict:
return
obj["bbox"] = ann_dict["bbox"]
obj["bbox_mode"] = BoxMode.XYWH_ABS
def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "segmentation" not in ann_dict:
return
segm = ann_dict["segmentation"]
if not isinstance(segm, dict):
# filter out invalid polygons (< 3 points)
segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
if len(segm) == 0:
return
obj["segmentation"] = segm
def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
if "keypoints" not in ann_dict:
return
keypts = ann_dict["keypoints"] # list[int]
for idx, v in enumerate(keypts):
if idx % 3 != 2:
# COCO's segmentation coordinates are floating points in [0, H or W],
# but keypoint coordinates are integers in [0, H-1 or W-1]
# Therefore we assume the coordinates are "pixel indices" and
# add 0.5 to convert to floating point coordinates.
keypts[idx] = v + 0.5
obj["keypoints"] = keypts
def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
for key in DENSEPOSE_KEYS:
if key in ann_dict:
obj[key] = ann_dict[key]
def _combine_images_with_annotations(
dataset_name: str,
image_root: str,
img_datas: Iterable[Dict[str, Any]],
ann_datas: Iterable[Iterable[Dict[str, Any]]],
):
ann_keys = ["iscrowd", "category_id"]
dataset_dicts = []
contains_video_frame_info = False
for img_dict, ann_dicts in zip(img_datas, ann_datas):
record = {}
record["file_name"] = os.path.join(image_root, img_dict["file_name"])
record["height"] = img_dict["height"]
record["width"] = img_dict["width"]
record["image_id"] = img_dict["id"]
record["dataset"] = dataset_name
if "frame_id" in img_dict:
record["frame_id"] = img_dict["frame_id"]
record["video_id"] = img_dict.get("vid_id", None)
contains_video_frame_info = True
objs = []
for ann_dict in ann_dicts:
assert ann_dict["image_id"] == record["image_id"]
assert ann_dict.get("ignore", 0) == 0
obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict}
_maybe_add_bbox(obj, ann_dict)
_maybe_add_segm(obj, ann_dict)
_maybe_add_keypoints(obj, ann_dict)
_maybe_add_densepose(obj, ann_dict)
objs.append(obj)
record["annotations"] = objs
dataset_dicts.append(record)
if contains_video_frame_info:
create_video_frame_mapping(dataset_name, dataset_dicts)
return dataset_dicts
def create_video_frame_mapping(dataset_name, dataset_dicts):
mapping = defaultdict(dict)
for d in dataset_dicts:
video_id = d.get("video_id")
if video_id is None:
continue
mapping[video_id].update({d["frame_id"]: d["file_name"]})
MetadataCatalog.get(dataset_name).set(video_frame_mapping=mapping)
def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
"""
Loads a JSON file with annotations in COCO instances format.
Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata
in a more flexible way. Postpones category mapping to a later stage to be
able to combine several datasets with different (but coherent) sets of
categories.
Args:
annotations_json_file: str
Path to the JSON file with annotations in COCO instances format.
image_root: str
directory that contains all the images
dataset_name: str
the name that identifies a dataset, e.g. "densepose_coco_2014_train"
"""
coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file))
_add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds()))
# sort indices for reproducible results
img_ids = sorted(coco_api.imgs.keys())
# imgs is a list of dicts, each looks something like:
# {'license': 4,
# 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
# 'file_name': 'COCO_val2014_000000001268.jpg',
# 'height': 427,
# 'width': 640,
# 'date_captured': '2013-11-17 05:57:24',
# 'id': 1268}
imgs = coco_api.loadImgs(img_ids)
logger = logging.getLogger(__name__)
logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file))
# anns is a list[list[dict]], where each dict is an annotation
# record for an object. The inner list enumerates the objects in an image
# and the outer list enumerates over images.
anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
_verify_annotations_have_unique_ids(annotations_json_file, anns)
dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns)
return dataset_records
def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None):
"""
Registers provided COCO DensePose dataset
Args:
dataset_data: CocoDatasetInfo
Dataset data
datasets_root: Optional[os.PathLike]
Datasets root folder (default: None)
"""
annotations_fpath = maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
images_root = maybe_prepend_base_path(datasets_root, dataset_data.images_root)
def load_annotations():
return load_coco_json(
annotations_json_file=annotations_fpath,
image_root=images_root,
dataset_name=dataset_data.name,
)
DatasetCatalog.register(dataset_data.name, load_annotations)
MetadataCatalog.get(dataset_data.name).set(
json_file=annotations_fpath,
image_root=images_root,
**get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
)
def register_datasets(
datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None
):
"""
Registers provided COCO DensePose datasets
Args:
datasets_data: Iterable[CocoDatasetInfo]
An iterable of dataset descriptors
datasets_root: Optional[os.PathLike]
Datasets root folder (default: None)
"""
for dataset_data in datasets_data:
register_dataset(dataset_data, datasets_root)
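# Illustrative sketch (hypothetical helper): registering an additional
# COCO-format DensePose dataset; the name and paths are placeholder assumptions.
def _example_register_custom_dataset():
    my_dataset = CocoDatasetInfo(
        name="densepose_my_custom_train",
        images_root="my_dataset/images",
        annotations_fpath="my_dataset/annotations/densepose_train.json",
    )
    register_dataset(my_dataset, datasets_root="datasets")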

View File

@ -0,0 +1,11 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from enum import Enum
class DatasetType(Enum):
"""
Dataset type, mostly used for datasets that contain data to bootstrap models on
"""
VIDEO_LIST = "video_list"

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
from typing import Callable, List, Optional
import torch
from torch.utils.data.dataset import Dataset
from detectron2.data.detection_utils import read_image
ImageTransform = Callable[[torch.Tensor], torch.Tensor]
class ImageListDataset(Dataset):
"""
Dataset that provides images from a list.
"""
_EMPTY_IMAGE = torch.empty((1, 1, 3))
def __init__(self, image_list: List[str], transform: Optional[ImageTransform] = None):
"""
Args:
image_list (List[str]): list of paths to image files
"""
self.image_list = image_list
self.transform = transform
def __getitem__(self, idx: int) -> torch.Tensor:
"""
Gets the selected image from the list
Args:
idx (int): index of the image in the image list
Returns:
image (torch.Tensor): tensor of size [H, W, 3]
"""
fpath = self.image_list[idx]
try:
image = torch.from_numpy(np.ascontiguousarray(read_image(fpath, format="BGR")))
if self.transform is not None:
image = self.transform(image.unsqueeze(0))[0] # Transforms are done on batches
return image
except (OSError, RuntimeError) as e:
logger = logging.getLogger(__name__)
logger.warning(f"Error opening image file {fpath}: {e}")
return self._EMPTY_IMAGE
def __len__(self):
return len(self.image_list)
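# Illustrative sketch (hypothetical helper): the file paths below are placeholder
# assumptions; unreadable files yield the (1, 1, 3) placeholder tensor instead of
# raising, so the dataset never breaks a data loading loop.
def _example_image_list_dataset():
    dataset = ImageListDataset(["/tmp/example_0.jpg", "/tmp/example_1.jpg"])
    return len(dataset), dataset[0].shape  # 2 and [H, W, 3] (or [1, 1, 3] if unreadable)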

View File

@ -0,0 +1,146 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import random
from typing import Any, Callable, Iterable, Iterator, List, Optional, Tuple
import torch
from torch import nn
SampledData = Any
ModelOutput = Any
def _grouper(iterable: Iterable[Any], n: int, fillvalue=None) -> Iterator[Tuple[Any]]:
"""
Group elements of an iterable by chunks of size `n`, e.g.
grouper(range(9), 4) ->
(0, 1, 2, 3), (4, 5, 6, 7), (8, None, None, None)
"""
it = iter(iterable)
while True:
values = []
for _ in range(n):
try:
value = next(it)
except StopIteration:
if values:
values.extend([fillvalue] * (n - len(values)))
yield tuple(values)
return
values.append(value)
yield tuple(values)
class ScoreBasedFilter:
"""
Filters entries in model output based on their scores
Discards all entries with score less than the specified minimum
"""
def __init__(self, min_score: float = 0.8):
self.min_score = min_score
def __call__(self, model_output: ModelOutput) -> ModelOutput:
for model_output_i in model_output:
instances = model_output_i["instances"]
if not instances.has("scores"):
continue
instances_filtered = instances[instances.scores >= self.min_score]
model_output_i["instances"] = instances_filtered
return model_output
class InferenceBasedLoader:
"""
Data loader based on results inferred by a model. Consists of:
- a data loader that provides batches of images
- a model that is used to infer the results
- a data sampler that converts inferred results to annotations
"""
def __init__(
self,
model: nn.Module,
data_loader: Iterable[List[torch.Tensor]],
data_sampler: Optional[Callable[[ModelOutput], List[SampledData]]] = None,
data_filter: Optional[Callable[[ModelOutput], ModelOutput]] = None,
shuffle: bool = True,
batch_size: int = 4,
inference_batch_size: int = 4,
drop_last: bool = False,
):
"""
Constructor
Args:
model (torch.nn.Module): model used to produce data
data_loader (Iterable[Tensor]): iterable that provides images
to perform inference on
data_sampler (Callable: ModelOutput -> SampledData): functor
that produces annotation data from inference results;
(optional, default: None)
data_filter (Callable: ModelOutput -> ModelOutput): filter
that selects model outputs for further processing
(optional, default: None)
shuffle (bool): if True, the input images get shuffled
batch_size (int): batch size for the produced annotation data
inference_batch_size (int): batch size for input images
drop_last (bool): if True, drop the last batch if it is undersized
"""
self.model = model
self.model.eval()
self.data_loader = data_loader
self.data_sampler = data_sampler
self.data_filter = data_filter
self.shuffle = shuffle
self.batch_size = batch_size
self.inference_batch_size = inference_batch_size
self.drop_last = drop_last
def __iter__(self) -> Iterator[List[SampledData]]:
for batch in self.data_loader:
# batch : List[Tensor[N, C, H, W]]
# images_batch : Tensor[N, C, H, W]
# image : Tensor[C, H, W]
images = [image for images_batch in batch for image in images_batch]
if not images:
continue
if self.shuffle:
random.shuffle(images)
yield from self._produce_data(images)
def _produce_data(self, images: List[torch.Tensor]) -> Iterator[List[SampledData]]:
"""
Produce batches of data from images
Args:
images (List[Tensor]): list of images to process
Returns:
Iterator over batches of data sampled from model outputs
"""
data_batches: List[SampledData] = []
batched_images = _grouper(images, self.inference_batch_size)
for batch in batched_images:
batch = [{"image": img.to(self.model.device)} for img in batch if img is not None]
if not batch:
continue
with torch.no_grad():
model_output = self.model(batch)
for model_output_i, batch_i in zip(model_output, batch):
model_output_i["image"] = batch_i["image"]
model_output_filtered = (
model_output if self.data_filter is None else self.data_filter(model_output)
)
data = (
model_output_filtered
if self.data_sampler is None
else self.data_sampler(model_output_filtered)
)
for data_i in data:
if len(data_i["instances"]):
data_batches.append(data_i)
if len(data_batches) >= self.batch_size:
yield data_batches[: self.batch_size]
data_batches = data_batches[self.batch_size :]
if not self.drop_last and data_batches:
yield data_batches
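# Illustrative end-to-end sketch (hypothetical helper): `model` is assumed to be
# an eval-mode DensePose R-CNN and `image_loader` to yield batches of image
# tensors; a data sampler from the samplers package (shown later in this diff)
# can be plugged in to convert predictions to ground truth.
def _example_inference_based_loader(model, image_loader):
    loader = InferenceBasedLoader(
        model,
        data_loader=image_loader,
        data_sampler=None,  # e.g. a PredictionToGroundTruthSampler with DensePose samplers
        data_filter=ScoreBasedFilter(min_score=0.8),
        batch_size=4,
    )
    # Yields lists of up to 4 dicts whose "instances" can be used as training GT.
    return next(iter(loader))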

View File

@ -0,0 +1,6 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .densepose_uniform import DensePoseUniformSampler
from .densepose_confidence_based import DensePoseConfidenceBasedSampler
from .mask_from_densepose import MaskFromDensePoseSampler, densepose_to_mask
from .prediction_to_gt import PredictionToGroundTruthSampler

View File

@ -0,0 +1,190 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import List, Optional
import torch
from torch.nn import functional as F
from detectron2.structures import BoxMode, Instances
from ..structures import (
DensePoseDataRelative,
DensePoseList,
DensePoseOutput,
resample_output_to_bbox,
)
class DensePoseBaseSampler:
"""
Base DensePose sampler to produce DensePose data from DensePose predictions.
Samples for each class are drawn according to some distribution over all pixels estimated
to belong to that class.
"""
def __init__(self, count_per_class: int = 8):
"""
Constructor
Args:
count_per_class (int): the sampler produces at most `count_per_class`
samples for each category
"""
self.count_per_class = count_per_class
def __call__(self, instances: Instances) -> DensePoseList:
"""
Convert DensePose predictions (an instance of `DensePoseOutput`)
into DensePose annotations data (an instance of `DensePoseList`)
"""
boxes_xyxy_abs = instances.pred_boxes.tensor.clone().cpu()
boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
dp_datas = []
for i, box_xywh in enumerate(boxes_xywh_abs):
labels_i, result_i = resample_output_to_bbox(
instances.pred_densepose[i], box_xywh, self._confidence_channels()
)
annotation_i = self._sample(labels_i.cpu(), result_i.cpu(), box_xywh)
annotation_i[DensePoseDataRelative.S_KEY] = self._resample_mask(
instances.pred_densepose[i]
)
dp_datas.append(DensePoseDataRelative(annotation_i))
# create densepose annotations on CPU
dp_list = DensePoseList(dp_datas, boxes_xyxy_abs, instances.image_size)
return dp_list
def _sample(
self, labels: torch.Tensor, dp_result: torch.Tensor, bbox_xywh: List[int]
) -> DensePoseDataRelative:
"""
Sample DensePoseDataRelative data from estimation results
"""
annotation = {
DensePoseDataRelative.X_KEY: [],
DensePoseDataRelative.Y_KEY: [],
DensePoseDataRelative.U_KEY: [],
DensePoseDataRelative.V_KEY: [],
DensePoseDataRelative.I_KEY: [],
}
x0, y0, _, _ = bbox_xywh
n, h, w = dp_result.shape
for part_id in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
# indices - tuple of 3 1D tensors of size k
# 0: index along the first dimension N
# 1: index along H dimension
# 2: index along W dimension
indices = torch.nonzero(labels.expand(n, h, w) == part_id, as_tuple=True)
# values - an array of size [n, k]
# n: number of channels (U, V, confidences)
# k: number of points labeled with part_id
values = dp_result[indices].view(n, -1)
k = values.shape[1]
count = min(self.count_per_class, k)
if count <= 0:
continue
index_sample = self._produce_index_sample(values, count)
sampled_values = values[:, index_sample]
sampled_y = indices[1][index_sample] + 0.5
sampled_x = indices[2][index_sample] + 0.5
# prepare / normalize data
x = (sampled_x / w * 256.0).cpu().tolist()
y = (sampled_y / h * 256.0).cpu().tolist()
u = sampled_values[0].clamp(0, 1).cpu().tolist()
v = sampled_values[1].clamp(0, 1).cpu().tolist()
fine_segm_labels = [part_id] * count
# extend annotations
annotation[DensePoseDataRelative.X_KEY].extend(x)
annotation[DensePoseDataRelative.Y_KEY].extend(y)
annotation[DensePoseDataRelative.U_KEY].extend(u)
annotation[DensePoseDataRelative.V_KEY].extend(v)
annotation[DensePoseDataRelative.I_KEY].extend(fine_segm_labels)
return annotation
def _confidence_channels(self) -> Optional[List[str]]:
"""
Confidence channels to be used for sampling (to be overridden in subclasses)
"""
return None
def _produce_index_sample(self, values: torch.Tensor, count: int):
"""
Abstract method to produce a sample of indices to select data
To be implemented in descendants
Args:
values (torch.Tensor): an array of size [n, k] that contains
estimated values (U, V, confidences);
n: number of channels (U, V, confidences)
k: number of points labeled with part_id
count (int): number of samples to produce, should be positive and <= k
Return:
list(int): indices of values (along axis 1) selected as a sample
"""
raise NotImplementedError
def _resample_mask(self, output: DensePoseOutput) -> torch.Tensor:
"""
Convert output mask tensors into the annotation mask tensor of size
(256, 256)
"""
sz = DensePoseDataRelative.MASK_SIZE
S = (
F.interpolate(output.S, (sz, sz), mode="bilinear", align_corners=False)
.argmax(dim=1)
.long()
)
I = (
(
F.interpolate(output.I, (sz, sz), mode="bilinear", align_corners=False).argmax(
dim=1
)
* (S > 0).long()
)
.squeeze()
.cpu()
)
# Map fine segmentation results to coarse segmentation ground truth
# TODO: extract this into separate classes
# coarse segmentation: 1 = Torso, 2 = Right Hand, 3 = Left Hand,
# 4 = Left Foot, 5 = Right Foot, 6 = Upper Leg Right, 7 = Upper Leg Left,
# 8 = Lower Leg Right, 9 = Lower Leg Left, 10 = Upper Arm Left,
# 11 = Upper Arm Right, 12 = Lower Arm Left, 13 = Lower Arm Right,
# 14 = Head
# fine segmentation: 1, 2 = Torso, 3 = Right Hand, 4 = Left Hand,
# 5 = Left Foot, 6 = Right Foot, 7, 9 = Upper Leg Right,
# 8, 10 = Upper Leg Left, 11, 13 = Lower Leg Right,
# 12, 14 = Lower Leg Left, 15, 17 = Upper Arm Left,
# 16, 18 = Upper Arm Right, 19, 21 = Lower Arm Left,
# 20, 22 = Lower Arm Right, 23, 24 = Head
FINE_TO_COARSE_SEGMENTATION = {
1: 1,
2: 1,
3: 2,
4: 3,
5: 4,
6: 5,
7: 6,
8: 7,
9: 6,
10: 7,
11: 8,
12: 9,
13: 8,
14: 9,
15: 10,
16: 11,
17: 10,
18: 11,
19: 12,
20: 13,
21: 12,
22: 13,
23: 14,
24: 14,
}
mask = torch.zeros((sz, sz), dtype=torch.int64, device=torch.device("cpu"))
for i in range(DensePoseDataRelative.N_PART_LABELS):
mask[I == i + 1] = FINE_TO_COARSE_SEGMENTATION[i + 1]
return mask

View File

@ -0,0 +1,91 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import random
from typing import List, Optional
import torch
from .densepose_base import DensePoseBaseSampler
class DensePoseConfidenceBasedSampler(DensePoseBaseSampler):
"""
Samples DensePose data from DensePose predictions.
Samples for each class are drawn using confidence value estimates.
"""
def __init__(
self,
confidence_channel: str,
count_per_class: int = 8,
search_count_multiplier: Optional[float] = None,
search_proportion: Optional[float] = None,
):
"""
Constructor
Args:
confidence_channel (str): confidence channel to use for sampling;
possible values:
"sigma_2": confidences for UV values
"fine_segm_confidence": confidences for fine segmentation
"coarse_segm_confidence": confidences for coarse segmentation
(default: "sigma_2")
count_per_class (int): the sampler produces at most `count_per_class`
samples for each category (default: 8)
search_count_multiplier (float or None): if not None, the total number
of the most confident estimates of a given class to consider is
defined as `min(search_count_multiplier * count_per_class, N)`,
where `N` is the total number of estimates of the class; cannot be
specified together with `search_proportion` (default: None)
search_proportion (float or None): if not None, the total number of the
most confident estimates of a given class to consider is
defined as `min(max(search_proportion * N, count_per_class), N)`,
where `N` is the total number of estimates of the class; cannot be
specified together with `search_count_multiplier` (default: None)
"""
super().__init__(count_per_class)
self.confidence_channel = confidence_channel
self.search_count_multiplier = search_count_multiplier
self.search_proportion = search_proportion
assert (search_count_multiplier is None) or (search_proportion is None), (
f"Cannot specify both search_count_multiplier (={search_count_multiplier})"
f"and search_proportion (={search_proportion})"
)
def _confidence_channels(self) -> Optional[List[str]]:
"""
Confidence channels to be used for sampling (overrides the base class)
"""
return [self.confidence_channel]
def _produce_index_sample(self, values: torch.Tensor, count: int):
"""
Produce a sample of indices to select data based on confidences
Args:
values (torch.Tensor): an array of size [n, k] that contains
estimated values (U, V, confidences);
n: number of channels (U, V, confidences)
k: number of points labeled with part_id
count (int): number of samples to produce, should be positive and <= k
Return:
list(int): indices of values (along axis 1) selected as a sample
"""
k = values.shape[1]
if k == count:
index_sample = list(range(k))
else:
# take the best count * search_count_multiplier pixels,
# sample from them uniformly
# (here best = smallest variance)
_, sorted_confidence_indices = torch.sort(values[2])
if self.search_count_multiplier is not None:
search_count = min(int(count * self.search_count_multiplier), k)
elif self.search_proportion is not None:
search_count = min(max(int(k * self.search_proportion), count), k)
else:
search_count = min(count, k)
sample_from_top = random.sample(range(search_count), count)
index_sample = sorted_confidence_indices[:search_count][sample_from_top]
return index_sample
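# Illustrative construction sketch (hypothetical helper): the channel name and
# counts mirror the values used by build_data_sampler in the data builders above.
def _example_confidence_sampler():
    return DensePoseConfidenceBasedSampler(
        confidence_channel="sigma_2",  # UV confidence channel
        count_per_class=8,
        search_proportion=0.5,  # sample only from the most confident half of the pixels
    )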

View File

@ -0,0 +1,41 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import random
import torch
from .densepose_base import DensePoseBaseSampler
class DensePoseUniformSampler(DensePoseBaseSampler):
"""
Samples DensePose data from DensePose predictions.
Samples for each class are drawn uniformly over all pixels estimated
to belong to that class.
"""
def __init__(self, count_per_class: int = 8):
"""
Constructor
Args:
count_per_class (int): the sampler produces at most `count_per_class`
samples for each category
"""
super().__init__(count_per_class)
def _produce_index_sample(self, values: torch.Tensor, count: int):
"""
Produce a uniform sample of indices to select data
Args:
values (torch.Tensor): an array of size [n, k] that contains
estimated values (U, V, confidences);
n: number of channels (U, V, confidences)
k: number of points labeled with part_id
count (int): number of samples to produce, should be positive and <= k
Return:
list(int): indices of values (along axis 1) selected as a sample
"""
k = values.shape[1]
return random.sample(range(k), count)

View File

@ -0,0 +1,59 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
from detectron2.structures import BitMasks, BoxMode, Instances
from ..structures import resample_output_to_bbox
def densepose_to_mask(instances: Instances) -> BitMasks:
"""
Produce masks from DensePose predictions
DensePose predictions for a given image, stored in `pred_densepose` field,
are instances of DensePoseOutput. This function takes
`S` and `I` output tensors (coarse and fine segmentation) and converts
them to a mask tensor, which is a boolean tensor of the size of the input
image.
Args:
instances (Instances): predicted results, expected to have `pred_densepose` field
that contains `DensePoseOutput` objects
Returns:
`BitMasks` instance with boolean tensors of the size of the input image that have non-zero
values at pixels that are estimated to belong to the detected objects
"""
H, W = instances.image_size
boxes_xyxy_abs = instances.pred_boxes.tensor.clone().cpu()
boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
N = len(boxes_xywh_abs)
gt_masks = torch.zeros((N, H, W), dtype=torch.bool, device=torch.device("cpu"))
for i, box_xywh in enumerate(boxes_xywh_abs):
labels_i, _ = resample_output_to_bbox(instances.pred_densepose[i], box_xywh)
x, y, w, h = box_xywh.long().tolist()
gt_masks[i, y : y + h, x : x + w] = labels_i.cpu() > 0
return BitMasks(gt_masks)
class MaskFromDensePoseSampler:
"""
Produce mask GT from DensePose predictions
DensePose prediction is an instance of DensePoseOutput. This sampler takes
`S` and `I` output tensors (coarse and fine segmentation) and converts
them to a mask tensor, which is a boolean tensor of the size of the input
image.
"""
def __call__(self, instances: Instances) -> BitMasks:
"""
Converts predicted data from `instances` into the GT mask data
Args:
instances (Instances): predicted results, expected to have `pred_densepose` field
Returns:
`BitMasks` with boolean tensors of the size of the input image that have
non-zero values at pixels estimated to belong to the detected objects
"""
return densepose_to_mask(instances)
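# Illustrative sketch (hypothetical helper): `instances` is assumed to carry
# `pred_boxes` and `pred_densepose` fields produced by a DensePose model.
def _example_masks_from_densepose(instances):
    bitmasks = MaskFromDensePoseSampler()(instances)  # same as densepose_to_mask(instances)
    return bitmasks.tensor.shape  # (N, H, W) boolean mask, one per detected person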

View File

@ -0,0 +1,80 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from dataclasses import dataclass
from typing import Any, Callable, Dict, Optional
from detectron2.structures import Instances
ModelOutput = Dict[str, Any]
SampledData = Dict[str, Any]
@dataclass
class _Sampler:
"""
Sampler registry entry that contains:
- src (str): source field to sample from (deleted after sampling)
- dst (Optional[str]): destination field to sample to, if not None
- func (Optional[Callable: Any -> Any]): function that performs sampling,
if None, reference copy is performed
"""
src: str
dst: Optional[str]
func: Optional[Callable[[Any], Any]]
class PredictionToGroundTruthSampler:
"""
Sampler implementation that converts predictions to GT using registered
samplers for different fields of `Instances`.
"""
def __init__(self, dataset_name: str = ""):
self.dataset_name = dataset_name
self._samplers = {}
self.register_sampler("pred_boxes", "gt_boxes", None)
self.register_sampler("pred_classes", "gt_classes", None)
self.register_sampler("scores")
def __call__(self, model_output: ModelOutput) -> SampledData:
"""
Transform model output into ground truth data through sampling
Args:
model_output: model outputs for a batch of images, one dict per image
Returns:
the same outputs with predicted fields replaced by sampled ground truth data
"""
for model_output_i in model_output:
instances: Instances = model_output_i["instances"]
# transform data in each field
for _, sampler in self._samplers.items():
if not instances.has(sampler.src) or sampler.dst is None:
continue
if sampler.func is None:
instances.set(sampler.dst, instances.get(sampler.src))
else:
instances.set(sampler.dst, sampler.func(instances))
# delete model output data that was transformed
for _, sampler in self._samplers.items():
if sampler.src != sampler.dst and instances.has(sampler.src):
instances.remove(sampler.src)
model_output_i["dataset"] = self.dataset_name
return model_output
def register_sampler(
self,
prediction_attr: str,
gt_attr: Optional[str] = None,
func: Optional[Callable[[Any], Any]] = None,
):
"""
Register sampler for a field
Args:
prediction_attr (str): field to replace with a sampled value
gt_attr (Optional[str]): field to store the sampled value to, if not None
func (Optional[Callable: Any -> Any]): sampler function
"""
self._samplers[prediction_attr] = _Sampler(src=prediction_attr, dst=gt_attr, func=func)
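# Illustrative sketch (hypothetical helper): registering an extra field sampler
# on top of the default boxes / classes / scores handling; `mask_sampler` is
# assumed to be e.g. a MaskFromDensePoseSampler from the samplers package above.
def _example_gt_sampler(mask_sampler):
    sampler = PredictionToGroundTruthSampler(dataset_name="my_bootstrap_set")
    sampler.register_sampler("pred_densepose", "gt_masks", mask_sampler)
    return sampler  # call it on model outputs to turn predictions into GT fields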

View File

@ -0,0 +1,703 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import base64
import numpy as np
from io import BytesIO
from typing import BinaryIO, Dict, List, Optional, Tuple, Union
import torch
from PIL import Image
from torch.nn import functional as F
class DensePoseTransformData(object):
# Horizontal symmetry label transforms used for horizontal flip
MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
# fmt: off
POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa
# fmt: on
def __init__(self, uv_symmetries: Dict[str, torch.Tensor], device: torch.device):
self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
self.uv_symmetries = uv_symmetries
self.device = torch.device("cpu")
def to(self, device: torch.device, copy: bool = False) -> "DensePoseTransformData":
"""
Convert transform data to the specified device
Args:
device (torch.device): device to convert the data to
copy (bool): flag that specifies whether to copy or to reference the data
in case the device is the same
Return:
An instance of `DensePoseTransformData` with data stored on the specified device
"""
if self.device == device and not copy:
return self
uv_symmetry_map = {}
for key in self.uv_symmetries:
uv_symmetry_map[key] = self.uv_symmetries[key].to(device=device, copy=copy)
return DensePoseTransformData(uv_symmetry_map, device)
@staticmethod
def load(io: Union[str, BinaryIO]):
"""
Args:
io: (str or binary file-like object): input file to load data from
Returns:
An instance of `DensePoseTransformData` with transforms loaded from the file
"""
import scipy.io
uv_symmetry_map = scipy.io.loadmat(io)
uv_symmetry_map_torch = {}
for key in ["U_transforms", "V_transforms"]:
uv_symmetry_map_torch[key] = []
map_src = uv_symmetry_map[key]
map_dst = uv_symmetry_map_torch[key]
for i in range(map_src.shape[1]):
map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float))
uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0)
transform_data = DensePoseTransformData(uv_symmetry_map_torch, device=torch.device("cpu"))
return transform_data
class DensePoseDataRelative(object):
"""
Dense pose relative annotations that can be applied to any bounding box:
x - normalized X coordinates [0, 255] of annotated points
y - normalized Y coordinates [0, 255] of annotated points
i - body part labels 0,...,24 for annotated points
u - body part U coordinates [0, 1] for annotated points
v - body part V coordinates [0, 1] for annotated points
segm - 256x256 segmentation mask with values 0,...,14
To obtain absolute x and y data wrt some bounding box one needs to first
divide the data by 256, multiply by the respective bounding box size
and add bounding box offset:
x_img = x0 + x_norm * w / 256.0
y_img = y0 + y_norm * h / 256.0
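For example (hypothetical numbers): x_norm = 128 in a box with x0 = 10 and
w = 200 gives x_img = 10 + 128 * 200 / 256.0 = 110.0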
Segmentation masks are typically sampled to get image-based masks.
"""
# Key for normalized X coordinates in annotation dict
X_KEY = "dp_x"
# Key for normalized Y coordinates in annotation dict
Y_KEY = "dp_y"
# Key for U part coordinates in annotation dict
U_KEY = "dp_U"
# Key for V part coordinates in annotation dict
V_KEY = "dp_V"
# Key for I point labels in annotation dict
I_KEY = "dp_I"
# Key for segmentation mask in annotation dict
S_KEY = "dp_masks"
# Number of body parts in segmentation masks
N_BODY_PARTS = 14
# Number of parts in point labels
N_PART_LABELS = 24
MASK_SIZE = 256
def __init__(self, annotation, cleanup=False):
is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid)
self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY])
self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY])
self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY])
self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY])
self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY])
self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation)
self.device = torch.device("cpu")
if cleanup:
DensePoseDataRelative.cleanup_annotation(annotation)
def to(self, device):
if self.device == device:
return self
new_data = DensePoseDataRelative.__new__(DensePoseDataRelative)
new_data.x = self.x.to(device)
new_data.y = self.y.to(device)
new_data.i = self.i.to(device)
new_data.u = self.u.to(device)
new_data.v = self.v.to(device)
new_data.segm = self.segm.to(device)
new_data.device = device
return new_data
@staticmethod
def extract_segmentation_mask(annotation):
poly_specs = annotation[DensePoseDataRelative.S_KEY]
if isinstance(poly_specs, torch.Tensor):
# data is already given as mask tensors, no need to decode
return poly_specs
import pycocotools.mask as mask_utils
segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32)
for i in range(DensePoseDataRelative.N_BODY_PARTS):
poly_i = poly_specs[i]
if poly_i:
mask_i = mask_utils.decode(poly_i)
segm[mask_i > 0] = i + 1
return segm
@staticmethod
def validate_annotation(annotation):
for key in [
DensePoseDataRelative.X_KEY,
DensePoseDataRelative.Y_KEY,
DensePoseDataRelative.I_KEY,
DensePoseDataRelative.U_KEY,
DensePoseDataRelative.V_KEY,
DensePoseDataRelative.S_KEY,
]:
if key not in annotation:
return False, "no {key} data in the annotation".format(key=key)
return True, None
@staticmethod
def cleanup_annotation(annotation):
for key in [
DensePoseDataRelative.X_KEY,
DensePoseDataRelative.Y_KEY,
DensePoseDataRelative.I_KEY,
DensePoseDataRelative.U_KEY,
DensePoseDataRelative.V_KEY,
DensePoseDataRelative.S_KEY,
]:
if key in annotation:
del annotation[key]
def apply_transform(self, transforms, densepose_transform_data):
self._transform_pts(transforms, densepose_transform_data)
self._transform_segm(transforms, densepose_transform_data)
def _transform_pts(self, transforms, dp_transform_data):
import detectron2.data.transforms as T
# NOTE: This assumes that HorizFlipTransform is the only one that does flip
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
if do_hflip:
self.x = self.segm.size(1) - self.x
self._flip_iuv_semantics(dp_transform_data)
for t in transforms.transforms:
if isinstance(t, T.RotationTransform):
xy_scale = np.array((t.w, t.h)) / DensePoseDataRelative.MASK_SIZE
xy = t.apply_coords(np.stack((self.x, self.y), axis=1) * xy_scale)
self.x, self.y = torch.tensor(xy / xy_scale, dtype=self.x.dtype).T
def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None:
i_old = self.i.clone()
uv_symmetries = dp_transform_data.uv_symmetries
pt_label_symmetries = dp_transform_data.point_label_symmetries
for i in range(self.N_PART_LABELS):
if i + 1 in i_old:
annot_indices_i = i_old == i + 1
if pt_label_symmetries[i + 1] != i + 1:
self.i[annot_indices_i] = pt_label_symmetries[i + 1]
u_loc = (self.u[annot_indices_i] * 255).long()
v_loc = (self.v[annot_indices_i] * 255).long()
self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to(
device=self.u.device
)
self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to(
device=self.v.device
)
def _transform_segm(self, transforms, dp_transform_data):
import detectron2.data.transforms as T
# NOTE: This assumes that HorizFlipTransform is the only one that does flip
do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
if do_hflip:
self.segm = torch.flip(self.segm, [1])
self._flip_segm_semantics(dp_transform_data)
for t in transforms.transforms:
if isinstance(t, T.RotationTransform):
self._transform_segm_rotation(t)
def _flip_segm_semantics(self, dp_transform_data):
old_segm = self.segm.clone()
mask_label_symmetries = dp_transform_data.mask_label_symmetries
for i in range(self.N_BODY_PARTS):
if mask_label_symmetries[i + 1] != i + 1:
self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1]
def _transform_segm_rotation(self, rotation):
self.segm = F.interpolate(self.segm[None, None, :], (rotation.h, rotation.w)).numpy()
self.segm = torch.tensor(rotation.apply_segmentation(self.segm[0, 0]))[None, None, :]
self.segm = F.interpolate(self.segm, [DensePoseDataRelative.MASK_SIZE] * 2)[0, 0]
def normalized_coords_transform(x0, y0, w, h):
"""
Coordinates transform that maps top left corner to (-1, -1) and bottom
right corner to (1, 1). Used for torch.grid_sample to initialize the
grid
"""
def f(p):
return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1)
return f
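# Illustrative sketch (hypothetical helper): the returned functor maps box
# corners to the grid_sample convention, e.g. for a box at (10, 20) of size 100 x 50.
def _example_normalized_coords():
    f = normalized_coords_transform(10, 20, 100, 50)
    return f((10, 20)), f((110, 70))  # (-1.0, -1.0) and (1.0, 1.0)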
class DensePoseOutput(object):
def __init__(self, S, I, U, V, confidences):
"""
Args:
S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W)
I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W)
U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W)
V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W)
confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters
"""
self.S = S
self.I = I # noqa: E741
self.U = U
self.V = V
self.confidences = confidences
self._check_output_dims(S, I, U, V)
def _check_output_dims(self, S, I, U, V):
assert (
len(S.size()) == 4
), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format(
S.size()
)
assert (
len(I.size()) == 4
), "Part index output should have 4 " "dimensions (NCHW), but has size {}".format(
I.size()
)
assert (
len(U.size()) == 4
), "U coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
U.size()
)
assert (
len(V.size()) == 4
), "V coordinates output should have 4 " "dimensions (NCHW), but has size {}".format(
V.size()
)
assert len(S) == len(I), (
"Number of output segmentation planes {} "
"should be equal to the number of output part index "
"planes {}".format(len(S), len(I))
)
assert S.size()[2:] == I.size()[2:], (
"Output segmentation plane size {} "
"should be equal to the output part index "
"plane size {}".format(S.size()[2:], I.size()[2:])
)
assert I.size() == U.size(), (
"Part index output shape {} "
"should be the same as U coordinates output shape {}".format(I.size(), U.size())
)
assert I.size() == V.size(), (
"Part index output shape {} "
"should be the same as V coordinates output shape {}".format(I.size(), V.size())
)
def resize(self, image_size_hw):
# do nothing - outputs are invariant to resize
pass
def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh):
"""
Resample S, I, U, V from bbox_old to the cropped bbox_new
"""
x0old, y0old, wold, hold = bbox_old_xywh
x0new, y0new, wnew, hnew = bbox_new_xywh
tr_coords = normalized_coords_transform(x0old, y0old, wold, hold)
topleft = (x0new, y0new)
bottomright = (x0new + wnew, y0new + hnew)
topleft_norm = tr_coords(topleft)
bottomright_norm = tr_coords(bottomright)
hsize = S.size(1)
wsize = S.size(2)
grid = torch.meshgrid(
torch.arange(
topleft_norm[1],
bottomright_norm[1],
(bottomright_norm[1] - topleft_norm[1]) / hsize,
)[:hsize],
torch.arange(
topleft_norm[0],
bottomright_norm[0],
(bottomright_norm[0] - topleft_norm[0]) / wsize,
)[:wsize],
)
grid = torch.stack(grid, dim=2).to(S.device)
assert (
grid.size(0) == hsize
), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0))
assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format(
wsize, grid.size(1)
)
S_new = F.grid_sample(
S.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
I_new = F.grid_sample(
I.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
U_new = F.grid_sample(
U.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
V_new = F.grid_sample(
V.unsqueeze(0),
torch.unsqueeze(grid, 0),
mode="bilinear",
padding_mode="border",
align_corners=True,
).squeeze(0)
return S_new, I_new, U_new, V_new
def crop(self, indices_cropped, bboxes_old, bboxes_new):
"""
Crop outputs for selected bounding boxes to the new bounding boxes.
"""
# VK: cropping is ignored for now
# for i, ic in enumerate(indices_cropped):
# self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \
# self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic],
# bboxes_old[i], bboxes_new[i])
pass
def hflip(self, transform_data: DensePoseTransformData) -> None:
"""
Change S, I, U and V to take into account a Horizontal flip.
"""
if self.I.shape[0] > 0:
for el in "SIUV":
self.__dict__[el] = torch.flip(self.__dict__[el], [3])
for key in self.confidences:
self.confidences[key] = torch.flip(self.confidences[key], [3])
self._flip_iuv_semantics_tensor(transform_data)
self._flip_segm_semantics_tensor(transform_data)
def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None:
point_label_symmetries = dp_transform_data.point_label_symmetries
uv_symmetries = dp_transform_data.uv_symmetries
N, C, H, W = self.U.shape
u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long()
v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long()
Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand(
N, C - 1, H, W
)
self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc]
self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc]
for el in "IUV":
self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :]
def _flip_segm_semantics_tensor(self, dp_transform_data):
if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1:
self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :]
def to_result(self, boxes_xywh):
"""
Convert DensePose outputs to results format. Results are more compact,
but cannot be resampled any more
"""
result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V)
return result
def __getitem__(self, item):
if isinstance(item, int):
S_selected = self.S[item].unsqueeze(0)
I_selected = self.I[item].unsqueeze(0)
U_selected = self.U[item].unsqueeze(0)
V_selected = self.V[item].unsqueeze(0)
conf_selected = {}
for key in self.confidences:
conf_selected[key] = self.confidences[key][item].unsqueeze(0)
else:
S_selected = self.S[item]
I_selected = self.I[item]
U_selected = self.U[item]
V_selected = self.V[item]
conf_selected = {}
for key in self.confidences:
conf_selected[key] = self.confidences[key][item]
return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected)
def __str__(self):
s = "DensePoseOutput S {}, I {}, U {}, V {}".format(
list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size())
)
s_conf = "confidences: [{}]".format(
", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences])
)
return ", ".join([s, s_conf])
def __len__(self):
return self.S.size(0)
def resample_output_to_bbox(
output: DensePoseOutput, bbox_xywh_abs: List[int], confidences: Optional[List[str]] = None
) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Convert DensePose output of size [1, C, S, S] into DensePose results [D, H_i, W_i],
where `i` is detection index and `D == 2 + len(confidences)`. This conversion:
- resamples data to the detection bounding box size (H_i, W_i),
- sets label for each pixel of the bounding box as the `argmax` of scores,
- assigns values (U, V, confidences) based on label and resampled data
Args:
output (DensePoseOutput): outputs of the DensePose model
bbox_xywh_abs (List[int]): bounding box, a list of 4 integer values XYWH
confidences (List[str]): optional list of `str` that specifies confidence
channels to be resampled and added to the results
    Returns:
labels (torch.Tensor): tensor [1, H_i, W_i] of `torch.uint8` containing fine
segmentation labels of each pixel
data (torch.Tensor): tensor [D, H_i, W_i] of `torch.float32` containing
for each pixel the estimated U, V coordinates and the requested
confidence values in the order that corresponds to `confidences`
"""
x, y, w, h = bbox_xywh_abs
w = max(int(w), 1)
h = max(int(h), 1)
N_out = 2 if confidences is None else 2 + len(confidences)
device = output.U.device
data = torch.zeros([N_out, h, w], dtype=torch.float32, device=device)
# coarse segmentation
assert (
len(output.S.size()) == 4
), "AnnIndex tensor size should have {} dimensions but has {}".format(4, len(output.S.size()))
s_bbox = F.interpolate(output.S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
# fine segmentation
assert (
len(output.I.size()) == 4
), "IndexUV tensor size should have {} dimensions but has {}".format(4, len(output.S.size()))
labels = (
F.interpolate(output.I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
* (s_bbox > 0).long()
).squeeze(0)
# U
assert len(output.U.size()) == 4, "U tensor size should have {} dimensions but has {}".format(
4, len(output.U.size())
)
u_bbox = F.interpolate(output.U, (h, w), mode="bilinear", align_corners=False)
# V
assert len(output.V.size()) == 4, "V tensor size should have {} dimensions but has {}".format(
4, len(output.V.size())
)
v_bbox = F.interpolate(output.V, (h, w), mode="bilinear", align_corners=False)
# confidences
if confidences is not None:
resampled_confidence = {}
for key in output.confidences:
resampled_confidence[key] = F.interpolate(
output.confidences[key], (h, w), mode="bilinear", align_corners=False
)
# assign data from channels that correspond to the labels
for part_id in range(1, u_bbox.size(1)):
data[0][labels == part_id] = u_bbox[0, part_id][labels == part_id]
data[1][labels == part_id] = v_bbox[0, part_id][labels == part_id]
if confidences is None:
continue
for i, key in enumerate(confidences):
if resampled_confidence[key].size(1) != u_bbox.size(1):
# confidence is not part-based, don't try to fill it part by part
continue
data[2 + i][labels == part_id] = resampled_confidence[key][0, part_id][
labels == part_id
]
if confidences is not None:
for i, key in enumerate(confidences):
if resampled_confidence[key].size(1) != u_bbox.size(1):
# confidence is not part-based, fill the data with the first channel
# (targeted for segmentation confidences that have only 1 channel)
data[2 + i] = resampled_confidence[key][0, 0]
return labels.unsqueeze(0), data
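# Usage sketch: resample a single-instance DensePose output to its detection box.
# Here `output` is assumed to be a DensePoseOutput with tensors of shape [1, C, S, S]
# (e.g. obtained by indexing, `outputs[i]`), and "sigma_2" is a hypothetical confidence key.
#   labels, data = resample_output_to_bbox(output, [x, y, w, h], confidences=["sigma_2"])
#   # labels: [1, h, w] uint8 fine segmentation labels (0 = background)
#   # data:   [3, h, w] float32 with per-pixel U, V and the resampled confidence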
class DensePoseResult(object):
def __init__(self, boxes_xywh, S, I, U, V):
self.results = []
self.boxes_xywh = boxes_xywh.cpu().tolist()
assert len(boxes_xywh.size()) == 2
assert boxes_xywh.size(1) == 4
for i, box_xywh in enumerate(boxes_xywh):
result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]])
result_numpy_i = result_i.cpu().numpy()
result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i)
result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i)
self.results.append(result_encoded_with_shape_i)
def __str__(self):
s = "DensePoseResult: N={} [{}]".format(
len(self.results), ", ".join([str(list(r[0])) for r in self.results])
)
return s
def _output_to_result(self, box_xywh, S, I, U, V):
# TODO: reuse resample_output_to_bbox
x, y, w, h = box_xywh
w = max(int(w), 1)
h = max(int(h), 1)
result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device)
assert (
len(S.size()) == 4
), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
assert (
len(I.size()) == 4
), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(S.size()))
i_bbox = (
F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
* (s_bbox > 0).long()
).squeeze(0)
assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format(
4, len(U.size())
)
u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False)
assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format(
4, len(V.size())
)
v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False)
result[0] = i_bbox
for part_id in range(1, u_bbox.size(1)):
result[1][i_bbox == part_id] = (
(u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
)
result[2][i_bbox == part_id] = (
(v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8)
)
assert (
result.size(1) == h
), "Results height {} should be equal" "to bounding box height {}".format(result.size(1), h)
assert (
result.size(2) == w
), "Results width {} should be equal" "to bounding box width {}".format(result.size(2), w)
return result
@staticmethod
def encode_png_data(arr):
"""
Encode array data as a PNG image using the highest compression rate
@param arr [in] Data stored in an array of size (3, M, N) of type uint8
@return Base64-encoded string containing PNG-compressed data
"""
assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format(
len(arr.shape)
)
assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format(
arr.shape[0]
)
        assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " "got {0}".format(
arr.dtype
)
data = np.moveaxis(arr, 0, -1)
im = Image.fromarray(data)
fstream = BytesIO()
im.save(fstream, format="png", optimize=True)
s = base64.encodebytes(fstream.getvalue()).decode()
return s
@staticmethod
def decode_png_data(shape, s):
"""
Decode array data from a string that contains PNG-compressed data
        @param shape [in] Shape of the output array (3, M, N)
        @param s [in] Base64-encoded string containing PNG-compressed data
@return Data stored in an array of size (3, M, N) of type uint8
"""
fstream = BytesIO(base64.decodebytes(s.encode()))
im = Image.open(fstream)
data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0)
return data.reshape(shape)
def __len__(self):
return len(self.results)
def __getitem__(self, item):
result_encoded = self.results[item]
bbox_xywh = self.boxes_xywh[item]
return result_encoded, bbox_xywh
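# Round-trip sketch for the PNG / base64 packing used by DensePoseResult
# (the array content is arbitrary illustrative data):
#   arr = np.zeros((3, 64, 48), dtype=np.uint8)               # packed (I, U, V) planes
#   s = DensePoseResult.encode_png_data(arr)                  # base64 string
#   arr_back = DensePoseResult.decode_png_data(arr.shape, s)
#   assert (arr_back == arr).all()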
class DensePoseList(object):
_TORCH_DEVICE_CPU = torch.device("cpu")
def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU):
assert len(densepose_datas) == len(
boxes_xyxy_abs
), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format(
len(densepose_datas), len(boxes_xyxy_abs)
)
self.densepose_datas = []
for densepose_data in densepose_datas:
assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, (
"Attempt to initialize DensePoseList with DensePose datas "
"of type {}, expected DensePoseDataRelative".format(type(densepose_data))
)
densepose_data_ondevice = (
densepose_data.to(device) if densepose_data is not None else None
)
self.densepose_datas.append(densepose_data_ondevice)
self.boxes_xyxy_abs = boxes_xyxy_abs.to(device)
self.image_size_hw = image_size_hw
self.device = device
def to(self, device):
if self.device == device:
return self
return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device)
def __iter__(self):
return iter(self.densepose_datas)
def __len__(self):
return len(self.densepose_datas)
def __repr__(self):
s = self.__class__.__name__ + "("
s += "num_instances={}, ".format(len(self.densepose_datas))
s += "image_width={}, ".format(self.image_size_hw[1])
s += "image_height={})".format(self.image_size_hw[0])
return s
def __getitem__(self, item):
if isinstance(item, int):
densepose_data_rel = self.densepose_datas[item]
return densepose_data_rel
elif isinstance(item, slice):
densepose_datas_rel = self.densepose_datas[item]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool):
densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
else:
densepose_datas_rel = [self.densepose_datas[i] for i in item]
boxes_xyxy_abs = self.boxes_xyxy_abs[item]
return DensePoseList(
densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
)
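# Indexing sketch: integer indices return the per-instance DensePoseDataRelative
# (or None), while slices, index lists and boolean masks return a new DensePoseList:
#   dp_list[0]                                   # DensePoseDataRelative or None
#   dp_list[1:3]                                 # DensePoseList with two entries
#   dp_list[torch.tensor([True, False, True])]   # boolean-mask selection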

View File

@@ -0,0 +1,3 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .image import ImageResizeTransform

View File

@@ -0,0 +1,37 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
class ImageResizeTransform:
"""
Transform that converts frames loaded from a dataset
(RGB data in NHWC channel order, typically uint8) to a format ready to be
consumed by DensePose training (BGR float32 data in NCHW channel order)
"""
def __init__(self, min_size: int = 800, max_size: int = 1333):
self.min_size = min_size
self.max_size = max_size
def __call__(self, frames: torch.Tensor) -> torch.Tensor:
"""
Args:
frames (torch.Tensor): tensor of size [N, H, W, 3] that contains
RGB data (typically in uint8)
Returns:
frames (torch.Tensor): tensor of size [N, 3, H1, W1] where
H1 and W1 are chosen to respect the specified min and max sizes
                and preserve the original aspect ratio; the data channels
                follow BGR order and the data type is `torch.float32`
"""
frames = frames[..., [2, 1, 0]] # RGB -> BGR
frames = frames.permute(0, 3, 1, 2).float() # NHWC -> NCHW
# resize with min size
min_size = min(frames.shape[-2:])
max_size = max(frames.shape[-2:])
scale = min(self.min_size / min_size, self.max_size / max_size)
frames = torch.nn.functional.interpolate(
frames, scale_factor=scale, mode="bilinear", align_corners=False
)
return frames
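# Usage sketch (frame sizes are illustrative): resize a batch of RGB uint8 frames
# into the format expected by the model.
#   transform = ImageResizeTransform(min_size=800, max_size=1333)
#   frames = torch.randint(0, 256, (4, 720, 1280, 3), dtype=torch.uint8)
#   batch = transform(frames)   # [4, 3, H1, W1] float32, BGR channel order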

View File

@@ -0,0 +1,22 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import os
from typing import Optional
def is_relative_local_path(path: os.PathLike):
path_str = os.fsdecode(path)
return ("://" not in path_str) and not os.path.isabs(path)
def maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike):
"""
Prepends the provided path with a base path prefix if:
1) base path is not None;
    2) path is a relative local path
"""
if base_path is None:
return path
if is_relative_local_path(path):
return os.path.join(base_path, path)
return path
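# Behaviour sketch (POSIX-style paths, purely illustrative):
#   maybe_prepend_base_path("/data/videos", "clip01.mkv")     # -> "/data/videos/clip01.mkv"
#   maybe_prepend_base_path("/data/videos", "/abs/clip.mkv")  # -> "/abs/clip.mkv" (absolute, kept)
#   maybe_prepend_base_path("/data", "s3://bucket/clip.mkv")  # -> "s3://bucket/clip.mkv" (URI, kept)
#   maybe_prepend_base_path(None, "clip01.mkv")               # -> "clip01.mkv"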

View File

@@ -0,0 +1,17 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .frame_selector import (
FrameSelectionStrategy,
RandomKFramesSelector,
FirstKFramesSelector,
LastKFramesSelector,
FrameTsList,
FrameSelector,
)
from .video_keyframe_dataset import (
VideoKeyframeDataset,
video_list_from_file,
list_keyframes,
read_keyframes,
)

View File

@@ -0,0 +1,87 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import random
from collections.abc import Callable
from enum import Enum
from typing import Callable as TCallable
from typing import List
FrameTsList = List[int]
FrameSelector = TCallable[[FrameTsList], FrameTsList]
class FrameSelectionStrategy(Enum):
"""
Frame selection strategy used with videos:
- "random_k": select k random frames
- "first_k": select k first frames
- "last_k": select k last frames
- "all": select all frames
"""
# fmt: off
RANDOM_K = "random_k"
FIRST_K = "first_k"
LAST_K = "last_k"
ALL = "all"
# fmt: on
class RandomKFramesSelector(Callable):
"""
Selector that retains at most `k` random frames
"""
def __init__(self, k: int):
self.k = k
def __call__(self, frame_tss: FrameTsList) -> FrameTsList:
"""
Select `k` random frames
Args:
            frame_tss (List[int]): timestamps of input frames
Returns:
List[int]: timestamps of selected frames
"""
return random.sample(frame_tss, min(self.k, len(frame_tss)))
class FirstKFramesSelector(Callable):
"""
Selector that retains at most `k` first frames
"""
def __init__(self, k: int):
self.k = k
def __call__(self, frame_tss: FrameTsList) -> FrameTsList:
"""
Select `k` first frames
Args:
            frame_tss (List[int]): timestamps of input frames
Returns:
List[int]: timestamps of selected frames
"""
return frame_tss[: self.k]
class LastKFramesSelector(Callable):
"""
Selector that retains at most `k` last frames from video data
"""
def __init__(self, k: int):
self.k = k
def __call__(self, frame_tss: FrameTsList) -> FrameTsList:
"""
Select `k` last frames
Args:
            frame_tss (List[int]): timestamps of input frames
Returns:
List[int]: timestamps of selected frames
"""
return frame_tss[-self.k :]
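# Selection sketch, given keyframe timestamps [0, 10, 20, 30]:
#   FirstKFramesSelector(2)([0, 10, 20, 30])    # -> [0, 10]
#   LastKFramesSelector(2)([0, 10, 20, 30])     # -> [20, 30]
#   RandomKFramesSelector(2)([0, 10, 20, 30])   # -> 2 timestamps sampled without replacement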

View File

@@ -0,0 +1,232 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
from typing import Callable, List, Optional
import torch
from fvcore.common.file_io import PathManager
from torch.utils.data.dataset import Dataset
import av
from ..utils import maybe_prepend_base_path
from .frame_selector import FrameSelector, FrameTsList
FrameList = List[av.frame.Frame]
FrameTransform = Callable[[torch.Tensor], torch.Tensor]
def list_keyframes(video_fpath: str, video_stream_idx: int = 0) -> FrameTsList:
"""
Traverses all keyframes of a video file. Returns a list of keyframe
timestamps. Timestamps are counts in timebase units.
Args:
video_fpath (str): Video file path
video_stream_idx (int): Video stream index (default: 0)
Returns:
        List[int]: list of keyframe timestamps (timestamp is a count in timebase
units)
"""
try:
with PathManager.open(video_fpath, "rb") as io:
container = av.open(io, mode="r")
stream = container.streams.video[video_stream_idx]
keyframes = []
pts = -1
# Note: even though we request forward seeks for keyframes, sometimes
# a keyframe in backwards direction is returned. We introduce tolerance
# as a max count of ignored backward seeks
tolerance_backward_seeks = 2
while True:
try:
container.seek(pts + 1, backward=False, any_frame=False, stream=stream)
except av.AVError as e:
# the exception occurs when the video length is exceeded,
# we then return whatever data we've already collected
logger = logging.getLogger(__name__)
logger.debug(
f"List keyframes: Error seeking video file {video_fpath}, "
f"video stream {video_stream_idx}, pts {pts + 1}, AV error: {e}"
)
return keyframes
except OSError as e:
logger = logging.getLogger(__name__)
logger.warning(
f"List keyframes: Error seeking video file {video_fpath}, "
f"video stream {video_stream_idx}, pts {pts + 1}, OS error: {e}"
)
return []
packet = next(container.demux(video=video_stream_idx))
if packet.pts is not None and packet.pts <= pts:
logger = logging.getLogger(__name__)
logger.warning(
f"Video file {video_fpath}, stream {video_stream_idx}: "
f"bad seek for packet {pts + 1} (got packet {packet.pts}), "
f"tolerance {tolerance_backward_seeks}."
)
tolerance_backward_seeks -= 1
if tolerance_backward_seeks == 0:
return []
pts += 1
continue
tolerance_backward_seeks = 2
pts = packet.pts
if pts is None:
return keyframes
if packet.is_keyframe:
keyframes.append(pts)
return keyframes
except OSError as e:
logger = logging.getLogger(__name__)
logger.warning(
f"List keyframes: Error opening video file container {video_fpath}, " f"OS error: {e}"
)
except RuntimeError as e:
logger = logging.getLogger(__name__)
logger.warning(
f"List keyframes: Error opening video file container {video_fpath}, "
f"Runtime error: {e}"
)
return []
def read_keyframes(
video_fpath: str, keyframes: FrameTsList, video_stream_idx: int = 0
) -> FrameList:
"""
Reads keyframe data from a video file.
Args:
video_fpath (str): Video file path
keyframes (List[int]): List of keyframe timestamps (as counts in
timebase units to be used in container seek operations)
video_stream_idx (int): Video stream index (default: 0)
Returns:
List[Frame]: list of frames that correspond to the specified timestamps
"""
try:
with PathManager.open(video_fpath, "rb") as io:
container = av.open(io)
stream = container.streams.video[video_stream_idx]
frames = []
for pts in keyframes:
try:
container.seek(pts, any_frame=False, stream=stream)
frame = next(container.decode(video=0))
frames.append(frame)
except av.AVError as e:
logger = logging.getLogger(__name__)
logger.warning(
f"Read keyframes: Error seeking video file {video_fpath}, "
f"video stream {video_stream_idx}, pts {pts}, AV error: {e}"
)
container.close()
return frames
except OSError as e:
logger = logging.getLogger(__name__)
logger.warning(
f"Read keyframes: Error seeking video file {video_fpath}, "
f"video stream {video_stream_idx}, pts {pts}, OS error: {e}"
)
container.close()
return frames
except StopIteration:
logger = logging.getLogger(__name__)
logger.warning(
f"Read keyframes: Error decoding frame from {video_fpath}, "
f"video stream {video_stream_idx}, pts {pts}"
)
container.close()
return frames
container.close()
return frames
except OSError as e:
logger = logging.getLogger(__name__)
logger.warning(
f"Read keyframes: Error opening video file container {video_fpath}, OS error: {e}"
)
except RuntimeError as e:
logger = logging.getLogger(__name__)
logger.warning(
f"Read keyframes: Error opening video file container {video_fpath}, Runtime error: {e}"
)
return []
def video_list_from_file(video_list_fpath: str, base_path: Optional[str] = None):
"""
Create a list of paths to video files from a text file.
Args:
video_list_fpath (str): path to a plain text file with the list of videos
base_path (str): base path for entries from the video list (default: None)
"""
video_list = []
with PathManager.open(video_list_fpath, "r") as io:
for line in io:
video_list.append(maybe_prepend_base_path(base_path, line.strip()))
return video_list
class VideoKeyframeDataset(Dataset):
"""
Dataset that provides keyframes for a set of videos.
"""
_EMPTY_FRAMES = torch.empty((0, 3, 1, 1))
def __init__(
self,
video_list: List[str],
frame_selector: Optional[FrameSelector] = None,
transform: Optional[FrameTransform] = None,
):
"""
Dataset constructor
Args:
video_list (List[str]): list of paths to video files
frame_selector (Callable: KeyFrameList -> KeyFrameList):
selects keyframes to process, keyframes are given by
packet timestamps in timebase counts. If None, all keyframes
are selected (default: None)
transform (Callable: torch.Tensor -> torch.Tensor):
transforms a batch of RGB images (tensors of size [B, H, W, 3]),
returns a tensor of the same size. If None, no transform is
applied (default: None)
"""
self.video_list = video_list
self.frame_selector = frame_selector
self.transform = transform
def __getitem__(self, idx: int) -> torch.Tensor:
"""
Gets selected keyframes from a given video
Args:
idx (int): video index in the video list file
Returns:
frames (torch.Tensor): tensor of size [N, H, W, 3] or of size
defined by the transform that contains keyframes data
"""
fpath = self.video_list[idx]
keyframes = list_keyframes(fpath)
if not keyframes:
return self._EMPTY_FRAMES
if self.frame_selector is not None:
keyframes = self.frame_selector(keyframes)
frames = read_keyframes(fpath, keyframes)
if not frames:
return self._EMPTY_FRAMES
frames = np.stack([frame.to_rgb().to_ndarray() for frame in frames])
frames = torch.as_tensor(frames, device=torch.device("cpu"))
if self.transform is not None:
frames = self.transform(frames)
return frames
def __len__(self):
return len(self.video_list)
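# End-to-end sketch (file names and imports are hypothetical): build a keyframe dataset
# that keeps at most 4 random keyframes per video and resizes them for the model,
# assuming RandomKFramesSelector and ImageResizeTransform are imported from the
# corresponding modules above.
#   video_list = video_list_from_file("videos.txt", base_path="/datasets/clips")
#   dataset = VideoKeyframeDataset(
#       video_list,
#       frame_selector=RandomKFramesSelector(4),
#       transform=ImageResizeTransform(),
#   )
#   frames = dataset[0]   # [N, 3, H1, W1] float32, or an empty tensor if decoding failed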

File diff suppressed because it is too large

View File

@@ -0,0 +1,3 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .trainer import Trainer

View File

@@ -0,0 +1,118 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import os
from collections import OrderedDict
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import CfgNode
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, DatasetEvaluators
from detectron2.utils.events import EventWriter, get_event_storage
from densepose import (
DensePoseCOCOEvaluator,
DensePoseDatasetMapperTTA,
DensePoseGeneralizedRCNNWithTTA,
load_from_cfg,
)
from densepose.data import (
DatasetMapper,
build_combined_loader,
build_detection_test_loader,
build_detection_train_loader,
build_inference_based_loaders,
has_inference_based_loaders,
)
class SampleCountingLoader:
def __init__(self, loader):
self.loader = loader
def __iter__(self):
it = iter(self.loader)
storage = get_event_storage()
while True:
try:
batch = next(it)
num_inst_per_dataset = {}
for data in batch:
dataset_name = data["dataset"]
if dataset_name not in num_inst_per_dataset:
num_inst_per_dataset[dataset_name] = 0
num_inst = len(data["instances"])
num_inst_per_dataset[dataset_name] += num_inst
for dataset_name in num_inst_per_dataset:
storage.put_scalar(f"batch/{dataset_name}", num_inst_per_dataset[dataset_name])
yield batch
except StopIteration:
break
class SampleCountMetricPrinter(EventWriter):
def __init__(self):
self.logger = logging.getLogger(__name__)
def write(self):
storage = get_event_storage()
batch_stats_strs = []
for key, buf in storage.histories().items():
if key.startswith("batch/"):
batch_stats_strs.append(f"{key} {buf.avg(20)}")
self.logger.info(", ".join(batch_stats_strs))
class Trainer(DefaultTrainer):
@classmethod
def build_evaluator(cls, cfg: CfgNode, dataset_name, output_folder=None):
if output_folder is None:
output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
evaluators = [COCOEvaluator(dataset_name, cfg, True, output_folder)]
if cfg.MODEL.DENSEPOSE_ON:
evaluators.append(DensePoseCOCOEvaluator(dataset_name, True, output_folder))
return DatasetEvaluators(evaluators)
@classmethod
def build_test_loader(cls, cfg: CfgNode, dataset_name):
return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))
@classmethod
def build_train_loader(cls, cfg: CfgNode):
data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
if not has_inference_based_loaders(cfg):
return data_loader
model = cls.build_model(cfg)
model.to(cfg.BOOTSTRAP_MODEL.DEVICE)
DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False)
inference_based_loaders, ratios = build_inference_based_loaders(cfg, model)
loaders = [data_loader] + inference_based_loaders
ratios = [1.0] + ratios
combined_data_loader = build_combined_loader(cfg, loaders, ratios)
sample_counting_loader = SampleCountingLoader(combined_data_loader)
return sample_counting_loader
def build_writers(self):
writers = super().build_writers()
writers.append(SampleCountMetricPrinter())
return writers
@classmethod
def test_with_TTA(cls, cfg: CfgNode, model):
logger = logging.getLogger("detectron2.trainer")
# In the end of training, run an evaluation with TTA
# Only support some R-CNN models.
logger.info("Running inference with test-time augmentation ...")
transform_data = load_from_cfg(cfg)
model = DensePoseGeneralizedRCNNWithTTA(
cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg)
)
evaluators = [
cls.build_evaluator(
cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
)
for name in cfg.DATASETS.TEST
]
res = cls.test(cfg, model, evaluators)
res = OrderedDict({k + "_TTA": v for k, v in res.items()})
return res
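# Usage sketch (follows the standard detectron2 DefaultTrainer flow; cfg setup omitted):
#   trainer = Trainer(cfg)
#   trainer.resume_or_load(resume=False)
#   trainer.train()
#   # optionally, evaluate with test-time augmentation afterwards:
#   # Trainer.test_with_TTA(cfg, trainer.model)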

View File

@@ -0,0 +1,224 @@
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import copy
import io
import itertools
import logging
import numpy as np
import os
from collections import OrderedDict
import pycocotools.mask as mask_utils
import torch
from fvcore.common.file_io import PathManager
from pycocotools.coco import COCO
from detectron2.data import MetadataCatalog
from detectron2.evaluation import DatasetEvaluator
from detectron2.structures import BoxMode
from detectron2.utils.comm import all_gather, is_main_process, synchronize
from detectron2.utils.logger import create_small_table
from .data.samplers import densepose_to_mask
from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
class DensePoseCOCOEvaluator(DatasetEvaluator):
def __init__(self, dataset_name, distributed, output_dir=None):
self._distributed = distributed
self._output_dir = output_dir
self._cpu_device = torch.device("cpu")
self._logger = logging.getLogger(__name__)
self._metadata = MetadataCatalog.get(dataset_name)
self._min_threshold = 0.5
json_file = PathManager.get_local_path(self._metadata.json_file)
with contextlib.redirect_stdout(io.StringIO()):
self._coco_api = COCO(json_file)
def reset(self):
self._predictions = []
def process(self, inputs, outputs):
"""
Args:
inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
It is a list of dict. Each dict corresponds to an image and
contains keys like "height", "width", "file_name", "image_id".
outputs: the outputs of a COCO model. It is a list of dicts with key
"instances" that contains :class:`Instances`.
The :class:`Instances` object needs to have `densepose` field.
"""
for input, output in zip(inputs, outputs):
instances = output["instances"].to(self._cpu_device)
json_results = prediction_to_json(instances, input["image_id"])
self._predictions.extend(json_results)
def evaluate(self):
if self._distributed:
synchronize()
predictions = all_gather(self._predictions)
predictions = list(itertools.chain(*predictions))
if not is_main_process():
return
else:
predictions = self._predictions
return copy.deepcopy(self._eval_predictions(predictions))
def _eval_predictions(self, predictions):
"""
Evaluate predictions on densepose.
Return results with the metrics of the tasks.
"""
self._logger.info("Preparing results for COCO format ...")
if self._output_dir:
PathManager.mkdirs(self._output_dir)
file_path = os.path.join(self._output_dir, "coco_densepose_predictions.pth")
with PathManager.open(file_path, "wb") as f:
torch.save(predictions, f)
self._logger.info("Evaluating predictions ...")
res = OrderedDict()
results_gps, results_gpsm, results_segm = _evaluate_predictions_on_coco(
self._coco_api, predictions, min_threshold=self._min_threshold
)
res["densepose_gps"] = results_gps
res["densepose_gpsm"] = results_gpsm
res["densepose_segm"] = results_segm
return res
def prediction_to_json(instances, img_id):
"""
Args:
instances (Instances): the output of the model
img_id (str): the image id in COCO
Returns:
list[dict]: the results in densepose evaluation format
"""
scores = instances.scores.tolist()
segmentations = densepose_to_mask(instances)
boxes = instances.pred_boxes.tensor.clone()
boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
instances.pred_densepose = instances.pred_densepose.to_result(boxes)
results = []
for k in range(len(instances)):
densepose = instances.pred_densepose[k]
segmentation = segmentations.tensor[k]
segmentation_encoded = mask_utils.encode(
np.require(segmentation.numpy(), dtype=np.uint8, requirements=["F"])
)
segmentation_encoded["counts"] = segmentation_encoded["counts"].decode("utf-8")
result = {
"image_id": img_id,
"category_id": 1, # densepose only has one class
"bbox": densepose[1],
"score": scores[k],
"densepose": densepose,
"segmentation": segmentation_encoded,
}
results.append(result)
return results
def _evaluate_predictions_on_coco(coco_gt, coco_results, min_threshold=0.5):
logger = logging.getLogger(__name__)
segm_metrics = _get_segmentation_metrics()
densepose_metrics = _get_densepose_metrics(min_threshold)
if len(coco_results) == 0: # cocoapi does not handle empty results very well
logger.warn("No predictions from the model! Set scores to -1")
results_gps = {metric: -1 for metric in densepose_metrics}
results_gpsm = {metric: -1 for metric in densepose_metrics}
results_segm = {metric: -1 for metric in segm_metrics}
return results_gps, results_gpsm, results_segm
coco_dt = coco_gt.loadRes(coco_results)
results_segm = _evaluate_predictions_on_coco_segm(coco_gt, coco_dt, segm_metrics, min_threshold)
logger.info("Evaluation results for densepose segm: \n" + create_small_table(results_segm))
results_gps = _evaluate_predictions_on_coco_gps(
coco_gt, coco_dt, densepose_metrics, min_threshold
)
logger.info(
"Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps)
)
results_gpsm = _evaluate_predictions_on_coco_gpsm(
coco_gt, coco_dt, densepose_metrics, min_threshold
)
logger.info(
"Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm)
)
return results_gps, results_gpsm, results_segm
def _get_densepose_metrics(min_threshold=0.5):
metrics = ["AP"]
if min_threshold <= 0.201:
metrics += ["AP20"]
if min_threshold <= 0.301:
metrics += ["AP30"]
if min_threshold <= 0.401:
metrics += ["AP40"]
metrics.extend(["AP50", "AP75", "APm", "APl", "AR", "AR50", "AR75", "ARm", "ARl"])
return metrics
def _get_segmentation_metrics():
return [
"AP",
"AP50",
"AP75",
"APs",
"APm",
"APl",
"AR@1",
"AR@10",
"AR@100",
"ARs",
"ARm",
"ARl",
]
def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics, min_threshold=0.5):
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS)
coco_eval.params.iouThrs = np.linspace(
min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True
)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
return results
def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics, min_threshold=0.5):
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM)
coco_eval.params.iouThrs = np.linspace(
min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True
)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
return results
def _evaluate_predictions_on_coco_segm(coco_gt, coco_dt, metrics, min_threshold=0.5):
coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "segm")
coco_eval.params.iouThrs = np.linspace(
min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True
)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
return results
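# IoU threshold sketch: with the default min_threshold=0.5 the evaluators above use
#   np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1)   # 10 thresholds
# i.e. 0.50, 0.55, ..., 0.95; lowering min_threshold to 0.2 extends the grid to
# 16 thresholds and additionally reports AP20 / AP30 / AP40 (see _get_densepose_metrics).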

View File

@@ -0,0 +1,66 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.config import CfgNode
from .filter import DensePoseDataFilter
from .losses import DensePoseLosses
from .predictors import DensePoseChartWithConfidencePredictor
def build_densepose_predictor(cfg: CfgNode, input_channels: int):
"""
Create an instance of DensePose predictor based on configuration options.
Args:
cfg (CfgNode): configuration options
input_channels (int): input tensor size along the channel dimension
Return:
An instance of DensePose predictor
"""
predictor = DensePoseChartWithConfidencePredictor(cfg, input_channels)
return predictor
def build_densepose_data_filter(cfg: CfgNode):
"""
Build DensePose data filter which selects data for training
Args:
cfg (CfgNode): configuration options
Return:
Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances)
An instance of DensePose filter, which takes feature tensors and proposals
as an input and returns filtered features and proposals
"""
dp_filter = DensePoseDataFilter(cfg)
return dp_filter
def build_densepose_head(cfg: CfgNode, input_channels: int):
"""
    Build DensePose head based on configuration options
Args:
cfg (CfgNode): configuration options
input_channels (int): input tensor size along the channel dimension
Return:
An instance of DensePose head
"""
from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY
head_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME
return ROI_DENSEPOSE_HEAD_REGISTRY.get(head_name)(cfg, input_channels)
def build_densepose_losses(cfg: CfgNode):
"""
    Build DensePose loss based on configuration options
Args:
cfg (CfgNode): configuration options
Return:
An instance of DensePose loss
"""
losses = DensePoseLosses(cfg)
return losses
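# Usage sketch: the builders above are typically invoked from the DensePose ROI heads
# setup, roughly as follows (cfg and the channel counts `in_channels` /
# `head_out_channels` are assumed to be provided by the surrounding model code):
#   dp_data_filter = build_densepose_data_filter(cfg)
#   dp_head = build_densepose_head(cfg, in_channels)
#   dp_predictor = build_densepose_predictor(cfg, head_out_channels)
#   dp_losses = build_densepose_losses(cfg)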

View File

@@ -0,0 +1,73 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from dataclasses import dataclass
from enum import Enum
from detectron2.config import CfgNode
class DensePoseUVConfidenceType(Enum):
"""
Statistical model type for confidence learning, possible values:
- "iid_iso": statistically independent identically distributed residuals
        with isotropic covariance
- "indep_aniso": statistically independent residuals with anisotropic
covariances
For details, see:
N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
"""
# fmt: off
IID_ISO = "iid_iso"
INDEP_ANISO = "indep_aniso"
# fmt: on
@dataclass
class DensePoseUVConfidenceConfig:
"""
Configuration options for confidence on UV data
"""
enabled: bool = False
# lower bound on UV confidences
epsilon: float = 0.01
type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO
@dataclass
class DensePoseSegmConfidenceConfig:
"""
Configuration options for confidence on segmentation
"""
enabled: bool = False
# lower bound on confidence values
epsilon: float = 0.01
@dataclass
class DensePoseConfidenceModelConfig:
"""
Configuration options for confidence models
"""
# confidence for U and V values
uv_confidence: DensePoseUVConfidenceConfig
# segmentation confidence
segm_confidence: DensePoseSegmConfidenceConfig
@staticmethod
def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
return DensePoseConfidenceModelConfig(
uv_confidence=DensePoseUVConfidenceConfig(
enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED,
epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON,
type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE),
),
segm_confidence=DensePoseSegmConfidenceConfig(
enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.ENABLED,
epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON,
),
)
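# Usage sketch: derive the confidence configuration from a detectron2 config node
# (the MODEL.ROI_DENSEPOSE_HEAD.*_CONFIDENCE keys referenced in from_cfg must exist,
# e.g. after add_densepose_config has been applied to cfg):
#   confidence_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
#   if confidence_cfg.uv_confidence.enabled:
#       uv_type = confidence_cfg.uv_confidence.type   # IID_ISO or INDEP_ANISO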

View File

@@ -0,0 +1,35 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from collections import OrderedDict
from detectron2.checkpoint import DetectionCheckpointer
def _rename_HRNet_weights(weights):
# We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are
# common to all HRNet pretrained weights, and should be enough to accurately identify them
if (
len(weights["model"].keys()) == 1956
and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716
):
hrnet_weights = OrderedDict()
for k in weights["model"].keys():
hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k]
return {"model": hrnet_weights}
else:
return weights
class DensePoseCheckpointer(DetectionCheckpointer):
"""
Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights
"""
def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables)
def _load_file(self, filename: str) -> object:
"""
Adding hrnet support
"""
weights = super()._load_file(filename)
return _rename_HRNet_weights(weights)
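# Usage sketch: the checkpointer loads either regular detectron2 weights or raw HRNet
# weights (the latter are detected and renamed by _rename_HRNet_weights above):
#   checkpointer = DensePoseCheckpointer(model, save_dir=cfg.OUTPUT_DIR)
#   checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=False)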

View File

@@ -0,0 +1,94 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import List
import torch
from detectron2.config import CfgNode
from detectron2.structures import Instances
from detectron2.structures.boxes import matched_boxlist_iou
class DensePoseDataFilter(object):
def __init__(self, cfg: CfgNode):
self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
@torch.no_grad()
def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]):
"""
Filters proposals with targets to keep only the ones relevant for
DensePose training
Args:
features (list[Tensor]): input data as a list of features,
each feature is a tensor. Axis 0 represents the number of
images `N` in the input data; axes 1-3 are channels,
height, and width, which may vary between features
(e.g., if a feature pyramid is used).
proposals_with_targets (list[Instances]): length `N` list of
`Instances`. The i-th `Instances` contains instances
(proposals, GT) for the i-th input image,
Returns:
list[Tensor]: filtered features
list[Instances]: filtered proposals
"""
proposals_filtered = []
# TODO: the commented out code was supposed to correctly deal with situations
# where no valid DensePose GT is available for certain images. The corresponding
# image features were sliced and proposals were filtered. This led to performance
# deterioration, both in terms of runtime and in terms of evaluation results.
#
# feature_mask = torch.ones(
# len(proposals_with_targets),
# dtype=torch.bool,
# device=features[0].device if len(features) > 0 else torch.device("cpu"),
# )
for i, proposals_per_image in enumerate(proposals_with_targets):
if not proposals_per_image.has("gt_densepose") and (
not proposals_per_image.has("gt_masks") or not self.keep_masks
):
# feature_mask[i] = 0
continue
gt_boxes = proposals_per_image.gt_boxes
est_boxes = proposals_per_image.proposal_boxes
# apply match threshold for densepose head
iou = matched_boxlist_iou(gt_boxes, est_boxes)
iou_select = iou > self.iou_threshold
proposals_per_image = proposals_per_image[iou_select]
N_gt_boxes = len(proposals_per_image.gt_boxes)
assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
f"The number of GT boxes {N_gt_boxes} is different from the "
f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
)
# filter out any target without suitable annotation
if self.keep_masks:
gt_masks = (
proposals_per_image.gt_masks
if hasattr(proposals_per_image, "gt_masks")
else [None] * N_gt_boxes
)
else:
gt_masks = [None] * N_gt_boxes
gt_densepose = (
proposals_per_image.gt_densepose
if hasattr(proposals_per_image, "gt_densepose")
else [None] * N_gt_boxes
)
assert len(gt_masks) == N_gt_boxes
assert len(gt_densepose) == N_gt_boxes
selected_indices = [
i
for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
if (dp_target is not None) or (mask_target is not None)
]
# if not len(selected_indices):
# feature_mask[i] = 0
# continue
if len(selected_indices) != N_gt_boxes:
proposals_per_image = proposals_per_image[selected_indices]
assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
proposals_filtered.append(proposals_per_image)
# features_filtered = [feature[feature_mask] for feature in features]
# return features_filtered, proposals_filtered
return features, proposals_filtered

View File

@@ -0,0 +1,181 @@
"""
MIT License
Copyright (c) 2019 Microsoft
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from detectron2.layers import ShapeSpec
from detectron2.modeling.backbone import BACKBONE_REGISTRY
from detectron2.modeling.backbone.backbone import Backbone
from .hrnet import build_pose_hrnet_backbone
class HRFPN(Backbone):
""" HRFPN (High Resolution Feature Pyramids)
Transforms outputs of HRNet backbone so they are suitable for the ROI_heads
arXiv: https://arxiv.org/abs/1904.04514
Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py
Args:
bottom_up: (list) output of HRNet
in_features (list): names of the input features (output of HRNet)
in_channels (list): number of channels for each branch
out_channels (int): output channels of feature pyramids
n_out_features (int): number of output stages
pooling (str): pooling for generating feature pyramids (from {MAX, AVG})
share_conv (bool): Have one conv per output, or share one with all the outputs
"""
def __init__(
self,
bottom_up,
in_features,
n_out_features,
in_channels,
out_channels,
pooling="AVG",
share_conv=False,
):
super(HRFPN, self).__init__()
assert isinstance(in_channels, list)
self.bottom_up = bottom_up
self.in_features = in_features
self.n_out_features = n_out_features
self.in_channels = in_channels
self.out_channels = out_channels
self.num_ins = len(in_channels)
self.share_conv = share_conv
if self.share_conv:
self.fpn_conv = nn.Conv2d(
in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1
)
else:
self.fpn_conv = nn.ModuleList()
for _ in range(self.n_out_features):
self.fpn_conv.append(
nn.Conv2d(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
padding=1,
)
)
# Custom change: Replaces a simple bilinear interpolation
self.interp_conv = nn.ModuleList()
for i in range(len(self.in_features)):
self.interp_conv.append(
nn.Sequential(
nn.ConvTranspose2d(
in_channels=in_channels[i],
out_channels=in_channels[i],
kernel_size=4,
stride=2 ** i,
padding=0,
output_padding=0,
bias=False,
),
nn.BatchNorm2d(in_channels[i], momentum=0.1),
nn.ReLU(inplace=True),
)
)
# Custom change: Replaces a couple (reduction conv + pooling) by one conv
self.reduction_pooling_conv = nn.ModuleList()
for i in range(self.n_out_features):
self.reduction_pooling_conv.append(
nn.Sequential(
nn.Conv2d(sum(in_channels), out_channels, kernel_size=2 ** i, stride=2 ** i),
nn.BatchNorm2d(out_channels, momentum=0.1),
nn.ReLU(inplace=True),
)
)
if pooling == "MAX":
self.pooling = F.max_pool2d
else:
self.pooling = F.avg_pool2d
self._out_features = []
self._out_feature_channels = {}
self._out_feature_strides = {}
for i in range(self.n_out_features):
self._out_features.append("p%d" % (i + 1))
self._out_feature_channels.update({self._out_features[-1]: self.out_channels})
self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)})
# default init_weights for conv(msra) and norm in ConvModule
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, a=1)
nn.init.constant_(m.bias, 0)
def forward(self, inputs):
bottom_up_features = self.bottom_up(inputs)
assert len(bottom_up_features) == len(self.in_features)
inputs = [bottom_up_features[f] for f in self.in_features]
outs = []
for i in range(len(inputs)):
outs.append(self.interp_conv[i](inputs[i]))
shape_2 = min(o.shape[2] for o in outs)
shape_3 = min(o.shape[3] for o in outs)
out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1)
outs = []
for i in range(self.n_out_features):
outs.append(self.reduction_pooling_conv[i](out))
for i in range(len(outs)): # Make shapes consistent
outs[-1 - i] = outs[-1 - i][
:, :, : outs[-1].shape[2] * 2 ** i, : outs[-1].shape[3] * 2 ** i
]
outputs = []
for i in range(len(outs)):
if self.share_conv:
outputs.append(self.fpn_conv(outs[i]))
else:
outputs.append(self.fpn_conv[i](outs[i]))
assert len(self._out_features) == len(outputs)
return dict(zip(self._out_features, outputs))
@BACKBONE_REGISTRY.register()
def build_hrfpn_backbone(cfg, input_shape: ShapeSpec):
in_channels = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS
in_features = ["p%d" % (i + 1) for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES)]
n_out_features = len(cfg.MODEL.ROI_HEADS.IN_FEATURES)
out_channels = cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS
hrnet = build_pose_hrnet_backbone(cfg, input_shape)
hrfpn = HRFPN(
hrnet,
in_features,
n_out_features,
in_channels,
out_channels,
pooling="AVG",
share_conv=False,
)
return hrfpn
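# Usage sketch: the backbone is normally selected through the config, e.g. by setting
# MODEL.BACKBONE.NAME to "build_hrfpn_backbone"; building it explicitly would look like
# (images_nchw is a hypothetical NCHW float tensor of input images):
#   from detectron2.modeling import build_backbone
#   backbone = build_backbone(cfg)       # dispatches here via BACKBONE_REGISTRY
#   features = backbone(images_nchw)     # dict of feature maps keyed by "p1", "p2", ...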

View File

@@ -0,0 +1,473 @@
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (leoxiaobin@gmail.com)
# Modified by Bowen Cheng (bcheng9@illinois.edu)
# Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa
# ------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function
import logging
import torch.nn as nn
from detectron2.layers import ShapeSpec
from detectron2.modeling.backbone import BACKBONE_REGISTRY
from detectron2.modeling.backbone.backbone import Backbone
BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)
__all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"]
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class HighResolutionModule(nn.Module):
""" HighResolutionModule
Building block of the PoseHigherResolutionNet (see lower)
arXiv: https://arxiv.org/abs/1908.10357
Args:
        num_branches (int): number of branches of the module
blocks (str): type of block of the module
num_blocks (int): number of blocks of the module
num_inchannels (int): number of input channels of the module
num_channels (list): number of channels of each branch
multi_scale_output (bool): only used by the last module of PoseHigherResolutionNet
"""
def __init__(
self,
num_branches,
blocks,
num_blocks,
num_inchannels,
num_channels,
multi_scale_output=True,
):
super(HighResolutionModule, self).__init__()
self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels)
self.num_inchannels = num_inchannels
self.num_branches = num_branches
self.multi_scale_output = multi_scale_output
self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
self.fuse_layers = self._make_fuse_layers()
self.relu = nn.ReLU(True)
def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels):
if num_branches != len(num_blocks):
error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format(num_branches, len(num_blocks))
logger.error(error_msg)
raise ValueError(error_msg)
if num_branches != len(num_channels):
error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format(
num_branches, len(num_channels)
)
logger.error(error_msg)
raise ValueError(error_msg)
if num_branches != len(num_inchannels):
error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format(
num_branches, len(num_inchannels)
)
logger.error(error_msg)
raise ValueError(error_msg)
def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
downsample = None
if (
stride != 1
or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion
):
downsample = nn.Sequential(
nn.Conv2d(
self.num_inchannels[branch_index],
num_channels[branch_index] * block.expansion,
kernel_size=1,
stride=stride,
bias=False,
),
nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(
block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)
)
self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion
for _ in range(1, num_blocks[branch_index]):
layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index]))
return nn.Sequential(*layers)
def _make_branches(self, num_branches, block, num_blocks, num_channels):
branches = []
for i in range(num_branches):
branches.append(self._make_one_branch(i, block, num_blocks, num_channels))
return nn.ModuleList(branches)
def _make_fuse_layers(self):
if self.num_branches == 1:
return None
num_branches = self.num_branches
num_inchannels = self.num_inchannels
fuse_layers = []
for i in range(num_branches if self.multi_scale_output else 1):
fuse_layer = []
for j in range(num_branches):
if j > i:
fuse_layer.append(
nn.Sequential(
nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False),
nn.BatchNorm2d(num_inchannels[i]),
nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"),
)
)
elif j == i:
fuse_layer.append(None)
else:
conv3x3s = []
for k in range(i - j):
if k == i - j - 1:
num_outchannels_conv3x3 = num_inchannels[i]
conv3x3s.append(
nn.Sequential(
nn.Conv2d(
num_inchannels[j],
num_outchannels_conv3x3,
3,
2,
1,
bias=False,
),
nn.BatchNorm2d(num_outchannels_conv3x3),
)
)
else:
num_outchannels_conv3x3 = num_inchannels[j]
conv3x3s.append(
nn.Sequential(
nn.Conv2d(
num_inchannels[j],
num_outchannels_conv3x3,
3,
2,
1,
bias=False,
),
nn.BatchNorm2d(num_outchannels_conv3x3),
nn.ReLU(True),
)
)
fuse_layer.append(nn.Sequential(*conv3x3s))
fuse_layers.append(nn.ModuleList(fuse_layer))
return nn.ModuleList(fuse_layers)
def get_num_inchannels(self):
return self.num_inchannels
def forward(self, x):
if self.num_branches == 1:
return [self.branches[0](x[0])]
for i in range(self.num_branches):
x[i] = self.branches[i](x[i])
x_fuse = []
for i in range(len(self.fuse_layers)):
y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
for j in range(1, self.num_branches):
if i == j:
y = y + x[j]
else:
z = self.fuse_layers[i][j](x[j])[:, :, : y.shape[2], : y.shape[3]]
y = y + z
x_fuse.append(self.relu(y))
return x_fuse
blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
class PoseHigherResolutionNet(Backbone):
""" PoseHigherResolutionNet
Composed of several HighResolutionModule tied together with ConvNets
Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure
arXiv: https://arxiv.org/abs/1908.10357
"""
def __init__(self, cfg, **kwargs):
self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES
super(PoseHigherResolutionNet, self).__init__()
# stem net
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.layer1 = self._make_layer(Bottleneck, 64, 4)
self.stage2_cfg = cfg.MODEL.HRNET.STAGE2
num_channels = self.stage2_cfg.NUM_CHANNELS
block = blocks_dict[self.stage2_cfg.BLOCK]
num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
self.transition1 = self._make_transition_layer([256], num_channels)
self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)
self.stage3_cfg = cfg.MODEL.HRNET.STAGE3
num_channels = self.stage3_cfg.NUM_CHANNELS
block = blocks_dict[self.stage3_cfg.BLOCK]
num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)
self.stage4_cfg = cfg.MODEL.HRNET.STAGE4
num_channels = self.stage4_cfg.NUM_CHANNELS
block = blocks_dict[self.stage4_cfg.BLOCK]
num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
self.stage4, pre_stage_channels = self._make_stage(
self.stage4_cfg, num_channels, multi_scale_output=True
)
self._out_features = []
self._out_feature_channels = {}
self._out_feature_strides = {}
for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES):
self._out_features.append("p%d" % (i + 1))
self._out_feature_channels.update(
{self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]}
)
self._out_feature_strides.update({self._out_features[-1]: 1})
def _get_deconv_cfg(self, deconv_kernel):
if deconv_kernel == 4:
padding = 1
output_padding = 0
elif deconv_kernel == 3:
padding = 1
output_padding = 1
        elif deconv_kernel == 2:
            padding = 0
            output_padding = 0
        else:
            raise ValueError("Unsupported deconv kernel size: {}".format(deconv_kernel))
        return deconv_kernel, padding, output_padding
def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer):
num_branches_cur = len(num_channels_cur_layer)
num_branches_pre = len(num_channels_pre_layer)
transition_layers = []
for i in range(num_branches_cur):
if i < num_branches_pre:
if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
transition_layers.append(
nn.Sequential(
nn.Conv2d(
num_channels_pre_layer[i],
num_channels_cur_layer[i],
3,
1,
1,
bias=False,
),
nn.BatchNorm2d(num_channels_cur_layer[i]),
nn.ReLU(inplace=True),
)
)
else:
transition_layers.append(None)
else:
conv3x3s = []
for j in range(i + 1 - num_branches_pre):
inchannels = num_channels_pre_layer[-1]
outchannels = (
num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels
)
conv3x3s.append(
nn.Sequential(
nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
nn.BatchNorm2d(outchannels),
nn.ReLU(inplace=True),
)
)
transition_layers.append(nn.Sequential(*conv3x3s))
return nn.ModuleList(transition_layers)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(
self.inplanes,
planes * block.expansion,
kernel_size=1,
stride=stride,
bias=False,
),
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
num_modules = layer_config["NUM_MODULES"]
num_branches = layer_config["NUM_BRANCHES"]
num_blocks = layer_config["NUM_BLOCKS"]
num_channels = layer_config["NUM_CHANNELS"]
block = blocks_dict[layer_config["BLOCK"]]
modules = []
for i in range(num_modules):
            # multi_scale_output is only used by the last module
if not multi_scale_output and i == num_modules - 1:
reset_multi_scale_output = False
else:
reset_multi_scale_output = True
modules.append(
HighResolutionModule(
num_branches,
block,
num_blocks,
num_inchannels,
num_channels,
reset_multi_scale_output,
)
)
num_inchannels = modules[-1].get_num_inchannels()
return nn.Sequential(*modules), num_inchannels
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu(x)
x = self.layer1(x)
x_list = []
for i in range(self.stage2_cfg.NUM_BRANCHES):
if self.transition1[i] is not None:
x_list.append(self.transition1[i](x))
else:
x_list.append(x)
y_list = self.stage2(x_list)
x_list = []
for i in range(self.stage3_cfg.NUM_BRANCHES):
if self.transition2[i] is not None:
x_list.append(self.transition2[i](y_list[-1]))
else:
x_list.append(y_list[i])
y_list = self.stage3(x_list)
x_list = []
for i in range(self.stage4_cfg.NUM_BRANCHES):
if self.transition3[i] is not None:
x_list.append(self.transition3[i](y_list[-1]))
else:
x_list.append(y_list[i])
y_list = self.stage4(x_list)
assert len(self._out_features) == len(y_list)
return dict(zip(self._out_features, y_list)) # final_outputs
@BACKBONE_REGISTRY.register()
def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec):
model = PoseHigherResolutionNet(cfg)
return model
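# --- Hedged usage sketch (added for illustration; not part of the original file) ---
# Shows one way to instantiate this backbone directly. It assumes that the
# densepose `add_hrnet_config` helper (used alongside this backbone in apply_net.py)
# populates the MODEL.HRNET.* defaults read above (STEM_INPLANES, STAGE2/3/4, ...);
# the input resolution is arbitrary.
def _example_build_pose_hrnet_backbone():
    import torch
    from detectron2.config import get_cfg
    from densepose import add_hrnet_config  # local import to avoid a circular import
    cfg = get_cfg()
    add_hrnet_config(cfg)  # assumption: fills in a default HRNet stage configuration
    backbone = build_pose_hrnet_backbone(cfg, ShapeSpec(channels=3))
    # The backbone returns a dict keyed "p1".."pN", one entry per stage-4 branch.
    features = backbone(torch.randn(1, 3, 256, 256))
    return {name: f.shape for name, f in features.items()}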

View File

@ -0,0 +1,83 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import List, Tuple
import torch
from detectron2.structures import Instances
from ..data.structures import DensePoseOutput
def densepose_inference(
densepose_outputs: Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
densepose_confidences: Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
detections: List[Instances],
):
"""
Infer dense pose estimate based on outputs from the DensePose head
and detections. The estimate for each detection instance is stored in its
"pred_densepose" attribute.
Args:
densepose_outputs (tuple(`torch.Tensor`)): iterable containing 4 elements:
- s (:obj: `torch.Tensor`): coarse segmentation tensor of size (N, A, H, W),
- i (:obj: `torch.Tensor`): fine segmentation tensor of size (N, C, H, W),
- u (:obj: `torch.Tensor`): U coordinates for each class of size (N, C, H, W),
- v (:obj: `torch.Tensor`): V coordinates for each class of size (N, C, H, W),
where N is the total number of detections in a batch,
            A is the number of coarse segmentation labels
(e.g. 15 for coarse body parts + background),
C is the number of fine segmentation labels
(e.g. 25 for fine body parts + background),
W is the resolution along the X axis
H is the resolution along the Y axis
        densepose_confidences (tuple(`torch.Tensor`)): iterable containing 6 elements:
- sigma_1 (:obj: `torch.Tensor`): global confidences for UV coordinates
of size (N, C, H, W)
- sigma_2 (:obj: `torch.Tensor`): individual confidences for UV coordinates
of size (N, C, H, W)
- kappa_u (:obj: `torch.Tensor`): first component of confidence direction
vector of size (N, C, H, W)
- kappa_v (:obj: `torch.Tensor`): second component of confidence direction
vector of size (N, C, H, W)
- fine_segm_confidence (:obj: `torch.Tensor`): confidence for fine
segmentation of size (N, 1, H, W)
- coarse_segm_confidence (:obj: `torch.Tensor`): confidence for coarse
segmentation of size (N, 1, H, W)
detections (list[Instances]): A list of N Instances, where N is the number of images
in the batch. Instances are modified by this method: "pred_densepose" attribute
is added to each instance, the attribute contains the corresponding
DensePoseOutput object.
"""
# DensePose outputs: segmentation, body part indices, U, V
s, index_uv, u, v = densepose_outputs
(
sigma_1,
sigma_2,
kappa_u,
kappa_v,
fine_segm_confidence,
coarse_segm_confidence,
) = densepose_confidences
k = 0
for detection in detections:
n_i = len(detection)
s_i = s[k : k + n_i]
index_uv_i = index_uv[k : k + n_i]
u_i = u[k : k + n_i]
v_i = v[k : k + n_i]
_local_vars = locals()
confidences = {
name: _local_vars[name][k : k + n_i]
for name in (
"sigma_1",
"sigma_2",
"kappa_u",
"kappa_v",
"fine_segm_confidence",
"coarse_segm_confidence",
)
if _local_vars.get(name) is not None
}
densepose_output_i = DensePoseOutput(s_i, index_uv_i, u_i, v_i, confidences)
detection.pred_densepose = densepose_output_i
k += n_i
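# --- Hedged usage sketch (added for illustration; not part of the original file) ---
# Minimal end-to-end call with zero-valued tensors, showing that every Instances
# object gains a "pred_densepose" attribute holding its slice of the outputs.
# All sizes are arbitrary; the confidence tuple is filled with None values, which
# the function above silently skips.
def _example_densepose_inference():
    n_total, a, c, h, w = 3, 2, 25, 56, 56
    densepose_outputs = (
        torch.zeros(n_total, a, h, w),  # coarse segmentation scores
        torch.zeros(n_total, c, h, w),  # fine segmentation scores
        torch.zeros(n_total, c, h, w),  # U coordinates
        torch.zeros(n_total, c, h, w),  # V coordinates
    )
    densepose_confidences = (None, None, None, None, None, None)
    # Two images: the first with two detections, the second with one.
    detections = []
    for n_i in (2, 1):
        instances = Instances((256, 256))
        instances.scores = torch.ones(n_i)  # any per-instance field defines len()
        detections.append(instances)
    densepose_inference(densepose_outputs, densepose_confidences, detections)
    return [det.pred_densepose for det in detections]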

View File

@ -0,0 +1,3 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .densepose_losses import DensePoseLosses

View File

@ -0,0 +1,729 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import math
from dataclasses import dataclass
from typing import Iterable, Optional
import torch
from torch import nn
from torch.nn import functional as F
from detectron2.structures import Instances
from .. import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z):
"""
Computes utility values for linear interpolation at points v.
The points are given as normalized offsets in the source interval
(v0_src, v0_src + size_src), more precisely:
v = v0_src + v_norm * size_src / 256.0
The computed utilities include lower points v_lo, upper points v_hi,
interpolation weights v_w and flags j_valid indicating whether the
    points fall into the destination interval (v0_dst, v0_dst + size_dst).
Args:
v_norm (:obj: `torch.Tensor`): tensor of size N containing
normalized point offsets
v0_src (:obj: `torch.Tensor`): tensor of size N containing
left bounds of source intervals for normalized points
size_src (:obj: `torch.Tensor`): tensor of size N containing
source interval sizes for normalized points
v0_dst (:obj: `torch.Tensor`): tensor of size N containing
left bounds of destination intervals
size_dst (:obj: `torch.Tensor`): tensor of size N containing
destination interval sizes
size_z (int): interval size for data to be interpolated
Returns:
v_lo (:obj: `torch.Tensor`): int tensor of size N containing
indices of lower values used for interpolation, all values are
integers from [0, size_z - 1]
v_hi (:obj: `torch.Tensor`): int tensor of size N containing
indices of upper values used for interpolation, all values are
integers from [0, size_z - 1]
v_w (:obj: `torch.Tensor`): float tensor of size N containing
interpolation weights
j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing
            0 for points outside the destination interval
            (v0_dst, v0_dst + size_dst) and 1 otherwise
"""
v = v0_src + v_norm * size_src / 256.0
j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst)
v_grid = (v - v0_dst) * size_z / size_dst
v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1)
v_hi = (v_lo + 1).clamp(max=size_z - 1)
v_grid = torch.min(v_hi.float(), v_grid)
v_w = v_grid - v_lo.float()
return v_lo, v_hi, v_w, j_valid
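# --- Hedged usage sketch (added for illustration; not part of the original file) ---
# Toy example: three annotated points, given as normalized offsets inside a
# 100 px wide ground-truth interval starting at 0, are mapped into a 56-bin grid
# attached to an 80 px wide estimated interval starting at 10. All numbers are
# made up.
def _example_linear_interpolation_utilities():
    v_norm = torch.tensor([0.0, 128.0, 255.0])  # normalized offsets in [0, 256)
    v0_src = torch.zeros(3)                     # GT interval start
    size_src = torch.full((3,), 100.0)          # GT interval size
    v0_dst = torch.full((3,), 10.0)             # estimated interval start
    size_dst = torch.full((3,), 80.0)           # estimated interval size
    v_lo, v_hi, v_w, j_valid = _linear_interpolation_utilities(
        v_norm, v0_src, size_src, v0_dst, size_dst, size_z=56
    )
    # The first point (absolute coordinate 0) lies before the estimated interval,
    # so j_valid marks it as invalid; the others receive clamped bin indices and
    # interpolation weights.
    return v_lo, v_hi, v_w, j_valid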
class SingleTensorsHelper:
def __init__(self, proposals_with_gt):
with torch.no_grad():
(
index_uv_img,
i_with_dp,
bbox_xywh_est,
bbox_xywh_gt,
index_gt_all,
x_norm,
y_norm,
u_gt_all,
v_gt_all,
s_gt,
index_bbox,
) = _extract_single_tensors_from_matches(proposals_with_gt)
for k, v in locals().items():
if k not in ["self", "proposals_with_gt"]:
setattr(self, k, v)
class BilinearInterpolationHelper:
"""
Args:
tensors_helper (SingleTensorsHelper)
j_valid (:obj: `torch.Tensor`): uint8 tensor of size M containing
0 for points to be discarded and 1 for points to be selected
y_lo (:obj: `torch.Tensor`): int tensor of indices of upper values
in z_est for each point
y_hi (:obj: `torch.Tensor`): int tensor of indices of lower values
in z_est for each point
x_lo (:obj: `torch.Tensor`): int tensor of indices of left values
in z_est for each point
x_hi (:obj: `torch.Tensor`): int tensor of indices of right values
in z_est for each point
w_ylo_xlo (:obj: `torch.Tensor`): float tensor of size M;
contains upper-left value weight for each point
w_ylo_xhi (:obj: `torch.Tensor`): float tensor of size M;
contains upper-right value weight for each point
w_yhi_xlo (:obj: `torch.Tensor`): float tensor of size M;
contains lower-left value weight for each point
w_yhi_xhi (:obj: `torch.Tensor`): float tensor of size M;
contains lower-right value weight for each point
"""
def __init__(
self,
tensors_helper,
j_valid,
y_lo,
y_hi,
x_lo,
x_hi,
w_ylo_xlo,
w_ylo_xhi,
w_yhi_xlo,
w_yhi_xhi,
):
for k, v in locals().items():
if k != "self":
setattr(self, k, v)
@staticmethod
def from_matches(tensors_helper, densepose_outputs_size):
zh, zw = densepose_outputs_size[2], densepose_outputs_size[3]
x0_gt, y0_gt, w_gt, h_gt = tensors_helper.bbox_xywh_gt[tensors_helper.index_bbox].unbind(1)
x0_est, y0_est, w_est, h_est = tensors_helper.bbox_xywh_est[
tensors_helper.index_bbox
].unbind(dim=1)
x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities(
tensors_helper.x_norm, x0_gt, w_gt, x0_est, w_est, zw
)
y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities(
tensors_helper.y_norm, y0_gt, h_gt, y0_est, h_est, zh
)
j_valid = jx_valid * jy_valid
w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w)
w_ylo_xhi = x_w * (1.0 - y_w)
w_yhi_xlo = (1.0 - x_w) * y_w
w_yhi_xhi = x_w * y_w
return BilinearInterpolationHelper(
tensors_helper,
j_valid,
y_lo,
y_hi,
x_lo,
x_hi,
w_ylo_xlo,
w_ylo_xhi,
w_yhi_xlo,
w_yhi_xhi,
)
def extract_at_points(
self,
z_est,
slice_index_uv=None,
w_ylo_xlo=None,
w_ylo_xhi=None,
w_yhi_xlo=None,
w_yhi_xhi=None,
):
"""
Extract ground truth values z_gt for valid point indices and estimated
values z_est using bilinear interpolation over top-left (y_lo, x_lo),
top-right (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right
(y_hi, x_hi) values in z_est with corresponding weights:
w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi.
Use slice_index_uv to slice dim=1 in z_est
"""
index_gt_all = self.tensors_helper.index_gt_all
slice_index_uv = index_gt_all if slice_index_uv is None else slice_index_uv
w_ylo_xlo = self.w_ylo_xlo if w_ylo_xlo is None else w_ylo_xlo
w_ylo_xhi = self.w_ylo_xhi if w_ylo_xhi is None else w_ylo_xhi
w_yhi_xlo = self.w_yhi_xlo if w_yhi_xlo is None else w_yhi_xlo
w_yhi_xhi = self.w_yhi_xhi if w_yhi_xhi is None else w_yhi_xhi
index_bbox = self.tensors_helper.index_bbox
z_est_sampled = (
z_est[index_bbox, slice_index_uv, self.y_lo, self.x_lo] * w_ylo_xlo
+ z_est[index_bbox, slice_index_uv, self.y_lo, self.x_hi] * w_ylo_xhi
+ z_est[index_bbox, slice_index_uv, self.y_hi, self.x_lo] * w_yhi_xlo
+ z_est[index_bbox, slice_index_uv, self.y_hi, self.x_hi] * w_yhi_xhi
)
return z_est_sampled
def _resample_data(
z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode="nearest", padding_mode="zeros"
):
"""
Args:
z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be
resampled
bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing
source bounding boxes in format XYWH
bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing
destination bounding boxes in format XYWH
Return:
        zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout)
            with values of z resampled into the destination bounding boxes
"""
n = bbox_xywh_src.size(0)
assert n == bbox_xywh_dst.size(0), (
"The number of "
"source ROIs for resampling ({}) should be equal to the number "
"of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0))
)
x0src, y0src, wsrc, hsrc = bbox_xywh_src.unbind(dim=1)
x0dst, y0dst, wdst, hdst = bbox_xywh_dst.unbind(dim=1)
x0dst_norm = 2 * (x0dst - x0src) / wsrc - 1
y0dst_norm = 2 * (y0dst - y0src) / hsrc - 1
x1dst_norm = 2 * (x0dst + wdst - x0src) / wsrc - 1
y1dst_norm = 2 * (y0dst + hdst - y0src) / hsrc - 1
grid_w = torch.arange(wout, device=z.device, dtype=torch.float) / wout
grid_h = torch.arange(hout, device=z.device, dtype=torch.float) / hout
grid_w_expanded = grid_w[None, None, :].expand(n, hout, wout)
grid_h_expanded = grid_h[None, :, None].expand(n, hout, wout)
dx_expanded = (x1dst_norm - x0dst_norm)[:, None, None].expand(n, hout, wout)
dy_expanded = (y1dst_norm - y0dst_norm)[:, None, None].expand(n, hout, wout)
x0_expanded = x0dst_norm[:, None, None].expand(n, hout, wout)
y0_expanded = y0dst_norm[:, None, None].expand(n, hout, wout)
grid_x = grid_w_expanded * dx_expanded + x0_expanded
grid_y = grid_h_expanded * dy_expanded + y0_expanded
grid = torch.stack((grid_x, grid_y), dim=3)
# resample Z from (N, C, H, W) into (N, C, Hout, Wout)
zresampled = F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True)
return zresampled
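# --- Hedged usage sketch (added for illustration; not part of the original file) ---
# Resamples a 1x1x4x4 ramp from its full source box into a destination box that
# covers only its right half; with "nearest" mode the output is sampled from the
# right-hand columns of the input. Box values are arbitrary.
def _example_resample_data():
    z = torch.arange(16, dtype=torch.float32).view(1, 1, 4, 4)
    bbox_xywh_src = torch.tensor([[0.0, 0.0, 4.0, 4.0]])  # the whole input
    bbox_xywh_dst = torch.tensor([[2.0, 0.0, 2.0, 4.0]])  # its right half
    zresampled = _resample_data(
        z, bbox_xywh_src, bbox_xywh_dst, wout=4, hout=4, mode="nearest"
    )
    return zresampled  # shape (1, 1, 4, 4)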
def _extract_single_tensors_from_matches_one_image(
proposals_targets, bbox_with_dp_offset, bbox_global_offset
):
i_gt_all = []
x_norm_all = []
y_norm_all = []
u_gt_all = []
v_gt_all = []
s_gt_all = []
bbox_xywh_gt_all = []
bbox_xywh_est_all = []
    # i_bbox_all == k should be true for all data that corresponds
# to bbox_xywh_gt[k] and bbox_xywh_est[k]
# index k here is global wrt images
i_bbox_all = []
# at offset k (k is global) contains index of bounding box data
# within densepose output tensor
i_with_dp = []
boxes_xywh_est = proposals_targets.proposal_boxes.clone()
boxes_xywh_gt = proposals_targets.gt_boxes.clone()
n_i = len(boxes_xywh_est)
assert n_i == len(boxes_xywh_gt)
if n_i:
boxes_xywh_est.tensor[:, 2] -= boxes_xywh_est.tensor[:, 0]
boxes_xywh_est.tensor[:, 3] -= boxes_xywh_est.tensor[:, 1]
boxes_xywh_gt.tensor[:, 2] -= boxes_xywh_gt.tensor[:, 0]
boxes_xywh_gt.tensor[:, 3] -= boxes_xywh_gt.tensor[:, 1]
if hasattr(proposals_targets, "gt_densepose"):
densepose_gt = proposals_targets.gt_densepose
for k, box_xywh_est, box_xywh_gt, dp_gt in zip(
range(n_i), boxes_xywh_est.tensor, boxes_xywh_gt.tensor, densepose_gt
):
if (dp_gt is not None) and (len(dp_gt.x) > 0):
i_gt_all.append(dp_gt.i)
x_norm_all.append(dp_gt.x)
y_norm_all.append(dp_gt.y)
u_gt_all.append(dp_gt.u)
v_gt_all.append(dp_gt.v)
s_gt_all.append(dp_gt.segm.unsqueeze(0))
bbox_xywh_gt_all.append(box_xywh_gt.view(-1, 4))
bbox_xywh_est_all.append(box_xywh_est.view(-1, 4))
i_bbox_k = torch.full_like(dp_gt.i, bbox_with_dp_offset + len(i_with_dp))
i_bbox_all.append(i_bbox_k)
i_with_dp.append(bbox_global_offset + k)
return (
i_gt_all,
x_norm_all,
y_norm_all,
u_gt_all,
v_gt_all,
s_gt_all,
bbox_xywh_gt_all,
bbox_xywh_est_all,
i_bbox_all,
i_with_dp,
)
def _extract_single_tensors_from_matches(proposals_with_targets):
i_img = []
i_gt_all = []
x_norm_all = []
y_norm_all = []
u_gt_all = []
v_gt_all = []
s_gt_all = []
bbox_xywh_gt_all = []
bbox_xywh_est_all = []
i_bbox_all = []
i_with_dp_all = []
n = 0
for i, proposals_targets_per_image in enumerate(proposals_with_targets):
n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0)
if not n_i:
continue
(
i_gt_img,
x_norm_img,
y_norm_img,
u_gt_img,
v_gt_img,
s_gt_img,
bbox_xywh_gt_img,
bbox_xywh_est_img,
i_bbox_img,
i_with_dp_img,
) = _extract_single_tensors_from_matches_one_image( # noqa
proposals_targets_per_image, len(i_with_dp_all), n
)
i_gt_all.extend(i_gt_img)
x_norm_all.extend(x_norm_img)
y_norm_all.extend(y_norm_img)
u_gt_all.extend(u_gt_img)
v_gt_all.extend(v_gt_img)
s_gt_all.extend(s_gt_img)
bbox_xywh_gt_all.extend(bbox_xywh_gt_img)
bbox_xywh_est_all.extend(bbox_xywh_est_img)
i_bbox_all.extend(i_bbox_img)
i_with_dp_all.extend(i_with_dp_img)
i_img.extend([i] * len(i_with_dp_img))
n += n_i
# concatenate all data into a single tensor
if (n > 0) and (len(i_with_dp_all) > 0):
i_gt = torch.cat(i_gt_all, 0).long()
x_norm = torch.cat(x_norm_all, 0)
y_norm = torch.cat(y_norm_all, 0)
u_gt = torch.cat(u_gt_all, 0)
v_gt = torch.cat(v_gt_all, 0)
s_gt = torch.cat(s_gt_all, 0)
bbox_xywh_gt = torch.cat(bbox_xywh_gt_all, 0)
bbox_xywh_est = torch.cat(bbox_xywh_est_all, 0)
i_bbox = torch.cat(i_bbox_all, 0).long()
else:
i_gt = None
x_norm = None
y_norm = None
u_gt = None
v_gt = None
s_gt = None
bbox_xywh_gt = None
bbox_xywh_est = None
i_bbox = None
return (
i_img,
i_with_dp_all,
bbox_xywh_est,
bbox_xywh_gt,
i_gt,
x_norm,
y_norm,
u_gt,
v_gt,
s_gt,
i_bbox,
)
@dataclass
class DataForMaskLoss:
"""
Contains mask GT and estimated data for proposals from multiple images:
"""
# tensor of size (K, H, W) containing GT labels
masks_gt: Optional[torch.Tensor] = None
# tensor of size (K, C, H, W) containing estimated scores
masks_est: Optional[torch.Tensor] = None
def _extract_data_for_mask_loss_from_matches(
proposals_targets: Iterable[Instances], estimated_segm: torch.Tensor
) -> DataForMaskLoss:
"""
Extract data for mask loss from instances that contain matched GT and
estimated bounding boxes.
Args:
proposals_targets: Iterable[Instances]
matched GT and estimated results, each item in the iterable
corresponds to data in 1 image
        estimated_segm: torch.Tensor of size (N, C, H, W); its spatial size
            defines the size to which GT masks are resized
Return:
masks_est: tensor(K, C, H, W) of float - class scores
masks_gt: tensor(K, H, W) of int64 - labels
"""
data = DataForMaskLoss()
masks_gt = []
offset = 0
assert estimated_segm.shape[2] == estimated_segm.shape[3], (
f"Expected estimated segmentation to have a square shape, "
f"but the actual shape is {estimated_segm.shape[2:]}"
)
mask_size = estimated_segm.shape[2]
num_proposals = sum(inst.proposal_boxes.tensor.size(0) for inst in proposals_targets)
num_estimated = estimated_segm.shape[0]
assert (
num_proposals == num_estimated
), "The number of proposals {} must be equal to the number of estimates {}".format(
num_proposals, num_estimated
)
for proposals_targets_per_image in proposals_targets:
n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0)
if not n_i:
continue
gt_masks_per_image = proposals_targets_per_image.gt_masks.crop_and_resize(
proposals_targets_per_image.proposal_boxes.tensor, mask_size
).to(device=estimated_segm.device)
masks_gt.append(gt_masks_per_image)
offset += n_i
if masks_gt:
data.masks_est = estimated_segm
data.masks_gt = torch.cat(masks_gt, dim=0)
return data
class IIDIsotropicGaussianUVLoss(nn.Module):
"""
Loss for the case of iid residuals with isotropic covariance:
$Sigma_i = sigma_i^2 I$
The loss (negative log likelihood) is then:
$1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$,
where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates
difference between estimated and ground truth UV values
For details, see:
N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
"""
def __init__(self, sigma_lower_bound: float):
super(IIDIsotropicGaussianUVLoss, self).__init__()
self.sigma_lower_bound = sigma_lower_bound
self.log2pi = math.log(2 * math.pi)
def forward(
self,
u: torch.Tensor,
v: torch.Tensor,
sigma_u: torch.Tensor,
target_u: torch.Tensor,
target_v: torch.Tensor,
):
# compute $\sigma_i^2$
# use sigma_lower_bound to avoid degenerate solution for variance
# (sigma -> 0)
sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
# compute \|delta_i\|^2
delta_t_delta = (u - target_u) ** 2 + (v - target_v) ** 2
# the total loss from the formula above:
loss = 0.5 * (self.log2pi + 2 * torch.log(sigma2) + delta_t_delta / sigma2)
return loss.sum()
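# --- Hedged worked example (added for illustration; not part of the original file) ---
# Cross-checks the module against the closed-form negative log-likelihood
# 0.5 * (log(2 pi) + 2 log(sigma^2) + ||delta||^2 / sigma^2) for a single point;
# the sigma_lower_bound value is arbitrary.
def _example_iid_isotropic_loss():
    loss_fn = IIDIsotropicGaussianUVLoss(sigma_lower_bound=0.01)
    u = torch.tensor([0.5])
    v = torch.tensor([0.5])
    target_u = torch.tensor([0.3])
    target_v = torch.tensor([0.1])
    sigma_u = torch.tensor([0.0])  # raw (pre-softplus) sigma parameter
    loss = loss_fn(u, v, sigma_u, target_u, target_v)
    sigma2 = math.log(2.0) + 0.01  # softplus(0) == log(2)
    delta2 = (0.5 - 0.3) ** 2 + (0.5 - 0.1) ** 2
    expected = 0.5 * (math.log(2 * math.pi) + 2 * math.log(sigma2) + delta2 / sigma2)
    assert abs(loss.item() - expected) < 1e-4
    return loss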
class IndepAnisotropicGaussianUVLoss(nn.Module):
"""
Loss for the case of independent residuals with anisotropic covariances:
$Sigma_i = sigma_i^2 I + r_i r_i^T$
The loss (negative log likelihood) is then:
$1/2 sum_{i=1}^n (log(2 pi)
      + log(sigma_i^2 (sigma_i^2 + ||r_i||^2))
+ ||delta_i||^2 / sigma_i^2
- <delta_i, r_i>^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$,
where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates
difference between estimated and ground truth UV values
For details, see:
N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
"""
def __init__(self, sigma_lower_bound: float):
super(IndepAnisotropicGaussianUVLoss, self).__init__()
self.sigma_lower_bound = sigma_lower_bound
self.log2pi = math.log(2 * math.pi)
def forward(
self,
u: torch.Tensor,
v: torch.Tensor,
sigma_u: torch.Tensor,
kappa_u_est: torch.Tensor,
kappa_v_est: torch.Tensor,
target_u: torch.Tensor,
target_v: torch.Tensor,
):
# compute $\sigma_i^2$
sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
# compute \|r_i\|^2
r_sqnorm2 = kappa_u_est ** 2 + kappa_v_est ** 2
delta_u = u - target_u
delta_v = v - target_v
# compute \|delta_i\|^2
delta_sqnorm = delta_u ** 2 + delta_v ** 2
delta_u_r_u = delta_u * kappa_u_est
delta_v_r_v = delta_v * kappa_v_est
# compute the scalar product <delta_i, r_i>
delta_r = delta_u_r_u + delta_v_r_v
# compute squared scalar product <delta_i, r_i>^2
delta_r_sqnorm = delta_r ** 2
denom2 = sigma2 * (sigma2 + r_sqnorm2)
loss = 0.5 * (
self.log2pi + torch.log(denom2) + delta_sqnorm / sigma2 - delta_r_sqnorm / denom2
)
return loss.sum()
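# --- Hedged worked example (added for illustration; not part of the original file) ---
# Sanity check of the formula above: with kappa_u = kappa_v = 0 the covariance
# degenerates to sigma^2 I, so the loss must coincide with
# IIDIsotropicGaussianUVLoss on the same inputs. Values are arbitrary.
def _example_indep_anisotropic_loss_reduces_to_isotropic():
    u = torch.tensor([0.5])
    v = torch.tensor([0.5])
    target_u = torch.tensor([0.3])
    target_v = torch.tensor([0.1])
    sigma_u = torch.tensor([0.0])
    zero_kappa = torch.tensor([0.0])
    aniso_loss = IndepAnisotropicGaussianUVLoss(sigma_lower_bound=0.01)
    iso_loss = IIDIsotropicGaussianUVLoss(sigma_lower_bound=0.01)
    loss_aniso = aniso_loss(u, v, sigma_u, zero_kappa, zero_kappa, target_u, target_v)
    loss_iso = iso_loss(u, v, sigma_u, target_u, target_v)
    assert abs(loss_aniso.item() - loss_iso.item()) < 1e-5
    return loss_aniso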
class DensePoseLosses(object):
def __init__(self, cfg):
# fmt: off
self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS
self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS
self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
# fmt: on
self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss(
self.confidence_model_cfg.uv_confidence.epsilon
)
elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO:
self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss(
self.confidence_model_cfg.uv_confidence.epsilon
)
def __call__(self, proposals_with_gt, densepose_outputs, densepose_confidences):
if not self.segm_trained_by_masks:
return self.produce_densepose_losses(
proposals_with_gt, densepose_outputs, densepose_confidences
)
else:
losses = {}
losses_densepose = self.produce_densepose_losses(
proposals_with_gt, densepose_outputs, densepose_confidences
)
losses.update(losses_densepose)
losses_mask = self.produce_mask_losses(
proposals_with_gt, densepose_outputs, densepose_confidences
)
losses.update(losses_mask)
return losses
def produce_fake_mask_losses(self, densepose_outputs):
losses = {}
segm_scores, _, _, _ = densepose_outputs
losses["loss_densepose_S"] = segm_scores.sum() * 0
return losses
def produce_mask_losses(self, proposals_with_gt, densepose_outputs, densepose_confidences):
if not len(proposals_with_gt):
return self.produce_fake_mask_losses(densepose_outputs)
losses = {}
# densepose outputs are computed for all images and all bounding boxes;
# i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
# the outputs will have size(0) == 3+1+2+1 == 7
segm_scores, _, _, _ = densepose_outputs
with torch.no_grad():
mask_loss_data = _extract_data_for_mask_loss_from_matches(
proposals_with_gt, segm_scores
)
if (mask_loss_data.masks_gt is None) or (mask_loss_data.masks_est is None):
return self.produce_fake_mask_losses(densepose_outputs)
losses["loss_densepose_S"] = (
F.cross_entropy(mask_loss_data.masks_est, mask_loss_data.masks_gt.long()) * self.w_segm
)
return losses
def produce_fake_densepose_losses(self, densepose_outputs, densepose_confidences):
# we need to keep the same computation graph on all the GPUs to
# perform reduction properly. Hence even if we have no data on one
# of the GPUs, we still need to generate the computation graph.
# Add fake (zero) losses in the form Tensor.sum() * 0
s, index_uv, u, v = densepose_outputs
conf_type = self.confidence_model_cfg.uv_confidence.type
(
sigma_1,
sigma_2,
kappa_u,
kappa_v,
fine_segm_confidence,
coarse_segm_confidence,
) = densepose_confidences
losses = {}
losses["loss_densepose_I"] = index_uv.sum() * 0
if not self.segm_trained_by_masks:
losses["loss_densepose_S"] = s.sum() * 0
if self.confidence_model_cfg.uv_confidence.enabled:
losses["loss_densepose_UV"] = (u.sum() + v.sum()) * 0
if conf_type == DensePoseUVConfidenceType.IID_ISO:
losses["loss_densepose_UV"] += sigma_2.sum() * 0
elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
losses["loss_densepose_UV"] += (sigma_2.sum() + kappa_u.sum() + kappa_v.sum()) * 0
else:
losses["loss_densepose_U"] = u.sum() * 0
losses["loss_densepose_V"] = v.sum() * 0
return losses
def produce_densepose_losses(self, proposals_with_gt, densepose_outputs, densepose_confidences):
losses = {}
# densepose outputs are computed for all images and all bounding boxes;
# i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
# the outputs will have size(0) == 3+1+2+1 == 7
s, index_uv, u, v = densepose_outputs
assert u.size(2) == v.size(2)
assert u.size(3) == v.size(3)
assert u.size(2) == index_uv.size(2)
assert u.size(3) == index_uv.size(3)
densepose_outputs_size = u.size()
if not len(proposals_with_gt):
return self.produce_fake_densepose_losses(densepose_outputs, densepose_confidences)
(
sigma_1,
sigma_2,
kappa_u,
kappa_v,
fine_segm_confidence,
coarse_segm_confidence,
) = densepose_confidences
conf_type = self.confidence_model_cfg.uv_confidence.type
tensors_helper = SingleTensorsHelper(proposals_with_gt)
n_batch = len(tensors_helper.i_with_dp)
# NOTE: we need to keep the same computation graph on all the GPUs to
# perform reduction properly. Hence even if we have no data on one
# of the GPUs, we still need to generate the computation graph.
# Add fake (zero) loss in the form Tensor.sum() * 0
if not n_batch:
return self.produce_fake_densepose_losses(densepose_outputs, densepose_confidences)
interpolator = BilinearInterpolationHelper.from_matches(
tensors_helper, densepose_outputs_size
)
j_valid_fg = interpolator.j_valid * (tensors_helper.index_gt_all > 0)
u_gt = tensors_helper.u_gt_all[j_valid_fg]
u_est_all = interpolator.extract_at_points(u[tensors_helper.i_with_dp])
u_est = u_est_all[j_valid_fg]
v_gt = tensors_helper.v_gt_all[j_valid_fg]
v_est_all = interpolator.extract_at_points(v[tensors_helper.i_with_dp])
v_est = v_est_all[j_valid_fg]
index_uv_gt = tensors_helper.index_gt_all[interpolator.j_valid]
index_uv_est_all = interpolator.extract_at_points(
index_uv[tensors_helper.i_with_dp],
slice_index_uv=slice(None),
w_ylo_xlo=interpolator.w_ylo_xlo[:, None],
w_ylo_xhi=interpolator.w_ylo_xhi[:, None],
w_yhi_xlo=interpolator.w_yhi_xlo[:, None],
w_yhi_xhi=interpolator.w_yhi_xhi[:, None],
)
index_uv_est = index_uv_est_all[interpolator.j_valid, :]
if self.confidence_model_cfg.uv_confidence.enabled:
sigma_2_est_all = interpolator.extract_at_points(sigma_2[tensors_helper.i_with_dp])
sigma_2_est = sigma_2_est_all[j_valid_fg]
if conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]:
kappa_u_est_all = interpolator.extract_at_points(kappa_u[tensors_helper.i_with_dp])
kappa_u_est = kappa_u_est_all[j_valid_fg]
kappa_v_est_all = interpolator.extract_at_points(kappa_v[tensors_helper.i_with_dp])
kappa_v_est = kappa_v_est_all[j_valid_fg]
        # Resample GT data to the estimated data size; there is no need to resample
        # s_est itself:
if not self.segm_trained_by_masks:
s_est = s[tensors_helper.i_with_dp]
with torch.no_grad():
s_gt = _resample_data(
tensors_helper.s_gt.unsqueeze(1),
tensors_helper.bbox_xywh_gt,
tensors_helper.bbox_xywh_est,
self.heatmap_size,
self.heatmap_size,
mode="nearest",
padding_mode="zeros",
).squeeze(1)
# add point-based losses:
if self.confidence_model_cfg.uv_confidence.enabled:
if conf_type == DensePoseUVConfidenceType.IID_ISO:
uv_loss = (
self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt)
* self.w_points
)
losses["loss_densepose_UV"] = uv_loss
elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
uv_loss = (
self.uv_loss_with_confidences(
u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt
)
* self.w_points
)
losses["loss_densepose_UV"] = uv_loss
else:
raise ValueError(f"Unknown confidence model type: {conf_type}")
else:
u_loss = F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points
losses["loss_densepose_U"] = u_loss
v_loss = F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points
losses["loss_densepose_V"] = v_loss
index_uv_loss = F.cross_entropy(index_uv_est, index_uv_gt.long()) * self.w_part
losses["loss_densepose_I"] = index_uv_loss
if not self.segm_trained_by_masks:
if self.n_segm_chan == 2:
s_gt = s_gt > 0
s_loss = F.cross_entropy(s_est, s_gt.long()) * self.w_segm
losses["loss_densepose_S"] = s_loss
return losses

View File

@ -0,0 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .chart import DensePoseChartPredictor
from .chart_confidence import DensePoseChartConfidencePredictorMixin
from .chart_with_confidence import DensePoseChartWithConfidencePredictor

View File

@ -0,0 +1,102 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
from torch import nn
from detectron2.config import CfgNode
from detectron2.layers import ConvTranspose2d, interpolate
from ..utils import initialize_module_params
class DensePoseChartPredictor(nn.Module):
"""
Predictor (last layers of a DensePose model) that takes DensePose head outputs as an input
and produces 4 tensors which represent DensePose results for predefined body parts
(patches / charts):
- coarse segmentation [N, K, H, W]
- fine segmentation [N, C, H, W]
- U coordinates [N, C, H, W]
- V coordinates [N, C, H, W]
where
- N is the number of instances
- K is the number of coarse segmentation channels (
2 = foreground / background,
15 = one of 14 body parts / background)
- C is the number of fine segmentation channels (
24 fine body parts / background)
- H and W are height and width of predictions
"""
def __init__(self, cfg: CfgNode, input_channels: int):
"""
Initialize predictor using configuration options
Args:
cfg (CfgNode): configuration options
input_channels (int): input tensor size along the channel dimension
"""
super().__init__()
dim_in = input_channels
n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
self.ann_index_lowres = ConvTranspose2d(
dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.index_uv_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.u_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.v_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
initialize_module_params(self)
def interp2d(self, tensor_nchw: torch.Tensor):
"""
Bilinear interpolation method to be used for upscaling
Args:
tensor_nchw (tensor): tensor of shape (N, C, H, W)
Return:
tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
by applying the scale factor to H and W
"""
return interpolate(
tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
)
def forward(self, head_outputs: torch.Tensor):
"""
Perform forward step on DensePose head outputs
Args:
head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
Return:
- a tuple of 4 tensors containing DensePose predictions for charts:
* coarse segmentation estimate, a tensor of shape [N, K, Hout, Wout]
* fine segmentation estimate, a tensor of shape [N, C, Hout, Wout]
* U coordinates, a tensor of shape [N, C, Hout, Wout]
* V coordinates, a tensor of shape [N, C, Hout, Wout]
- a tuple of 4 tensors containing DensePose predictions for charts at reduced resolution:
* coarse segmentation estimate, a tensor of shape [N, K, Hout / 2, Wout / 2]
* fine segmentation estimate, a tensor of shape [N, C, Hout / 2, Wout / 2]
* U coordinates, a tensor of shape [N, C, Hout / 2, Wout / 2]
* V coordinates, a tensor of shape [N, C, Hout / 2, Wout / 2]
"""
coarse_segm_lowres = self.ann_index_lowres(head_outputs)
fine_segm_lowres = self.index_uv_lowres(head_outputs)
u_lowres = self.u_lowres(head_outputs)
v_lowres = self.v_lowres(head_outputs)
coarse_segm = self.interp2d(coarse_segm_lowres)
fine_segm = self.interp2d(fine_segm_lowres)
u = self.interp2d(u_lowres)
v = self.interp2d(v_lowres)
siuv = (coarse_segm, fine_segm, u, v)
siuv_lowres = (coarse_segm_lowres, fine_segm_lowres, u_lowres, v_lowres)
return siuv, siuv_lowres
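# --- Hedged usage sketch (added for illustration; not part of the original file) ---
# Builds the predictor from a hand-rolled CfgNode carrying only the four options
# read in __init__ (the values mirror common DensePose defaults but are assumptions
# here) and runs it on a random head output. The transpose convolution (stride 2)
# followed by 2x bilinear upsampling turns 14x14 inputs into 56x56 outputs.
def _example_chart_predictor():
    from detectron2.config import CfgNode as CN
    cfg = CN()
    cfg.MODEL = CN()
    cfg.MODEL.ROI_DENSEPOSE_HEAD = CN()
    cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2
    cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
    cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
    cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
    predictor = DensePoseChartPredictor(cfg, input_channels=512)
    head_outputs = torch.randn(2, 512, 14, 14)
    (coarse_segm, fine_segm, u, v), siuv_lowres = predictor(head_outputs)
    return coarse_segm.shape, fine_segm.shape, u.shape, v.shape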

View File

@ -0,0 +1,176 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
from torch.nn import functional as F
from detectron2.config import CfgNode
from detectron2.layers import ConvTranspose2d
from ..confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
from ..utils import initialize_module_params
class DensePoseChartConfidencePredictorMixin:
"""
Predictor contains the last layers of a DensePose model that take DensePose head
outputs as an input and produce model outputs. Confidence predictor mixin is used
to generate confidences for segmentation and UV tensors estimated by some
base predictor. Several assumptions need to hold for the base predictor:
1) the `forward` method must return SIUV tuple as the first result (
S = coarse segmentation, I = fine segmentation, U and V are intrinsic
chart coordinates)
2) `interp2d` method must be defined to perform bilinear interpolation;
the same method is typically used for SIUV and confidences
Confidence predictor mixin provides confidence estimates, as described in:
N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
from Noisy Labels, NeurIPS 2019
A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
"""
def __init__(self, cfg: CfgNode, input_channels: int):
"""
Initialize confidence predictor using configuration options.
Args:
cfg (CfgNode): configuration options
input_channels (int): number of input channels
"""
# we rely on base predictor to call nn.Module.__init__
super().__init__(cfg, input_channels)
self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
self._initialize_confidence_estimation_layers(cfg, input_channels)
initialize_module_params(self)
def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
"""
Initialize confidence estimation layers based on configuration options
Args:
cfg (CfgNode): configuration options
dim_in (int): number of input channels
"""
dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
if self.confidence_model_cfg.uv_confidence.enabled:
if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
self.sigma_2_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
elif (
self.confidence_model_cfg.uv_confidence.type
== DensePoseUVConfidenceType.INDEP_ANISO
):
self.sigma_2_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.kappa_u_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.kappa_v_lowres = ConvTranspose2d(
dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
else:
                raise ValueError(
                    f"Unknown confidence model type: "
                    f"{self.confidence_model_cfg.uv_confidence.type}"
                )
if self.confidence_model_cfg.segm_confidence.enabled:
self.fine_segm_confidence_lowres = ConvTranspose2d(
dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
self.coarse_segm_confidence_lowres = ConvTranspose2d(
dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
)
def forward(self, head_outputs: torch.Tensor):
"""
Perform forward operation on head outputs used as inputs for the predictor.
Calls forward method from the base predictor and uses its outputs to compute
confidences.
Args:
head_outputs (Tensor): head outputs used as predictor inputs
Return:
A tuple containing the following entries:
- SIUV tuple with possibly modified segmentation tensors
- various other outputs from the base predictor
- 6 tensors with estimated confidence model parameters at full resolution
(sigma_1, sigma_2, kappa_u, kappa_v, fine_segm_confidence, coarse_segm_confidence)
- 6 tensors with estimated confidence model parameters at half resolution
(sigma_1, sigma_2, kappa_u, kappa_v, fine_segm_confidence, coarse_segm_confidence)
"""
# assuming base class returns SIUV estimates in its first result
base_predictor_outputs = super().forward(head_outputs)
siuv = (
base_predictor_outputs[0]
if isinstance(base_predictor_outputs, tuple)
else base_predictor_outputs
)
coarse_segm, fine_segm, u, v = siuv
sigma_1, sigma_2, kappa_u, kappa_v = None, None, None, None
sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres = None, None, None, None
fine_segm_confidence_lowres, fine_segm_confidence = None, None
coarse_segm_confidence_lowres, coarse_segm_confidence = None, None
if self.confidence_model_cfg.uv_confidence.enabled:
if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
sigma_2_lowres = self.sigma_2_lowres(head_outputs)
# assuming base class defines interp2d method for bilinear interpolation
sigma_2 = self.interp2d(sigma_2_lowres)
elif (
self.confidence_model_cfg.uv_confidence.type
== DensePoseUVConfidenceType.INDEP_ANISO
):
sigma_2_lowres = self.sigma_2_lowres(head_outputs)
kappa_u_lowres = self.kappa_u_lowres(head_outputs)
kappa_v_lowres = self.kappa_v_lowres(head_outputs)
# assuming base class defines interp2d method for bilinear interpolation
sigma_2 = self.interp2d(sigma_2_lowres)
kappa_u = self.interp2d(kappa_u_lowres)
kappa_v = self.interp2d(kappa_v_lowres)
else:
                raise ValueError(
                    f"Unknown confidence model type: "
                    f"{self.confidence_model_cfg.uv_confidence.type}"
                )
if self.confidence_model_cfg.segm_confidence.enabled:
fine_segm_confidence_lowres = self.fine_segm_confidence_lowres(head_outputs)
# assuming base class defines interp2d method for bilinear interpolation
fine_segm_confidence = self.interp2d(fine_segm_confidence_lowres)
fine_segm_confidence = (
F.softplus(fine_segm_confidence) + self.confidence_model_cfg.segm_confidence.epsilon
)
fine_segm = fine_segm * torch.repeat_interleave(
fine_segm_confidence, fine_segm.shape[1], dim=1
)
coarse_segm_confidence_lowres = self.coarse_segm_confidence_lowres(head_outputs)
# assuming base class defines interp2d method for bilinear interpolation
coarse_segm_confidence = self.interp2d(coarse_segm_confidence_lowres)
coarse_segm_confidence = (
F.softplus(coarse_segm_confidence)
+ self.confidence_model_cfg.segm_confidence.epsilon
)
coarse_segm = coarse_segm * torch.repeat_interleave(
coarse_segm_confidence, coarse_segm.shape[1], dim=1
)
results = []
# append SIUV with possibly modified segmentation tensors
results.append((coarse_segm, fine_segm, u, v))
# append the rest of base predictor outputs
if isinstance(base_predictor_outputs, tuple):
results.extend(base_predictor_outputs[1:])
# append hi-res confidence estimates
results.append(
(sigma_1, sigma_2, kappa_u, kappa_v, fine_segm_confidence, coarse_segm_confidence)
)
# append lo-res confidence estimates
results.append(
(
sigma_1_lowres,
sigma_2_lowres,
kappa_u_lowres,
kappa_v_lowres,
fine_segm_confidence_lowres,
coarse_segm_confidence_lowres,
)
)
return tuple(results)

View File

@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from . import DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
class DensePoseChartWithConfidencePredictor(
DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
):
"""
Predictor that combines chart and chart confidence estimation
"""
pass

View File

@ -0,0 +1,263 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import fvcore.nn.weight_init as weight_init
import torch
from torch import nn
from torch.nn import functional as F
from detectron2.config import CfgNode
from detectron2.layers import Conv2d
from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class DensePoseDeepLabHead(nn.Module):
"""
DensePose head using DeepLabV3 model from
"Rethinking Atrous Convolution for Semantic Image Segmentation"
<https://arxiv.org/abs/1706.05587>.
"""
def __init__(self, cfg: CfgNode, input_channels: int):
super(DensePoseDeepLabHead, self).__init__()
# fmt: off
hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM
self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON
# fmt: on
pad_size = kernel_size // 2
n_channels = input_channels
self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56
self.add_module("ASPP", self.ASPP)
if self.use_nonlocal:
self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True)
self.add_module("NLBlock", self.NLBlock)
# weight_init.c2_msra_fill(self.ASPP)
for i in range(self.n_stacked_convs):
norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None
layer = Conv2d(
n_channels,
hidden_dim,
kernel_size,
stride=1,
padding=pad_size,
bias=not norm,
norm=norm_module,
)
weight_init.c2_msra_fill(layer)
n_channels = hidden_dim
layer_name = self._get_layer_name(i)
self.add_module(layer_name, layer)
self.n_out_channels = hidden_dim
# initialize_module_params(self)
def forward(self, features):
x0 = features
x = self.ASPP(x0)
if self.use_nonlocal:
x = self.NLBlock(x)
output = x
for i in range(self.n_stacked_convs):
layer_name = self._get_layer_name(i)
x = getattr(self, layer_name)(x)
x = F.relu(x)
output = x
return output
def _get_layer_name(self, i: int):
layer_name = "body_conv_fcn{}".format(i + 1)
return layer_name
# Copied from
# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
# See https://arxiv.org/pdf/1706.05587.pdf for details
class ASPPConv(nn.Sequential):
def __init__(self, in_channels, out_channels, dilation):
modules = [
nn.Conv2d(
in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False
),
nn.GroupNorm(32, out_channels),
nn.ReLU(),
]
super(ASPPConv, self).__init__(*modules)
class ASPPPooling(nn.Sequential):
def __init__(self, in_channels, out_channels):
super(ASPPPooling, self).__init__(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_channels, out_channels, 1, bias=False),
nn.GroupNorm(32, out_channels),
nn.ReLU(),
)
def forward(self, x):
size = x.shape[-2:]
x = super(ASPPPooling, self).forward(x)
return F.interpolate(x, size=size, mode="bilinear", align_corners=False)
class ASPP(nn.Module):
def __init__(self, in_channels, atrous_rates, out_channels):
super(ASPP, self).__init__()
modules = []
modules.append(
nn.Sequential(
nn.Conv2d(in_channels, out_channels, 1, bias=False),
nn.GroupNorm(32, out_channels),
nn.ReLU(),
)
)
rate1, rate2, rate3 = tuple(atrous_rates)
modules.append(ASPPConv(in_channels, out_channels, rate1))
modules.append(ASPPConv(in_channels, out_channels, rate2))
modules.append(ASPPConv(in_channels, out_channels, rate3))
modules.append(ASPPPooling(in_channels, out_channels))
self.convs = nn.ModuleList(modules)
self.project = nn.Sequential(
nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
# nn.BatchNorm2d(out_channels),
nn.ReLU()
# nn.Dropout(0.5)
)
def forward(self, x):
res = []
for conv in self.convs:
res.append(conv(x))
res = torch.cat(res, dim=1)
return self.project(res)
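# --- Hedged usage sketch (added for illustration; not part of the original file) ---
# The ASPP module above preserves the spatial resolution: four parallel atrous
# branches plus a global-pooling branch are concatenated and projected back to
# out_channels. Shapes below are arbitrary.
def _example_aspp():
    aspp = ASPP(in_channels=64, atrous_rates=[6, 12, 56], out_channels=64)
    x = torch.randn(2, 64, 32, 32)
    return aspp(x).shape  # torch.Size([2, 64, 32, 32])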
# copied from
# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py
# See https://arxiv.org/abs/1711.07971 for details
class _NonLocalBlockND(nn.Module):
def __init__(
self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True
):
super(_NonLocalBlockND, self).__init__()
assert dimension in [1, 2, 3]
self.dimension = dimension
self.sub_sample = sub_sample
self.in_channels = in_channels
self.inter_channels = inter_channels
if self.inter_channels is None:
self.inter_channels = in_channels // 2
if self.inter_channels == 0:
self.inter_channels = 1
if dimension == 3:
conv_nd = nn.Conv3d
max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d
elif dimension == 2:
conv_nd = nn.Conv2d
max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d
else:
conv_nd = nn.Conv1d
max_pool_layer = nn.MaxPool1d(kernel_size=2)
bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d
self.g = conv_nd(
in_channels=self.in_channels,
out_channels=self.inter_channels,
kernel_size=1,
stride=1,
padding=0,
)
if bn_layer:
self.W = nn.Sequential(
conv_nd(
in_channels=self.inter_channels,
out_channels=self.in_channels,
kernel_size=1,
stride=1,
padding=0,
),
bn(32, self.in_channels),
)
nn.init.constant_(self.W[1].weight, 0)
nn.init.constant_(self.W[1].bias, 0)
else:
self.W = conv_nd(
in_channels=self.inter_channels,
out_channels=self.in_channels,
kernel_size=1,
stride=1,
padding=0,
)
nn.init.constant_(self.W.weight, 0)
nn.init.constant_(self.W.bias, 0)
self.theta = conv_nd(
in_channels=self.in_channels,
out_channels=self.inter_channels,
kernel_size=1,
stride=1,
padding=0,
)
self.phi = conv_nd(
in_channels=self.in_channels,
out_channels=self.inter_channels,
kernel_size=1,
stride=1,
padding=0,
)
if sub_sample:
self.g = nn.Sequential(self.g, max_pool_layer)
self.phi = nn.Sequential(self.phi, max_pool_layer)
def forward(self, x):
"""
:param x: (b, c, t, h, w)
:return:
"""
batch_size = x.size(0)
g_x = self.g(x).view(batch_size, self.inter_channels, -1)
g_x = g_x.permute(0, 2, 1)
theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
theta_x = theta_x.permute(0, 2, 1)
phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
f = torch.matmul(theta_x, phi_x)
f_div_C = F.softmax(f, dim=-1)
y = torch.matmul(f_div_C, g_x)
y = y.permute(0, 2, 1).contiguous()
y = y.view(batch_size, self.inter_channels, *x.size()[2:])
W_y = self.W(y)
z = W_y + x
return z
class NONLocalBlock2D(_NonLocalBlockND):
def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
super(NONLocalBlock2D, self).__init__(
in_channels,
inter_channels=inter_channels,
dimension=2,
sub_sample=sub_sample,
bn_layer=bn_layer,
)
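# --- Hedged usage sketch (added for illustration; not part of the original file) ---
# The embedded-Gaussian non-local block is a residual operator, so its output has
# the same shape as its input. Channel and spatial sizes below are arbitrary.
def _example_nonlocal_block():
    block = NONLocalBlock2D(in_channels=64, bn_layer=True)
    x = torch.randn(2, 64, 28, 28)
    return block(x).shape  # torch.Size([2, 64, 28, 28])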

View File

@ -0,0 +1,5 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.utils.registry import Registry
ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD")

View File

@ -0,0 +1,224 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
from typing import Dict, List, Optional
import fvcore.nn.weight_init as weight_init
import torch
import torch.nn as nn
from torch.nn import functional as F
from detectron2.layers import Conv2d, ShapeSpec, get_norm
from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.roi_heads import select_foreground_proposals
from detectron2.structures import ImageList, Instances
from .. import (
build_densepose_data_filter,
build_densepose_head,
build_densepose_losses,
build_densepose_predictor,
densepose_inference,
)
class Decoder(nn.Module):
"""
A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
(https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
    all levels of the FPN into a single output.
"""
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
super(Decoder, self).__init__()
# fmt: off
self.in_features = in_features
feature_strides = {k: v.stride for k, v in input_shape.items()}
feature_channels = {k: v.channels for k, v in input_shape.items()}
num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
# fmt: on
self.scale_heads = []
for in_feature in self.in_features:
head_ops = []
head_length = max(
1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
)
for k in range(head_length):
conv = Conv2d(
feature_channels[in_feature] if k == 0 else conv_dims,
conv_dims,
kernel_size=3,
stride=1,
padding=1,
bias=not norm,
norm=get_norm(norm, conv_dims),
activation=F.relu,
)
weight_init.c2_msra_fill(conv)
head_ops.append(conv)
if feature_strides[in_feature] != self.common_stride:
head_ops.append(
nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
)
self.scale_heads.append(nn.Sequential(*head_ops))
self.add_module(in_feature, self.scale_heads[-1])
self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
weight_init.c2_msra_fill(self.predictor)
def forward(self, features: List[torch.Tensor]):
for i, _ in enumerate(self.in_features):
if i == 0:
x = self.scale_heads[i](features[i])
else:
x = x + self.scale_heads[i](features[i])
x = self.predictor(x)
return x
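# --- Hedged usage sketch (added for illustration; not part of the original file) ---
# Builds the decoder from a hand-rolled CfgNode carrying only the options read in
# __init__ (the values are assumptions, not necessarily the project defaults) and
# two synthetic FPN levels; the coarser level is upsampled to the common stride
# before the final per-pixel prediction.
def _example_decoder():
    from detectron2.config import CfgNode as CN
    cfg = CN()
    cfg.MODEL = CN()
    cfg.MODEL.ROI_DENSEPOSE_HEAD = CN()
    cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
    cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
    cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
    cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
    input_shape = {
        "p2": ShapeSpec(channels=256, stride=4),
        "p3": ShapeSpec(channels=256, stride=8),
    }
    decoder = Decoder(cfg, input_shape, in_features=["p2", "p3"])
    features = [torch.randn(1, 256, 64, 64), torch.randn(1, 256, 32, 32)]
    return decoder(features).shape  # torch.Size([1, 256, 64, 64])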
@ROI_HEADS_REGISTRY.register()
class DensePoseROIHeads(StandardROIHeads):
"""
    A Standard ROIHeads which additionally contains a DensePose head.
"""
def __init__(self, cfg, input_shape):
super().__init__(cfg, input_shape)
self._init_densepose_head(cfg, input_shape)
def _init_densepose_head(self, cfg, input_shape):
# fmt: off
self.densepose_on = cfg.MODEL.DENSEPOSE_ON
if not self.densepose_on:
return
self.densepose_data_filter = build_densepose_data_filter(cfg)
dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
# fmt: on
if self.use_decoder:
dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
else:
dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
in_channels = [input_shape[f].channels for f in self.in_features][0]
if self.use_decoder:
self.decoder = Decoder(cfg, input_shape, self.in_features)
self.densepose_pooler = ROIPooler(
output_size=dp_pooler_resolution,
scales=dp_pooler_scales,
sampling_ratio=dp_pooler_sampling_ratio,
pooler_type=dp_pooler_type,
)
self.densepose_head = build_densepose_head(cfg, in_channels)
self.densepose_predictor = build_densepose_predictor(
cfg, self.densepose_head.n_out_channels
)
self.densepose_losses = build_densepose_losses(cfg)
def _forward_densepose(self, features: Dict[str, torch.Tensor], instances: List[Instances]):
"""
Forward logic of the densepose prediction branch.
Args:
features (dict[str, Tensor]): input data as a mapping from feature
map name to tensor. Axis 0 represents the number of images `N` in
the input data; axes 1-3 are channels, height, and width, which may
vary between feature maps (e.g., if a feature pyramid is used).
            instances (list[Instances]): length `N` list of `Instances`. The i-th
                `Instances` contains instances for the i-th input image.
                In training, they can be the proposals.
                In inference, they can be the predicted boxes.
Returns:
In training, a dict of losses.
In inference, update `instances` with new fields "densepose" and return it.
"""
if not self.densepose_on:
return {} if self.training else instances
features = [features[f] for f in self.in_features]
if self.training:
proposals, _ = select_foreground_proposals(instances, self.num_classes)
features, proposals = self.densepose_data_filter(features, proposals)
if len(proposals) > 0:
proposal_boxes = [x.proposal_boxes for x in proposals]
if self.use_decoder:
features = [self.decoder(features)]
features_dp = self.densepose_pooler(features, proposal_boxes)
densepose_head_outputs = self.densepose_head(features_dp)
densepose_outputs, _, confidences, _ = self.densepose_predictor(
densepose_head_outputs
)
densepose_loss_dict = self.densepose_losses(
proposals, densepose_outputs, confidences
)
return densepose_loss_dict
else:
pred_boxes = [x.pred_boxes for x in instances]
if self.use_decoder:
features = [self.decoder(features)]
features_dp = self.densepose_pooler(features, pred_boxes)
if len(features_dp) > 0:
densepose_head_outputs = self.densepose_head(features_dp)
densepose_outputs, _, confidences, _ = self.densepose_predictor(
densepose_head_outputs
)
else:
                # If no detections occurred, set the DensePose outputs
                # and confidences to empty tensors
empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device)
densepose_outputs = tuple([empty_tensor] * 4)
confidences = tuple([empty_tensor] * 6)
densepose_inference(densepose_outputs, confidences, instances)
return instances
def forward(
self,
images: ImageList,
features: Dict[str, torch.Tensor],
proposals: List[Instances],
targets: Optional[List[Instances]] = None,
):
instances, losses = super().forward(images, features, proposals, targets)
del targets, images
if self.training:
losses.update(self._forward_densepose(features, instances))
return instances, losses
def forward_with_given_boxes(
self, features: Dict[str, torch.Tensor], instances: List[Instances]
):
"""
Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
        This is useful for downstream tasks where a box is known, but one needs to
        obtain other attributes (outputs of other heads).
Test-time augmentation also uses this.
Args:
features: same as in `forward()`
instances (list[Instances]): instances to predict other outputs. Expect the keys
"pred_boxes" and "pred_classes" to exist.
Returns:
instances (list[Instances]):
the same `Instances` objects, with extra
fields such as `pred_masks` or `pred_keypoints`.
"""
instances = super().forward_with_given_boxes(features, instances)
instances = self._forward_densepose(features, instances)
return instances
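    # -----------------------------------------------------------------------
    # Usage sketch (added for illustration; not part of the original API).
    # In practice this ROI head is built by detectron2's ROI_HEADS registry
    # from a DensePose config; the generalized R-CNN pipeline calls
    # `forward()` during training and `forward_with_given_boxes()` at
    # inference. Variable names and feature keys below are assumptions.
    #
    #   features = {"p2": f2, "p3": f3, "p4": f4, "p5": f5}  # FPN outputs
    #   # training: proposals carry matched ground truth; the returned losses
    #   # include the DensePose terms from `_forward_densepose`
    #   _, losses = roi_heads(images, features, proposals, targets)
    #   # inference: instances carry pred_boxes / pred_classes; the call
    #   # below attaches DensePose predictions to each Instances object
    #   instances = roi_heads.forward_with_given_boxes(features, instances)
    # -----------------------------------------------------------------------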

View File

@ -0,0 +1,64 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
from torch import nn
from torch.nn import functional as F
from detectron2.config import CfgNode
from detectron2.layers import Conv2d
from ..utils import initialize_module_params
from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
@ROI_DENSEPOSE_HEAD_REGISTRY.register()
class DensePoseV1ConvXHead(nn.Module):
"""
Fully convolutional DensePose head.
"""
def __init__(self, cfg: CfgNode, input_channels: int):
"""
Initialize DensePose fully convolutional head
Args:
cfg (CfgNode): configuration options
input_channels (int): number of input channels
"""
super(DensePoseV1ConvXHead, self).__init__()
# fmt: off
hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
# fmt: on
pad_size = kernel_size // 2
n_channels = input_channels
for i in range(self.n_stacked_convs):
layer = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=pad_size)
layer_name = self._get_layer_name(i)
self.add_module(layer_name, layer)
n_channels = hidden_dim
self.n_out_channels = n_channels
initialize_module_params(self)
def forward(self, features: torch.Tensor):
"""
Apply DensePose fully convolutional head to the input features
Args:
features (tensor): input features
        Returns:
A tensor of DensePose head outputs
"""
x = features
output = x
for i in range(self.n_stacked_convs):
layer_name = self._get_layer_name(i)
x = getattr(self, layer_name)(x)
x = F.relu(x)
output = x
return output
def _get_layer_name(self, i: int):
layer_name = "body_conv_fcn{}".format(i + 1)
return layer_name
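# Illustrative sketch (added for documentation; not used by the pipeline):
# runs the head on dummy pooled ROI features. The channel count (256) and
# ROI resolution (28x28) are assumptions; in the real pipeline the head is
# created via `build_densepose_head(cfg, in_channels)` and fed by the
# DensePose ROI pooler.
def _v1convx_demo() -> torch.Tensor:
    from detectron2.config import get_cfg

    from densepose import add_densepose_config

    cfg = get_cfg()
    add_densepose_config(cfg)  # populates cfg.MODEL.ROI_DENSEPOSE_HEAD.* defaults
    head = DensePoseV1ConvXHead(cfg, input_channels=256)
    rois = torch.zeros(2, 256, 28, 28)  # (num_rois, C, H, W) pooled features
    # stride-1 convolutions with same-padding keep the 28x28 spatial size;
    # the channel count becomes cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
    return head(rois)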

View File

@ -0,0 +1,250 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import argparse
import logging
import os
import sys
from timeit import default_timer as timer
from typing import Any, ClassVar, Dict, List
import torch
from fvcore.common.file_io import PathManager
from detectron2.data.catalog import DatasetCatalog
from detectron2.utils.logger import setup_logger
from densepose.data.structures import DensePoseDataRelative
from densepose.utils.dbhelper import EntrySelector
from densepose.utils.logger import verbosity_to_level
from densepose.vis.base import CompoundVisualizer
from densepose.vis.bounding_box import BoundingBoxVisualizer
from densepose.vis.densepose import (
DensePoseDataCoarseSegmentationVisualizer,
DensePoseDataPointsIVisualizer,
DensePoseDataPointsUVisualizer,
DensePoseDataPointsVisualizer,
DensePoseDataPointsVVisualizer,
)
DOC = """Query DB - a tool to print / visualize data from a database
"""
LOGGER_NAME = "query_db"
logger = logging.getLogger(LOGGER_NAME)
_ACTION_REGISTRY: Dict[str, "Action"] = {}
class Action(object):
@classmethod
def add_arguments(cls: type, parser: argparse.ArgumentParser):
parser.add_argument(
"-v",
"--verbosity",
action="count",
help="Verbose mode. Multiple -v options increase the verbosity.",
)
def register_action(cls: type):
"""
Decorator for action classes to automate action registration
"""
global _ACTION_REGISTRY
_ACTION_REGISTRY[cls.COMMAND] = cls
return cls
class EntrywiseAction(Action):
@classmethod
def add_arguments(cls: type, parser: argparse.ArgumentParser):
super(EntrywiseAction, cls).add_arguments(parser)
parser.add_argument(
"dataset", metavar="<dataset>", help="Dataset name (e.g. densepose_coco_2014_train)"
)
parser.add_argument(
"selector",
metavar="<selector>",
help="Dataset entry selector in the form field1[:type]=value1[,"
"field2[:type]=value_min-value_max...] which selects all "
"entries from the dataset that satisfy the constraints",
)
parser.add_argument(
"--max-entries", metavar="N", help="Maximum number of entries to process", type=int
)
@classmethod
def execute(cls: type, args: argparse.Namespace):
dataset = setup_dataset(args.dataset)
entry_selector = EntrySelector.from_string(args.selector)
context = cls.create_context(args)
if args.max_entries is not None:
for _, entry in zip(range(args.max_entries), dataset):
if entry_selector(entry):
cls.execute_on_entry(entry, context)
else:
for entry in dataset:
if entry_selector(entry):
cls.execute_on_entry(entry, context)
@classmethod
def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
context = {}
return context
@register_action
class PrintAction(EntrywiseAction):
"""
Print action that outputs selected entries to stdout
"""
COMMAND: ClassVar[str] = "print"
@classmethod
def add_parser(cls: type, subparsers: argparse._SubParsersAction):
parser = subparsers.add_parser(cls.COMMAND, help="Output selected entries to stdout. ")
cls.add_arguments(parser)
parser.set_defaults(func=cls.execute)
@classmethod
def add_arguments(cls: type, parser: argparse.ArgumentParser):
super(PrintAction, cls).add_arguments(parser)
@classmethod
def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]):
import pprint
printer = pprint.PrettyPrinter(indent=2, width=200, compact=True)
printer.pprint(entry)
@register_action
class ShowAction(EntrywiseAction):
"""
Show action that visualizes selected entries on an image
"""
COMMAND: ClassVar[str] = "show"
VISUALIZERS: ClassVar[Dict[str, object]] = {
"dp_segm": DensePoseDataCoarseSegmentationVisualizer(),
"dp_i": DensePoseDataPointsIVisualizer(),
"dp_u": DensePoseDataPointsUVisualizer(),
"dp_v": DensePoseDataPointsVVisualizer(),
"dp_pts": DensePoseDataPointsVisualizer(),
"bbox": BoundingBoxVisualizer(),
}
@classmethod
def add_parser(cls: type, subparsers: argparse._SubParsersAction):
parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries")
cls.add_arguments(parser)
parser.set_defaults(func=cls.execute)
@classmethod
def add_arguments(cls: type, parser: argparse.ArgumentParser):
super(ShowAction, cls).add_arguments(parser)
parser.add_argument(
"visualizations",
metavar="<visualizations>",
help="Comma separated list of visualizations, possible values: "
"[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))),
)
parser.add_argument(
"--output",
metavar="<image_file>",
default="output.png",
help="File name to save output to",
)
@classmethod
def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]):
import cv2
import numpy as np
image_fpath = PathManager.get_local_path(entry["file_name"])
image = cv2.imread(image_fpath, cv2.IMREAD_GRAYSCALE)
image = np.tile(image[:, :, np.newaxis], [1, 1, 3])
datas = cls._extract_data_for_visualizers_from_entry(context["vis_specs"], entry)
visualizer = context["visualizer"]
image_vis = visualizer.visualize(image, datas)
entry_idx = context["entry_idx"] + 1
out_fname = cls._get_out_fname(entry_idx, context["out_fname"])
cv2.imwrite(out_fname, image_vis)
logger.info(f"Output saved to {out_fname}")
context["entry_idx"] += 1
@classmethod
def _get_out_fname(cls: type, entry_idx: int, fname_base: str):
base, ext = os.path.splitext(fname_base)
return base + ".{0:04d}".format(entry_idx) + ext
@classmethod
def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]:
vis_specs = args.visualizations.split(",")
visualizers = []
for vis_spec in vis_specs:
vis = cls.VISUALIZERS[vis_spec]
visualizers.append(vis)
context = {
"vis_specs": vis_specs,
"visualizer": CompoundVisualizer(visualizers),
"out_fname": args.output,
"entry_idx": 0,
}
return context
@classmethod
def _extract_data_for_visualizers_from_entry(
cls: type, vis_specs: List[str], entry: Dict[str, Any]
):
dp_list = []
bbox_list = []
for annotation in entry["annotations"]:
is_valid, _ = DensePoseDataRelative.validate_annotation(annotation)
if not is_valid:
continue
bbox = torch.as_tensor(annotation["bbox"])
bbox_list.append(bbox)
dp_data = DensePoseDataRelative(annotation)
dp_list.append(dp_data)
datas = []
for vis_spec in vis_specs:
datas.append(bbox_list if "bbox" == vis_spec else (bbox_list, dp_list))
return datas
def setup_dataset(dataset_name):
logger.info("Loading dataset {}".format(dataset_name))
start = timer()
dataset = DatasetCatalog.get(dataset_name)
stop = timer()
logger.info("Loaded dataset {} in {:.3f}s".format(dataset_name, stop - start))
return dataset
def create_argument_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
description=DOC,
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120),
)
parser.set_defaults(func=lambda _: parser.print_help(sys.stdout))
subparsers = parser.add_subparsers(title="Actions")
for _, action in _ACTION_REGISTRY.items():
action.add_parser(subparsers)
return parser
def main():
parser = create_argument_parser()
args = parser.parse_args()
verbosity = args.verbosity if hasattr(args, "verbosity") else None
global logger
logger = setup_logger(name=LOGGER_NAME)
logger.setLevel(verbosity_to_level(verbosity))
args.func(args)
if __name__ == "__main__":
main()
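# ---------------------------------------------------------------------------
# Example invocations (illustrative; dataset name, selector and output path
# are assumptions to be adapted to the local setup):
#
#   # print up to two entries matching a given image id
#   python query_db.py print densepose_coco_2014_minival image_id:int=508 --max-entries 2 -v
#
#   # visualize coarse segmentation and bounding boxes for matching entries
#   python query_db.py show densepose_coco_2014_minival image_id:int=508 dp_segm,bbox --output entry.png
# ---------------------------------------------------------------------------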

View File

@ -0,0 +1,74 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
DensePose Training Script.
This script is similar to the training script in detectron2/tools.
It is an example of how a user might use detectron2 for a new project.
"""
from fvcore.common.file_io import PathManager
import detectron2.utils.comm as comm
from detectron2.config import get_cfg
from detectron2.engine import default_argument_parser, default_setup, hooks, launch
from detectron2.evaluation import verify_results
from detectron2.utils.logger import setup_logger
from densepose import add_densepose_config
from densepose.engine import Trainer
from densepose.modeling.densepose_checkpoint import DensePoseCheckpointer
def setup(args):
cfg = get_cfg()
add_densepose_config(cfg)
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
default_setup(cfg, args)
# Setup logger for "densepose" module
setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="densepose")
return cfg
def main(args):
cfg = setup(args)
# disable strict kwargs checking: allow one to specify path handle
# hints through kwargs, like timeout in DP evaluation
PathManager.set_strict_kwargs_checking(False)
if args.eval_only:
model = Trainer.build_model(cfg)
DensePoseCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
cfg.MODEL.WEIGHTS, resume=args.resume
)
res = Trainer.test(cfg, model)
if cfg.TEST.AUG.ENABLED:
res.update(Trainer.test_with_TTA(cfg, model))
if comm.is_main_process():
verify_results(cfg, res)
return res
trainer = Trainer(cfg)
trainer.resume_or_load(resume=args.resume)
if cfg.TEST.AUG.ENABLED:
trainer.register_hooks(
[hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))]
)
return trainer.train()
if __name__ == "__main__":
args = default_argument_parser().parse_args()
print("Command Line Args:", args)
launch(
main,
args.num_gpus,
num_machines=args.num_machines,
machine_rank=args.machine_rank,
dist_url=args.dist_url,
args=(args,),
)
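# ---------------------------------------------------------------------------
# Example invocations (illustrative; the config path, GPU count and weights
# path are assumptions to be adapted to the local setup):
#
#   # distributed training on a single machine
#   python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml --num-gpus 8
#
#   # evaluation only, reusing previously trained weights
#   python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \
#       --eval-only MODEL.WEIGHTS /path/to/model_final.pth
# ---------------------------------------------------------------------------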