diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..f560384
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,9 @@
+## Unit Tests
+
+To run the unit tests, do:
+```
+cd detectron2
+python -m unittest discover -v -s ./tests
+```
+
+There are also end-to-end inference & training tests, in [dev/run_*_tests.sh](../dev).
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..168f997
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
diff --git a/tests/layers/test_mask_ops.py b/tests/layers/test_mask_ops.py
new file mode 100644
index 0000000..d180627
--- /dev/null
+++ b/tests/layers/test_mask_ops.py
@@ -0,0 +1,190 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import contextlib
+import io
+import numpy as np
+import unittest
+from collections import defaultdict
+import torch
+import tqdm
+from fvcore.common.benchmark import benchmark
+from fvcore.common.file_io import PathManager
+from pycocotools.coco import COCO
+from tabulate import tabulate
+from torch.nn import functional as F
+
+from detectron2.data import MetadataCatalog
+from detectron2.layers.mask_ops import (
+    pad_masks,
+    paste_mask_in_image_old,
+    paste_masks_in_image,
+    scale_boxes,
+)
+from detectron2.structures import BitMasks, Boxes, BoxMode, PolygonMasks
+from detectron2.structures.masks import polygons_to_bitmask
+
+
+def iou_between_full_image_bit_masks(a, b):
+    """Return the intersection-over-union of two full-image bit masks."""
+    overlap, combined = (a & b).sum(), (a | b).sum()
+    return overlap / combined
+
+
+def rasterize_polygons_with_grid_sample(full_image_bit_mask, box, mask_size, threshold=0.5):
+    """
+    Crop `box` (XYXY) out of a full-image bit mask into a (mask_size, mask_size)
+    boolean mask by bilinear sampling at the output pixel centers.
+    """
+    x0, y0, x1, y1 = (box[i] for i in range(4))
+    img_h, img_w = full_image_bit_mask.shape
+
+    # output pixel centers, in units of output pixels: [0.5, mask_size - 0.5]
+    centers = np.arange(0.0, mask_size) + 0.5
+    # map the centers into image coordinates inside the box ...
+    ys = centers / mask_size * (y1 - y0) + y0
+    xs = centers / mask_size * (x1 - x0) + x0
+    # ... then into the [-1, 1] range expected by grid_sample(align_corners=True)
+    xs = (xs - 0.5) / (img_w - 1) * 2 + -1
+    ys = (ys - 0.5) / (img_h - 1) * 2 + -1
+
+    grid_y, grid_x = torch.meshgrid(torch.from_numpy(ys), torch.from_numpy(xs))
+    grid = torch.stack([grid_x, grid_y], dim=-1).to(dtype=torch.float32)
+    image = torch.from_numpy(full_image_bit_mask)[None, None, :, :].to(dtype=torch.float32)
+    sampled = F.grid_sample(image, grid[None, :, :, :], align_corners=True)
+
+    return sampled[0, 0] >= threshold
+
+
+class TestMaskCropPaste(unittest.TestCase):
+    def setUp(self):
+        json_file = MetadataCatalog.get("coco_2017_val_100").json_file
+        if not PathManager.isfile(json_file):
+            raise unittest.SkipTest("{} not found".format(json_file))
+        with contextlib.redirect_stdout(io.StringIO()):
+            json_file = PathManager.get_local_path(json_file)
+            self.coco = COCO(json_file)
+
+    def test_crop_paste_consistency(self):
+        """
+        rasterize_polygons_within_box (used in training)
+        and
+        paste_masks_in_image (used in inference)
+        should be inverse operations to each other.
+
+        This function runs several implementations of the above two operations, prints
+        the mean reconstruction IoU of each combination and checks the "aligned" ones.
+        """
+
+        anns = self.coco.loadAnns(self.coco.getAnnIds(iscrowd=False))  # avoid crowd annotations
+
+        selected_anns = anns[:100]
+
+        ious = []
+        for ann in tqdm.tqdm(selected_anns):
+            # rows are (rasterize, paste, iou); the labels of the last call are reused below
+            results = self.process_annotation(ann)
+            ious.append([k[2] for k in results])
+
+        mean_ious = np.array(ious).mean(axis=0)
+        table = []
+        res_dic = defaultdict(dict)
+        for row, iou in zip(results, mean_ious):
+            table.append((row[0], row[1], iou))
+            res_dic[row[0]][row[1]] = iou
+        print(tabulate(table, headers=["rasterize", "paste", "iou"], tablefmt="simple"))
+        # assertGreater reports the measured IoU on failure, unlike assertTrue(x > y)
+        self.assertGreater(res_dic["polygon"]["aligned"], 0.94)
+        self.assertGreater(res_dic["roialign"]["aligned"], 0.95)
+
+    def process_annotation(self, ann, mask_side_len=28):
+        # Parse annotation data
+        img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0]
+        height, width = img_info["height"], img_info["width"]
+        gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]]
+        gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
+        gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width)
+
+        # Run rasterize ..
+        torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4)
+        box_bitmasks = {
+            "polygon": PolygonMasks([gt_polygons]).crop_and_resize(torch_gt_bbox, mask_side_len)[0],
+            "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len),
+            "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize(
+                torch_gt_bbox, mask_side_len
+            )[0],
+        }
+
+        # Run paste ..
+        results = defaultdict(dict)
+        for k, box_bitmask in box_bitmasks.items():
+            padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1)
+            scaled_boxes = scale_boxes(torch_gt_bbox, scale)
+
+            r = results[k]
+            r["old"] = paste_mask_in_image_old(
+                padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5
+            )
+            r["aligned"] = paste_masks_in_image(
+                box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width)
+            )[0]
+
+        table = []
+        for rasterize_method, r in results.items():
+            for paste_method, mask in r.items():
+                mask = np.asarray(mask)
+                iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask)
+                table.append((rasterize_method, paste_method, iou))
+        return table
+
+    def test_polygon_area(self):
+        # Draw polygon boxes
+        for d in [5.0, 10.0, 1000.0]:
+            polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]])
+            area = polygon.area()[0]
+            target = d ** 2
+            self.assertEqual(area, target)
+
+        # Draw polygon triangles
+        for d in [5.0, 10.0, 1000.0]:
+            polygon = PolygonMasks([[[0, 0, 0, d, d, d]]])
+            area = polygon.area()[0]
+            target = d ** 2 / 2
+            self.assertEqual(area, target)
+
+
+def benchmark_paste():
+    S = 800
+    H, W = image_shape = (S, S)
+    N = 64
+    torch.manual_seed(42)
+    masks = torch.rand(N, 28, 28)
+
+    center = torch.rand(N, 2) * 600 + 100
+    wh = torch.clamp(torch.randn(N, 2) * 40 + 200, min=50)
+    x0y0 = torch.clamp(center - wh * 0.5, min=0.0)
+    x1y1 = torch.clamp(center + wh * 0.5, max=S)
+    boxes = Boxes(torch.cat([x0y0, x1y1], axis=1))
+
+    def func(device, n=3):
+        m = masks.to(device=device)
+        b = boxes.to(device=device)
+
+        def bench():
+            for _ in range(n):
+                paste_masks_in_image(m, b, image_shape)
+            if device.type == "cuda":
+                torch.cuda.synchronize()
+
+        return bench
+
+    specs = [{"device": torch.device("cpu"), "n": 3}]
+    if torch.cuda.is_available():
+        specs.append({"device": torch.device("cuda"), "n": 3})
+
+    benchmark(func, "paste_masks", specs, num_iters=10, warmup_iters=2)
+
+
+if __name__ == "__main__":
+    benchmark_paste()
+    unittest.main()
diff --git a/tests/layers/test_nms.py b/tests/layers/test_nms.py
new file mode 100644
index 0000000..051c0c0
--- /dev/null
+++ b/tests/layers/test_nms.py
@@ -0,0 +1,39 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from __future__ import absolute_import, division, print_function, unicode_literals
+import unittest
+import torch
+
+from detectron2.layers import batched_nms
+from detectron2.utils.env import TORCH_VERSION
+
+
+class TestNMS(unittest.TestCase):
+    def _create_tensors(self, N):
+        boxes = torch.rand(N, 4) * 100
+        # Note: the implementation of this function in torchvision is:
+        # boxes[:, 2:] += torch.rand(N, 2) * 100
+        # but it does not guarantee non-negative widths/heights constraints:
+        # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]:
+        boxes[:, 2:] += boxes[:, :2]
+        scores = torch.rand(N)
+        return boxes, scores
+
+    @unittest.skipIf(TORCH_VERSION < (1, 6), "Insufficient pytorch version")
+    def test_nms_scriptability(self):
+        """Scripted and eager batched_nms must agree and must not modify `boxes`."""
+        N, num_classes = 2000, 50
+        boxes, scores = self._create_tensors(N)
+        idxs = torch.randint(0, num_classes, (N,))
+        scripted_batched_nms = torch.jit.script(batched_nms)
+        err_msg = "NMS is incompatible with jit-scripted NMS for IoU={}"
+        mutated_msg = "boxes modified by jit-scripted batched_nms"
+        for iou in [0.2, 0.5, 0.8]:
+            keep_ref = batched_nms(boxes, scores, idxs, iou)
+            backup = boxes.clone()
+            scripted_keep = scripted_batched_nms(boxes, scores, idxs, iou)
+            self.assertTrue(torch.allclose(boxes, backup), mutated_msg)
+            self.assertTrue(torch.equal(keep_ref, scripted_keep), err_msg.format(iou))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/layers/test_nms_rotated.py b/tests/layers/test_nms_rotated.py
new file mode 100644
index 0000000..b8c08aa
--- /dev/null
+++ b/tests/layers/test_nms_rotated.py
@@ -0,0 +1,187 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from __future__ import absolute_import, division, print_function, unicode_literals
+import numpy as np
+import unittest
+import torch
+from torchvision import ops
+
+from detectron2.layers import batched_nms, batched_nms_rotated, nms_rotated
+
+
+def nms_edit_distance(keep1, keep2):
+    """
+    Compare the "keep" results of two nms calls and return their edit distance.
+    They are allowed to be different in terms of edit distance
+    due to floating point precision issues, e.g.,
+    if a box happens to have an IoU of 0.5 with another box,
+    one implementation may choose to keep it while another may discard it.
+    """
+    if torch.equal(keep1, keep2):
+        # they should be equal most of the time
+        return 0
+    keep1, keep2 = tuple(keep1.cpu()), tuple(keep2.cpu())
+    m, n = len(keep1), len(keep2)
+
+    # edit distance with DP, keeping only two rolling rows of the table
+    f = [np.arange(n + 1), np.arange(n + 1)]
+    for i in range(m):
+        cur_row = i % 2  # row already filled for the prefix keep1[:i]
+        other_row = (i + 1) % 2  # row being filled for the prefix keep1[:i + 1]
+        f[other_row][0] = i + 1  # distance from keep1[:i + 1] to an empty sequence
+        for j in range(n):
+            f[other_row][j + 1] = (
+                f[cur_row][j]
+                if keep1[i] == keep2[j]
+                else min(min(f[cur_row][j], f[cur_row][j + 1]), f[other_row][j]) + 1
+            )
+    return f[m % 2][n]  # m % 2 is the row written by the last iteration (row 0 if m == 0)
+
+
+class TestNMSRotated(unittest.TestCase):
+    def reference_horizontal_nms(self, boxes, scores, iou_threshold):
+        """
+        Args:
+            boxes (N, 4): horizontal boxes in corner-form, as taken by `ops.box_iou`.
+            scores (N): one score per box; higher-scoring boxes are picked first.
+            iou_threshold: intersection over union threshold.
+        Returns:
+             picked: a tensor with the indexes of the kept boxes
+        """
+        picked = []
+        _, indexes = scores.sort(descending=True)  # candidate indexes, best score first
+        while len(indexes) > 0:
+            current = indexes[0]  # the best remaining candidate is always kept
+            picked.append(current.item())
+            if len(indexes) == 1:
+                break
+            current_box = boxes[current, :]
+            indexes = indexes[1:]
+            rest_boxes = boxes[indexes, :]
+            iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1)
+            indexes = indexes[iou <= iou_threshold]  # drop boxes overlapping it too much
+
+        return torch.as_tensor(picked)
+
+    def _create_tensors(self, N):
+        boxes = torch.rand(N, 4) * 100
+        # Note: the implementation of this function in torchvision is:
+        # boxes[:, 2:] += torch.rand(N, 2) * 100
+        # but it does not guarantee non-negative widths/heights constraints:
+        # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]:
+        boxes[:, 2:] += boxes[:, :2]
+        scores = torch.rand(N)
+        return boxes, scores
+
+    def test_batched_nms_rotated_0_degree_cpu(self):
+        """batched_nms_rotated on 0-degree boxes should match horizontal batched_nms."""
+        N = 2000
+        num_classes = 50
+        boxes, scores = self._create_tensors(N)
+        idxs = torch.randint(0, num_classes, (N,))
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
+        for iou in [0.2, 0.5, 0.8]:
+            backup = boxes.clone()
+            keep_ref = batched_nms(boxes, scores, idxs, iou)
+            self.assertTrue(torch.allclose(boxes, backup), "boxes modified by batched_nms")
+            backup = rotated_boxes.clone()
+            keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou)
+            unchanged = torch.allclose(rotated_boxes, backup)
+            self.assertTrue(unchanged, "rotated_boxes modified by batched_nms_rotated")
+            self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_batched_nms_rotated_0_degree_cuda(self):
+        N = 2000
+        num_classes = 50
+        boxes, scores = self._create_tensors(N)
+        idxs = torch.randint(0, num_classes, (N,))
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
+        for iou in [0.2, 0.5, 0.8]:
+            backup = boxes.clone()
+            keep_ref = batched_nms(boxes.cuda(), scores.cuda(), idxs, iou)
+            self.assertTrue(torch.allclose(boxes, backup), "boxes modified by batched_nms")
+            backup = rotated_boxes.clone()
+            keep = batched_nms_rotated(rotated_boxes.cuda(), scores.cuda(), idxs, iou)
+            self.assertTrue(
+                torch.allclose(rotated_boxes, backup),
+                "rotated_boxes modified by batched_nms_rotated",
+            )
+            self.assertLessEqual(nms_edit_distance(keep, keep_ref), 2, err_msg.format(iou))
+
+    def test_nms_rotated_0_degree_cpu(self):
+        N = 1000
+        boxes, scores = self._create_tensors(N)
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}"
+        for iou in [0.5]:
+            keep_ref = self.reference_horizontal_nms(boxes, scores, iou)
+            keep = nms_rotated(rotated_boxes, scores, iou)
+            self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou))
+
+    def test_nms_rotated_90_degrees_cpu(self):
+        N = 1000
+        boxes, scores = self._create_tensors(N)
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        # Note for rotated_boxes[:, 2] and rotated_boxes[:, 3]:
+        # widths and heights are intentionally swapped here for 90 degrees case
+        # so that the reference horizontal nms could be used
+        rotated_boxes[:, 2] = boxes[:, 3] - boxes[:, 1]
+        rotated_boxes[:, 3] = boxes[:, 2] - boxes[:, 0]
+
+        rotated_boxes[:, 4] = torch.ones(N) * 90
+        err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}"
+        for iou in [0.2, 0.5, 0.8]:
+            keep_ref = self.reference_horizontal_nms(boxes, scores, iou)
+            keep = nms_rotated(rotated_boxes, scores, iou)
+            self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou))
+
+    def test_nms_rotated_180_degrees_cpu(self):
+        N = 1000
+        boxes, scores = self._create_tensors(N)
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        rotated_boxes[:, 4] = torch.ones(N) * 180
+        err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}"
+        for iou in [0.2, 0.5, 0.8]:
+            keep_ref = self.reference_horizontal_nms(boxes, scores, iou)
+            keep = nms_rotated(rotated_boxes, scores, iou)
+            self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_nms_rotated_0_degree_cuda(self):
+        N = 1000
+        boxes, scores = self._create_tensors(N)
+        rotated_boxes = torch.zeros(N, 5)
+        rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
+        rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
+        rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
+        rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
+        err_msg = "Rotated NMS incompatible between CPU and CUDA for IoU={}"
+
+        for iou in [0.2, 0.5, 0.8]:
+            r_cpu = nms_rotated(rotated_boxes, scores, iou)
+            r_cuda = nms_rotated(rotated_boxes.cuda(), scores.cuda(), iou)
+            self.assertLessEqual(nms_edit_distance(r_cpu, r_cuda.cpu()), 1, err_msg.format(iou))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/layers/test_roi_align.py b/tests/layers/test_roi_align.py
new file mode 100644
index 0000000..633d7c2
--- /dev/null
+++ b/tests/layers/test_roi_align.py
@@ -0,0 +1,152 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import numpy as np
+import unittest
+import cv2
+import torch
+from fvcore.common.benchmark import benchmark
+
+from detectron2.layers.roi_align import ROIAlign
+
+
+class ROIAlignTest(unittest.TestCase):
+    def test_forward_output(self):
+        input = np.arange(25).reshape(5, 5).astype("float32")
+        """
+        0  1  2   3 4
+        5  6  7   8 9
+        10 11 12 13 14
+        15 16 17 18 19
+        20 21 22 23 24
+        """
+
+        output = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=False)
+        output_correct = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=True)
+
+        # without correction:
+        old_results = [
+            [7.5, 8, 8.5, 9],
+            [10, 10.5, 11, 11.5],
+            [12.5, 13, 13.5, 14],
+            [15, 15.5, 16, 16.5],
+        ]
+
+        # with 0.5 correction:
+        correct_results = [
+            [4.5, 5.0, 5.5, 6.0],
+            [7.0, 7.5, 8.0, 8.5],
+            [9.5, 10.0, 10.5, 11.0],
+            [12.0, 12.5, 13.0, 13.5],
+        ]
+        # This is an upsampled version of [[6, 7], [11, 12]]
+
+        self.assertTrue(np.allclose(output.flatten(), np.asarray(old_results).flatten()))
+        self.assertTrue(
+            np.allclose(output_correct.flatten(), np.asarray(correct_results).flatten())
+        )
+
+        # Also see similar issues in tensorflow at
+        # https://github.com/tensorflow/tensorflow/issues/26278
+
+    def test_resize(self):
+        H, W = 30, 30
+        input = np.random.rand(H, W).astype("float32") * 100
+        box = [10, 10, 20, 20]
+        output = self._simple_roialign(input, box, (5, 5), aligned=True)
+
+        input2x = cv2.resize(input, (W // 2, H // 2), interpolation=cv2.INTER_LINEAR)
+        box2x = [x / 2 for x in box]
+        output2x = self._simple_roialign(input2x, box2x, (5, 5), aligned=True)
+        diff = np.abs(output2x - output)
+        self.assertTrue(diff.max() < 1e-4)
+
+    def _simple_roialign(self, img, box, resolution, aligned=True):
+        """
+        RoiAlign with scale 1.0 and 0 sample ratio.
+        """
+        if isinstance(resolution, int):
+            resolution = (resolution, resolution)
+        op = ROIAlign(resolution, 1.0, 0, aligned=aligned)
+        input = torch.from_numpy(img[None, None, :, :].astype("float32"))
+
+        rois = [0] + list(box)
+        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
+        output = op.forward(input, rois)
+        if torch.cuda.is_available():
+            output_cuda = op.forward(input.cuda(), rois.cuda()).cpu()
+            self.assertTrue(torch.allclose(output, output_cuda))
+        return output[0, 0]
+
+    def _simple_roialign_with_grad(self, img, box, resolution, device):
+        if isinstance(resolution, int):
+            resolution = (resolution, resolution)
+
+        op = ROIAlign(resolution, 1.0, 0, aligned=True)
+        input = torch.from_numpy(img[None, None, :, :].astype("float32"))
+
+        rois = [0] + list(box)
+        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
+        input = input.to(device=device)
+        rois = rois.to(device=device)
+        input.requires_grad = True
+        output = op.forward(input, rois)
+        return input, output
+
+    def test_empty_box(self):
+        """A zero-height box must pool to all zeros and propagate zero gradients."""
+        img = np.random.rand(5, 5)
+        box = [3, 4, 5, 4]
+        o = self._simple_roialign(img, box, 7)
+        self.assertTrue(o.shape == (7, 7))
+        self.assertTrue((o == 0).all())
+        for dev in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # always run cpu
+            input, output = self._simple_roialign_with_grad(img, box, 7, torch.device(dev))
+            output.sum().backward()
+            self.assertTrue(torch.allclose(input.grad, torch.zeros_like(input)))
+
+    def test_empty_batch(self):
+        input = torch.zeros(0, 3, 10, 10, dtype=torch.float32)
+        rois = torch.zeros(0, 5, dtype=torch.float32)
+        op = ROIAlign((7, 7), 1.0, 0, aligned=True)
+        output = op.forward(input, rois)
+        self.assertTrue(output.shape == (0, 3, 7, 7))
+
+
+def benchmark_roi_align():
+    from detectron2 import _C
+
+    def random_boxes(mean_box, stdev, N, maxsize):
+        ret = torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float)
+        ret.clamp_(min=0, max=maxsize)
+        return ret
+
+    def func(N, C, H, W, nboxes_per_img):
+        input = torch.rand(N, C, H, W)
+        boxes = []
+        batch_idx = []
+        for k in range(N):
+            b = random_boxes([80, 80, 130, 130], 24, nboxes_per_img, H)
+            # try smaller boxes:
+            # b = random_boxes([100, 100, 110, 110], 4, nboxes_per_img, H)
+            boxes.append(b)
+            batch_idx.append(torch.zeros(nboxes_per_img, 1, dtype=torch.float32) + k)
+        boxes = torch.cat(boxes, axis=0)
+        batch_idx = torch.cat(batch_idx, axis=0)
+        boxes = torch.cat([batch_idx, boxes], axis=1)
+
+        input = input.cuda()
+        boxes = boxes.cuda()
+
+        def bench():
+            _C.roi_align_forward(input, boxes, 1.0, 7, 7, 0, True)
+            torch.cuda.synchronize()
+
+        return bench
+
+    args = [dict(N=2, C=512, H=256, W=256, nboxes_per_img=500)]
+    benchmark(func, "cuda_roialign", args, num_iters=20, warmup_iters=1)
+
+
+if __name__ == "__main__":
+    if torch.cuda.is_available():
+        benchmark_roi_align()
+    unittest.main()
diff --git a/tests/layers/test_roi_align_rotated.py b/tests/layers/test_roi_align_rotated.py
new file mode 100644
index 0000000..1915b59
--- /dev/null
+++ b/tests/layers/test_roi_align_rotated.py
@@ -0,0 +1,176 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import cv2
+import torch
+from torch.autograd import Variable, gradcheck
+
+from detectron2.layers.roi_align import ROIAlign
+from detectron2.layers.roi_align_rotated import ROIAlignRotated
+
+logger = logging.getLogger(__name__)
+
+
+class ROIAlignRotatedTest(unittest.TestCase):
+    def _box_to_rotated_box(self, box, angle):
+        """Convert an XYXY box to [x_center, y_center, width, height, angle]."""
+        x0, y0, x1, y1 = box[0], box[1], box[2], box[3]
+        center_x = (x0 + x1) / 2.0
+        center_y = (y0 + y1) / 2.0
+        width = x1 - x0
+        height = y1 - y0
+        return [center_x, center_y, width, height, angle]
+
+    def _rot90(self, img, num):
+        """Apply `num % 4` quarter turns, each one a transpose followed by a flip of dim 0."""
+        for _ in range(num % 4):  # note: -1 % 4 == 3
+            img = img.transpose(0, 1).flip(0)
+        return img
+
+    def test_forward_output_0_90_180_270(self):
+        for i in range(4):
+            # i = 0, 1, 2, 3 corresponding to 0, 90, 180, 270 degrees
+            img = torch.arange(25, dtype=torch.float32).reshape(5, 5)
+            """
+            0  1  2   3 4
+            5  6  7   8 9
+            10 11 12 13 14
+            15 16 17 18 19
+            20 21 22 23 24
+            """
+            box = [1, 1, 3, 3]
+            rotated_box = self._box_to_rotated_box(box=box, angle=90 * i)
+
+            result = self._simple_roi_align_rotated(img=img, box=rotated_box, resolution=(4, 4))
+
+            # Here's an explanation for 0 degree case:
+            # point 0 in the original input lies at [0.5, 0.5]
+            # (the center of bin [0, 1] x [0, 1])
+            # point 1 in the original input lies at [1.5, 0.5], etc.
+            # since the resolution is (4, 4) that divides [1, 3] x [1, 3]
+            # into 4 x 4 equal bins,
+            # the top-left bin is [1, 1.5] x [1, 1.5], and its center
+            # (1.25, 1.25) lies at the 3/4 position
+            # between point 0 and point 1, point 5 and point 6,
+            # point 0 and point 5, point 1 and point 6, so it can be calculated as
+            # 0.25*(0*0.25+1*0.75)+(5*0.25+6*0.75)*0.75 = 4.5
+            result_expected = torch.tensor(
+                [
+                    [4.5, 5.0, 5.5, 6.0],
+                    [7.0, 7.5, 8.0, 8.5],
+                    [9.5, 10.0, 10.5, 11.0],
+                    [12.0, 12.5, 13.0, 13.5],
+                ]
+            )
+            # This is also an upsampled version of [[6, 7], [11, 12]]
+
+            # When the box is rotated by 90 degrees CCW,
+            # the result would be rotated by 90 degrees CW, thus it's -i here
+            result_expected = self._rot90(result_expected, -i)
+
+            assert torch.allclose(result, result_expected)
+
+    def test_resize(self):
+        """Pooling a box from an image and from its half-size copy should give the same output."""
+        H, W = 30, 30
+        input = torch.rand(H, W) * 100
+        box = [10, 10, 20, 20]
+        rotated_box = self._box_to_rotated_box(box, angle=0)
+        output = self._simple_roi_align_rotated(img=input, box=rotated_box, resolution=(5, 5))
+        input2x = cv2.resize(input.numpy(), (W // 2, H // 2), interpolation=cv2.INTER_LINEAR)
+        input2x = torch.from_numpy(input2x)
+        box2x = [x / 2 for x in box]
+        rotated_box2x = self._box_to_rotated_box(box2x, angle=0)
+        output2x = self._simple_roi_align_rotated(img=input2x, box=rotated_box2x, resolution=(5, 5))
+        self.assertTrue(torch.allclose(output2x, output))
+
+    def _simple_roi_align_rotated(self, img, box, resolution):
+        """
+        RoiAlignRotated with scale 1.0 and 0 sample ratio on a single box of a 2D image.
+        """
+        op = ROIAlignRotated(output_size=resolution, spatial_scale=1.0, sampling_ratio=0)
+        input = img[None, None, :, :]
+
+        rois = [0] + list(box)  # prepend the batch index of the only image
+        rois = torch.tensor(rois, dtype=torch.float32)[None, :]
+        result_cpu = op.forward(input, rois)
+        if torch.cuda.is_available():  # cross-check the CUDA result against the CPU one
+            result_cuda = op.forward(input.cuda(), rois.cuda())
+            self.assertTrue(torch.allclose(result_cpu, result_cuda.cpu()))
+        return result_cpu[0, 0]
+
+    def test_empty_box(self):
+        img = torch.rand(5, 5)
+        out = self._simple_roi_align_rotated(img, [2, 3, 0, 0, 0], (7, 7))
+        self.assertTrue((out == 0).all())
+
+    def test_roi_align_rotated_gradcheck_cpu(self):
+        dtype = torch.float64
+        device = torch.device("cpu")
+        roi_align_rotated_op = ROIAlignRotated(
+            output_size=(5, 5), spatial_scale=0.5, sampling_ratio=1
+        ).to(dtype=dtype, device=device)
+        x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True)
+        # roi format is (batch index, x_center, y_center, width, height, angle)
+        rois = torch.tensor(
+            [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]],
+            dtype=dtype,
+            device=device,
+        )
+
+        def func(input):
+            return roi_align_rotated_op(input, rois)
+
+        assert gradcheck(func, (x,)), "gradcheck failed for RoIAlignRotated CPU"
+        assert gradcheck(func, (x.transpose(2, 3),)), "gradcheck failed for RoIAlignRotated CPU"
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_roi_align_rotated_gradient_cuda(self):
+        """
+        Compute gradients for ROIAlignRotated with multiple bounding boxes on the GPU,
+        and compare the result with ROIAlign
+        """
+        # torch.manual_seed(123)
+        dtype = torch.float64
+        device = torch.device("cuda")
+        pool_h, pool_w = (5, 5)
+
+        roi_align = ROIAlign(output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to(
+            device=device
+        )
+
+        roi_align_rotated = ROIAlignRotated(
+            output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2
+        ).to(device=device)
+
+        x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True)
+        # x_rotated = x.clone() won't work (will lead to grad_fun=CloneBackward)!
+        x_rotated = Variable(x.data.clone(), requires_grad=True)
+
+        # roi_rotated format is (batch index, x_center, y_center, width, height, angle)
+        rois_rotated = torch.tensor(
+            [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]],
+            dtype=dtype,
+            device=device,
+        )
+
+        y_rotated = roi_align_rotated(x_rotated, rois_rotated)
+        s_rotated = y_rotated.sum()
+        s_rotated.backward()
+
+        # roi format is (batch index, x1, y1, x2, y2)
+        rois = torch.tensor(
+            [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9]], dtype=dtype, device=device
+        )
+
+        y = roi_align(x, rois)
+        s = y.sum()
+        s.backward()
+
+        assert torch.allclose(
+            x.grad, x_rotated.grad
+        ), "gradients for ROIAlign and ROIAlignRotated mismatch on CUDA"
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/modeling/test_anchor_generator.py b/tests/modeling/test_anchor_generator.py
new file mode 100644
index 0000000..c0d783b
--- /dev/null
+++ b/tests/modeling/test_anchor_generator.py
@@ -0,0 +1,122 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.config import get_cfg
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.anchor_generator import DefaultAnchorGenerator, RotatedAnchorGenerator
+from detectron2.utils.env import TORCH_VERSION
+
+logger = logging.getLogger(__name__)
+
+
+class TestAnchorGenerator(unittest.TestCase):
+    """Check generated anchors for a 1x2 feature map at stride 4 against hard-coded values."""
+
+    def test_default_anchor_generator(self):
+        """cfg-built generator: 2 sizes x 3 ratios at 2 locations = 12 anchors."""
+        cfg = get_cfg()
+        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
+        cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]]
+        anchor_generator = DefaultAnchorGenerator(cfg, [ShapeSpec(stride=4)])
+
+        # only the last two dimensions of features matter here (2 images, 96 channels)
+        features = {"stage3": torch.rand(2, 96, 1, 2)}
+        anchors = anchor_generator([features["stage3"]])
+        expected_anchor_tensor = torch.tensor(
+            [
+                [-32.0, -8.0, 32.0, 8.0],
+                [-16.0, -16.0, 16.0, 16.0],
+                [-8.0, -32.0, 8.0, 32.0],
+                [-64.0, -16.0, 64.0, 16.0],
+                [-32.0, -32.0, 32.0, 32.0],
+                [-16.0, -64.0, 16.0, 64.0],
+                [-28.0, -8.0, 36.0, 8.0],  # -28.0 == -32.0 + STRIDE (4)
+                [-12.0, -16.0, 20.0, 16.0],
+                [-4.0, -32.0, 12.0, 32.0],
+                [-60.0, -16.0, 68.0, 16.0],
+                [-28.0, -32.0, 36.0, 32.0],
+                [-12.0, -64.0, 20.0, 64.0],
+            ]
+        )
+
+        self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor))
+
+    def test_default_anchor_generator_centered(self):
+        """Explicit constructor args; expected anchors are centered at (2, 2) and (6, 2)."""
+        anchor_generator = DefaultAnchorGenerator(
+            sizes=[32, 64], aspect_ratios=[0.25, 1, 4], strides=[4]
+        )
+
+        # only the last two dimensions of features matter here (2 images, 96 channels)
+        features = {"stage3": torch.rand(2, 96, 1, 2)}
+        expected_anchor_tensor = torch.tensor(
+            [
+                [-30.0, -6.0, 34.0, 10.0],
+                [-14.0, -14.0, 18.0, 18.0],
+                [-6.0, -30.0, 10.0, 34.0],
+                [-62.0, -14.0, 66.0, 18.0],
+                [-30.0, -30.0, 34.0, 34.0],
+                [-14.0, -62.0, 18.0, 66.0],
+                [-26.0, -6.0, 38.0, 10.0],
+                [-10.0, -14.0, 22.0, 18.0],
+                [-2.0, -30.0, 14.0, 34.0],
+                [-58.0, -14.0, 70.0, 18.0],
+                [-26.0, -30.0, 38.0, 34.0],
+                [-10.0, -62.0, 22.0, 66.0],
+            ]
+        )
+
+        anchors = anchor_generator([features["stage3"]])
+        self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor))
+
+        if TORCH_VERSION >= (1, 6):
+            anchors = torch.jit.script(anchor_generator)([features["stage3"]])
+            self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor))
+
+    def test_rrpn_anchor_generator(self):
+        """RRPN generator: 2 sizes x 3 ratios x 2 angles at 2 locations = 24 anchors."""
+        cfg = get_cfg()
+        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
+        cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]]
+        cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [0, 45]  # test single list[float]
+        anchor_generator = RotatedAnchorGenerator(cfg, [ShapeSpec(stride=4)])
+
+        # only the last two dimensions of features matter here (2 images, 96 channels)
+        features = {"stage3": torch.rand(2, 96, 1, 2)}
+        anchors = anchor_generator([features["stage3"]])
+        expected_anchor_tensor = torch.tensor(
+            [
+                [0.0, 0.0, 64.0, 16.0, 0.0],
+                [0.0, 0.0, 64.0, 16.0, 45.0],
+                [0.0, 0.0, 32.0, 32.0, 0.0],
+                [0.0, 0.0, 32.0, 32.0, 45.0],
+                [0.0, 0.0, 16.0, 64.0, 0.0],
+                [0.0, 0.0, 16.0, 64.0, 45.0],
+                [0.0, 0.0, 128.0, 32.0, 0.0],
+                [0.0, 0.0, 128.0, 32.0, 45.0],
+                [0.0, 0.0, 64.0, 64.0, 0.0],
+                [0.0, 0.0, 64.0, 64.0, 45.0],
+                [0.0, 0.0, 32.0, 128.0, 0.0],
+                [0.0, 0.0, 32.0, 128.0, 45.0],
+                [4.0, 0.0, 64.0, 16.0, 0.0],  # 4.0 == 0.0 + STRIDE (4)
+                [4.0, 0.0, 64.0, 16.0, 45.0],
+                [4.0, 0.0, 32.0, 32.0, 0.0],
+                [4.0, 0.0, 32.0, 32.0, 45.0],
+                [4.0, 0.0, 16.0, 64.0, 0.0],
+                [4.0, 0.0, 16.0, 64.0, 45.0],
+                [4.0, 0.0, 128.0, 32.0, 0.0],
+                [4.0, 0.0, 128.0, 32.0, 45.0],
+                [4.0, 0.0, 64.0, 64.0, 0.0],
+                [4.0, 0.0, 64.0, 64.0, 45.0],
+                [4.0, 0.0, 32.0, 128.0, 0.0],
+                [4.0, 0.0, 32.0, 128.0, 45.0],
+            ]
+        )
+
+        self.assertTrue(torch.allclose(anchors[0].tensor, expected_anchor_tensor))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/modeling/test_box2box_transform.py b/tests/modeling/test_box2box_transform.py
new file mode 100644
index 0000000..9d124d7
--- /dev/null
+++ b/tests/modeling/test_box2box_transform.py
@@ -0,0 +1,64 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated
+
+logger = logging.getLogger(__name__)
+
+
+def random_boxes(mean_box, stdev, N):  # N boxes: mean_box plus uniform noise scaled by stdev
+    return torch.tensor(mean_box, dtype=torch.float) + stdev * torch.rand(N, 4)
+
+
+class TestBox2BoxTransform(unittest.TestCase):
+    def test_reconstruction(self):
+        """Applying the deltas computed from (src, dst) back to src must reproduce dst."""
+        b2b_tfm = Box2BoxTransform(weights=(5, 5, 10, 10))
+        src_boxes = random_boxes([10, 10, 20, 20], 1, 10)
+        dst_boxes = random_boxes([10, 10, 20, 20], 1, 10)
+
+        devices = [torch.device("cpu")]
+        if torch.cuda.is_available():
+            devices.append(torch.device("cuda"))
+        for device in devices:
+            src_boxes = src_boxes.to(device=device)
+            dst_boxes = dst_boxes.to(device=device)
+            deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes)
+            dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes)
+            self.assertTrue(torch.allclose(dst_boxes, dst_boxes_reconstructed))
+
+
+def random_rotated_boxes(mean_box, std_length, std_angle, N):
+    """Return an (N, 5) tensor: mean_box plus noise (4 length columns, 1 angle column)."""
+    noise = [torch.rand(N, 4) * std_length, torch.rand(N, 1) * std_angle]
+    return torch.cat(noise, dim=1) + torch.tensor(mean_box, dtype=torch.float)
+
+
+class TestBox2BoxTransformRotated(unittest.TestCase):
+    def test_reconstruction(self):
+        """Applying the deltas computed from (src, dst) back to src must reproduce dst."""
+        b2b_transform = Box2BoxTransformRotated(weights=(5, 5, 10, 10, 1))
+        src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)
+        dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10)
+
+        devices = [torch.device("cpu")]
+        if torch.cuda.is_available():
+            devices.append(torch.device("cuda"))
+        for device in devices:
+            src_boxes = src_boxes.to(device=device)
+            dst_boxes = dst_boxes.to(device=device)
+            deltas = b2b_transform.get_deltas(src_boxes, dst_boxes)
+            dst_boxes_reconstructed = b2b_transform.apply_deltas(deltas, src_boxes)
+            self.assertTrue(
+                torch.allclose(dst_boxes[:, :4], dst_boxes_reconstructed[:, :4], atol=1e-5)
+            )
+            # angle difference has to be normalized to [-180, 180) before comparing with zero
+            angle_diff = dst_boxes[:, 4] - dst_boxes_reconstructed[:, 4]
+            angle_diff = (angle_diff + 180.0) % 360.0 - 180.0
+            self.assertTrue(torch.allclose(angle_diff, torch.zeros_like(angle_diff), atol=1e-4))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/modeling/test_fast_rcnn.py b/tests/modeling/test_fast_rcnn.py
new file mode 100644
index 0000000..70b64d3
--- /dev/null
+++ b/tests/modeling/test_fast_rcnn.py
@@ -0,0 +1,106 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated
+from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers
+from detectron2.modeling.roi_heads.rotated_fast_rcnn import RotatedFastRCNNOutputLayers
+from detectron2.structures import Boxes, Instances, RotatedBoxes
+from detectron2.utils.events import EventStorage
+
+logger = logging.getLogger(__name__)
+
+
+class FastRCNNTest(unittest.TestCase):
+    """Loss regression tests for the (rotated) Fast R-CNN output layers, plus empty batches."""
+
+    def test_fast_rcnn(self):
+        """Losses of FastRCNNOutputLayers for a fixed seed must match recorded values."""
+        torch.manual_seed(132)
+        box_head_output_size = 8
+        box_predictor = FastRCNNOutputLayers(
+            ShapeSpec(channels=box_head_output_size),
+            box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)),
+            num_classes=5,
+        )
+        feature_pooled = torch.rand(2, box_head_output_size)
+        predictions = box_predictor(feature_pooled)
+
+        proposal_boxes = torch.tensor([[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]], dtype=torch.float32)
+        gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
+        proposal = Instances((10, 10))
+        proposal.proposal_boxes = Boxes(proposal_boxes)
+        proposal.gt_boxes = Boxes(gt_boxes)
+        proposal.gt_classes = torch.tensor([1, 2])
+
+        with EventStorage():  # capture events in a new storage to discard them
+            losses = box_predictor.losses(predictions, [proposal])
+
+        expected_losses = {
+            "loss_cls": torch.tensor(1.7951188087),
+            "loss_box_reg": torch.tensor(4.0357131958),
+        }
+        for name, expected in expected_losses.items():
+            self.assertTrue(torch.allclose(losses[name], expected))
+
+    def test_fast_rcnn_empty_batch(self, device="cpu"):
+        """Empty input: zero losses that still back-propagate, and no inference output."""
+        box_predictor = FastRCNNOutputLayers(
+            ShapeSpec(channels=10),
+            box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)),
+            num_classes=8,
+        ).to(device=device)
+        logits = torch.randn(0, 100, requires_grad=True, device=device)
+        deltas = torch.randn(0, 4, requires_grad=True, device=device)
+        losses = box_predictor.losses([logits, deltas], [])
+        for value in losses.values():
+            self.assertTrue(torch.allclose(value, torch.zeros_like(value)))
+        sum(losses.values()).backward()
+        self.assertIsNotNone(logits.grad)
+        self.assertIsNotNone(deltas.grad)
+
+        predictions, _ = box_predictor.inference([logits, deltas], [])
+        self.assertEqual(len(predictions), 0)
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_fast_rcnn_empty_batch_cuda(self):
+        self.test_fast_rcnn_empty_batch(device=torch.device("cuda"))
+
+    def test_fast_rcnn_rotated(self):
+        """Losses of RotatedFastRCNNOutputLayers for a fixed seed must match recorded values."""
+        torch.manual_seed(132)
+        box_head_output_size = 8
+        box_predictor = RotatedFastRCNNOutputLayers(
+            ShapeSpec(channels=box_head_output_size),
+            box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)),
+            num_classes=5,
+        )
+        feature_pooled = torch.rand(2, box_head_output_size)
+        predictions = box_predictor(feature_pooled)
+        proposal_boxes = torch.tensor(
+            [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32
+        )
+        gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
+        proposal = Instances((10, 10))
+        proposal.proposal_boxes = RotatedBoxes(proposal_boxes)
+        proposal.gt_boxes = RotatedBoxes(gt_boxes)
+        proposal.gt_classes = torch.tensor([1, 2])
+
+        with EventStorage():  # capture events in a new storage to discard them
+            losses = box_predictor.losses(predictions, [proposal])
+
+        # Note: the expected losses are slightly different even if the boxes are essentially
+        # the same as in the FastRCNNOutput test, because bbox_pred in FastRCNNOutputLayers
+        # has different Linear layers/initialization between the two cases.
+        expected_losses = {
+            "loss_cls": torch.tensor(1.7920907736),
+            "loss_box_reg": torch.tensor(4.0410838127),
+        }
+        for name, expected in expected_losses.items():
+            self.assertTrue(torch.allclose(losses[name], expected))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/modeling/test_matcher.py b/tests/modeling/test_matcher.py
new file mode 100644
index 0000000..adef912
--- /dev/null
+++ b/tests/modeling/test_matcher.py
@@ -0,0 +1,45 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import unittest
+from typing import List
+import torch
+
+from detectron2.config import get_cfg
+from detectron2.modeling.matcher import Matcher
+from detectron2.utils.env import TORCH_VERSION
+
+
+class TestMatcher(unittest.TestCase):
+    # need https://github.com/pytorch/pytorch/pull/38378
+    @unittest.skipIf(TORCH_VERSION < (1, 6), "Insufficient pytorch version")
+    def test_scriptability(self):
+        """Matcher must return the expected matches/labels both eagerly and under TorchScript."""
+        cfg = get_cfg()
+        iou_thresholds, iou_labels = cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS
+        anchor_matcher = Matcher(iou_thresholds, iou_labels, allow_low_quality_matches=True)
+        match_quality_matrix = torch.tensor(
+            [[0.15, 0.45, 0.2, 0.6], [0.3, 0.65, 0.05, 0.1], [0.05, 0.4, 0.25, 0.4]]
+        )
+        expected_matches = torch.tensor([1, 1, 2, 0])
+        expected_match_labels = torch.tensor([-1, 1, 0, 1], dtype=torch.int8)
+
+        # eager mode
+        matches, match_labels = anchor_matcher(match_quality_matrix)
+        self.assertTrue(torch.allclose(matches, expected_matches))
+        self.assertTrue(torch.allclose(match_labels, expected_match_labels))
+
+        # nonzero_tuple must be imported explicitly to let jit know what it is.
+        # https://github.com/pytorch/pytorch/issues/38964
+        from detectron2.layers import nonzero_tuple  # noqa F401
+
+        def f(thresholds: List[float], labels: List[int]):
+            return Matcher(thresholds, labels, allow_low_quality_matches=True)
+
+        # scripted mode: the Matcher is constructed inside TorchScript, then called as before
+        scripted_anchor_matcher = torch.jit.script(f)(iou_thresholds, iou_labels)
+        matches, match_labels = scripted_anchor_matcher(match_quality_matrix)
+        self.assertTrue(torch.allclose(matches, expected_matches))
+        self.assertTrue(torch.allclose(match_labels, expected_match_labels))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/modeling/test_model_e2e.py b/tests/modeling/test_model_e2e.py
new file mode 100644
index 0000000..8041fe7
--- /dev/null
+++ b/tests/modeling/test_model_e2e.py
@@ -0,0 +1,157 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+
+import numpy as np
+import unittest
+import torch
+
+import detectron2.model_zoo as model_zoo
+from detectron2.config import get_cfg
+from detectron2.modeling import build_model
+from detectron2.structures import BitMasks, Boxes, ImageList, Instances
+from detectron2.utils.events import EventStorage
+
+
+def get_model_zoo(config_path):
+    """
+    Build the model of a model-zoo config without loading any weights (even pretrained).
+    cfg.MODEL.DEVICE is set to "cpu" when CUDA is unavailable.
+    """
+    cfg = get_cfg()
+    cfg.merge_from_file(model_zoo.get_config_file(config_path))
+    if not torch.cuda.is_available():
+        cfg.MODEL.DEVICE = "cpu"
+    return build_model(cfg)
+
+
+def create_model_input(img, inst=None):
+    """Return {"image": img}, plus an "instances" entry when inst is not None."""
+    if inst is None:
+        return {"image": img}
+    return {"image": img, "instances": inst}
+
+
+def get_empty_instance(h, w):  # ground-truth Instances for an (h, w) image with no objects
+    inst = Instances((h, w))
+    inst.gt_boxes = Boxes(torch.rand(0, 4))  # zero boxes
+    inst.gt_classes = torch.tensor([]).to(dtype=torch.int64)  # zero labels, int64
+    inst.gt_masks = BitMasks(torch.rand(0, h, w))  # zero masks of size (h, w)
+    return inst
+
+
+def get_regular_bitmask_instances(h, w):  # ground-truth Instances with 3 random objects
+    inst = Instances((h, w))
+    inst.gt_boxes = Boxes(torch.rand(3, 4))
+    inst.gt_boxes.tensor[:, 2:] += inst.gt_boxes.tensor[:, :2]  # last two columns >= first two
+    inst.gt_classes = torch.tensor([3, 4, 5]).to(dtype=torch.int64)
+    inst.gt_masks = BitMasks((torch.rand(3, h, w) > 0.5))  # random boolean masks, shape (3, h, w)
+    return inst
+
+
+class ModelE2ETest:
+    """Mixin of end-to-end smoke tests; subclasses set CONFIG_PATH and add unittest.TestCase."""
+
+    def setUp(self):
+        torch.manual_seed(43)
+        self.model = get_model_zoo(self.CONFIG_PATH)
+
+    def _test_eval(self, input_sizes):
+        inputs = [create_model_input(torch.rand(3, size[0], size[1])) for size in input_sizes]
+        self.model.eval()
+        self.model(inputs)
+
+    def _test_train(self, input_sizes, instances):
+        """Run one training-mode forward and backward pass on random images."""
+        assert len(input_sizes) == len(instances)
+        inputs = []
+        for size, inst in zip(input_sizes, instances):
+            inputs.append(create_model_input(torch.rand(3, size[0], size[1]), inst))
+        self.model.train()
+        with EventStorage():
+            losses = self.model(inputs)
+            sum(losses.values()).backward()
+            del losses
+
+    def _inf_tensor(self, *shape):
+        return 1.0 / torch.zeros(*shape, device=self.model.device)  # 1 / 0 -> inf everywhere
+
+    def _nan_tensor(self, *shape):
+        return torch.zeros(*shape, device=self.model.device).fill_(float("nan"))
+
+    def test_empty_data(self):
+        instances = [get_empty_instance(200, 250), get_empty_instance(200, 249)]
+        self._test_eval([(200, 250), (200, 249)])
+        self._test_train([(200, 250), (200, 249)], instances)
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
+    def test_eval_tocpu(self):
+        model = get_model_zoo(self.CONFIG_PATH).cpu().eval()
+        inputs = [create_model_input(torch.rand(3, h, w)) for h, w in [(200, 250), (200, 249)]]
+        model(inputs)
+
+
+class MaskRCNNE2ETest(ModelE2ETest, unittest.TestCase):
+    """End-to-end tests on the Mask R-CNN R50-FPN config from the model zoo."""
+
+    CONFIG_PATH = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml"
+
+    def test_half_empty_data(self):
+        """Train on a batch mixing an image without ground truth and one with instances."""
+        instances = [get_empty_instance(200, 250), get_regular_bitmask_instances(200, 249)]
+        self._test_train([(200, 250), (200, 249)], instances)
+
+    # This test is flaky because in some environment the output features are zero due to relu
+    # def test_rpn_inf_nan_data(self):
+    #     self.model.eval()
+    #     for tensor in [self._inf_tensor, self._nan_tensor]:
+    #         images = ImageList(tensor(1, 3, 512, 512), [(510, 510)])
+    #         features = {
+    #             "p2": tensor(1, 256, 256, 256),
+    #             "p3": tensor(1, 256, 128, 128),
+    #             "p4": tensor(1, 256, 64, 64),
+    #             "p5": tensor(1, 256, 32, 32),
+    #             "p6": tensor(1, 256, 16, 16),
+    #         }
+    #         props, _ = self.model.proposal_generator(images, features)
+    #         self.assertEqual(len(props[0]), 0)
+
+    def test_roiheads_inf_nan_data(self):
+        """ROI heads fed all-inf or all-nan features must return no detections."""
+        self.model.eval()
+        for tensor in [self._inf_tensor, self._nan_tensor]:
+            images = ImageList(tensor(1, 3, 512, 512), [(510, 510)])
+            level_sizes = {"p2": 256, "p3": 128, "p4": 64, "p5": 32, "p6": 16}
+            features = {name: tensor(1, 256, s, s) for name, s in level_sizes.items()}
+            # a single hand-made proposal is enough to drive the ROI heads
+            props = [Instances((510, 510))]
+            props[0].proposal_boxes = Boxes([[10, 10, 20, 20]]).to(device=self.model.device)
+            props[0].objectness_logits = torch.tensor([1.0]).reshape(1, 1)
+            det, _ = self.model.roi_heads(images, features, props)
+            self.assertEqual(len(det[0]), 0)
+
+
+class RetinaNetE2ETest(ModelE2ETest, unittest.TestCase):
+    """End-to-end tests on the RetinaNet R50-FPN config from the model zoo."""
+
+    CONFIG_PATH = "COCO-Detection/retinanet_R_50_FPN_1x.yaml"
+
+    def test_inf_nan_data(self):
+        """Inference on all-inf or all-nan predictions must not yield any finite box."""
+        self.model.eval()
+        # NOTE(review): presumably lets every prediction pass the score filter - confirm
+        self.model.score_threshold = -999999999
+        for tensor in [self._inf_tensor, self._nan_tensor]:
+            images = ImageList(tensor(1, 3, 512, 512), [(510, 510)])
+            features = [tensor(1, 256, size, size) for size in (128, 64, 32, 16, 8)]
+            anchors = self.model.anchor_generator(features)
+            # the real head output is only used to size the synthetic predictions below
+            _, pred_anchor_deltas = self.model.head(features)
+            # per level: product of the last three dims divided by 4 (a delta has 4 values)
+            HWAs = [np.prod(x.shape[-3:]) // 4 for x in pred_anchor_deltas]
+
+            pred_logits = [tensor(1, HWA, self.model.num_classes) for HWA in HWAs]
+            pred_anchor_deltas = [tensor(1, HWA, 4) for HWA in HWAs]
+            det = self.model.inference(anchors, pred_logits, pred_anchor_deltas, images.image_sizes)
+            # all predictions (if any) are infinite or nan
+            if len(det[0]):
+                self.assertTrue(torch.isfinite(det[0].pred_boxes.tensor).sum() == 0)
diff --git a/tests/modeling/test_roi_heads.py b/tests/modeling/test_roi_heads.py
new file mode 100644
index 0000000..d7042c4
--- /dev/null
+++ b/tests/modeling/test_roi_heads.py
@@ -0,0 +1,231 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+from copy import deepcopy
+import torch
+
+from detectron2.config import get_cfg
+from detectron2.export.torchscript import patch_instances
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.proposal_generator.build import build_proposal_generator
+from detectron2.modeling.roi_heads import (
+    FastRCNNConvFCHead,
+    KRCNNConvDeconvUpsampleHead,
+    MaskRCNNConvUpsampleHead,
+    StandardROIHeads,
+    build_roi_heads,
+)
+from detectron2.structures import BitMasks, Boxes, ImageList, Instances, RotatedBoxes
+from detectron2.utils.env import TORCH_VERSION
+from detectron2.utils.events import EventStorage
+
+logger = logging.getLogger(__name__)
+
+"""
+Make sure the losses of ROIHeads/RPN do not change, to avoid
+breaking the forward logic by mistake.
+This relies on the assumption that PyTorch's RNG is stable.
+"""
+
+
+class ROIHeadsTest(unittest.TestCase):
+    def test_roi_heads(self):
+        """
+        Losses of RPN + StandardROIHeads (box and mask heads) on fixed-seed random inputs
+        must match the recorded values; a loss without a recorded value must be 0.
+        """
+        torch.manual_seed(121)
+        cfg = get_cfg()
+        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
+        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
+        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
+        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
+        cfg.MODEL.MASK_ON = True
+        num_images = 2
+        images_tensor = torch.rand(num_images, 20, 30)
+        image_sizes = [(10, 10), (20, 30)]
+        images = ImageList(images_tensor, image_sizes)
+        num_channels = 1024
+        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
+        feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}
+
+        image_shape = (15, 15)
+        gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
+        gt_instance0 = Instances(image_shape)
+        gt_instance0.gt_boxes = Boxes(gt_boxes0)
+        gt_instance0.gt_classes = torch.tensor([2, 1])
+        gt_instance0.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5)
+        gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32)
+        gt_instance1 = Instances(image_shape)
+        gt_instance1.gt_boxes = Boxes(gt_boxes1)
+        gt_instance1.gt_classes = torch.tensor([1, 2])
+        gt_instance1.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5)
+        gt_instances = [gt_instance0, gt_instance1]
+
+        proposal_generator = build_proposal_generator(cfg, feature_shape)
+        roi_heads = StandardROIHeads(cfg, feature_shape)
+
+        with EventStorage():  # capture events in a new storage to discard them
+            proposals, proposal_losses = proposal_generator(images, features, gt_instances)
+            _, detector_losses = roi_heads(images, features, proposals, gt_instances)
+
+        detector_losses.update(proposal_losses)
+        expected_losses = {
+            "loss_cls": 4.5253729820251465,
+            "loss_box_reg": 0.009785720147192478,
+            "loss_mask": 0.693184494972229,
+            "loss_rpn_cls": 0.08186662942171097,
+            "loss_rpn_loc": 0.1104838103055954,
+        }
+        losses_match = all(
+            torch.allclose(loss, torch.tensor(expected_losses.get(name, 0.0)))
+            for name, loss in detector_losses.items()
+        )
+        new_losses = {k: v.item() for k, v in detector_losses.items()}
+        self.assertTrue(losses_match, "Losses has changed! New losses: {}".format(new_losses))
+
+    def test_rroi_heads(self):
+        """
+        Losses of RRPN + RROIHeads (rotated boxes) on fixed-seed random inputs must
+        match the recorded values; a loss without a recorded value must be 0.
+        """
+        torch.manual_seed(121)
+        cfg = get_cfg()
+        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
+        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
+        cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
+        cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
+        cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
+        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
+        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
+        cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
+        cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
+        num_images = 2
+        images_tensor = torch.rand(num_images, 20, 30)
+        image_sizes = [(10, 10), (20, 30)]
+        images = ImageList(images_tensor, image_sizes)
+        num_channels = 1024
+        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
+        feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}
+
+        image_shape = (15, 15)
+        gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32)
+        gt_instance0 = Instances(image_shape)
+        gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
+        gt_instance0.gt_classes = torch.tensor([2, 1])
+        gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32)
+        gt_instance1 = Instances(image_shape)
+        gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
+        gt_instance1.gt_classes = torch.tensor([1, 2])
+        gt_instances = [gt_instance0, gt_instance1]
+
+        proposal_generator = build_proposal_generator(cfg, feature_shape)
+        roi_heads = build_roi_heads(cfg, feature_shape)
+
+        with EventStorage():  # capture events in a new storage to discard them
+            proposals, proposal_losses = proposal_generator(images, features, gt_instances)
+            _, detector_losses = roi_heads(images, features, proposals, gt_instances)
+
+        detector_losses.update(proposal_losses)
+        expected_losses = {
+            "loss_cls": 4.365657806396484,
+            "loss_box_reg": 0.0015851043863222003,
+            "loss_rpn_cls": 0.2427729219198227,
+            "loss_rpn_loc": 0.3646621108055115,
+        }
+        losses_match = all(
+            torch.allclose(loss, torch.tensor(expected_losses.get(name, 0.0)))
+            for name, loss in detector_losses.items()
+        )
+        new_losses = {k: v.item() for k, v in detector_losses.items()}
+        self.assertTrue(losses_match, "Losses has changed! New losses: {}".format(new_losses))
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_box_head_scriptability(self):
+        """The scripted FastRCNNConvFCHead must produce exactly the eager module's output."""
+        input_shape = ShapeSpec(channels=1024, height=14, width=14)
+        box_features = torch.randn(4, 1024, 14, 14)
+
+        box_head = FastRCNNConvFCHead(
+            input_shape, conv_dims=[512, 512], fc_dims=[1024, 1024]
+        ).eval()
+        script_box_head = torch.jit.script(box_head)
+
+        origin_output = box_head(box_features)
+        self.assertTrue(torch.equal(origin_output, script_box_head(box_features)))
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_mask_head_scriptability(self):
+        """The scripted MaskRCNNConvUpsampleHead must reproduce the eager inference results."""
+        input_shape = ShapeSpec(channels=1024)
+        mask_features = torch.randn(4, 1024, 14, 14)
+
+        image_shapes = [(10, 10), (15, 15)]
+        pred_instance0 = Instances(image_shapes[0])
+        pred_instance0.pred_classes = torch.tensor([1, 2, 3], dtype=torch.int64)
+        pred_instance1 = Instances(image_shapes[1])
+        pred_instance1.pred_classes = torch.tensor([4], dtype=torch.int64)
+
+        mask_head = MaskRCNNConvUpsampleHead(
+            input_shape, num_classes=80, conv_dims=[256, 256]
+        ).eval()
+        # pred_instance will be in-place changed during the inference
+        # process of `MaskRCNNConvUpsampleHead`
+        origin_outputs = mask_head(mask_features, deepcopy([pred_instance0, pred_instance1]))
+
+        # field name -> type name handed to patch_instances for the scripted run
+        fields = {"pred_masks": "Tensor", "pred_classes": "Tensor"}
+        with patch_instances(fields) as NewInstances:
+            scripted_mask_head = torch.jit.script(mask_head)
+            pred_instance0 = NewInstances.from_instances(pred_instance0)
+            pred_instance1 = NewInstances.from_instances(pred_instance1)
+            script_outputs = scripted_mask_head(mask_features, [pred_instance0, pred_instance1])
+
+        for origin_ins, script_ins in zip(origin_outputs, script_outputs):
+            self.assertEqual(origin_ins.image_size, script_ins.image_size)
+            self.assertTrue(torch.equal(origin_ins.pred_classes, script_ins.pred_classes))
+            self.assertTrue(torch.equal(origin_ins.pred_masks, script_ins.pred_masks))
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_keypoint_head_scriptability(self):
+        """The scripted KRCNNConvDeconvUpsampleHead must reproduce the eager inference results."""
+        input_shape = ShapeSpec(channels=1024, height=14, width=14)
+        keypoint_features = torch.randn(4, 1024, 14, 14)
+
+        image_shapes = [(10, 10), (15, 15)]
+        pred_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6], [1, 5, 2, 8]], dtype=torch.float32)
+        pred_instance0 = Instances(image_shapes[0])
+        pred_instance0.pred_boxes = Boxes(pred_boxes0)
+        pred_boxes1 = torch.tensor([[7, 3, 10, 5]], dtype=torch.float32)
+        pred_instance1 = Instances(image_shapes[1])
+        pred_instance1.pred_boxes = Boxes(pred_boxes1)
+
+        keypoint_head = KRCNNConvDeconvUpsampleHead(
+            input_shape, num_keypoints=17, conv_dims=[512, 512]
+        ).eval()
+        # deepcopy keeps the originals untouched in case inference modifies the instances in place
+        origin_outputs = keypoint_head(
+            keypoint_features, deepcopy([pred_instance0, pred_instance1])
+        )
+
+        fields = {
+            "pred_boxes": "Boxes",
+            "pred_keypoints": "Tensor",
+            "pred_keypoint_heatmaps": "Tensor",
+        }
+        with patch_instances(fields) as NewInstances:
+            scripted_head = torch.jit.script(keypoint_head)
+            pred_instance0 = NewInstances.from_instances(pred_instance0)
+            pred_instance1 = NewInstances.from_instances(pred_instance1)
+            script_outputs = scripted_head(keypoint_features, [pred_instance0, pred_instance1])
+
+        for origin_ins, script_ins in zip(origin_outputs, script_outputs):
+            self.assertEqual(origin_ins.image_size, script_ins.image_size)
+            self.assertTrue(torch.equal(origin_ins.pred_keypoints, script_ins.pred_keypoints))
+            self.assertTrue(
+                torch.equal(origin_ins.pred_keypoint_heatmaps, script_ins.pred_keypoint_heatmaps)
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/modeling/test_roi_pooler.py b/tests/modeling/test_roi_pooler.py
new file mode 100644
index 0000000..df2e16f
--- /dev/null
+++ b/tests/modeling/test_roi_pooler.py
@@ -0,0 +1,139 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.structures import Boxes, RotatedBoxes
+from detectron2.utils.env import TORCH_VERSION
+
+logger = logging.getLogger(__name__)
+
+
+class TestROIPooler(unittest.TestCase):
+    """Tests for ROIPooler: ROIAlignV2 vs. ROIAlignRotated, scripting, and an empty batch."""
+
+    def _rand_boxes(self, num_boxes, x_max, y_max):
+        """Return a (num_boxes, 4) tensor of random boxes with x scaled by x_max, y by y_max."""
+        # A single uniform draw; columns 0 and 2 are x values, columns 1 and 3 are y values.
+        pts = torch.rand(num_boxes, 4)
+        for col, limit in enumerate((x_max, y_max, x_max, y_max)):
+            pts[:, col] *= limit
+        # Order each coordinate pair so the first corner never exceeds the second.
+        out = torch.zeros(num_boxes, 4)
+        for axis in (0, 1):
+            out[:, axis] = torch.min(pts[:, axis], pts[:, axis + 2])
+            out[:, axis + 2] = torch.max(pts[:, axis], pts[:, axis + 2])
+        return out
+
+    def _test_roialignv2_roialignrotated_match(self, device):
+        """
+        Pool the same boxes with ROIAlignV2 and ROIAlignRotated and compare the outputs.
+
+        The rotated boxes are built from the axis-aligned ones with the angle column
+        left at zero, and the two pooled tensors are required to match within 1e-4.
+        """
+        canonical_level = 4
+        canonical_scale_factor = 2 ** canonical_level
+        N, C, H, W = 2, 4, 10, 8
+        N_rois = 10
+        std, mean = 11, 0
+
+        # Everything except `pooler_type` is shared by the two poolers.
+        pooler_args = {
+            "output_size": 14,
+            "scales": (1.0 / canonical_scale_factor,),
+            "sampling_ratio": 0,
+        }
+
+        # One feature level of uniform noise, scaled by `std` and shifted by `mean`.
+        features = [((torch.rand(N, C, H, W) - 0.5) * 2 * std + mean).to(device)]
+
+        rois, rois_rotated = [], []
+        for _ in range(N):
+            xyxy = self._rand_boxes(
+                num_boxes=N_rois, x_max=W * canonical_scale_factor, y_max=H * canonical_scale_factor
+            )
+
+            # Columns 0-3 hold center x, center y, width and height; column 4 stays zero.
+            xywha = torch.zeros(N_rois, 5)
+            xywha[:, 0] = (xyxy[:, 0] + xyxy[:, 2]) / 2.0
+            xywha[:, 1] = (xyxy[:, 1] + xyxy[:, 3]) / 2.0
+            xywha[:, 2] = xyxy[:, 2] - xyxy[:, 0]
+            xywha[:, 3] = xyxy[:, 3] - xyxy[:, 1]
+            rois.append(Boxes(xyxy).to(device))
+            rois_rotated.append(RotatedBoxes(xywha).to(device))
+
+        aligned_pooler = ROIPooler(pooler_type="ROIAlignV2", **pooler_args)
+        aligned_out = aligned_pooler(features, rois)
+
+        rotated_pooler = ROIPooler(pooler_type="ROIAlignRotated", **pooler_args)
+        rotated_out = rotated_pooler(features, rois_rotated)
+
+        self.assertTrue(torch.allclose(aligned_out, rotated_out, atol=1e-4))
+
+    def test_roialignv2_roialignrotated_match_cpu(self):
+        """Run the ROIAlignV2 / ROIAlignRotated comparison on CPU."""
+        self._test_roialignv2_roialignrotated_match(device="cpu")
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_roialignv2_roialignrotated_match_cuda(self):
+        """Run the ROIAlignV2 / ROIAlignRotated comparison on a CUDA device."""
+        self._test_roialignv2_roialignrotated_match(device="cuda")
+
+    def _test_scriptability(self, device):
+        """Check that a scripted ROIAlignV2 pooler returns exactly the eager output."""
+        canonical_level = 4
+        canonical_scale_factor = 2 ** canonical_level
+        N, C, H, W = 2, 4, 10, 8
+        N_rois = 10
+        std, mean = 11, 0
+
+        features = [((torch.rand(N, C, H, W) - 0.5) * 2 * std + mean).to(device)]
+
+        # One set of random boxes per image in the batch.
+        rois = [
+            Boxes(
+                self._rand_boxes(
+                    num_boxes=N_rois,
+                    x_max=W * canonical_scale_factor,
+                    y_max=H * canonical_scale_factor,
+                )
+            ).to(device)
+            for _ in range(N)
+        ]
+
+        pooler = ROIPooler(
+            output_size=14,
+            scales=(1.0 / canonical_scale_factor,),
+            sampling_ratio=0,
+            pooler_type="ROIAlignV2",
+        )
+        eager_out = pooler(features, rois)
+        scripted_out = torch.jit.script(pooler)(features, rois)
+        self.assertTrue(torch.equal(eager_out, scripted_out))
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_scriptability_cpu(self):
+        """Run the scripting check on CPU."""
+        self._test_scriptability(device="cpu")
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_scriptability_gpu(self):
+        """Run the scripting check on a CUDA device."""
+        self._test_scriptability(device="cuda")
+
+    def test_no_images(self):
+        """Pooling an empty batch with no boxes yields an empty (0, C, 14, 14) output."""
+        N, C, H, W = 0, 32, 32, 32
+        features = [torch.rand(N, C, H, W) - 0.5]
+        pooler = ROIPooler(
+            output_size=14, scales=(1.0,), sampling_ratio=0.0, pooler_type="ROIAlignV2"
+        )
+        pooled = pooler.forward(features, [])
+        self.assertEqual(pooled.shape, (0, C, 14, 14))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/modeling/test_rpn.py b/tests/modeling/test_rpn.py
new file mode 100644
index 0000000..884161a
--- /dev/null
+++ b/tests/modeling/test_rpn.py
@@ -0,0 +1,256 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+import torch
+
+from detectron2.config import get_cfg
+from detectron2.export.torchscript import export_torchscript_with_instances
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.backbone import build_backbone
+from detectron2.modeling.proposal_generator import RPN, build_proposal_generator
+from detectron2.modeling.proposal_generator.proposal_utils import find_top_rpn_proposals
+from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes
+from detectron2.utils.env import TORCH_VERSION
+from detectron2.utils.events import EventStorage
+
+logger = logging.getLogger(__name__)
+
+
+class RPNTest(unittest.TestCase):
+    def test_rpn(self):
+        """Seeded regression test: RPN losses and proposals must match stored reference values."""
+        torch.manual_seed(121)
+        cfg = get_cfg()
+        rpn = RPN(cfg, build_backbone(cfg).output_shape())
+
+        num_images, num_channels = 2, 1024
+        image_sizes = [(10, 10), (20, 30)]
+        images = ImageList(torch.rand(num_images, 20, 30), image_sizes)
+        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
+
+        # Both ground-truth boxes share one Instances; each row becomes one image's target.
+        gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
+        targets = Instances((15, 15))
+        targets.gt_boxes = Boxes(gt_boxes)
+        with EventStorage():  # capture events in a new storage to discard them
+            proposals, proposal_losses = rpn(images, features, [targets[0], targets[1]])
+
+        expected_losses = {
+            "loss_rpn_cls": torch.tensor(0.0804563984),
+            "loss_rpn_loc": torch.tensor(0.0990132466),
+        }
+        # torch.allclose is used with its default tolerances here.
+        for name, expected in expected_losses.items():
+            actual = proposal_losses[name]
+            err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
+                name, actual, expected
+            )
+            self.assertTrue(torch.allclose(actual, expected), err_msg)
+
+        # Reference proposals, one entry per image.
+        expected_proposal_boxes = [
+            Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])),
+            Boxes(
+                torch.tensor(
+                    [
+                        [0, 0, 30, 20],
+                        [0, 0, 16.7862777710, 13.1362524033],
+                        [0, 0, 30, 13.3173446655],
+                        [0, 0, 10.8602609634, 20],
+                        [7.7165775299, 0, 27.3875980377, 20],
+                    ]
+                )
+            ),
+        ]
+
+        expected_objectness_logits = [
+            torch.tensor([0.1225359365, -0.0133192837]),
+            torch.tensor([0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783, -0.0428492837]),
+        ]
+
+        cases = zip(proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits)
+        for proposal, ref_boxes, im_size, ref_logits in cases:
+            self.assertEqual(len(proposal), len(ref_boxes))
+            self.assertEqual(proposal.image_size, im_size)
+            self.assertTrue(
+                torch.allclose(proposal.proposal_boxes.tensor, ref_boxes.tensor)
+            )
+            self.assertTrue(torch.allclose(proposal.objectness_logits, ref_logits))
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_rpn_scriptability(self):
+        """An eval-mode RPN exported to TorchScript must reproduce the eager proposals exactly."""
+        cfg = get_cfg()
+        rpn = RPN(cfg, {"res4": ShapeSpec(channels=1024, stride=16)}).eval()
+        num_images = 2
+        images = ImageList(torch.rand(num_images, 30, 40), [(32, 32), (30, 40)])
+        features = {"res4": torch.rand(num_images, 1024, 1, 2)}
+
+        # Instances attributes the exported module may use, mapped to their type names.
+        fields = {"proposal_boxes": "Boxes", "objectness_logits": "Tensor"}
+        rpn_ts = export_torchscript_with_instances(rpn, fields)
+
+        eager_proposals, _ = rpn(images, features)
+        scripted_proposals, _ = rpn_ts(images, features)
+
+        for eager, scripted in zip(eager_proposals, scripted_proposals):
+            self.assertEqual(eager.image_size, scripted.image_size)
+            self.assertTrue(
+                torch.equal(eager.proposal_boxes.tensor, scripted.proposal_boxes.tensor)
+            )
+            self.assertTrue(torch.equal(eager.objectness_logits, scripted.objectness_logits))
+
+    def test_rrpn(self):  # seeded regression test for RRPN losses and proposals
+        torch.manual_seed(121)  # fixed seed so the run can be compared with the values below
+        cfg = get_cfg()
+        cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
+        cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
+        cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]]
+        cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]]
+        cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]]
+        cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
+        cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
+        backbone = build_backbone(cfg)
+        proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
+        num_images = 2
+        images_tensor = torch.rand(num_images, 20, 30)
+        image_sizes = [(10, 10), (20, 30)]
+        images = ImageList(images_tensor, image_sizes)
+        image_shape = (15, 15)
+        num_channels = 1024
+        features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
+        gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
+        gt_instances = Instances(image_shape)
+        gt_instances.gt_boxes = RotatedBoxes(gt_boxes)
+        with EventStorage():  # capture events in a new storage to discard them
+            proposals, proposal_losses = proposal_generator(
+                images, features, [gt_instances[0], gt_instances[1]]
+            )
+
+        expected_losses = {
+            "loss_rpn_cls": torch.tensor(0.043263837695121765),
+            "loss_rpn_loc": torch.tensor(0.14432406425476074),
+        }
+        for name in expected_losses.keys():
+            err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
+                name, proposal_losses[name], expected_losses[name]
+            )
+            self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg)
+
+        expected_proposal_boxes = [
+            RotatedBoxes(
+                torch.tensor(
+                    [
+                        [0.60189795, 1.24095452, 61.98131943, 18.03621292, -4.07244873],
+                        [15.64940453, 1.69624567, 59.59749603, 16.34339333, 2.62692475],
+                        [-3.02982378, -2.69752932, 67.90952301, 59.62455750, 59.97010040],
+                        [16.71863365, 1.98309708, 35.61507797, 32.81484985, 62.92267227],
+                        [0.49432933, -7.92979717, 67.77606201, 62.93098450, -1.85656738],
+                        [8.00880814, 1.36017394, 121.81007385, 32.74150467, 50.44297409],
+                        [16.44299889, -4.82221127, 63.39775848, 61.22503662, 54.12270737],
+                        [5.00000000, 5.00000000, 10.00000000, 10.00000000, -0.76943970],
+                        [17.64130402, -0.98095351, 61.40377808, 16.28918839, 55.53118134],
+                        [0.13016054, 4.60568953, 35.80157471, 32.30180359, 62.52872086],
+                        [-4.26460743, 0.39604485, 124.30079651, 31.84611320, -1.58203125],
+                        [7.52815342, -0.91636634, 62.39784622, 15.45565224, 60.79549789],
+                    ]
+                )
+            ),
+            RotatedBoxes(
+                torch.tensor(
+                    [
+                        [0.07734215, 0.81635046, 65.33510590, 17.34688377, -1.51821899],
+                        [-3.41833067, -3.11320257, 64.17595673, 60.55617905, 58.27033234],
+                        [20.67383385, -6.16561556, 63.60531998, 62.52315903, 54.85546494],
+                        [15.00000000, 10.00000000, 30.00000000, 20.00000000, -0.18218994],
+                        [9.22646523, -6.84775209, 62.09895706, 65.46472931, -2.74307251],
+                        [15.00000000, 4.93451595, 30.00000000, 9.86903191, -0.60272217],
+                        [8.88342094, 2.65560246, 120.95362854, 32.45022202, 55.75970078],
+                        [16.39088631, 2.33887148, 34.78761292, 35.61492920, 60.81977463],
+                        [9.78298569, 10.00000000, 19.56597137, 20.00000000, -0.86660767],
+                        [1.28576660, 5.49873352, 34.93610382, 33.22600174, 60.51599884],
+                        [17.58912468, -1.63270092, 62.96052551, 16.45713997, 52.91245270],
+                        [5.64749718, -1.90428460, 62.37649155, 16.19474792, 61.09543991],
+                        [0.82255805, 2.34931135, 118.83985901, 32.83671188, 56.50753784],
+                        [-5.33874989, 1.64404404, 125.28501892, 33.35424042, -2.80731201],
+                    ]
+                )
+            ),
+        ]
+
+        expected_objectness_logits = [
+            torch.tensor(
+                [
+                    0.10111768,
+                    0.09112845,
+                    0.08466332,
+                    0.07589971,
+                    0.06650183,
+                    0.06350251,
+                    0.04299347,
+                    0.01864817,
+                    0.00986163,
+                    0.00078543,
+                    -0.04573630,
+                    -0.04799230,
+                ]
+            ),
+            torch.tensor(
+                [
+                    0.11373727,
+                    0.09377633,
+                    0.05281663,
+                    0.05143715,
+                    0.04040275,
+                    0.03250912,
+                    0.01307789,
+                    0.01177734,
+                    0.00038105,
+                    -0.00540255,
+                    -0.01194804,
+                    -0.01461012,
+                    -0.03061717,
+                    -0.03599222,
+                ]
+            ),
+        ]
+        # NOTE(review): this changes global print options and they are not restored afterwards.
+        torch.set_printoptions(precision=8, sci_mode=False)
+
+        for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
+            proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits
+        ):
+            self.assertEqual(len(proposal), len(expected_proposal_box))
+            self.assertEqual(proposal.image_size, im_size)
+            # The result appears to vary slightly between machines: on one machine the test
+            # can be run 100 times with exactly the same result, but a different machine
+            # might produce slightly different values. Hence the atol in the comparisons
+            # below.
+            err_msg = "computed proposal boxes = {}, expected {}".format(
+                proposal.proposal_boxes.tensor, expected_proposal_box.tensor
+            )
+            self.assertTrue(
+                torch.allclose(
+                    proposal.proposal_boxes.tensor, expected_proposal_box.tensor, atol=1e-5
+                ),
+                err_msg,
+            )
+
+            err_msg = "computed objectness logits = {}, expected {}".format(
+                proposal.objectness_logits, expected_objectness_logit
+            )
+            self.assertTrue(
+                torch.allclose(proposal.objectness_logits, expected_objectness_logit, atol=1e-5),
+                err_msg,
+            )
+
+    def test_rpn_proposals_inf(self):
+        """Smoke test: find_top_rpn_proposals must not raise when some logits are +inf."""
+        # NOTE(review): 3 images but a single image size is passed; confirm image 1 is exercised.
+        boxes, logits = [torch.rand(3, 27, 4)], [torch.rand(3, 27)]  # N = 3, Hi * Wi * A = 27
+        logits[0][1, 3:5] = float("inf")
+        find_top_rpn_proposals(boxes, logits, [(10, 10)], 0.5, 1000, 1000, 0, False)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/structures/test_boxes.py b/tests/structures/test_boxes.py
new file mode 100644
index 0000000..cf7b35d
--- /dev/null
+++ b/tests/structures/test_boxes.py
@@ -0,0 +1,203 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import json
+import math
+import numpy as np
+import unittest
+import torch
+
+from detectron2.structures import Boxes, BoxMode, pairwise_ioa, pairwise_iou
+from detectron2.utils.env import TORCH_VERSION
+
+
+class TestBoxMode(unittest.TestCase):
+    def _convert_xy_to_wh(self, x):
+        """Convert `x` from XYXY_ABS to XYWH_ABS."""
+        return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+
+    def _convert_xywha_to_xyxy(self, x):
+        """Convert `x` from XYWHA_ABS to XYXY_ABS."""
+        return BoxMode.convert(x, BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS)
+
+    def _convert_xywh_to_xywha(self, x):
+        """Convert `x` from XYWH_ABS to XYWHA_ABS."""
+        return BoxMode.convert(x, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)
+
+    def test_box_convert_list(self):
+        """A flat list or tuple keeps its container type; a nested one is rejected."""
+        for container in (list, tuple):
+            box = container([5.0, 5.0, 10.0, 10.0])
+            converted = self._convert_xy_to_wh(box)
+            self.assertIsInstance(converted, container)
+            self.assertIsInstance(converted[0], float)
+            self.assertEqual(converted, container([5.0, 5.0, 5.0, 5.0]))
+            self.assertRaises(Exception, self._convert_xy_to_wh, [box])
+
+    def test_box_convert_array(self):
+        """An integer numpy array keeps its dtype and shape through XYXY -> XYWH."""
+        box = np.asarray([[5, 5, 10, 10], [1, 1, 2, 3]])
+        out = self._convert_xy_to_wh(box)
+        self.assertEqual(out.dtype, box.dtype)
+        self.assertEqual(out.shape, box.shape)
+        for row, expected in zip(out, ([5, 5, 5, 5], [1, 1, 1, 2])):
+            self.assertTrue((row == expected).all())
+
+    def test_box_convert_cpu_tensor(self):
+        """A CPU tensor keeps its dtype and shape through XYXY -> XYWH."""
+        box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]])
+        out = self._convert_xy_to_wh(box)
+        self.assertEqual(out.dtype, box.dtype)
+        self.assertEqual(out.shape, box.shape)
+        for row, expected in zip(out.numpy(), ([5, 5, 5, 5], [1, 1, 1, 2])):
+            self.assertTrue((row == expected).all())
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_box_convert_cuda_tensor(self):
+        """A CUDA tensor keeps its dtype, shape and device through XYXY -> XYWH."""
+        box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]).cuda()
+        out = self._convert_xy_to_wh(box)
+        self.assertEqual(out.dtype, box.dtype)
+        self.assertEqual(out.shape, box.shape)
+        self.assertEqual(out.device, box.device)
+        for row, expected in zip(out.cpu().numpy(), ([5, 5, 5, 5], [1, 1, 1, 2])):
+            self.assertTrue((row == expected).all())
+
+    def test_box_convert_xywha_to_xyxy_list(self):
+        """A zero-angle XYWHA list or tuple maps to its XYXY extent; nesting is rejected."""
+        for container in (list, tuple):
+            box = container([50, 50, 30, 20, 0])
+            converted = self._convert_xywha_to_xyxy(box)
+            self.assertIsInstance(converted, container)
+            self.assertEqual(converted, container([35, 40, 65, 60]))
+            self.assertRaises(Exception, self._convert_xywha_to_xyxy, [box])
+
+    def test_box_convert_xywha_to_xyxy_array(self):
+        """XYWHA -> XYXY on numpy arrays with angles 0, 90 and -45, for both float dtypes."""
+        rows = [
+            [50, 50, 30, 20, 0],
+            [50, 50, 30, 20, 90],
+            [1, 1, math.sqrt(2), math.sqrt(2), -45],
+        ]
+        for dtype in (np.float64, np.float32):
+            box = np.asarray(rows, dtype=dtype)
+            out = self._convert_xywha_to_xyxy(box)
+            self.assertEqual(out.dtype, box.dtype)
+            expected = np.asarray([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype)
+            self.assertTrue(np.allclose(out, expected, atol=1e-6), "output={}".format(out))
+
+    def test_box_convert_xywha_to_xyxy_tensor(self):
+        """XYWHA -> XYXY on torch tensors with angles 0, 90 and -45, for both float dtypes."""
+        rows = [
+            [50, 50, 30, 20, 0],
+            [50, 50, 30, 20, 90],
+            [1, 1, math.sqrt(2), math.sqrt(2), -45],
+        ]
+        for dtype in (torch.float32, torch.float64):
+            box = torch.tensor(rows, dtype=dtype)
+            out = self._convert_xywha_to_xyxy(box)
+            self.assertEqual(out.dtype, box.dtype)
+            expected = torch.tensor([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype)
+            self.assertTrue(torch.allclose(out, expected, atol=1e-6), "output={}".format(out))
+
+    def test_box_convert_xywh_to_xywha_list(self):
+        """XYWH -> XYWHA on a list or tuple gives center, size and a zero angle."""
+        for container in (list, tuple):
+            box = container([50, 50, 30, 20])
+            converted = self._convert_xywh_to_xywha(box)
+            self.assertIsInstance(converted, container)
+            self.assertEqual(converted, container([65, 60, 30, 20, 0]))
+            self.assertRaises(Exception, self._convert_xywh_to_xywha, [box])
+
+    def test_box_convert_xywh_to_xywha_array(self):
+        """XYWH -> XYWHA on numpy arrays keeps the dtype and gives centers with a zero angle."""
+        for dtype in (np.float64, np.float32):
+            box = np.asarray([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype)
+            out = self._convert_xywh_to_xywha(box)
+            self.assertEqual(out.dtype, box.dtype)
+            expected = np.asarray(
+                [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype
+            )
+            self.assertTrue(np.allclose(out, expected, atol=1e-6), "output={}".format(out))
+
+    def test_box_convert_xywh_to_xywha_tensor(self):
+        """XYWH -> XYWHA on torch tensors keeps the dtype and gives centers with a zero angle."""
+        for dtype in (torch.float32, torch.float64):
+            box = torch.tensor([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype)
+            out = self._convert_xywh_to_xywha(box)
+            self.assertEqual(out.dtype, box.dtype)
+            expected = torch.tensor(
+                [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype
+            )
+            self.assertTrue(torch.allclose(out, expected, atol=1e-6), "output={}".format(out))
+
+    def test_json_serializable(self):
+        """A dict holding a BoxMode member must be serializable with json.dumps."""
+        payload = {"box_mode": BoxMode.XYWH_REL}
+        try:
+            json.dumps(payload)
+        except Exception:
+            self.fail("JSON serialization failed")
+
+    def test_json_deserializable(self):
+        """An integer loaded from JSON must be convertible back into a BoxMode."""
+        obj = json.loads('{"box_mode": 2}')
+        try:
+            obj["box_mode"] = BoxMode(obj["box_mode"])
+        except Exception:
+            self.fail("JSON deserialization failed")
+
+
+class TestBoxIOU(unittest.TestCase):
+    """pairwise_iou / pairwise_ioa on two unit squares against six comparison boxes."""
+
+    def create_boxes(self):
+        """Return two copies of the unit square (boxes1) and six comparison boxes (boxes2)."""
+        unit_square = [0.0, 0.0, 1.0, 1.0]
+        boxes1 = torch.tensor([unit_square, unit_square])
+        boxes2 = torch.tensor(
+            [
+                [0.0, 0.0, 1.0, 1.0],
+                [0.0, 0.0, 0.5, 1.0],
+                [0.0, 0.0, 1.0, 0.5],
+                [0.0, 0.0, 0.5, 0.5],
+                [0.5, 0.5, 1.0, 1.0],
+                [0.5, 0.5, 1.5, 1.5],
+            ]
+        )
+        return boxes1, boxes2
+
+    def test_pairwise_iou(self):
+        """IoU of each unit square with every comparison box."""
+        boxes1, boxes2 = self.create_boxes()
+        # Both rows of boxes1 are the same box, so the expected row is simply repeated.
+        row = [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)]
+        expected_ious = torch.tensor([row, row])
+        ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2))
+        self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_ioa(self):
+        """Intersection over the area of the second box, for the same pairs."""
+        boxes1, boxes2 = self.create_boxes()
+        row = [1.0, 1.0, 1.0, 1.0, 1.0, 0.25]
+        expected_ioas = torch.tensor([row, row])
+        ioas = pairwise_ioa(Boxes(boxes1), Boxes(boxes2))
+        self.assertTrue(torch.allclose(ioas, expected_ioas))
+
+
+class TestBoxes(unittest.TestCase):
+    def test_empty_cat(self):
+        """Concatenating an empty list must yield a Boxes whose tensor has shape (0, 4)."""
+        x = Boxes.cat([])
+        self.assertEqual(tuple(x.tensor.shape), (0, 4))
+
+    # require https://github.com/pytorch/pytorch/pull/39336
+    @unittest.skipIf(TORCH_VERSION < (1, 6), "Insufficient pytorch version")
+    def test_scriptability(self):
+        def func(x):
+            boxes = Boxes(x)
+            return boxes.area()
+
+        torch.jit.script(func)(torch.rand((3, 4)))  # scripting and running must not raise
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/structures/test_imagelist.py b/tests/structures/test_imagelist.py
new file mode 100644
index 0000000..93af5f9
--- /dev/null
+++ b/tests/structures/test_imagelist.py
@@ -0,0 +1,59 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import unittest
+from typing import List, Sequence, Tuple
+import torch
+
+from detectron2.structures import ImageList
+from detectron2.utils.env import TORCH_VERSION
+
+
+class TestImageList(unittest.TestCase):
+    def test_imagelist_padding_shape(self):
+        """
+        Trace ImageList.from_tensors and call the trace with images of other sizes.
+
+        The output shape must follow the images passed at call time: heights 15 and 25
+        come out as 16 and 28, i.e. rounded up to a multiple of the 4 given to from_tensors.
+        """
+
+        class TensorToImageList(torch.nn.Module):
+            def forward(self, tensors: Sequence[torch.Tensor]):
+                return ImageList.from_tensors(tensors, 4).tensor
+
+        def ones(height, width):
+            """Return a float32 all-ones image with 3 channels."""
+            return torch.ones((3, height, width), dtype=torch.float32)
+
+        # Trace with one 10x10 image, then call with one 15x20 image.
+        traced = torch.jit.trace(TensorToImageList(), ([ones(10, 10)],))
+        padded = traced([ones(15, 20)])
+        self.assertEqual(list(padded.shape), [1, 3, 16, 20], str(padded.shape))
+
+        # Trace with two images, then call with two images of different sizes.
+        traced = torch.jit.trace(TensorToImageList(), ([ones(16, 10), ones(13, 11)],))
+        padded = traced([ones(25, 20), ones(10, 10)])
+        # A trace only supports the number of images it was traced with; their sizes may differ.
+        self.assertEqual(list(padded.shape), [2, 3, 28, 20], str(padded.shape))
+
+    @unittest.skipIf(TORCH_VERSION < (1, 6), "Insufficient pytorch version")
+    def test_imagelist_scriptability(self):
+        """Constructing an ImageList inside a scripted function must match eager construction."""
+        image_nums = 2
+        image_tensor = torch.randn((image_nums, 10, 20), dtype=torch.float32)
+        image_shape = [(10, 20)] * image_nums
+
+        def f(image_tensor, image_shape: List[Tuple[int, int]]):
+            return ImageList(image_tensor, image_shape)
+
+        eager = f(image_tensor, image_shape)
+        scripted = torch.jit.script(f)(image_tensor, image_shape)
+
+        # Same number of images, and identical image tensors at every index.
+        self.assertEqual(len(eager), len(scripted))
+        for idx in range(image_nums):
+            self.assertTrue(torch.equal(eager[idx], scripted[idx]))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/structures/test_instances.py b/tests/structures/test_instances.py
new file mode 100644
index 0000000..151e827
--- /dev/null
+++ b/tests/structures/test_instances.py
@@ -0,0 +1,120 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import unittest
+import torch
+
+from detectron2.export.torchscript import patch_instances
+from detectron2.structures import Boxes, Instances
+from detectron2.utils.env import TORCH_VERSION
+
+
+class TestInstances(unittest.TestCase):
+    def test_int_indexing(self):
+        """Integer indices in [-len, len) pick matching rows; indices outside raise IndexError."""
+        attr1 = torch.tensor([[0.0, 0.0, 1.0], [0.0, 0.0, 0.5], [0.0, 0.0, 1.0], [0.0, 0.5, 0.5]])
+        attr2 = torch.tensor([0.1, 0.2, 0.3, 0.4])
+        instances = Instances((100, 100))
+        instances.attr1, instances.attr2 = attr1, attr2
+        count = len(instances)
+        for i in range(-count, count):
+            picked = instances[i]
+            self.assertEqual((picked.attr1 == attr1[i]).all(), True)
+            self.assertEqual((picked.attr2 == attr2[i]).all(), True)
+
+        self.assertRaises(IndexError, lambda: instances[count])
+        self.assertRaises(IndexError, lambda: instances[-count - 1])
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_script_new_fields(self):
+        """Scripting works only for fields declared by the active patch_instances() context."""
+
+        class f(torch.nn.Module):
+            def forward(self, x: Instances):
+                proposal_boxes = x.proposal_boxes  # noqa F841
+                objectness_logits = x.objectness_logits  # noqa F841
+                return x
+
+        class g(torch.nn.Module):
+            def forward(self, x: Instances):
+                mask = x.mask  # noqa F841
+                return x
+
+        class g2(torch.nn.Module):
+            def forward(self, x: Instances):
+                proposal_boxes = x.proposal_boxes  # noqa F841
+                return x
+
+        with patch_instances({"proposal_boxes": "Boxes", "objectness_logits": "Tensor"}):
+            torch.jit.script(f())
+
+        # can't script anymore after exiting the context
+        with self.assertRaises(Exception):
+            torch.jit.script(g2())
+
+        # Inside a patch that only declares "mask", "proposal_boxes" is still unavailable.
+        with patch_instances({"mask": "Tensor"}):
+            torch.jit.script(g())
+            with self.assertRaises(Exception):
+                torch.jit.script(g2())
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_script_access_fields(self):
+        """A module that reads and combines declared fields must be scriptable."""
+
+        class f(torch.nn.Module):
+            def forward(self, x: Instances):
+                proposal_boxes = x.proposal_boxes
+                objectness_logits = x.objectness_logits
+                return proposal_boxes.tensor + objectness_logits
+
+        with patch_instances({"proposal_boxes": "Boxes", "objectness_logits": "Tensor"}):
+            torch.jit.script(f())
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_script_len(self):
+        """len() of a patched Instances works inside a scripted module once a field is set."""
+
+        class f(torch.nn.Module):
+            def forward(self, x: Instances):
+                return len(x)
+
+        class g(torch.nn.Module):
+            def forward(self, x: Instances):
+                return len(x)
+
+        image_shape = (15, 15)
+
+        with patch_instances({"proposal_boxes": "Boxes"}) as new_instance:
+            script_module = torch.jit.script(f())
+            x = new_instance(image_shape)
+            # Calling before any field is set is expected to raise.
+            with self.assertRaises(Exception):
+                script_module(x)
+            x.proposal_boxes = Boxes(torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]))
+            self.assertEqual(script_module(x), 2)
+
+        with patch_instances({"objectness_logits": "Tensor"}) as new_instance:
+            script_module = torch.jit.script(g())
+            x = new_instance(image_shape)
+            x.objectness_logits = torch.tensor([1.0]).reshape(1, 1)
+            self.assertEqual(script_module(x), 1)
+
+    @unittest.skipIf(TORCH_VERSION < (1, 7), "Insufficient pytorch version")
+    def test_script_has(self):
+        """has() inside a scripted module reports whether a declared field has been set."""
+
+        class f(torch.nn.Module):
+            def forward(self, x: Instances):
+                return x.has("proposal_boxes")
+
+        image_shape = (15, 15)
+        with patch_instances({"proposal_boxes": "Boxes"}) as new_instance:
+            script_module = torch.jit.script(f())
+            x = new_instance(image_shape)
+            self.assertFalse(script_module(x))
+
+            x.proposal_boxes = Boxes(torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]))
+            self.assertTrue(script_module(x))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/structures/test_masks.py b/tests/structures/test_masks.py
new file mode 100644
index 0000000..de55b4d
--- /dev/null
+++ b/tests/structures/test_masks.py
@@ -0,0 +1,42 @@
+import unittest
+import torch
+
+from detectron2.structures.masks import BitMasks, PolygonMasks, polygons_to_bitmask
+
+
+class TestBitMask(unittest.TestCase):
+    def test_get_bounding_box(self):
+        masks = torch.tensor(
+            [
+                [
+                    [False, False, False, True],
+                    [False, False, True, True],
+                    [False, True, True, False],
+                    [False, True, True, False],
+                ],
+                [
+                    [False, False, False, False],
+                    [False, False, True, False],
+                    [False, True, True, False],
+                    [False, True, True, False],
+                ],
+                torch.zeros(4, 4),
+            ]
+        )
+        bitmask = BitMasks(masks)
+        box_true = torch.tensor([[1, 0, 4, 4], [1, 1, 3, 4], [0, 0, 0, 0]], dtype=torch.float32)
+        box = bitmask.get_bounding_boxes()
+        self.assertTrue(torch.all(box.tensor == box_true).item())
+
+        for box in box_true:
+            poly = box[[0, 1, 2, 1, 2, 3, 0, 3]].numpy()
+            mask = polygons_to_bitmask([poly], 4, 4)
+            reconstruct_box = BitMasks(mask[None, :, :]).get_bounding_boxes()[0].tensor
+            self.assertTrue(torch.all(box == reconstruct_box).item())
+
+            reconstruct_box = PolygonMasks([[poly]]).get_bounding_boxes()[0].tensor
+            self.assertTrue(torch.all(box == reconstruct_box).item())
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/structures/test_rotated_boxes.py b/tests/structures/test_rotated_boxes.py
new file mode 100644
index 0000000..575ac48
--- /dev/null
+++ b/tests/structures/test_rotated_boxes.py
@@ -0,0 +1,357 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+from __future__ import absolute_import, division, print_function, unicode_literals
+import logging
+import math
+import random
+import unittest
+import torch
+from fvcore.common.benchmark import benchmark
+
+from detectron2.layers.rotated_boxes import pairwise_iou_rotated
+from detectron2.structures.boxes import Boxes
+from detectron2.structures.rotated_boxes import RotatedBoxes, pairwise_iou
+
+logger = logging.getLogger(__name__)
+
+
+class TestRotatedBoxesLayer(unittest.TestCase):
+    def test_iou_0_dim_cpu(self):
+        boxes1 = torch.rand(0, 5, dtype=torch.float32)
+        boxes2 = torch.rand(10, 5, dtype=torch.float32)
+        expected_ious = torch.zeros(0, 10, dtype=torch.float32)
+        ious = pairwise_iou_rotated(boxes1, boxes2)
+        self.assertTrue(torch.allclose(ious, expected_ious))
+
+        boxes1 = torch.rand(10, 5, dtype=torch.float32)
+        boxes2 = torch.rand(0, 5, dtype=torch.float32)
+        expected_ious = torch.zeros(10, 0, dtype=torch.float32)
+        ious = pairwise_iou_rotated(boxes1, boxes2)
+        self.assertTrue(torch.allclose(ious, expected_ious))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_iou_0_dim_cuda(self):
+        boxes1 = torch.rand(0, 5, dtype=torch.float32)
+        boxes2 = torch.rand(10, 5, dtype=torch.float32)
+        expected_ious = torch.zeros(0, 10, dtype=torch.float32)
+        ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda())
+        self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious))
+
+        boxes1 = torch.rand(10, 5, dtype=torch.float32)
+        boxes2 = torch.rand(0, 5, dtype=torch.float32)
+        expected_ious = torch.zeros(10, 0, dtype=torch.float32)
+        ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda())
+        self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious))
+
+    def test_iou_half_overlap_cpu(self):
+        boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32)  # unit square
+        boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32)  # its left half
+        expected_ious = torch.tensor([[0.5]], dtype=torch.float32)  # half / whole
+        ious = pairwise_iou_rotated(boxes1, boxes2)  # rows are (x_ctr, y_ctr, w, h, angle)
+        self.assertTrue(torch.allclose(ious, expected_ious))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_iou_half_overlap_cuda(self):
+        boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32)
+        boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32)
+        expected_ious = torch.tensor([[0.5]], dtype=torch.float32)
+        ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda())
+        self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious))
+
+    def test_iou_precision(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # always test CPU
+            boxes1 = torch.tensor([[565, 565, 10, 10.0, 0]], dtype=torch.float32, device=device)
+            boxes2 = torch.tensor([[565, 565, 10, 8.3, 0]], dtype=torch.float32, device=device)
+            iou = 8.3 / 10.0
+            expected_ious = torch.tensor([[iou]], dtype=torch.float32)
+            ious = pairwise_iou_rotated(boxes1, boxes2)
+            self.assertTrue(torch.allclose(ious.cpu(), expected_ious))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_iou_too_many_boxes_cuda(self):
+        s1, s2 = 5, 1289035
+        boxes1 = torch.zeros(s1, 5)
+        boxes2 = torch.zeros(s2, 5)
+        ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda())
+        self.assertTupleEqual(tuple(ious_cuda.shape), (s1, s2))
+
+    def test_iou_extreme(self):
+        # Cause floating point issues in cuda kernels (#1266)
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # always test CPU
+            boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device)
+            boxes2 = torch.tensor(
+                [
+                    [
+                        -1.117407639806935e17,
+                        1.3858420478349148e18,
+                        1000.0000610351562,
+                        1000.0000610351562,
+                        1612.0,
+                    ]
+                ],
+                device=device,
+            )
+            ious = pairwise_iou_rotated(boxes1, boxes2)
+            self.assertTrue(ious.min() >= 0, ious)
+
+
+class TestRotatedBoxesStructure(unittest.TestCase):
+    def test_clip_area_0_degree(self):
+        for _ in range(50):
+            num_boxes = 100
+            boxes_5d = torch.zeros(num_boxes, 5)
+            boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+            boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+            boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+            boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+            # Convert from (x_ctr, y_ctr, w, h, 0) to  (x1, y1, x2, y2)
+            boxes_4d = torch.zeros(num_boxes, 4)
+            boxes_4d[:, 0] = boxes_5d[:, 0] - boxes_5d[:, 2] / 2.0
+            boxes_4d[:, 1] = boxes_5d[:, 1] - boxes_5d[:, 3] / 2.0
+            boxes_4d[:, 2] = boxes_5d[:, 0] + boxes_5d[:, 2] / 2.0
+            boxes_4d[:, 3] = boxes_5d[:, 1] + boxes_5d[:, 3] / 2.0
+
+            image_size = (500, 600)
+            test_boxes_4d = Boxes(boxes_4d)
+            test_boxes_5d = RotatedBoxes(boxes_5d)
+            # Before clip
+            areas_4d = test_boxes_4d.area()
+            areas_5d = test_boxes_5d.area()
+            self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5))
+            # After clip
+            test_boxes_4d.clip(image_size)
+            test_boxes_5d.clip(image_size)
+            areas_4d = test_boxes_4d.area()
+            areas_5d = test_boxes_5d.area()
+            self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5))
+
+    def test_clip_area_arbitrary_angle(self):
+        num_boxes = 100
+        boxes_5d = torch.zeros(num_boxes, 5)
+        boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+        boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+        boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+        boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+        boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800)
+        clip_angle_threshold = random.uniform(0, 180)
+
+        image_size = (500, 600)
+        test_boxes_5d = RotatedBoxes(boxes_5d)
+        # Areas before clipping, used as the reference below
+        areas_before = test_boxes_5d.area()
+        # Clip in place, then measure how much each area changed
+        test_boxes_5d.clip(image_size, clip_angle_threshold)
+        areas_diff = test_boxes_5d.area() - areas_before
+
+        # the areas should only decrease after clipping
+        self.assertTrue(torch.all(areas_diff <= 0))
+        # whenever the box is clipped (thus the area shrinks),
+        # the angle for the box must be within the clip_angle_threshold
+        # Note that the clip function will normalize the angle range
+        # to be within [-180, 180), the range test_normalize_angles asserts
+        self.assertTrue(
+            torch.all(torch.abs(boxes_5d[:, 4][torch.where(areas_diff < 0)]) < clip_angle_threshold)
+        )
+
+    def test_normalize_angles(self):
+        # torch.manual_seed(0)
+        for _ in range(50):
+            num_boxes = 100
+            boxes_5d = torch.zeros(num_boxes, 5)
+            boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+            boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
+            boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+            boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500)
+            boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800)
+            rotated_boxes = RotatedBoxes(boxes_5d)
+            normalized_boxes = rotated_boxes.clone()
+            normalized_boxes.normalize_angles()
+            self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] >= -180))
+            self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] < 180))
+            # x, y, w, h should not change
+            self.assertTrue(torch.allclose(boxes_5d[:, :4], normalized_boxes.tensor[:, :4]))
+            # the cos/sin values of the angles should stay the same
+
+            self.assertTrue(
+                torch.allclose(
+                    torch.cos(boxes_5d[:, 4] * math.pi / 180),
+                    torch.cos(normalized_boxes.tensor[:, 4] * math.pi / 180),
+                    atol=1e-5,
+                )
+            )
+
+            self.assertTrue(
+                torch.allclose(
+                    torch.sin(boxes_5d[:, 4] * math.pi / 180),
+                    torch.sin(normalized_boxes.tensor[:, 4] * math.pi / 180),
+                    atol=1e-5,
+                )
+            )
+
+    def test_pairwise_iou_0_degree(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # always test CPU
+            boxes1 = torch.tensor(
+                [[0.5, 0.5, 1.0, 1.0, 0.0], [0.5, 0.5, 1.0, 1.0, 0.0]],
+                dtype=torch.float32,
+                device=device,
+            )
+            boxes2 = torch.tensor(
+                [
+                    [0.5, 0.5, 1.0, 1.0, 0.0],
+                    [0.25, 0.5, 0.5, 1.0, 0.0],
+                    [0.5, 0.25, 1.0, 0.5, 0.0],
+                    [0.25, 0.25, 0.5, 0.5, 0.0],
+                    [0.75, 0.75, 0.5, 0.5, 0.0],
+                    [1.0, 1.0, 1.0, 1.0, 0.0],
+                ],
+                dtype=torch.float32,
+                device=device,
+            )
+            expected_ious = torch.tensor(
+                [
+                    [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
+                    [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
+                ],
+                dtype=torch.float32,
+                device=device,
+            )
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_45_degrees(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # always test CPU
+            boxes1 = torch.tensor(
+                [
+                    [1, 1, math.sqrt(2), math.sqrt(2), 45],
+                    [1, 1, 2 * math.sqrt(2), 2 * math.sqrt(2), -45],
+                ],
+                dtype=torch.float32,
+                device=device,
+            )
+            boxes2 = torch.tensor([[1, 1, 2, 2, 0]], dtype=torch.float32, device=device)
+            expected_ious = torch.tensor([[0.5], [0.5]], dtype=torch.float32, device=device)
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_orthogonal(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # always test CPU
+            boxes1 = torch.tensor([[5, 5, 10, 6, 55]], dtype=torch.float32, device=device)
+            boxes2 = torch.tensor([[5, 5, 10, 6, -35]], dtype=torch.float32, device=device)
+            iou = (6.0 * 6.0) / (6.0 * 6.0 + 4.0 * 6.0 + 4.0 * 6.0)
+            expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device)
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_large_close_boxes(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # always test CPU
+            boxes1 = torch.tensor(
+                [[299.500000, 417.370422, 600.000000, 364.259186, 27.1828]],
+                dtype=torch.float32,
+                device=device,
+            )
+            boxes2 = torch.tensor(
+                [[299.500000, 417.370422, 600.000000, 364.259155, 27.1828]],
+                dtype=torch.float32,
+                device=device,
+            )
+            iou = 364.259155 / 364.259186
+            expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device)
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_many_boxes(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # always test CPU
+            num_boxes1 = 100
+            num_boxes2 = 200
+            boxes1 = torch.stack(
+                [
+                    torch.tensor(
+                        [5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32, device=device
+                    )
+                    for i in range(num_boxes1)
+                ]
+            )
+            boxes2 = torch.stack(
+                [
+                    torch.tensor(
+                        [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0],
+                        dtype=torch.float32,
+                        device=device,
+                    )
+                    for i in range(num_boxes2)
+                ]
+            )
+            expected_ious = torch.zeros(num_boxes1, num_boxes2, dtype=torch.float32, device=device)
+            for i in range(min(num_boxes1, num_boxes2)):
+                expected_ious[i][i] = (1 + 9 * i / num_boxes2) / 10.0
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_issue1207_simplified(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # always test CPU
+            # Simplified test case of D2-issue-1207
+            boxes1 = torch.tensor([[3, 3, 8, 2, -45.0]], device=device)
+            boxes2 = torch.tensor([[6, 0, 8, 2, -45.0]], device=device)
+            iou = 0.0
+            expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device)
+
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_pairwise_iou_issue1207(self):
+        for device in ["cpu"] + (["cuda"] if torch.cuda.is_available() else []):  # always test CPU
+            # The original test case in D2-issue-1207
+            boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device)
+            boxes2 = torch.tensor([[190.0, 127.0, 80.0, 21.0, -46.0]], device=device)
+
+            iou = 0.0
+            expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device)
+
+            ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2))
+            self.assertTrue(torch.allclose(ious, expected_ious))
+
+    def test_empty_cat(self):
+        x = RotatedBoxes.cat([])
+        self.assertEqual(tuple(x.tensor.shape), (0, 5))  # compare the shape, not its truthiness
+
+
+def benchmark_rotated_iou():
+    num_boxes1 = 200
+    num_boxes2 = 500
+    boxes1 = torch.stack(
+        [
+            torch.tensor([5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32)
+            for i in range(num_boxes1)
+        ]
+    )
+    boxes2 = torch.stack(
+        [
+            torch.tensor(
+                [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], dtype=torch.float32
+            )
+            for i in range(num_boxes2)
+        ]
+    )
+
+    def func(dev, n=1):
+        b1 = boxes1.to(device=dev)
+        b2 = boxes2.to(device=dev)
+
+        def bench():
+            for _ in range(n):
+                pairwise_iou_rotated(b1, b2)
+            if dev.type == "cuda":
+                torch.cuda.synchronize()
+
+        return bench
+
+    # only run it once per timed loop, since it's slow
+    args = [{"dev": torch.device("cpu"), "n": 1}]
+    if torch.cuda.is_available():
+        args.append({"dev": torch.device("cuda"), "n": 10})
+
+    benchmark(func, "rotated_iou", args, warmup_iters=3)
+
+
+if __name__ == "__main__":
+    benchmark_rotated_iou()
+    unittest.main()  # calls sys.exit() when done, so it has to come last
diff --git a/tests/test_checkpoint.py b/tests/test_checkpoint.py
new file mode 100644
index 0000000..725b488
--- /dev/null
+++ b/tests/test_checkpoint.py
@@ -0,0 +1,48 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import unittest
+from collections import OrderedDict
+import torch
+from torch import nn
+
+from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts
+from detectron2.utils.logger import setup_logger
+
+
+class TestCheckpointer(unittest.TestCase):
+    def setUp(self):
+        setup_logger()
+
+    def create_complex_model(self):
+        m = nn.Module()
+        m.block1 = nn.Module()
+        m.block1.layer1 = nn.Linear(2, 3)
+        m.layer2 = nn.Linear(3, 2)
+        m.res = nn.Module()
+        m.res.layer2 = nn.Linear(3, 2)
+
+        state_dict = OrderedDict()
+        state_dict["layer1.weight"] = torch.rand(3, 2)
+        state_dict["layer1.bias"] = torch.rand(3)
+        state_dict["layer2.weight"] = torch.rand(2, 3)
+        state_dict["layer2.bias"] = torch.rand(2)
+        state_dict["res.layer2.weight"] = torch.rand(2, 3)
+        state_dict["res.layer2.bias"] = torch.rand(2)
+        return m, state_dict
+
+    def test_complex_model_loaded(self):
+        for add_data_parallel in [False, True]:
+            model, state_dict = self.create_complex_model()
+            if add_data_parallel:
+                model = nn.DataParallel(model)
+            model_sd = model.state_dict()
+
+            align_and_update_state_dicts(model_sd, state_dict)
+            # zip() stops at the shorter input, so make sure no entry goes unchecked
+            self.assertEqual(len(model_sd), len(state_dict))
+            for loaded, stored in zip(model_sd.values(), state_dict.values()):
+                self.assertIsNot(loaded, stored)  # different tensor references
+                self.assertTrue(loaded.equal(stored))  # same content
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 0000000..650bdf2
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+
+import os
+import tempfile
+import unittest
+import torch
+
+from detectron2.config import configurable, downgrade_config, get_cfg, upgrade_config
+from detectron2.layers import ShapeSpec
+
+_V0_CFG = """
+MODEL:
+  RPN_HEAD:
+    NAME: "TEST"
+VERSION: 0
+"""
+
+_V1_CFG = """
+MODEL:
+  WEIGHT: "/path/to/weight"
+"""
+
+
+class TestConfigVersioning(unittest.TestCase):
+    def test_upgrade_downgrade_consistency(self):
+        cfg = get_cfg()
+        # check that custom is preserved
+        cfg.USER_CUSTOM = 1
+
+        down = downgrade_config(cfg, to_version=0)
+        up = upgrade_config(down)
+        self.assertTrue(up == cfg)
+
+    def _merge_cfg_str(self, cfg, merge_str):
+        f = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False)
+        try:
+            with f:  # flush and close the handle even if the write fails
+                f.write(merge_str)
+            cfg.merge_from_file(f.name)
+        finally:
+            os.remove(f.name)  # delete=False above, so clean up explicitly
+        return cfg
+
+    def test_auto_upgrade(self):
+        cfg = get_cfg()
+        latest_ver = cfg.VERSION
+        cfg.USER_CUSTOM = 1
+
+        self._merge_cfg_str(cfg, _V0_CFG)
+
+        self.assertEqual(cfg.MODEL.RPN.HEAD_NAME, "TEST")
+        self.assertEqual(cfg.VERSION, latest_ver)
+
+    def test_guess_v1(self):
+        cfg = get_cfg()
+        latest_ver = cfg.VERSION
+        self._merge_cfg_str(cfg, _V1_CFG)
+        self.assertEqual(cfg.VERSION, latest_ver)
+
+
+class _TestClassA(torch.nn.Module):
+    @configurable
+    def __init__(self, arg1, arg2, arg3=3):
+        super().__init__()
+        self.arg1 = arg1
+        self.arg2 = arg2
+        self.arg3 = arg3
+        assert arg1 == 1
+        assert arg2 == 2
+        assert arg3 == 3
+
+    @classmethod
+    def from_config(cls, cfg):
+        args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2}
+        return args
+
+
+class _TestClassB(_TestClassA):
+    @configurable
+    def __init__(self, input_shape, arg1, arg2, arg3=3):
+        """
+        Doc of _TestClassB
+        """
+        assert input_shape == "shape"
+        super().__init__(arg1, arg2, arg3)
+
+    @classmethod
+    def from_config(cls, cfg, input_shape):  # test extra positional arg in from_config
+        args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2}
+        args["input_shape"] = input_shape
+        return args
+
+
+class _LegacySubClass(_TestClassB):
+    # an old subclass written in cfg style
+    def __init__(self, cfg, input_shape, arg4=4):
+        super().__init__(cfg, input_shape)
+        assert self.arg1 == 1
+        assert self.arg2 == 2
+        assert self.arg3 == 3
+
+
+class _NewSubClassNewInit(_TestClassB):
+    # test new subclass with a new __init__
+    @configurable
+    def __init__(self, input_shape, arg4=4, **kwargs):
+        super().__init__(input_shape, **kwargs)
+        assert self.arg1 == 1
+        assert self.arg2 == 2
+        assert self.arg3 == 3
+
+
+class _LegacySubClassNotCfg(_TestClassB):
+    # an old subclass written in cfg style, but argument is not called "cfg"
+    def __init__(self, config, input_shape):
+        super().__init__(config, input_shape)
+        assert self.arg1 == 1
+        assert self.arg2 == 2
+        assert self.arg3 == 3
+
+
+class _TestClassC(_TestClassB):
+    @classmethod
+    def from_config(cls, cfg, input_shape, **kwargs):  # test extra kwarg overwrite
+        args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2}
+        args["input_shape"] = input_shape
+        args.update(kwargs)
+        return args
+
+
+class _TestClassD(_TestClassA):
+    @configurable
+    def __init__(self, input_shape: ShapeSpec, arg1: int, arg2, arg3=3):
+        assert input_shape == "shape"
+        super().__init__(arg1, arg2, arg3)
+
+    # _TestClassA.from_config does not have input_shape args.
+    # Test whether input_shape will be forwarded to __init__
+
+
+class TestConfigurable(unittest.TestCase):
+    def testInitWithArgs(self):
+        _ = _TestClassA(arg1=1, arg2=2, arg3=3)
+        _ = _TestClassB("shape", arg1=1, arg2=2)
+        _ = _TestClassC("shape", arg1=1, arg2=2)
+        _ = _TestClassD("shape", arg1=1, arg2=2, arg3=3)
+
+    def testPatchedAttr(self):
+        self.assertTrue("Doc" in _TestClassB.__init__.__doc__)
+        self.assertEqual(_TestClassD.__init__.__annotations__["arg1"], int)
+
+    def testInitWithCfg(self):
+        cfg = get_cfg()
+        cfg.ARG1 = 1
+        cfg.ARG2 = 2
+        cfg.ARG3 = 3
+        _ = _TestClassA(cfg)
+        _ = _TestClassB(cfg, input_shape="shape")
+        _ = _TestClassC(cfg, input_shape="shape")
+        _ = _TestClassD(cfg, input_shape="shape")
+        _ = _LegacySubClass(cfg, input_shape="shape")
+        _ = _NewSubClassNewInit(cfg, input_shape="shape")
+        _ = _LegacySubClassNotCfg(cfg, input_shape="shape")
+        with self.assertRaises(TypeError):
+            # disallow forwarding positional args to __init__ since it's prone to errors
+            _ = _TestClassD(cfg, "shape")
+
+        # call with kwargs instead
+        _ = _TestClassA(cfg=cfg)
+        _ = _TestClassB(cfg=cfg, input_shape="shape")
+        _ = _TestClassC(cfg=cfg, input_shape="shape")
+        _ = _TestClassD(cfg=cfg, input_shape="shape")
+        _ = _LegacySubClass(cfg=cfg, input_shape="shape")
+        _ = _NewSubClassNewInit(cfg=cfg, input_shape="shape")
+        _ = _LegacySubClassNotCfg(config=cfg, input_shape="shape")
+
+    def testInitWithCfgOverwrite(self):
+        cfg = get_cfg()
+        cfg.ARG1 = 1
+        cfg.ARG2 = 999  # wrong config
+        with self.assertRaises(AssertionError):
+            _ = _TestClassA(cfg, arg3=3)
+
+        # overwrite arg2 with correct config later:
+        _ = _TestClassA(cfg, arg2=2, arg3=3)
+        _ = _TestClassB(cfg, input_shape="shape", arg2=2, arg3=3)
+        _ = _TestClassC(cfg, input_shape="shape", arg2=2, arg3=3)
+        _ = _TestClassD(cfg, input_shape="shape", arg2=2, arg3=3)
+
+        # call with kwargs cfg=cfg instead
+        _ = _TestClassA(cfg=cfg, arg2=2, arg3=3)
+        _ = _TestClassB(cfg=cfg, input_shape="shape", arg2=2, arg3=3)
+        _ = _TestClassC(cfg=cfg, input_shape="shape", arg2=2, arg3=3)
+        _ = _TestClassD(cfg=cfg, input_shape="shape", arg2=2, arg3=3)
+
+    def testInitWithCfgWrongArgs(self):
+        cfg = get_cfg()
+        cfg.ARG1 = 1
+        cfg.ARG2 = 2
+        with self.assertRaises(TypeError):
+            _ = _TestClassB(cfg, "shape", not_exist=1)
+        with self.assertRaises(TypeError):
+            _ = _TestClassC(cfg, "shape", not_exist=1)
+        with self.assertRaises(TypeError):
+            _ = _TestClassD(cfg, "shape", not_exist=1)
+
+    def testBadClass(self):
+        class _BadClass1:
+            @configurable
+            def __init__(self, a=1, b=2):
+                pass
+
+        class _BadClass2:
+            @configurable
+            def __init__(self, a=1, b=2):
+                pass
+
+            def from_config(self, cfg):  # noqa
+                pass
+
+        class _BadClass3:
+            @configurable
+            def __init__(self, a=1, b=2):
+                pass
+
+            # bad name: must be cfg
+            @classmethod
+            def from_config(cls, config):  # noqa
+                pass
+
+        with self.assertRaises(AttributeError):
+            _ = _BadClass1(a=1)
+
+        with self.assertRaises(TypeError):
+            _ = _BadClass2(a=1)
+
+        with self.assertRaises(TypeError):
+            _ = _BadClass3(get_cfg())
diff --git a/tests/test_engine.py b/tests/test_engine.py
new file mode 100644
index 0000000..6fec40e
--- /dev/null
+++ b/tests/test_engine.py
@@ -0,0 +1,75 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import json
+import os
+import tempfile
+import time
+import unittest
+from mock import MagicMock
+import torch
+from torch import nn
+
+from detectron2.engine import SimpleTrainer, hooks
+from detectron2.utils.events import CommonMetricPrinter, JSONWriter
+
+
+class SimpleModel(nn.Module):
+    def __init__(self, sleep_sec=0):
+        super().__init__()
+        self.mod = nn.Linear(10, 20)
+        self.sleep_sec = sleep_sec
+
+    def forward(self, x):
+        if self.sleep_sec > 0:
+            time.sleep(self.sleep_sec)
+        return {"loss": x.sum() + sum([x.mean() for x in self.parameters()])}
+
+
+class TestTrainer(unittest.TestCase):
+    def _data_loader(self, device):
+        device = torch.device(device)
+        while True:
+            yield torch.rand(3, 3).to(device)
+
+    def test_simple_trainer(self, device="cpu"):
+        model = SimpleModel().to(device=device)
+        trainer = SimpleTrainer(
+            model, self._data_loader(device), torch.optim.SGD(model.parameters(), 0.1)
+        )
+        trainer.train(0, 10)
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_simple_trainer_cuda(self):
+        self.test_simple_trainer(device="cuda")
+
+    def test_writer_hooks(self):
+        model = SimpleModel(sleep_sec=0.1)
+        trainer = SimpleTrainer(
+            model, self._data_loader("cpu"), torch.optim.SGD(model.parameters(), 0.1)
+        )
+
+        max_iter = 50
+
+        with tempfile.TemporaryDirectory(prefix="detectron2_test") as d:
+            json_file = os.path.join(d, "metrics.json")
+            writers = [CommonMetricPrinter(max_iter), JSONWriter(json_file)]
+            logger_info = writers[0].logger.info = MagicMock()
+
+            trainer.register_hooks(
+                [hooks.EvalHook(0, lambda: {"metric": 100}), hooks.PeriodicWriter(writers)]
+            )
+            trainer.train(0, max_iter)
+
+            with open(json_file, "r") as f:
+                data = [json.loads(line.strip()) for line in f]
+                self.assertEqual([x["iteration"] for x in data], [19, 39, 49, 50])
+                # the eval metric is in the last line with iter 50
+                self.assertIn("metric", data[-1], "Eval metric must be in last line of JSON!")
+
+            # test logged messages from CommonMetricPrinter
+            all_logs = [str(x) for x in logger_info.call_args_list]
+            self.assertEqual(len(all_logs), 3)
+            for log, expected_iter in zip(all_logs, [19, 39, 49]):
+                self.assertIn(f"iter: {expected_iter}", log)
+
+            self.assertIn("eta: 0:00:00", all_logs[-1], "Last ETA must be 0!")
diff --git a/tests/test_events.py b/tests/test_events.py
new file mode 100644
index 0000000..24b4a48
--- /dev/null
+++ b/tests/test_events.py
@@ -0,0 +1,46 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import json
+import os
+import tempfile
+import unittest
+
+from detectron2.utils.events import EventStorage, JSONWriter
+
+
+class TestEventWriter(unittest.TestCase):
+    def testScalar(self):
+        with tempfile.TemporaryDirectory(
+            prefix="detectron2_tests"
+        ) as tmp_dir, EventStorage() as storage:
+            json_file = os.path.join(tmp_dir, "test.json")
+            writer = JSONWriter(json_file)
+            for k in range(60):
+                storage.put_scalar("key", k, smoothing_hint=False)
+                if (k + 1) % 20 == 0:  # flush once every 20 steps
+                    writer.write()
+                storage.step()
+            writer.close()
+            with open(json_file) as f:
+                data = [json.loads(line) for line in f]
+                self.assertEqual([int(row["key"]) for row in data], [19, 39, 59])
+
+    def testScalarMismatchedPeriod(self):
+        with tempfile.TemporaryDirectory(
+            prefix="detectron2_tests"
+        ) as tmp_dir, EventStorage() as storage:
+            json_file = os.path.join(tmp_dir, "test.json")
+
+            writer = JSONWriter(json_file)
+            for k in range(60):
+                if k % 17 == 0:  # write in a different period
+                    storage.put_scalar("key2", k, smoothing_hint=False)
+                storage.put_scalar("key", k, smoothing_hint=False)
+                if (k + 1) % 20 == 0:
+                    writer.write()
+                storage.step()
+            writer.close()
+            with open(json_file) as f:
+                data = [json.loads(line) for line in f]
+                self.assertEqual([int(row.get("key2", 0)) for row in data], [17, 0, 34, 0, 51, 0])
+                self.assertEqual([int(row.get("key", 0)) for row in data], [0, 19, 0, 39, 0, 59])
+                self.assertEqual([int(row["iteration"]) for row in data], [17, 19, 34, 39, 51, 59])
diff --git a/tests/test_export_caffe2.py b/tests/test_export_caffe2.py
new file mode 100644
index 0000000..009b533
--- /dev/null
+++ b/tests/test_export_caffe2.py
@@ -0,0 +1,70 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+# -*- coding: utf-8 -*-
+
+import copy
+import numpy as np
+import os
+import tempfile
+import unittest
+import cv2
+import torch
+from fvcore.common.file_io import PathManager
+
+from detectron2 import model_zoo
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import get_cfg
+from detectron2.data import DatasetCatalog
+from detectron2.modeling import build_model
+from detectron2.utils.logger import setup_logger
+
+
+@unittest.skipIf(os.environ.get("CIRCLECI"), "Require COCO data and model zoo.")
+class TestCaffe2Export(unittest.TestCase):
+    def setUp(self):
+        setup_logger()
+
+    def _test_model(self, config_path, device="cpu"):
+        # imported here rather than at module level: the export module requires extra dependencies
+        from detectron2.export import Caffe2Model, add_export_config, export_caffe2_model
+
+        cfg = get_cfg()
+        cfg.merge_from_file(model_zoo.get_config_file(config_path))
+        cfg = add_export_config(cfg)
+        cfg.MODEL.DEVICE = device
+
+        inputs = [{"image": self._get_test_image()}]
+        model = build_model(cfg)
+        DetectionCheckpointer(model).load(model_zoo.get_checkpoint_url(config_path))
+        c2_model = export_caffe2_model(cfg, model, copy.deepcopy(inputs))
+
+        with tempfile.TemporaryDirectory(prefix="detectron2_unittest") as d:
+            c2_model.save_protobuf(d)
+            c2_model.save_graph(os.path.join(d, "test.svg"), inputs=copy.deepcopy(inputs))
+            c2_model = Caffe2Model.load_protobuf(d)  # round-trip: replace with the reloaded model
+        c2_model(inputs)[0]["instances"]  # smoke check only: result is indexed, never asserted on
+
+    def _get_test_image(self):
+        try:
+            file_name = DatasetCatalog.get("coco_2017_train")[0]["file_name"]
+            assert PathManager.exists(file_name)
+        except Exception:  # any failure to locate the image skips the test instead of failing it
+            self.skipTest("COCO dataset not available.")
+
+        with PathManager.open(file_name, "rb") as f:
+            buf = f.read()
+        img = cv2.imdecode(np.frombuffer(buf, dtype=np.uint8), cv2.IMREAD_COLOR)
+        assert img is not None, file_name
+        return torch.from_numpy(img.transpose(2, 0, 1))  # move the last (channel) axis first
+
+    def testMaskRCNN(self):
+        self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def testMaskRCNNGPU(self):
+        self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", device="cuda")
+
+    def testRetinaNet(self):
+        self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml")
+
+    def testPanopticFPN(self):
+        self._test_model("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")
diff --git a/tests/test_model_analysis.py b/tests/test_model_analysis.py
new file mode 100644
index 0000000..0e3f84c
--- /dev/null
+++ b/tests/test_model_analysis.py
@@ -0,0 +1,58 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
+
+
+import unittest
+import torch
+
+import detectron2.model_zoo as model_zoo
+from detectron2.config import get_cfg
+from detectron2.modeling import build_model
+from detectron2.utils.analysis import flop_count_operators, parameter_count
+
+
+def get_model_zoo(config_path):
+    """
+    Like model_zoo.get, but do not load any weights (even pretrained); uses CPU without CUDA.
+    """
+    cfg_file = model_zoo.get_config_file(config_path)
+    cfg = get_cfg()
+    cfg.merge_from_file(cfg_file)
+    if not torch.cuda.is_available():
+        cfg.MODEL.DEVICE = "cpu"  # no CUDA here: override whatever device the config selected
+    return build_model(cfg)
+
+
+class RetinaNetTest(unittest.TestCase):
+    def setUp(self):
+        self.model = get_model_zoo("COCO-Detection/retinanet_R_50_FPN_1x.yaml")
+
+    def test_flop(self):
+        # RetinaNet supports flop-counting with random inputs
+        inputs = [{"image": torch.rand(3, 800, 800)}]
+        res = flop_count_operators(self.model, inputs)
+        self.assertEqual(int(res["conv"]), 146)  # 146B flops; assertTrue only tested truthiness
+
+    def test_param_count(self):
+        res = parameter_count(self.model)
+        self.assertEqual(res[""], 37915572)  # presumably "" keys the whole-model total; confirm
+        self.assertEqual(res["backbone"], 31452352)
+
+
+class FasterRCNNTest(unittest.TestCase):
+    def setUp(self):
+        self.model = get_model_zoo("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml")
+
+    def test_flop(self):
+        # Faster R-CNN supports flop-counting with random inputs
+        inputs = [{"image": torch.rand(3, 800, 800)}]
+        res = flop_count_operators(self.model, inputs)
+
+        # This only checks flops for backbone & proposal generator
+        # Flops for box head is not conv, and depends on #proposals, which is
+        # almost 0 for random inputs.
+        self.assertEqual(int(res["conv"]), 117)  # assertTrue(x, 117) only tested truthiness of x
+
+    def test_param_count(self):
+        res = parameter_count(self.model)
+        self.assertEqual(res[""], 41699936)  # presumably "" keys the whole-model total; confirm
+        self.assertEqual(res["backbone"], 26799296)
diff --git a/tests/test_model_zoo.py b/tests/test_model_zoo.py
new file mode 100644
index 0000000..2d16c71
--- /dev/null
+++ b/tests/test_model_zoo.py
@@ -0,0 +1,29 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import logging
+import unittest
+
+from detectron2 import model_zoo
+from detectron2.modeling import FPN, GeneralizedRCNN
+
+logger = logging.getLogger(__name__)  # NOTE(review): not referenced anywhere in this test file
+
+
+class TestModelZoo(unittest.TestCase):
+    def test_get_returns_model(self):
+        model = model_zoo.get("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml", trained=False)
+        self.assertIsInstance(model, GeneralizedRCNN)
+        self.assertIsInstance(model.backbone, FPN)
+
+    def test_get_invalid_model(self):
+        self.assertRaises(RuntimeError, model_zoo.get, "Invalid/config.yaml")  # unknown config
+
+    def test_get_url(self):
+        url = model_zoo.get_checkpoint_url("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml")
+        self.assertEqual(
+            url,
+            "https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl",  # noqa
+        )
+
+
+if __name__ == "__main__":  # allow running this test file directly as a script
+    unittest.main()
diff --git a/tests/test_packaging.py b/tests/test_packaging.py
new file mode 100644
index 0000000..56c2834
--- /dev/null
+++ b/tests/test_packaging.py
@@ -0,0 +1,24 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import unittest
+
+from detectron2.utils.collect_env import collect_env_info
+
+
+class TestProjects(unittest.TestCase):
+    def test_import(self):
+        from detectron2.projects import point_rend
+
+        _ = point_rend.add_pointrend_config  # attribute lookup fails if the name is missing
+
+        import detectron2.projects.deeplab as deeplab
+
+        _ = deeplab.add_deeplab_config  # attribute lookup fails if the name is missing
+
+        # import detectron2.projects.panoptic_deeplab as panoptic_deeplab
+
+        # _ = panoptic_deeplab.add_panoptic_deeplab_config
+
+
+class TestCollectEnv(unittest.TestCase):
+    def test(self):
+        _ = collect_env_info()  # smoke test: passes as long as the call does not raise
diff --git a/tests/test_visualizer.py b/tests/test_visualizer.py
new file mode 100644
index 0000000..ddb9872
--- /dev/null
+++ b/tests/test_visualizer.py
@@ -0,0 +1,202 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+import numpy as np
+import os
+import tempfile
+import unittest
+import cv2
+import torch
+
+from detectron2.data import MetadataCatalog
+from detectron2.structures import BoxMode, Instances, RotatedBoxes
+from detectron2.utils.visualizer import ColorMode, Visualizer
+
+
+class TestVisualizer(unittest.TestCase):
+    def _random_data(self):
+        H, W = 100, 100
+        N = 10
+        img = np.random.rand(H, W, 3) * 255
+        boxxy = np.random.rand(N, 2) * (H // 2)
+        boxes = np.concatenate((boxxy, boxxy + H // 2), axis=1)  # N x 4: point, point + H // 2
+
+        def _rand_poly():
+            return np.random.rand(3, 2).flatten() * H
+
+        polygons = [[_rand_poly() for _ in range(np.random.randint(1, 5))] for _ in range(N)]
+
+        mask = np.zeros_like(img[:, :, 0], dtype=bool)  # builtin bool: np.bool alias was removed
+        mask[:40, 10:20] = 1
+
+        labels = [str(i) for i in range(N)]
+        return img, boxes, labels, polygons, [mask] * N
+
+    @property
+    def metadata(self):
+        return MetadataCatalog.get("coco_2017_train")
+
+    def test_draw_dataset_dict(self):
+        img = np.random.rand(512, 512, 3) * 255
+        dic = {
+            "annotations": [
+                {
+                    "bbox": [
+                        368.9946492271106,
+                        330.891438763377,
+                        13.148537455410235,
+                        13.644708680142685,
+                    ],
+                    "bbox_mode": BoxMode.XYWH_ABS,
+                    "category_id": 0,
+                    "iscrowd": 1,
+                    "segmentation": {
+                        "counts": "_jh52m?2N2N2N2O100O10O001N1O2MceP2",
+                        "size": [512, 512],
+                    },
+                }
+            ],
+            "height": 512,
+            "image_id": 1,
+            "width": 512,
+        }
+        v = Visualizer(img, self.metadata)
+        v.draw_dataset_dict(dic)
+
+    def test_overlay_instances(self):
+        img, boxes, labels, polygons, masks = self._random_data()
+
+        v = Visualizer(img, self.metadata)
+        output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image()
+        self.assertEqual(output.shape, img.shape)
+
+        # Test 2x scaling
+        v = Visualizer(img, self.metadata, scale=2.0)
+        output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image()
+        self.assertEqual(output.shape[0], img.shape[0] * 2)
+
+        # Test overlay masks
+        v = Visualizer(img, self.metadata)
+        output = v.overlay_instances(masks=masks, boxes=boxes, labels=labels).get_image()
+        self.assertEqual(output.shape, img.shape)
+
+    def test_overlay_instances_no_boxes(self):
+        img, _, labels, polygons, _ = self._random_data()
+        v = Visualizer(img, self.metadata)
+        v.overlay_instances(masks=polygons, boxes=None, labels=labels).get_image()
+
+    def test_draw_instance_predictions(self):
+        img, boxes, _, _, masks = self._random_data()
+        num_inst = len(boxes)
+        inst = Instances((img.shape[0], img.shape[1]))
+        inst.pred_classes = torch.randint(0, 80, size=(num_inst,))
+        inst.scores = torch.rand(num_inst)
+        inst.pred_boxes = torch.from_numpy(boxes)
+        inst.pred_masks = torch.from_numpy(np.asarray(masks))
+
+        v = Visualizer(img, self.metadata)
+        v.draw_instance_predictions(inst)
+
+    def test_BWmode_nomask(self):
+        img, boxes, _, _, _ = self._random_data()
+        num_inst = len(boxes)
+        inst = Instances((img.shape[0], img.shape[1]))
+        inst.pred_classes = torch.randint(0, 80, size=(num_inst,))
+        inst.scores = torch.rand(num_inst)
+        inst.pred_boxes = torch.from_numpy(boxes)
+
+        v = Visualizer(img, self.metadata, instance_mode=ColorMode.IMAGE_BW)
+        v.draw_instance_predictions(inst)
+
+    def test_draw_empty_mask_predictions(self):
+        img, boxes, _, _, masks = self._random_data()
+        num_inst = len(boxes)
+        inst = Instances((img.shape[0], img.shape[1]))
+        inst.pred_classes = torch.randint(0, 80, size=(num_inst,))
+        inst.scores = torch.rand(num_inst)
+        inst.pred_boxes = torch.from_numpy(boxes)
+        inst.pred_masks = torch.from_numpy(np.zeros_like(np.asarray(masks)))
+
+        v = Visualizer(img, self.metadata)
+        v.draw_instance_predictions(inst)
+
+    def test_correct_output_shape(self):
+        img = np.random.rand(928, 928, 3) * 255
+        v = Visualizer(img, self.metadata)
+        out = v.output.get_image()
+        self.assertEqual(out.shape, img.shape)
+
+    def test_overlay_rotated_instances(self):
+        H, W = 100, 150
+        img = np.random.rand(H, W, 3) * 255
+        num_boxes = 50
+        boxes_5d = torch.zeros(num_boxes, 5)
+        boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-0.1 * W, 1.1 * W)
+        boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-0.1 * H, 1.1 * H)
+        boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H))
+        boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H))
+        boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800)
+        rotated_boxes = RotatedBoxes(boxes_5d)
+        labels = [str(i) for i in range(num_boxes)]
+
+        v = Visualizer(img, self.metadata)
+        output = v.overlay_instances(boxes=rotated_boxes, labels=labels).get_image()
+        self.assertEqual(output.shape, img.shape)
+
+    def test_draw_no_metadata(self):
+        img, boxes, _, _, masks = self._random_data()
+        num_inst = len(boxes)
+        inst = Instances((img.shape[0], img.shape[1]))
+        inst.pred_classes = torch.randint(0, 80, size=(num_inst,))
+        inst.scores = torch.rand(num_inst)
+        inst.pred_boxes = torch.from_numpy(boxes)
+        inst.pred_masks = torch.from_numpy(np.asarray(masks))
+
+        v = Visualizer(img, MetadataCatalog.get("asdfasdf"))
+        v.draw_instance_predictions(inst)
+
+    def test_draw_binary_mask(self):
+        img, _, _, _, masks = self._random_data()
+        img[:, :, 0] = 0  # remove red color
+        mask = masks[0]
+        mask_with_hole = np.zeros_like(mask).astype("uint8")
+        mask_with_hole = cv2.rectangle(mask_with_hole, (10, 10), (50, 50), 1, 5)
+
+        for m in [mask, mask_with_hole]:
+            for save in [True, False]:  # check both the saved file and the in-memory image
+                v = Visualizer(img)
+                o = v.draw_binary_mask(m, color="red", text="test")
+                if save:
+                    with tempfile.TemporaryDirectory(prefix="detectron2_viz") as d:
+                        path = os.path.join(d, "output.png")
+                        o.save(path)
+                        o = cv2.imread(path)[:, :, ::-1]
+                else:
+                    o = o.get_image().astype("float32")
+                # red color is drawn on the image
+                self.assertGreater(o[:, :, 0].sum(), 0)
+
+    def test_border(self):
+        H, W = 200, 200
+        img = np.zeros((H, W, 3))
+        img[:, :, 0] = 255.0
+        v = Visualizer(img, scale=3)
+
+        mask = np.zeros((H, W))
+        mask[:, 100:150] = 1
+        # create a hole, to trigger imshow
+        mask = cv2.rectangle(mask, (110, 110), (130, 130), 0, thickness=-1)
+        output = v.draw_binary_mask(mask, color="blue")
+        output = output.get_image()[:, :, ::-1]
+
+        first_row = {tuple(x.tolist()) for x in output[0]}
+        last_row = {tuple(x.tolist()) for x in output[-1]}
+        # check quantization / off-by-1 error: the first and last row must have two colors
+        self.assertEqual(len(last_row), 2)
+        self.assertEqual(len(first_row), 2)
+        self.assertIn((0, 0, 255), last_row)
+        self.assertIn((0, 0, 255), first_row)
+
+
+if __name__ == "__main__":  # allow running this test file directly as a script
+    unittest.main()