End2end (#61)

* export end2end onnx model * fixbug * add web demo (#58) * Update README.md * main code update yolov7-tiny deploy cfg * main code update yolov7-tiny training cfg * main code @liguagua752109150 https://github.com/WongKinYiu/yolov7/issues/33#issuecomment-1178669212 * main code @albertfaromatics https://github.com/WongKinYiu/yolov7/issues/35#issuecomment-1178800685 * main code update link * main code add custom hyp * main code update default activation function * main code update path * main figure add more tasks * main code update readme * main code update reparameterization * Update README.md * main code update readme * main code update aux training * main code update aux training * main code update aux training * main figure update yolov7 prediction * main code update readme * main code rename * main code rename * main code rename * main code rename * main code update readme * main code update visualization * main code fix gain for train_aux * main code update loss * main code update instance segmentation demo * main code update keypoint detection demo * main code update pose demo * main code update pose * main code update pose * main code update pose * main code update pose * main code update trace * Update README.md * main code fix ciou * main code fix nan of aux training https://github.com/WongKinYiu/yolov7/issues/250#issue-1312356380 @hudingding * support onnx to tensorrt convert (#114) * fuse IDetect (#148) * Fixes #199 (#203) * minor fix * resolve conflict * resolve conflict * resolve conflict * resolve conflict * resolve conflict * resolve * resolve * resolve * resolve Co-authored-by: AK391 <81195143+AK391@users.noreply.github.com> Co-authored-by: Alexey <AlexeyAB@users.noreply.github.com> Co-authored-by: Kin-Yiu, Wong <102582011@cc.ncu.edu.tw> Co-authored-by: linghu8812 <36389436+linghu8812@users.noreply.github.com> Co-authored-by: Alexander <84590713+SashaAlderson@users.noreply.github.com> Co-authored-by: Ben Raymond <ben@theraymonds.org> Co-authored-by: 
AlexeyAB84 <alexeyab84@gmail.com>
2022-07-22 21:24:13 +08:00 · 2022-07-22 21:24:13 +08:00 · afdc86f519
parent 2596994f39
commit afdc86f519
12 changed files with 283 additions and 17 deletions
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@ -0,0 +1,46 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="18">
+            <item index="0" class="java.lang.String" itemvalue="onnxruntime" />
+            <item index="1" class="java.lang.String" itemvalue="onnx-simplifier" />
+            <item index="2" class="java.lang.String" itemvalue="scipy" />
+            <item index="3" class="java.lang.String" itemvalue="thop" />
+            <item index="4" class="java.lang.String" itemvalue="opencv-python" />
+            <item index="5" class="java.lang.String" itemvalue="torch" />
+            <item index="6" class="java.lang.String" itemvalue="numpy" />
+            <item index="7" class="java.lang.String" itemvalue="torchvision" />
+            <item index="8" class="java.lang.String" itemvalue="tqdm" />
+            <item index="9" class="java.lang.String" itemvalue="pandas" />
+            <item index="10" class="java.lang.String" itemvalue="tensorboard" />
+            <item index="11" class="java.lang.String" itemvalue="seaborn" />
+            <item index="12" class="java.lang.String" itemvalue="matplotlib" />
+            <item index="13" class="java.lang.String" itemvalue="Cython" />
+            <item index="14" class="java.lang.String" itemvalue="pycocotools" />
+            <item index="15" class="java.lang.String" itemvalue="h5py" />
+            <item index="16" class="java.lang.String" itemvalue="opencv_python" />
+            <item index="17" class="java.lang.String" itemvalue="Pillow" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="N806" />
+          <option value="N801" />
+        </list>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredIdentifiers">
+        <list>
+          <option value="tkinter.*" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
+</project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/yolov7.iml" filepath="$PROJECT_DIR$/.idea/yolov7.iml" />
+    </modules>
+  </component>
+</project>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
--- a/.idea/yolov7.iml
+++ b/.idea/yolov7.iml
@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>
--- a/README.md
+++ b/README.md
@ -151,6 +151,13 @@ python detect.py --weights yolov7.pt --conf 0.25 --img-size 640 --source inferen
    </a>
 </div>

+
+## Export
+Use the `--include-nms` argument to export an end-to-end ONNX model that includes the `EfficientNMS` plugin.
+```shell
+python models/export.py --weights yolov7.pt --grid  --include-nms
+```
+
 ## Citation

 ```
--- a/export.py
+++ b/export.py
@ -12,6 +12,7 @@ from models.experimental import attempt_load
 from utils.activations import Hardswish, SiLU
 from utils.general import set_logging, check_img_size
 from utils.torch_utils import select_device
+from utils.add_nms import RegisterNMS

 if __name__ == '__main__':
    parser = argparse.ArgumentParser()
@ -22,6 +23,7 @@ if __name__ == '__main__':
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--simplify', action='store_true', help='simplify onnx model')
+    parser.add_argument('--include-nms', action='store_true', help='export end2end onnx')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)
@ -52,7 +54,9 @@ if __name__ == '__main__':
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    y = model(img)  # dry run
-
+    if opt.include_nms:
+        model.model[-1].include_nms = True
+        y = None
    # TorchScript export
    try:
        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
@ -75,9 +79,16 @@ if __name__ == '__main__':
                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

+        if opt.include_nms:
+            print('Registering NMS plugin...')
+            mo = RegisterNMS(f)
+            mo.register_nms()
+            mo.save(f)
+        else:
            # Checks
            onnx_model = onnx.load(f)  # load onnx model
            onnx.checker.check_model(onnx_model)  # check onnx model
+            # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model

            # # Metadata
            # d = {'stride': int(max(model.stride))}
@ -95,11 +106,9 @@ if __name__ == '__main__':
                assert check, 'assert check failed'
            except Exception as e:
                print(f'Simplifier failure: {e}')
-        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
        print('ONNX export success, saved as %s' % f)
    except Exception as e:
        print('ONNX export failure: %s' % e)
-
    # CoreML export
    try:
        import coremltools as ct
--- a/models/common.py
+++ b/models/common.py
@ -236,7 +236,7 @@ class Res(nn.Module):
 class ResX(Res):
    # ResNet bottleneck
    def __init__(self, c1, c2, shortcut=True, g=32, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
-        super().__init__(c1, c2, shortcu, g, e)
+        super().__init__(c1, c2, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels


--- a/models/yolo.py
+++ b/models/yolo.py
@ -5,7 +5,7 @@ from copy import deepcopy

 sys.path.append('./')  # to run '$ python *.py' files in subdirectories
 logger = logging.getLogger(__name__)
-
+import torch
 from models.common import *
 from models.experimental import *
 from utils.autoanchor import check_anchor_order
@ -23,7 +23,7 @@ except ImportError:
 class Detect(nn.Module):
    stride = None  # strides computed during build
    export = False  # onnx export
-
+    include_nms = False 
    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(Detect, self).__init__()
        self.nc = nc  # number of classes
@ -48,7 +48,6 @@ class Detect(nn.Module):
            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
-
                y = x[i].sigmoid()
                if not torch.onnx.is_in_onnx_export():
                    y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
@ -59,13 +58,28 @@ class Detect(nn.Module):
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))

-        return x if self.training else (torch.cat(z, 1), x)
+        if self.include_nms:
+            z = self.convert(z)
+
+        return x if self.training else (z, ) if self.include_nms else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()

+    def convert(self, z):
+        """Split the concatenated predictions into boxes and per-class scores.
+
+        Args:
+            z: sequence of tensors that are concatenated along dim 1. The last
+                axis is read as 4 box values, 1 confidence value, and then the
+                class scores.
+
+        Returns:
+            Tuple ``(box, score)``: the 4 box values multiplied by
+            ``convert_matrix`` and the class scores multiplied by the confidence.
+        """
+        z = torch.cat(z, 1)
+        box = z[:, :, :4]
+        conf = z[:, :, 4:5]
+        score = z[:, :, 5:]
+        # In-place multiply: every class score is scaled by the confidence
+        # column, which broadcasts over the last axis.
+        score *= conf
+        # Right-multiplying a row (a, b, c, d) by this matrix gives
+        # (a - c/2, b - d/2, a + c/2, b + d/2).
+        # NOTE(review): presumably (cx, cy, w, h) -> (x1, y1, x2, y2) corners; confirm.
+        convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
+                                           dtype=torch.float32,
+                                           device=z.device)
+        box @= convert_matrix                          
+        return (box, score)
+

 class IDetect(nn.Module):
    stride = None  # strides computed during build
--- a/utils/add_nms.py
+++ b/utils/add_nms.py
@ -0,0 +1,151 @@
+import numpy as np
+import onnx
+from onnx import shape_inference
+import onnx_graphsurgeon as gs
+import logging
+
+LOGGER = logging.getLogger(__name__)
+
+class RegisterNMS(object):
+    """Append an ``EfficientNMS_TRT`` plugin node to an exported ONNX model.
+
+    The model is loaded into an ONNX GraphSurgeon graph on construction,
+    ``register_nms`` feeds the graph's current outputs into the plugin node,
+    and ``save`` writes the modified model to disk.
+    """
+
+    def __init__(
+        self,
+        onnx_model_path: str,
+        precision: str = "fp32",
+    ):
+        """
+        Load the ONNX model and run a first constant-folding pass.
+        Args:
+            onnx_model_path: Path of the ONNX file to load.
+            precision: Data type of the NMS box/score outputs; ``register_nms``
+                accepts "fp32" or "fp16".
+        """
+
+        self.graph = gs.import_onnx(onnx.load(onnx_model_path))
+        assert self.graph
+        LOGGER.info("ONNX graph created successfully")
+        # Fold constants via ONNX-GS that PyTorch2ONNX may have missed
+        self.graph.fold_constants()
+        self.precision = precision
+        # Batch dimension written into the declared shapes of the NMS outputs.
+        self.batch_size = 1
+
+    def infer(self):
+        """
+        Sanitize the graph by cleaning any unconnected nodes, do a topological resort,
+        and fold constant inputs values. When possible, run shape inference on the
+        ONNX graph to determine tensor shapes.
+        Raises:
+            TypeError: Re-raised when ``fold_constants`` does not accept ``fold_shapes``.
+        """
+        # At most three passes; stop early once a pass leaves the node count unchanged.
+        for _ in range(3):
+            count_before = len(self.graph.nodes)
+
+            self.graph.cleanup().toposort()
+            try:
+                # Clear the recorded output shapes before shape inference runs
+                # (presumably so stale shapes get recomputed -- confirm).
+                for node in self.graph.nodes:
+                    for o in node.outputs:
+                        o.shape = None
+                model = gs.export_onnx(self.graph)
+                model = shape_inference.infer_shapes(model)
+                self.graph = gs.import_onnx(model)
+            except Exception as e:
+                # Best effort: a failed shape inference is logged and the pass continues.
+                LOGGER.info(f"Shape inference could not be performed at this time:\n{e}")
+            try:
+                self.graph.fold_constants(fold_shapes=True)
+            except TypeError as e:
+                LOGGER.error(
+                    "This version of ONNX GraphSurgeon does not support folding shapes, "
+                    f"please upgrade your onnx_graphsurgeon module. Error:\n{e}"
+                )
+                raise
+
+            count_after = len(self.graph.nodes)
+            if count_before == count_after:
+                # No new folding occurred in this iteration, so we can stop for now.
+                break
+
+    def save(self, output_path):
+        """
+        Save the ONNX model to the given location.
+        Args:
+            output_path: Path pointing to the location where to write
+                out the updated ONNX model.
+        """
+        self.graph.cleanup().toposort()
+        model = gs.export_onnx(self.graph)
+        onnx.save(model, output_path)
+        LOGGER.info(f"Saved ONNX model to {output_path}")
+
+    def register_nms(
+        self,
+        *,
+        score_thresh: float = 0.25,
+        nms_thresh: float = 0.45,
+        detections_per_img: int = 100,
+    ):
+        """
+        Register the ``EfficientNMS_TRT`` plugin node.
+        NMS expects these shapes for its input tensors:
+            - box_net: [batch_size, number_boxes, 4]
+            - class_net: [batch_size, number_boxes, number_labels]
+        Args:
+            score_thresh (float): The scalar threshold for score (low scoring boxes are removed).
+            nms_thresh (float): The scalar threshold for IOU (new boxes that have high IOU
+                overlap with previously selected boxes are removed).
+            detections_per_img (int): Number of best detections to keep after NMS.
+        Raises:
+            NotImplementedError: If ``self.precision`` is neither "fp32" nor "fp16".
+        """
+
+        self.infer()
+        # The current graph outputs are passed to the NMS node as its inputs.
+        op_inputs = self.graph.outputs
+        op = "EfficientNMS_TRT"
+        attrs = {
+            "plugin_version": "1",
+            "background_class": -1,  # no background class
+            "max_output_boxes": detections_per_img,
+            "score_threshold": score_thresh,
+            "iou_threshold": nms_thresh,
+            "score_activation": False,
+            "box_coding": 0,
+        }
+
+        if self.precision == "fp32":
+            dtype_output = np.float32
+        elif self.precision == "fp16":
+            dtype_output = np.float16
+        else:
+            raise NotImplementedError(f"Currently not supports precision: {self.precision}")
+
+        # NMS Outputs
+        output_num_detections = gs.Variable(
+            name="num_detections",
+            dtype=np.int32,
+            shape=[self.batch_size, 1],
+        )  # A scalar indicating the number of valid detections per batch image.
+        output_boxes = gs.Variable(
+            name="detection_boxes",
+            dtype=dtype_output,
+            shape=[self.batch_size, detections_per_img, 4],
+        )
+        output_scores = gs.Variable(
+            name="detection_scores",
+            dtype=dtype_output,
+            shape=[self.batch_size, detections_per_img],
+        )
+        output_labels = gs.Variable(
+            name="detection_classes",
+            dtype=np.int32,
+            shape=[self.batch_size, detections_per_img],
+        )
+
+        op_outputs = [output_num_detections, output_boxes, output_scores, output_labels]
+
+        # Create the NMS Plugin node with the selected inputs. The outputs of the node will also
+        # become the final outputs of the graph.
+        self.graph.layer(op=op, name="batched_nms", inputs=op_inputs, outputs=op_outputs, attrs=attrs)
+        LOGGER.info(f"Created NMS plugin '{op}' with attributes: {attrs}")
+
+        self.graph.outputs = op_outputs
+
+        self.infer()