Add NMS to CoreML exports (#11361)

* Add NMS to CoreML exports * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2025-06-03 14:49:29 +08:00 · 2023-04-14 18:11:15 +02:00 · 2023-04-14 18:11:15 +02:00 · a66fa8314c
commit a66fa8314c
parent 60e29e2d86
1 changed files with 148 additions and 2 deletions
--- a/export.py
+++ b/export.py
@ -77,6 +77,25 @@ from utils.torch_utils import select_device, smart_inference_mode
 MACOS = platform.system() == 'Darwin'  # macOS environment
 class iOSModel(torch.nn.Module):
    def __init__(self, model, im):
        super().__init__()
        b, c, h, w = im.shape  # batch, channel, height, width
        self.model = model
        self.nc = model.nc  # number of classes
        if w == h:
            self.normalize = 1. / w
        else:
            self.normalize = torch.tensor([1. / w, 1. / h, 1. / w, 1. / h])  # broadcast (slower, smaller)
            # np = model(im)[0].shape[1]  # number of points
            # self.normalize = torch.tensor([1. / w, 1. / h, 1. / w, 1. / h]).expand(np, 4)  # explicit (faster, larger)
    def forward(self, x):
        xywh, conf, cls = self.model(x)[0].squeeze().split((4, 1, self.nc), 1)
        return cls * conf, xywh * self.normalize  # confidence (3780, 80), coordinates (3780, 4)
 def export_formats():
    # YOLOv5 export formats
    x = [
@ -223,7 +242,7 @@ def export_paddle(model, im, file, metadata, prefix=colorstr('PaddlePaddle:')):
@try_export
-def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
+def export_coreml(model, im, file, int8, half, nms, prefix=colorstr('CoreML:')):
    # YOLOv5 CoreML export
    check_requirements('coremltools')
    import coremltools as ct
@ -231,6 +250,8 @@ def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
    LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
    f = file.with_suffix('.mlmodel')
    if nms:
        model = iOSModel(model, im)
    ts = torch.jit.trace(model, im, strict=False)  # TorchScript model
    ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
    bits, mode = (8, 'kmeans_lut') if int8 else (16, 'linear') if half else (32, None)
@ -506,6 +527,129 @@ def add_tflite_metadata(file, metadata, num_outputs):
        tmp_file.unlink()
 def pipeline_coreml(model, im, file, names, y, prefix=colorstr('CoreML Pipeline:')):
    # YOLOv5 CoreML pipeline
    import coremltools as ct
    from PIL import Image
    print(f'{prefix} starting pipeline with coremltools {ct.__version__}...')
    batch_size, ch, h, w = list(im.shape)  # BCHW
    t = time.time()
    # Output shapes
    spec = model.get_spec()
    out0, out1 = iter(spec.description.output)
    if platform.system() == 'Darwin':
        img = Image.new('RGB', (w, h))  # img(192 width, 320 height)
        # img = torch.zeros((*opt.img_size, 3)).numpy()  # img size(320,192,3) iDetection
        out = model.predict({'image': img})
        out0_shape, out1_shape = out[out0.name].shape, out[out1.name].shape
    else:  # linux and windows can not run model.predict(), get sizes from pytorch output y
        s = tuple(y[0].shape)
        out0_shape, out1_shape = (s[1], s[2] - 5), (s[1], 4)  # (3780, 80), (3780, 4)
    # Checks
    nx, ny = spec.description.input[0].type.imageType.width, spec.description.input[0].type.imageType.height
    na, nc = out0_shape
    # na, nc = out0.type.multiArrayType.shape  # number anchors, classes
    assert len(names) == nc, f'{len(names)} names found for nc={nc}'  # check
    # Define output shapes (missing)
    out0.type.multiArrayType.shape[:] = out0_shape  # (3780, 80)
    out1.type.multiArrayType.shape[:] = out1_shape  # (3780, 4)
    # spec.neuralNetwork.preprocessing[0].featureName = '0'
    # Flexible input shapes
    # from coremltools.models.neural_network import flexible_shape_utils
    # s = [] # shapes
    # s.append(flexible_shape_utils.NeuralNetworkImageSize(320, 192))
    # s.append(flexible_shape_utils.NeuralNetworkImageSize(640, 384))  # (height, width)
    # flexible_shape_utils.add_enumerated_image_sizes(spec, feature_name='image', sizes=s)
    # r = flexible_shape_utils.NeuralNetworkImageSizeRange()  # shape ranges
    # r.add_height_range((192, 640))
    # r.add_width_range((192, 640))
    # flexible_shape_utils.update_image_size_range(spec, feature_name='image', size_range=r)
    # Print
    print(spec.description)
    # Model from spec
    model = ct.models.MLModel(spec)
    # 3. Create NMS protobuf
    nms_spec = ct.proto.Model_pb2.Model()
    nms_spec.specificationVersion = 5
    for i in range(2):
        decoder_output = model._spec.description.output[i].SerializeToString()
        nms_spec.description.input.add()
        nms_spec.description.input[i].ParseFromString(decoder_output)
        nms_spec.description.output.add()
        nms_spec.description.output[i].ParseFromString(decoder_output)
    nms_spec.description.output[0].name = 'confidence'
    nms_spec.description.output[1].name = 'coordinates'
    output_sizes = [nc, 4]
    for i in range(2):
        ma_type = nms_spec.description.output[i].type.multiArrayType
        ma_type.shapeRange.sizeRanges.add()
        ma_type.shapeRange.sizeRanges[0].lowerBound = 0
        ma_type.shapeRange.sizeRanges[0].upperBound = -1
        ma_type.shapeRange.sizeRanges.add()
        ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
        ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
        del ma_type.shape[:]
    nms = nms_spec.nonMaximumSuppression
    nms.confidenceInputFeatureName = out0.name  # 1x507x80
    nms.coordinatesInputFeatureName = out1.name  # 1x507x4
    nms.confidenceOutputFeatureName = 'confidence'
    nms.coordinatesOutputFeatureName = 'coordinates'
    nms.iouThresholdInputFeatureName = 'iouThreshold'
    nms.confidenceThresholdInputFeatureName = 'confidenceThreshold'
    nms.iouThreshold = 0.45
    nms.confidenceThreshold = 0.25
    nms.pickTop.perClass = True
    nms.stringClassLabels.vector.extend(names.values())
    nms_model = ct.models.MLModel(nms_spec)
    # 4. Pipeline models together
    pipeline = ct.models.pipeline.Pipeline(input_features=[('image', ct.models.datatypes.Array(3, ny, nx)),
                                                           ('iouThreshold', ct.models.datatypes.Double()),
                                                           ('confidenceThreshold', ct.models.datatypes.Double())],
                                           output_features=['confidence', 'coordinates'])
    pipeline.add_model(model)
    pipeline.add_model(nms_model)
    # Correct datatypes
    pipeline.spec.description.input[0].ParseFromString(model._spec.description.input[0].SerializeToString())
    pipeline.spec.description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString())
    pipeline.spec.description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString())
    # Update metadata
    pipeline.spec.specificationVersion = 5
    pipeline.spec.description.metadata.versionString = 'https://github.com/ultralytics/yolov5'
    pipeline.spec.description.metadata.shortDescription = 'https://github.com/ultralytics/yolov5'
    pipeline.spec.description.metadata.author = 'glenn.jocher@ultralytics.com'
    pipeline.spec.description.metadata.license = 'https://github.com/ultralytics/yolov5/blob/master/LICENSE'
    pipeline.spec.description.metadata.userDefined.update({
        'classes': ','.join(names.values()),
        'iou_threshold': str(nms.iouThreshold),
        'confidence_threshold': str(nms.confidenceThreshold)})
    # Save the model
    f = file.with_suffix('.mlmodel')  # filename
    model = ct.models.MLModel(pipeline.spec)
    model.input_description['image'] = 'Input image'
    model.input_description['iouThreshold'] = f'(optional) IOU Threshold override (default: {nms.iouThreshold})'
    model.input_description['confidenceThreshold'] = \
        f'(optional) Confidence Threshold override (default: {nms.confidenceThreshold})'
    model.output_description['confidence'] = 'Boxes × Class confidence (see user-defined metadata "classes")'
    model.output_description['coordinates'] = 'Boxes × [x, y, width, height] (relative to image size)'
    model.save(f)  # pipelined
    print(f'{prefix} pipeline success ({time.time() - t:.2f}s), saved as {f} ({file_size(f):.1f} MB)')
@smart_inference_mode()
 def run(
        data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
@ -584,7 +728,9 @@ def run(
    if xml:  # OpenVINO
        f[3], _ = export_openvino(file, metadata, half)
    if coreml:  # CoreML
-        f[4], _ = export_coreml(model, im, file, int8, half)
+        f[4], ct_model = export_coreml(model, im, file, int8, half, nms)
        if nms:
            pipeline_coreml(ct_model, im, file, model.names, y)
    if any((saved_model, pb, tflite, edgetpu, tfjs)):  # TensorFlow formats
        assert not tflite or not tfjs, 'TFLite and TF.js models must be exported separately, please pass only one type.'
        assert not isinstance(model, ClassificationModel), 'ClassificationModel export to TF formats not yet supported.'