# yolov5/models/tf.py — 1867 lines, 80 KiB, Python (file-listing metadata converted to a comment)
# Ultralytics YOLOv5 🚀, AGPL-3.0 license
"""
TensorFlow, Keras and TFLite versions of YOLOv5
Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127
Usage:
$ python models/tf.py --weights yolov5s.pt
Export:
$ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
"""
import argparse
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from tensorflow import keras
from models.common import (
C3,
SPP,
SPPF,
Bottleneck,
BottleneckCSP,
C3x,
Concat,
Conv,
CrossConv,
DWConv,
DWConvTranspose2d,
Focus,
autopad,
)
from models.experimental import MixConv2d, attempt_load
from models.yolo import Detect, Segment
from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args
class TFBN(keras.layers.Layer):
    """Keras BatchNormalization layer whose parameters are copied from a PyTorch BatchNorm2d."""

    def __init__(self, w=None):
        """
        Build a BatchNormalization layer initialized from PyTorch weights.

        Args:
            w (torch.nn.BatchNorm2d | None): Source PyTorch layer supplying bias, weight,
                running_mean, running_var, and eps. Must be provided for initialization to work.
        """
        super().__init__()
        const = keras.initializers.Constant  # shorthand for the constant initializer
        self.bn = keras.layers.BatchNormalization(
            beta_initializer=const(w.bias.numpy()),
            gamma_initializer=const(w.weight.numpy()),
            moving_mean_initializer=const(w.running_mean.numpy()),
            moving_variance_initializer=const(w.running_var.numpy()),
            epsilon=w.eps,
        )

    def call(self, inputs):
        """Apply batch normalization to `inputs` and return the normalized tensor."""
        return self.bn(inputs)
class TFPad(keras.layers.Layer):
    """Zero-pads NHWC inputs symmetrically along spatial dimensions 1 (height) and 2 (width)."""

    def __init__(self, pad):
        """
        Create a spatial zero-padding layer.

        Args:
            pad (int | tuple[int, int]): Single int pads height and width equally;
                a tuple is interpreted as (pad_height, pad_width).
        """
        super().__init__()
        # Normalize to (height, width) padding amounts, then build the tf.pad spec
        # [batch, height, width, channels] with no padding on batch/channel axes.
        ph, pw = (pad, pad) if isinstance(pad, int) else (pad[0], pad[1])
        self.pad = tf.constant([[0, 0], [ph, ph], [pw, pw], [0, 0]])

    def call(self, inputs):
        """Return `inputs` zero-padded per the configured spec; batch and channel dims unchanged."""
        return tf.pad(inputs, self.pad, mode="constant", constant_values=0)
class TFConv(keras.layers.Layer):
    """Standard convolution: Conv2D + optional fused BatchNorm + activation (groups=1 only)."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
        """
        Build a convolution layer from PyTorch weights.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            k (int): Kernel size.
            s (int): Stride.
            p (int | None): Padding; None selects autopad.
            g (int): Groups; must be 1 (unsupported by TF Conv2D).
            act (bool): Apply activation if True.
            w (torch.nn.Module | None): PyTorch source module; `w.bn` present means the
                conv carries no bias (BN absorbs it).
        """
        super().__init__()
        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
        # TensorFlow 'SAME' padding differs from PyTorch for strided convs (e.g. k=3 s=2),
        # so an explicit TFPad precedes a 'VALID' conv whenever s != 1.
        # See https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
        has_bn = hasattr(w, "bn")
        conv = keras.layers.Conv2D(
            filters=c2,
            kernel_size=k,
            strides=s,
            padding="SAME" if s == 1 else "VALID",
            use_bias=not has_bn,
            kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
            bias_initializer="zeros" if has_bn else keras.initializers.Constant(w.conv.bias.numpy()),
        )
        if s == 1:
            self.conv = conv
        else:
            self.conv = keras.Sequential([TFPad(autopad(k, p)), conv])
        self.bn = TFBN(w.bn) if has_bn else tf.identity
        self.act = activations(w.act) if act else tf.identity

    def call(self, inputs):
        """Run convolution, batch normalization, and activation on NHWC `inputs`, in that order."""
        return self.act(self.bn(self.conv(inputs)))
class TFDWConv(keras.layers.Layer):
    """Depthwise convolution with optional fused BatchNorm and activation."""

    def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
        """
        Build a depthwise convolution layer from PyTorch weights.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels; must be a multiple of c1 (TF depth_multiplier requirement).
            k (int): Kernel size.
            s (int): Stride.
            p (int | tuple[int, int] | None): Padding; None selects autopad.
            act (bool): Apply activation if True.
            w (torch.nn.Module | None): PyTorch source module; `w.bn` present means the
                conv carries no bias (BN absorbs it).
        """
        super().__init__()
        assert c2 % c1 == 0, f"TFDWConv() output={c2} must be a multiple of input={c1} channels"
        fused_bn = hasattr(w, "bn")
        conv = keras.layers.DepthwiseConv2D(
            kernel_size=k,
            depth_multiplier=c2 // c1,
            strides=s,
            padding="SAME" if s == 1 else "VALID",
            use_bias=not fused_bn,
            depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
            bias_initializer="zeros" if fused_bn else keras.initializers.Constant(w.conv.bias.numpy()),
        )
        # Strided convs need explicit padding to reproduce PyTorch's padding behavior.
        self.conv = keras.Sequential([TFPad(autopad(k, p)), conv]) if s != 1 else conv
        self.bn = TFBN(w.bn) if fused_bn else tf.identity
        self.act = activations(w.act) if act else tf.identity

    def call(self, inputs):
        """Apply depthwise convolution, batch normalization, and activation to NHWC `inputs`."""
        return self.act(self.bn(self.conv(inputs)))
class TFDWConvTranspose2d(keras.layers.Layer):
    """Depthwise ConvTranspose2d emulated with one single-filter Conv2DTranspose per channel."""

    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
        """
        Build the per-channel transposed convolutions from PyTorch weights.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels; must equal c1.
            k (int): Kernel size; only 4 is supported.
            s (int): Stride.
            p1 (int): Padding; only 1 is supported (applied as a crop in `call`).
            p2 (int): Output padding of the transposed convolution.
            w (torch.nn.Module): PyTorch ConvTranspose2d supplying weight and bias.
        """
        super().__init__()
        assert c1 == c2, f"TFDWConv() output={c2} must be equal to input={c1} channels"
        assert k == 4 and p1 == 1, "TFDWConv() only valid for k=4 and p1=1"
        weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
        self.c1 = c1
        # One single-filter transposed conv per input channel emulates depthwise behavior.
        self.conv = []
        for i in range(c1):
            self.conv.append(
                keras.layers.Conv2DTranspose(
                    filters=1,
                    kernel_size=k,
                    strides=s,
                    padding="VALID",
                    output_padding=p2,
                    use_bias=True,
                    kernel_initializer=keras.initializers.Constant(weight[..., i : i + 1]),
                    bias_initializer=keras.initializers.Constant(bias[i]),
                )
            )

    def call(self, inputs):
        """Split NHWC input per channel, run each transposed conv, concat, and crop the p1=1 border."""
        channels = tf.split(inputs, self.c1, 3)
        outputs = [conv(ch) for conv, ch in zip(self.conv, channels)]
        return tf.concat(outputs, 3)[:, 1:-1, 1:-1]
class TFFocus(keras.layers.Layer):
    """Focuses width/height information into channel space, then convolves."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
        """
        Build the Focus layer from PyTorch weights.

        Args:
            c1 (int): Input channels (before the 4x channel stacking).
            c2 (int): Output channels.
            k (int): Conv kernel size.
            s (int): Conv stride.
            p (int | None): Conv padding; None selects autopad.
            g (int): Conv groups; must be 1.
            act (bool): Apply activation if True.
            w (torch.nn.Module | None): PyTorch Focus module providing `w.conv` weights.
        """
        super().__init__()
        # The 2x2 pixel-unshuffle in call() quadruples the channel count before the conv.
        self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)

    def call(self, inputs):
        """Halve spatial dims via a 2x2 pixel unshuffle, stack the 4 patches into channels, convolve."""
        patches = [
            inputs[:, ::2, ::2, :],
            inputs[:, 1::2, ::2, :],
            inputs[:, ::2, 1::2, :],
            inputs[:, 1::2, 1::2, :],
        ]
        return self.conv(tf.concat(patches, 3))
class TFBottleneck(keras.layers.Layer):
    """Standard bottleneck: 1x1 conv -> 3x3 conv with optional residual shortcut."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):
        """
        Initialize a standard bottleneck block from PyTorch weights.

        (Previous docstring incorrectly described this constructor as performing NMS.)

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            shortcut (bool): Add a residual connection when c1 == c2. Default True.
            g (int): Groups for the 3x3 convolution. Default 1.
            e (float): Expansion ratio for hidden channels. Default 0.5.
            w (torch.nn.Module | None): PyTorch module providing `cv1`/`cv2` weights.
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
        # Residual add is only valid when input/output channel counts match.
        self.add = shortcut and c1 == c2

    def call(self, inputs):
        """Apply the two convs; add `inputs` residually when the shortcut is enabled."""
        return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFCrossConv(keras.layers.Layer):
    """Cross convolution: a (1,k) conv followed by a (k,1) conv, with optional residual."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
        """
        Build the cross-convolution pair from PyTorch weights.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            k (int): Kernel extent for the (1,k)/(k,1) convs. Default 3.
            s (int): Stride. Default 1.
            g (int): Groups for the second conv. Default 1.
            e (float): Expansion ratio for hidden channels. Default 1.0.
            shortcut (bool): Add a residual connection when c1 == c2. Default False.
            w (torch.nn.Module | None): PyTorch module providing `cv1`/`cv2` weights.
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
        self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
        self.add = shortcut and c1 == c2

    def call(self, inputs):
        """Apply both cross convs; add `inputs` residually when the shortcut is enabled."""
        y = self.cv2(self.cv1(inputs))
        return inputs + y if self.add else y
class TFConv2d(keras.layers.Layer):
    """Drop-in Keras substitute for PyTorch nn.Conv2d (no group support, 'VALID' padding)."""

    def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
        """
        Build the Conv2D layer from PyTorch weights.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            k (int | tuple[int, int]): Kernel size.
            s (int): Stride. Default 1.
            g (int): Groups; must be 1 (unsupported by TF Conv2D). Default 1.
            bias (bool): Include a bias term. Default True.
            w (torch.nn.Conv2d | None): PyTorch layer providing weight (and bias if used).
        """
        super().__init__()
        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
        # PyTorch OIHW weights are permuted to TF's HWIO layout.
        kernel_init = keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy())
        bias_init = keras.initializers.Constant(w.bias.numpy()) if bias else None
        self.conv = keras.layers.Conv2D(
            filters=c2,
            kernel_size=k,
            strides=s,
            padding="VALID",
            use_bias=bias,
            kernel_initializer=kernel_init,
            bias_initializer=bias_init,
        )

    def call(self, inputs):
        """Convolve NHWC `inputs` with 'VALID' padding and return the result."""
        return self.conv(inputs)
class TFBottleneckCSP(keras.layers.Layer):
    """CSP Bottleneck, see https://github.com/WongKinYiu/CrossStagePartialNetworks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
        """
        Initialize a CSP bottleneck layer from PyTorch weights.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of stacked TFBottleneck layers. Default 1.
            shortcut (bool): Use residual connections inside bottlenecks. Default True.
            g (int): Convolution groups. Default 1.
            e (float): Hidden-channel expansion ratio. Default 0.5.
            w (object): PyTorch module providing pretrained weights (cv1..cv4, bn, m).
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
        self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
        self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
        self.bn = TFBN(w.bn)
        self.act = keras.activations.swish  # direct reference; the lambda wrapper was redundant
        self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])

    def call(self, inputs):
        """Run the two CSP paths, concat along channels, then BN -> swish -> final 1x1 conv."""
        y1 = self.cv3(self.m(self.cv1(inputs)))
        y2 = self.cv2(inputs)
        return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
class TFC3(keras.layers.Layer):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
        """
        Build the C3 block from PyTorch weights.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of stacked TFBottleneck layers. Default 1.
            shortcut (bool): Use residual connections inside bottlenecks. Default True.
            g (int): Convolution groups. Default 1.
            e (float): Hidden-channel expansion ratio. Default 0.5.
            w (object | None): PyTorch module providing pretrained weights (cv1..cv3, m).
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
        self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
        self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])

    def call(self, inputs):
        """Run the bottleneck path and the bypass path, concat on channels, then the final 1x1 conv."""
        branch_a = self.m(self.cv1(inputs))
        branch_b = self.cv2(inputs)
        return self.cv3(tf.concat((branch_a, branch_b), axis=3))
class TFC3x(keras.layers.Layer):
    """C3 module variant built from cross-convolutions (TFCrossConv) instead of bottlenecks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
        """
        Build the C3x block from PyTorch weights.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            n (int): Number of stacked TFCrossConv layers. Default 1.
            shortcut (bool): Use residual connections inside the cross convs. Default True.
            g (int): Convolution groups. Default 1.
            e (float): Hidden-channel expansion ratio. Default 0.5.
            w (object | None): PyTorch module providing pretrained weights (cv1..cv3, m).
        """
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
        self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
        stack = [TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)]
        self.m = keras.Sequential(stack)

    def call(self, inputs):
        """Run the cross-conv path and the bypass path, concat on channels, then the final 1x1 conv."""
        branch_a = self.m(self.cv1(inputs))
        branch_b = self.cv2(inputs)
        return self.cv3(tf.concat((branch_a, branch_b), axis=3))
class TFSPP(keras.layers.Layer):
    """Spatial pyramid pooling layer used in YOLOv3-SPP."""

    def __init__(self, c1, c2, k=(5, 9, 13), w=None):
        """
        Build the SPP block from PyTorch weights.

        Args:
            c1 (int): Input channels.
            c2 (int): Output channels.
            k (tuple[int, ...]): Pool sizes for the parallel max-pool branches. Default (5, 9, 13).
            w (object | None): PyTorch module providing pretrained weights (cv1, cv2).
        """
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        # cv2 sees the original features plus one pooled copy per kernel size.
        self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
        self.m = [keras.layers.MaxPool2D(pool_size=size, strides=1, padding="SAME") for size in k]

    def call(self, inputs):
        """Reduce channels, max-pool at each scale, concat all branches, then project to c2."""
        y = self.cv1(inputs)
        pooled = [pool(y) for pool in self.m]
        return self.cv2(tf.concat([y, *pooled], 3))
class TFSPPF(keras.layers.Layer):
    # Spatial pyramid pooling-Fast layer
    def __init__(self, c1, c2, k=5, w=None):
        """
        Build the SPPF block: one shared stride-1 max-pool applied three times in sequence, which is
        equivalent to (but cheaper than) SPP with kernels (k, 2k-1, 3k-2).

        Args:
            c1 (int): Number of input channels.
            c2 (int): Number of output channels.
            k (int): Kernel size of the shared max-pool. Defaults to 5.
            w (object | None): Pretrained PyTorch SPPF module supplying cv1/cv2 weights.
        """
        super().__init__()
        mid = c1 // 2  # hidden channels
        self.cv1 = TFConv(c1, mid, 1, 1, w=w.cv1)
        self.cv2 = TFConv(mid * 4, c2, 1, 1, w=w.cv2)  # 4 stages concatenated: input + 3 pooled copies
        self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding="SAME")

    def call(self, inputs):
        """
        Apply cv1, chain the shared max-pool three times, concatenate all four stages on the channel
        axis, and fuse with cv2.

        Args:
            inputs (tf.Tensor): NHWC input tensor.

        Returns:
            (tf.Tensor): NHWC output tensor with c2 channels; spatial size is unchanged.
        """
        feat = self.cv1(inputs)
        pooled1 = self.m(feat)
        pooled2 = self.m(pooled1)
        pooled3 = self.m(pooled2)
        return self.cv2(tf.concat([feat, pooled1, pooled2, pooled3], 3))
class TFDetect(keras.layers.Layer):
    # TF YOLOv5 Detect layer
    def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):
        """
        Initialize the TensorFlow YOLOv5 detection head from a pretrained PyTorch Detect module.

        Args:
            nc (int): Number of classes.
            anchors (tuple | list): Anchor definition, one entry per detection layer; each entry holds
                flattened (w, h) pairs, so anchors-per-layer is len(anchors[0]) // 2.
            ch (tuple[int]): Input channel count for each detection layer.
            imgsz (tuple[int, int]): Inference image size (height, width). Fixed at build time because
                the coordinate grids are precomputed per stride below.
            w (nn.Module): PyTorch Detect module supplying stride, anchors, and per-layer conv weights.

        Returns:
            None
        """
        super().__init__()
        self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor: box xywh (4) + objectness (1) + classes
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [tf.zeros(1)] * self.nl  # init grid
        self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
        # Anchors are stored in grid units; multiply by stride to express them in input-image pixels
        self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
        self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
        self.training = False  # set to False after building model
        self.imgsz = imgsz
        for i in range(self.nl):
            # imgsz is fixed, so each layer's grid can be built once here instead of per call
            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
            self.grid[i] = self._make_grid(nx, ny)

    def call(self, inputs):
        """
        Run the detection head over the per-layer feature maps.

        Args:
            inputs (list[tf.Tensor]): One NHWC feature map per detection layer.

        Returns:
            (tf.Tensor | tuple[tf.Tensor]): In training mode, the stacked raw per-layer predictions; in
                inference mode, a 1-tuple holding a (batch, total_anchors, no) tensor where box xywh is
                decoded and normalized to 0-1, objectness/class scores have sigmoid applied, and any
                extra trailing outputs (e.g. Segment mask coefficients) pass through unchanged.
        """
        z = []  # inference output
        x = []
        for i in range(self.nl):
            x.append(self.m[i](inputs[i]))
            # x(bs,20,20,255) to x(bs,3,20,20,85)
            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
            x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])

            if not self.training:  # inference
                y = x[i]
                grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
                anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
                xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i]  # xy
                wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
                # Normalize xywh to 0-1 to reduce calibration error
                xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                # Sigmoid objectness + class scores; leave any trailing extra channels raw
                y = tf.concat([xy, wh, tf.sigmoid(y[..., 4 : 5 + self.nc]), y[..., 5 + self.nc :]], -1)
                z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))

        return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """
        Generate a flattened 2D coordinate grid for a feature map of size (ny, nx).

        Args:
            nx (int): Number of grid columns.
            ny (int): Number of grid rows.

        Returns:
            (tf.Tensor): Float32 tensor of shape (1, 1, ny * nx, 2) holding (x, y) cell coordinates,
                used to offset the decoded box centers in `call`.
        """
        # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
        xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
        return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFSegment(TFDetect):
    # YOLOv5 Segment head for segmentation models
    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
        """
        Build the YOLOv5 segmentation head on top of the TFDetect detection head.

        Args:
            nc (int): Number of classes.
            anchors (tuple): Anchor definition, one entry per detection layer.
            nm (int): Number of mask coefficients predicted per anchor.
            npr (int): Number of prototype channels.
            ch (tuple[int]): Input channel count for each detection layer.
            imgsz (tuple[int, int]): Inference image size (height, width).
            w (object): Pretrained PyTorch Segment module supplying weights.
        """
        super().__init__(nc, anchors, ch, imgsz, w)
        self.nm = nm  # number of masks
        self.npr = npr  # number of protos
        self.no = 5 + nc + self.nm  # outputs per anchor: box(4) + objectness(1) + classes + mask coeffs
        # Rebuild the per-layer output convs so they include the extra mask-coefficient channels
        heads = []
        for i, c in enumerate(ch):
            heads.append(TFConv2d(c, self.no * self.na, 1, w=w.m[i]))
        self.m = heads
        self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto)  # protos
        self.detect = TFDetect.call  # reuse the parent forward pass (called unbound with self below)

    def call(self, x):
        """
        Run detection plus mask-prototype prediction.

        Args:
            x (list[tf.Tensor]): One NHWC feature map per detection layer; x[0] also feeds the proto branch.

        Returns:
            (tuple): (detections, protos). Detections come from TFDetect.call — raw per-layer maps when
                training, otherwise the decoded prediction tensor. Protos are transposed to NCHW to match
                the PyTorch head's layout.
        """
        protos = tf.transpose(self.proto(x[0]), [0, 3, 1, 2])  # e.g. (1,160,160,32) -> (1,32,160,160)
        preds = self.detect(self, x)
        if self.training:
            return (preds, protos)
        return (preds[0], protos)
class TFProto(keras.layers.Layer):
    def __init__(self, c1, c_=256, c2=32, w=None):
        """
        Build the mask-prototype branch: 3x3 conv, 2x nearest upsample, 3x3 conv, then an output conv.

        Args:
            c1 (int): Number of input channels.
            c_ (int): Number of intermediate channels. Defaults to 256.
            c2 (int): Number of output (prototype) channels. Defaults to 32.
            w (object | None): Pretrained PyTorch Proto module supplying cv1/cv2/cv3 weights.
        """
        super().__init__()
        self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
        self.upsample = TFUpsample(None, scale_factor=2, mode="nearest")
        self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
        self.cv3 = TFConv(c_, c2, w=w.cv3)

    def call(self, inputs):
        """Run the prototype branch stage by stage and return the prototype feature map."""
        out = self.cv1(inputs)
        out = self.upsample(out)
        out = self.cv2(out)
        return self.cv3(out)
class TFUpsample(keras.layers.Layer):
    # TF version of torch.nn.Upsample()
    def __init__(self, size, scale_factor, mode, w=None):
        """
        Configure an upsampling layer backed by tf.image.resize.

        Args:
            size (tuple | None): Unused; kept for parity with torch.nn.Upsample's signature.
            scale_factor (int): Spatial multiplier; must be a multiple of 2.
            mode (str): Interpolation method accepted by tf.image.resize (e.g. 'nearest', 'bilinear').
            w (None): Unused weight placeholder, kept so layer construction matches the other TF layers.
        """
        super().__init__()
        assert scale_factor % 2 == 0, "scale_factor must be multiple of 2"

        def _resize(x):
            # Target size comes from the static NHWC shape, so inputs must have known H/W dimensions
            new_hw = (x.shape[1] * scale_factor, x.shape[2] * scale_factor)
            return tf.image.resize(x, new_hw, mode)

        self.upsample = _resize
        # Alternatives kept from the original implementation for reference:
        # keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
        # tf.raw_ops.ResizeNearestNeighbor(images=x, size=(x.shape[1] * 2, x.shape[2] * 2))

    def call(self, inputs):
        """Resize the NHWC input spatially by the configured scale factor and return the result."""
        return self.upsample(inputs)
class TFConcat(keras.layers.Layer):
    # TF version of torch.concat()
    def __init__(self, dimension=1, w=None):
        """
        Set up a concatenation layer that translates PyTorch's NCHW dim=1 into TensorFlow's NHWC axis 3.

        Args:
            dimension (int): Concatenation dimension in PyTorch terms; only 1 (channels) is supported.
            w (None): Unused weight placeholder, kept so layer construction matches the other TF layers.
        """
        super().__init__()
        assert dimension == 1, "convert only NCHW to NHWC concat"
        self.d = 3  # NHWC channel axis

    def call(self, inputs):
        """
        Concatenate a list of NHWC tensors along the channel axis.

        Args:
            inputs (list[tf.Tensor]): Tensors with matching batch and spatial dimensions.

        Returns:
            (tf.Tensor): Single tensor whose channel count is the sum of the inputs' channels.
        """
        return tf.concat(inputs, self.d)
def parse_model(d, ch, model, imgsz):
    """
    Translate a YOLOv5 model config into an equivalent Keras layer graph, copying weights from `model`.

    Args:
        d (dict): Model definition with 'anchors', 'nc', 'depth_multiple', 'width_multiple', optional
            'channel_multiple', and 'backbone'/'head' layer lists of [from, number, module, args] rows.
        ch (list[int]): Running list of per-layer output channels; starts with the input channel count.
        model (object): Loaded PyTorch model whose `model.model[i]` submodules supply pretrained weights.
        imgsz (tuple[int, int]): Input image size (height, width), forwarded to Detect/Segment heads.

    Returns:
        (tuple[keras.Sequential, list[int]]): The assembled Keras model and the sorted list of layer
            indices whose outputs must be saved for later skip/concat connections.

    Note:
        Module names and string arguments from the config are resolved with eval(); the config dict must
        come from a trusted source.
    """
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, ch_mul = (
        d["anchors"],
        d["nc"],
        d["depth_multiple"],
        d["width_multiple"],
        d.get("channel_multiple"),
    )
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
    if not ch_mul:
        ch_mul = 8  # default channel multiple when 'channel_multiple' is absent from the config

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
        m_str = m
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except NameError:
                pass  # non-expression strings (e.g. 'nearest') are kept as-is

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [
            nn.Conv2d,
            Conv,
            DWConv,
            DWConvTranspose2d,
            Bottleneck,
            SPP,
            SPPF,
            MixConv2d,
            Focus,
            CrossConv,
            BottleneckCSP,
            C3,
            C3x,
        ]:
            c1, c2 = ch[f], args[0]
            # Scale output channels by width multiple, except for the final detection outputs
            c2 = make_divisible(c2 * gw, ch_mul) if c2 != no else c2

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3x]:
                args.insert(2, n)  # these modules take the repeat count as a constructor arg
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
        elif m in [Detect, Segment]:
            args.append([ch[x + 1] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
            if m is Segment:
                args[3] = make_divisible(args[3] * gw, ch_mul)
            args.append(imgsz)
        else:
            c2 = ch[f]

        # Look up the TF* counterpart of the PyTorch module by name (e.g. Conv -> TFConv)
        tf_m = eval("TF" + m_str.replace("nn.", ""))
        m_ = (
            keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)])
            if n > 1
            else tf_m(*args, w=model.model[i])
        )  # module
        torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace("__main__.", "")  # module type
        np = sum(x.numel() for x in torch_m_.parameters())  # number params (NOTE: shadows the numpy alias locally)
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f"{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}")  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        ch.append(c2)
    return keras.Sequential(layers), sorted(save)
class TFModel:
    # TF YOLOv5 model
    def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)):
        """
        Build the TensorFlow YOLOv5 model from a config and a pretrained PyTorch model's weights.

        Args:
            cfg (str | dict): Path to a YOLOv5 YAML config or an already-loaded config dictionary.
            ch (int): Number of input channels, usually 3 for RGB images.
            nc (int | None): Number of classes; when given, overrides the config's 'nc' value.
            model (torch.nn.Module | None): Pretrained PyTorch model whose weights seed the TF layers.
            imgsz (tuple[int, int]): Image size (height, width); fixed at build time.

        Returns:
            None
        """
        super().__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub

            self.yaml_file = Path(cfg).name
            with open(cfg) as f:
                self.yaml = yaml.load(f, Loader=yaml.FullLoader)  # model dict

        # Define model
        if nc and nc != self.yaml["nc"]:
            LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
            self.yaml["nc"] = nc  # override yaml value
        self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)

    def predict(
        self,
        inputs,
        tf_nms=False,
        agnostic_nms=False,
        topk_per_class=100,
        topk_all=100,
        iou_thres=0.45,
        conf_thres=0.25,
    ):
        """
        Run a forward pass through the parsed layer graph, optionally followed by TensorFlow NMS.

        Args:
            inputs (tf.Tensor): NHWC input image tensor.
            tf_nms (bool): If True, apply TensorFlow NMS and return filtered detections. Default False.
            agnostic_nms (bool): If True, use class-agnostic NMS instead of per-class NMS. Default False.
            topk_per_class (int): Max boxes kept per class by combined NMS. Default 100.
            topk_all (int): Max boxes kept over all classes. Default 100.
            iou_thres (float): IoU threshold for suppression. Default 0.45.
            conf_thres (float): Score threshold for keeping boxes. Default 0.25.

        Returns:
            (tuple | tf.Tensor): With tf_nms, a 1-tuple holding the NMS result; otherwise the raw model
                output, e.g. shape [1, 6300, 85] laid out as [xywh, conf, class0, class1, ...].
        """
        y = []  # outputs
        x = inputs
        for m in self.model.layers:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

            x = m(x)  # run
            y.append(x if m.i in self.savelist else None)  # save output

        # Add TensorFlow NMS
        if tf_nms:
            boxes = self._xywh2xyxy(x[0][..., :4])
            probs = x[0][:, :, 4:5]  # objectness confidence
            classes = x[0][:, :, 5:]  # per-class probabilities
            scores = probs * classes  # confidence-weighted class scores used for suppression
            if agnostic_nms:
                nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
            else:
                boxes = tf.expand_dims(boxes, 2)
                nms = tf.image.combined_non_max_suppression(
                    boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False
                )
            return (nms,)
        return x  # output [1,6300,85] = [xywh, conf, class0, class1, ...]
        # x = x[0]  # [x(1,6300,85), ...] to x(6300,85)
        # xywh = x[..., :4]  # x(6300,4) boxes
        # conf = x[..., 4:5]  # x(6300,1) confidences
        # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1))  # x(6300,1) classes
        # return tf.concat([conf, cls, xywh], 1)

    @staticmethod
    def _xywh2xyxy(xywh):
        """
        Convert bounding boxes from center [x, y, w, h] format to corner [x1, y1, x2, y2] format.

        Args:
            xywh (tf.Tensor): Boxes in [x, y, w, h] format; the conversion is applied along the last axis.

        Returns:
            (tf.Tensor): Boxes in [x1, y1, x2, y2] format with top-left (x1, y1) and bottom-right (x2, y2).

        Example:
            ```python
            boxes_xywh = tf.constant([[100, 150, 200, 250]], dtype=tf.float32)
            boxes_xyxy = TFModel._xywh2xyxy(boxes_xywh)
            print(boxes_xyxy)
            # Output: tf.Tensor([[  0.  25. 200. 275.]], shape=(1, 4), dtype=float32)
            ```

        Note:
            Used internally to prepare model output for TensorFlow NMS, which expects corner format.
        """
        x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
        return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
class AgnosticNMS(keras.layers.Layer):
    # TF Agnostic NMS
    def call(self, input, topk_all, iou_thres, conf_thres):
        """
        Apply class-agnostic non-maximum suppression to a batch of predictions via tf.map_fn.

        Args:
            input (tuple[tf.Tensor, tf.Tensor, tf.Tensor]): Batched (boxes, classes, scores) tensors:
                boxes (B, N, 4) in xyxy format, classes (B, N, nc) per-class probabilities, and
                scores (B, N, nc) confidence-weighted class scores used for suppression.
            topk_all (int): Maximum number of detections kept per image.
            iou_thres (float): IoU threshold above which overlapping boxes are suppressed.
            conf_thres (float): Minimum score for a box to be considered.

        Returns:
            (tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]): Per-image padded boxes (B, topk_all, 4),
                scores (B, topk_all), class indices (B, topk_all), and valid detection counts (B,) as int32.
        """
        return tf.map_fn(
            lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
            input,
            fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
            name="agnostic_nms",
        )

    @staticmethod
    def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):
        """
        Run class-agnostic NMS for a single image and pad the results to a fixed size.

        Args:
            x (tuple[tf.Tensor, tf.Tensor, tf.Tensor]): Per-image (boxes, classes, scores):
                boxes (N, 4) in xyxy format, classes (N, nc) per-class probabilities (argmax gives the
                predicted class index), scores (N, nc) suppression scores (max over classes is used).
            topk_all (int): Maximum number of detections to keep.
            iou_thres (float): IoU threshold for suppression.
            conf_thres (float): Score threshold for keeping boxes.

        Returns:
            (tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]): Padded boxes (topk_all, 4) zero-filled,
                padded scores (topk_all,) and class indices (topk_all,) both filled with -1.0, and the
                scalar int32 count of valid detections.

        Note:
            Class-agnostic: all classes compete in a single suppression pass, so overlapping boxes of
            different classes suppress each other.
        """
        boxes, classes, scores = x
        class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
        scores_inp = tf.reduce_max(scores, -1)  # best class score per box drives the suppression
        selected_inds = tf.image.non_max_suppression(
            boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres
        )
        selected_boxes = tf.gather(boxes, selected_inds)
        padded_boxes = tf.pad(
            selected_boxes,
            paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
            mode="CONSTANT",
            constant_values=0.0,
        )
        selected_scores = tf.gather(scores_inp, selected_inds)
        padded_scores = tf.pad(
            selected_scores,
            paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
            mode="CONSTANT",
            constant_values=-1.0,
        )
        selected_classes = tf.gather(class_inds, selected_inds)
        padded_classes = tf.pad(
            selected_classes,
            paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
            mode="CONSTANT",
            constant_values=-1.0,
        )
        valid_detections = tf.shape(selected_inds)[0]
        return padded_boxes, padded_scores, padded_classes, valid_detections
def activations(act=nn.SiLU):
    """
    Map a PyTorch activation module to its TensorFlow/Keras equivalent.

    Args:
        act (torch.nn.Module): PyTorch activation instance. Supported types: nn.LeakyReLU, nn.Hardswish,
            and nn.SiLU (including the custom SiLU from utils.activations).

    Returns:
        (callable): Function that applies the equivalent TensorFlow activation to a tensor.

    Raises:
        Exception: If `act` has no TensorFlow counterpart.
    """
    # Supported activation types are disjoint classes, so the check order does not affect the result
    if isinstance(act, (nn.SiLU, SiLU)):
        return lambda x: keras.activations.swish(x)
    if isinstance(act, nn.Hardswish):
        return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
    if isinstance(act, nn.LeakyReLU):
        return lambda x: keras.activations.relu(x, alpha=0.1)
    raise Exception(f"no matching TensorFlow activation found for PyTorch activation {act}")
def representative_dataset_gen(dataset, ncalib=100):
    """
    Yield calibration samples for TensorFlow Lite post-training quantization.

    Args:
        dataset (iterable): Iterable yielding (path, img, im0s, vid_cap, string) tuples, where `img` is a
            CHW image array as produced by the YOLOv5 dataloaders.
        ncalib (int): Maximum number of calibration samples to yield. Default is 100.

    Yields:
        (list[np.ndarray]): Single-element list holding one float32 tensor of shape (1, H, W, C) with
            values scaled from 0-255 to 0-1.

    Note:
        Fixes an off-by-one in the original loop, which yielded ncalib + 1 samples because it only broke
        after the sample at index ncalib had already been yielded.
    """
    for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
        im = np.transpose(img, [1, 2, 0])  # CHW -> HWC
        im = np.expand_dims(im, axis=0).astype(np.float32)  # add batch dimension
        im /= 255  # normalize 0-255 -> 0-1
        yield [im]
        if n + 1 >= ncalib:  # stop once exactly ncalib samples have been yielded
            break
def run(
    weights=ROOT / "yolov5s.pt",  # weights path
    imgsz=(640, 640),  # inference size h,w
    batch_size=1,  # batch size
    dynamic=False,  # dynamic batch size
):
    """
    Export a YOLOv5 PyTorch model to TensorFlow/Keras form and run dummy inference on each to verify.

    Args:
        weights (str | Path): Path to the pre-trained YOLOv5 `.pt` weights file.
        imgsz (tuple[int, int]): Inference size as (height, width).
        batch_size (int): Batch size used for the dummy inference inputs.
        dynamic (bool): If True, build the Keras model with a dynamic (None) batch dimension.

    Returns:
        None: Prints model summaries and logs a success message; nothing is returned.

    Note:
        The actual export to disk is handled by export.py; this function only verifies that the
        PyTorch -> TensorFlow -> Keras conversion produces runnable models.
    """
    # PyTorch: load the checkpoint on CPU and run one BCHW forward pass as a sanity check
    pt_input = torch.zeros((batch_size, 3, *imgsz))  # BCHW image
    pt_model = attempt_load(weights, device=torch.device("cpu"), inplace=True, fuse=False)
    _ = pt_model(pt_input)  # inference
    pt_model.info()

    # TensorFlow: rebuild the network from the PyTorch modules and run a BHWC forward pass
    tf_input = tf.zeros((batch_size, *imgsz, 3))  # BHWC image
    tf_model = TFModel(cfg=pt_model.yaml, model=pt_model, nc=pt_model.nc, imgsz=imgsz)
    _ = tf_model.predict(tf_input)  # inference

    # Keras: wrap the TF graph in a keras.Model so it can be summarized and later exported
    keras_input = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
    keras_model = keras.Model(inputs=keras_input, outputs=tf_model.predict(keras_input))
    keras_model.summary()

    LOGGER.info("PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.")
def parse_opt():
    """
    Parse command-line options for the TF/Keras model verification script.

    Args:
        None

    Returns:
        (argparse.Namespace): Parsed arguments with attributes 'weights' (str), 'imgsz' (list[int]),
            'batch_size' (int), and 'dynamic' (bool).

    Example:
        ```python
        opts = parse_opt()
        print(opts.weights)  # Outputs the path to the weights file
        ```

    See Also:
        - https://github.com/ultralytics/ultralytics for repository context and additional details.
        - `argparse` documentation: https://docs.python.org/3/library/argparse.html.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="weights path")
    parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
    parser.add_argument("--batch-size", type=int, default=1, help="batch size")
    parser.add_argument("--dynamic", action="store_true", help="dynamic batch size")
    opt = parser.parse_args()
    if len(opt.imgsz) == 1:
        opt.imgsz *= 2  # expand a single size into square (h, w)
    print_args(vars(opt))
    return opt
def main(opt):
    """
    Entry point: forward parsed command-line options to `run` for model conversion and verification.

    Args:
        opt (argparse.Namespace): Parsed options — weights path, image size, batch size, and the
            dynamic batch-size flag.

    Returns:
        None
    """
    options = vars(opt)
    run(**options)
if __name__ == "__main__":
    # Script entry point: parse CLI options and run the conversion/verification.
    main(parse_opt())