完成推理模块的转移

2025-04-17 15:57:16 +08:00
parent 74e8f0d415
commit b0379e64c9
130 changed files with 14269 additions and 3201 deletions
--- a/utils/yolov5/models/common.py
+++ b/utils/yolov5/models/common.py
--- a/utils/yolov5/models/experimental.py
+++ b/utils/yolov5/models/experimental.py
@ -0,0 +1,130 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+"""Experimental modules."""
+
+import math
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from app.util.yolov5.utils.downloads import attempt_download
+
+
+class Sum(nn.Module):
+    """Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070.
+
+    The first input is always added unweighted; when weighting is enabled, each remaining input is scaled by a learned
+    factor in (0, 2) before being added.
+    """
+
+    def __init__(self, n, weight=False):
+        """Initializes a module to sum outputs of layers with number of inputs `n` and optional weighting.
+
+        Args:
+            n: Number of inputs that `forward` will sum (2 or more).
+            weight: If truthy, create `n - 1` learnable weights for the inputs after the first.
+        """
+        super().__init__()
+        self.weight = weight  # apply weights boolean
+        # One index per input after the first; `forward` reads x[i + 1] for each i.
+        self.iter = range(n - 1)  # iter object
+        if weight:
+            # Initial raw weights are -0.5, -1.0, ..., -(n - 1) / 2; `forward` maps them through sigmoid(w) * 2.
+            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights
+
+    def forward(self, x):
+        """Processes input through a customizable weighted sum of `n` inputs, optionally applying learned weights.
+
+        Args:
+            x: Indexable collection of inputs; `x[0]` through `x[n - 1]` are read.
+
+        Returns:
+            `x[0]` plus the remaining inputs, each multiplied by its weight when weighting is enabled.
+        """
+        y = x[0]  # no weight
+        if self.weight:
+            # Squash the raw parameters into (0, 2) so each extra input can be damped or amplified.
+            w = torch.sigmoid(self.w) * 2
+            for i in self.iter:
+                y = y + x[i + 1] * w[i]
+        else:
+            for i in self.iter:
+                y = y + x[i + 1]
+        return y
+
+
+class MixConv2d(nn.Module):
+    """Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595.
+
+    Runs several convolutions with different kernel sizes on the same input and concatenates their outputs along the
+    channel dimension, followed by batch normalization and SiLU.
+    """
+
+    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
+        """Initializes MixConv2d with mixed depth-wise convolutional layers.
+
+        Args:
+            c1: Number of input channels fed to every convolution.
+            c2: Total number of output channels (also the size of the batch-norm layer).
+            k: Sequence of kernel sizes; one convolution is created per entry.
+            s: Stride shared by all convolutions.
+            equal_ch: If truthy, split `c2` into near-equal channel counts per kernel size; otherwise choose channel
+                counts so that `k_j ** 2 * c_j` is balanced across kernel sizes.
+        """
+        super().__init__()
+        n = len(k)  # number of convolutions
+        if equal_ch:  # equal c_ per group
+            # Assign each of the c2 output channels a group index in [0, n - 1], then count channels per group.
+            i = torch.linspace(0, n - 1e-6, c2).floor()  # c2 indices
+            c_ = [(i == g).sum() for g in range(n)]  # intermediate channels
+        else:  # equal weight.numel() per group
+            # Build the linear system a @ c_ = b:
+            #   row 0      : sum(c_) == c2
+            #   rows 1..n  : k_j ** 2 * c_j - k_{j+1} ** 2 * c_{j+1} == 0 (indices wrap around on the last row)
+            b = [c2] + [0] * n
+            a = np.eye(n + 1, n, k=-1)
+            a -= np.roll(a, 1, axis=1)
+            a *= np.array(k) ** 2
+            a[0] = 1
+            # NOTE(review): rounding the least-squares solution is not guaranteed to keep sum(c_) == c2, which the
+            # BatchNorm2d(c2) below relies on after concatenation - confirm for the kernel sets in use.
+            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b
+
+        # The comprehension variables `k` and `c_` shadow the outer sequences only inside the comprehension; each conv
+        # uses padding k // 2 and groups equal to gcd(c1, c_), with no bias.
+        self.m = nn.ModuleList(
+            [nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]
+        )
+        self.bn = nn.BatchNorm2d(c2)
+        self.act = nn.SiLU()
+
+    def forward(self, x):
+        """Performs forward pass by applying SiLU activation on batch-normalized concatenated convolutional layer
+        outputs.
+
+        Every convolution in `self.m` receives the same input `x`; the outputs are concatenated along dimension 1.
+        """
+        return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
+
+
+class Ensemble(nn.ModuleList):
+    """Ensemble of models.
+
+    Member models are stored as entries of the underlying `nn.ModuleList`; `forward` runs all of them on the same input
+    and concatenates their first outputs.
+    """
+
+    def __init__(self):
+        """Initializes an empty ensemble; models are added afterwards (e.g. via `append`)."""
+        super().__init__()
+
+    def forward(self, x, augment=False, profile=False, visualize=False):
+        """Performs forward pass aggregating outputs from an ensemble of models.
+
+        Args:
+            x: Input passed unchanged to every member model.
+            augment: Forwarded positionally to each member model.
+            profile: Forwarded positionally to each member model.
+            visualize: Forwarded positionally to each member model.
+
+        Returns:
+            Tuple `(y, None)` where `y` is the concatenation along dimension 1 of element `[0]` of each member's output.
+        """
+        # Only the first element of each member's return value is kept.
+        y = [module(x, augment, profile, visualize)[0] for module in self]
+        # Alternative aggregation strategies, kept for reference:
+        # y = torch.stack(y).max(0)[0]  # max ensemble
+        # y = torch.stack(y).mean(0)  # mean ensemble
+        y = torch.cat(y, 1)  # nms ensemble
+        return y, None  # inference, train output
+
+
+def attempt_load(weights, device=None, inplace=True, fuse=True):
+    """
+    Loads and fuses an ensemble or single YOLOv5 model from weights, handling device placement and model adjustments.
+
+    Example inputs: weights=[a,b,c] or a single model weights=[a] or weights=a.
+
+    Args:
+        weights: A single weights reference or a list of them; each is passed through `attempt_download` before loading.
+        device: Target passed to `.to(device)` for every loaded model.
+        inplace: Value assigned to the `inplace` attribute of activation, `Detect` and `Model` modules.
+        fuse: If truthy and the loaded model has a `fuse` method, call it before switching to eval mode.
+
+    Returns:
+        The loaded model itself when exactly one was loaded; otherwise an `Ensemble` carrying `names`, `nc` and `yaml`
+        copied from its first member and `stride` taken from the member with the largest stride.
+
+    Raises:
+        AssertionError: If the ensemble members do not all share the same `nc`.
+    """
+    # Imported inside the function - presumably to avoid a circular import with models.yolo; confirm.
+    from app.util.yolov5.models.yolo import Detect, Model
+
+    model = Ensemble()
+    for w in weights if isinstance(weights, list) else [weights]:
+        # NOTE(review): torch.load unpickles the checkpoint, which can execute arbitrary code - only load weights from
+        # trusted sources. Newer PyTorch releases default to `weights_only=True`, which may reject the pickled model
+        # objects read here - confirm against the pinned torch version.
+        ckpt = torch.load(attempt_download(w), map_location="cpu")  # load
+        # Prefer the "ema" entry when present and truthy, otherwise fall back to "model".
+        ckpt = (ckpt.get("ema") or ckpt["model"]).to(device).float()  # FP32 model
+
+        # Model compatibility updates
+        if not hasattr(ckpt, "stride"):
+            ckpt.stride = torch.tensor([32.0])
+        if hasattr(ckpt, "names") and isinstance(ckpt.names, (list, tuple)):
+            ckpt.names = dict(enumerate(ckpt.names))  # convert to dict
+
+        model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, "fuse") else ckpt.eval())  # model in eval mode
+
+    # Module updates
+    for m in model.modules():
+        t = type(m)
+        # Exact type match (not isinstance), so subclasses of these modules are left untouched.
+        if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
+            m.inplace = inplace
+            if t is Detect and not isinstance(m.anchor_grid, list):
+                # Replace a non-list anchor_grid with a list of `m.nl` placeholder tensors.
+                delattr(m, "anchor_grid")
+                setattr(m, "anchor_grid", [torch.zeros(1)] * m.nl)
+        elif t is nn.Upsample and not hasattr(m, "recompute_scale_factor"):
+            m.recompute_scale_factor = None  # torch 1.11.0 compatibility
+
+    # Return model
+    if len(model) == 1:
+        return model[-1]
+
+    # Return detection ensemble
+    print(f"Ensemble created with {weights}\n")
+    # Expose the first member's metadata on the ensemble itself.
+    for k in "names", "nc", "yaml":
+        setattr(model, k, getattr(model[0], k))
+    model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride  # max stride
+    assert all(model[0].nc == m.nc for m in model), f"Models have different class counts: {[m.nc for m in model]}"
+    return model
--- a/utils/yolov5/models/hub/anchors.yaml
+++ b/utils/yolov5/models/hub/anchors.yaml
@ -0,0 +1,57 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Default anchors for COCO data
+
+# P5 -------------------------------------------------------------------------------------------------------------------
+# P5-640:
+anchors_p5_640:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# P6 -------------------------------------------------------------------------------------------------------------------
+# P6-640:  thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11,  21,19,  17,41,  43,32,  39,70,  86,64,  65,131,  134,130,  120,265,  282,180,  247,354,  512,387
+anchors_p6_640:
+  - [9, 11, 21, 19, 17, 41] # P3/8
+  - [43, 32, 39, 70, 86, 64] # P4/16
+  - [65, 131, 134, 130, 120, 265] # P5/32
+  - [282, 180, 247, 354, 512, 387] # P6/64
+
+# P6-1280:  thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27,  44,40,  38,94,  96,68,  86,152,  180,137,  140,301,  303,264,  238,542,  436,615,  739,380,  925,792
+anchors_p6_1280:
+  - [19, 27, 44, 40, 38, 94] # P3/8
+  - [96, 68, 86, 152, 180, 137] # P4/16
+  - [140, 301, 303, 264, 238, 542] # P5/32
+  - [436, 615, 739, 380, 925, 792] # P6/64
+
+# P6-1920:  thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41,  67,59,  57,141,  144,103,  129,227,  270,205,  209,452,  455,396,  358,812,  653,922,  1109,570,  1387,1187
+anchors_p6_1920:
+  - [28, 41, 67, 59, 57, 141] # P3/8
+  - [144, 103, 129, 227, 270, 205] # P4/16
+  - [209, 452, 455, 396, 358, 812] # P5/32
+  - [653, 922, 1109, 570, 1387, 1187] # P6/64
+
+# P7 -------------------------------------------------------------------------------------------------------------------
+# P7-640:  thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11,  13,30,  29,20,  30,46,  61,38,  39,92,  78,80,  146,66,  79,163,  149,150,  321,143,  157,303,  257,402,  359,290,  524,372
+anchors_p7_640:
+  - [11, 11, 13, 30, 29, 20] # P3/8
+  - [30, 46, 61, 38, 39, 92] # P4/16
+  - [78, 80, 146, 66, 79, 163] # P5/32
+  - [149, 150, 321, 143, 157, 303] # P6/64
+  - [257, 402, 359, 290, 524, 372] # P7/128
+
+# P7-1280:  thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22,  54,36,  32,77,  70,83,  138,71,  75,173,  165,159,  148,334,  375,151,  334,317,  251,626,  499,474,  750,326,  534,814,  1079,818
+anchors_p7_1280:
+  - [19, 22, 54, 36, 32, 77] # P3/8
+  - [70, 83, 138, 71, 75, 173] # P4/16
+  - [165, 159, 148, 334, 375, 151] # P5/32
+  - [334, 317, 251, 626, 499, 474] # P6/64
+  - [750, 326, 534, 814, 1079, 818] # P7/128
+
+# P7-1920:  thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34,  81,55,  47,115,  105,124,  207,107,  113,259,  247,238,  222,500,  563,227,  501,476,  376,939,  749,711,  1126,489,  801,1222,  1618,1227
+anchors_p7_1920:
+  - [29, 34, 81, 55, 47, 115] # P3/8
+  - [105, 124, 207, 107, 113, 259] # P4/16
+  - [247, 238, 222, 500, 563, 227] # P5/32
+  - [501, 476, 376, 939, 749, 711] # P6/64
+  - [1126, 489, 801, 1222, 1618, 1227] # P7/128
--- a/utils/yolov5/models/hub/yolov3-spp.yaml
+++ b/utils/yolov5/models/hub/yolov3-spp.yaml
@ -0,0 +1,52 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# darknet53 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [32, 3, 1]], # 0
+    [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
+    [-1, 1, Bottleneck, [64]],
+    [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
+    [-1, 2, Bottleneck, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
+    [-1, 8, Bottleneck, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
+    [-1, 8, Bottleneck, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
+    [-1, 4, Bottleneck, [1024]], # 10
+  ]
+
+# YOLOv3-SPP head
+head: [
+    [-1, 1, Bottleneck, [1024, False]],
+    [-1, 1, SPP, [512, [5, 9, 13]]],
+    [-1, 1, Conv, [1024, 3, 1]],
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
+
+    [-2, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P4
+    [-1, 1, Bottleneck, [512, False]],
+    [-1, 1, Bottleneck, [512, False]],
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
+
+    [-2, 1, Conv, [128, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P3
+    [-1, 1, Bottleneck, [256, False]],
+    [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
+
+    [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/hub/yolov3-tiny.yaml
+++ b/utils/yolov5/models/hub/yolov3-tiny.yaml
@ -0,0 +1,42 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors:
+  - [10, 14, 23, 27, 37, 58] # P4/16
+  - [81, 82, 135, 169, 344, 319] # P5/32
+
+# YOLOv3-tiny backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [16, 3, 1]], # 0
+    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
+    [-1, 1, Conv, [32, 3, 1]],
+    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
+    [-1, 1, Conv, [64, 3, 1]],
+    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
+    [-1, 1, Conv, [128, 3, 1]],
+    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
+    [-1, 1, Conv, [256, 3, 1]],
+    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
+    [-1, 1, Conv, [512, 3, 1]],
+    [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
+    [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
+  ]
+
+# YOLOv3-tiny head
+head: [
+    [-1, 1, Conv, [1024, 3, 1]],
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
+
+    [-2, 1, Conv, [128, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P4
+    [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
+
+    [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
+  ]
--- a/utils/yolov5/models/hub/yolov3.yaml
+++ b/utils/yolov5/models/hub/yolov3.yaml
@ -0,0 +1,52 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# darknet53 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [32, 3, 1]], # 0
+    [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
+    [-1, 1, Bottleneck, [64]],
+    [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
+    [-1, 2, Bottleneck, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
+    [-1, 8, Bottleneck, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
+    [-1, 8, Bottleneck, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
+    [-1, 4, Bottleneck, [1024]], # 10
+  ]
+
+# YOLOv3 head
+head: [
+    [-1, 1, Bottleneck, [1024, False]],
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, Conv, [1024, 3, 1]],
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
+
+    [-2, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P4
+    [-1, 1, Bottleneck, [512, False]],
+    [-1, 1, Bottleneck, [512, False]],
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
+
+    [-2, 1, Conv, [128, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P3
+    [-1, 1, Bottleneck, [256, False]],
+    [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
+
+    [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/hub/yolov5-bifpn.yaml
+++ b/utils/yolov5/models/hub/yolov5-bifpn.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 BiFPN head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/hub/yolov5-fpn.yaml
+++ b/utils/yolov5/models/hub/yolov5-fpn.yaml
@ -0,0 +1,43 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 FPN head
+head: [
+    [-1, 3, C3, [1024, False]], # 10 (P5/32-large)
+
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 3, C3, [512, False]], # 14 (P4/16-medium)
+
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 3, C3, [256, False]], # 18 (P3/8-small)
+
+    [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/hub/yolov5-p2.yaml
+++ b/utils/yolov5/models/hub/yolov5-p2.yaml
@ -0,0 +1,55 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [128, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 2], 1, Concat, [1]], # cat backbone P2
+    [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
+
+    [-1, 1, Conv, [128, 3, 2]],
+    [[-1, 18], 1, Concat, [1]], # cat head P3
+    [-1, 3, C3, [256, False]], # 24 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 27 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 30 (P5/32-large)
+
+    [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
+  ]
--- a/utils/yolov5/models/hub/yolov5-p34.yaml
+++ b/utils/yolov5/models/hub/yolov5-p34.yaml
@ -0,0 +1,42 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head with (P3, P4) outputs
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [[17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4)
+  ]
--- a/utils/yolov5/models/hub/yolov5-p6.yaml
+++ b/utils/yolov5/models/hub/yolov5-p6.yaml
@ -0,0 +1,57 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [768]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 11
+  ]
+
+# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
+head: [
+    [-1, 1, Conv, [768, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P5
+    [-1, 3, C3, [768, False]], # 15
+
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 19
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 23 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 20], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 16], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [768, False]], # 29 (P5/32-large)
+
+    [-1, 1, Conv, [768, 3, 2]],
+    [[-1, 12], 1, Concat, [1]], # cat head P6
+    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
+
+    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
+  ]
--- a/utils/yolov5/models/hub/yolov5-p7.yaml
+++ b/utils/yolov5/models/hub/yolov5-p7.yaml
@ -0,0 +1,68 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [768]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
+    [-1, 3, C3, [1024]],
+    [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
+    [-1, 3, C3, [1280]],
+    [-1, 1, SPPF, [1280, 5]], # 13
+  ]
+
+# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
+head: [
+    [-1, 1, Conv, [1024, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 10], 1, Concat, [1]], # cat backbone P6
+    [-1, 3, C3, [1024, False]], # 17
+
+    [-1, 1, Conv, [768, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P5
+    [-1, 3, C3, [768, False]], # 21
+
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 25
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 29 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 26], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 32 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 22], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [768, False]], # 35 (P5/32-large)
+
+    [-1, 1, Conv, [768, 3, 2]],
+    [[-1, 18], 1, Concat, [1]], # cat head P6
+    [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
+
+    [-1, 1, Conv, [1024, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P7
+    [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
+
+    [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
+  ]
--- a/utils/yolov5/models/hub/yolov5-panet.yaml
+++ b/utils/yolov5/models/hub/yolov5-panet.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 PANet head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/hub/yolov5l6.yaml
+++ b/utils/yolov5/models/hub/yolov5l6.yaml
@ -0,0 +1,61 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors:
+  - [19, 27, 44, 40, 38, 94] # P3/8
+  - [96, 68, 86, 152, 180, 137] # P4/16
+  - [140, 301, 303, 264, 238, 542] # P5/32
+  - [436, 615, 739, 380, 925, 792] # P6/64
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [768]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 11
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [768, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P5
+    [-1, 3, C3, [768, False]], # 15
+
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 19
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 23 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 20], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 16], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [768, False]], # 29 (P5/32-large)
+
+    [-1, 1, Conv, [768, 3, 2]],
+    [[-1, 12], 1, Concat, [1]], # cat head P6
+    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
+
+    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
+  ]
--- a/utils/yolov5/models/hub/yolov5m6.yaml
+++ b/utils/yolov5/models/hub/yolov5m6.yaml
@ -0,0 +1,61 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.67 # model depth multiple
+width_multiple: 0.75 # layer channel multiple
+anchors:
+  - [19, 27, 44, 40, 38, 94] # P3/8
+  - [96, 68, 86, 152, 180, 137] # P4/16
+  - [140, 301, 303, 264, 238, 542] # P5/32
+  - [436, 615, 739, 380, 925, 792] # P6/64
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [768]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 11
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [768, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P5
+    [-1, 3, C3, [768, False]], # 15
+
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 19
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 23 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 20], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 16], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [768, False]], # 29 (P5/32-large)
+
+    [-1, 1, Conv, [768, 3, 2]],
+    [[-1, 12], 1, Concat, [1]], # cat head P6
+    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
+
+    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
+  ]
--- a/utils/yolov5/models/hub/yolov5n6.yaml
+++ b/utils/yolov5/models/hub/yolov5n6.yaml
@ -0,0 +1,61 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.25 # layer channel multiple
+anchors:
+  - [19, 27, 44, 40, 38, 94] # P3/8
+  - [96, 68, 86, 152, 180, 137] # P4/16
+  - [140, 301, 303, 264, 238, 542] # P5/32
+  - [436, 615, 739, 380, 925, 792] # P6/64
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [768]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 11
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [768, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P5
+    [-1, 3, C3, [768, False]], # 15
+
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 19
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 23 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 20], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 16], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [768, False]], # 29 (P5/32-large)
+
+    [-1, 1, Conv, [768, 3, 2]],
+    [[-1, 12], 1, Concat, [1]], # cat head P6
+    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
+
+    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
+  ]
--- a/utils/yolov5/models/hub/yolov5s-LeakyReLU.yaml
+++ b/utils/yolov5/models/hub/yolov5s-LeakyReLU.yaml
@ -0,0 +1,50 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/hub/yolov5s-ghost.yaml
+++ b/utils/yolov5/models/hub/yolov5s-ghost.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3Ghost, [128]],
+    [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3Ghost, [256]],
+    [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3Ghost, [512]],
+    [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3Ghost, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, GhostConv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3Ghost, [512, False]], # 13
+
+    [-1, 1, GhostConv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, GhostConv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, GhostConv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/hub/yolov5s-transformer.yaml
+++ b/utils/yolov5/models/hub/yolov5s-transformer.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/hub/yolov5s6.yaml
+++ b/utils/yolov5/models/hub/yolov5s6.yaml
@ -0,0 +1,61 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+  - [19, 27, 44, 40, 38, 94] # P3/8
+  - [96, 68, 86, 152, 180, 137] # P4/16
+  - [140, 301, 303, 264, 238, 542] # P5/32
+  - [436, 615, 739, 380, 925, 792] # P6/64
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [768]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 11
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [768, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P5
+    [-1, 3, C3, [768, False]], # 15
+
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 19
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 23 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 20], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 16], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [768, False]], # 29 (P5/32-large)
+
+    [-1, 1, Conv, [768, 3, 2]],
+    [[-1, 12], 1, Concat, [1]], # cat head P6
+    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
+
+    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
+  ]
--- a/utils/yolov5/models/hub/yolov5x6.yaml
+++ b/utils/yolov5/models/hub/yolov5x6.yaml
@ -0,0 +1,61 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.33 # model depth multiple
+width_multiple: 1.25 # layer channel multiple
+anchors:
+  - [19, 27, 44, 40, 38, 94] # P3/8
+  - [96, 68, 86, 152, 180, 137] # P4/16
+  - [140, 301, 303, 264, 238, 542] # P5/32
+  - [436, 615, 739, 380, 925, 792] # P6/64
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [768]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 11
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [768, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P5
+    [-1, 3, C3, [768, False]], # 15
+
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 19
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 23 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 20], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 16], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [768, False]], # 29 (P5/32-large)
+
+    [-1, 1, Conv, [768, 3, 2]],
+    [[-1, 12], 1, Concat, [1]], # cat head P6
+    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
+
+    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
+  ]
--- a/utils/yolov5/models/segment/yolov5l-seg.yaml
+++ b/utils/yolov5/models/segment/yolov5l-seg.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/segment/yolov5m-seg.yaml
+++ b/utils/yolov5/models/segment/yolov5m-seg.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.67 # model depth multiple
+width_multiple: 0.75 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/segment/yolov5n-seg.yaml
+++ b/utils/yolov5/models/segment/yolov5n-seg.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.25 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/segment/yolov5s-seg.yaml
+++ b/utils/yolov5/models/segment/yolov5s-seg.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.5 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/segment/yolov5x-seg.yaml
+++ b/utils/yolov5/models/segment/yolov5x-seg.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.33 # model depth multiple
+width_multiple: 1.25 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/tf.py
+++ b/utils/yolov5/models/tf.py
@ -0,0 +1,797 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+"""
+TensorFlow, Keras and TFLite versions of YOLOv5
+Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127.
+
+Usage:
+    $ python models/tf.py --weights yolov5s.pt
+
+Export:
+    $ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
+"""
+
+import argparse
+import sys
+from copy import deepcopy
+from pathlib import Path
+
+# Resolve this file's absolute path and take the parent of its containing directory as ROOT, then make sure ROOT is
+# on sys.path so the absolute `models.*` and `utils.*` imports below can be resolved.
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+# ROOT = ROOT.relative_to(Path.cwd())  # relative
+
+import numpy as np
+import tensorflow as tf
+import torch
+import torch.nn as nn
+from tensorflow import keras
+
+from models.common import (
+    C3,
+    SPP,
+    SPPF,
+    Bottleneck,
+    BottleneckCSP,
+    C3x,
+    Concat,
+    Conv,
+    CrossConv,
+    DWConv,
+    DWConvTranspose2d,
+    Focus,
+    autopad,
+)
+from models.experimental import MixConv2d, attempt_load
+from models.yolo import Detect, Segment
+from utils.activations import SiLU
+from utils.general import LOGGER, make_divisible, print_args
+
+
+class TFBN(keras.layers.Layer):
+    """TensorFlow BatchNormalization wrapper for initializing with optional pretrained weights."""
+
+    def __init__(self, w=None):
+        """Initializes a TensorFlow BatchNormalization layer with optional pretrained weights."""
+        super().__init__()
+        self.bn = keras.layers.BatchNormalization(
+            beta_initializer=keras.initializers.Constant(w.bias.numpy()),
+            gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
+            moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
+            moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
+            epsilon=w.eps,
+        )
+
+    def call(self, inputs):
+        """Applies batch normalization to the inputs."""
+        return self.bn(inputs)
+
+
+class TFPad(keras.layers.Layer):
+    """Pads input tensors in spatial dimensions 1 and 2 with specified integer or tuple padding values."""
+
+    def __init__(self, pad):
+        """
+        Initializes a padding layer for spatial dimensions 1 and 2 with specified padding, supporting both int and tuple
+        inputs.
+
+        Inputs are
+        """
+        super().__init__()
+        if isinstance(pad, int):
+            self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
+        else:  # tuple/list
+            self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
+
+    def call(self, inputs):
+        """Pads input tensor with zeros using specified padding, suitable for int and tuple pad dimensions."""
+        return tf.pad(inputs, self.pad, mode="constant", constant_values=0)
+
+
+class TFConv(keras.layers.Layer):
+    """Implements a standard convolutional layer with optional batch normalization and activation for TensorFlow."""
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
+        """
+        Initializes a standard convolution layer with optional batch normalization and activation; supports only
+        group=1.
+
+        Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
+        """
+        super().__init__()
+        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
+        # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
+        # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
+        conv = keras.layers.Conv2D(
+            filters=c2,
+            kernel_size=k,
+            strides=s,
+            padding="SAME" if s == 1 else "VALID",
+            use_bias=not hasattr(w, "bn"),
+            kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
+            bias_initializer="zeros" if hasattr(w, "bn") else keras.initializers.Constant(w.conv.bias.numpy()),
+        )
+        self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
+        self.bn = TFBN(w.bn) if hasattr(w, "bn") else tf.identity
+        self.act = activations(w.act) if act else tf.identity
+
+    def call(self, inputs):
+        """Applies convolution, batch normalization, and activation function to input tensors."""
+        return self.act(self.bn(self.conv(inputs)))
+
+
+class TFDWConv(keras.layers.Layer):
+    """Initializes a depthwise convolution layer with optional batch normalization and activation for TensorFlow."""
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
+        """
+        Initializes a depthwise convolution layer with optional batch normalization and activation for TensorFlow
+        models.
+
+        Input are ch_in, ch_out, weights, kernel, stride, padding, groups.
+        """
+        super().__init__()
+        assert c2 % c1 == 0, f"TFDWConv() output={c2} must be a multiple of input={c1} channels"
+        conv = keras.layers.DepthwiseConv2D(
+            kernel_size=k,
+            depth_multiplier=c2 // c1,
+            strides=s,
+            padding="SAME" if s == 1 else "VALID",
+            use_bias=not hasattr(w, "bn"),
+            depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
+            bias_initializer="zeros" if hasattr(w, "bn") else keras.initializers.Constant(w.conv.bias.numpy()),
+        )
+        self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
+        self.bn = TFBN(w.bn) if hasattr(w, "bn") else tf.identity
+        self.act = activations(w.act) if act else tf.identity
+
+    def call(self, inputs):
+        """Applies convolution, batch normalization, and activation function to input tensors."""
+        return self.act(self.bn(self.conv(inputs)))
+
+
+class TFDWConvTranspose2d(keras.layers.Layer):
+    """Implements a depthwise ConvTranspose2D layer for TensorFlow with specific settings."""
+
+    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
+        """
+        Initializes depthwise ConvTranspose2D layer with specific channel, kernel, stride, and padding settings.
+
+        Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
+        """
+        super().__init__()
+        assert c1 == c2, f"TFDWConv() output={c2} must be equal to input={c1} channels"
+        assert k == 4 and p1 == 1, "TFDWConv() only valid for k=4 and p1=1"
+        weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
+        self.c1 = c1
+        self.conv = [
+            keras.layers.Conv2DTranspose(
+                filters=1,
+                kernel_size=k,
+                strides=s,
+                padding="VALID",
+                output_padding=p2,
+                use_bias=True,
+                kernel_initializer=keras.initializers.Constant(weight[..., i : i + 1]),
+                bias_initializer=keras.initializers.Constant(bias[i]),
+            )
+            for i in range(c1)
+        ]
+
+    def call(self, inputs):
+        """Processes input through parallel convolutions and concatenates results, trimming border pixels."""
+        return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
+
+
+class TFFocus(keras.layers.Layer):
+    """Focuses spatial information into channel space using pixel shuffling and convolution for TensorFlow models."""
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
+        """
+        Initializes TFFocus layer to focus width and height information into channel space with custom convolution
+        parameters.
+
+        Inputs are ch_in, ch_out, kernel, stride, padding, groups.
+        """
+        super().__init__()
+        self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
+
+    def call(self, inputs):
+        """
+        Performs pixel shuffling and convolution on input tensor, downsampling by 2 and expanding channels by 4.
+
+        Example x(b,w,h,c) -> y(b,w/2,h/2,4c).
+        """
+        inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
+        return self.conv(tf.concat(inputs, 3))
+
+
+class TFBottleneck(keras.layers.Layer):
+    """Implements a TensorFlow bottleneck layer with optional shortcut connections for efficient feature extraction."""
+
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):
+        """
+        Initializes a standard bottleneck layer for TensorFlow models, expanding and contracting channels with optional
+        shortcut.
+
+        Arguments are ch_in, ch_out, shortcut, groups, expansion.
+        """
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
+        self.add = shortcut and c1 == c2
+
+    def call(self, inputs):
+        """Performs forward pass; if shortcut is True & input/output channels match, adds input to the convolution
+        result.
+        """
+        return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
+
+
+class TFCrossConv(keras.layers.Layer):
+    """Implements a cross convolutional layer with optional expansion, grouping, and shortcut for TensorFlow."""
+
+    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
+        """Initializes cross convolution layer with optional expansion, grouping, and shortcut addition capabilities."""
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
+        self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
+        self.add = shortcut and c1 == c2
+
+    def call(self, inputs):
+        """Passes input through two convolutions optionally adding the input if channel dimensions match."""
+        return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
+
+
+class TFConv2d(keras.layers.Layer):
+    """Implements a TensorFlow 2D convolution layer, mimicking PyTorch's nn.Conv2D for specified filters and stride."""
+
+    def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
+        """Initializes a TensorFlow 2D convolution layer, mimicking PyTorch's nn.Conv2D functionality for given filter
+        sizes and stride.
+        """
+        super().__init__()
+        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
+        self.conv = keras.layers.Conv2D(
+            filters=c2,
+            kernel_size=k,
+            strides=s,
+            padding="VALID",
+            use_bias=bias,
+            kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()),
+            bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None,
+        )
+
+    def call(self, inputs):
+        """Applies a convolution operation to the inputs and returns the result."""
+        return self.conv(inputs)
+
+
+class TFBottleneckCSP(keras.layers.Layer):
+    """Implements a CSP bottleneck layer for TensorFlow models to enhance gradient flow and efficiency."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
+        """
+        Initializes CSP bottleneck layer with specified channel sizes, count, shortcut option, groups, and expansion
+        ratio.
+
+        Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
+        """
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
+        self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
+        self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
+        self.bn = TFBN(w.bn)
+        self.act = lambda x: keras.activations.swish(x)
+        self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
+
+    def call(self, inputs):
+        """Processes input through the model layers, concatenates, normalizes, activates, and reduces the output
+        dimensions.
+        """
+        y1 = self.cv3(self.m(self.cv1(inputs)))
+        y2 = self.cv2(inputs)
+        return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
+
+
+class TFC3(keras.layers.Layer):
+    """CSP bottleneck layer with 3 convolutions for TensorFlow, supporting optional shortcuts and group convolutions."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
+        """
+        Initializes CSP Bottleneck with 3 convolutions, supporting optional shortcuts and group convolutions.
+
+        Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
+        """
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
+        self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
+        self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
+
+    def call(self, inputs):
+        """
+        Processes input through a sequence of transformations for object detection (YOLOv5).
+
+        See https://github.com/ultralytics/yolov5.
+        """
+        return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
+
+
+class TFC3x(keras.layers.Layer):
+    """A TensorFlow layer for enhanced feature extraction using cross-convolutions in object detection models."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
+        """
+        Initializes layer with cross-convolutions for enhanced feature extraction in object detection models.
+
+        Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
+        """
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
+        self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
+        self.m = keras.Sequential(
+            [TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)]
+        )
+
+    def call(self, inputs):
+        """Processes input through cascaded convolutions and merges features, returning the final tensor output."""
+        return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
+
+
+class TFSPP(keras.layers.Layer):
+    """Spatial pyramid pooling layer (YOLOv3-SPP style) built from TensorFlow/Keras ops."""
+
+    def __init__(self, c1, c2, k=(5, 9, 13), w=None):
+        """Creates the input/output `TFConv` layers from `w` and one stride-1 SAME max-pool per kernel size in `k`."""
+        super().__init__()
+        hidden = c1 // 2  # hidden channels
+        n_branches = len(k) + 1  # un-pooled features plus one pooled copy per kernel size
+        self.cv1 = TFConv(c1, hidden, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(hidden * n_branches, c2, 1, 1, w=w.cv2)
+        self.m = [keras.layers.MaxPool2D(pool_size=size, strides=1, padding="SAME") for size in k]
+
+    def call(self, inputs):
+        """Applies `cv1`, concatenates its output with every pooled version on axis 3, then applies `cv2`."""
+        x = self.cv1(inputs)
+        pooled = [pool(x) for pool in self.m]
+        return self.cv2(tf.concat([x, *pooled], 3))
+
+
+class TFSPPF(keras.layers.Layer):
+    """Fast spatial pyramid pooling layer that reuses a single max-pool three times in a chain."""
+
+    def __init__(self, c1, c2, k=5, w=None):
+        """Creates the input/output `TFConv` layers from `w` and one stride-1 SAME max-pool with kernel size `k`."""
+        super().__init__()
+        hidden = c1 // 2  # hidden channels
+        self.cv1 = TFConv(c1, hidden, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(hidden * 4, c2, 1, 1, w=w.cv2)
+        self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding="SAME")
+
+    def call(self, inputs):
+        """Concatenates the `cv1` output with three successively pooled copies on axis 3 and applies `cv2`."""
+        features = [self.cv1(inputs)]
+        for _ in range(3):
+            features.append(self.m(features[-1]))  # each pool consumes the previous result
+        return self.cv2(tf.concat(features, 3))
+
+
+class TFDetect(keras.layers.Layer):
+    """Implements YOLOv5 object detection layer in TensorFlow for predicting bounding boxes and class probabilities."""
+
+    def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):
+        """
+        Initializes YOLOv5 detection layer for TensorFlow with configurable classes, anchors, channels, and image
+        size.
+
+        `w` is the source module the weights are copied from; it must expose `stride`, `anchors` and `m`. The anchor
+        values used at inference come from `w.anchors`, while the `anchors` argument only determines the number of
+        detection layers and anchors per layer. One grid per detection layer is precomputed from the fixed `imgsz`.
+        """
+        super().__init__()
+        self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
+        self.nc = nc  # number of classes
+        self.no = nc + 5  # number of outputs per anchor
+        self.nl = len(anchors)  # number of detection layers
+        self.na = len(anchors[0]) // 2  # number of anchors
+        self.grid = [tf.zeros(1)] * self.nl  # init grid
+        self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
+        # anchors scaled by the per-layer stride, reshaped to [nl, 1, -1, 1, 2]
+        self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
+        self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
+        self.training = False  # set to False after building model
+        self.imgsz = imgsz
+        for i in range(self.nl):
+            # grid height/width of layer i for the fixed image size
+            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
+            self.grid[i] = self._make_grid(nx, ny)
+
+    def call(self, inputs):
+        """
+        Performs forward pass through the model layers to predict object bounding boxes and classifications.
+
+        `inputs` is indexed per detection layer. In inference mode the result is a 1-tuple holding all layers'
+        predictions concatenated on axis 1, with xywh divided by the image size; in training mode the per-layer
+        reshaped outputs are returned after a transpose.
+        """
+        z = []  # inference output
+        x = []
+        for i in range(self.nl):
+            x.append(self.m[i](inputs[i]))
+            # reshape the conv output to x(bs, ny*nx, na, no)
+            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
+            x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
+
+            if not self.training:  # inference
+                y = x[i]
+                grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
+                # the factor 4 equals (2 * sigmoid) ** 2 / sigmoid ** 2, so wh below matches (2 * sigmoid) ** 2 * anchor
+                anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
+                xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i]  # xy
+                wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
+                # Normalize xywh to 0-1 to reduce calibration error
+                xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
+                wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
+                # objectness and class scores get a sigmoid; any columns past 5 + nc are passed through unchanged
+                y = tf.concat([xy, wh, tf.sigmoid(y[..., 4 : 5 + self.nc]), y[..., 5 + self.nc :]], -1)
+                z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
+
+        return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)
+
+    @staticmethod
+    def _make_grid(nx=20, ny=20):
+        """Generates a 2D grid of coordinates in (x, y) format with shape [1, 1, ny*nx, 2]."""
+        # TensorFlow counterpart of: torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
+        xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
+        return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
+
+
+class TFSegment(TFDetect):
+    """TensorFlow YOLOv5 segmentation head: the `TFDetect` head widened by mask outputs plus a `TFProto` branch."""
+
+    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
+        """Sets up the detection head, then rebuilds the output convs for `5 + nc + nm` outputs and adds protos."""
+        super().__init__(nc, anchors, ch, imgsz, w)
+        self.nm = nm  # number of masks
+        self.npr = npr  # number of protos
+        self.no = 5 + nc + self.nm  # number of outputs per anchor
+        output_convs = []
+        for level, channels in enumerate(ch):
+            output_convs.append(TFConv2d(channels, self.no * self.na, 1, w=w.m[level]))
+        self.m = output_convs  # output conv
+        self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto)  # protos
+        self.detect = TFDetect.call
+
+    def call(self, x):
+        """Returns `(detections, protos)` in training mode, otherwise `(detections[0], protos)`."""
+        protos = self.proto(x[0])
+        # move the last axis to position 1, e.g. shape(1,160,160,32) to shape(1,32,160,160)
+        protos = tf.transpose(protos, [0, 3, 1, 2])
+        detections = self.detect(self, x)
+        if self.training:
+            return (detections, protos)
+        return (detections[0], protos)
+
+
+class TFProto(keras.layers.Layer):
+    """Prototype branch for YOLOv5 segmentation: conv, 2x nearest upsample, conv, conv."""
+
+    def __init__(self, c1, c_=256, c2=32, w=None):
+        """Creates the three `TFConv` layers from `w.cv1`, `w.cv2`, `w.cv3` and a 2x nearest-neighbour upsampler."""
+        super().__init__()
+        self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
+        self.upsample = TFUpsample(None, scale_factor=2, mode="nearest")
+        self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
+        self.cv3 = TFConv(c_, c2, w=w.cv3)
+
+    def call(self, inputs):
+        """Runs `inputs` through `cv1`, the upsampler, `cv2` and `cv3`, in that order."""
+        out = self.cv1(inputs)
+        out = self.upsample(out)
+        out = self.cv2(out)
+        return self.cv3(out)
+
+
+class TFUpsample(keras.layers.Layer):
+    """Upsampling layer that resizes its input by an even integer factor via `tf.image.resize`."""
+
+    def __init__(self, size, scale_factor, mode, w=None):
+        """
+        Stores a resize callable in `self.upsample`; `scale_factor` must be even and `mode` is the resize method.
+
+        Warning: all arguments needed including 'w'
+        """
+        super().__init__()
+        assert scale_factor % 2 == 0, "scale_factor must be multiple of 2"
+
+        def _resize(x):
+            """Scales axes 1 and 2 of `x` by `scale_factor` using the resize method `mode`."""
+            target = (x.shape[1] * scale_factor, x.shape[2] * scale_factor)
+            return tf.image.resize(x, target, mode)
+
+        self.upsample = _resize
+        # Alternatives that were considered:
+        # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
+        # with default arguments: align_corners=False, half_pixel_centers=False
+        # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
+        #                                                            size=(x.shape[1] * 2, x.shape[2] * 2))
+
+    def call(self, inputs):
+        """Returns `inputs` resized by the callable built in `__init__`."""
+        return self.upsample(inputs)
+
+
+class TFConcat(keras.layers.Layer):
+    """Concatenation layer that maps a PyTorch `dimension=1` concat onto axis 3 in TensorFlow."""
+
+    def __init__(self, dimension=1, w=None):
+        """Accepts only `dimension=1` (NCHW channel axis) and stores the NHWC equivalent, axis 3."""
+        super().__init__()
+        assert dimension == 1, "convert only NCHW to NHWC concat"
+        self.d = 3
+
+    def call(self, inputs):
+        """Joins the tensors in `inputs` along the stored axis."""
+        axis = self.d
+        return tf.concat(inputs, axis)
+
+
+def parse_model(d, ch, model, imgsz):
+    """
+    Parses a model definition dict `d` to create YOLOv5 model layers, including dynamic channel adjustments.
+
+    Args:
+        d: Model dict. Keys read here are "anchors", "nc", "depth_multiple", "width_multiple", "backbone", "head"
+            and the optional "channel_multiple" (8 is used when it is missing or falsy). The per-layer `args`
+            lists inside `d` are modified in place while string arguments are evaluated.
+        ch: List of channel counts; its last entry is the starting output width and one entry is appended per layer.
+        model: Source model; `model.model[i]` provides the weights handed to the i-th TensorFlow layer.
+        imgsz: Image size appended to the arguments of Detect/Segment layers.
+
+    Returns:
+        A tuple `(keras.Sequential(layers), sorted(save))` where `save` lists the layer indices that other layers
+        reference through their 'from' field.
+    """
+    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
+    anchors, nc, gd, gw, ch_mul = (
+        d["anchors"],
+        d["nc"],
+        d["depth_multiple"],
+        d["width_multiple"],
+        d.get("channel_multiple"),
+    )
+    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
+    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
+    if not ch_mul:
+        ch_mul = 8
+
+    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
+    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
+        m_str = m
+        # NOTE(review): eval() executes strings taken from the model dict - only use with trusted configs
+        m = eval(m) if isinstance(m, str) else m  # eval strings
+        for j, a in enumerate(args):
+            try:
+                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
+            except NameError:
+                pass  # strings that do not name anything in scope are kept as plain strings
+
+        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
+        if m in [
+            nn.Conv2d,
+            Conv,
+            DWConv,
+            DWConvTranspose2d,
+            Bottleneck,
+            SPP,
+            SPPF,
+            MixConv2d,
+            Focus,
+            CrossConv,
+            BottleneckCSP,
+            C3,
+            C3x,
+        ]:
+            c1, c2 = ch[f], args[0]
+            # apply the width gain unless the layer already has exactly the detection output width `no`
+            c2 = make_divisible(c2 * gw, ch_mul) if c2 != no else c2
+
+            args = [c1, c2, *args[1:]]
+            if m in [BottleneckCSP, C3, C3x]:
+                args.insert(2, n)  # these modules take the repeat count as an argument
+                n = 1
+        elif m is nn.BatchNorm2d:
+            args = [ch[f]]
+        elif m is Concat:
+            c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
+        elif m in [Detect, Segment]:
+            args.append([ch[x + 1] for x in f])
+            if isinstance(args[1], int):  # number of anchors
+                args[1] = [list(range(args[1] * 2))] * len(f)
+            if m is Segment:
+                args[3] = make_divisible(args[3] * gw, ch_mul)
+            args.append(imgsz)
+        else:
+            c2 = ch[f]
+
+        # look up the TF* class that mirrors the PyTorch module name (e.g. "nn.Conv2d" -> TFConv2d)
+        tf_m = eval("TF" + m_str.replace("nn.", ""))
+        m_ = (
+            keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)])
+            if n > 1
+            else tf_m(*args, w=model.model[i])
+        )  # module
+
+        # the PyTorch module is instantiated only to count its parameters below
+        torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
+        t = str(m)[8:-2].replace("__main__.", "")  # module type: class repr without "<class '" and "'>"
+        # NOTE: this local `np` (parameter count) shadows the module-level `np` name inside this function
+        np = sum(x.numel() for x in torch_m_.parameters())  # number params
+        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
+        LOGGER.info(f"{i:>3}{str(f):>18}{str(n):>3}{np:>10}  {t:<40}{str(args):<30}")  # print
+        # `x % i` turns negative (relative) 'from' indices into absolute layer indices
+        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
+        layers.append(m_)
+        ch.append(c2)
+    return keras.Sequential(layers), sorted(save)
+
+
+class TFModel:
+    """Implements YOLOv5 model in TensorFlow, supporting TensorFlow, Keras, and TFLite formats for object detection."""
+
+    def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)):
+        """
+        Initializes TF YOLOv5 model with specified configuration, channels, classes, model instance, and input size.
+
+        Args:
+            cfg: Model dict, or path to a *.yaml file that is loaded into one.
+            ch: Number of input channels used to seed the channel list.
+            nc: Optional class count; when truthy and different from the config value it overrides it.
+            model: Source model whose layers provide the weights (forwarded to `parse_model`).
+            imgsz: Image size forwarded to `parse_model`.
+        """
+        super().__init__()
+        if isinstance(cfg, dict):
+            self.yaml = cfg  # model dict
+        else:  # is *.yaml
+            import yaml  # for torch hub
+
+            self.yaml_file = Path(cfg).name
+            # Read as ASCII and drop other characters so decoding never depends on the platform's default encoding;
+            # safe_load only builds plain Python data. Both choices match how DetectionModel reads the same files.
+            with open(cfg, encoding="ascii", errors="ignore") as f:
+                self.yaml = yaml.safe_load(f)  # model dict
+
+        # Define model
+        if nc and nc != self.yaml["nc"]:
+            LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
+            self.yaml["nc"] = nc  # override yaml value
+        # parse_model receives a deep copy, so self.yaml itself stays untouched
+        self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
+
+    def predict(
+        self,
+        inputs,
+        tf_nms=False,
+        agnostic_nms=False,
+        topk_per_class=100,
+        topk_all=100,
+        iou_thres=0.45,
+        conf_thres=0.25,
+    ):
+        """
+        Runs inference on input data, with an option for TensorFlow NMS.
+
+        Without `tf_nms` the output of the last layer is returned as is. With `tf_nms` a 1-tuple holding the NMS
+        result is returned: class-agnostic NMS when `agnostic_nms` is set, otherwise
+        `tf.image.combined_non_max_suppression`.
+        """
+        y = []  # outputs
+        x = inputs
+        for m in self.model.layers:
+            if m.f != -1:  # if not from previous layer
+                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
+
+            x = m(x)  # run
+            y.append(x if m.i in self.savelist else None)  # save output
+
+        # Add TensorFlow NMS
+        if tf_nms:
+            boxes = self._xywh2xyxy(x[0][..., :4])
+            probs = x[0][:, :, 4:5]
+            classes = x[0][:, :, 5:]
+            scores = probs * classes  # per-class score = objectness * class probability
+            if agnostic_nms:
+                nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
+            else:
+                boxes = tf.expand_dims(boxes, 2)
+                nms = tf.image.combined_non_max_suppression(
+                    boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False
+                )
+            return (nms,)
+        return x  # output [1,6300,85] = [xywh, conf, class0, class1, ...]
+
+    @staticmethod
+    def _xywh2xyxy(xywh):
+        """Converts bounding box format from [x, y, w, h] to [x1, y1, x2, y2], where xy1=top-left and xy2=bottom-
+        right.
+        """
+        x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
+        return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
+
+
+class AgnosticNMS(keras.layers.Layer):
+    """Keras layer that applies class-agnostic NMS to every element of a batch and pads the results to `topk_all`."""
+
+    def call(self, input, topk_all, iou_thres, conf_thres):
+        """Maps `_nms` over the leading axis of the `(boxes, classes, scores)` tuple given as `input`."""
+
+        def per_image(sample):
+            """Runs NMS for one batch element with the thresholds passed to `call`."""
+            return self._nms(sample, topk_all, iou_thres, conf_thres)
+
+        return tf.map_fn(
+            per_image,
+            input,
+            fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
+            name="agnostic_nms",
+        )
+
+    @staticmethod
+    def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):
+        """Selects boxes with `tf.image.non_max_suppression` on the best per-box score and pads boxes (with 0),
+        scores and classes (with -1) to `topk_all` rows; also returns the number of selected boxes.
+        """
+        boxes, classes, scores = x
+        best_class = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
+        best_score = tf.reduce_max(scores, -1)
+        keep = tf.image.non_max_suppression(
+            boxes, best_score, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres
+        )
+
+        kept_boxes = tf.gather(boxes, keep)
+        n_pad = topk_all - tf.shape(kept_boxes)[0]  # rows missing up to topk_all
+        padded_boxes = tf.pad(kept_boxes, paddings=[[0, n_pad], [0, 0]], mode="CONSTANT", constant_values=0.0)
+
+        kept_scores = tf.gather(best_score, keep)
+        padded_scores = tf.pad(kept_scores, paddings=[[0, n_pad]], mode="CONSTANT", constant_values=-1.0)
+
+        kept_classes = tf.gather(best_class, keep)
+        padded_classes = tf.pad(kept_classes, paddings=[[0, n_pad]], mode="CONSTANT", constant_values=-1.0)
+
+        valid_detections = tf.shape(keep)[0]
+        return padded_boxes, padded_scores, padded_classes, valid_detections
+
+
+def activations(act=nn.SiLU):
+    """
+    Converts PyTorch activations to TensorFlow equivalents, supporting LeakyReLU, Hardswish, and SiLU/Swish.
+
+    Args:
+        act: A PyTorch activation, given either as a module instance (e.g. `nn.SiLU()`) or as the module class
+            itself (e.g. `nn.SiLU`, which is also the default).
+
+    Returns:
+        A callable applying the matching TensorFlow/Keras activation to a tensor.
+
+    Raises:
+        Exception: If `act` is none of the supported activation types.
+    """
+
+    def matches(*kinds):
+        """Returns True when `act` is an instance of, or itself a subclass of, one of `kinds`."""
+        if isinstance(act, type):
+            # the default argument is a class, which isinstance() alone would never match
+            return issubclass(act, kinds)
+        return isinstance(act, kinds)
+
+    if matches(nn.LeakyReLU):
+        return lambda x: keras.activations.relu(x, alpha=0.1)
+    elif matches(nn.Hardswish):
+        return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
+    elif matches(nn.SiLU, SiLU):
+        return lambda x: keras.activations.swish(x)
+    else:
+        raise Exception(f"no matching TensorFlow activation found for PyTorch activation {act}")
+
+
+def representative_dataset_gen(dataset, ncalib=100):
+    """
+    Generates a representative dataset for calibration by yielding transformed numpy arrays from the input dataset.
+
+    Args:
+        dataset: Iterable of tuples whose second element is the image array; only that element is used.
+        ncalib: Maximum number of samples to yield. Nothing is yielded when it is zero or negative.
+
+    Yields:
+        A one-element list holding the image with its first axis moved last, a leading axis of size 1 added,
+        converted to float32 and divided by 255.
+    """
+    if ncalib <= 0:
+        return
+    for n, (_path, img, *_rest) in enumerate(dataset):
+        im = np.transpose(img, [1, 2, 0])
+        im = np.expand_dims(im, axis=0).astype(np.float32)
+        im /= 255
+        yield [im]
+        # n is zero-based, so stop once n + 1 samples were produced (and before fetching another item)
+        if n + 1 >= ncalib:
+            break
+
+
+def run(
+    weights=ROOT / "yolov5s.pt",  # weights path
+    imgsz=(640, 640),  # inference size h,w
+    batch_size=1,  # batch size
+    dynamic=False,  # dynamic batch size
+):
+    """Loads the PyTorch model, mirrors it as a TensorFlow model and a Keras model, and runs a dummy input through
+    each of them.
+    """
+    # PyTorch model
+    torch_input = torch.zeros((batch_size, 3, *imgsz))  # BCHW image
+    model = attempt_load(weights, device=torch.device("cpu"), inplace=True, fuse=False)
+    model(torch_input)  # inference
+    model.info()
+
+    # TensorFlow model
+    tf_input = tf.zeros((batch_size, *imgsz, 3))  # BHWC image
+    tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
+    tf_model.predict(tf_input)  # inference
+
+    # Keras model
+    keras_batch = None if dynamic else batch_size
+    keras_input = keras.Input(shape=(*imgsz, 3), batch_size=keras_batch)
+    keras_output = tf_model.predict(keras_input)
+    keras_model = keras.Model(inputs=keras_input, outputs=keras_output)
+    keras_model.summary()
+
+    LOGGER.info("PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.")
+
+
+def parse_opt():
+    """Builds the command-line parser (weights, image size, batch size, dynamic batching), parses the arguments,
+    expands a single image size to two values, prints the options and returns them.
+    """
+    parser = argparse.ArgumentParser()
+    add = parser.add_argument
+    add("--weights", type=str, default=ROOT / "yolov5s.pt", help="weights path")
+    add("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
+    add("--batch-size", type=int, default=1, help="batch size")
+    add("--dynamic", action="store_true", help="dynamic batch size")
+    opt = parser.parse_args()
+    if len(opt.imgsz) == 1:
+        opt.imgsz *= 2  # expand
+    print_args(vars(opt))
+    return opt
+
+
+def main(opt):
+    """Calls `run` with every attribute of `opt` passed as a keyword argument."""
+    options = vars(opt)
+    run(**options)
+
+
+if __name__ == "__main__":
+    # Script entry point: parse the command-line options, then hand them to main().
+    opt = parse_opt()
+    main(opt)
--- a/utils/yolov5/models/yolo.py
+++ b/utils/yolov5/models/yolo.py
@ -0,0 +1,495 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+"""
+YOLO-specific modules.
+
+Usage:
+    $ python models/yolo.py --cfg yolov5s.yaml
+"""
+
+import argparse
+import contextlib
+import math
+import os
+import platform
+import sys
+from copy import deepcopy
+from pathlib import Path
+
+import torch
+import torch.nn as nn
+
+FILE = Path(__file__).resolve()  # absolute path of this file
+ROOT = FILE.parents[1]  # YOLOv5 root directory (parent of the directory containing this file)
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH so the `models.*` / `utils.*` imports below resolve
+if platform.system() != "Windows":
+    ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative to the current working directory
+
+from models.common import (
+    C3,
+    C3SPP,
+    C3TR,
+    SPP,
+    SPPF,
+    Bottleneck,
+    BottleneckCSP,
+    C3Ghost,
+    C3x,
+    Classify,
+    Concat,
+    Contract,
+    Conv,
+    CrossConv,
+    DetectMultiBackend,
+    DWConv,
+    DWConvTranspose2d,
+    Expand,
+    Focus,
+    GhostBottleneck,
+    GhostConv,
+    Proto,
+)
+from models.experimental import MixConv2d
+from utils.autoanchor import check_anchor_order
+from utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args
+from utils.plots import feature_visualization
+from utils.torch_utils import (
+    fuse_conv_and_bn,
+    initialize_weights,
+    model_info,
+    profile,
+    scale_img,
+    select_device,
+    time_sync,
+)
+
+try:
+    import thop  # for FLOPs computation
+except ImportError:
+    thop = None
+
+
+class Detect(nn.Module):
+    """YOLOv5 Detect head for processing input tensors and generating detection outputs in object detection models."""
+
+    stride = None  # strides computed during build
+    dynamic = False  # force grid reconstruction
+    export = False  # export mode
+
+    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):
+        """
+        Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations.
+
+        Args:
+            nc: Number of classes.
+            anchors: One flat sequence of anchor values per detection layer (two values per anchor).
+            ch: Input channel count of each detection layer; one 1x1 output conv is created per entry.
+            inplace: Stored as `self.inplace`.
+        """
+        super().__init__()
+        self.nc = nc  # number of classes
+        self.no = nc + 5  # number of outputs per anchor
+        self.nl = len(anchors)  # number of detection layers
+        self.na = len(anchors[0]) // 2  # number of anchors
+        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
+        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
+        self.register_buffer("anchors", torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
+        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
+        self.inplace = inplace  # use inplace ops (e.g. slice assignment)
+
+    def forward(self, x):
+        """
+        Processes input through YOLOv5 layers, altering shape for detection: `x(bs, 3, ny, nx, 85)`.
+
+        The list `x` is overwritten in place with the reshaped per-layer outputs. Returns `x` in training mode,
+        a 1-tuple with the concatenated predictions in export mode, and `(concatenated predictions, x)` otherwise.
+        """
+        z = []  # inference output
+        for i in range(self.nl):
+            x[i] = self.m[i](x[i])  # conv
+            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
+            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+
+            if not self.training:  # inference
+                # rebuild the cached grids when forced or when the feature-map size changed
+                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
+                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
+
+                if isinstance(self, Segment):  # (boxes + masks)
+                    # the trailing mask columns are kept raw; only xy, wh and conf go through a sigmoid
+                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
+                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
+                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
+                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
+                else:  # Detect (boxes only)
+                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
+                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
+                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
+                    y = torch.cat((xy, wh, conf), 4)
+                z.append(y.view(bs, self.na * nx * ny, self.no))
+
+        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
+
+    def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, "1.10.0")):
+        """
+        Generates a mesh grid for anchor boxes with optional compatibility for torch versions < 1.10.
+
+        Returns `(grid, anchor_grid)`, both expanded to shape (1, na, ny, nx, 2) on the device and dtype of the
+        anchors. The `torch_1_10` default is evaluated once, when the method is defined.
+        """
+        d = self.anchors[i].device
+        t = self.anchors[i].dtype
+        shape = 1, self.na, ny, nx, 2  # grid shape
+        y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
+        yv, xv = torch.meshgrid(y, x, indexing="ij") if torch_1_10 else torch.meshgrid(y, x)  # torch>=0.7 compatibility
+        grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
+        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
+        return grid, anchor_grid
+
+
+class Segment(Detect):
+    """YOLOv5 segmentation head: the Detect head widened by mask outputs plus a `Proto` branch."""
+
+    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
+        """Sets up the Detect head, then rebuilds the output convs for `5 + nc + nm` outputs and adds the protos."""
+        super().__init__(nc, anchors, ch, inplace)
+        self.nm = nm  # number of masks
+        self.npr = npr  # number of protos
+        self.no = 5 + nc + self.nm  # number of outputs per anchor
+        output_convs = [nn.Conv2d(channels, self.no * self.na, 1) for channels in ch]
+        self.m = nn.ModuleList(output_convs)  # output conv
+        self.proto = Proto(ch[0], self.npr, self.nm)  # protos
+        self.detect = Detect.forward
+
+    def forward(self, x):
+        """Returns `(x, p)` when training, `(x[0], p)` when exporting and `(x[0], p, x[1])` otherwise, where `p` is
+        the proto output and `x` the Detect result.
+        """
+        protos = self.proto(x[0])
+        detections = self.detect(self, x)
+        if self.training:
+            return (detections, protos)
+        if self.export:
+            return (detections[0], protos)
+        return (detections[0], protos, detections[1])
+
+
+class BaseModel(nn.Module):
+    """YOLOv5 base model providing the layer-by-layer forward pass, profiling, layer fusing and info printing."""
+
+    def forward(self, x, profile=False, visualize=False):
+        """Executes a single-scale inference or training pass on the YOLOv5 base model, with options for profiling and
+        visualization.
+        """
+        return self._forward_once(x, profile, visualize)  # single-scale inference, train
+
+    def _forward_once(self, x, profile=False, visualize=False):
+        """
+        Performs a forward pass on the YOLOv5 model, enabling profiling and feature visualization options.
+
+        Each layer reads its input from the previous layer or from the saved outputs named by its `f` attribute;
+        only outputs of layers listed in `self.save` are kept. `visualize`, when truthy, is passed on as the save
+        directory for feature visualization.
+        """
+        y, dt = [], []  # outputs
+        for m in self.model:
+            if m.f != -1:  # if not from previous layer
+                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
+            if profile:
+                self._profile_one_layer(m, x, dt)
+            x = m(x)  # run
+            y.append(x if m.i in self.save else None)  # save output
+            if visualize:
+                feature_visualization(x, m.type, m.i, save_dir=visualize)
+        return x
+
+    def _profile_one_layer(self, m, x, dt):
+        """
+        Profiles a single layer's performance by computing GFLOPs, execution time, and parameters.
+
+        The measured time is appended to `dt`; a header is logged for the first layer and a total for the last.
+        """
+        c = m == self.model[-1]  # is final layer, copy input as inplace fix
+        o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1e9 * 2 if thop else 0  # FLOPs
+        t = time_sync()
+        for _ in range(10):
+            m(x.copy() if c else x)
+        # 10 runs times 100: presumably time_sync() returns seconds, making this the mean time per run in ms
+        dt.append((time_sync() - t) * 100)
+        if m == self.model[0]:
+            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s}  module")
+        LOGGER.info(f"{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f}  {m.type}")
+        if c:
+            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s}  Total")
+
+    def fuse(self):
+        """
+        Fuses Conv2d() and BatchNorm2d() layers in the model to improve inference speed.
+
+        Every Conv/DWConv module that still has a `bn` attribute gets a fused conv, loses `bn` and switches to
+        `forward_fuse`. Returns `self`.
+        """
+        LOGGER.info("Fusing layers... ")
+        for m in self.model.modules():
+            if isinstance(m, (Conv, DWConv)) and hasattr(m, "bn"):
+                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
+                delattr(m, "bn")  # remove batchnorm
+                m.forward = m.forward_fuse  # update forward
+        self.info()
+        return self
+
+    def info(self, verbose=False, img_size=640):
+        """Prints model information given verbosity and image size, e.g., `info(verbose=True, img_size=640)`."""
+        model_info(self, verbose, img_size)
+
+    def _apply(self, fn):
+        """
+        Applies transformations like to(), cpu(), cuda(), half() to model tensors excluding parameters or registered
+        buffers.
+
+        For a Detect/Segment head as last layer this covers `stride`, the entries of `grid` and, when it is a list,
+        the entries of `anchor_grid`.
+        """
+        self = super()._apply(fn)
+        m = self.model[-1]  # Detect()
+        if isinstance(m, (Detect, Segment)):
+            m.stride = fn(m.stride)
+            m.grid = list(map(fn, m.grid))
+            if isinstance(m.anchor_grid, list):
+                m.anchor_grid = list(map(fn, m.anchor_grid))
+        return self
+
+
+class DetectionModel(BaseModel):
+    """YOLOv5 detection model class for object detection tasks, supporting custom configurations and anchors."""
+
+    def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):
+        """
+        Initializes YOLOv5 model with configuration file, input channels, number of classes, and custom anchors.
+
+        Args:
+            cfg: Model dict, or path to a *.yaml file that is loaded into one.
+            ch: Input channels; only used when the config has no "ch" entry of its own.
+            nc: Optional class count; when truthy and different from the config value it overrides it.
+            anchors: Optional anchors override; stored in the config after `round()`.
+        """
+        super().__init__()
+        if isinstance(cfg, dict):
+            self.yaml = cfg  # model dict
+        else:  # is *.yaml
+            import yaml  # for torch hub
+
+            self.yaml_file = Path(cfg).name
+            # characters outside ASCII are dropped while reading the file
+            with open(cfg, encoding="ascii", errors="ignore") as f:
+                self.yaml = yaml.safe_load(f)  # model dict
+
+        # Define model
+        ch = self.yaml["ch"] = self.yaml.get("ch", ch)  # input channels
+        if nc and nc != self.yaml["nc"]:
+            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
+            self.yaml["nc"] = nc  # override yaml value
+        if anchors:
+            LOGGER.info(f"Overriding model.yaml anchors with anchors={anchors}")
+            self.yaml["anchors"] = round(anchors)  # override yaml value
+        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
+        self.names = [str(i) for i in range(self.yaml["nc"])]  # default names
+        self.inplace = self.yaml.get("inplace", True)
+
+        # Build strides, anchors
+        m = self.model[-1]  # Detect()
+        if isinstance(m, (Detect, Segment)):
+
+            def _forward(x):
+                """Passes the input 'x' through the model; for a Segment head only the first output is returned."""
+                return self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
+
+            s = 256  # 2x min stride
+            m.inplace = self.inplace
+            # strides are measured with a dummy forward pass: input size divided by each output's height
+            m.stride = torch.tensor([s / x.shape[-2] for x in _forward(torch.zeros(1, ch, s, s))])  # forward
+            check_anchor_order(m)
+            m.anchors /= m.stride.view(-1, 1, 1)  # express anchors in units of each layer's stride
+            self.stride = m.stride
+            self._initialize_biases()  # only run once
+
+        # Init weights, biases
+        initialize_weights(self)
+        self.info()
+        LOGGER.info("")
+
+    def forward(self, x, augment=False, profile=False, visualize=False):
+        """Runs augmented inference when `augment` is set, otherwise one plain pass with optional profiling and
+        visualization.
+        """
+        if not augment:
+            return self._forward_once(x, profile, visualize)  # single-scale inference, train
+        return self._forward_augment(x)  # augmented inference, None
+
+    def _forward_augment(self, x):
+        """Runs the model on three scaled (one of them left-right flipped) copies of `x`, maps the predictions back
+        to the original image and returns them concatenated on axis 1, together with `None`.
+        """
+        img_size = x.shape[-2:]  # height, width
+        schedule = ((1, None), (0.83, 3), (0.67, None))  # (scale, flip) pairs; flips: 2-ud, 3-lr
+        outputs = []
+        for scale, flip in schedule:
+            source = x.flip(flip) if flip else x
+            scaled = scale_img(source, scale, gs=int(self.stride.max()))
+            pred = self._forward_once(scaled)[0]  # forward
+            outputs.append(self._descale_pred(pred, flip, scale, img_size))
+        outputs = self._clip_augmented(outputs)  # clip augmented tails
+        return torch.cat(outputs, 1), None  # augmented inference, train
+
+    def _descale_pred(self, p, flips, scale, img_size):
+        """De-scales predictions from augmented inference, adjusting for flips and image size."""
+        if self.inplace:
+            p[..., :4] /= scale  # de-scale
+            if flips == 2:
+                p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
+            elif flips == 3:
+                p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
+        else:
+            x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
+            if flips == 2:
+                y = img_size[0] - y  # de-flip ud
+            elif flips == 3:
+                x = img_size[1] - x  # de-flip lr
+            p = torch.cat((x, y, wh, p[..., 4:]), -1)
+        return p
+
+    def _clip_augmented(self, y):
+        """Clips augmented inference tails for YOLOv5 models, affecting first and last tensors based on grid points and
+        layer counts.
+        """
+        nl = self.model[-1].nl  # number of detection layers (P3-P5)
+        g = sum(4**x for x in range(nl))  # grid points
+        e = 1  # exclude layer count
+        i = (y[0].shape[1] // g) * sum(4**x for x in range(e))  # indices
+        y[0] = y[0][:, :-i]  # large
+        i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e))  # indices
+        y[-1] = y[-1][:, i:]  # small
+        return y
+
+    def _initialize_biases(self, cf=None):
+        """
+        Initializes biases for YOLOv5's Detect() module, optionally using class frequencies (cf).
+
+        For details see https://arxiv.org/abs/1708.02002 section 3.3.
+        """
+        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
+        m = self.model[-1]  # Detect() module
+        for mi, s in zip(m.m, m.stride):  # from
+            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
+            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
+            b.data[:, 5 : 5 + m.nc] += (
+                math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())
+            )  # cls
+            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+
+
+Model = DetectionModel  # alias: keeps the YOLOv5 'Model' class name available for backwards compatibility
+
+
+class SegmentationModel(DetectionModel):
+    """
+    YOLOv5 segmentation model.
+
+    Adds no behavior of its own: the constructor forwards every argument to `DetectionModel.__init__` and only
+    supplies "yolov5s-seg.yaml" as the default configuration.
+    """
+
+    def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
+        """
+        Initialize the segmentation model by delegating to the parent constructor.
+
+        Args:
+            cfg (str): Model configuration; defaults to "yolov5s-seg.yaml".
+            ch (int): Input channels.
+            nc (int | None): Number of classes.
+            anchors (list | None): Anchors.
+        """
+        super().__init__(cfg, ch, nc, anchors)
+
+
+class ClassificationModel(BaseModel):
+    """YOLOv5 classification model for image classification tasks, initialized with a config file or detection model."""
+
+    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):
+        """Initializes YOLOv5 model with config file `cfg`, input channels `ch`, number of classes `nc`, and `cuttoff`
+        index.
+        """
+        super().__init__()
+        self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
+
+    def _from_detection_model(self, model, nc=1000, cutoff=10):
+        """Creates a classification model from a YOLOv5 detection model, slicing at `cutoff` and adding a classification
+        layer.
+        """
+        if isinstance(model, DetectMultiBackend):
+            model = model.model  # unwrap DetectMultiBackend
+        model.model = model.model[:cutoff]  # backbone
+        m = model.model[-1]  # last layer
+        ch = m.conv.in_channels if hasattr(m, "conv") else m.cv1.conv.in_channels  # ch into module
+        c = Classify(ch, nc)  # Classify()
+        c.i, c.f, c.type = m.i, m.f, "models.common.Classify"  # index, from, type
+        model.model[-1] = c  # replace
+        self.model = model.model
+        self.stride = model.stride
+        self.save = []
+        self.nc = nc
+
+    def _from_yaml(self, cfg):
+        """Creates a YOLOv5 classification model from a specified *.yaml configuration file."""
+        self.model = None
+
+
+def parse_model(d, ch):
+    """
+    Build a YOLOv5 network from a model-definition dict.
+
+    Args:
+        d (dict): Model definition. Keys read here: "anchors", "nc", "depth_multiple", "width_multiple",
+            "backbone" and "head", plus the optional "activation" and "channel_multiple".
+        ch (list): Channel list seeded with the input channels; after the first layer it is replaced by the
+            running list of per-layer output channels.
+
+    Returns:
+        (nn.Sequential, list): The assembled layers, and the sorted list of absolute layer indices referenced
+            by any 'from' value other than -1.
+
+    Note:
+        Module names and string arguments are resolved with `eval()` in this function's scope, so entries such
+        as `nc` or `anchors` in a layer's args pick up the local variables of the same name. Only parse trusted
+        definitions, and do not rename these locals without checking every config. The layer args lists inside
+        `d` are modified in place.
+    """
+    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
+    anchors, nc, gd, gw, act, ch_mul = (
+        d["anchors"],
+        d["nc"],
+        d["depth_multiple"],
+        d["width_multiple"],
+        d.get("activation"),
+        d.get("channel_multiple"),
+    )
+    if act:
+        # SECURITY: eval() on a config string; acceptable only for trusted model definitions.
+        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
+        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
+    if not ch_mul:
+        ch_mul = 8  # fallback when "channel_multiple" is missing or falsy
+    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
+    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
+
+    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
+    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
+        m = eval(m) if isinstance(m, str) else m  # eval strings
+        for j, a in enumerate(args):
+            # Strings that do not name anything in scope (e.g. "nearest") raise NameError and stay as strings.
+            with contextlib.suppress(NameError):
+                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
+
+        # n_ keeps the scaled repeat count for the log line, because n is reset to 1 for C3-style modules below.
+        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
+        if m in {
+            Conv,
+            GhostConv,
+            Bottleneck,
+            GhostBottleneck,
+            SPP,
+            SPPF,
+            DWConv,
+            MixConv2d,
+            Focus,
+            CrossConv,
+            BottleneckCSP,
+            C3,
+            C3TR,
+            C3SPP,
+            C3Ghost,
+            nn.ConvTranspose2d,
+            DWConvTranspose2d,
+            C3x,
+        }:
+            c1, c2 = ch[f], args[0]
+            if c2 != no:  # if not output
+                # Apply the width multiple; make_divisible presumably rounds to a multiple of ch_mul (helper not shown).
+                c2 = make_divisible(c2 * gw, ch_mul)
+
+            args = [c1, c2, *args[1:]]
+            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
+                # These modules take the repeat count as an argument instead of being stacked in a Sequential.
+                args.insert(2, n)  # number of repeats
+                n = 1
+        elif m is nn.BatchNorm2d:
+            args = [ch[f]]
+        elif m is Concat:
+            c2 = sum(ch[x] for x in f)
+        # TODO: channel, gw, gd
+        elif m in {Detect, Segment}:
+            args.append([ch[x] for x in f])  # input channels of every feeding layer
+            if isinstance(args[1], int):  # number of anchors
+                args[1] = [list(range(args[1] * 2))] * len(f)
+            if m is Segment:
+                args[3] = make_divisible(args[3] * gw, ch_mul)
+        elif m is Contract:
+            c2 = ch[f] * args[0] ** 2
+        elif m is Expand:
+            c2 = ch[f] // args[0] ** 2
+        else:
+            c2 = ch[f]  # any other module: output channels equal the input channels
+
+        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
+        # str(m) looks like "<class 'pkg.Name'>"; the slice strips the "<class '" prefix and "'>" suffix.
+        t = str(m)[8:-2].replace("__main__.", "")  # module type
+        # NOTE: this local is named `np` and shadows any module-level `np` inside this function.
+        np = sum(x.numel() for x in m_.parameters())  # number params
+        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
+        LOGGER.info(f"{i:>3}{str(f):>18}{n_:>3}{np:10.0f}  {t:<40}{str(args):<30}")  # print
+        # x % i turns a negative (relative) 'from' index into an absolute layer index.
+        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
+        layers.append(m_)
+        if i == 0:
+            ch = []  # drop the seed input channels; from here on ch[k] is the output width of layer k
+        ch.append(c2)
+    return nn.Sequential(*layers), sorted(save)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: build the model named by --cfg, then run exactly one of the modes below.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--cfg", type=str, default="yolov5s.yaml", help="model.yaml")
+    parser.add_argument("--batch-size", type=int, default=1, help="total batch size for all GPUs")
+    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
+    parser.add_argument("--profile", action="store_true", help="profile model speed")
+    parser.add_argument("--line-profile", action="store_true", help="profile model speed layer by layer")
+    parser.add_argument("--test", action="store_true", help="test all yolo*.yaml")
+    opt = parser.parse_args()
+    opt.cfg = check_yaml(opt.cfg)  # check YAML
+    print_args(vars(opt))
+    device = select_device(opt.device)
+
+    # Create model
+    im = torch.rand(opt.batch_size, 3, 640, 640).to(device)  # random dummy batch: 3 channels, 640x640
+    model = Model(opt.cfg).to(device)
+
+    # Options: the branches are mutually exclusive and checked in this order
+    if opt.line_profile:  # profile layer by layer
+        model(im, profile=True)
+
+    elif opt.profile:  # profile forward-backward
+        results = profile(input=im, ops=[model], n=3)
+
+    elif opt.test:  # test all models
+        # Try to build every yolo*.yaml found under ROOT/models; report failures without aborting the sweep.
+        for cfg in Path(ROOT / "models").rglob("yolo*.yaml"):
+            try:
+                _ = Model(cfg)
+            except Exception as e:
+                print(f"Error in {cfg}: {e}")
+
+    else:  # report fused model summary
+        model.fuse()
--- a/utils/yolov5/models/yolov5l.yaml
+++ b/utils/yolov5/models/yolov5l.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/yolov5m.yaml
+++ b/utils/yolov5/models/yolov5m.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.67 # model depth multiple
+width_multiple: 0.75 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/yolov5n.yaml
+++ b/utils/yolov5/models/yolov5n.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.25 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/yolov5s.yaml
+++ b/utils/yolov5/models/yolov5s.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
--- a/utils/yolov5/models/yolov5x.yaml
+++ b/utils/yolov5/models/yolov5x.yaml
@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.33 # model depth multiple
+width_multiple: 1.25 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]