Complete the yolov5 code

commit 9d369b9898
parent 6daea23f0a
Date: 2025-03-13 17:48:59 +08:00
42 changed files with 8571 additions and 171 deletions


@@ -120,13 +120,20 @@ def run_detect_yolo(detect_log_in: ProjectDetectLogIn, session: Session = Depend(
    detect_img_count = pdc.check_detect_img(detect_log_in.detect_id, session)
    if detect_img_count == 0:
        return rc.response_error("There is nothing in the inference set; please upload images to it first")
-    detect_log = pds.run_detect_yolo(detect_log_in, detect, train, session)
-    thread_train = threading.Thread(target=run_event_loop, args=(detect_log.pt_url,
-                                                                  detect_log.folder_url,
-                                                                  detect_log.detect_folder_url,
-                                                                  detect_log.detect_version,
-                                                                  detect_log.id, detect_log.detect_id, session,))
-    thread_train.start()
    if detect.file_type == 'img' or detect.file_type == 'video':
        detect_log = pds.run_detect_yolo(detect_log_in, detect, train, session)
        thread_train = threading.Thread(target=run_event_loop,
                                        args=(detect_log.pt_url, detect_log.folder_url, detect_log.detect_folder_url,
                                              detect_log.detect_version, detect_log.id, detect_log.detect_id, session,))
        thread_train.start()
    elif detect.file_type == 'rtsp':
        if detect_log_in.pt_type == 'best':
            weights_pt = train.best_pt
        else:
            weights_pt = train.last_pt
        thread_train = threading.Thread(target=pds.run_detect_rtsp,
                                        args=(weights_pt, detect.rtsp_url, train.train_data,))
        thread_train.start()
    return rc.response_success(msg="Executed successfully")
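One caveat worth noting (an observation, not part of the commit): both branches hand the request-scoped `session` to a non-daemon thread that outlives the request. A minimal sketch of a safer arrangement, assuming the app exposes a `SessionLocal` sessionmaker somewhere (a hypothetical name; the real factory may differ):

```python
# Sketch only: give the background worker its own session and make the
# thread a daemon so it cannot block interpreter shutdown.
def run_event_loop_with_own_session(*args):
    session = SessionLocal()  # hypothetical sessionmaker, not from the diff
    try:
        run_event_loop(*args, session)
    finally:
        session.close()

thread_train = threading.Thread(target=run_event_loop_with_own_session,
                                args=(detect_log.pt_url, detect_log.folder_url,
                                      detect_log.detect_folder_url, detect_log.detect_version,
                                      detect_log.id, detect_log.detect_id),
                                daemon=True)
thread_train.start()
```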


@@ -103,6 +103,7 @@ class ProjectDetect(DbCommon):
    detect_status: Mapped[int] = mapped_column(Integer)
    file_type: Mapped[str] = mapped_column(String(10))
    folder_url: Mapped[str] = mapped_column(String(255))
    rtsp_url: Mapped[str] = mapped_column(String(255))

class ProjectDetectImg(DbCommon):
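Since `rtsp_url` is a new mapped column, the underlying table needs a matching schema change; SQLAlchemy will not alter an existing table on its own. If the project manages its schema with Alembic (not shown in this commit), the migration would look roughly like the sketch below; the table name `project_detect` is an assumption:

```python
# Hypothetical Alembic migration for the new column (revision ids omitted).
from alembic import op
import sqlalchemy as sa


def upgrade():
    # Nullable, so existing rows without a stream URL remain valid.
    op.add_column("project_detect", sa.Column("rtsp_url", sa.String(255), nullable=True))


def downgrade():
    op.drop_column("project_detect", "rtsp_url")
```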


@@ -24,6 +24,7 @@ class ProjectDetectOut(BaseModel):
    detect_version: Optional[int]
    file_type: Optional[str]
    folder_url: Optional[str]
    rtsp_url: Optional[str]
    create_time: Optional[datetime]

    class Config:


@@ -2,6 +2,14 @@
from sqlalchemy.orm import Session
from typing import List
from fastapi import UploadFile
import subprocess
from yolov5.models.common import DetectMultiBackend
from yolov5.utils.torch_utils import select_device
from yolov5.utils.dataloaders import LoadStreams
from yolov5.utils.general import check_img_size, Profile, non_max_suppression, cv2, scale_boxes
import torch
from pathlib import Path
from ultralytics.utils.plotting import Annotator, colors, save_one_box
import platform
from app.model.crud import project_detect_crud as pdc
from app.model.schemas.project_detect_schemas import ProjectDetectIn, ProjectDetectOut, ProjectDetectLogIn
@@ -172,8 +180,96 @@ async def run_commend(weights: str, source: str, project: str, name: str,
    pdc.add_detect_imgs(detect_log_imgs, session)

-def run_detect_rtsp():
-    return None

def run_detect_rtsp(weights_pt: str, rtsp_url: str, data: str):
    """
    Run YOLOv5 inference on an RTSP video stream.

    :param weights_pt: path to the weights (.pt) file
    :param rtsp_url: RTSP stream URL
    :param data: dataset yaml file
    :return:
    """
    # Select the device (CPU or GPU)
    device = select_device('cpu')
    # Load the model
    model = DetectMultiBackend(weights_pt, device=device, dnn=False, data=data, fp16=False)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size((640, 640), s=stride)  # check image size
    dataset = LoadStreams(rtsp_url, img_size=imgsz, stride=stride, auto=pt, vid_stride=1)
    bs = len(dataset)
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))
    seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device))
    for path, im, im0s, vid_cap, s in dataset:
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim
            if model.xml and im.shape[0] > 1:
                ims = torch.chunk(im, im.shape[0], 0)

        # Inference
        with dt[1]:
            if model.xml and im.shape[0] > 1:
                pred = None
                for image in ims:
                    if pred is None:
                        pred = model(image, augment=False, visualize=False).unsqueeze(0)
                    else:
                        pred = torch.cat((pred, model(image, augment=False, visualize=False).unsqueeze(0)), dim=0)
                pred = [pred, None]
            else:
                pred = model(im, augment=False, visualize=False)

        # NMS
        with dt[2]:
            pred = non_max_suppression(pred, 0.25, 0.45, None, False, max_det=1000)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            p, im0, frame = path[i], im0s[i].copy(), dataset.count
            s += f"{i}: "
            p = Path(p)  # to Path
            s += "{:g}x{:g} ".format(*im.shape[2:])  # print string
            annotator = Annotator(im0, line_width=3, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, 5].unique():
                    n = (det[:, 5] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    c = int(cls)  # integer class
                    label = f"{names[c]} {conf:.2f}"
                    annotator.box_label(xyxy, label, color=colors(c, True))

            # Stream results
            im0 = annotator.result()
            if platform.system() == "Linux" and p not in windows:
                windows.append(p)
                cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
            cv2.imshow(str(p), im0)
            cv2.waitKey(1)  # 1 millisecond
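For reference, a minimal sketch of driving this function directly, outside the FastAPI endpoint. The paths and URL are placeholders, and because the loop calls cv2.imshow it needs a display (it will not run headless):

```python
import threading

weights = "runs/train/exp/weights/best.pt"  # placeholder weights path
stream = "rtsp://127.0.0.1:8554/live"       # placeholder RTSP source
data_yaml = "yolov5/data/coco128.yaml"      # placeholder dataset yaml

# The function loops over the stream indefinitely, so run it in a daemon
# thread, mirroring how the endpoint above dispatches it.
t = threading.Thread(target=run_detect_rtsp, args=(weights, stream, data_yaml), daemon=True)
t.start()
t.join(timeout=30)  # watch the stream for ~30 s in this sketch
```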

yolov5/.dockerignore (new file, 222 lines)

@@ -0,0 +1,222 @@
# Repo-specific DockerIgnore -------------------------------------------------------------------------------------------
.git
.cache
.idea
runs
output
coco
storage.googleapis.com
data/samples/*
**/results*.csv
*.jpg
# Neural Network weights -----------------------------------------------------------------------------------------------
**/*.pt
**/*.pth
**/*.onnx
**/*.engine
**/*.mlmodel
**/*.torchscript
**/*.torchscript.pt
**/*.tflite
**/*.h5
**/*.pb
*_saved_model/
*_web_model/
*_openvino_model/
# Below Copied From .gitignore -----------------------------------------------------------------------------------------
# GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
wandb/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv*
venv*/
ENV*/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
Icon?
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/*
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
.html # Bokeh Plots
.pg # TensorFlow Frozen Graphs
.avi # videos
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
cmake-build-release/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

yolov5/.gitattributes (vendored, new file, 2 lines)

@@ -0,0 +1,2 @@
# this drops notebooks from GitHub language stats
*.ipynb linguist-vendored

yolov5/.gitignore (vendored, new file, 258 lines)

@@ -0,0 +1,258 @@
# Repo-specific GitIgnore ----------------------------------------------------------------------------------------------
*.jpg
*.jpeg
*.png
*.bmp
*.tif
*.tiff
*.heic
*.JPG
*.JPEG
*.PNG
*.BMP
*.TIF
*.TIFF
*.HEIC
*.mp4
*.mov
*.MOV
*.avi
*.data
*.json
*.cfg
!setup.cfg
!cfg/yolov3*.cfg
storage.googleapis.com
../runs/*
data/*
data/images/*
!data/*.yaml
!data/hyps
!data/scripts
!data/images
!data/images/zidane.jpg
!data/images/bus.jpg
!data/*.sh
results*.csv
# Datasets -------------------------------------------------------------------------------------------------------------
coco/
coco128/
VOC/
# MATLAB GitIgnore -----------------------------------------------------------------------------------------------------
*.m~
*.mat
!targets*.mat
# Neural Network weights -----------------------------------------------------------------------------------------------
*.weights
*.pt
*.pb
*.onnx
*.engine
*.mlmodel
*.mlpackage
*.torchscript
*.tflite
*.h5
*_saved_model/
*_web_model/
*_openvino_model/
*_paddle_model/
darknet53.conv.74
yolov3-tiny.conv.15
# GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
/wandb/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv*
venv*/
ENV*/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
Icon?
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/*
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
.html # Bokeh Plots
.pg # TensorFlow Frozen Graphs
.avi # videos
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
cmake-build-release/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

yolov5/classify/predict.py (new file, 241 lines)

@@ -0,0 +1,241 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Run YOLOv5 classification inference on images, videos, directories, globs, YouTube, webcam, streams, etc.

Usage - sources:
    $ python classify/predict.py --weights yolov5s-cls.pt --source 0                               # webcam
                                                                   img.jpg                         # image
                                                                   vid.mp4                         # video
                                                                   screen                          # screenshot
                                                                   path/                           # directory
                                                                   list.txt                        # list of images
                                                                   list.streams                    # list of streams
                                                                   'path/*.jpg'                    # glob
                                                                   'https://youtu.be/LNwODJXcvt4'  # YouTube
                                                                   'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream

Usage - formats:
    $ python classify/predict.py --weights yolov5s-cls.pt              # PyTorch
                                           yolov5s-cls.torchscript     # TorchScript
                                           yolov5s-cls.onnx            # ONNX Runtime or OpenCV DNN with --dnn
                                           yolov5s-cls_openvino_model  # OpenVINO
                                           yolov5s-cls.engine          # TensorRT
                                           yolov5s-cls.mlmodel         # CoreML (macOS-only)
                                           yolov5s-cls_saved_model     # TensorFlow SavedModel
                                           yolov5s-cls.pb              # TensorFlow GraphDef
                                           yolov5s-cls.tflite          # TensorFlow Lite
                                           yolov5s-cls_edgetpu.tflite  # TensorFlow Edge TPU
                                           yolov5s-cls_paddle_model    # PaddlePaddle
"""

import argparse
import os
import platform
import sys
from pathlib import Path

import torch
import torch.nn.functional as F

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from ultralytics.utils.plotting import Annotator

from models.common import DetectMultiBackend
from utils.augmentations import classify_transforms
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (
    LOGGER,
    Profile,
    check_file,
    check_img_size,
    check_imshow,
    check_requirements,
    colorstr,
    cv2,
    increment_path,
    print_args,
    strip_optimizer,
)
from utils.torch_utils import select_device, smart_inference_mode


@smart_inference_mode()
def run(
    weights=ROOT / "yolov5s-cls.pt",  # model.pt path(s)
    source=ROOT / "data/images",  # file/dir/URL/glob/screen/0(webcam)
    data=ROOT / "data/coco128.yaml",  # dataset.yaml path
    imgsz=(224, 224),  # inference size (height, width)
    device="",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
    view_img=False,  # show results
    save_txt=False,  # save results to *.txt
    nosave=False,  # do not save images/videos
    augment=False,  # augmented inference
    visualize=False,  # visualize features
    update=False,  # update all models
    project=ROOT / "runs/predict-cls",  # save results to project/name
    name="exp",  # save results to project/name
    exist_ok=False,  # existing project/name ok, do not increment
    half=False,  # use FP16 half-precision inference
    dnn=False,  # use OpenCV DNN for ONNX inference
    vid_stride=1,  # video frame-rate stride
):
    """Conducts YOLOv5 classification inference on diverse input sources and saves results."""
    source = str(source)
    save_img = not nosave and not source.endswith(".txt")  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
    webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
    screenshot = source.lower().startswith("screen")
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    if webcam:
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
        bs = len(dataset)
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        dataset = LoadImages(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device))
    for path, im, im0s, vid_cap, s in dataset:
        with dt[0]:
            im = torch.Tensor(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            results = model(im)

        # Post-process
        with dt[2]:
            pred = F.softmax(results, dim=1)  # probabilities

        # Process predictions
        for i, prob in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f"{i}: "
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}")  # im.txt
            s += "{:g}x{:g} ".format(*im.shape[2:])  # print string
            annotator = Annotator(im0, example=str(names), pil=True)

            # Print results
            top5i = prob.argsort(0, descending=True)[:5].tolist()  # top 5 indices
            s += f"{', '.join(f'{names[j]} {prob[j]:.2f}' for j in top5i)}, "

            # Write results
            text = "\n".join(f"{prob[j]:.2f} {names[j]}" for j in top5i)
            if save_img or view_img:  # Add bbox to image
                annotator.text([32, 32], text, txt_color=(255, 255, 255))
            if save_txt:  # Write to file
                with open(f"{txt_path}.txt", "a") as f:
                    f.write(text + "\n")

            # Stream results
            im0 = annotator.result()
            if view_img:
                if platform.system() == "Linux" and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == "image":
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix(".mp4"))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f"{s}{dt[1].dt * 1e3:.1f}ms")

    # Print results
    t = tuple(x.t / seen * 1e3 for x in dt)  # speeds per image
    LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights[0])  # update model (to fix SourceChangeWarning)


def parse_opt():
    """Parses command line arguments for YOLOv5 inference settings including model, source, device, and image size."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-cls.pt", help="model path(s)")
    parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
    parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="(optional) dataset.yaml path")
    parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[224], help="inference size h,w")
    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
    parser.add_argument("--view-img", action="store_true", help="show results")
    parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
    parser.add_argument("--nosave", action="store_true", help="do not save images/videos")
    parser.add_argument("--augment", action="store_true", help="augmented inference")
    parser.add_argument("--visualize", action="store_true", help="visualize features")
    parser.add_argument("--update", action="store_true", help="update all models")
    parser.add_argument("--project", default=ROOT / "runs/predict-cls", help="save results to project/name")
    parser.add_argument("--name", default="exp", help="save results to project/name")
    parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
    parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
    parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
    parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride")
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt


def main(opt):
    """Executes YOLOv5 model inference with options for ONNX DNN and video frame-rate stride adjustments."""
    check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
    run(**vars(opt))


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
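Besides the CLI entry point above, `run()` can be driven from Python. A minimal sketch; the weight and source paths are placeholders, and the import assumes the yolov5 root is on `sys.path`:

```python
import sys

sys.path.append("yolov5")  # assumption: repo checked out at ./yolov5
from classify.predict import run

# Equivalent to: python classify/predict.py --weights yolov5s-cls.pt --source data/images
run(weights="yolov5s-cls.pt", source="data/images", imgsz=(224, 224))
```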

yolov5/classify/train.py (new file, 382 lines)

@@ -0,0 +1,382 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Train a YOLOv5 classifier model on a classification dataset.

Usage - Single-GPU training:
    $ python classify/train.py --model yolov5s-cls.pt --data imagenette160 --epochs 5 --img 224

Usage - Multi-GPU DDP training:
    $ python -m torch.distributed.run --nproc_per_node 4 --master_port 2022 classify/train.py --model yolov5s-cls.pt --data imagenet --epochs 5 --img 224 --device 0,1,2,3

Datasets:           --data mnist, fashion-mnist, cifar10, cifar100, imagenette, imagewoof, imagenet, or 'path/to/data'
YOLOv5-cls models:  --model yolov5n-cls.pt, yolov5s-cls.pt, yolov5m-cls.pt, yolov5l-cls.pt, yolov5x-cls.pt
Torchvision models: --model resnet50, efficientnet_b0, etc. See https://pytorch.org/vision/stable/models.html
"""

import argparse
import os
import subprocess
import sys
import time
from copy import deepcopy
from datetime import datetime
from pathlib import Path

import torch
import torch.distributed as dist
import torch.hub as hub
import torch.optim.lr_scheduler as lr_scheduler
import torchvision
from torch.cuda import amp
from tqdm import tqdm

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from classify import val as validate
from models.experimental import attempt_load
from models.yolo import ClassificationModel, DetectionModel
from utils.dataloaders import create_classification_dataloader
from utils.general import (
    DATASETS_DIR,
    LOGGER,
    TQDM_BAR_FORMAT,
    WorkingDirectory,
    check_git_info,
    check_git_status,
    check_requirements,
    colorstr,
    download,
    increment_path,
    init_seeds,
    print_args,
    yaml_save,
)
from utils.loggers import GenericLogger
from utils.plots import imshow_cls
from utils.torch_utils import (
    ModelEMA,
    de_parallel,
    model_info,
    reshape_classifier_output,
    select_device,
    smart_DDP,
    smart_optimizer,
    smartCrossEntropyLoss,
    torch_distributed_zero_first,
)

LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1))  # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv("RANK", -1))
WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1))
GIT_INFO = check_git_info()


def train(opt, device):
    """Trains a YOLOv5 model, managing datasets, model optimization, logging, and saving checkpoints."""
    init_seeds(opt.seed + 1 + RANK, deterministic=True)
    save_dir, data, bs, epochs, nw, imgsz, pretrained = (
        opt.save_dir,
        Path(opt.data),
        opt.batch_size,
        opt.epochs,
        min(os.cpu_count() - 1, opt.workers),
        opt.imgsz,
        str(opt.pretrained).lower() == "true",
    )
    cuda = device.type != "cpu"

    # Directories
    wdir = save_dir / "weights"
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last, best = wdir / "last.pt", wdir / "best.pt"

    # Save run settings
    yaml_save(save_dir / "opt.yaml", vars(opt))

    # Logger
    logger = GenericLogger(opt=opt, console_logger=LOGGER) if RANK in {-1, 0} else None

    # Download Dataset
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        data_dir = data if data.is_dir() else (DATASETS_DIR / data)
        if not data_dir.is_dir():
            LOGGER.info(f"\nDataset not found ⚠️, missing path {data_dir}, attempting download...")
            t = time.time()
            if str(data) == "imagenet":
                subprocess.run(["bash", str(ROOT / "data/scripts/get_imagenet.sh")], shell=True, check=True)
            else:
                url = f"https://github.com/ultralytics/assets/releases/download/v0.0.0/{data}.zip"
                download(url, dir=data_dir.parent)
            s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
            LOGGER.info(s)

    # Dataloaders
    nc = len([x for x in (data_dir / "train").glob("*") if x.is_dir()])  # number of classes
    trainloader = create_classification_dataloader(
        path=data_dir / "train",
        imgsz=imgsz,
        batch_size=bs // WORLD_SIZE,
        augment=True,
        cache=opt.cache,
        rank=LOCAL_RANK,
        workers=nw,
    )
    test_dir = data_dir / "test" if (data_dir / "test").exists() else data_dir / "val"  # data/test or data/val
    if RANK in {-1, 0}:
        testloader = create_classification_dataloader(
            path=test_dir,
            imgsz=imgsz,
            batch_size=bs // WORLD_SIZE * 2,
            augment=False,
            cache=opt.cache,
            rank=-1,
            workers=nw,
        )

    # Model
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        if Path(opt.model).is_file() or opt.model.endswith(".pt"):
            model = attempt_load(opt.model, device="cpu", fuse=False)
        elif opt.model in torchvision.models.__dict__:  # TorchVision models i.e. resnet50, efficientnet_b0
            model = torchvision.models.__dict__[opt.model](weights="IMAGENET1K_V1" if pretrained else None)
        else:
            m = hub.list("ultralytics/yolov5")  # + hub.list('pytorch/vision')  # models
            raise ModuleNotFoundError(f"--model {opt.model} not found. Available models are: \n" + "\n".join(m))
        if isinstance(model, DetectionModel):
            LOGGER.warning("WARNING ⚠️ pass YOLOv5 classifier model with '-cls' suffix, i.e. '--model yolov5s-cls.pt'")
            model = ClassificationModel(model=model, nc=nc, cutoff=opt.cutoff or 10)  # convert to classification model
        reshape_classifier_output(model, nc)  # update class count
    for m in model.modules():
        if not pretrained and hasattr(m, "reset_parameters"):
            m.reset_parameters()
        if isinstance(m, torch.nn.Dropout) and opt.dropout is not None:
            m.p = opt.dropout  # set dropout
    for p in model.parameters():
        p.requires_grad = True  # for training
    model = model.to(device)

    # Info
    if RANK in {-1, 0}:
        model.names = trainloader.dataset.classes  # attach class names
        model.transforms = testloader.dataset.torch_transforms  # attach inference transforms
        model_info(model)
        if opt.verbose:
            LOGGER.info(model)
        images, labels = next(iter(trainloader))
        file = imshow_cls(images[:25], labels[:25], names=model.names, f=save_dir / "train_images.jpg")
        logger.log_images(file, name="Train Examples")
        logger.log_graph(model, imgsz)  # log model

    # Optimizer
    optimizer = smart_optimizer(model, opt.optimizer, opt.lr0, momentum=0.9, decay=opt.decay)

    # Scheduler
    lrf = 0.01  # final lr (fraction of lr0)
    # lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf  # cosine

    def lf(x):
        """Linear learning rate scheduler function, scaling learning rate from initial value to `lrf` over `epochs`."""
        return (1 - x / epochs) * (1 - lrf) + lrf  # linear

    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=lr0, total_steps=epochs, pct_start=0.1,
    #                                     final_div_factor=1 / 25 / lrf)

    # EMA
    ema = ModelEMA(model) if RANK in {-1, 0} else None

    # DDP mode
    if cuda and RANK != -1:
        model = smart_DDP(model)

    # Train
    t0 = time.time()
    criterion = smartCrossEntropyLoss(label_smoothing=opt.label_smoothing)  # loss function
    best_fitness = 0.0
    scaler = amp.GradScaler(enabled=cuda)
    val = test_dir.stem  # 'val' or 'test'
    LOGGER.info(
        f"Image sizes {imgsz} train, {imgsz} test\n"
        f"Using {nw * WORLD_SIZE} dataloader workers\n"
        f"Logging results to {colorstr('bold', save_dir)}\n"
        f"Starting {opt.model} training on {data} dataset with {nc} classes for {epochs} epochs...\n\n"
        f"{'Epoch':>10}{'GPU_mem':>10}{'train_loss':>12}{f'{val}_loss':>12}{'top1_acc':>12}{'top5_acc':>12}"
    )
    for epoch in range(epochs):  # loop over the dataset multiple times
        tloss, vloss, fitness = 0.0, 0.0, 0.0  # train loss, val loss, fitness
        model.train()
        if RANK != -1:
            trainloader.sampler.set_epoch(epoch)
        pbar = enumerate(trainloader)
        if RANK in {-1, 0}:
            pbar = tqdm(enumerate(trainloader), total=len(trainloader), bar_format=TQDM_BAR_FORMAT)
        for i, (images, labels) in pbar:  # progress bar
            images, labels = images.to(device, non_blocking=True), labels.to(device)

            # Forward
            with amp.autocast(enabled=cuda):  # stability issues when enabled
                loss = criterion(model(images), labels)

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            scaler.unscale_(optimizer)  # unscale gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if ema:
                ema.update(model)

            if RANK in {-1, 0}:
                # Print
                tloss = (tloss * i + loss.item()) / (i + 1)  # update mean losses
                mem = "%.3gG" % (torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0)  # (GB)
                pbar.desc = f"{f'{epoch + 1}/{epochs}':>10}{mem:>10}{tloss:>12.3g}" + " " * 36

                # Test
                if i == len(pbar) - 1:  # last batch
                    top1, top5, vloss = validate.run(
                        model=ema.ema, dataloader=testloader, criterion=criterion, pbar=pbar
                    )  # test accuracy, loss
                    fitness = top1  # define fitness as top1 accuracy

        # Scheduler
        scheduler.step()

        # Log metrics
        if RANK in {-1, 0}:
            # Best fitness
            if fitness > best_fitness:
                best_fitness = fitness

            # Log
            metrics = {
                "train/loss": tloss,
                f"{val}/loss": vloss,
                "metrics/accuracy_top1": top1,
                "metrics/accuracy_top5": top5,
                "lr/0": optimizer.param_groups[0]["lr"],
            }  # learning rate
            logger.log_metrics(metrics, epoch)

            # Save model
            final_epoch = epoch + 1 == epochs
            if (not opt.nosave) or final_epoch:
                ckpt = {
                    "epoch": epoch,
                    "best_fitness": best_fitness,
                    "model": deepcopy(ema.ema).half(),  # deepcopy(de_parallel(model)).half(),
                    "ema": None,  # deepcopy(ema.ema).half(),
                    "updates": ema.updates,
                    "optimizer": None,  # optimizer.state_dict(),
                    "opt": vars(opt),
                    "git": GIT_INFO,  # {remote, branch, commit} if a git repo
                    "date": datetime.now().isoformat(),
                }

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fitness:
                    torch.save(ckpt, best)
                del ckpt

    # Train complete
    if RANK in {-1, 0} and final_epoch:
        LOGGER.info(
            f"\nTraining complete ({(time.time() - t0) / 3600:.3f} hours)"
            f"\nResults saved to {colorstr('bold', save_dir)}"
            f"\nPredict:     python classify/predict.py --weights {best} --source im.jpg"
            f"\nValidate:    python classify/val.py --weights {best} --data {data_dir}"
            f"\nExport:      python export.py --weights {best} --include onnx"
            f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{best}')"
            f"\nVisualize:   https://netron.app\n"
        )

        # Plot examples
        images, labels = (x[:25] for x in next(iter(testloader)))  # first 25 images and labels
        pred = torch.max(ema.ema(images.to(device)), 1)[1]
        file = imshow_cls(images, labels, pred, de_parallel(model).names, verbose=False, f=save_dir / "test_images.jpg")

        # Log results
        meta = {"epochs": epochs, "top1_acc": best_fitness, "date": datetime.now().isoformat()}
        logger.log_images(file, name="Test Examples (true-predicted)", epoch=epoch)
        logger.log_model(best, epochs, metadata=meta)


def parse_opt(known=False):
    """Parses command line arguments for YOLOv5 training including model path, dataset, epochs, and more, returning
    parsed arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="yolov5s-cls.pt", help="initial weights path")
    parser.add_argument("--data", type=str, default="imagenette160", help="cifar10, cifar100, mnist, imagenet, ...")
    parser.add_argument("--epochs", type=int, default=10, help="total training epochs")
    parser.add_argument("--batch-size", type=int, default=64, help="total batch size for all GPUs")
    parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=224, help="train, val image size (pixels)")
    parser.add_argument("--nosave", action="store_true", help="only save final checkpoint")
    parser.add_argument("--cache", type=str, nargs="?", const="ram", help='--cache images in "ram" (default) or "disk"')
    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
    parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
    parser.add_argument("--project", default=ROOT / "runs/train-cls", help="save to project/name")
    parser.add_argument("--name", default="exp", help="save to project/name")
    parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
    parser.add_argument("--pretrained", nargs="?", const=True, default=True, help="start from i.e. --pretrained False")
    parser.add_argument("--optimizer", choices=["SGD", "Adam", "AdamW", "RMSProp"], default="Adam", help="optimizer")
    parser.add_argument("--lr0", type=float, default=0.001, help="initial learning rate")
    parser.add_argument("--decay", type=float, default=5e-5, help="weight decay")
    parser.add_argument("--label-smoothing", type=float, default=0.1, help="Label smoothing epsilon")
    parser.add_argument("--cutoff", type=int, default=None, help="Model layer cutoff index for Classify() head")
    parser.add_argument("--dropout", type=float, default=None, help="Dropout (fraction)")
    parser.add_argument("--verbose", action="store_true", help="Verbose mode")
    parser.add_argument("--seed", type=int, default=0, help="Global training seed")
    parser.add_argument("--local_rank", type=int, default=-1, help="Automatic DDP Multi-GPU argument, do not modify")
    return parser.parse_known_args()[0] if known else parser.parse_args()


def main(opt):
    """Executes YOLOv5 training with given options, handling device setup and DDP mode; includes pre-training checks."""
    if RANK in {-1, 0}:
        print_args(vars(opt))
        check_git_status()
        check_requirements(ROOT / "requirements.txt")

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        assert opt.batch_size != -1, "AutoBatch is coming soon for classification, please pass a valid --batch-size"
        assert opt.batch_size % WORLD_SIZE == 0, f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE"
        assert torch.cuda.device_count() > LOCAL_RANK, "insufficient CUDA devices for DDP command"
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device("cuda", LOCAL_RANK)
        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")

    # Parameters
    opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run

    # Train
    train(opt, device)


def run(**kwargs):
    """
    Executes YOLOv5 model training or inference with specified parameters, returning updated options.

    Example: from yolov5 import classify; classify.train.run(data='mnist', imgsz=320, model='yolov5m')
    """
    opt = parse_opt(True)
    for k, v in kwargs.items():
        setattr(opt, k, v)
    main(opt)
    return opt


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
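A quick numeric check of the linear schedule defined by `lf` above: the multiplier starts at 1.0 (so training begins at `lr0`) and decays linearly toward `lrf`:

```python
epochs, lrf = 10, 0.01  # values mirroring the defaults above

def lf(x):
    return (1 - x / epochs) * (1 - lrf) + lrf  # linear

print(lf(0))       # 1.0   -> first epoch runs at lr0
print(lf(5))       # 0.505 -> roughly half of lr0 at the midpoint
print(lf(epochs))  # 0.01  -> decayed to lrf * lr0
```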

yolov5/classify/tutorial.ipynb (vendored, new file, 1488 lines): diff suppressed because it is too large.

yolov5/classify/val.py (new file, 178 lines)

@@ -0,0 +1,178 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Validate a trained YOLOv5 classification model on a classification dataset.

Usage:
    $ bash data/scripts/get_imagenet.sh --val  # download ImageNet val split (6.3G, 50000 images)
    $ python classify/val.py --weights yolov5m-cls.pt --data ../datasets/imagenet --img 224  # validate ImageNet

Usage - formats:
    $ python classify/val.py --weights yolov5s-cls.pt              # PyTorch
                                       yolov5s-cls.torchscript     # TorchScript
                                       yolov5s-cls.onnx            # ONNX Runtime or OpenCV DNN with --dnn
                                       yolov5s-cls_openvino_model  # OpenVINO
                                       yolov5s-cls.engine          # TensorRT
                                       yolov5s-cls.mlmodel         # CoreML (macOS-only)
                                       yolov5s-cls_saved_model     # TensorFlow SavedModel
                                       yolov5s-cls.pb              # TensorFlow GraphDef
                                       yolov5s-cls.tflite          # TensorFlow Lite
                                       yolov5s-cls_edgetpu.tflite  # TensorFlow Edge TPU
                                       yolov5s-cls_paddle_model    # PaddlePaddle
"""

import argparse
import os
import sys
from pathlib import Path

import torch
from tqdm import tqdm

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.common import DetectMultiBackend
from utils.dataloaders import create_classification_dataloader
from utils.general import (
    LOGGER,
    TQDM_BAR_FORMAT,
    Profile,
    check_img_size,
    check_requirements,
    colorstr,
    increment_path,
    print_args,
)
from utils.torch_utils import select_device, smart_inference_mode


@smart_inference_mode()
def run(
    data=ROOT / "../datasets/mnist",  # dataset dir
    weights=ROOT / "yolov5s-cls.pt",  # model.pt path(s)
    batch_size=128,  # batch size
    imgsz=224,  # inference size (pixels)
    device="",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
    workers=8,  # max dataloader workers (per RANK in DDP mode)
    verbose=False,  # verbose output
    project=ROOT / "runs/val-cls",  # save to project/name
    name="exp",  # save to project/name
    exist_ok=False,  # existing project/name ok, do not increment
    half=False,  # use FP16 half-precision inference
    dnn=False,  # use OpenCV DNN for ONNX inference
    model=None,
    dataloader=None,
    criterion=None,
    pbar=None,
):
    """Validates a YOLOv5 classification model on a dataset, computing metrics like top1 and top5 accuracy."""
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device, pt, jit, engine = next(model.parameters()).device, True, False, False  # get model device, PyTorch model
        half &= device.type != "cpu"  # half precision only supported on CUDA
        model.half() if half else model.float()
    else:  # called directly
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        save_dir.mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = DetectMultiBackend(weights, device=device, dnn=dnn, fp16=half)
        stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
        imgsz = check_img_size(imgsz, s=stride)  # check image size
        half = model.fp16  # FP16 supported on limited backends with CUDA
        if engine:
            batch_size = model.batch_size
        else:
            device = model.device
            if not (pt or jit):
                batch_size = 1  # export.py models default to batch-size 1
                LOGGER.info(f"Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")

        # Dataloader
        data = Path(data)
        test_dir = data / "test" if (data / "test").exists() else data / "val"  # data/test or data/val
        dataloader = create_classification_dataloader(
            path=test_dir, imgsz=imgsz, batch_size=batch_size, augment=False, rank=-1, workers=workers
        )

    model.eval()
    pred, targets, loss, dt = [], [], 0, (Profile(device=device), Profile(device=device), Profile(device=device))
    n = len(dataloader)  # number of batches
    action = "validating" if dataloader.dataset.root.stem == "val" else "testing"
    desc = f"{pbar.desc[:-36]}{action:>36}" if pbar else f"{action}"
    bar = tqdm(dataloader, desc, n, not training, bar_format=TQDM_BAR_FORMAT, position=0)
    with torch.cuda.amp.autocast(enabled=device.type != "cpu"):
        for images, labels in bar:
            with dt[0]:
                images, labels = images.to(device, non_blocking=True), labels.to(device)

            with dt[1]:
                y = model(images)

            with dt[2]:
                pred.append(y.argsort(1, descending=True)[:, :5])
                targets.append(labels)
                if criterion:
                    loss += criterion(y, labels)

    loss /= n
    pred, targets = torch.cat(pred), torch.cat(targets)
    correct = (targets[:, None] == pred).float()
    acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1)  # (top1, top5) accuracy
    top1, top5 = acc.mean(0).tolist()

    if pbar:
        pbar.desc = f"{pbar.desc[:-36]}{loss:>12.3g}{top1:>12.3g}{top5:>12.3g}"
    if verbose:  # all classes
        LOGGER.info(f"{'Class':>24}{'Images':>12}{'top1_acc':>12}{'top5_acc':>12}")
        LOGGER.info(f"{'all':>24}{targets.shape[0]:>12}{top1:>12.3g}{top5:>12.3g}")
        for i, c in model.names.items():
            acc_i = acc[targets == i]
            top1i, top5i = acc_i.mean(0).tolist()
            LOGGER.info(f"{c:>24}{acc_i.shape[0]:>12}{top1i:>12.3g}{top5i:>12.3g}")

        # Print results
        t = tuple(x.t / len(dataloader.dataset.samples) * 1e3 for x in dt)  # speeds per image
        shape = (1, 3, imgsz, imgsz)
        LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms post-process per image at shape {shape}" % t)
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")

    return top1, top5, loss


def parse_opt():
    """Parses and returns command line arguments for YOLOv5 model evaluation and inference settings."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", type=str, default=ROOT / "../datasets/mnist", help="dataset path")
    parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-cls.pt", help="model.pt path(s)")
    parser.add_argument("--batch-size", type=int, default=128, help="batch size")
    parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=224, help="inference size (pixels)")
    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
    parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
    parser.add_argument("--verbose", nargs="?", const=True, default=True, help="verbose output")
    parser.add_argument("--project", default=ROOT / "runs/val-cls", help="save to project/name")
    parser.add_argument("--name", default="exp", help="save to project/name")
    parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
    parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
    parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
    opt = parser.parse_args()
    print_args(vars(opt))
    return opt


def main(opt):
    """Executes the YOLOv5 model prediction workflow, handling argument parsing and requirement checks."""
    check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
    run(**vars(opt))


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
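The top-1/top-5 bookkeeping above is compact; a small self-contained check with dummy predictions shows how it behaves:

```python
import torch

# Two samples with targets 3 and 1; each row of `pred` holds the
# model's five highest-scoring class indices, best first.
targets = torch.tensor([3, 1])
pred = torch.tensor([[3, 0, 1, 2, 4],   # correct at rank 1
                     [0, 2, 1, 4, 5]])  # correct at rank 3
correct = (targets[:, None] == pred).float()
acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1)  # (top1, top5)
top1, top5 = acc.mean(0).tolist()
print(top1, top5)  # 0.5 1.0
```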


@@ -0,0 +1,73 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
# Example usage: python train.py --data Argoverse.yaml
# parent
# ├── yolov5
# └── datasets
#     └── Argoverse ← downloads here (31.3 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse # dataset root dir
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: bus
  5: truck
  6: traffic_light
  7: stop_sign

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import json

  from tqdm import tqdm
  from utils.general import download, Path


  def argoverse2yolo(set):
      labels = {}
      a = json.load(open(set, "rb"))
      for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
          img_id = annot['image_id']
          img_name = a['images'][img_id]['name']
          img_label_name = f'{img_name[:-3]}txt'

          cls = annot['category_id']  # instance class id
          x_center, y_center, width, height = annot['bbox']
          x_center = (x_center + width / 2) / 1920.0  # offset and scale
          y_center = (y_center + height / 2) / 1200.0  # offset and scale
          width /= 1920.0  # scale
          height /= 1200.0  # scale

          img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
          if not img_dir.exists():
              img_dir.mkdir(parents=True, exist_ok=True)

          k = str(img_dir / img_label_name)
          if k not in labels:
              labels[k] = []
          labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")

      for k in labels:
          with open(k, "w") as f:
              f.writelines(labels[k])


  # Download
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
  download(urls, dir=dir, delete=False)

  # Convert
  annotations_dir = 'Argoverse-HD/annotations/'
  (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')  # rename 'tracking' to 'images'
  for d in "train.json", "val.json":
      argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
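The conversion in `argoverse2yolo` turns a top-left `(x, y, w, h)` box into the normalized center format YOLO expects; a quick check with one sample box on a 1920x1200 frame:

```python
# Sample box: top-left corner (100, 200), 50 px wide, 80 px tall.
x, y, w, h = 100, 200, 50, 80
x_center = (x + w / 2) / 1920.0  # offset to center, then scale
y_center = (y + h / 2) / 1200.0
print(f"{x_center:.4f} {y_center:.4f} {w / 1920.0:.4f} {h / 1200.0:.4f}")
# 0.0651 0.2000 0.0260 0.0667
```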


@@ -0,0 +1,53 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan
# Example usage: python train.py --data GlobalWheat2020.yaml
# parent
# ├── yolov5
# └── datasets
#     └── GlobalWheat2020 ← downloads here (7.0 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/GlobalWheat2020 # dataset root dir
train: # train images (relative to 'path') 3422 images
  - images/arvalis_1
  - images/arvalis_2
  - images/arvalis_3
  - images/ethz_1
  - images/rres_1
  - images/inrae_1
  - images/usask_1
val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
  - images/ethz_1
test: # test images (optional) 1276 images
  - images/utokyo_1
  - images/utokyo_2
  - images/nau_1
  - images/uq_1

# Classes
names:
  0: wheat_head

# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  from utils.general import download, Path

  # Download
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
          'https://github.com/ultralytics/assets/releases/download/v0.0.0/GlobalWheat2020_labels.zip']
  download(urls, dir=dir)

  # Make Directories
  for p in 'annotations', 'images', 'labels':
      (dir / p).mkdir(parents=True, exist_ok=True)

  # Move
  for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
           'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
      (dir / p).rename(dir / 'images' / p)  # move to /images
      f = (dir / p).with_suffix('.json')  # json file
      if f.exists():
          f.rename((dir / 'annotations' / p).with_suffix('.json'))  # move to /annotations
yolov5/data/ImageNet.yaml (new file, 1021 lines): diff suppressed because it is too large.


@@ -0,0 +1,31 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
# Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
# Example usage: python classify/train.py --data imagenet
# parent
# ├── yolov5
# └── datasets
#     └── imagenet10 ← downloads here

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/imagenet10 # dataset root dir
train: train # train images (relative to 'path') 1281167 images
val: val # val images (relative to 'path') 50000 images
test: # test images (optional)

# Classes
names:
  0: tench
  1: goldfish
  2: great white shark
  3: tiger shark
  4: hammerhead shark
  5: electric ray
  6: stingray
  7: cock
  8: hen
  9: ostrich

# Download script/URL (optional)
download: data/scripts/get_imagenet10.sh


@@ -0,0 +1,120 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
# Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
# Example usage: python classify/train.py --data imagenet
# parent
# ├── yolov5
# └── datasets
#     └── imagenet100 ← downloads here

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/imagenet100 # dataset root dir
train: train # train images (relative to 'path') 1281167 images
val: val # val images (relative to 'path') 50000 images
test: # test images (optional)

# Classes
names:
  0: tench
  1: goldfish
  2: great white shark
  3: tiger shark
  4: hammerhead shark
  5: electric ray
  6: stingray
  7: cock
  8: hen
  9: ostrich
  10: brambling
  11: goldfinch
  12: house finch
  13: junco
  14: indigo bunting
  15: American robin
  16: bulbul
  17: jay
  18: magpie
  19: chickadee
  20: American dipper
  21: kite
  22: bald eagle
  23: vulture
  24: great grey owl
  25: fire salamander
  26: smooth newt
  27: newt
  28: spotted salamander
  29: axolotl
  30: American bullfrog
  31: tree frog
  32: tailed frog
  33: loggerhead sea turtle
  34: leatherback sea turtle
  35: mud turtle
  36: terrapin
  37: box turtle
  38: banded gecko
  39: green iguana
  40: Carolina anole
  41: desert grassland whiptail lizard
  42: agama
  43: frilled-necked lizard
  44: alligator lizard
  45: Gila monster
  46: European green lizard
  47: chameleon
  48: Komodo dragon
  49: Nile crocodile
  50: American alligator
  51: triceratops
  52: worm snake
  53: ring-necked snake
  54: eastern hog-nosed snake
  55: smooth green snake
  56: kingsnake
  57: garter snake
  58: water snake
  59: vine snake
  60: night snake
  61: boa constrictor
  62: African rock python
  63: Indian cobra
  64: green mamba
  65: sea snake
  66: Saharan horned viper
  67: eastern diamondback rattlesnake
  68: sidewinder
  69: trilobite
  70: harvestman
  71: scorpion
  72: yellow garden spider
  73: barn spider
  74: European garden spider
  75: southern black widow
  76: tarantula
  77: wolf spider
  78: tick
  79: centipede
  80: black grouse
  81: ptarmigan
  82: ruffed grouse
  83: prairie grouse
  84: peacock
  85: quail
  86: partridge
  87: grey parrot
  88: macaw
  89: sulphur-crested cockatoo
  90: lorikeet
  91: coucal
  92: bee eater
  93: hornbill
  94: hummingbird
  95: jacamar
  96: toucan
  97: duck
  98: red-breasted merganser
  99: goose

# Download script/URL (optional)
download: data/scripts/get_imagenet100.sh

File diff suppressed because it is too large.

yolov5/data/Objects365.yaml (new file, 437 lines)

@@ -0,0 +1,437 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Objects365 dataset https://www.objects365.org/ by Megvii
# Example usage: python train.py --data Objects365.yaml
# parent
# ├── yolov5
# └── datasets
#     └── Objects365 ← downloads here (712 GB = 367G data + 345G zips)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Objects365 # dataset root dir
train: images/train # train images (relative to 'path') 1742289 images
val: images/val # val images (relative to 'path') 80000 images
test: # test images (optional)
# Classes
names:
0: Person
1: Sneakers
2: Chair
3: Other Shoes
4: Hat
5: Car
6: Lamp
7: Glasses
8: Bottle
9: Desk
10: Cup
11: Street Lights
12: Cabinet/shelf
13: Handbag/Satchel
14: Bracelet
15: Plate
16: Picture/Frame
17: Helmet
18: Book
19: Gloves
20: Storage box
21: Boat
22: Leather Shoes
23: Flower
24: Bench
25: Potted Plant
26: Bowl/Basin
27: Flag
28: Pillow
29: Boots
30: Vase
31: Microphone
32: Necklace
33: Ring
34: SUV
35: Wine Glass
36: Belt
37: Monitor/TV
38: Backpack
39: Umbrella
40: Traffic Light
41: Speaker
42: Watch
43: Tie
44: Trash bin Can
45: Slippers
46: Bicycle
47: Stool
48: Barrel/bucket
49: Van
50: Couch
51: Sandals
52: Basket
53: Drum
54: Pen/Pencil
55: Bus
56: Wild Bird
57: High Heels
58: Motorcycle
59: Guitar
60: Carpet
61: Cell Phone
62: Bread
63: Camera
64: Canned
65: Truck
66: Traffic cone
67: Cymbal
68: Lifesaver
69: Towel
70: Stuffed Toy
71: Candle
72: Sailboat
73: Laptop
74: Awning
75: Bed
76: Faucet
77: Tent
78: Horse
79: Mirror
80: Power outlet
81: Sink
82: Apple
83: Air Conditioner
84: Knife
85: Hockey Stick
86: Paddle
87: Pickup Truck
88: Fork
89: Traffic Sign
90: Balloon
91: Tripod
92: Dog
93: Spoon
94: Clock
95: Pot
96: Cow
97: Cake
98: Dinning Table
99: Sheep
100: Hanger
101: Blackboard/Whiteboard
102: Napkin
103: Other Fish
104: Orange/Tangerine
105: Toiletry
106: Keyboard
107: Tomato
108: Lantern
109: Machinery Vehicle
110: Fan
111: Green Vegetables
112: Banana
113: Baseball Glove
114: Airplane
115: Mouse
116: Train
117: Pumpkin
118: Soccer
119: Skiboard
120: Luggage
121: Nightstand
122: Tea pot
123: Telephone
124: Trolley
125: Head Phone
126: Sports Car
127: Stop Sign
128: Dessert
129: Scooter
130: Stroller
131: Crane
132: Remote
133: Refrigerator
134: Oven
135: Lemon
136: Duck
137: Baseball Bat
138: Surveillance Camera
139: Cat
140: Jug
141: Broccoli
142: Piano
143: Pizza
144: Elephant
145: Skateboard
146: Surfboard
147: Gun
148: Skating and Skiing shoes
149: Gas stove
150: Donut
151: Bow Tie
152: Carrot
153: Toilet
154: Kite
155: Strawberry
156: Other Balls
157: Shovel
158: Pepper
159: Computer Box
160: Toilet Paper
161: Cleaning Products
162: Chopsticks
163: Microwave
164: Pigeon
165: Baseball
166: Cutting/chopping Board
167: Coffee Table
168: Side Table
169: Scissors
170: Marker
171: Pie
172: Ladder
173: Snowboard
174: Cookies
175: Radiator
176: Fire Hydrant
177: Basketball
178: Zebra
179: Grape
180: Giraffe
181: Potato
182: Sausage
183: Tricycle
184: Violin
185: Egg
186: Fire Extinguisher
187: Candy
188: Fire Truck
189: Billiards
190: Converter
191: Bathtub
192: Wheelchair
193: Golf Club
194: Briefcase
195: Cucumber
196: Cigar/Cigarette
197: Paint Brush
198: Pear
199: Heavy Truck
200: Hamburger
201: Extractor
202: Extension Cord
203: Tong
204: Tennis Racket
205: Folder
206: American Football
207: earphone
208: Mask
209: Kettle
210: Tennis
211: Ship
212: Swing
213: Coffee Machine
214: Slide
215: Carriage
216: Onion
217: Green beans
218: Projector
219: Frisbee
220: Washing Machine/Drying Machine
221: Chicken
222: Printer
223: Watermelon
224: Saxophone
225: Tissue
226: Toothbrush
227: Ice cream
228: Hot-air balloon
229: Cello
230: French Fries
231: Scale
232: Trophy
233: Cabbage
234: Hot dog
235: Blender
236: Peach
237: Rice
238: Wallet/Purse
239: Volleyball
240: Deer
241: Goose
242: Tape
243: Tablet
244: Cosmetics
245: Trumpet
246: Pineapple
247: Golf Ball
248: Ambulance
249: Parking meter
250: Mango
251: Key
252: Hurdle
253: Fishing Rod
254: Medal
255: Flute
256: Brush
257: Penguin
258: Megaphone
259: Corn
260: Lettuce
261: Garlic
262: Swan
263: Helicopter
264: Green Onion
265: Sandwich
266: Nuts
267: Speed Limit Sign
268: Induction Cooker
269: Broom
270: Trombone
271: Plum
272: Rickshaw
273: Goldfish
274: Kiwi fruit
275: Router/modem
276: Poker Card
277: Toaster
278: Shrimp
279: Sushi
280: Cheese
281: Notepaper
282: Cherry
283: Pliers
284: CD
285: Pasta
286: Hammer
287: Cue
288: Avocado
289: Hamimelon
290: Flask
291: Mushroom
292: Screwdriver
293: Soap
294: Recorder
295: Bear
296: Eggplant
297: Board Eraser
298: Coconut
299: Tape Measure/Ruler
300: Pig
301: Showerhead
302: Globe
303: Chips
304: Steak
305: Crosswalk Sign
306: Stapler
307: Camel
308: Formula 1
309: Pomegranate
310: Dishwasher
311: Crab
312: Hoverboard
313: Meat ball
314: Rice Cooker
315: Tuba
316: Calculator
317: Papaya
318: Antelope
319: Parrot
320: Seal
321: Butterfly
322: Dumbbell
323: Donkey
324: Lion
325: Urinal
326: Dolphin
327: Electric Drill
328: Hair Dryer
329: Egg tart
330: Jellyfish
331: Treadmill
332: Lighter
333: Grapefruit
334: Game board
335: Mop
336: Radish
337: Baozi
338: Target
339: French
340: Spring Rolls
341: Monkey
342: Rabbit
343: Pencil Case
344: Yak
345: Red Cabbage
346: Binoculars
347: Asparagus
348: Barbell
349: Scallop
350: Noddles
351: Comb
352: Dumpling
353: Oyster
354: Table Tennis paddle
355: Cosmetics Brush/Eyeliner Pencil
356: Chainsaw
357: Eraser
358: Lobster
359: Durian
360: Okra
361: Lipstick
362: Cosmetics Mirror
363: Curling
364: Table Tennis
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from tqdm import tqdm
from utils.general import Path, check_requirements, download, np, xyxy2xywhn
check_requirements('pycocotools>=2.0')
from pycocotools.coco import COCO
# Make Directories
dir = Path(yaml['path']) # dataset root dir
for p in 'images', 'labels':
(dir / p).mkdir(parents=True, exist_ok=True)
for q in 'train', 'val':
(dir / p / q).mkdir(parents=True, exist_ok=True)
# Train, Val Splits
for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
print(f"Processing {split} in {patches} patches ...")
images, labels = dir / 'images' / split, dir / 'labels' / split
# Download
url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
if split == 'train':
download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json
download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, delete=False, threads=8)
elif split == 'val':
download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json
download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, delete=False, threads=8)
download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, delete=False, threads=8)
# Move
for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
f.rename(images / f.name) # move to /images/{split}
# Labels
coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
for cid, cat in enumerate(names):
catIds = coco.getCatIds(catNms=[cat])
imgIds = coco.getImgIds(catIds=catIds)
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
width, height = im["width"], im["height"]
path = Path(im["file_name"]) # image filename
try:
with open(labels / path.with_suffix('.txt').name, 'a') as file:
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=False)
for a in coco.loadAnns(annIds):
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
xyxy = np.array([x, y, x + w, y + h])[None] # pixels(1,4)
x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0] # normalized and clipped
file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
except Exception as e:
print(e)
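Each download: block above is executed by YOLOv5's dataset check with yaml (the parsed file) bound in scope, and ends by writing one labels/*.txt line per object. A minimal standalone sketch of the pixel-xyxy to normalized-xywh conversion that xyxy2xywhn performs (the real helper lives in utils/general.py; this version is for illustration only):
def xyxy_to_xywhn(box, img_w, img_h):
    # box = [x1, y1, x2, y2] in pixels -> (xc, yc, w, h) as 0-1 fractions
    x1, y1, x2, y2 = box
    return ((x1 + x2) / 2 / img_w,  # box centre x
            (y1 + y2) / 2 / img_h,  # box centre y
            (x2 - x1) / img_w,      # box width
            (y2 - y1) / img_h)      # box height
# a 100x50 px box at (200, 150) in a 640x480 image becomes this label line:
print("0 %.5f %.5f %.5f %.5f" % xyxy_to_xywhn([200, 150, 300, 200], 640, 480))
# -> 0 0.39062 0.36458 0.15625 0.10417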

52
yolov5/data/SKU-110K.yaml Normal file
View File

@ -0,0 +1,52 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
# Example usage: python train.py --data SKU-110K.yaml
# parent
# ├── yolov5
# └── datasets
# └── SKU-110K ← downloads here (13.6 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/SKU-110K # dataset root dir
train: train.txt # train images (relative to 'path') 8219 images
val: val.txt # val images (relative to 'path') 588 images
test: test.txt # test images (optional) 2936 images
# Classes
names:
0: object
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import shutil
from tqdm import tqdm
from utils.general import np, pd, Path, download, xyxy2xywh
# Download
dir = Path(yaml['path']) # dataset root dir
parent = Path(dir.parent) # download dir
urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
download(urls, dir=parent, delete=False)
# Rename directories
if dir.exists():
shutil.rmtree(dir)
(parent / 'SKU110K_fixed').rename(dir) # rename dir
(dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
# Convert labels
names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names
for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations
images, unique_images = x[:, 0], np.unique(x[:, 0])
with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f:
f.writelines(f'./images/{s}\n' for s in unique_images)
for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
cls = 0 # single-class dataset
with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
for r in x[images == im]:
w, h = r[6], r[7] # image width, height
xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance
f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label

99
yolov5/data/VOC.yaml Normal file
View File

@ -0,0 +1,99 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── datasets
# └── VOC ← downloads here (2.8 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
- images/train2012
- images/train2007
- images/val2012
- images/val2007
val: # val images (relative to 'path') 4952 images
- images/test2007
test: # test images (optional)
- images/test2007
# Classes
names:
0: aeroplane
1: bicycle
2: bird
3: boat
4: bottle
5: bus
6: car
7: cat
8: chair
9: cow
10: diningtable
11: dog
12: horse
13: motorbike
14: person
15: pottedplant
16: sheep
17: sofa
18: train
19: tvmonitor
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import xml.etree.ElementTree as ET
from tqdm import tqdm
from utils.general import download, Path
def convert_label(path, lb_path, year, image_id):
def convert_box(size, box):
dw, dh = 1. / size[0], 1. / size[1]
x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
return x * dw, y * dh, w * dw, h * dh
in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
out_file = open(lb_path, 'w')
tree = ET.parse(in_file)
root = tree.getroot()
size = root.find('size')
w = int(size.find('width').text)
h = int(size.find('height').text)
names = list(yaml['names'].values()) # names list
for obj in root.iter('object'):
cls = obj.find('name').text
if cls in names and int(obj.find('difficult').text) != 1:
xmlbox = obj.find('bndbox')
bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
cls_id = names.index(cls) # class id
out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
# Download
dir = Path(yaml['path']) # dataset root dir
url = 'https://github.com/ultralytics/assets/releases/download/v0.0.0/'
urls = [f'{url}VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
f'{url}VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
f'{url}VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
download(urls, dir=dir / 'images', delete=False, curl=True, threads=3)
# Convert
path = dir / 'images/VOCdevkit'
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
imgs_path = dir / 'images' / f'{image_set}{year}'
lbs_path = dir / 'labels' / f'{image_set}{year}'
imgs_path.mkdir(exist_ok=True, parents=True)
lbs_path.mkdir(exist_ok=True, parents=True)
with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
image_ids = f.read().strip().split()
for id in tqdm(image_ids, desc=f'{image_set}{year}'):
f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path
lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path
f.rename(imgs_path / f.name) # move image
convert_label(path, lb_path, year, id) # convert labels to YOLO format
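PASCAL VOC stores 1-indexed corner coordinates, unpacked above in the order (xmin, xmax, ymin, ymax), which is why convert_box subtracts 1 when computing the 0-indexed box centre. A quick standalone check of the arithmetic:
def convert_box(size, box):  # size = (w, h), box = (xmin, xmax, ymin, ymax)
    dw, dh = 1.0 / size[0], 1.0 / size[1]
    x = ((box[0] + box[1]) / 2.0 - 1) * dw  # normalized centre x
    y = ((box[2] + box[3]) / 2.0 - 1) * dh  # normalized centre y
    return x, y, (box[1] - box[0]) * dw, (box[3] - box[2]) * dh
print(convert_box((640, 480), (101.0, 301.0, 101.0, 201.0)))
# -> (0.3125, 0.3125, 0.3125, 0.20833...): centre and size as image fractions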

69
yolov5/data/VisDrone.yaml Normal file
View File

@ -0,0 +1,69 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
# Example usage: python train.py --data VisDrone.yaml
# parent
# ├── yolov5
# └── datasets
# └── VisDrone ← downloads here (2.3 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VisDrone # dataset root dir
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
# Classes
names:
0: pedestrian
1: people
2: bicycle
3: car
4: van
5: truck
6: tricycle
7: awning-tricycle
8: bus
9: motor
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from utils.general import download, os, Path
def visdrone2yolo(dir):
from PIL import Image
from tqdm import tqdm
def convert_box(size, box):
# Convert VisDrone box to YOLO xywh box
dw = 1. / size[0]
dh = 1. / size[1]
return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh
(dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory
pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
for f in pbar:
img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
lines = []
with open(f, 'r') as file: # read annotation.txt
for row in [x.split(',') for x in file.read().strip().splitlines()]:
if row[4] == '0': # VisDrone 'ignored regions' class 0
continue
cls = int(row[5]) - 1
box = convert_box(img_size, tuple(map(int, row[:4])))
lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl:
fl.writelines(lines) # write label.txt
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-train.zip',
'https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-val.zip',
'https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-test-dev.zip',
'https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-test-challenge.zip']
download(urls, dir=dir, curl=True, threads=4)
# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
visdrone2yolo(dir / d) # convert VisDrone annotations to YOLO labels
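Each VisDrone annotation row is comma-separated; the indexing above assumes the documented layout x,y,w,h,score,category,truncation,occlusion, where score 0 marks 'ignored regions' and category ids are 1-based. A minimal sketch with hypothetical rows:
row = "684,8,273,116,0,0,0,0".split(",")   # hypothetical ignored-region row
assert row[4] == "0"                       # skipped by the loop above
row = "684,8,273,116,1,4,0,0".split(",")   # hypothetical row, category 4
assert int(row[5]) - 1 == 3                # 1-based id 4 -> class 3 ('car')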

115
yolov5/data/coco.yaml Normal file
View File

@ -0,0 +1,115 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# COCO 2017 dataset http://cocodataset.org by Microsoft
# Example usage: python train.py --data coco.yaml
# parent
# ├── yolov5
# └── datasets
# └── coco ← downloads here (20.1 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: |
from utils.general import download, Path
# Download labels
segments = False # segment or box labels
dir = Path(yaml['path']) # dataset root dir
url = 'https://github.com/ultralytics/assets/releases/download/v0.0.0/'
urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels
download(urls, dir=dir.parent)
# Download data
urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
download(urls, dir=dir / 'images', threads=3)
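After both download steps the layout matches the header comments; a sketch of the resulting tree (the labels zip unpacks alongside the images into the same root):
../datasets/coco
├── images/train2017, images/val2017, images/test2017   # from the image zips
├── labels/train2017, labels/val2017                    # from coco2017labels.zip
└── train2017.txt, val2017.txt, test-dev2017.txt        # image-list files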

100
yolov5/data/coco128-seg.yaml Normal file
View File

@ -0,0 +1,100 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# COCO128-seg dataset https://www.kaggle.com/datasets/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Example usage: python segment/train.py --data coco128-seg.yaml
# parent
# ├── yolov5
# └── datasets
# └── coco128-seg ← downloads here (7 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128-seg # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://github.com/ultralytics/assets/releases/download/v0.0.0/coco128-seg.zip
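Note that coco128-seg points train and val at the same 128 images, so it is a smoke-test dataset: useful for verifying the segmentation pipeline end to end before a real run, matching the segment/train.py usage shown later in this commit:
python segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640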

100
yolov5/data/coco128.yaml Normal file
View File

@ -0,0 +1,100 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# COCO128 dataset https://www.kaggle.com/datasets/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Example usage: python train.py --data coco128.yaml
# parent
# ├── yolov5
# └── datasets
# └── coco128 ← downloads here (7 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128 # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://github.com/ultralytics/assets/releases/download/v0.0.0/coco128.zip

BIN
yolov5/data/images/bus.jpg Normal file

Binary file not shown. (new file, 476 KiB)

Binary file not shown. (new file, 165 KiB)

152
yolov5/data/xView.yaml Normal file
View File

@ -0,0 +1,152 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
# -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! --------
# Example usage: python train.py --data xView.yaml
# parent
# ├── yolov5
# └── datasets
# └── xView ← downloads here (20.7 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/xView # dataset root dir
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
# Classes
names:
0: Fixed-wing Aircraft
1: Small Aircraft
2: Cargo Plane
3: Helicopter
4: Passenger Vehicle
5: Small Car
6: Bus
7: Pickup Truck
8: Utility Truck
9: Truck
10: Cargo Truck
11: Truck w/Box
12: Truck Tractor
13: Trailer
14: Truck w/Flatbed
15: Truck w/Liquid
16: Crane Truck
17: Railway Vehicle
18: Passenger Car
19: Cargo Car
20: Flat Car
21: Tank car
22: Locomotive
23: Maritime Vessel
24: Motorboat
25: Sailboat
26: Tugboat
27: Barge
28: Fishing Vessel
29: Ferry
30: Yacht
31: Container Ship
32: Oil Tanker
33: Engineering Vehicle
34: Tower crane
35: Container Crane
36: Reach Stacker
37: Straddle Carrier
38: Mobile Crane
39: Dump Truck
40: Haul Truck
41: Scraper/Tractor
42: Front loader/Bulldozer
43: Excavator
44: Cement Mixer
45: Ground Grader
46: Hut/Tent
47: Shed
48: Building
49: Aircraft Hangar
50: Damaged Building
51: Facility
52: Construction Site
53: Vehicle Lot
54: Helipad
55: Storage Tank
56: Shipping container lot
57: Shipping Container
58: Pylon
59: Tower
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json
import os
from pathlib import Path
import numpy as np
from PIL import Image
from tqdm import tqdm
from utils.dataloaders import autosplit
from utils.general import download, xyxy2xywhn
def convert_labels(fname=Path('xView/xView_train.geojson')):
# Convert xView geoJSON labels to YOLO format
path = fname.parent
with open(fname) as f:
print(f'Loading {fname}...')
data = json.load(f)
# Make dirs
labels = Path(path / 'labels' / 'train')
os.system(f'rm -rf {labels}')
labels.mkdir(parents=True, exist_ok=True)
# xView classes 11-94 to 0-59
xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]
shapes = {}
for feature in tqdm(data['features'], desc=f'Converting {fname}'):
p = feature['properties']
if p['bounds_imcoords']:
id = p['image_id']
file = path / 'train_images' / id
if file.exists(): # 1395.tif missing
try:
box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
cls = p['type_id']
cls = xview_class2index[int(cls)] # xView class to 0-59
assert 59 >= cls >= 0, f'incorrect class index {cls}'
# Write YOLO label
if id not in shapes:
shapes[id] = Image.open(file).size
box = xyxy2xywhn(box[None].astype(np.float64), w=shapes[id][0], h=shapes[id][1], clip=True)  # np.float alias removed in NumPy 1.24
with open((labels / id).with_suffix('.txt'), 'a') as f:
f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n") # write label.txt
except Exception as e:
print(f'WARNING: skipping one label for {file}: {e}')
# Download manually from https://challenge.xviewdataset.org
dir = Path(yaml['path']) # dataset root dir
# urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels
# 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images
# 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels)
# download(urls, dir=dir, delete=False)
# Convert labels
convert_labels(dir / 'xView_train.geojson')
# Move images
images = Path(dir / 'images')
images.mkdir(parents=True, exist_ok=True)
Path(dir / 'train_images').rename(dir / 'images' / 'train')
Path(dir / 'val_images').rename(dir / 'images' / 'val')
# Split
autosplit(dir / 'images' / 'train')
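A quick check of the xview_class2index table above: raw xView type_ids run 11-94 with gaps, and the table collapses them onto the contiguous 0-59 ids used in 'names' (-1 marks unused ids):
head = [-1] * 11 + [0, 1, 2, -1, 3, -1, 4]  # first 18 entries of the table
assert head[11] == 0   # type_id 11 -> class 0 'Fixed-wing Aircraft'
assert head[17] == 4   # type_id 17 -> class 4 'Passenger Vehicle'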

yolov5/models/common.py
View File

@ -35,9 +35,9 @@ except (ImportError, AssertionError):
from ultralytics.utils.plotting import Annotator, colors, save_one_box
from utils import TryExcept
from utils.dataloaders import exif_transpose, letterbox
from utils.general import (
from yolov5.utils import TryExcept
from yolov5.utils.dataloaders import exif_transpose, letterbox
from yolov5.utils.general import (
LOGGER,
ROOT,
Profile,
@ -54,7 +54,7 @@ from utils.general import (
xyxy2xywh,
yaml_load,
)
from utils.torch_utils import copy_attr, smart_inference_mode
from yolov5.utils.torch_utils import copy_attr, smart_inference_mode
def autopad(k, p=None, d=1):
@ -473,7 +473,7 @@ class DetectMultiBackend(nn.Module):
# TensorFlow Lite: *.tflite
# TensorFlow Edge TPU: *_edgetpu.tflite
# PaddlePaddle: *_paddle_model
from models.experimental import attempt_download, attempt_load # scoped to avoid circular import
from yolov5.models.experimental import attempt_download, attempt_load # scoped to avoid circular import
super().__init__()
w = str(weights[0] if isinstance(weights, list) else weights)
@ -661,7 +661,7 @@ class DetectMultiBackend(nn.Module):
elif triton: # NVIDIA Triton Inference Server
LOGGER.info(f"Using {w} as Triton Inference Server...")
check_requirements("tritonclient[all]")
from utils.triton import TritonRemoteModel
from yolov5.utils.triton import TritonRemoteModel
model = TritonRemoteModel(url=w)
nhwc = model.runtime.startswith("tensorflow")
@ -780,8 +780,8 @@ class DetectMultiBackend(nn.Module):
Example: path='path/to/model.onnx' -> type=onnx
"""
# types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
from export import export_formats
from utils.downloads import is_url
from yolov5.export import export_formats
from yolov5.utils.downloads import is_url
sf = list(export_formats().Suffix) # export suffixes
if not is_url(p, check=False):

yolov5/models/experimental.py
View File

@ -7,7 +7,7 @@ import numpy as np
import torch
import torch.nn as nn
from utils.downloads import attempt_download
from yolov5.utils.downloads import attempt_download
class Sum(nn.Module):
@ -91,7 +91,7 @@ def attempt_load(weights, device=None, inplace=True, fuse=True):
Example inputs: weights=[a,b,c] or a single model weights=[a] or weights=a.
"""
from models.yolo import Detect, Model
from yolov5.models.yolo import Detect, Model
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
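These two diffs switch the vendored tree from top-level imports (utils.*, models.*) to package-qualified ones (yolov5.utils.*, yolov5.models.*), so the code can be imported from the surrounding FastAPI app without sys.path manipulation. A minimal sketch with a hypothetical weights path:
from yolov5.models.common import DetectMultiBackend
from yolov5.utils.torch_utils import select_device
model = DetectMultiBackend("weights/best.pt", device=select_device("cpu"))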

307
yolov5/segment/predict.py Normal file
View File

@ -0,0 +1,307 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Run YOLOv5 segmentation inference on images, videos, directories, streams, etc.
Usage - sources:
$ python segment/predict.py --weights yolov5s-seg.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
screen # screenshot
path/ # directory
list.txt # list of images
list.streams # list of streams
'path/*.jpg' # glob
'https://youtu.be/LNwODJXcvt4' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
Usage - formats:
$ python segment/predict.py --weights yolov5s-seg.pt # PyTorch
yolov5s-seg.torchscript # TorchScript
yolov5s-seg.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s-seg_openvino_model # OpenVINO
yolov5s-seg.engine # TensorRT
yolov5s-seg.mlmodel # CoreML (macOS-only)
yolov5s-seg_saved_model # TensorFlow SavedModel
yolov5s-seg.pb # TensorFlow GraphDef
yolov5s-seg.tflite # TensorFlow Lite
yolov5s-seg_edgetpu.tflite # TensorFlow Edge TPU
yolov5s-seg_paddle_model # PaddlePaddle
"""
import argparse
import os
import platform
import sys
from pathlib import Path
import torch
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from ultralytics.utils.plotting import Annotator, colors, save_one_box
from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (
LOGGER,
Profile,
check_file,
check_img_size,
check_imshow,
check_requirements,
colorstr,
cv2,
increment_path,
non_max_suppression,
print_args,
scale_boxes,
scale_segments,
strip_optimizer,
)
from utils.segment.general import masks2segments, process_mask, process_mask_native
from utils.torch_utils import select_device, smart_inference_mode
@smart_inference_mode()
def run(
weights=ROOT / "yolov5s-seg.pt", # model.pt path(s)
source=ROOT / "data/images", # file/dir/URL/glob/screen/0(webcam)
data=ROOT / "data/coco128.yaml", # dataset.yaml path
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
nosave=False, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project=ROOT / "runs/predict-seg", # save results to project/name
name="exp", # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
retina_masks=False,
):
"""Run YOLOv5 segmentation inference on diverse sources including images, videos, directories, and streams."""
source = str(source)
save_img = not nosave and not source.endswith(".txt") # save inference images
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
screenshot = source.lower().startswith("screen")
if is_url and is_file:
source = check_file(source) # download
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
bs = 1 # batch_size
if webcam:
view_img = check_imshow(warn=True)
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
bs = len(dataset)
elif screenshot:
dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device))
for path, im, im0s, vid_cap, s in dataset:
with dt[0]:
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred, proto = model(im, augment=augment, visualize=visualize)[:2]
# NMS
with dt[2]:
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32)
# Second-stage classifier (optional)
# pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
if webcam: # batch_size >= 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
s += f"{i}: "
else:
p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # im.jpg
txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") # im.txt
s += "{:g}x{:g} ".format(*im.shape[2:]) # print string
imc = im0.copy() if save_crop else im0 # for save_crop
annotator = Annotator(im0, line_width=line_thickness, example=str(names))
if len(det):
if retina_masks:
# scale bbox first, then crop masks
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() # rescale boxes to im0 size
masks = process_mask_native(proto[i], det[:, 6:], det[:, :4], im0.shape[:2]) # HWC
else:
masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() # rescale boxes to im0 size
# Segments
if save_txt:
segments = [
scale_segments(im0.shape if retina_masks else im.shape[2:], x, im0.shape, normalize=True)
for x in reversed(masks2segments(masks))
]
# Print results
for c in det[:, 5].unique():
n = (det[:, 5] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
# Mask plotting
annotator.masks(
masks,
colors=[colors(x, True) for x in det[:, 5]],
im_gpu=torch.as_tensor(im0, dtype=torch.float16).to(device).permute(2, 0, 1).flip(0).contiguous()
/ 255
if retina_masks
else im[i],
)
# Write results
for j, (*xyxy, conf, cls) in enumerate(reversed(det[:, :6])):
if save_txt: # Write to file
seg = segments[j].reshape(-1) # (n,2) to (n*2)
line = (cls, *seg, conf) if save_conf else (cls, *seg) # label format
with open(f"{txt_path}.txt", "a") as f:
f.write(("%g " * len(line)).rstrip() % line + "\n")
if save_img or save_crop or view_img: # Add bbox to image
c = int(cls) # integer class
label = None if hide_labels else (names[c] if hide_conf else f"{names[c]} {conf:.2f}")
annotator.box_label(xyxy, label, color=colors(c, True))
# annotator.draw.polygon(segments[j], outline=colors(c, True), width=3)
if save_crop:
save_one_box(xyxy, imc, file=save_dir / "crops" / names[c] / f"{p.stem}.jpg", BGR=True)
# Stream results
im0 = annotator.result()
if view_img:
if platform.system() == "Linux" and p not in windows:
windows.append(p)
cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
cv2.imshow(str(p), im0)
if cv2.waitKey(1) == ord("q"): # 1 millisecond
exit()
# Save results (image with detections)
if save_img:
if dataset.mode == "image":
cv2.imwrite(save_path, im0)
else: # 'video' or 'stream'
if vid_path[i] != save_path: # new video
vid_path[i] = save_path
if isinstance(vid_writer[i], cv2.VideoWriter):
vid_writer[i].release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path = str(Path(save_path).with_suffix(".mp4")) # force *.mp4 suffix on results videos
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
vid_writer[i].write(im0)
# Print time (inference-only)
LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1e3:.1f}ms")
# Print results
t = tuple(x.t / seen * 1e3 for x in dt) # speeds per image
LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning)
def parse_opt():
"""Parses command-line options for YOLOv5 inference including model paths, data sources, inference settings, and
output preferences.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-seg.pt", help="model path(s)")
parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="(optional) dataset.yaml path")
parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold")
parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")
parser.add_argument("--max-det", type=int, default=1000, help="maximum detections per image")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")
parser.add_argument("--save-crop", action="store_true", help="save cropped prediction boxes")
parser.add_argument("--nosave", action="store_true", help="do not save images/videos")
parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3")
parser.add_argument("--agnostic-nms", action="store_true", help="class-agnostic NMS")
parser.add_argument("--augment", action="store_true", help="augmented inference")
parser.add_argument("--visualize", action="store_true", help="visualize features")
parser.add_argument("--update", action="store_true", help="update all models")
parser.add_argument("--project", default=ROOT / "runs/predict-seg", help="save results to project/name")
parser.add_argument("--name", default="exp", help="save results to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--line-thickness", default=3, type=int, help="bounding box thickness (pixels)")
parser.add_argument("--hide-labels", default=False, action="store_true", help="hide labels")
parser.add_argument("--hide-conf", default=False, action="store_true", help="hide confidences")
parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride")
parser.add_argument("--retina-masks", action="store_true", help="whether to plot masks in native resolution")
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
"""Executes YOLOv5 model inference with given options, checking for requirements before launching."""
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)
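Besides the CLI wired up in parse_opt(), run() can be called directly; a minimal sketch, assuming the yolov5 root is on sys.path and using hypothetical paths:
from segment.predict import run
run(
    weights="yolov5s-seg.pt",               # segmentation checkpoint
    source="rtsp://example.com/media.mp4",  # any source from the docstring
    imgsz=(640, 640),
    conf_thres=0.25,
)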

764
yolov5/segment/train.py Normal file
View File

@ -0,0 +1,764 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Train a YOLOv5 segmentation model on a segmentation dataset. Models and datasets download automatically from the
latest YOLOv5 release.
Usage - Single-GPU training:
$ python segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 # from pretrained (recommended)
$ python segment/train.py --data coco128-seg.yaml --weights '' --cfg yolov5s-seg.yaml --img 640 # from scratch
Usage - Multi-GPU DDP training:
$ python -m torch.distributed.run --nproc_per_node 4 --master_port 1 segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 --device 0,1,2,3
Models: https://github.com/ultralytics/yolov5/tree/master/models
Datasets: https://github.com/ultralytics/yolov5/tree/master/data
Tutorial: https://docs.ultralytics.com/yolov5/tutorials/train_custom_data
"""
import argparse
import math
import os
import random
import subprocess
import sys
import time
from copy import deepcopy
from datetime import datetime
from pathlib import Path
import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
import yaml
from torch.optim import lr_scheduler
from tqdm import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
import segment.val as validate # for end-of-epoch mAP
from models.experimental import attempt_load
from models.yolo import SegmentationModel
from utils.autoanchor import check_anchors
from utils.autobatch import check_train_batch_size
from utils.callbacks import Callbacks
from utils.downloads import attempt_download, is_url
from utils.general import (
LOGGER,
TQDM_BAR_FORMAT,
check_amp,
check_dataset,
check_file,
check_git_info,
check_git_status,
check_img_size,
check_requirements,
check_suffix,
check_yaml,
colorstr,
get_latest_run,
increment_path,
init_seeds,
intersect_dicts,
labels_to_class_weights,
labels_to_image_weights,
one_cycle,
print_args,
print_mutation,
strip_optimizer,
yaml_save,
)
from utils.loggers import GenericLogger
from utils.plots import plot_evolve, plot_labels
from utils.segment.dataloaders import create_dataloader
from utils.segment.loss import ComputeLoss
from utils.segment.metrics import KEYS, fitness
from utils.segment.plots import plot_images_and_masks, plot_results_with_masks
from utils.torch_utils import (
EarlyStopping,
ModelEMA,
de_parallel,
select_device,
smart_DDP,
smart_optimizer,
smart_resume,
torch_distributed_zero_first,
)
LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1)) # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv("RANK", -1))
WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1))
GIT_INFO = check_git_info()
def train(hyp, opt, device, callbacks):
"""
Trains the YOLOv5 model on a dataset, managing hyperparameters, model optimization, logging, and validation.
`hyp` is path/to/hyp.yaml or hyp dictionary.
"""
(
save_dir,
epochs,
batch_size,
weights,
single_cls,
evolve,
data,
cfg,
resume,
noval,
nosave,
workers,
freeze,
mask_ratio,
) = (
Path(opt.save_dir),
opt.epochs,
opt.batch_size,
opt.weights,
opt.single_cls,
opt.evolve,
opt.data,
opt.cfg,
opt.resume,
opt.noval,
opt.nosave,
opt.workers,
opt.freeze,
opt.mask_ratio,
)
# callbacks.run('on_pretrain_routine_start')
# Directories
w = save_dir / "weights" # weights dir
(w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir
last, best = w / "last.pt", w / "best.pt"
# Hyperparameters
if isinstance(hyp, str):
with open(hyp, errors="ignore") as f:
hyp = yaml.safe_load(f) # load hyps dict
LOGGER.info(colorstr("hyperparameters: ") + ", ".join(f"{k}={v}" for k, v in hyp.items()))
opt.hyp = hyp.copy() # for saving hyps to checkpoints
# Save run settings
if not evolve:
yaml_save(save_dir / "hyp.yaml", hyp)
yaml_save(save_dir / "opt.yaml", vars(opt))
# Loggers
data_dict = None
if RANK in {-1, 0}:
logger = GenericLogger(opt=opt, console_logger=LOGGER)
# Config
plots = not evolve and not opt.noplots # create plots
overlap = not opt.no_overlap
cuda = device.type != "cpu"
init_seeds(opt.seed + 1 + RANK, deterministic=True)
with torch_distributed_zero_first(LOCAL_RANK):
data_dict = data_dict or check_dataset(data) # check if None
train_path, val_path = data_dict["train"], data_dict["val"]
nc = 1 if single_cls else int(data_dict["nc"]) # number of classes
names = {0: "item"} if single_cls and len(data_dict["names"]) != 1 else data_dict["names"] # class names
is_coco = isinstance(val_path, str) and val_path.endswith("coco/val2017.txt") # COCO dataset
# Model
check_suffix(weights, ".pt") # check weights
pretrained = weights.endswith(".pt")
if pretrained:
with torch_distributed_zero_first(LOCAL_RANK):
weights = attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location="cpu") # load checkpoint to CPU to avoid CUDA memory leak
model = SegmentationModel(cfg or ckpt["model"].yaml, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)
exclude = ["anchor"] if (cfg or hyp.get("anchors")) and not resume else [] # exclude keys
csd = ckpt["model"].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect
model.load_state_dict(csd, strict=False) # load
LOGGER.info(f"Transferred {len(csd)}/{len(model.state_dict())} items from {weights}") # report
else:
model = SegmentationModel(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device) # create
amp = check_amp(model) # check AMP
# Freeze
freeze = [f"model.{x}." for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze
for k, v in model.named_parameters():
v.requires_grad = True # train all layers
# v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results)
if any(x in k for x in freeze):
LOGGER.info(f"freezing {k}")
v.requires_grad = False
# Image size
gs = max(int(model.stride.max()), 32) # grid size (max stride)
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple
# Batch size
if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size
batch_size = check_train_batch_size(model, imgsz, amp)
logger.update_params({"batch_size": batch_size})
# loggers.on_params_update({"batch_size": batch_size})
# Optimizer
nbs = 64 # nominal batch size
accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
hyp["weight_decay"] *= batch_size * accumulate / nbs # scale weight_decay
optimizer = smart_optimizer(model, opt.optimizer, hyp["lr0"], hyp["momentum"], hyp["weight_decay"])
# Scheduler
if opt.cos_lr:
lf = one_cycle(1, hyp["lrf"], epochs) # cosine 1->hyp['lrf']
else:
def lf(x):
"""Linear learning rate scheduler decreasing from 1 to hyp['lrf'] over 'epochs'."""
return (1 - x / epochs) * (1.0 - hyp["lrf"]) + hyp["lrf"] # linear
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs)
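# e.g. with hyp['lrf'] = 0.01 and epochs = 100 the linear factor runs
# lf(0) = 1.0 -> lf(50) = 0.505 -> lf(99) = 0.0199, scaling lr0 each epoch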
# EMA
ema = ModelEMA(model) if RANK in {-1, 0} else None
# Resume
best_fitness, start_epoch = 0.0, 0
if pretrained:
if resume:
best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
del ckpt, csd
# DP mode
if cuda and RANK == -1 and torch.cuda.device_count() > 1:
LOGGER.warning(
"WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n"
"See Multi-GPU Tutorial at https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training to get started."
)
model = torch.nn.DataParallel(model)
# SyncBatchNorm
if opt.sync_bn and cuda and RANK != -1:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
LOGGER.info("Using SyncBatchNorm()")
# Trainloader
train_loader, dataset = create_dataloader(
train_path,
imgsz,
batch_size // WORLD_SIZE,
gs,
single_cls,
hyp=hyp,
augment=True,
cache=None if opt.cache == "val" else opt.cache,
rect=opt.rect,
rank=LOCAL_RANK,
workers=workers,
image_weights=opt.image_weights,
quad=opt.quad,
prefix=colorstr("train: "),
shuffle=True,
mask_downsample_ratio=mask_ratio,
overlap_mask=overlap,
)
labels = np.concatenate(dataset.labels, 0)
mlc = int(labels[:, 0].max()) # max label class
assert mlc < nc, f"Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}"
# Process 0
if RANK in {-1, 0}:
val_loader = create_dataloader(
val_path,
imgsz,
batch_size // WORLD_SIZE * 2,
gs,
single_cls,
hyp=hyp,
cache=None if noval else opt.cache,
rect=True,
rank=-1,
workers=workers * 2,
pad=0.5,
mask_downsample_ratio=mask_ratio,
overlap_mask=overlap,
prefix=colorstr("val: "),
)[0]
if not resume:
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp["anchor_t"], imgsz=imgsz) # run AutoAnchor
model.half().float() # pre-reduce anchor precision
if plots:
plot_labels(labels, names, save_dir)
# callbacks.run('on_pretrain_routine_end', labels, names)
# DDP mode
if cuda and RANK != -1:
model = smart_DDP(model)
# Model attributes
nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps)
hyp["box"] *= 3 / nl # scale to layers
hyp["cls"] *= nc / 80 * 3 / nl # scale to classes and layers
hyp["obj"] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
hyp["label_smoothing"] = opt.label_smoothing
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights
model.names = names
# Start training
t0 = time.time()
nb = len(train_loader) # number of batches
nw = max(round(hyp["warmup_epochs"] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
last_opt_step = -1
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
scheduler.last_epoch = start_epoch - 1 # do not move
scaler = torch.cuda.amp.GradScaler(enabled=amp)
stopper, stop = EarlyStopping(patience=opt.patience), False
compute_loss = ComputeLoss(model, overlap=overlap) # init loss class
# callbacks.run('on_train_start')
LOGGER.info(
f"Image sizes {imgsz} train, {imgsz} val\n"
f"Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n"
f"Logging results to {colorstr('bold', save_dir)}\n"
f"Starting training for {epochs} epochs..."
)
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
# callbacks.run('on_train_epoch_start')
model.train()
# Update image weights (optional, single-GPU only)
if opt.image_weights:
cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights
iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights
dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx
# Update mosaic border (optional)
# b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
# dataset.mosaic_border = [b - imgsz, -b] # height, width borders
mloss = torch.zeros(4, device=device) # mean losses
if RANK != -1:
train_loader.sampler.set_epoch(epoch)
pbar = enumerate(train_loader)
LOGGER.info(
("\n" + "%11s" * 8)
% ("Epoch", "GPU_mem", "box_loss", "seg_loss", "obj_loss", "cls_loss", "Instances", "Size")
)
if RANK in {-1, 0}:
pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT) # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------
# callbacks.run('on_train_batch_start')
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
xi = [0, nw] # x interp
# compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
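# e.g. with nbs = 64 and batch_size = 16, accumulate ramps 1 -> 4 over the
# first nw batches, emulating the nominal batch size of 64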
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x["lr"] = np.interp(ni, xi, [hyp["warmup_bias_lr"] if j == 0 else 0.0, x["initial_lr"] * lf(epoch)])
if "momentum" in x:
x["momentum"] = np.interp(ni, xi, [hyp["warmup_momentum"], hyp["momentum"]])
# Multi-scale
if opt.multi_scale:
sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False)
# Forward
with torch.cuda.amp.autocast(amp):
pred = model(imgs) # forward
loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float())
if RANK != -1:
loss *= WORLD_SIZE # gradient averaged between devices in DDP mode
if opt.quad:
loss *= 4.0
# Backward
scaler.scale(loss).backward()
# Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
if ni - last_opt_step >= accumulate:
scaler.unscale_(optimizer) # unscale gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
if ema:
ema.update(model)
last_opt_step = ni
# Log
if RANK in {-1, 0}:
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = f"{torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0:.3g}G" # (GB)
pbar.set_description(
("%11s" * 2 + "%11.4g" * 6)
% (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])
)
# callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths)
# if callbacks.stop_training:
# return
# Mosaic plots
if plots:
if ni < 3:
plot_images_and_masks(imgs, targets, masks, paths, save_dir / f"train_batch{ni}.jpg")
if ni == 10:
files = sorted(save_dir.glob("train*.jpg"))
logger.log_images(files, "Mosaics", epoch)
# end batch ------------------------------------------------------------------------------------------------
# Scheduler
lr = [x["lr"] for x in optimizer.param_groups] # for loggers
scheduler.step()
if RANK in {-1, 0}:
# mAP
# callbacks.run('on_train_epoch_end', epoch=epoch)
ema.update_attr(model, include=["yaml", "nc", "hyp", "names", "stride", "class_weights"])
final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
if not noval or final_epoch: # Calculate mAP
results, maps, _ = validate.run(
data_dict,
batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz,
half=amp,
model=ema.ema,
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
plots=False,
callbacks=callbacks,
compute_loss=compute_loss,
mask_downsample_ratio=mask_ratio,
overlap=overlap,
)
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
stop = stopper(epoch=epoch, fitness=fi) # early stop check
if fi > best_fitness:
best_fitness = fi
log_vals = list(mloss) + list(results) + lr
# callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
# Log val metrics and media
metrics_dict = dict(zip(KEYS, log_vals))
logger.log_metrics(metrics_dict, epoch)
# Save model
if (not nosave) or (final_epoch and not evolve): # if save
ckpt = {
"epoch": epoch,
"best_fitness": best_fitness,
"model": deepcopy(de_parallel(model)).half(),
"ema": deepcopy(ema.ema).half(),
"updates": ema.updates,
"optimizer": optimizer.state_dict(),
"opt": vars(opt),
"git": GIT_INFO, # {remote, branch, commit} if a git repo
"date": datetime.now().isoformat(),
}
# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
if opt.save_period > 0 and epoch % opt.save_period == 0:
torch.save(ckpt, w / f"epoch{epoch}.pt")
logger.log_model(w / f"epoch{epoch}.pt")
del ckpt
# callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
# EarlyStopping
if RANK != -1: # if DDP training
broadcast_list = [stop if RANK == 0 else None]
dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks
if RANK != 0:
stop = broadcast_list[0]
if stop:
break # must break all DDP ranks
# end epoch ----------------------------------------------------------------------------------------------------
# end training -----------------------------------------------------------------------------------------------------
if RANK in {-1, 0}:
LOGGER.info(f"\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.")
for f in last, best:
if f.exists():
strip_optimizer(f) # strip optimizers
if f is best:
LOGGER.info(f"\nValidating {f}...")
results, _, _ = validate.run(
data_dict,
batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz,
model=attempt_load(f, device).half(),
iou_thres=0.65 if is_coco else 0.60, # best pycocotools at iou 0.65
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
save_json=is_coco,
verbose=True,
plots=plots,
callbacks=callbacks,
compute_loss=compute_loss,
mask_downsample_ratio=mask_ratio,
overlap=overlap,
) # val best model with plots
if is_coco:
# callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr))
logger.log_metrics(metrics_dict, epoch)
# callbacks.run('on_train_end', last, best, epoch, results)
# on train end callback using genericLogger
logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs)
if not opt.evolve:
logger.log_model(best, epoch)
if plots:
plot_results_with_masks(file=save_dir / "results.csv") # save results.png
files = ["results.png", "confusion_matrix.png", *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R"))]
files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
logger.log_images(files, "Results", epoch + 1)
logger.log_images(sorted(save_dir.glob("val*.jpg")), "Validation", epoch + 1)
torch.cuda.empty_cache()
return results
def parse_opt(known=False):
"""
Parses command line arguments for training configurations, returning parsed arguments.
Supports both known and unknown args.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s-seg.pt", help="initial weights path")
parser.add_argument("--cfg", type=str, default="", help="model.yaml path")
parser.add_argument("--data", type=str, default=ROOT / "data/coco128-seg.yaml", help="dataset.yaml path")
parser.add_argument("--hyp", type=str, default=ROOT / "data/hyps/hyp.scratch-low.yaml", help="hyperparameters path")
parser.add_argument("--epochs", type=int, default=100, help="total training epochs")
parser.add_argument("--batch-size", type=int, default=16, help="total batch size for all GPUs, -1 for autobatch")
parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=640, help="train, val image size (pixels)")
parser.add_argument("--rect", action="store_true", help="rectangular training")
parser.add_argument("--resume", nargs="?", const=True, default=False, help="resume most recent training")
parser.add_argument("--nosave", action="store_true", help="only save final checkpoint")
parser.add_argument("--noval", action="store_true", help="only validate final epoch")
parser.add_argument("--noautoanchor", action="store_true", help="disable AutoAnchor")
parser.add_argument("--noplots", action="store_true", help="save no plot files")
parser.add_argument("--evolve", type=int, nargs="?", const=300, help="evolve hyperparameters for x generations")
parser.add_argument("--bucket", type=str, default="", help="gsutil bucket")
parser.add_argument("--cache", type=str, nargs="?", const="ram", help="image --cache ram/disk")
parser.add_argument("--image-weights", action="store_true", help="use weighted image selection for training")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--multi-scale", action="store_true", help="vary img-size +/- 50%%")
parser.add_argument("--single-cls", action="store_true", help="train multi-class data as single-class")
parser.add_argument("--optimizer", type=str, choices=["SGD", "Adam", "AdamW"], default="SGD", help="optimizer")
parser.add_argument("--sync-bn", action="store_true", help="use SyncBatchNorm, only available in DDP mode")
parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
parser.add_argument("--project", default=ROOT / "runs/train-seg", help="save to project/name")
parser.add_argument("--name", default="exp", help="save to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--quad", action="store_true", help="quad dataloader")
parser.add_argument("--cos-lr", action="store_true", help="cosine LR scheduler")
parser.add_argument("--label-smoothing", type=float, default=0.0, help="Label smoothing epsilon")
parser.add_argument("--patience", type=int, default=100, help="EarlyStopping patience (epochs without improvement)")
parser.add_argument("--freeze", nargs="+", type=int, default=[0], help="Freeze layers: backbone=10, first3=0 1 2")
parser.add_argument("--save-period", type=int, default=-1, help="Save checkpoint every x epochs (disabled if < 1)")
parser.add_argument("--seed", type=int, default=0, help="Global training seed")
parser.add_argument("--local_rank", type=int, default=-1, help="Automatic DDP Multi-GPU argument, do not modify")
# Instance Segmentation Args
parser.add_argument("--mask-ratio", type=int, default=4, help="Downsample the truth masks to saving memory")
parser.add_argument("--no-overlap", action="store_true", help="Overlap masks train faster at slightly less mAP")
return parser.parse_known_args()[0] if known else parser.parse_args()
def main(opt, callbacks=Callbacks()):
"""Initializes training or evolution of YOLOv5 models based on provided configuration and options."""
if RANK in {-1, 0}:
print_args(vars(opt))
check_git_status()
check_requirements(ROOT / "requirements.txt")
# Resume
if opt.resume and not opt.evolve: # resume from specified or most recent last.pt
last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
opt_yaml = last.parent.parent / "opt.yaml" # train options yaml
opt_data = opt.data # original dataset
if opt_yaml.is_file():
with open(opt_yaml, errors="ignore") as f:
d = yaml.safe_load(f)
else:
d = torch.load(last, map_location="cpu")["opt"]
opt = argparse.Namespace(**d) # replace
opt.cfg, opt.weights, opt.resume = "", str(last), True # reinstate
if is_url(opt_data):
opt.data = check_file(opt_data) # avoid HUB resume auth timeout
else:
opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = (
check_file(opt.data),
check_yaml(opt.cfg),
check_yaml(opt.hyp),
str(opt.weights),
str(opt.project),
) # checks
assert len(opt.cfg) or len(opt.weights), "either --cfg or --weights must be specified"
if opt.evolve:
if opt.project == str(ROOT / "runs/train-seg"): # if default project name, rename to runs/evolve-seg
opt.project = str(ROOT / "runs/evolve-seg")
opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume
if opt.name == "cfg":
opt.name = Path(opt.cfg).stem # use model.yaml as name
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
# DDP mode
device = select_device(opt.device, batch_size=opt.batch_size)
if LOCAL_RANK != -1:
msg = "is not compatible with YOLOv5 Multi-GPU DDP training"
assert not opt.image_weights, f"--image-weights {msg}"
assert not opt.evolve, f"--evolve {msg}"
assert opt.batch_size != -1, f"AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size"
assert opt.batch_size % WORLD_SIZE == 0, f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE"
assert torch.cuda.device_count() > LOCAL_RANK, "insufficient CUDA devices for DDP command"
torch.cuda.set_device(LOCAL_RANK)
device = torch.device("cuda", LOCAL_RANK)
dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
# Train
if not opt.evolve:
train(opt.hyp, opt, device, callbacks)
# Evolve hyperparameters (optional)
else:
# Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
meta = {
"lr0": (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
"lrf": (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
"momentum": (0.3, 0.6, 0.98), # SGD momentum/Adam beta1
"weight_decay": (1, 0.0, 0.001), # optimizer weight decay
"warmup_epochs": (1, 0.0, 5.0), # warmup epochs (fractions ok)
"warmup_momentum": (1, 0.0, 0.95), # warmup initial momentum
"warmup_bias_lr": (1, 0.0, 0.2), # warmup initial bias lr
"box": (1, 0.02, 0.2), # box loss gain
"cls": (1, 0.2, 4.0), # cls loss gain
"cls_pw": (1, 0.5, 2.0), # cls BCELoss positive_weight
"obj": (1, 0.2, 4.0), # obj loss gain (scale with pixels)
"obj_pw": (1, 0.5, 2.0), # obj BCELoss positive_weight
"iou_t": (0, 0.1, 0.7), # IoU training threshold
"anchor_t": (1, 2.0, 8.0), # anchor-multiple threshold
"anchors": (2, 2.0, 10.0), # anchors per output grid (0 to ignore)
"fl_gamma": (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
"hsv_h": (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
"hsv_s": (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
"hsv_v": (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
"degrees": (1, 0.0, 45.0), # image rotation (+/- deg)
"translate": (1, 0.0, 0.9), # image translation (+/- fraction)
"scale": (1, 0.0, 0.9), # image scale (+/- gain)
"shear": (1, 0.0, 10.0), # image shear (+/- deg)
"perspective": (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
"flipud": (1, 0.0, 1.0), # image flip up-down (probability)
"fliplr": (0, 0.0, 1.0), # image flip left-right (probability)
"mosaic": (1, 0.0, 1.0), # image mixup (probability)
"mixup": (1, 0.0, 1.0), # image mixup (probability)
"copy_paste": (1, 0.0, 1.0),
} # segment copy-paste (probability)
with open(opt.hyp, errors="ignore") as f:
hyp = yaml.safe_load(f) # load hyps dict
if "anchors" not in hyp: # anchors commented in hyp.yaml
hyp["anchors"] = 3
if opt.noautoanchor:
del hyp["anchors"], meta["anchors"]
opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
evolve_yaml, evolve_csv = save_dir / "hyp_evolve.yaml", save_dir / "evolve.csv"
if opt.bucket:
# download evolve.csv if exists
subprocess.run(
[
"gsutil",
"cp",
f"gs://{opt.bucket}/evolve.csv",
str(evolve_csv),
]
)
for _ in range(opt.evolve): # generations to evolve
if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate
# Select parent(s)
parent = "single" # parent selection method: 'single' or 'weighted'
x = np.loadtxt(evolve_csv, ndmin=2, delimiter=",", skiprows=1)
n = min(5, len(x)) # number of previous results to consider
x = x[np.argsort(-fitness(x))][:n] # top n mutations
w = fitness(x) - fitness(x).min() + 1e-6 # weights (sum > 0)
if parent == "single" or len(x) == 1:
# x = x[random.randint(0, n - 1)] # random selection
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
elif parent == "weighted":
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
# Mutate
mp, s = 0.8, 0.2 # mutation probability, sigma
npr = np.random
npr.seed(int(time.time()))
g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1
ng = len(meta)
v = np.ones(ng)
while all(v == 1): # mutate until a change occurs (prevent duplicates)
v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
                    hyp[k] = float(x[i + 12] * v[i])  # mutate (the first 12 evolve.csv columns are result values, not hyps)
# Constrain to limits
for k, v in meta.items():
hyp[k] = max(hyp[k], v[1]) # lower limit
hyp[k] = min(hyp[k], v[2]) # upper limit
hyp[k] = round(hyp[k], 5) # significant digits
# Train mutation
results = train(hyp.copy(), opt, device, callbacks)
callbacks = Callbacks()
# Write mutation results
print_mutation(KEYS[4:16], results, hyp.copy(), save_dir, opt.bucket)
# Plot results
plot_evolve(evolve_csv)
LOGGER.info(
f"Hyperparameter evolution finished {opt.evolve} generations\n"
f"Results saved to {colorstr('bold', save_dir)}\n"
f"Usage example: $ python train.py --hyp {evolve_yaml}"
)
def run(**kwargs):
"""
Executes YOLOv5 training with given parameters, altering options programmatically; returns updated options.
Example: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
"""
opt = parse_opt(True)
for k, v in kwargs.items():
setattr(opt, k, v)
main(opt)
return opt
if __name__ == "__main__":
opt = parse_opt()
main(opt)
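
Note: run() above is the programmatic entry point. A minimal sketch of how it can be driven from Python, per its docstring (hedged: assumes the yolov5 repo root is on sys.path so segment.train resolves as a namespace package, and that the dataset and weights files are reachable):

# Sketch only -- programmatic segmentation training via run()
import segment.train as seg_train

opt = seg_train.run(
    data="coco128-seg.yaml",     # resolved via check_file()
    imgsz=320,                   # train/val image size
    weights="yolov5s-seg.pt",    # pretrained checkpoint, auto-downloaded by YOLOv5
    epochs=3,
)
print(opt.save_dir)              # run() returns the updated options namespace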

yolov5/segment/tutorial.ipynb vendored Normal file
View File

@@ -0,0 +1,602 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "t6MPjfT5NrKQ"
},
"source": [
"<div align=\"center\">\n",
"\n",
" <a href=\"https://ultralytics.com/yolov5\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png\"></a>\n",
"\n",
"\n",
"<br>\n",
" <a href=\"https://bit.ly/yolov5-paperspace-notebook\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"></a>\n",
" <a href=\"https://colab.research.google.com/github/ultralytics/yolov5/blob/master/segment/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
" <a href=\"https://www.kaggle.com/models/ultralytics/yolov5\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
"<br>\n",
"\n",
"This <a href=\"https://github.com/ultralytics/yolov5\">YOLOv5</a> 🚀 notebook by <a href=\"https://ultralytics.com\">Ultralytics</a> presents simple train, validate and predict examples to help start your AI adventure.<br>See <a href=\"https://github.com/ultralytics/yolov5/issues/new/choose\">GitHub</a> for community support or <a href=\"https://ultralytics.com/contact\">contact us</a> for professional support.\n",
"\n",
"</div>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7mGmQbAO5pQb"
},
"source": [
"# Setup\n",
"\n",
"Clone GitHub [repository](https://github.com/ultralytics/yolov5), install [dependencies](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) and check PyTorch and GPU."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wbvMlHd_QwMG",
"outputId": "171b23f0-71b9-4cbf-b666-6fa2ecef70c8"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Setup complete ✅ (2 CPUs, 12.7 GB RAM, 22.6/78.2 GB disk)\n"
]
}
],
"source": [
"!git clone https://github.com/ultralytics/yolov5 # clone\n",
"%cd yolov5\n",
"%pip install -qr requirements.txt comet_ml # install\n",
"\n",
"import torch\n",
"\n",
"import utils\n",
"\n",
"display = utils.notebook_init() # checks"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4JnkELT0cIJg"
},
"source": [
"# 1. Predict\n",
"\n",
"`segment/predict.py` runs YOLOv5 instance segmentation inference on a variety of sources, downloading models automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and saving results to `runs/predict`. Example inference sources are:\n",
"\n",
"```shell\n",
"python segment/predict.py --source 0 # webcam\n",
" img.jpg # image \n",
" vid.mp4 # video\n",
" screen # screenshot\n",
" path/ # directory\n",
" 'path/*.jpg' # glob\n",
" 'https://youtu.be/LNwODJXcvt4' # YouTube\n",
" 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zR9ZbuQCH7FX",
"outputId": "3f67f1c7-f15e-4fa5-d251-967c3b77eaad"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1msegment/predict: \u001b[0mweights=['yolov5s-seg.pt'], source=data/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/predict-seg, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1, retina_masks=False\n",
"YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n",
"\n",
"Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s-seg.pt to yolov5s-seg.pt...\n",
"100% 14.9M/14.9M [00:01<00:00, 12.0MB/s]\n",
"\n",
"Fusing layers... \n",
"YOLOv5s-seg summary: 224 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs\n",
"image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 bus, 18.2ms\n",
"image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 1 tie, 13.4ms\n",
"Speed: 0.5ms pre-process, 15.8ms inference, 18.5ms NMS per image at shape (1, 3, 640, 640)\n",
"Results saved to \u001b[1mruns/predict-seg/exp\u001b[0m\n"
]
}
],
"source": [
"!python segment/predict.py --weights yolov5s-seg.pt --img 640 --conf 0.25 --source data/images\n",
"# display.Image(filename='runs/predict-seg/exp/zidane.jpg', width=600)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hkAzDWJ7cWTr"
},
"source": [
"&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\n",
"<img align=\"left\" src=\"https://user-images.githubusercontent.com/26833433/199030123-08c72f8d-6871-4116-8ed3-c373642cf28e.jpg\" width=\"600\">"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0eq1SMWl6Sfn"
},
"source": [
"# 2. Validate\n",
"Validate a model's accuracy on the [COCO](https://cocodataset.org/#home) dataset's `val` or `test` splits. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases). To show results by class use the `--verbose` flag."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WQPtK1QYVaD_",
"outputId": "9d751d8c-bee8-4339-cf30-9854ca530449"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/coco2017labels-segments.zip ...\n",
"Downloading http://images.cocodataset.org/zips/val2017.zip ...\n",
"######################################################################## 100.0%\n",
"######################################################################## 100.0%\n"
]
}
],
"source": [
"# Download COCO val\n",
"!bash data/scripts/get_coco.sh --val --segments # download (780M - 5000 images)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "X58w8JLpMnjH",
"outputId": "a140d67a-02da-479e-9ddb-7d54bf9e407a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1msegment/val: \u001b[0mdata=/content/yolov5/data/coco.yaml, weights=['yolov5s-seg.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=300, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs/val-seg, name=exp, exist_ok=False, half=True, dnn=False\n",
"YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n",
"\n",
"Fusing layers... \n",
"YOLOv5s-seg summary: 224 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco/val2017... 4952 images, 48 backgrounds, 0 corrupt: 100% 5000/5000 [00:03<00:00, 1361.31it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mNew cache created: /content/datasets/coco/val2017.cache\n",
" Class Images Instances Box(P R mAP50 mAP50-95) Mask(P R mAP50 mAP50-95): 100% 157/157 [01:54<00:00, 1.37it/s]\n",
" all 5000 36335 0.673 0.517 0.566 0.373 0.672 0.49 0.532 0.319\n",
"Speed: 0.6ms pre-process, 4.4ms inference, 2.9ms NMS per image at shape (32, 3, 640, 640)\n",
"Results saved to \u001b[1mruns/val-seg/exp\u001b[0m\n"
]
}
],
"source": [
"# Validate YOLOv5s-seg on COCO val\n",
"!python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640 --half"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZY2VXXXu74w5"
},
"source": [
"# 3. Train\n",
"\n",
"<p align=\"\"><a href=\"https://roboflow.com/?ref=ultralytics\"><img width=\"1000\" src=\"https://github.com/ultralytics/assets/raw/main/im/integrations-loop.png\"/></a></p>\n",
"Close the active learning loop by sampling images from your inference conditions with the `roboflow` pip package\n",
"<br><br>\n",
"\n",
"Train a YOLOv5s-seg model on the [COCO128](https://www.kaggle.com/datasets/ultralytics/coco128) dataset with `--data coco128-seg.yaml`, starting from pretrained `--weights yolov5s-seg.pt`, or from randomly initialized `--weights '' --cfg yolov5s-seg.yaml`.\n",
"\n",
"- **Pretrained [Models](https://github.com/ultralytics/yolov5/tree/master/models)** are downloaded\n",
"automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases)\n",
"- **[Datasets](https://github.com/ultralytics/yolov5/tree/master/data)** available for autodownload include: [COCO](https://github.com/ultralytics/yolov5/blob/master/data/coco.yaml), [COCO128](https://github.com/ultralytics/yolov5/blob/master/data/coco128.yaml), [VOC](https://github.com/ultralytics/yolov5/blob/master/data/VOC.yaml), [Argoverse](https://github.com/ultralytics/yolov5/blob/master/data/Argoverse.yaml), [VisDrone](https://github.com/ultralytics/yolov5/blob/master/data/VisDrone.yaml), [GlobalWheat](https://github.com/ultralytics/yolov5/blob/master/data/GlobalWheat2020.yaml), [xView](https://github.com/ultralytics/yolov5/blob/master/data/xView.yaml), [Objects365](https://github.com/ultralytics/yolov5/blob/master/data/Objects365.yaml), [SKU-110K](https://github.com/ultralytics/yolov5/blob/master/data/SKU-110K.yaml).\n",
"- **Training Results** are saved to `runs/train-seg/` with incrementing run directories, i.e. `runs/train-seg/exp2`, `runs/train-seg/exp3` etc.\n",
"<br><br>\n",
"\n",
"A **Mosaic Dataloader** is used for training which combines 4 images into 1 mosaic.\n",
"\n",
"## Train on Custom Data with Roboflow 🌟 NEW\n",
"\n",
"[Roboflow](https://roboflow.com/?ref=ultralytics) enables you to easily **organize, label, and prepare** a high quality dataset with your own custom data. Roboflow also makes it easy to establish an active learning pipeline, collaborate with your team on dataset improvement, and integrate directly into your model building workflow with the `roboflow` pip package.\n",
"\n",
"- Custom Training Example: [https://blog.roboflow.com/train-yolov5-instance-segmentation-custom-dataset/](https://blog.roboflow.com/train-yolov5-instance-segmentation-custom-dataset/?ref=ultralytics)\n",
"- Custom Training Notebook: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1JTz7kpmHsg-5qwVz2d2IH3AaenI1tv0N?usp=sharing)\n",
"<br>\n",
"\n",
"<p align=\"\"><a href=\"https://roboflow.com/?ref=ultralytics\"><img width=\"480\" src=\"https://robflow-public-assets.s3.amazonaws.com/how-to-train-yolov5-segmentation-annotation.gif\"/></a></p>Label images lightning fast (including with model-assisted labeling)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "i3oKtE4g-aNn"
},
"outputs": [],
"source": [
"# @title Select YOLOv5 🚀 logger {run: 'auto'}\n",
"logger = \"Comet\" # @param ['Comet', 'ClearML', 'TensorBoard']\n",
"\n",
"if logger == \"Comet\":\n",
" %pip install -q comet_ml\n",
" import comet_ml\n",
"\n",
" comet_ml.init()\n",
"elif logger == \"ClearML\":\n",
" %pip install -q clearml\n",
" import clearml\n",
"\n",
" clearml.browser_login()\n",
"elif logger == \"TensorBoard\":\n",
" %load_ext tensorboard\n",
" %tensorboard --logdir runs/train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1NcFxRcFdJ_O",
"outputId": "3a3e0cf7-e79c-47a5-c8e7-2d26eeeab988"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1msegment/train: \u001b[0mweights=yolov5s-seg.pt, cfg=, data=coco128-seg.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=3, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train-seg, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, mask_ratio=4, no_overlap=False\n",
"\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov5 ✅\n",
"YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n",
"\n",
"\u001b[34m\u001b[1mhyperparameters: \u001b[0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0\n",
"\u001b[34m\u001b[1mTensorBoard: \u001b[0mStart with 'tensorboard --logdir runs/train-seg', view at http://localhost:6006/\n",
"\n",
"Dataset not found ⚠️, missing paths ['/content/datasets/coco128-seg/images/train2017']\n",
"Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/coco128-seg.zip to coco128-seg.zip...\n",
"100% 6.79M/6.79M [00:01<00:00, 6.73MB/s]\n",
"Dataset download success ✅ (1.9s), saved to \u001b[1m/content/datasets\u001b[0m\n",
"\n",
" from n params module arguments \n",
" 0 -1 1 3520 models.common.Conv [3, 32, 6, 2, 2] \n",
" 1 -1 1 18560 models.common.Conv [32, 64, 3, 2] \n",
" 2 -1 1 18816 models.common.C3 [64, 64, 1] \n",
" 3 -1 1 73984 models.common.Conv [64, 128, 3, 2] \n",
" 4 -1 2 115712 models.common.C3 [128, 128, 2] \n",
" 5 -1 1 295424 models.common.Conv [128, 256, 3, 2] \n",
" 6 -1 3 625152 models.common.C3 [256, 256, 3] \n",
" 7 -1 1 1180672 models.common.Conv [256, 512, 3, 2] \n",
" 8 -1 1 1182720 models.common.C3 [512, 512, 1] \n",
" 9 -1 1 656896 models.common.SPPF [512, 512, 5] \n",
" 10 -1 1 131584 models.common.Conv [512, 256, 1, 1] \n",
" 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
" 12 [-1, 6] 1 0 models.common.Concat [1] \n",
" 13 -1 1 361984 models.common.C3 [512, 256, 1, False] \n",
" 14 -1 1 33024 models.common.Conv [256, 128, 1, 1] \n",
" 15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
" 16 [-1, 4] 1 0 models.common.Concat [1] \n",
" 17 -1 1 90880 models.common.C3 [256, 128, 1, False] \n",
" 18 -1 1 147712 models.common.Conv [128, 128, 3, 2] \n",
" 19 [-1, 14] 1 0 models.common.Concat [1] \n",
" 20 -1 1 296448 models.common.C3 [256, 256, 1, False] \n",
" 21 -1 1 590336 models.common.Conv [256, 256, 3, 2] \n",
" 22 [-1, 10] 1 0 models.common.Concat [1] \n",
" 23 -1 1 1182720 models.common.C3 [512, 512, 1, False] \n",
" 24 [17, 20, 23] 1 615133 models.yolo.Segment [80, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], 32, 128, [128, 256, 512]]\n",
"Model summary: 225 layers, 7621277 parameters, 7621277 gradients, 26.6 GFLOPs\n",
"\n",
"Transferred 367/367 items from yolov5s-seg.pt\n",
"\u001b[34m\u001b[1mAMP: \u001b[0mchecks passed ✅\n",
"\u001b[34m\u001b[1moptimizer:\u001b[0m SGD(lr=0.01) with parameter groups 60 weight(decay=0.0), 63 weight(decay=0.0005), 63 bias\n",
"\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mScanning /content/datasets/coco128-seg/labels/train2017... 126 images, 2 backgrounds, 0 corrupt: 100% 128/128 [00:00<00:00, 1389.59it/s]\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: /content/datasets/coco128-seg/labels/train2017.cache\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB ram): 100% 128/128 [00:00<00:00, 238.86it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco128-seg/labels/train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100% 128/128 [00:00<?, ?it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB ram): 100% 128/128 [00:01<00:00, 98.90it/s]\n",
"\n",
"\u001b[34m\u001b[1mAutoAnchor: \u001b[0m4.27 anchors/target, 0.994 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅\n",
"Plotting labels to runs/train-seg/exp/labels.jpg... \n",
"Image sizes 640 train, 640 val\n",
"Using 2 dataloader workers\n",
"Logging results to \u001b[1mruns/train-seg/exp\u001b[0m\n",
"Starting training for 3 epochs...\n",
"\n",
" Epoch GPU_mem box_loss seg_loss obj_loss cls_loss Instances Size\n",
" 0/2 4.92G 0.0417 0.04646 0.06066 0.02126 192 640: 100% 8/8 [00:08<00:00, 1.10s/it]\n",
" Class Images Instances Box(P R mAP50 mAP50-95) Mask(P R mAP50 mAP50-95): 100% 4/4 [00:02<00:00, 1.81it/s]\n",
" all 128 929 0.737 0.649 0.715 0.492 0.719 0.617 0.658 0.408\n",
"\n",
" Epoch GPU_mem box_loss seg_loss obj_loss cls_loss Instances Size\n",
" 1/2 6.29G 0.04157 0.04503 0.05772 0.01777 208 640: 100% 8/8 [00:09<00:00, 1.21s/it]\n",
" Class Images Instances Box(P R mAP50 mAP50-95) Mask(P R mAP50 mAP50-95): 100% 4/4 [00:02<00:00, 1.87it/s]\n",
" all 128 929 0.756 0.674 0.738 0.506 0.725 0.64 0.68 0.422\n",
"\n",
" Epoch GPU_mem box_loss seg_loss obj_loss cls_loss Instances Size\n",
" 2/2 6.29G 0.0425 0.04793 0.06784 0.01863 161 640: 100% 8/8 [00:03<00:00, 2.02it/s]\n",
" Class Images Instances Box(P R mAP50 mAP50-95) Mask(P R mAP50 mAP50-95): 100% 4/4 [00:02<00:00, 1.88it/s]\n",
" all 128 929 0.736 0.694 0.747 0.522 0.769 0.622 0.683 0.427\n",
"\n",
"3 epochs completed in 0.009 hours.\n",
"Optimizer stripped from runs/train-seg/exp/weights/last.pt, 15.6MB\n",
"Optimizer stripped from runs/train-seg/exp/weights/best.pt, 15.6MB\n",
"\n",
"Validating runs/train-seg/exp/weights/best.pt...\n",
"Fusing layers... \n",
"Model summary: 165 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs\n",
" Class Images Instances Box(P R mAP50 mAP50-95) Mask(P R mAP50 mAP50-95): 100% 4/4 [00:06<00:00, 1.59s/it]\n",
" all 128 929 0.738 0.694 0.746 0.522 0.759 0.625 0.682 0.426\n",
" person 128 254 0.845 0.756 0.836 0.55 0.861 0.669 0.759 0.407\n",
" bicycle 128 6 0.475 0.333 0.549 0.341 0.711 0.333 0.526 0.322\n",
" car 128 46 0.612 0.565 0.539 0.257 0.555 0.435 0.477 0.171\n",
" motorcycle 128 5 0.73 0.8 0.752 0.571 0.747 0.8 0.752 0.42\n",
" airplane 128 6 1 0.943 0.995 0.732 0.92 0.833 0.839 0.555\n",
" bus 128 7 0.677 0.714 0.722 0.653 0.711 0.714 0.722 0.593\n",
" train 128 3 1 0.951 0.995 0.551 1 0.884 0.995 0.781\n",
" truck 128 12 0.555 0.417 0.457 0.285 0.624 0.417 0.397 0.277\n",
" boat 128 6 0.624 0.5 0.584 0.186 1 0.326 0.412 0.133\n",
" traffic light 128 14 0.513 0.302 0.411 0.247 0.435 0.214 0.376 0.251\n",
" stop sign 128 2 0.824 1 0.995 0.796 0.906 1 0.995 0.747\n",
" bench 128 9 0.75 0.667 0.763 0.367 0.724 0.585 0.698 0.209\n",
" bird 128 16 0.961 1 0.995 0.686 0.918 0.938 0.91 0.525\n",
" cat 128 4 0.771 0.857 0.945 0.752 0.76 0.8 0.945 0.728\n",
" dog 128 9 0.987 0.778 0.963 0.681 1 0.705 0.89 0.574\n",
" horse 128 2 0.703 1 0.995 0.697 0.759 1 0.995 0.249\n",
" elephant 128 17 0.916 0.882 0.93 0.691 0.811 0.765 0.829 0.537\n",
" bear 128 1 0.664 1 0.995 0.995 0.701 1 0.995 0.895\n",
" zebra 128 4 0.864 1 0.995 0.921 0.879 1 0.995 0.804\n",
" giraffe 128 9 0.883 0.889 0.94 0.683 0.845 0.778 0.78 0.463\n",
" backpack 128 6 1 0.59 0.701 0.372 1 0.474 0.52 0.252\n",
" umbrella 128 18 0.654 0.839 0.887 0.52 0.517 0.556 0.427 0.229\n",
" handbag 128 19 0.54 0.211 0.408 0.221 0.796 0.206 0.396 0.196\n",
" tie 128 7 0.864 0.857 0.857 0.577 0.925 0.857 0.857 0.534\n",
" suitcase 128 4 0.716 1 0.945 0.647 0.767 1 0.945 0.634\n",
" frisbee 128 5 0.708 0.8 0.761 0.643 0.737 0.8 0.761 0.501\n",
" skis 128 1 0.691 1 0.995 0.796 0.761 1 0.995 0.199\n",
" snowboard 128 7 0.918 0.857 0.904 0.604 0.32 0.286 0.235 0.137\n",
" sports ball 128 6 0.902 0.667 0.701 0.466 0.727 0.5 0.497 0.471\n",
" kite 128 10 0.586 0.4 0.511 0.231 0.663 0.394 0.417 0.139\n",
" baseball bat 128 4 0.359 0.5 0.401 0.169 0.631 0.5 0.526 0.133\n",
" baseball glove 128 7 1 0.519 0.58 0.327 0.687 0.286 0.455 0.328\n",
" skateboard 128 5 0.729 0.8 0.862 0.631 0.599 0.6 0.604 0.379\n",
" tennis racket 128 7 0.57 0.714 0.645 0.448 0.608 0.714 0.645 0.412\n",
" bottle 128 18 0.469 0.393 0.537 0.357 0.661 0.389 0.543 0.349\n",
" wine glass 128 16 0.677 0.938 0.866 0.441 0.53 0.625 0.67 0.334\n",
" cup 128 36 0.777 0.722 0.812 0.466 0.725 0.583 0.762 0.467\n",
" fork 128 6 0.948 0.333 0.425 0.27 0.527 0.167 0.18 0.102\n",
" knife 128 16 0.757 0.587 0.669 0.458 0.79 0.5 0.552 0.34\n",
" spoon 128 22 0.74 0.364 0.559 0.269 0.925 0.364 0.513 0.213\n",
" bowl 128 28 0.766 0.714 0.725 0.559 0.803 0.584 0.665 0.353\n",
" banana 128 1 0.408 1 0.995 0.398 0.539 1 0.995 0.497\n",
" sandwich 128 2 1 0 0.695 0.536 1 0 0.498 0.448\n",
" orange 128 4 0.467 1 0.995 0.693 0.518 1 0.995 0.663\n",
" broccoli 128 11 0.462 0.455 0.383 0.259 0.548 0.455 0.384 0.256\n",
" carrot 128 24 0.631 0.875 0.77 0.533 0.757 0.909 0.853 0.499\n",
" hot dog 128 2 0.555 1 0.995 0.995 0.578 1 0.995 0.796\n",
" pizza 128 5 0.89 0.8 0.962 0.796 1 0.778 0.962 0.766\n",
" donut 128 14 0.695 1 0.893 0.772 0.704 1 0.893 0.696\n",
" cake 128 4 0.826 1 0.995 0.92 0.862 1 0.995 0.846\n",
" chair 128 35 0.53 0.571 0.613 0.336 0.67 0.6 0.538 0.271\n",
" couch 128 6 0.972 0.667 0.833 0.627 1 0.62 0.696 0.394\n",
" potted plant 128 14 0.7 0.857 0.883 0.552 0.836 0.857 0.883 0.473\n",
" bed 128 3 0.979 0.667 0.83 0.366 1 0 0.83 0.373\n",
" dining table 128 13 0.775 0.308 0.505 0.364 0.644 0.231 0.25 0.0804\n",
" toilet 128 2 0.836 1 0.995 0.846 0.887 1 0.995 0.797\n",
" tv 128 2 0.6 1 0.995 0.846 0.655 1 0.995 0.896\n",
" laptop 128 3 0.822 0.333 0.445 0.307 1 0 0.392 0.12\n",
" mouse 128 2 1 0 0 0 1 0 0 0\n",
" remote 128 8 0.745 0.5 0.62 0.459 0.821 0.5 0.624 0.449\n",
" cell phone 128 8 0.686 0.375 0.502 0.272 0.488 0.25 0.28 0.132\n",
" microwave 128 3 0.831 1 0.995 0.722 0.867 1 0.995 0.592\n",
" oven 128 5 0.439 0.4 0.435 0.294 0.823 0.6 0.645 0.418\n",
" sink 128 6 0.677 0.5 0.565 0.448 0.722 0.5 0.46 0.362\n",
" refrigerator 128 5 0.533 0.8 0.783 0.524 0.558 0.8 0.783 0.527\n",
" book 128 29 0.732 0.379 0.423 0.196 0.69 0.207 0.38 0.131\n",
" clock 128 9 0.889 0.778 0.917 0.677 0.908 0.778 0.875 0.604\n",
" vase 128 2 0.375 1 0.995 0.995 0.455 1 0.995 0.796\n",
" scissors 128 1 1 0 0.0166 0.00166 1 0 0 0\n",
" teddy bear 128 21 0.813 0.829 0.841 0.457 0.826 0.678 0.786 0.422\n",
" toothbrush 128 5 0.806 1 0.995 0.733 0.991 1 0.995 0.628\n",
"Results saved to \u001b[1mruns/train-seg/exp\u001b[0m\n"
]
}
],
"source": [
"# Train YOLOv5s on COCO128 for 3 epochs\n",
"!python segment/train.py --img 640 --batch 16 --epochs 3 --data coco128-seg.yaml --weights yolov5s-seg.pt --cache"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "15glLzbQx5u0"
},
"source": [
"# 4. Visualize"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nWOsI5wJR1o3"
},
"source": [
"## Comet Logging and Visualization 🌟 NEW\n",
"\n",
"[Comet](https://www.comet.com/site/lp/yolov5-with-comet/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab) is now fully integrated with YOLOv5. Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://www.comet.com/docs/v2/guides/comet-dashboard/code-panels/about-panels/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab)! Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes!\n",
"\n",
"Getting started is easy:\n",
"```shell\n",
"pip install comet_ml # 1. install\n",
"export COMET_API_KEY=<Your API Key> # 2. paste API key\n",
"python train.py --img 640 --epochs 3 --data coco128.yaml --weights yolov5s.pt # 3. train\n",
"```\n",
"To learn more about all of the supported Comet features for this integration, check out the [Comet Tutorial](https://docs.ultralytics.com/yolov5/tutorials/comet_logging_integration). If you'd like to learn more about Comet, head over to our [documentation](https://www.comet.com/docs/v2/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab). Get started by trying out the Comet Colab Notebook:\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing)\n",
"\n",
"<a href=\"https://bit.ly/yolov5-readme-comet2\">\n",
"<img alt=\"Comet Dashboard\" src=\"https://user-images.githubusercontent.com/26833433/202851203-164e94e1-2238-46dd-91f8-de020e9d6b41.png\" width=\"1280\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Lay2WsTjNJzP"
},
"source": [
"## ClearML Logging and Automation 🌟 NEW\n",
"\n",
"[ClearML](https://cutt.ly/yolov5-notebook-clearml) is completely integrated into YOLOv5 to track your experimentation, manage dataset versions and even remotely execute training runs. To enable ClearML (check cells above):\n",
"\n",
"- `pip install clearml`\n",
"- run `clearml-init` to connect to a ClearML server (**deploy your own [open-source server](https://github.com/allegroai/clearml-server)**, or use our [free hosted server](https://cutt.ly/yolov5-notebook-clearml))\n",
"\n",
"You'll get all the great expected features from an experiment manager: live updates, model upload, experiment comparison etc. but ClearML also tracks uncommitted changes and installed packages for example. Thanks to that ClearML Tasks (which is what we call experiments) are also reproducible on different machines! With only 1 extra line, we can schedule a YOLOv5 training task on a queue to be executed by any number of ClearML Agents (workers).\n",
"\n",
"You can use ClearML Data to version your dataset and then pass it to YOLOv5 simply using its unique ID. This will help you keep track of your data without adding extra hassle. Explore the [ClearML Tutorial](https://docs.ultralytics.com/yolov5/tutorials/clearml_logging_integration) for details!\n",
"\n",
"<a href=\"https://cutt.ly/yolov5-notebook-clearml\">\n",
"<img alt=\"ClearML Experiment Management UI\" src=\"https://github.com/thepycoder/clearml_screenshots/raw/main/scalars.jpg\" width=\"1280\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-WPvRbS5Swl6"
},
"source": [
"## Local Logging\n",
"\n",
"Training results are automatically logged with [Tensorboard](https://www.tensorflow.org/tensorboard) and [CSV](https://github.com/ultralytics/yolov5/pull/4148) loggers to `runs/train`, with a new experiment directory created for each new training as `runs/train/exp2`, `runs/train/exp3`, etc.\n",
"\n",
"This directory contains train and val statistics, mosaics, labels, predictions and augmentated mosaics, as well as metrics and charts including precision-recall (PR) curves and confusion matrices. \n",
"\n",
"<img alt=\"Local logging results\" src=\"https://user-images.githubusercontent.com/26833433/183222430-e1abd1b7-782c-4cde-b04d-ad52926bf818.jpg\" width=\"1280\"/>\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Zelyeqbyt3GD"
},
"source": [
"# Environments\n",
"\n",
"YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):\n",
"\n",
"- **Notebooks** with free GPU: <a href=\"https://bit.ly/yolov5-paperspace-notebook\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"></a> <a href=\"https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a> <a href=\"https://www.kaggle.com/models/ultralytics/yolov5\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
"- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/)\n",
"- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/)\n",
"- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) <a href=\"https://hub.docker.com/r/ultralytics/yolov5\"><img src=\"https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker\" alt=\"Docker Pulls\"></a>\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6Qu7Iesl0p54"
},
"source": [
"# Status\n",
"\n",
"![YOLOv5 CI](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg)\n",
"\n",
"If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), testing ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on macOS, Windows, and Ubuntu every 24 hours and on every commit.\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IEijrePND_2I"
},
"source": [
"# Appendix\n",
"\n",
"Additional content below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GMusP4OAxFu6"
},
"outputs": [],
"source": [
"# YOLOv5 PyTorch HUB Inference (DetectionModels only)\n",
"\n",
"model = torch.hub.load(\n",
" \"ultralytics/yolov5\", \"yolov5s-seg\", force_reload=True, trust_repo=True\n",
") # or yolov5n - yolov5x6 or custom\n",
"im = \"https://ultralytics.com/images/zidane.jpg\" # file, Path, PIL.Image, OpenCV, nparray, list\n",
"results = model(im) # inference\n",
"results.print() # or .show(), .save(), .crop(), .pandas(), etc."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"name": "YOLOv5 Segmentation Tutorial",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

yolov5/segment/val.py Normal file
View File

@@ -0,0 +1,522 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Validate a trained YOLOv5 segment model on a segment dataset.
Usage:
$ bash data/scripts/get_coco.sh --val --segments # download COCO-segments val split (1G, 5000 images)
$ python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640 # validate COCO-segments
Usage - formats:
$ python segment/val.py --weights yolov5s-seg.pt # PyTorch
yolov5s-seg.torchscript # TorchScript
yolov5s-seg.onnx # ONNX Runtime or OpenCV DNN with --dnn
                                      yolov5s-seg_openvino_model     # OpenVINO
yolov5s-seg.engine # TensorRT
yolov5s-seg.mlmodel # CoreML (macOS-only)
yolov5s-seg_saved_model # TensorFlow SavedModel
yolov5s-seg.pb # TensorFlow GraphDef
yolov5s-seg.tflite # TensorFlow Lite
yolov5s-seg_edgetpu.tflite # TensorFlow Edge TPU
yolov5s-seg_paddle_model # PaddlePaddle
"""
import argparse
import json
import os
import subprocess
import sys
from multiprocessing.pool import ThreadPool
from pathlib import Path
import numpy as np
import torch
from tqdm import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
import torch.nn.functional as F
from models.common import DetectMultiBackend
from models.yolo import SegmentationModel
from utils.callbacks import Callbacks
from utils.general import (
LOGGER,
NUM_THREADS,
TQDM_BAR_FORMAT,
Profile,
check_dataset,
check_img_size,
check_requirements,
check_yaml,
coco80_to_coco91_class,
colorstr,
increment_path,
non_max_suppression,
print_args,
scale_boxes,
xywh2xyxy,
xyxy2xywh,
)
from utils.metrics import ConfusionMatrix, box_iou
from utils.plots import output_to_target, plot_val_study
from utils.segment.dataloaders import create_dataloader
from utils.segment.general import mask_iou, process_mask, process_mask_native, scale_image
from utils.segment.metrics import Metrics, ap_per_class_box_and_mask
from utils.segment.plots import plot_images_and_masks
from utils.torch_utils import de_parallel, select_device, smart_inference_mode
def save_one_txt(predn, save_conf, shape, file):
"""Saves detection results in txt format; includes class, xywh (normalized), optionally confidence if `save_conf` is
True.
"""
gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh
for *xyxy, conf, cls in predn.tolist():
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
with open(file, "a") as f:
f.write(("%g " * len(line)).rstrip() % line + "\n")
def save_one_json(predn, jdict, path, class_map, pred_masks):
"""
Saves a JSON file with detection results including bounding boxes, category IDs, scores, and segmentation masks.
Example JSON result: {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}.
"""
from pycocotools.mask import encode
def single_encode(x):
"""Encodes binary mask arrays into RLE (Run-Length Encoding) format for JSON serialization."""
rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0]
rle["counts"] = rle["counts"].decode("utf-8")
return rle
image_id = int(path.stem) if path.stem.isnumeric() else path.stem
box = xyxy2xywh(predn[:, :4]) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
pred_masks = np.transpose(pred_masks, (2, 0, 1))
with ThreadPool(NUM_THREADS) as pool:
rles = pool.map(single_encode, pred_masks)
for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
jdict.append(
{
"image_id": image_id,
"category_id": class_map[int(p[5])],
"bbox": [round(x, 3) for x in b],
"score": round(p[4], 5),
"segmentation": rles[i],
}
)
def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
"""
Return correct prediction matrix
Arguments:
detections (array[N, 6]), x1, y1, x2, y2, conf, class
labels (array[M, 5]), class, x1, y1, x2, y2
Returns:
correct (array[N, 10]), for 10 IoU levels.
"""
if masks:
if overlap:
nl = len(labels)
index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
gt_masks = gt_masks.repeat(nl, 1, 1) # shape(1,640,640) -> (n,640,640)
gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
if gt_masks.shape[1:] != pred_masks.shape[1:]:
gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
gt_masks = gt_masks.gt_(0.5)
iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
else: # boxes
iou = box_iou(labels[:, 1:], detections[:, :4])
correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
correct_class = labels[:, 0:1] == detections[:, 5]
for i in range(len(iouv)):
x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match
if x[0].shape[0]:
matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou]
if x[0].shape[0] > 1:
matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
# matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
correct[matches[:, 1].astype(int), i] = True
return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
@smart_inference_mode()
def run(
data,
weights=None, # model.pt path(s)
batch_size=32, # batch size
imgsz=640, # inference size (pixels)
conf_thres=0.001, # confidence threshold
iou_thres=0.6, # NMS IoU threshold
max_det=300, # maximum detections per image
task="val", # train, val, test, speed or study
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
workers=8, # max dataloader workers (per RANK in DDP mode)
single_cls=False, # treat as single-class dataset
augment=False, # augmented inference
verbose=False, # verbose output
save_txt=False, # save results to *.txt
save_hybrid=False, # save label+prediction hybrid results to *.txt
save_conf=False, # save confidences in --save-txt labels
save_json=False, # save a COCO-JSON results file
project=ROOT / "runs/val-seg", # save to project/name
name="exp", # save to project/name
exist_ok=False, # existing project/name ok, do not increment
half=True, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
model=None,
dataloader=None,
save_dir=Path(""),
plots=True,
overlap=False,
mask_downsample_ratio=1,
compute_loss=None,
callbacks=Callbacks(),
):
"""Validates a YOLOv5 segmentation model on specified dataset, producing metrics, plots, and optional JSON
output.
"""
if save_json:
check_requirements("pycocotools>=2.0.6")
process = process_mask_native # more accurate
else:
process = process_mask # faster
# Initialize/load model and set device
training = model is not None
if training: # called by train.py
device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model
half &= device.type != "cpu" # half precision only supported on CUDA
model.half() if half else model.float()
nm = de_parallel(model).model[-1].nm # number of masks
else: # called directly
device = select_device(device, batch_size=batch_size)
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
imgsz = check_img_size(imgsz, s=stride) # check image size
half = model.fp16 # FP16 supported on limited backends with CUDA
nm = de_parallel(model).model.model[-1].nm if isinstance(model, SegmentationModel) else 32 # number of masks
if engine:
batch_size = model.batch_size
else:
device = model.device
if not (pt or jit):
batch_size = 1 # export.py models default to batch-size 1
LOGGER.info(f"Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")
# Data
data = check_dataset(data) # check
# Configure
model.eval()
cuda = device.type != "cpu"
is_coco = isinstance(data.get("val"), str) and data["val"].endswith(f"coco{os.sep}val2017.txt") # COCO dataset
nc = 1 if single_cls else int(data["nc"]) # number of classes
iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95
niou = iouv.numel()
# Dataloader
if not training:
if pt and not single_cls: # check --weights are trained on --data
ncm = model.model.nc
assert ncm == nc, (
f"{weights} ({ncm} classes) trained on different --data than what you passed ({nc} "
f"classes). Pass correct combination of --weights and --data that are trained together."
)
model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup
pad, rect = (0.0, False) if task == "speed" else (0.5, pt) # square inference for benchmarks
task = task if task in ("train", "val", "test") else "val" # path to train/val/test images
dataloader = create_dataloader(
data[task],
imgsz,
batch_size,
stride,
single_cls,
pad=pad,
rect=rect,
workers=workers,
prefix=colorstr(f"{task}: "),
overlap_mask=overlap,
mask_downsample_ratio=mask_downsample_ratio,
)[0]
seen = 0
confusion_matrix = ConfusionMatrix(nc=nc)
names = model.names if hasattr(model, "names") else model.module.names # get class names
if isinstance(names, (list, tuple)): # old format
names = dict(enumerate(names))
class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
s = ("%22s" + "%11s" * 10) % (
"Class",
"Images",
"Instances",
"Box(P",
"R",
"mAP50",
"mAP50-95)",
"Mask(P",
"R",
"mAP50",
"mAP50-95)",
)
dt = Profile(device=device), Profile(device=device), Profile(device=device)
metrics = Metrics()
loss = torch.zeros(4, device=device)
jdict, stats = [], []
# callbacks.run('on_val_start')
pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT) # progress bar
for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar):
# callbacks.run('on_val_batch_start')
with dt[0]:
if cuda:
im = im.to(device, non_blocking=True)
targets = targets.to(device)
masks = masks.to(device)
masks = masks.float()
im = im.half() if half else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
nb, _, height, width = im.shape # batch size, channels, height, width
# Inference
with dt[1]:
preds, protos, train_out = model(im) if compute_loss else (*model(im, augment=augment)[:2], None)
# Loss
if compute_loss:
            loss += compute_loss((train_out, protos), targets, masks)[1]  # box, seg, obj, cls
# NMS
targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels
lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
with dt[2]:
preds = non_max_suppression(
preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det, nm=nm
)
# Metrics
plot_masks = [] # masks for plotting
for si, (pred, proto) in enumerate(zip(preds, protos)):
labels = targets[targets[:, 0] == si, 1:]
nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions
path, shape = Path(paths[si]), shapes[si][0]
correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init
correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init
seen += 1
if npr == 0:
if nl:
stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0]))
if plots:
confusion_matrix.process_batch(detections=None, labels=labels[:, 0])
continue
# Masks
midx = [si] if overlap else targets[:, 0] == si
gt_masks = masks[midx]
pred_masks = process(proto, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:])
# Predictions
if single_cls:
pred[:, 5] = 0
predn = pred.clone()
scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred
# Evaluate
if nl:
tbox = xywh2xyxy(labels[:, 1:5]) # target boxes
scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels
labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels
correct_bboxes = process_batch(predn, labelsn, iouv)
correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=overlap, masks=True)
if plots:
confusion_matrix.process_batch(predn, labelsn)
stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (conf, pcls, tcls)
pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
if plots and batch_i < 3:
plot_masks.append(pred_masks[:15]) # filter top 15 to plot
# Save/log
if save_txt:
save_one_txt(predn, save_conf, shape, file=save_dir / "labels" / f"{path.stem}.txt")
if save_json:
pred_masks = scale_image(
im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]
)
save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary
# callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
# Plot images
if plots and batch_i < 3:
if len(plot_masks):
plot_masks = torch.cat(plot_masks, dim=0)
plot_images_and_masks(im, targets, masks, paths, save_dir / f"val_batch{batch_i}_labels.jpg", names)
plot_images_and_masks(
im,
output_to_target(preds, max_det=15),
plot_masks,
paths,
save_dir / f"val_batch{batch_i}_pred.jpg",
names,
) # pred
# callbacks.run('on_val_batch_end')
# Compute metrics
stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy
if len(stats) and stats[0].any():
results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names)
metrics.update(results)
nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class
# Print results
pf = "%22s" + "%11i" * 2 + "%11.3g" * 8 # print format
LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results()))
if nt.sum() == 0:
LOGGER.warning(f"WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels")
# Print results per class
if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
for i, c in enumerate(metrics.ap_class_index):
LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i)))
# Print speeds
t = tuple(x.t / seen * 1e3 for x in dt) # speeds per image
if not training:
shape = (batch_size, 3, imgsz, imgsz)
LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t)
# Plots
if plots:
confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
# callbacks.run('on_val_end')
mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask = metrics.mean_results()
# Save JSON
if save_json and len(jdict):
w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else "" # weights
anno_json = str(Path("../../datasets/coco/annotations/instances_val2017.json")) # annotations
pred_json = str(save_dir / f"{w}_predictions.json") # predictions
LOGGER.info(f"\nEvaluating pycocotools mAP... saving {pred_json}...")
with open(pred_json, "w") as f:
json.dump(jdict, f)
try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
anno = COCO(anno_json) # init annotations api
pred = anno.loadRes(pred_json) # init predictions api
results = []
for eval in COCOeval(anno, pred, "bbox"), COCOeval(anno, pred, "segm"):
if is_coco:
eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # img ID to evaluate
eval.evaluate()
eval.accumulate()
eval.summarize()
results.extend(eval.stats[:2]) # update results (mAP@0.5:0.95, mAP@0.5)
map_bbox, map50_bbox, map_mask, map50_mask = results
except Exception as e:
LOGGER.info(f"pycocotools unable to run: {e}")
# Return results
model.float() # for training
if not training:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
final_metric = mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask
return (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), metrics.get_maps(nc), t
def parse_opt():
"""Parses command line arguments for configuring YOLOv5 options like dataset path, weights, batch size, and
inference settings.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--data", type=str, default=ROOT / "data/coco128-seg.yaml", help="dataset.yaml path")
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-seg.pt", help="model path(s)")
parser.add_argument("--batch-size", type=int, default=32, help="batch size")
parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=640, help="inference size (pixels)")
parser.add_argument("--conf-thres", type=float, default=0.001, help="confidence threshold")
parser.add_argument("--iou-thres", type=float, default=0.6, help="NMS IoU threshold")
parser.add_argument("--max-det", type=int, default=300, help="maximum detections per image")
parser.add_argument("--task", default="val", help="train, val, test, speed or study")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
parser.add_argument("--single-cls", action="store_true", help="treat as single-class dataset")
parser.add_argument("--augment", action="store_true", help="augmented inference")
parser.add_argument("--verbose", action="store_true", help="report mAP by class")
parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
parser.add_argument("--save-hybrid", action="store_true", help="save label+prediction hybrid results to *.txt")
parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")
parser.add_argument("--save-json", action="store_true", help="save a COCO-JSON results file")
parser.add_argument("--project", default=ROOT / "runs/val-seg", help="save results to project/name")
parser.add_argument("--name", default="exp", help="save to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
opt = parser.parse_args()
opt.data = check_yaml(opt.data) # check YAML
# opt.save_json |= opt.data.endswith('coco.yaml')
opt.save_txt |= opt.save_hybrid
print_args(vars(opt))
return opt

def main(opt):
"""Executes YOLOv5 tasks including training, validation, testing, speed, and study with configurable options."""
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
if opt.task in ("train", "val", "test"): # run normally
if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466
LOGGER.warning(f"WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results")
if opt.save_hybrid:
LOGGER.warning("WARNING ⚠️ --save-hybrid returns high mAP from hybrid labels, not from predictions alone")
run(**vars(opt))
else:
weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]
opt.half = torch.cuda.is_available() and opt.device != "cpu" # FP16 for fastest results
if opt.task == "speed": # speed benchmarks
# python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt...
opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
for opt.weights in weights:
run(**vars(opt), plots=False)
elif opt.task == "study": # speed vs mAP benchmarks
# python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt...
for opt.weights in weights:
f = f"study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt" # filename to save to
x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis
for opt.imgsz in x: # img-size
LOGGER.info(f"\nRunning {f} --imgsz {opt.imgsz}...")
r, _, t = run(**vars(opt), plots=False)
y.append(r + t) # results and times
np.savetxt(f, y, fmt="%10.4g") # save
subprocess.run(["zip", "-r", "study.zip", "study_*.txt"])
plot_val_study(x=x) # plot
else:
raise NotImplementedError(f'--task {opt.task} not in ("train", "val", "test", "speed", "study")')

if __name__ == "__main__":
opt = parse_opt()
main(opt)
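
As a usage note: the validator above can also be driven programmatically instead of through the CLI. A minimal sketch, assuming YOLOv5 is vendored as the `yolov5` package and this file is importable as `yolov5.segment.val` (the module path is an assumption, not confirmed by this diff):

# Hypothetical programmatic invocation of the segmentation validator.
from yolov5.segment import val as seg_val  # assumed module path

results, maps, times = seg_val.run(
    data="data/coco128-seg.yaml",  # dataset yaml (example path)
    weights="yolov5s-seg.pt",      # checkpoint to evaluate
    batch_size=16,
    imgsz=640,
    conf_thres=0.001,  # keep low for valid mAP (see the warning in main())
    iou_thres=0.6,
)
# run() returns (*final_metric, *loss), per-class maps, and the speed tuple t;
# final_metric packs (mp, mr, mAP@0.5, mAP@0.5:0.95) for boxes, then masks.
print(f"bbox mAP@0.5 = {results[2]:.3f}, mask mAP@0.5 = {results[6]:.3f}")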

View File

@ -409,7 +409,7 @@ def train(hyp, opt, device, callbacks):
imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False)
# Forward
with torch.amp.autocast(device_type='cuda', enabled=amp):
with torch.amp.autocast(device_type='cuda',enabled=amp):
pred = model(imgs) # forward
loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size
if RANK != -1:
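
The hunk above uses the torch.amp.autocast(device_type=..., enabled=...) form, which supersedes the deprecated torch.cuda.amp.autocast in recent PyTorch releases. For context, a minimal sketch of an AMP training step around that context manager, with GradScaler handling loss scaling (the model and data below are stand-ins, not taken from this diff; requires a CUDA device):

import torch
import torch.nn as nn

model = nn.Linear(10, 2).cuda()  # stand-in for the YOLOv5 model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.amp.GradScaler('cuda', enabled=True)  # torch.amp signature, PyTorch >= 2.3

imgs = torch.randn(4, 10, device='cuda')
targets = torch.randint(0, 2, (4,), device='cuda')

with torch.amp.autocast(device_type='cuda', enabled=True):
    pred = model(imgs)  # forward runs in reduced precision where safe
    loss = nn.functional.cross_entropy(pred, targets)

scaler.scale(loss).backward()  # backward on the scaled loss
scaler.step(optimizer)         # unscales gradients, then steps the optimizer
scaler.update()
optimizer.zero_grad()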

View File

@ -10,8 +10,8 @@ import torch
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy
from utils.metrics import bbox_ioa
from yolov5.utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy
from yolov5.utils.metrics import bbox_ioa
IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean
IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation
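
This and the following hunks repoint the vendored YOLOv5 code from top-level imports (utils.*, models.*) to package-qualified ones (yolov5.utils.*, yolov5.models.*), so the modules resolve when imported from the FastAPI backend rather than from inside the yolov5/ directory. A minimal sketch of the difference, assuming the repository root (the directory containing yolov5/) is on sys.path:

import sys
from pathlib import Path

# Assumption: the backend runs from the repository root; if not, the
# directory containing the vendored yolov5/ package must be added explicitly.
repo_root = Path(".").resolve()
if str(repo_root) not in sys.path:
    sys.path.append(str(repo_root))

# The package-qualified import resolves regardless of the caller's cwd,
# whereas the old `from utils.general import LOGGER` only worked with
# yolov5/ itself as the import root.
from yolov5.utils.general import LOGGER
LOGGER.info("vendored yolov5 imported as a package")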

View File

@ -26,7 +26,7 @@ from PIL import ExifTags, Image, ImageOps
from torch.utils.data import DataLoader, Dataset, dataloader, distributed
from tqdm import tqdm
from utils.augmentations import (
from yolov5.utils.augmentations import (
Albumentations,
augment_hsv,
classify_albumentations,
@ -36,7 +36,7 @@ from utils.augmentations import (
mixup,
random_perspective,
)
from utils.general import (
from yolov5.utils.general import (
DATASETS_DIR,
LOGGER,
NUM_THREADS,
@ -55,7 +55,7 @@ from utils.general import (
xywhn2xyxy,
xyxy2xywhn,
)
from utils.torch_utils import torch_distributed_zero_first
from yolov5.utils.torch_utils import torch_distributed_zero_first
# Parameters
HELP_URL = "See https://docs.ultralytics.com/yolov5/tutorials/train_custom_data"

View File

@ -45,9 +45,9 @@ except (ImportError, AssertionError):
from ultralytics.utils.checks import check_requirements
from utils import TryExcept, emojis
from utils.downloads import curl_download, gsutil_getsize
from utils.metrics import box_iou, fitness
from yolov5.utils import TryExcept, emojis
from yolov5.utils.downloads import curl_download, gsutil_getsize
from yolov5.utils.metrics import box_iou, fitness
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
@ -585,7 +585,7 @@ def check_dataset(data, autodownload=True):
def check_amp(model):
"""Checks PyTorch AMP functionality for a model, returns True if AMP operates correctly, otherwise False."""
from models.common import AutoShape, DetectMultiBackend
from yolov5.models.common import AutoShape, DetectMultiBackend
def amp_allclose(model, im):
"""Compares FP32 and AMP model inference outputs, ensuring they are close within a 10% absolute tolerance."""
@ -611,6 +611,27 @@ def check_amp(model):
return False
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
# Rescale coords (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
coords[:, [0, 2]] -= pad[0] # x padding
coords[:, [1, 3]] -= pad[1] # y padding
coords[:, :4] /= gain
    # Clip xyxy bounding boxes to image shape (height, width)
coords[:, 0].clamp_(0, img0_shape[1]) # x1
coords[:, 1].clamp_(0, img0_shape[0]) # y1
coords[:, 2].clamp_(0, img0_shape[1]) # x2
coords[:, 3].clamp_(0, img0_shape[0]) # y2
return coords
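
A short usage sketch for scale_coords above, mapping boxes from the 640x640 letterboxed inference resolution back to an original 1920x1080 frame (the tensor values are illustrative):

import torch
# from yolov5.utils.general import scale_coords  # when used outside this module

det = torch.tensor([[100.0, 120.0, 300.0, 360.0]])  # x1, y1, x2, y2 at 640x640
mapped = scale_coords((640, 640), det.clone(), (1080, 1920))  # clone: mutates in place
print(mapped)  # boxes rescaled by 1/gain, un-padded, and clipped to the 1920x1080 frame
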
def yaml_load(file="data.yaml"):
"""Safely loads and returns the contents of a YAML file specified by `file` argument."""
with open(file, errors="ignore") as f:

View File

@ -1,135 +0,0 @@
{
"algorithm": "random",
"parameters": {
"anchor_t": {
"type": "discrete",
"values": [2, 8]
},
"batch_size": {
"type": "discrete",
"values": [16, 32, 64]
},
"box": {
"type": "discrete",
"values": [0.02, 0.2]
},
"cls": {
"type": "discrete",
"values": [0.2]
},
"cls_pw": {
"type": "discrete",
"values": [0.5]
},
"copy_paste": {
"type": "discrete",
"values": [1]
},
"degrees": {
"type": "discrete",
"values": [0, 45]
},
"epochs": {
"type": "discrete",
"values": [5]
},
"fl_gamma": {
"type": "discrete",
"values": [0]
},
"fliplr": {
"type": "discrete",
"values": [0]
},
"flipud": {
"type": "discrete",
"values": [0]
},
"hsv_h": {
"type": "discrete",
"values": [0]
},
"hsv_s": {
"type": "discrete",
"values": [0]
},
"hsv_v": {
"type": "discrete",
"values": [0]
},
"iou_t": {
"type": "discrete",
"values": [0.7]
},
"lr0": {
"type": "discrete",
"values": [1e-5, 0.1]
},
"lrf": {
"type": "discrete",
"values": [0.01, 1]
},
"mixup": {
"type": "discrete",
"values": [1]
},
"momentum": {
"type": "discrete",
"values": [0.6]
},
"mosaic": {
"type": "discrete",
"values": [0]
},
"obj": {
"type": "discrete",
"values": [0.2]
},
"obj_pw": {
"type": "discrete",
"values": [0.5]
},
"optimizer": {
"type": "categorical",
"values": ["SGD", "Adam", "AdamW"]
},
"perspective": {
"type": "discrete",
"values": [0]
},
"scale": {
"type": "discrete",
"values": [0]
},
"shear": {
"type": "discrete",
"values": [0]
},
"translate": {
"type": "discrete",
"values": [0]
},
"warmup_bias_lr": {
"type": "discrete",
"values": [0, 0.2]
},
"warmup_epochs": {
"type": "discrete",
"values": [5]
},
"warmup_momentum": {
"type": "discrete",
"values": [0, 0.95]
},
"weight_decay": {
"type": "discrete",
"values": [0, 0.001]
}
},
"spec": {
"maxCombo": 0,
"metric": "metrics/mAP_0.5",
"objective": "maximize"
},
"trials": 1
}
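
The removed file above follows the Comet Optimizer sweep format: random search over discrete/categorical value lists, maximizing metrics/mAP_0.5. Purely for illustration (the file is deleted by this commit), the "random" algorithm amounts to sampling one value per parameter, roughly:

import json
import random

# Hypothetical: assumes the JSON above were saved locally as sweep.json.
with open("sweep.json") as f:
    spec = json.load(f)

trial = {name: random.choice(p["values"]) for name, p in spec["parameters"].items()}
print(trial)  # one hyperparameter combination to train and score against the metric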

View File

@ -9,7 +9,7 @@ import matplotlib.pyplot as plt
import numpy as np
import torch
from utils import TryExcept, threaded
from yolov5.utils import TryExcept, threaded
def fitness(x):

View File

@ -18,9 +18,9 @@ from PIL import Image, ImageDraw
from scipy.ndimage.filters import gaussian_filter1d
from ultralytics.utils.plotting import Annotator
from utils import TryExcept, threaded
from utils.general import LOGGER, clip_boxes, increment_path, xywh2xyxy, xyxy2xywh
from utils.metrics import fitness
from yolov5.utils import TryExcept, threaded
from yolov5.utils.general import LOGGER, clip_boxes, increment_path, xywh2xyxy, xyxy2xywh
from yolov5.utils.metrics import fitness
# Settings
RANK = int(os.getenv("RANK", -1))
@ -372,7 +372,7 @@ def plot_labels(labels, names=(), save_dir=Path("")):
def imshow_cls(im, labels=None, pred=None, names=None, nmax=25, verbose=False, f=Path("images.jpg")):
"""Displays a grid of images with optional labels and predictions, saving to a file."""
from utils.augmentations import denormalize
from yolov5.utils.augmentations import denormalize
names = names or [f"class{i}" for i in range(1000)]
blocks = torch.chunk(

View File

@ -17,7 +17,7 @@ import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parallel import DistributedDataParallel as DDP
from utils.general import LOGGER, check_version, colorstr, file_date, git_describe
from yolov5.utils.general import LOGGER, check_version, colorstr, file_date, git_describe
LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1)) # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv("RANK", -1))
@ -68,7 +68,7 @@ def smart_DDP(model):
def reshape_classifier_output(model, n=1000):
"""Reshapes last layer of model to match class count 'n', supporting Classify, Linear, Sequential types."""
from models.common import Classify
from yolov5.models.common import Classify
name, m = list((model.model if hasattr(model, "model") else model).named_children())[-1] # last module
if isinstance(m, Classify): # YOLOv5 Classify() head

Binary file not shown.