Complete the migration of the training module

2025-04-17 11:03:05 +08:00
parent 4439687870
commit 74e8f0d415
188 changed files with 32931 additions and 70 deletions


@ -11,6 +11,7 @@ from apps.vadmin.system.views import app as vadmin_system_app
from apps.vadmin.record.views import app as vadmin_record_app
from apps.vadmin.help.views import app as vadmin_help_app
from apps.business.project.views import app as project_app
from apps.business.train.views import app as train_app
# import the routers from each application
@ -21,4 +22,5 @@ urlpatterns = [
{"ApiRouter": vadmin_record_app, "prefix": "/vadmin/record", "tags": ["记录管理"]}, {"ApiRouter": vadmin_record_app, "prefix": "/vadmin/record", "tags": ["记录管理"]},
{"ApiRouter": vadmin_help_app, "prefix": "/vadmin/help", "tags": ["帮助中心管理"]}, {"ApiRouter": vadmin_help_app, "prefix": "/vadmin/help", "tags": ["帮助中心管理"]},
{"ApiRouter": project_app, "prefix": "/business/project", "tags": ["项目管理"]}, {"ApiRouter": project_app, "prefix": "/business/project", "tags": ["项目管理"]},
{"ApiRouter": train_app, "prefix": "/business/train", "tags": ["训练管理"]},
] ]
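For context, entries of this shape are normally mounted onto the FastAPI application in a single loop; a minimal sketch (the `app` instance and the registration loop are illustrative, not this project's actual bootstrap code):

```python
from fastapi import FastAPI

app = FastAPI()
for url in urlpatterns:
    # each entry carries the router, its URL prefix and its OpenAPI tags
    app.include_router(url["ApiRouter"], prefix=url["prefix"], tags=url["tags"])
```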


@ -5,23 +5,19 @@
# @File : crud.py
# @IDE : PyCharm
# @desc : data access layer
from . import schemas, models, params
from apps.vadmin.auth.utils.validation.auth import Auth
from utils import os_utils as os, random_utils as ru
from utils.huawei_obs import ObsClient
from utils import status
from core.exception import CustomException
from application.settings import datasets_url, runs_url, images_url
from typing import Any, List
from core.crud import DalBase
from fastapi import UploadFile
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func, case


class ProjectInfoDal(DalBase):
@ -96,6 +92,9 @@ class ProjectInfoDal(DalBase):
        project: schemas.ProjectInfoIn,
        auth: Auth
    ) -> Any:
        """
        Create a new project
        """
        obj = self.model(**project.model_dump())
        obj.user_id = auth.user.id
        obj.project_no = ru.random_str(6)
@ -106,7 +105,9 @@ class ProjectInfoDal(DalBase):
            obj.dept_id = 0
        else:
            obj.dept_id = auth.dept_ids[0]
        # create the project's dataset folder
        os.create_folder(datasets_url, obj.project_no)
        # create the project's training-run folder
        os.create_folder(runs_url, obj.project_no)
        await self.flush(obj)
        return await self.out_dict(obj, None, False, schemas.ProjectInfoOut)
@ -214,6 +215,55 @@ class ProjectImageDal(DalBase):
        ObsClient.del_objects(object_keys)
        await self.delete_datas(ids)
    async def get_img_count(self, proj_id: int) -> tuple[int, int]:
        """
        Count the project's train and val images
        """
        train_count = await self.get_count(
            v_where=[models.ProjectImage.project_id == proj_id, models.ProjectImage.img_type == 'train'])
        val_count = await self.get_count(
            v_where=[models.ProjectImage.project_id == proj_id, models.ProjectImage.img_type == 'val'])
        return train_count, val_count
    async def check_image_label(self, proj_id: int) -> tuple[int, int]:
        """
        Count the images that have no annotations yet
        """
        # 1. subquery: number of labels per image
        subquery = (
            select(
                models.ProjectImgLabel.image_id,
                func.count(models.ProjectImgLabel.id).label('label_count')
            )
            .group_by(models.ProjectImgLabel.image_id)
            .subquery()
        )
        # 2. main query: left-join so images without labels survive,
        #    then keep only the rows whose label count is 0
        query = (
            select(
                models.ProjectImage,
                func.ifnull(subquery.c.label_count, 0).label('label_count')
            )
            .outerjoin(subquery, models.ProjectImage.id == subquery.c.image_id)
            .where(func.ifnull(subquery.c.label_count, 0) == 0)
        )
        train_count_sql = await self.filter_core(
            v_start_sql=query,
            v_where=[models.ProjectImage.project_id == proj_id, models.ProjectImage.img_type == 'train'],
            v_return_sql=True)
        train_count = await self.get_count(train_count_sql)
        val_count_sql = await self.filter_core(
            v_start_sql=query,
            v_where=[models.ProjectImage.project_id == proj_id, models.ProjectImage.img_type == 'val'],
            v_return_sql=True)
        val_count = await self.get_count(val_count_sql)
        return train_count, val_count
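    # For reference, each count above is roughly this SQL (MySQL dialect; table
    # names are inferred from the model names, so treat it as an illustrative sketch):
    #   SELECT COUNT(*)
    #   FROM project_image pi
    #   LEFT JOIN (SELECT image_id, COUNT(id) AS label_count
    #              FROM project_img_label GROUP BY image_id) s ON pi.id = s.image_id
    #   WHERE pi.project_id = :proj_id AND pi.img_type = 'train'
    #     AND IFNULL(s.label_count, 0) = 0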
class ProjectLabelDal(DalBase):
    """
@ -233,14 +283,27 @@ class ProjectLabelDal(DalBase):
        label_id: int = None
    ):
        wheres = [
            self.model.project_id == pro_id,
            self.model.label_name == name
        ]
        if label_id:
            wheres.append(self.model.id != label_id)
        count = await self.get_count(v_where=wheres)
        return count > 0
    async def get_label_for_train(self, project_id: int) -> tuple[list, list]:
        """
        Return the project's labels as two parallel lists (ids and names),
        ordered by id so the list position doubles as the YOLO class index.
        """
        id_list = []
        name_list = []
        label_list = await self.get_datas(
            v_where=[self.model.project_id == project_id],
            v_order='asc',
            v_order_field='id',
            v_return_count=False)
        for label in label_list:
            id_list.append(label.id)
            name_list.append(label.label_name)
        return id_list, name_list
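    # Example: labels with ids [12, 15, 19] and names ["cat", "dog", "bird"] map to
    # YOLO class indices 0, 1, 2; operate_img_label() in service.py recovers the
    # index via label_id_list.index(label_id).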
class ProjectImgLabelDal(DalBase):
    """
@ -260,6 +323,13 @@ class ProjectImgLabelDal(DalBase):
            img.image_id = image_id
        await self.create_datas(img_labels)
    async def get_img_label_list(self, image_id: int):
        return await self.get_datas(
            v_return_count=False,
            v_where=[self.model.image_id == image_id],
            v_order="asc",
            v_order_field="id")
class ProjectImgLeaferDal(DalBase):
    """


@ -15,14 +15,10 @@ class ProjectInfoParams(QueryParams):
        self,
        project_name: str | None = Query(None, title="Project name"),
        type_code: str | None = Query(None, title="Project type"),
        params: Paging = Depends()
    ):
        super().__init__(params)
        self.project_name = ("like", project_name)
        self.type_code = type_code


@ -9,6 +9,8 @@
from sqlalchemy.ext.asyncio import AsyncSession
from core.crud import DalBase
from . import models, schemas
from utils import os_utils as os
from utils.csv_utils import read_csv


class ProjectTrainDal(DalBase):
@ -17,4 +19,75 @@ class ProjectTrainDal(DalBase):
        super(ProjectTrainDal, self).__init__()
        self.db = db
        self.model = models.ProjectTrain
        self.schema = schemas.ProjectTrainOut
    async def get_result(self, train_id: int):
        """
        Build the training report from the run's results.csv
        """
        data = await self.get_data(data_id=train_id)
        if data is None:
            return None
        result_csv_path = os.file_path(data.train_url, 'results.csv')
        result_row = read_csv(result_csv_path)
        report_data = {}
        # epoch numbers
        epoch_data = []
        # bounding-box regression loss: gap between predicted and ground-truth boxes
        # (center, width, height); lower means better localization
        train_box_loss = []
        # objectness loss: confidence error on whether a box actually contains an object
        train_obj_loss = []
        # classification loss: gap between predicted and true classes
        train_cls_loss = []
        # the same three losses on the validation set, i.e. on unseen data
        val_box_loss = []
        val_obj_loss = []
        val_cls_loss = []
        # precision: true positives over all predicted positives (low means many false detections)
        m_p = []
        # recall: true positives over all ground-truth positives (low means many misses)
        m_r = []
        # learning rates logged by the trainer (x/lr0, x/lr1)
        x_lr0 = []
        x_lr1 = []
        # column order follows YOLOv5's results.csv:
        # 0 epoch, 1-3 train losses, 4 precision, 5 recall, 8-10 val losses, 11-12 lr
        for row in result_row:
            epoch_data.append(row[0].strip())
            train_box_loss.append(row[1].strip())
            train_obj_loss.append(row[2].strip())
            train_cls_loss.append(row[3].strip())
            val_box_loss.append(row[8].strip())
            val_obj_loss.append(row[9].strip())
            val_cls_loss.append(row[10].strip())
            m_p.append(row[4].strip())
            m_r.append(row[5].strip())
            x_lr0.append(row[11].strip())
            x_lr1.append(row[12].strip())
        report_data['epoch_data'] = epoch_data
        report_data['train_box_loss'] = train_box_loss
        report_data['train_obj_loss'] = train_obj_loss
        report_data['train_cls_loss'] = train_cls_loss
        report_data['val_box_loss'] = val_box_loss
        report_data['val_obj_loss'] = val_obj_loss
        report_data['val_cls_loss'] = val_cls_loss
        report_data['m_p'] = m_p
        report_data['m_r'] = m_r
        report_data['x_lr0'] = x_lr0
        report_data['x_lr1'] = x_lr1
        return report_data
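The fixed offsets above assume the stock YOLOv5 `results.csv` layout, and they only hold if `read_csv` skips the header row; a reference sketch of that assumed header:

```python
# assumed YOLOv5 results.csv columns, by index (verify against the YOLOv5 version in use)
RESULTS_CSV_COLUMNS = (
    "epoch",                 # 0
    "train/box_loss",        # 1
    "train/obj_loss",        # 2
    "train/cls_loss",        # 3
    "metrics/precision",     # 4
    "metrics/recall",        # 5
    "metrics/mAP_0.5",       # 6
    "metrics/mAP_0.5:0.95",  # 7
    "val/box_loss",          # 8
    "val/obj_loss",          # 9
    "val/cls_loss",          # 10
    "x/lr0",                 # 11
    "x/lr1",                 # 12
    "x/lr2",                 # 13
)
```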


@ -0,0 +1 @@
from .train import ProjectTrain


@ -6,7 +6,7 @@ from db.db_base import BaseModel
class ProjectTrain(BaseModel):
    """
    Project training info table
    """
    __tablename__ = "project_train"
    __table_args__ = ({'comment': 'project training version info table'})


@ -6,10 +6,15 @@
# @IDE : PyCharm
# @desc : project training query parameters
from fastapi import Depends, Query
from core.dependencies import Paging, QueryParams


class ProjectTrainParams(QueryParams):
    def __init__(
            self,
            project_id: int = Query(0, title="Project ID"),
            params: Paging = Depends()
    ):
        super().__init__(params)
        self.project_id = project_id


@ -1 +1 @@
from .project_train import ProjectTrainIn, ProjectTrainOut


@ -6,27 +6,29 @@
# @IDE : PyCharm
# @desc : pydantic models used to serialize database rows
from datetime import datetime
from pydantic import BaseModel, Field, ConfigDict
from typing import Optional


class ProjectTrainIn(BaseModel):
    project_id: int = Field(..., description="Project ID")
    weights_id: Optional[str] = Field(None, description="Weights file (id of a previous training run)")
    epochs: Optional[int] = Field(50, description="Number of training epochs")
    patience: Optional[int] = Field(20, description="Early-stopping patience")


class ProjectTrainOut(BaseModel):
    id: Optional[int] = Field(None, description="Training run ID")
    train_version: Optional[str] = Field(None, description="Training version")
    weights_name: Optional[str] = Field(None, description="Weights name")
    epochs: Optional[int] = Field(None, description="Number of training epochs")
    patience: Optional[int] = Field(None, description="Early-stopping patience")
    create_time: Optional[datetime] = Field(None, description="Training time")
    model_config = ConfigDict(from_attributes=True)
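With `from_attributes=True`, `ProjectTrainOut` can be built straight from an ORM row; a minimal sketch (`orm_train` stands in for a hypothetical `models.ProjectTrain` instance):

```python
from apps.business.train.schemas import ProjectTrainOut

out = ProjectTrainOut.model_validate(orm_train)  # reads the fields off the ORM object
payload = out.model_dump()  # plain dict, ready to wrap in SuccessResponse
```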


@ -0,0 +1,200 @@
from . import schemas, models, crud
from apps.business.project import schemas as proj_schemas, models as proj_models, crud as proj_crud
from utils import os_utils as os
from application.settings import datasets_url, runs_url, yolo_url
from utils.websocket_server import room_manager
import yaml
import asyncio
import subprocess
from typing import List
from redis.asyncio import Redis
from sqlalchemy.ext.asyncio import AsyncSession
async def before_train(proj_info: proj_models.ProjectInfo, db: AsyncSession):
    """
    Prepare a YOLOv5 training run: dataset folders, label files and the dataset yaml
    :param proj_info: project info
    :param db: database session
    :return: the (data, project, name) arguments for train.py
    """
    img_dal = proj_crud.ProjectImageDal(db)
    label_dal = proj_crud.ProjectLabelDal(db)
    # fetch the two image lists (train / val)
    project_images_train = await img_dal.get_datas(
        v_return_count=False,
        v_where=[proj_models.ProjectImage.project_id == proj_info.id, proj_models.ProjectImage.img_type == 'train'])
    project_images_val = await img_dal.get_datas(
        v_return_count=False,
        v_where=[proj_models.ProjectImage.project_id == proj_info.id, proj_models.ProjectImage.img_type == 'val'])
    # next training version
    version_path = 'v' + str(proj_info.train_version + 1)
    # root folder of this training run
    train_path = os.create_folder(datasets_url, proj_info.project_no, version_path)
    # the project's labels as two parallel (id, name) lists
    label_id_list, label_name_list = await label_dal.get_label_for_train(proj_info.id)
    # the two image folders
    img_path_train = os.create_folder(train_path, 'images', 'train')
    img_path_val = os.create_folder(train_path, 'images', 'val')
    # the two label folders
    label_path_train = os.create_folder(train_path, 'labels', 'train')
    label_path_val = os.create_folder(train_path, 'labels', 'val')
    # write the dataset yaml into the run root
    yaml_file = os.file_path(train_path, proj_info.project_no + '.yaml')
    yaml_data = {
        'path': train_path,
        'train': 'images/train',
        'val': 'images/val',
        'test': None,
        'names': {i: name for i, name in enumerate(label_name_list)}
    }
    with open(yaml_file, 'w', encoding='utf-8') as file:
        yaml.dump(yaml_data, file, allow_unicode=True, default_flow_style=False)
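    # For a project with two labels ("cat", "dog") the dump above produces roughly
    # (illustrative values):
    #   path: <datasets_url>/<project_no>/v2
    #   train: images/train
    #   val: images/val
    #   test: null
    #   names:
    #     0: cat
    #     1: dog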
    # copy the images and generate one label .txt per image,
    # first for the train split ...
    await operate_img_label(project_images_train, img_path_train, label_path_train, db, label_id_list)
    # ... then for the val split
    await operate_img_label(project_images_val, img_path_val, label_path_val, db, label_id_list)
    # arguments for the asynchronous training run
    data = yaml_file
    project = os.file_path(runs_url, proj_info.project_no)
    name = version_path
    return data, project, name
async def operate_img_label(
        img_list: List[proj_models.ProjectImage],
        img_path: str,
        label_path: str,
        db: AsyncSession,
        label_id_list: List[int]):
    """
    Copy the images and generate the YOLO label files
    :param label_id_list: label ids ordered by class index
    :param db: database session
    :param img_list: images to process
    :param img_path: target image folder
    :param label_path: target label folder
    :return:
    """
    for i in range(len(img_list)):
        image = img_list[i]
        # copy the image under a new name, keeping its extension
        file_name = 'image' + str(i)
        os.copy_and_rename_file(image.image_url, img_path, file_name)
        # fetch the image's labels and write them into one .txt per image
        img_label_list = await proj_crud.ProjectImgLabelDal(db).get_img_label_list(image.id)
        label_txt_path = os.file_path(label_path, file_name + '.txt')
        with open(label_txt_path, 'w', encoding='utf-8') as file:
            for image_label in img_label_list:
                index = label_id_list.index(image_label.label_id)
                file.write(str(index) + ' ' + image_label.mark_center_x + ' '
                           + image_label.mark_center_y + ' '
                           + image_label.mark_width + ' '
                           + image_label.mark_height + '\n')
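# Each line of a YOLO label file is "<class_index> <cx> <cy> <w> <h>", with the box
# center and size normalized to [0, 1]; e.g. (illustrative): "1 0.514 0.338 0.120 0.250"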
def run_event_loop(
        data: str,
        project: str,
        name: str,
        train_in: schemas.ProjectTrainIn,
        project_id: int,
        db: AsyncSession,
        rd: Redis = None):
    # thread entry point: the caller starts this via threading.Thread, so build a
    # private event loop here instead of declaring the function itself async
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # run the coroutine to completion
    loop.run_until_complete(run_command(data, project, name, train_in.epochs, train_in.patience,
                                        train_in.weights_id, project_id, db, rd))
    # optional: close the loop
    loop.close()
async def run_command(
        data: str,
        project: str,
        name: str,
        epochs: int,
        patience: int,
        weights: str,
        project_id: int,
        db: AsyncSession,
        rd: Redis = None):
    """
    Run the training process
    :param data: dataset yaml
    :param project: output directory of the run
    :param name: experiment name
    :param epochs: number of training epochs
    :param patience: early-stopping patience
    :param weights: id of a previous training run whose weights to start from
    :param project_id: project id
    :param db: database session
    :param rd: redis connection (optional, carries the GPU flag)
    :return:
    """
    yolo_path = os.file_path(yolo_url, 'train.py')
    room = 'train_' + str(project_id)
    await room_manager.send_to_room(room, "AiCheckV2.0: model training started, please wait...\n")
    command = ["python", '-u', yolo_path, "--data=" + data, "--project=" + project, "--name=" + name,
               "--epochs=" + str(epochs), "--batch-size=8", "--exist-ok", "--patience=" + str(patience)]
    # continue from earlier weights so training resumes on top of a previous run
    train_info = None
    if weights is not None and weights != '':
        train_info = await crud.ProjectTrainDal(db).get_data(data_id=int(weights))
        if train_info is not None:
            command.append("--weights=" + train_info.best_pt)
    # use the GPU if the startup check found CUDA (flag written in core/events.py;
    # redis.asyncio calls must be awaited, and the comparison assumes decode_responses=True)
    if rd is not None:
        is_gpu = await rd.get('is_gpu')
        if is_gpu == 'True':
            command.append("--device=0")
    # start the child process
    with subprocess.Popen(
            command,
            bufsize=1,  # 0: unbuffered, 1: line-buffered, other positive values: approximate byte count
            shell=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # also forwards the yolov5 progress bars and the like
            text=True,  # decode the output as text to avoid encoding problems downstream
            encoding='utf-8',
    ) as process:
        while process.poll() is None:
            line = process.stdout.readline()
            process.stdout.flush()  # flush so the pipe does not fill up and stall the child
            if line != '\n' and '0%' not in line:
                await room_manager.send_to_room(room, line + '\n')
        # wait for the process to finish and check its exit code
        return_code = process.wait()
        if return_code != 0:
            await room_manager.send_to_room(room, 'error')
        else:
            await room_manager.send_to_room(room, 'success')
            # persist the version info of this training run
            train = models.ProjectTrain()
            train.project_id = project_id
            train.train_version = name
            train_url = os.file_path(project, name)
            train.train_url = train_url
            train.train_data = data
            train.best_pt = os.file_path(train_url, 'weights', 'best.pt')
            train.last_pt = os.file_path(train_url, 'weights', 'last.pt')
            if train_info is not None:
                train.weights_id = weights
                train.weights_name = train_info.train_version
            train.patience = patience
            train.epochs = epochs
            await crud.ProjectTrainDal(db).create_data(data=train)
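A design note on the subprocess loop: the blocking `readline()` stalls this thread's private event loop between output lines, which is tolerable only because the loop runs off the main thread. An asyncio-native alternative streams without blocking; a minimal sketch (the `on_line` callback is illustrative):

```python
import asyncio


async def stream_process(command: list[str], on_line) -> int:
    """Run `command`, forwarding each output line to `on_line`, and return its exit code."""
    proc = await asyncio.create_subprocess_exec(
        *command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT,
    )
    while True:
        line = await proc.stdout.readline()
        if not line:  # EOF: the process closed its stdout
            break
        await on_line(line.decode('utf-8', errors='replace'))
    return await proc.wait()
```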


@ -3,49 +3,70 @@
# @version : 1.0
# @Create Time : 2025/04/03 10:32
# @File : views.py
from . import models, schemas, crud
from apps.business.project.crud import ProjectInfoDal, ProjectImageDal
from utils.response import SuccessResponse, ErrorResponse
from apps.vadmin.auth.utils.current import AllUserAuth
from apps.vadmin.auth.utils.validation.auth import Auth
from . import service
import threading
from fastapi import APIRouter, Depends


app = APIRouter()


###########################################################
# project training
###########################################################
@app.post("/", summary="run a training job")
async def run_train(
        train_in: schemas.ProjectTrainIn,
        auth: Auth = Depends(AllUserAuth())):
    proj_id = train_in.project_id
    proj_dal = ProjectInfoDal(auth.db)
    proj_img_dal = ProjectImageDal(auth.db)
    proj_info = await proj_dal.get_data(proj_id)
    if proj_info is None:
        return ErrorResponse(msg="project not found")
    train_count, val_count = await proj_img_dal.get_img_count(proj_id)
    if train_count == 0:
        return ErrorResponse("please upload training images first")
    if train_count < 10:
        return ErrorResponse("fewer than 10 training images; please upload more")
    if val_count == 0:
        return ErrorResponse("please upload validation images first")
    if val_count < 5:
        return ErrorResponse("fewer than 5 validation images; please upload more")
    train_label_count, val_label_count = await proj_img_dal.check_image_label(proj_id)
    if train_label_count > 0:
        return ErrorResponse("some training images are not annotated yet")
    if val_label_count > 0:
        return ErrorResponse("some validation images are not annotated yet")
    data, project, name = await service.before_train(proj_info, auth.db)
    # run training in a background thread; progress is pushed to the client over websocket
    thread_train = threading.Thread(
        target=service.run_event_loop,
        args=(data, project, name, train_in, proj_id, auth.db,))
    thread_train.start()
    return SuccessResponse(msg="training started")
@app.post("/project/train", summary="创建项目巡逻片信息", tags=["项目巡逻片信息"]) @app.get("/{proj_id}", summary="查询训练列表")
async def create_project_train(data: schemas.ProjectTrain, auth: Auth = Depends(AllUserAuth())): async def train_list(
return SuccessResponse(await crud.ProjectTrainDal(auth.db).create_data(data=data)) proj_id: int,
auth: Auth = Depends(AllUserAuth())):
datas = await crud.ProjectTrainDal(auth.db).get_datas(
v_where=[models.ProjectTrain.project_id == proj_id],
v_schema=schemas.ProjectTrainOut,
v_order="asc",
v_order_field="id",v_return_count=False)
return SuccessResponse(data=datas)
@app.delete("/project/train", summary="删除项目巡逻片信息", description="硬删除", tags=["项目巡逻片信息"]) @app.get("/result/{proj_id}", summary="查询训练报告")
async def delete_project_train_list(ids: IdList = Depends(), auth: Auth = Depends(AllUserAuth())): async def get_result(train_id:int, auth: Auth = Depends(AllUserAuth())):
await crud.ProjectTrainDal(auth.db).delete_datas(ids=ids.ids, v_soft=False) result = await crud.ProjectTrainDal(auth.db).get_result(train_id)
return SuccessResponse("删除成功") return SuccessResponse(data=result)
@app.put("/project/train/{data_id}", summary="更新项目巡逻片信息", tags=["项目巡逻片信息"])
async def put_project_train(data_id: int, data: schemas.ProjectTrain, auth: Auth = Depends(AllUserAuth())):
return SuccessResponse(await crud.ProjectTrainDal(auth.db).put_data(data_id, data))
@app.get("/project/train/{data_id}", summary="获取项目巡逻片信息信息", tags=["项目巡逻片信息"])
async def get_project_train(data_id: int, db: AsyncSession = Depends(db_getter)):
schema = schemas.ProjectTrainSimpleOut
return SuccessResponse(await crud.ProjectTrainDal(db).get_data(data_id, v_schema=schema))


@ -6,7 +6,7 @@
# @IDE : PyCharm
# @desc : global events
import torch
from fastapi import FastAPI
from motor.motor_asyncio import AsyncIOMotorClient
from application.settings import REDIS_DB_URL, MONGO_DB_URL, MONGO_DB_NAME, EVENTS
@ -68,6 +68,9 @@ async def connect_redis(app: FastAPI, status: bool):
        response = await rd.ping()
        if response:
            print("Redis connection succeeded")
            # data initialization: record whether CUDA is available so the trainer
            # can request the GPU later (redis.asyncio calls must be awaited)
            is_gpu = torch.cuda.is_available()
            await rd.set('is_gpu', str(is_gpu))
        else:
            print("Redis connection failed")
    except AuthenticationError as e:

core/websocket_app.py (new file)

@ -0,0 +1,30 @@
from fastapi import WebSocket
from fastapi import FastAPI
from starlette.websockets import WebSocketState
from utils.websocket_server import room_manager
def websocket_config(app: FastAPI):
    @app.websocket("/{room}")
    async def websocket_room(websocket: WebSocket, room: str):
        """
        websocket room management: relay messages between the clients of a room
        :param websocket:
        :param room:
        :return:
        """
        await websocket.accept()
        await room_manager.add_to_room(room, websocket)
        try:
            while True:
                data = await websocket.receive_text()
                await room_manager.broadcast_to_room(room, data, exclude_websocket=websocket)
        except Exception:
            if websocket.client_state != WebSocketState.DISCONNECTED:
                await websocket.close(code=1000)
        finally:
            await room_manager.remove_from_room(room, websocket)
            if websocket.client_state != WebSocketState.DISCONNECTED:
                await websocket.close(code=1001)
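A minimal sketch of wiring this into the application (where exactly the project calls it is an assumption):

```python
from fastapi import FastAPI

from core.websocket_app import websocket_config

app = FastAPI()
websocket_config(app)  # exposes ws://<host>/{room}; the trainer pushes to room "train_<project_id>"
```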

deep_sort/__init__.py (new file)

@ -0,0 +1,19 @@
from .deep_sort import DeepSort
__all__ = ['DeepSort', 'build_tracker']
def build_tracker(cfg, use_cuda):
    if cfg.USE_FASTREID:
        return DeepSort(model_path=cfg.FASTREID.CHECKPOINT, model_config=cfg.FASTREID.CFG,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=use_cuda)
    else:
        return DeepSort(model_path=cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=use_cuda)
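A minimal usage sketch, assuming the YAML shown below is loaded into an attribute-style dict (easydict here) and that `USE_FASTREID` is set explicitly; the config path is illustrative:

```python
import yaml
from easydict import EasyDict

from deep_sort import build_tracker

with open("configs/deep_sort.yaml", encoding="utf-8") as f:
    cfg = EasyDict(yaml.safe_load(f))
cfg.USE_FASTREID = False  # pick the plain Extractor branch

tracker = build_tracker(cfg, use_cuda=True)
```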


@ -0,0 +1,10 @@
DEEPSORT:
  REID_CKPT: "./deep_sort/deep/checkpoint/ckpt.t7"
  MAX_DIST: 0.2
  MIN_CONFIDENCE: 0.5
  NMS_MAX_OVERLAP: 0.5
  MAX_IOU_DISTANCE: 0.7
  MAX_AGE: 70
  N_INIT: 3
  NN_BUDGET: 100


@ -0,0 +1,3 @@
FASTREID:
  CFG: "thirdparty/fast-reid/configs/Market1501/bagtricks_R50.yml"
  CHECKPOINT: "deep_sort/deep/checkpoint/market_bot_R50.pth"


@ -0,0 +1,6 @@
MASKRCNN:
  LABEL: "./coco_classes.json"
  WEIGHT: "./detector/Mask_RCNN/save_weights/maskrcnn_resnet50_fpn_coco.pth"
  NUM_CLASSES: 90
  BOX_THRESH: 0.5


@ -0,0 +1,5 @@
MMDET:
  CFG: "thirdparty/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py"
  CHECKPOINT: "detector/MMDet/weight/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth"
  SCORE_THRESH: 0.5


@ -0,0 +1,82 @@
In the DeepSORT algorithm, an appearance feature extraction network extracts features from **image_crops** for matching. The original model used in the paper is in `model.py`, and its parameters are here: [ckpt.t7](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6). This repository also provides a `resnet.py` script and its ImageNet pre-trained weights below.
```
# resnet18
https://download.pytorch.org/models/resnet18-5c106cde.pth
# resnet34
https://download.pytorch.org/models/resnet34-333f7ec4.pth
# resnet50
https://download.pytorch.org/models/resnet50-19c8e357.pth
# resnext50_32x4d
https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
```
## Dataset Preparation
To train the model, first download the [Market1501](http://www.liangzheng.com.cn/Project/project_reid.html) dataset or the [Mars](http://www.liangzheng.com.cn/Project/project_mars.html) dataset.
If you want to train on your **own dataset** (assuming it is already downloaded), arrange it in the following way:
```
├── dataset_root: The root dir of the dataset.
├── class1: Category 1 is located in the folder dir.
├── xxx1.jpg: Image belonging to category 1.
├── xxx2.jpg: Image belonging to category 1.
├── class2: Category 2 is located in the folder dir.
├── xxx3.jpg: Image belonging to category 2.
├── xxx4.jpg: Image belonging to category 2.
├── class3: Category 3 is located in the folder dir.
...
...
```
## Training the RE-ID model
Assuming the dataset is prepared, you can start training with the following commands.
#### Training on a single GPU
```python
usage: train.py [--data-dir]
[--epochs]
[--batch_size]
[--lr]
[--lrf]
[--weights]
[--freeze-layers]
[--gpu_id]
# defaults to cuda:0 and the Net defined in `model.py`
python train.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path]
# use `--freeze-layers` to freeze all convolutional layers and train only the fc layers
python train.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path] --freeze-layers
```
#### Training on multiple GPUs
```python
usage: train_multiGPU.py [--data-dir]
[--epochs]
[--batch_size]
[--lr]
[--lrf]
[--syncBN]
[--weights]
[--freeze-layers]
# do not change the following parameters; they are assigned automatically
[--device]
[--world_size]
[--dist_url]
# defaults to cuda:0, cuda:1, cuda:2, cuda:3 and the resnet18 defined in `resnet.py`
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 train_multiGPU.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path]
# use `--freeze-layers` to freeze all convolutional layers and train only the fc layers
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 train_multiGPU.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path] --freeze-layers
```
An example of training progress is as follows:
![train.jpg](./train.jpg)
Finally, you can evaluate the model with [test.py](deep_sort/deep/test.py) and [evaluate.py](deep_sort/deep/evalute.py).


@ -0,0 +1,92 @@
import json
import os
import random
import cv2
from PIL import Image
import torch
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
class ClsDataset(Dataset):
def __init__(self, images_path, images_labels, transform=None):
self.images_path = images_path
self.images_labels = images_labels
self.transform = transform
def __len__(self):
return len(self.images_path)
def __getitem__(self, idx):
img = cv2.imread(self.images_path[idx])
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = Image.fromarray(img)
label = self.images_labels[idx]
if self.transform is not None:
img = self.transform(img)
return img, label
@staticmethod
def collate_fn(batch):
images, labels = tuple(zip(*batch))
images = torch.stack(images, dim=0)
labels = torch.as_tensor(labels)
return images, labels
def read_split_data(root, valid_rate=0.2):
assert os.path.exists(root), 'dataset root: {} does not exist.'.format(root)
class_names = [cls for cls in os.listdir(root) if os.path.isdir(os.path.join(root, cls))]
class_names.sort()
class_indices = {name: i for i, name in enumerate(class_names)}
json_str = json.dumps({v: k for k, v in class_indices.items()}, indent=4)
with open('class_indices.json', 'w') as f:
f.write(json_str)
train_images_path = []
train_labels = []
val_images_path = []
val_labels = []
per_class_num = []
supported = ['.jpg', '.JPG', '.png', '.PNG']
for cls in class_names:
cls_path = os.path.join(root, cls)
images_path = [os.path.join(cls_path, i) for i in os.listdir(cls_path)
if os.path.splitext(i)[-1] in supported]
images_label = class_indices[cls]
per_class_num.append(len(images_path))
val_path = random.sample(images_path, int(len(images_path) * valid_rate))
for img_path in images_path:
if img_path in val_path:
val_images_path.append(img_path)
val_labels.append(images_label)
else:
train_images_path.append(img_path)
train_labels.append(images_label)
print("{} images were found in the dataset.".format(sum(per_class_num)))
print("{} images for training.".format(len(train_images_path)))
print("{} images for validation.".format(len(val_images_path)))
assert len(train_images_path) > 0, "number of training images must greater than zero"
assert len(val_images_path) > 0, "number of validation images must greater than zero"
plot_distribution = False
if plot_distribution:
plt.bar(range(len(class_names)), per_class_num, align='center')
plt.xticks(range(len(class_names)), class_names)
for i, v in enumerate(per_class_num):
plt.text(x=i, y=v + 5, s=str(v), ha='center')
plt.xlabel('classes')
plt.ylabel('numbers')
plt.title('the distribution of dataset')
plt.show()
return [train_images_path, train_labels], [val_images_path, val_labels], len(class_names)


@ -0,0 +1,15 @@
import torch
features = torch.load("features.pth")
qf = features["qf"]
ql = features["ql"]
gf = features["gf"]
gl = features["gl"]
scores = qf.mm(gf.t())
# top-1 match: the best-scoring gallery entry for each query
res = scores.topk(5, dim=1)[1][:, 0]
top1correct = gl[res].eq(ql).sum().item()
print("Acc top1:{:.3f}".format(top1correct / ql.size(0)))


@ -0,0 +1,93 @@
import torch
import torchvision.transforms as transforms
import numpy as np
import cv2
import logging
from .model import Net
from .resnet import resnet18
# from fastreid.config import get_cfg
# from fastreid.engine import DefaultTrainer
# from fastreid.utils.checkpoint import Checkpointer
class Extractor(object):
def __init__(self, model_path, use_cuda=True):
self.net = Net(reid=True)
# self.net = resnet18(reid=True)
self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
self.net.load_state_dict(state_dict if 'net_dict' not in state_dict else state_dict['net_dict'], strict=False)
logger = logging.getLogger("root.tracker")
logger.info("Loading weights from {}... Done!".format(model_path))
self.net.to(self.device)
self.size = (64, 128)
self.norm = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def _preprocess(self, im_crops):
"""
TODO:
1. to float with scale from 0 to 1
2. resize to (64, 128) as Market1501 dataset did
3. concatenate to a numpy array
3. to torch Tensor
4. normalize
"""
def _resize(im, size):
return cv2.resize(im.astype(np.float32) / 255., size)
im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float()
return im_batch
def __call__(self, im_crops):
im_batch = self._preprocess(im_crops)
with torch.no_grad():
im_batch = im_batch.to(self.device)
features = self.net(im_batch)
return features.cpu().numpy()
class FastReIDExtractor(object):
    def __init__(self, model_config, model_path, use_cuda=True):
        # lazy imports so the default Extractor works without fast-reid installed
        from fastreid.config import get_cfg
        from fastreid.engine import DefaultTrainer
        from fastreid.utils.checkpoint import Checkpointer
        cfg = get_cfg()
        cfg.merge_from_file(model_config)
cfg.MODEL.BACKBONE.PRETRAIN = False
self.net = DefaultTrainer.build_model(cfg)
self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
Checkpointer(self.net).load(model_path)
logger = logging.getLogger("root.tracker")
logger.info("Loading weights from {}... Done!".format(model_path))
self.net.to(self.device)
self.net.eval()
height, width = cfg.INPUT.SIZE_TEST
self.size = (width, height)
self.norm = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def _preprocess(self, im_crops):
def _resize(im, size):
return cv2.resize(im.astype(np.float32) / 255., size)
im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float()
return im_batch
def __call__(self, im_crops):
im_batch = self._preprocess(im_crops)
with torch.no_grad():
im_batch = im_batch.to(self.device)
features = self.net(im_batch)
return features.cpu().numpy()
if __name__ == '__main__':
    img = cv2.imread("demo.jpg")[:, :, (2, 1, 0)]
    extr = Extractor("checkpoint/ckpt.t7")
    feature = extr([img])  # the extractor expects a list of image crops
    print(feature.shape)

deep_sort/deep/model.py (new file)

@ -0,0 +1,105 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
def __init__(self, c_in, c_out, is_downsample=False):
super(BasicBlock, self).__init__()
self.is_downsample = is_downsample
if is_downsample:
self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
else:
self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(c_out)
self.relu = nn.ReLU(True)
self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(c_out)
if is_downsample:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
nn.BatchNorm2d(c_out)
)
elif c_in != c_out:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
nn.BatchNorm2d(c_out)
)
self.is_downsample = True
def forward(self, x):
y = self.conv1(x)
y = self.bn1(y)
y = self.relu(y)
y = self.conv2(y)
y = self.bn2(y)
if self.is_downsample:
x = self.downsample(x)
return F.relu(x.add(y), True)
def make_layers(c_in, c_out, repeat_times, is_downsample=False):
blocks = []
for i in range(repeat_times):
if i == 0:
blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
else:
blocks += [BasicBlock(c_out, c_out), ]
return nn.Sequential(*blocks)
class Net(nn.Module):
def __init__(self, num_classes=751, reid=False):
super(Net, self).__init__()
        # input: 3 x 128 x 64 (C x H x W)
        self.conv = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # nn.Conv2d(32,32,3,stride=1,padding=1),
            # nn.BatchNorm2d(32),
            # nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, padding=1),
        )
        # 64 x 64 x 32
        self.layer1 = make_layers(64, 64, 2, False)
        # 64 x 64 x 32
        self.layer2 = make_layers(64, 128, 2, True)
        # 128 x 32 x 16
        self.layer3 = make_layers(128, 256, 2, True)
        # 256 x 16 x 8
        self.layer4 = make_layers(256, 512, 2, True)
        # 512 x 8 x 4
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # 512 x 1 x 1
self.reid = reid
self.classifier = nn.Sequential(
nn.Linear(512, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(256, num_classes),
)
def forward(self, x):
x = self.conv(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        # B x 512
        if self.reid:
            # L2-normalize the feature so cosine distance can be used for matching
            x = x.div(x.norm(p=2, dim=1, keepdim=True))
            return x
# classifier
x = self.classifier(x)
return x
if __name__ == '__main__':
    net = Net()
    x = torch.randn(4, 3, 128, 64)
    y = net(x)
    print(y.shape)  # torch.Size([4, 751]) with the default num_classes


@ -0,0 +1,67 @@
import os
import torch
import torch.distributed as dist
def init_distributed_mode(args):
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
args.rank = int(os.environ['RANK'])
args.world_size = int(os.environ['WORLD_SIZE'])
args.gpu = int(os.environ['LOCAL_RANK'])
elif 'SLURM_PROCID' in os.environ:
args.rank = int(os.environ['SLURM_PROCID'])
args.gpu = args.rank % torch.cuda.device_count()
else:
print("Not using distributed mode")
args.distributed = False
return
args.distributed = True
torch.cuda.set_device(args.gpu)
args.dist_backend = 'nccl'
print('| distributed init (rank {}): {}'.format(args.rank, args.dist_url), flush=True)
dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
dist.barrier()
def cleanup():
dist.destroy_process_group()
def is_dist_avail_and_initialized():
if not dist.is_available():
return False
if not dist.is_initialized():
return False
return True
def get_world_size():
if not is_dist_avail_and_initialized():
return 1
return dist.get_world_size()
def get_rank():
if not is_dist_avail_and_initialized():
return 0
return dist.get_rank()
def is_main_process():
return get_rank() == 0
def reduce_value(value, average=True):
world_size = get_world_size()
if world_size < 2:
return value
with torch.no_grad():
dist.all_reduce(value)
if average:
value /= world_size
return value


@ -0,0 +1,90 @@
import sys
from tqdm import tqdm
import torch
from .distributed_utils import reduce_value, is_main_process
def load_model(state_dict, model_state_dict, model):
for k in state_dict:
if k in model_state_dict:
if state_dict[k].shape != model_state_dict[k].shape:
print('Skip loading parameter {}, required shape {}, ' \
'loaded shape {}.'.format(
k, model_state_dict[k].shape, state_dict[k].shape))
state_dict[k] = model_state_dict[k]
else:
print('Drop parameter {}.'.format(k))
for k in model_state_dict:
if not (k in state_dict):
print('No param {}.'.format(k))
state_dict[k] = model_state_dict[k]
model.load_state_dict(state_dict, strict=False)
return model
def train_one_epoch(model, optimizer, data_loader, device, epoch):
model.train()
criterion = torch.nn.CrossEntropyLoss()
mean_loss = torch.zeros(1).to(device)
sum_num = torch.zeros(1).to(device)
optimizer.zero_grad()
if is_main_process():
data_loader = tqdm(data_loader, file=sys.stdout)
for idx, (images, labels) in enumerate(data_loader):
# forward
images, labels = images.to(device), labels.to(device)
outputs = model(images)
loss = criterion(outputs, labels)
# backward
loss.backward()
loss = reduce_value(loss, average=True)
mean_loss = (mean_loss * idx + loss.detach()) / (idx + 1)
pred = torch.max(outputs, dim=1)[1]
sum_num += torch.eq(pred, labels).sum()
if is_main_process():
data_loader.desc = '[epoch {}] mean loss {}'.format(epoch, mean_loss.item())
if not torch.isfinite(loss):
print('loss is infinite, ending training')
sys.exit(1)
optimizer.step()
optimizer.zero_grad()
if device != torch.device('cpu'):
torch.cuda.synchronize(device)
sum_num = reduce_value(sum_num, average=False)
return sum_num.item(), mean_loss.item()
@torch.no_grad()
def evaluate(model, data_loader, device):
model.eval()
criterion = torch.nn.CrossEntropyLoss()
test_loss = torch.zeros(1).to(device)
sum_num = torch.zeros(1).to(device)
if is_main_process():
data_loader = tqdm(data_loader, file=sys.stdout)
for idx, (inputs, labels) in enumerate(data_loader):
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
loss = criterion(outputs, labels)
loss = reduce_value(loss, average=True)
test_loss = (test_loss * idx + loss.detach()) / (idx + 1)
pred = torch.max(outputs, dim=1)[1]
sum_num += torch.eq(pred, labels).sum()
if device != torch.device('cpu'):
torch.cuda.synchronize(device)
sum_num = reduce_value(sum_num, average=False)
return sum_num.item(), test_loss.item()

deep_sort/deep/resnet.py (new file)

@ -0,0 +1,173 @@
import torch.nn as nn
import torch
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=3,
stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channel)
self.relu = nn.ReLU()
self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channel)
self.downsample = downsample
def forward(self, x):
identity = x
if self.downsample is not None:
identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, in_channel, out_channel, stride=1, downsample=None,
groups=1, width_per_group=64):
super(Bottleneck, self).__init__()
width = int(out_channel * (width_per_group / 64.)) * groups
self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width, kernel_size=1,
stride=1, bias=False)
self.bn1 = nn.BatchNorm2d(width)
self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, kernel_size=3,
stride=stride, padding=1, bias=False, groups=groups)
self.bn2 = nn.BatchNorm2d(width)
self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel * self.expansion,
kernel_size=1, stride=1, bias=False)
self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
def forward(self, x):
identity = x
if self.downsample is not None:
identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, blocks_num, reid=False, num_classes=1000, groups=1, width_per_group=64):
super(ResNet, self).__init__()
self.reid = reid
self.in_channel = 64
self.groups = groups
self.width_per_group = width_per_group
self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
padding=3, bias=False)
self.bn1 = nn.BatchNorm2d(self.in_channel)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layers(block, 64, blocks_num[0])
self.layer2 = self._make_layers(block, 128, blocks_num[1], stride=2)
self.layer3 = self._make_layers(block, 256, blocks_num[2], stride=2)
# self.layer4 = self._make_layers(block, 512, blocks_num[3], stride=1)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(256 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def _make_layers(self, block, channel, block_num, stride=1):
downsample = None
if stride != 1 or self.in_channel != channel * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(channel * block.expansion)
)
layers = []
layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride,
groups=self.groups, width_per_group=self.width_per_group))
self.in_channel = channel * block.expansion
for _ in range(1, block_num):
layers.append(block(self.in_channel, channel, groups=self.groups, width_per_group=self.width_per_group))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
# x = self.layer4(x)
x = self.avgpool(x)
        x = torch.flatten(x, 1)
        # B x (256 * block.expansion); layer4 is disabled above
        if self.reid:
            # L2-normalize for cosine matching
            x = x.div(x.norm(p=2, dim=1, keepdim=True))
            return x
# classifier
x = self.fc(x)
return x
def resnet18(num_classes=1000, reid=False):
# https://download.pytorch.org/models/resnet18-5c106cde.pth
return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes, reid=reid)
def resnet34(num_classes=1000, reid=False):
# https://download.pytorch.org/models/resnet34-333f7ec4.pth
return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, reid=reid)
def resnet50(num_classes=1000, reid=False):
# https://download.pytorch.org/models/resnet50-19c8e357.pth
return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, reid=reid)
def resnext50_32x4d(num_classes=1000, reid=False):
# https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
groups = 32
width_per_group = 4
return ResNet(Bottleneck, [3, 4, 6, 3], reid=reid,
num_classes=num_classes, groups=groups, width_per_group=width_per_group)
if __name__ == '__main__':
    net = resnet18(reid=True)
    x = torch.randn(4, 3, 128, 64)
    y = net(x)
    print(y.shape)  # torch.Size([4, 256]) for resnet18

deep_sort/deep/test.py (new file)

@ -0,0 +1,77 @@
import torch
import torch.backends.cudnn as cudnn
import torchvision
import argparse
import os
from model import Net
parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir", default='data', type=str)
parser.add_argument("--no-cuda", action="store_true")
parser.add_argument("--gpu-id", default=0, type=int)
args = parser.parse_args()
# device
device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
if torch.cuda.is_available() and not args.no_cuda:
cudnn.benchmark = True
# data loader
root = args.data_dir
query_dir = os.path.join(root, "query")
gallery_dir = os.path.join(root, "gallery")
transform = torchvision.transforms.Compose([
torchvision.transforms.Resize((128, 64)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
queryloader = torch.utils.data.DataLoader(
torchvision.datasets.ImageFolder(query_dir, transform=transform),
batch_size=64, shuffle=False
)
galleryloader = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(gallery_dir, transform=transform),
batch_size=64, shuffle=False
)
# net definition
net = Net(reid=True)
assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
print('Loading from checkpoint/ckpt.t7')
checkpoint = torch.load("./checkpoint/ckpt.t7")
net_dict = checkpoint['net_dict']
net.load_state_dict(net_dict, strict=False)
net.eval()
net.to(device)
# compute features
query_features = torch.tensor([]).float()
query_labels = torch.tensor([]).long()
gallery_features = torch.tensor([]).float()
gallery_labels = torch.tensor([]).long()
with torch.no_grad():
for idx, (inputs, labels) in enumerate(queryloader):
inputs = inputs.to(device)
features = net(inputs).cpu()
query_features = torch.cat((query_features, features), dim=0)
query_labels = torch.cat((query_labels, labels))
for idx, (inputs, labels) in enumerate(galleryloader):
inputs = inputs.to(device)
features = net(inputs).cpu()
gallery_features = torch.cat((gallery_features, features), dim=0)
gallery_labels = torch.cat((gallery_labels, labels))
# shift the gallery labels; the gallery folder holds two extra junk/distractor
# classes that ImageFolder enumerates first (an assumption about the data layout)
gallery_labels -= 2
# save features
features = {
"qf": query_features,
"ql": query_labels,
"gf": gallery_features,
"gl": gallery_labels
}
torch.save(features, "features.pth")

deep_sort/deep/train.jpg (new binary file, 59 KiB)

deep_sort/deep/train.py (new file)

@ -0,0 +1,151 @@
import argparse
import os
import tempfile
import math
import warnings
import matplotlib.pyplot as plt
import torch
import torchvision
from torch.optim import lr_scheduler
from multi_train_utils.distributed_utils import init_distributed_mode, cleanup
from multi_train_utils.train_eval_utils import train_one_epoch, evaluate, load_model
import torch.distributed as dist
from datasets import ClsDataset, read_split_data
from model import Net
from resnet import resnet18
# plot figure
x_epoch = []
record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []}
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1_err")
def draw_curve(epoch, train_loss, train_err, test_loss, test_err):
global record
record['train_loss'].append(train_loss)
record['train_err'].append(train_err)
record['test_loss'].append(test_loss)
record['test_err'].append(test_err)
x_epoch.append(epoch)
ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train')
ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val')
ax1.plot(x_epoch, record['train_err'], 'bo-', label='train')
ax1.plot(x_epoch, record['test_err'], 'ro-', label='val')
if epoch == 0:
ax0.legend()
ax1.legend()
fig.savefig("train.jpg")
def main(args):
batch_size = args.batch_size
device = 'cuda:{}'.format(args.gpu_id) if torch.cuda.is_available() else 'cpu'
train_info, val_info, num_classes = read_split_data(args.data_dir, valid_rate=0.2)
train_images_path, train_labels = train_info
val_images_path, val_labels = val_info
transform_train = torchvision.transforms.Compose([
torchvision.transforms.RandomCrop((128, 64), padding=4),
torchvision.transforms.RandomHorizontalFlip(),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transform_val = torchvision.transforms.Compose([
torchvision.transforms.Resize((128, 64)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
train_dataset = ClsDataset(
images_path=train_images_path,
images_labels=train_labels,
transform=transform_train
)
val_dataset = ClsDataset(
images_path=val_images_path,
images_labels=val_labels,
transform=transform_val
)
number_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
print('Using {} dataloader workers every process'.format(number_workers))
train_loader = torch.utils.data.DataLoader(
train_dataset,
batch_size=batch_size,
shuffle=True,
pin_memory=True,
num_workers=number_workers
)
val_loader = torch.utils.data.DataLoader(
val_dataset,
batch_size=batch_size,
shuffle=False,
pin_memory=True,
num_workers=number_workers,
)
# net definition
start_epoch = 0
net = Net(num_classes=num_classes)
if args.weights:
print('Loading from ', args.weights)
checkpoint = torch.load(args.weights, map_location='cpu')
net_dict = checkpoint if 'net_dict' not in checkpoint else checkpoint['net_dict']
start_epoch = checkpoint['epoch'] if 'epoch' in checkpoint else start_epoch
net = load_model(net_dict, net.state_dict(), net)
if args.freeze_layers:
for name, param in net.named_parameters():
if 'classifier' not in name:
param.requires_grad = False
net.to(device)
# loss and optimizer
pg = [p for p in net.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(pg, args.lr, momentum=0.9, weight_decay=5e-4)
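    # cosine-annealing multiplier: 1.0 at epoch 0, decaying to args.lrf at args.epochs,
    # i.e. ((1 + cos(x * pi / epochs)) / 2) * (1 - lrf) + lrf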
lr = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lr)
for epoch in range(start_epoch, start_epoch + args.epochs):
train_positive, train_loss = train_one_epoch(net, optimizer, train_loader, device, epoch)
train_acc = train_positive / len(train_dataset)
scheduler.step()
test_positive, test_loss = evaluate(net, val_loader, device)
test_acc = test_positive / len(val_dataset)
print('[epoch {}] accuracy: {}'.format(epoch, test_acc))
        state_dict = {
            'net_dict': net.state_dict(),
            'acc': test_acc,
            'epoch': epoch
        }
        os.makedirs('./checkpoint', exist_ok=True)  # unlike train_multiGPU.py, nothing else creates this dir
        torch.save(state_dict, './checkpoint/model_{}.pth'.format(epoch))
draw_curve(epoch, train_loss, 1 - train_acc, test_loss, 1 - test_acc)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir", default='data', type=str)
parser.add_argument('--epochs', type=int, default=40)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument("--lr", default=0.001, type=float)
parser.add_argument('--lrf', default=0.1, type=float)
parser.add_argument('--weights', type=str, default='./checkpoint/resnet18.pth')
parser.add_argument('--freeze-layers', action='store_true')
parser.add_argument('--gpu_id', default='0', help='gpu id')
args = parser.parse_args()
main(args)


@ -0,0 +1,189 @@
import argparse
import os
import tempfile
import math
import warnings
import matplotlib.pyplot as plt
import torch
import torchvision
from torch.optim import lr_scheduler
from multi_train_utils.distributed_utils import init_distributed_mode, cleanup
from multi_train_utils.train_eval_utils import train_one_epoch, evaluate, load_model
import torch.distributed as dist
from datasets import ClsDataset, read_split_data
from resnet import resnet18
# plot figure
x_epoch = []
record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []}
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1_err")
def draw_curve(epoch, train_loss, train_err, test_loss, test_err):
global record
record['train_loss'].append(train_loss)
record['train_err'].append(train_err)
record['test_loss'].append(test_loss)
record['test_err'].append(test_err)
x_epoch.append(epoch)
ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train')
ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val')
ax1.plot(x_epoch, record['train_err'], 'bo-', label='train')
ax1.plot(x_epoch, record['test_err'], 'ro-', label='val')
if epoch == 0:
ax0.legend()
ax1.legend()
fig.savefig("train.jpg")
def main(args):
init_distributed_mode(args)
rank = args.rank
device = torch.device(args.device)
batch_size = args.batch_size
weights_path = args.weights
args.lr *= args.world_size
checkpoint_path = ''
if rank == 0:
print(args)
if os.path.exists('./checkpoint') is False:
os.mkdir('./checkpoint')
train_info, val_info, num_classes = read_split_data(args.data_dir, valid_rate=0.2)
train_images_path, train_labels = train_info
val_images_path, val_labels = val_info
transform_train = torchvision.transforms.Compose([
torchvision.transforms.RandomCrop((128, 64), padding=4),
torchvision.transforms.RandomHorizontalFlip(),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transform_val = torchvision.transforms.Compose([
torchvision.transforms.Resize((128, 64)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
train_dataset = ClsDataset(
images_path=train_images_path,
images_labels=train_labels,
transform=transform_train
)
val_dataset = ClsDataset(
images_path=val_images_path,
images_labels=val_labels,
transform=transform_val
)
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, batch_size, drop_last=True)
number_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
if rank == 0:
print('Using {} dataloader workers every process'.format(number_workers))
train_loader = torch.utils.data.DataLoader(
train_dataset,
batch_sampler=train_batch_sampler,
pin_memory=True,
num_workers=number_workers
)
val_loader = torch.utils.data.DataLoader(
val_dataset,
sampler=val_sampler,
batch_size=batch_size,
pin_memory=True,
num_workers=number_workers,
)
# net definition
start_epoch = 0
net = resnet18(num_classes=num_classes)
if args.weights:
print('Loading from ', args.weights)
checkpoint = torch.load(args.weights, map_location='cpu')
net_dict = checkpoint if 'net_dict' not in checkpoint else checkpoint['net_dict']
start_epoch = checkpoint['epoch'] if 'epoch' in checkpoint else start_epoch
net = load_model(net_dict, net.state_dict(), net)
else:
warnings.warn("better providing pretraining weights")
checkpoint_path = os.path.join(tempfile.gettempdir(), 'initial_weights.pth')
if rank == 0:
torch.save(net.state_dict(), checkpoint_path)
dist.barrier()
net.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
if args.freeze_layers:
for name, param in net.named_parameters():
if 'fc' not in name:
param.requires_grad = False
else:
if args.syncBN:
net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net)
net.to(device)
net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.gpu])
# loss and optimizer
pg = [p for p in net.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(pg, args.lr, momentum=0.9, weight_decay=5e-4)
lr = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lr)
for epoch in range(start_epoch, start_epoch + args.epochs):
train_positive, train_loss = train_one_epoch(net, optimizer, train_loader, device, epoch)
train_acc = train_positive / len(train_dataset)
scheduler.step()
test_positive, test_loss = evaluate(net, val_loader, device)
test_acc = test_positive / len(val_dataset)
if rank == 0:
print('[epoch {}] accuracy: {}'.format(epoch, test_acc))
state_dict = {
'net_dict': net.module.state_dict(),
'acc': test_acc,
'epoch': epoch
}
torch.save(state_dict, './checkpoint/model_{}.pth'.format(epoch))
draw_curve(epoch, train_loss, 1 - train_acc, test_loss, 1 - test_acc)
if rank == 0:
        if os.path.exists(checkpoint_path):
os.remove(checkpoint_path)
cleanup()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir", default='data', type=str)
parser.add_argument('--epochs', type=int, default=40)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument("--lr", default=0.001, type=float)
parser.add_argument('--lrf', default=0.1, type=float)
parser.add_argument('--syncBN', type=bool, default=True)
parser.add_argument('--weights', type=str, default='./checkpoint/resnet18.pth')
parser.add_argument('--freeze-layers', action='store_true')
    # do not change the following parameters; the system will assign them automatically
parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0, 1 or cpu)')
parser.add_argument('--world_size', default=4, type=int, help='number of distributed processes')
parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
args = parser.parse_args()
main(args)
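The cosine schedule above decays the learning-rate multiplier from 1.0 at epoch 0 down to args.lrf by the final epoch. A minimal standalone sketch of the same lambda (epochs and lrf values are illustrative):

import math

epochs, lrf = 40, 0.1
lr_lambda = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf
for epoch in (0, 10, 20, 30, 39):
    # multiplier applied to the base learning rate at this epoch
    print(epoch, round(lr_lambda(epoch), 4))
# 0 -> 1.0, 20 -> 0.55, 39 -> ~0.1014: a smooth half-cosine decay toward lrf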

121
deep_sort/deep_sort.py Normal file
View File

@@ -0,0 +1,121 @@
import numpy as np
import torch
from .deep.feature_extractor import Extractor, FastReIDExtractor
from .sort.nn_matching import NearestNeighborDistanceMetric
from .sort.preprocessing import non_max_suppression
from .sort.detection import Detection
from .sort.tracker import Tracker
__all__ = ['DeepSort']
class DeepSort(object):
def __init__(self, model_path, model_config=None, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0,
max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True):
self.min_confidence = min_confidence
self.nms_max_overlap = nms_max_overlap
if model_config is None:
self.extractor = Extractor(model_path, use_cuda=use_cuda)
else:
self.extractor = FastReIDExtractor(model_config, model_path, use_cuda=use_cuda)
max_cosine_distance = max_dist
metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init)
def update(self, bbox_xywh, confidences, classes, ori_img, masks=None):
self.height, self.width = ori_img.shape[:2]
# generate detections
features = self._get_features(bbox_xywh, ori_img)
bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)
detections = [Detection(bbox_tlwh[i], conf, label, features[i], None if masks is None else masks[i])
for i, (conf, label) in enumerate(zip(confidences, classes))
if conf > self.min_confidence]
        # run non-maximum suppression
boxes = np.array([d.tlwh for d in detections])
scores = np.array([d.confidence for d in detections])
indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
detections = [detections[i] for i in indices]
# update tracker
self.tracker.predict()
self.tracker.update(detections)
# output bbox identities
outputs = []
mask_outputs = []
for track in self.tracker.tracks:
if not track.is_confirmed() or track.time_since_update > 1:
continue
box = track.to_tlwh()
x1, y1, x2, y2 = self._tlwh_to_xyxy(box)
track_id = track.track_id
track_cls = track.cls
outputs.append(np.array([x1, y1, x2, y2, track_cls, track_id], dtype=np.int32))
if track.mask is not None:
mask_outputs.append(track.mask)
if len(outputs) > 0:
outputs = np.stack(outputs, axis=0)
return outputs, mask_outputs
"""
TODO:
Convert bbox from xc_yc_w_h to xtl_ytl_w_h
Thanks JieChen91@github.com for reporting this bug!
"""
@staticmethod
def _xywh_to_tlwh(bbox_xywh):
if isinstance(bbox_xywh, np.ndarray):
bbox_tlwh = bbox_xywh.copy()
elif isinstance(bbox_xywh, torch.Tensor):
bbox_tlwh = bbox_xywh.clone()
bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2.
bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2.
return bbox_tlwh
def _xywh_to_xyxy(self, bbox_xywh):
x, y, w, h = bbox_xywh
x1 = max(int(x - w / 2), 0)
x2 = min(int(x + w / 2), self.width - 1)
y1 = max(int(y - h / 2), 0)
y2 = min(int(y + h / 2), self.height - 1)
return x1, y1, x2, y2
def _tlwh_to_xyxy(self, bbox_tlwh):
"""
TODO:
Convert bbox from xtl_ytl_w_h to xc_yc_w_h
Thanks JieChen91@github.com for reporting this bug!
"""
x, y, w, h = bbox_tlwh
x1 = max(int(x), 0)
x2 = min(int(x + w), self.width - 1)
y1 = max(int(y), 0)
y2 = min(int(y + h), self.height - 1)
return x1, y1, x2, y2
@staticmethod
def _xyxy_to_tlwh(bbox_xyxy):
x1, y1, x2, y2 = bbox_xyxy
t = x1
l = y1
w = int(x2 - x1)
h = int(y2 - y1)
return t, l, w, h
def _get_features(self, bbox_xywh, ori_img):
im_crops = []
for box in bbox_xywh:
x1, y1, x2, y2 = self._xywh_to_xyxy(box)
im = ori_img[y1:y2, x1:x2]
im_crops.append(im)
if im_crops:
features = self.extractor(im_crops)
else:
features = np.array([])
return features
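The private helpers above convert between the detector's center format (xc, yc, w, h), the tracker's top-left format (tlwh), and the corner format (xyxy). A standalone sketch of the same arithmetic, without the image-boundary clipping:

xc, yc, w, h = 100., 80., 40., 60.                    # center x/y, width, height
tlwh = (xc - w / 2., yc - h / 2., w, h)               # (80, 50, 40, 60): top-left + size
xyxy = (tlwh[0], tlwh[1], tlwh[0] + w, tlwh[1] + h)   # (80, 50, 120, 110): corners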

51
deep_sort/sort/detection.py Normal file
View File

@@ -0,0 +1,51 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
class Detection(object):
"""
This class represents a bounding box detection in a single image.
Parameters
----------
tlwh : array_like
Bounding box in format `(x, y, w, h)`.
confidence : float
Detector confidence score.
feature : array_like
A feature vector that describes the object contained in this image.
Attributes
----------
tlwh : ndarray
Bounding box in format `(top left x, top left y, width, height)`.
confidence : ndarray
Detector confidence score.
feature : ndarray | NoneType
A feature vector that describes the object contained in this image.
"""
def __init__(self, tlwh, confidence, label, feature, mask=None):
self.tlwh = np.asarray(tlwh, dtype=np.float32)
self.confidence = float(confidence)
self.cls = int(label)
self.feature = np.asarray(feature, dtype=np.float32)
self.mask = mask
def to_tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
def to_xyah(self):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = self.tlwh.copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
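A quick usage sketch of the two conversions (the import path assumes this commit's layout; the 128-dim zero feature is a placeholder for a real ReID embedding):

import numpy as np
from deep_sort.sort.detection import Detection  # path assumed from this commit

det = Detection(tlwh=[80, 50, 40, 60], confidence=0.9, label=0,
                feature=np.zeros(128, dtype=np.float32))
print(det.to_tlbr())  # [ 80.  50. 120. 110.]  corner format
print(det.to_xyah())  # [100.  80.  ~0.667  60.]  center, aspect ratio (w/h), height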

81
deep_sort/sort/iou_matching.py Normal file
View File

@@ -0,0 +1,81 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import linear_assignment
def iou(bbox, candidates):
"""Computer intersection over union.
Parameters
----------
bbox : ndarray
A bounding box in format `(top left x, top left y, width, height)`.
candidates : ndarray
A matrix of candidate bounding boxes (one per row) in the same format
as `bbox`.
Returns
-------
ndarray
The intersection over union in [0, 1] between the `bbox` and each
candidate. A higher score means a larger fraction of the `bbox` is
occluded by the candidate.
"""
bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
candidates_tl = candidates[:, :2]
candidates_br = candidates[:, :2] + candidates[:, 2:]
tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
wh = np.maximum(0., br - tl)
area_intersection = wh.prod(axis=1)
area_bbox = bbox[2:].prod()
area_candidates = candidates[:, 2:].prod(axis=1)
return area_intersection / (area_bbox + area_candidates - area_intersection)
def iou_cost(tracks, detections, track_indices=None,
detection_indices=None):
"""An intersection over union distance metric.
Parameters
----------
tracks : List[deep_sort.track.Track]
A list of tracks.
detections : List[deep_sort.detection.Detection]
A list of detections.
track_indices : Optional[List[int]]
A list of indices to tracks that should be matched. Defaults to
all `tracks`.
detection_indices : Optional[List[int]]
A list of indices to detections that should be matched. Defaults
to all `detections`.
Returns
-------
ndarray
Returns a cost matrix of shape
len(track_indices), len(detection_indices) where entry (i, j) is
`1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
for row, track_idx in enumerate(track_indices):
if tracks[track_idx].time_since_update > 1:
cost_matrix[row, :] = linear_assignment.INFTY_COST
continue
bbox = tracks[track_idx].to_tlwh()
candidates = np.asarray([detections[i].tlwh for i in detection_indices])
cost_matrix[row, :] = 1. - iou(bbox, candidates)
return cost_matrix
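A small numeric check of iou (import path assumed): an identical box scores 1.0, and a half-offset box of the same size scores 25 / 175 ≈ 0.143:

import numpy as np
from deep_sort.sort.iou_matching import iou  # path assumed from this commit

bbox = np.array([0., 0., 10., 10.])           # tlwh
candidates = np.array([[0., 0., 10., 10.],    # identical box
                       [5., 5., 10., 10.]])   # shifted by half a box
print(iou(bbox, candidates))                  # [1.0, 0.14285714]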

231
deep_sort/sort/kalman_filter.py Normal file
View File

@@ -0,0 +1,231 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919}
class KalmanFilter(object):
"""
A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space
x, y, a, h, vx, vy, va, vh
contains the bounding box center position (x, y), aspect ratio a, height h,
and their respective velocities.
Object motion follows a constant velocity model. The bounding box location
(x, y, a, h) is taken as direct observation of the state space (linear
observation model).
"""
def __init__(self):
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
self._motion_mat = np.eye(2 * ndim, 2 * ndim)
for i in range(ndim):
self._motion_mat[i, ndim + i] = dt
self._update_mat = np.eye(ndim, 2 * ndim)
# Motion and observation uncertainty are chosen relative to the current
# state estimate. These weights control the amount of uncertainty in
# the model. This is a bit hacky.
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initiate(self, measurement):
"""Create track from unassociated measurement.
Parameters
----------
measurement : ndarray
Bounding box coordinates (x, y, a, h) with center position (x, y),
aspect ratio a, and height h.
Returns
-------
(ndarray, ndarray)
Returns the mean vector (8 dimensional) and covariance matrix (8x8
dimensional) of the new track. Unobserved velocities are initialized
to 0 mean.
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
mean = np.r_[mean_pos, mean_vel]
std = [
2 * self._std_weight_position * measurement[3],
2 * self._std_weight_position * measurement[3],
1e-2,
2 * self._std_weight_position * measurement[3],
10 * self._std_weight_velocity * measurement[3],
10 * self._std_weight_velocity * measurement[3],
1e-5,
10 * self._std_weight_velocity * measurement[3]]
covariance = np.diag(np.square(std))
return mean, covariance
def predict(self, mean, covariance):
"""Run Kalman filter prediction step.
Parameters
----------
mean : ndarray
The 8 dimensional mean vector of the object state at the previous
time step.
covariance : ndarray
The 8x8 dimensional covariance matrix of the object state at the
previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted
state. Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[3],
self._std_weight_position * mean[3],
1e-2,
self._std_weight_position * mean[3]]
std_vel = [
self._std_weight_velocity * mean[3],
self._std_weight_velocity * mean[3],
1e-5,
self._std_weight_velocity * mean[3]]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
mean = np.dot(self._motion_mat, mean)
covariance = np.linalg.multi_dot((
self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
return mean, covariance
def project(self, mean, covariance):
"""Project state distribution to measurement space.
Parameters
----------
mean : ndarray
The state's mean vector (8 dimensional array).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
Returns
-------
(ndarray, ndarray)
Returns the projected mean and covariance matrix of the given state
estimate.
"""
std = [
self._std_weight_position * mean[3],
self._std_weight_position * mean[3],
1e-1,
self._std_weight_position * mean[3]]
innovation_cov = np.diag(np.square(std))
mean = np.dot(self._update_mat, mean)
covariance = np.linalg.multi_dot((
self._update_mat, covariance, self._update_mat.T))
return mean, covariance + innovation_cov
def update(self, mean, covariance, measurement):
"""Run Kalman filter correction step.
Parameters
----------
mean : ndarray
The predicted state's mean vector (8 dimensional).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
measurement : ndarray
The 4 dimensional measurement vector (x, y, a, h), where (x, y)
is the center position, a the aspect ratio, and h the height of the
bounding box.
Returns
-------
(ndarray, ndarray)
Returns the measurement-corrected state distribution.
"""
projected_mean, projected_cov = self.project(mean, covariance)
chol_factor, lower = scipy.linalg.cho_factor(
projected_cov, lower=True, check_finite=False)
kalman_gain = scipy.linalg.cho_solve(
(chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
check_finite=False).T
innovation = measurement - projected_mean
new_mean = mean + np.dot(innovation, kalman_gain.T)
# new_covariance = covariance - np.linalg.multi_dot((
# kalman_gain, projected_cov, kalman_gain.T))
new_covariance = covariance - np.linalg.multi_dot((
kalman_gain, self._update_mat, covariance))
return new_mean, new_covariance
def gating_distance(self, mean, covariance, measurements,
only_position=False):
"""Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Parameters
----------
mean : ndarray
Mean vector over the state distribution (8 dimensional).
covariance : ndarray
Covariance of the state distribution (8x8 dimensional).
measurements : ndarray
An Nx4 dimensional matrix of N measurements, each in
format (x, y, a, h) where (x, y) is the bounding box center
position, a the aspect ratio, and h the height.
only_position : Optional[bool]
If True, distance computation is done with respect to the bounding
box center position only.
Returns
-------
ndarray
Returns an array of length N, where the i-th element contains the
squared Mahalanobis distance between (mean, covariance) and
`measurements[i]`.
"""
mean, covariance = self.project(mean, covariance)
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
cholesky_factor = np.linalg.cholesky(covariance)
d = measurements - mean
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
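A minimal initiate/predict/update cycle (import path assumed; the measurement values are illustrative):

import numpy as np
from deep_sort.sort.kalman_filter import KalmanFilter  # path assumed from this commit

kf = KalmanFilter()
mean, cov = kf.initiate(np.array([100., 80., 0.67, 60.]))  # (x, y, a, h), velocities start at 0
mean, cov = kf.predict(mean, cov)                          # constant-velocity prediction step
mean, cov = kf.update(mean, cov, np.array([102., 81., 0.67, 60.]))
print(mean[:4])  # corrected (x, y, a, h), pulled toward the new measurement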

192
deep_sort/sort/linear_assignment.py Normal file
View File

@@ -0,0 +1,192 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
# from sklearn.utils.linear_assignment_ import linear_assignment
from scipy.optimize import linear_sum_assignment as linear_assignment
from . import kalman_filter
INFTY_COST = 1e+5
def min_cost_matching(
distance_metric, max_distance, tracks, detections, track_indices=None,
detection_indices=None):
"""Solve linear assignment problem.
Parameters
----------
    distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
The distance metric is given a list of tracks and detections as well as
a list of N track indices and M detection indices. The metric should
return the NxM dimensional cost matrix, where element (i, j) is the
association cost between the i-th track in the given track indices and
the j-th detection in the given detection_indices.
max_distance : float
Gating threshold. Associations with cost larger than this value are
disregarded.
tracks : List[track.Track]
A list of predicted tracks at the current time step.
detections : List[detection.Detection]
A list of detections at the current time step.
track_indices : List[int]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above).
detection_indices : List[int]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above).
Returns
-------
(List[(int, int)], List[int], List[int])
Returns a tuple with the following three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
if len(detection_indices) == 0 or len(track_indices) == 0:
return [], track_indices, detection_indices # Nothing to match.
cost_matrix = distance_metric(
tracks, detections, track_indices, detection_indices)
cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
row_indices, col_indices = linear_assignment(cost_matrix)
matches, unmatched_tracks, unmatched_detections = [], [], []
for col, detection_idx in enumerate(detection_indices):
if col not in col_indices:
unmatched_detections.append(detection_idx)
for row, track_idx in enumerate(track_indices):
if row not in row_indices:
unmatched_tracks.append(track_idx)
for row, col in zip(row_indices, col_indices):
track_idx = track_indices[row]
detection_idx = detection_indices[col]
if cost_matrix[row, col] > max_distance:
unmatched_tracks.append(track_idx)
unmatched_detections.append(detection_idx)
else:
matches.append((track_idx, detection_idx))
return matches, unmatched_tracks, unmatched_detections
def matching_cascade(
distance_metric, max_distance, cascade_depth, tracks, detections,
track_indices=None, detection_indices=None):
"""Run matching cascade.
Parameters
----------
    distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
The distance metric is given a list of tracks and detections as well as
a list of N track indices and M detection indices. The metric should
return the NxM dimensional cost matrix, where element (i, j) is the
association cost between the i-th track in the given track indices and
the j-th detection in the given detection indices.
max_distance : float
Gating threshold. Associations with cost larger than this value are
disregarded.
    cascade_depth: int
        The cascade depth; this should be set to the maximum track age.
tracks : List[track.Track]
A list of predicted tracks at the current time step.
detections : List[detection.Detection]
A list of detections at the current time step.
track_indices : Optional[List[int]]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above). Defaults to all tracks.
detection_indices : Optional[List[int]]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above). Defaults to all
detections.
Returns
-------
(List[(int, int)], List[int], List[int])
Returns a tuple with the following three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = list(range(len(tracks)))
if detection_indices is None:
detection_indices = list(range(len(detections)))
unmatched_detections = detection_indices
matches = []
for level in range(cascade_depth):
if len(unmatched_detections) == 0: # No detections left
break
track_indices_l = [
k for k in track_indices
if tracks[k].time_since_update == 1 + level
]
if len(track_indices_l) == 0: # Nothing to match at this level
continue
matches_l, _, unmatched_detections = \
min_cost_matching(
distance_metric, max_distance, tracks, detections,
track_indices_l, unmatched_detections)
matches += matches_l
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
return matches, unmatched_tracks, unmatched_detections
def gate_cost_matrix(
kf, cost_matrix, tracks, detections, track_indices, detection_indices,
gated_cost=INFTY_COST, only_position=False):
"""Invalidate infeasible entries in cost matrix based on the state
distributions obtained by Kalman filtering.
Parameters
----------
kf : The Kalman filter.
cost_matrix : ndarray
The NxM dimensional cost matrix, where N is the number of track indices
and M is the number of detection indices, such that entry (i, j) is the
association cost between `tracks[track_indices[i]]` and
`detections[detection_indices[j]]`.
tracks : List[track.Track]
A list of predicted tracks at the current time step.
detections : List[detection.Detection]
A list of detections at the current time step.
track_indices : List[int]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above).
detection_indices : List[int]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above).
gated_cost : Optional[float]
Entries in the cost matrix corresponding to infeasible associations are
set this value. Defaults to a very large value.
only_position : Optional[bool]
If True, only the x, y position of the state distribution is considered
during gating. Defaults to False.
Returns
-------
ndarray
Returns the modified cost matrix.
"""
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray(
[detections[i].to_xyah() for i in detection_indices])
for row, track_idx in enumerate(track_indices):
track = tracks[track_idx]
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position)
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
return cost_matrix
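min_cost_matching only needs len() of the track/detection lists plus a metric callback, so a toy sketch can use plain lists and a fixed cost matrix (all values illustrative):

import numpy as np
from deep_sort.sort.linear_assignment import min_cost_matching  # path assumed

cost = np.array([[0.1, 0.9, 0.8],
                 [0.7, 0.2, 0.95]])

def toy_metric(tracks, detections, track_indices, detection_indices):
    # stands in for an appearance/IoU cost; returns the submatrix for the given indices
    return cost[np.ix_(track_indices, detection_indices)]

matches, um_tracks, um_dets = min_cost_matching(
    toy_metric, max_distance=0.5, tracks=[0, 1], detections=[0, 1, 2])
print(matches, um_tracks, um_dets)  # [(0, 0), (1, 1)] [] [2]: costs 0.1 and 0.2 pass the gate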

176
deep_sort/sort/nn_matching.py Normal file
View File

@@ -0,0 +1,176 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
def _pdist(a, b):
"""Compute pair-wise squared distance between points in `a` and `b`.
Parameters
----------
a : array_like
An NxM matrix of N samples of dimensionality M.
b : array_like
An LxM matrix of L samples of dimensionality M.
Returns
-------
ndarray
        Returns a matrix of size len(a), len(b) such that element (i, j)
        contains the squared distance between `a[i]` and `b[j]`.
"""
a, b = np.asarray(a), np.asarray(b)
if len(a) == 0 or len(b) == 0:
return np.zeros((len(a), len(b)))
a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
r2 = np.clip(r2, 0., float(np.inf))
return r2
def _cosine_distance(a, b, data_is_normalized=False):
"""Compute pair-wise cosine distance between points in `a` and `b`.
Parameters
----------
a : array_like
An NxM matrix of N samples of dimensionality M.
b : array_like
An LxM matrix of L samples of dimensionality M.
data_is_normalized : Optional[bool]
If True, assumes rows in a and b are unit length vectors.
        Otherwise, a and b are explicitly normalized to length 1.
Returns
-------
ndarray
        Returns a matrix of size len(a), len(b) such that element (i, j)
        contains the cosine distance between `a[i]` and `b[j]`.
"""
if not data_is_normalized:
a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
return 1. - np.dot(a, b.T)
def _nn_euclidean_distance(x, y):
""" Helper function for nearest neighbor distance metric (Euclidean).
Parameters
----------
x : ndarray
A matrix of N row-vectors (sample points).
y : ndarray
A matrix of M row-vectors (query points).
Returns
-------
ndarray
A vector of length M that contains for each entry in `y` the
smallest Euclidean distance to a sample in `x`.
"""
distances = _pdist(x, y)
return np.maximum(0.0, distances.min(axis=0))
def _nn_cosine_distance(x, y):
""" Helper function for nearest neighbor distance metric (cosine).
Parameters
----------
x : ndarray
A matrix of N row-vectors (sample points).
y : ndarray
A matrix of M row-vectors (query points).
Returns
-------
ndarray
A vector of length M that contains for each entry in `y` the
smallest cosine distance to a sample in `x`.
"""
distances = _cosine_distance(x, y)
return distances.min(axis=0)
class NearestNeighborDistanceMetric(object):
"""
A nearest neighbor distance metric that, for each target, returns
the closest distance to any sample that has been observed so far.
Parameters
----------
metric : str
Either "euclidean" or "cosine".
matching_threshold: float
The matching threshold. Samples with larger distance are considered an
invalid match.
budget : Optional[int]
If not None, fix samples per class to at most this number. Removes
the oldest samples when the budget is reached.
Attributes
----------
samples : Dict[int -> List[ndarray]]
A dictionary that maps from target identities to the list of samples
that have been observed so far.
"""
def __init__(self, metric, matching_threshold, budget=None):
if metric == "euclidean":
self._metric = _nn_euclidean_distance
elif metric == "cosine":
self._metric = _nn_cosine_distance
else:
raise ValueError(
"Invalid metric; must be either 'euclidean' or 'cosine'")
self.matching_threshold = matching_threshold
self.budget = budget
self.samples = {}
def partial_fit(self, features, targets, active_targets):
"""Update the distance metric with new data.
Parameters
----------
features : ndarray
An NxM matrix of N features of dimensionality M.
targets : ndarray
An integer array of associated target identities.
active_targets : List[int]
A list of targets that are currently present in the scene.
"""
for feature, target in zip(features, targets):
self.samples.setdefault(target, []).append(feature)
if self.budget is not None:
self.samples[target] = self.samples[target][-self.budget:]
self.samples = {k: self.samples[k] for k in active_targets}
def distance(self, features, targets):
"""Compute distance between features and targets.
Parameters
----------
features : ndarray
An NxM matrix of N features of dimensionality M.
targets : List[int]
A list of targets to match the given `features` against.
Returns
-------
ndarray
Returns a cost matrix of shape len(targets), len(features), where
element (i, j) contains the closest squared distance between
`targets[i]` and `features[j]`.
"""
cost_matrix = np.zeros((len(targets), len(features)))
for i, target in enumerate(targets):
cost_matrix[i, :] = self._metric(self.samples[target], features)
return cost_matrix
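A small sketch of the metric in isolation (import path assumed): orthogonal unit features give cosine distance 1, identical features give 0:

import numpy as np
from deep_sort.sort.nn_matching import NearestNeighborDistanceMetric  # path assumed

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)
feats = np.eye(3, 8, dtype=np.float32)             # three orthogonal unit features
metric.partial_fit(feats, targets=np.array([1, 2, 3]), active_targets=[1, 2, 3])
query = np.eye(1, 8, dtype=np.float32)             # identical to target 1's feature
print(metric.distance(query, targets=[1, 2, 3]))   # [[0.], [1.], [1.]] (cosine distances)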

73
deep_sort/sort/preprocessing.py Normal file
View File

@@ -0,0 +1,73 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import cv2
def non_max_suppression(boxes, max_bbox_overlap, scores=None):
"""Suppress overlapping detections.
Original code from [1]_ has been adapted to include confidence score.
.. [1] http://www.pyimagesearch.com/2015/02/16/
faster-non-maximum-suppression-python/
Examples
--------
>>> boxes = [d.roi for d in detections]
>>> scores = [d.confidence for d in detections]
>>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
>>> detections = [detections[i] for i in indices]
Parameters
----------
boxes : ndarray
Array of ROIs (x, y, width, height).
max_bbox_overlap : float
        ROIs that overlap more than this value are suppressed.
scores : Optional[array_like]
Detector confidence score.
Returns
-------
List[int]
Returns indices of detections that have survived non-maxima suppression.
"""
if len(boxes) == 0:
return []
boxes = boxes.astype(np.float32)
pick = []
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2] + boxes[:, 0]
y2 = boxes[:, 3] + boxes[:, 1]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
if scores is not None:
idxs = np.argsort(scores)
else:
idxs = np.argsort(y2)
while len(idxs) > 0:
last = len(idxs) - 1
i = idxs[last]
pick.append(i)
xx1 = np.maximum(x1[i], x1[idxs[:last]])
yy1 = np.maximum(y1[i], y1[idxs[:last]])
xx2 = np.minimum(x2[i], x2[idxs[:last]])
yy2 = np.minimum(y2[i], y2[idxs[:last]])
w = np.maximum(0, xx2 - xx1 + 1)
h = np.maximum(0, yy2 - yy1 + 1)
overlap = (w * h) / (area[idxs[:last]] + area[idxs[last]] - w * h)
idxs = np.delete(
idxs, np.concatenate(
([last], np.where(overlap > max_bbox_overlap)[0])))
return pick
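A numeric example (import path assumed): two heavily overlapping boxes and one isolated box; the lower-scoring overlapping box is suppressed:

import numpy as np
from deep_sort.sort.preprocessing import non_max_suppression  # path assumed

boxes = np.array([[0, 0, 10, 10],      # (x, y, w, h)
                  [1, 1, 10, 10],      # heavy overlap with the first box
                  [50, 50, 10, 10]])   # isolated box
scores = np.array([0.9, 0.8, 0.7])
print(non_max_suppression(boxes, max_bbox_overlap=0.5, scores=scores))  # [0, 2]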

169
deep_sort/sort/track.py Normal file
View File

@@ -0,0 +1,169 @@
# vim: expandtab:ts=4:sw=4
class TrackState:
"""
Enumeration type for the single target track state. Newly created tracks are
classified as `tentative` until enough evidence has been collected. Then,
the track state is changed to `confirmed`. Tracks that are no longer alive
are classified as `deleted` to mark them for removal from the set of active
tracks.
"""
Tentative = 1
Confirmed = 2
Deleted = 3
class Track:
"""
A single target track with state space `(x, y, a, h)` and associated
velocities, where `(x, y)` is the center of the bounding box, `a` is the
aspect ratio and `h` is the height.
Parameters
----------
mean : ndarray
Mean vector of the initial state distribution.
covariance : ndarray
Covariance matrix of the initial state distribution.
track_id : int
A unique track identifier.
n_init : int
Number of consecutive detections before the track is confirmed. The
track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
max_age : int
The maximum number of consecutive misses before the track state is
set to `Deleted`.
feature : Optional[ndarray]
Feature vector of the detection this track originates from. If not None,
this feature is added to the `features` cache.
Attributes
----------
mean : ndarray
Mean vector of the initial state distribution.
covariance : ndarray
Covariance matrix of the initial state distribution.
track_id : int
A unique track identifier.
hits : int
Total number of measurement updates.
age : int
        Total number of frames since first occurrence.
time_since_update : int
Total number of frames since last measurement update.
state : TrackState
The current track state.
features : List[ndarray]
A cache of features. On each measurement update, the associated feature
vector is added to this list.
"""
def __init__(self, mean, covariance, track_id, n_init, max_age,
feature=None, cls=None, mask=None):
self.mean = mean
self.covariance = covariance
self.track_id = track_id
self.hits = 1
self.age = 1
self.time_since_update = 0
self.state = TrackState.Tentative
self.cls = cls
self.mask = mask
self.features = []
if feature is not None:
self.features.append(feature)
self._n_init = n_init
self._max_age = max_age
def to_tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
Returns
-------
ndarray
The bounding box.
"""
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
def to_tlbr(self):
"""Get current position in bounding box format `(min x, miny, max x,
max y)`.
Returns
-------
ndarray
The bounding box.
"""
ret = self.to_tlwh()
ret[2:] = ret[:2] + ret[2:]
return ret
def predict(self, kf):
"""Propagate the state distribution to the current time step using a
Kalman filter prediction step.
Parameters
----------
kf : kalman_filter.KalmanFilter
The Kalman filter.
"""
self.mean, self.covariance = kf.predict(self.mean, self.covariance)
self.age += 1
self.time_since_update += 1
def update(self, kf, detection):
"""Perform Kalman filter measurement update step and update the feature
cache.
Parameters
----------
kf : kalman_filter.KalmanFilter
The Kalman filter.
detection : Detection
The associated detection.
"""
self.mask = detection.mask
self.mean, self.covariance = kf.update(
self.mean, self.covariance, detection.to_xyah())
self.features.append(detection.feature)
self.hits += 1
self.time_since_update = 0
if self.state == TrackState.Tentative and self.hits >= self._n_init:
self.state = TrackState.Confirmed
def mark_missed(self):
"""Mark this track as missed (no association at the current time step).
"""
if self.state == TrackState.Tentative:
self.state = TrackState.Deleted
elif self.time_since_update > self._max_age:
self.state = TrackState.Deleted
def is_tentative(self):
"""Returns True if this track is tentative (unconfirmed).
"""
return self.state == TrackState.Tentative
def is_confirmed(self):
"""Returns True if this track is confirmed."""
return self.state == TrackState.Confirmed
def is_deleted(self):
"""Returns True if this track is dead and should be deleted."""
return self.state == TrackState.Deleted

138
deep_sort/sort/tracker.py Normal file
View File

@@ -0,0 +1,138 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import kalman_filter
from . import linear_assignment
from . import iou_matching
from .track import Track
class Tracker:
"""
This is the multi-target tracker.
Parameters
----------
metric : nn_matching.NearestNeighborDistanceMetric
A distance metric for measurement-to-track association.
    max_age : int
        Maximum number of consecutive misses before a track is deleted.
n_init : int
Number of consecutive detections before the track is confirmed. The
track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
Attributes
----------
metric : nn_matching.NearestNeighborDistanceMetric
The distance metric used for measurement to track association.
    max_age : int
        Maximum number of consecutive misses before a track is deleted.
n_init : int
Number of frames that a track remains in initialization phase.
kf : kalman_filter.KalmanFilter
A Kalman filter to filter target trajectories in image space.
tracks : List[Track]
The list of active tracks at the current time step.
"""
def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3):
self.metric = metric
self.max_iou_distance = max_iou_distance
self.max_age = max_age
self.n_init = n_init
self.kf = kalman_filter.KalmanFilter()
self.tracks = []
self._next_id = 1
def predict(self):
"""Propagate track state distributions one time step forward.
This function should be called once every time step, before `update`.
"""
for track in self.tracks:
track.predict(self.kf)
def update(self, detections):
"""Perform measurement update and track management.
Parameters
----------
detections : List[deep_sort.detection.Detection]
A list of detections at the current time step.
"""
# Run matching cascade.
matches, unmatched_tracks, unmatched_detections = \
self._match(detections)
# Update track set.
for track_idx, detection_idx in matches:
self.tracks[track_idx].update(
self.kf, detections[detection_idx])
for track_idx in unmatched_tracks:
self.tracks[track_idx].mark_missed()
for detection_idx in unmatched_detections:
self._initiate_track(detections[detection_idx])
self.tracks = [t for t in self.tracks if not t.is_deleted()]
# Update distance metric.
active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
features, targets = [], []
for track in self.tracks:
if not track.is_confirmed():
continue
features += track.features
targets += [track.track_id for _ in track.features]
track.features = []
self.metric.partial_fit(
np.asarray(features), np.asarray(targets), active_targets)
def _match(self, detections):
def gated_metric(tracks, dets, track_indices, detection_indices):
features = np.array([dets[i].feature for i in detection_indices])
targets = np.array([tracks[i].track_id for i in track_indices])
cost_matrix = self.metric.distance(features, targets)
cost_matrix = linear_assignment.gate_cost_matrix(
self.kf, cost_matrix, tracks, dets, track_indices,
detection_indices)
return cost_matrix
# Split track set into confirmed and unconfirmed tracks.
confirmed_tracks = [
i for i, t in enumerate(self.tracks) if t.is_confirmed()]
unconfirmed_tracks = [
i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
# Associate confirmed tracks using appearance features.
matches_a, unmatched_tracks_a, unmatched_detections = \
linear_assignment.matching_cascade(
gated_metric, self.metric.matching_threshold, self.max_age,
self.tracks, detections, confirmed_tracks)
# Associate remaining tracks together with unconfirmed tracks using IOU.
iou_track_candidates = unconfirmed_tracks + [
k for k in unmatched_tracks_a if
self.tracks[k].time_since_update == 1]
unmatched_tracks_a = [
k for k in unmatched_tracks_a if
self.tracks[k].time_since_update != 1]
matches_b, unmatched_tracks_b, unmatched_detections = \
linear_assignment.min_cost_matching(
iou_matching.iou_cost, self.max_iou_distance, self.tracks,
detections, iou_track_candidates, unmatched_detections)
matches = matches_a + matches_b
unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
return matches, unmatched_tracks, unmatched_detections
def _initiate_track(self, detection):
mean, covariance = self.kf.initiate(detection.to_xyah())
self.tracks.append(Track(
mean, covariance, self._next_id, self.n_init, self.max_age,
detection.feature, detection.cls, detection.mask))
self._next_id += 1
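An end-to-end sketch of the tracker on synthetic detections (import paths assumed; the random feature stands in for a ReID embedding). Feeding the same box for n_init=3 consecutive frames confirms the track:

import numpy as np
from deep_sort.sort.detection import Detection                       # paths assumed
from deep_sort.sort.nn_matching import NearestNeighborDistanceMetric
from deep_sort.sort.tracker import Tracker

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)
tracker = Tracker(metric, max_iou_distance=0.7, max_age=70, n_init=3)

for _ in range(3):  # same detection for three consecutive frames
    dets = [Detection([80, 50, 40, 60], 0.9, 0,
                      np.random.rand(128).astype(np.float32))]
    tracker.predict()
    tracker.update(dets)

print([(t.track_id, t.is_confirmed()) for t in tracker.tracks])  # [(1, True)]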

View File

@@ -0,0 +1,2 @@
def datasets():
return None

13
deep_sort/utils/asserts.py Normal file
View File

@@ -0,0 +1,13 @@
from os import environ
def assert_in(file, files_to_check):
if file not in files_to_check:
raise AssertionError("{} does not exist in the list".format(str(file)))
return True
def assert_in_env(check_list: list):
for item in check_list:
assert_in(item, environ.keys())
return True

51
deep_sort/utils/draw.py Normal file
View File

@@ -0,0 +1,51 @@
import numpy as np
import cv2
palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)
def compute_color_for_labels(label):
"""
    Simple function that computes a fixed color for a given class label.
"""
color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
return tuple(color)
def draw_masks(image, mask, color, thresh: float = 0.7, alpha: float = 0.5):
np_image = np.asarray(image)
mask = mask > thresh
color = np.asarray(color)
img_to_draw = np.copy(np_image)
# TODO: There might be a way to vectorize this
img_to_draw[mask] = color
out = np_image * (1 - alpha) + img_to_draw * alpha
return out.astype(np.uint8)
def draw_boxes(img, bbox, names=None, identities=None, masks=None, offset=(0, 0)):
for i, box in enumerate(bbox):
x1, y1, x2, y2 = [int(i) for i in box]
x1 += offset[0]
x2 += offset[0]
y1 += offset[1]
y2 += offset[1]
# box text and bar
id = int(identities[i]) if identities is not None else 0
color = compute_color_for_labels(id)
label = '{:}{:d}'.format(names[i], id)
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
if masks is not None:
mask = masks[i]
img = draw_masks(img, mask, color)
cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1)
cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)
return img
if __name__ == '__main__':
for i in range(82):
print(compute_color_for_labels(i))

103
deep_sort/utils/evaluation.py Normal file
View File

@@ -0,0 +1,103 @@
import os
import numpy as np
import copy
import motmetrics as mm
mm.lap.default_solver = 'lap'
from utils.io import read_results, unzip_objs
class Evaluator(object):
def __init__(self, data_root, seq_name, data_type):
self.data_root = data_root
self.seq_name = seq_name
self.data_type = data_type
self.load_annotations()
self.reset_accumulator()
def load_annotations(self):
assert self.data_type == 'mot'
gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)
def reset_accumulator(self):
self.acc = mm.MOTAccumulator(auto_id=True)
def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
# results
trk_tlwhs = np.copy(trk_tlwhs)
trk_ids = np.copy(trk_ids)
# gts
gt_objs = self.gt_frame_dict.get(frame_id, [])
gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
# ignore boxes
ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
ignore_tlwhs = unzip_objs(ignore_objs)[0]
# remove ignored results
keep = np.ones(len(trk_tlwhs), dtype=bool)
iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
if len(iou_distance) > 0:
match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
match_ious = iou_distance[match_is, match_js]
match_js = np.asarray(match_js, dtype=int)
match_js = match_js[np.logical_not(np.isnan(match_ious))]
keep[match_js] = False
trk_tlwhs = trk_tlwhs[keep]
trk_ids = trk_ids[keep]
# get distance matrix
iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
# acc
self.acc.update(gt_ids, trk_ids, iou_distance)
if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):
events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics
else:
events = None
return events
def eval_file(self, filename):
self.reset_accumulator()
result_frame_dict = read_results(filename, self.data_type, is_gt=False)
frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))
for frame_id in frames:
trk_objs = result_frame_dict.get(frame_id, [])
trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
return self.acc
@staticmethod
def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
names = copy.deepcopy(names)
if metrics is None:
metrics = mm.metrics.motchallenge_metrics
metrics = copy.deepcopy(metrics)
mh = mm.metrics.create()
summary = mh.compute_many(
accs,
metrics=metrics,
names=names,
generate_overall=True
)
return summary
@staticmethod
def save_summary(summary, filename):
import pandas as pd
writer = pd.ExcelWriter(filename)
summary.to_excel(writer)
writer.save()

133
deep_sort/utils/io.py Normal file
View File

@@ -0,0 +1,133 @@
import os
from typing import Dict
import numpy as np
# from utils.log import get_logger
def write_results(filename, results, data_type):
if data_type == 'mot':
save_format = '{frame},{id},{cls},{x1},{y1},{w},{h},-1,-1,-1,-1\n'
elif data_type == 'kitti':
save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
else:
raise ValueError(data_type)
with open(filename, 'w') as f:
for frame_id, tlwhs, track_ids, classes in results:
if data_type == 'kitti':
frame_id -= 1
for tlwh, track_id, cls_id in zip(tlwhs, track_ids, classes):
if track_id < 0:
continue
x1, y1, w, h = tlwh
x2, y2 = x1 + w, y1 + h
line = save_format.format(frame=frame_id, id=track_id, cls=cls_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)
f.write(line)
# def write_results(filename, results_dict: Dict, data_type: str):
# if not filename:
# return
# path = os.path.dirname(filename)
# if not os.path.exists(path):
# os.makedirs(path)
# if data_type in ('mot', 'mcmot', 'lab'):
# save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
# elif data_type == 'kitti':
# save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n'
# else:
# raise ValueError(data_type)
# with open(filename, 'w') as f:
# for frame_id, frame_data in results_dict.items():
# if data_type == 'kitti':
# frame_id -= 1
# for tlwh, track_id in frame_data:
# if track_id < 0:
# continue
# x1, y1, w, h = tlwh
# x2, y2 = x1 + w, y1 + h
# line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)
# f.write(line)
# logger.info('Save results to {}'.format(filename))
def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
if data_type in ('mot', 'lab'):
read_fun = read_mot_results
else:
raise ValueError('Unknown data type: {}'.format(data_type))
return read_fun(filename, is_gt, is_ignore)
"""
labels={'ped', ... % 1
'person_on_vhcl', ... % 2
'car', ... % 3
'bicycle', ... % 4
'mbike', ... % 5
'non_mot_vhcl', ... % 6
'static_person', ... % 7
'distractor', ... % 8
'occluder', ... % 9
'occluder_on_grnd', ... %10
'occluder_full', ... % 11
'reflection', ... % 12
'crowd' ... % 13
};
"""
def read_mot_results(filename, is_gt, is_ignore):
valid_labels = {1}
ignore_labels = {2, 7, 8, 12}
results_dict = dict()
if os.path.isfile(filename):
with open(filename, 'r') as f:
for line in f.readlines():
linelist = line.split(',')
if len(linelist) < 7:
continue
fid = int(linelist[0])
if fid < 1:
continue
results_dict.setdefault(fid, list())
if is_gt:
if 'MOT16-' in filename or 'MOT17-' in filename:
label = int(float(linelist[7]))
mark = int(float(linelist[6]))
if mark == 0 or label not in valid_labels:
continue
score = 1
elif is_ignore:
if 'MOT16-' in filename or 'MOT17-' in filename:
label = int(float(linelist[7]))
vis_ratio = float(linelist[8])
if label not in ignore_labels and vis_ratio >= 0:
continue
else:
continue
score = 1
else:
score = float(linelist[6])
tlwh = tuple(map(float, linelist[2:6]))
target_id = int(linelist[1])
results_dict[fid].append((tlwh, target_id, score))
return results_dict
def unzip_objs(objs):
if len(objs) > 0:
tlwhs, ids, scores = zip(*objs)
else:
tlwhs, ids, scores = [], [], []
tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
return tlwhs, ids, scores

383
deep_sort/utils/json_logger.py Normal file
View File

@@ -0,0 +1,383 @@
"""
References:
https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f
"""
import json
from os import makedirs
from os.path import exists, join
from datetime import datetime
class JsonMeta(object):
HOURS = 3
MINUTES = 59
SECONDS = 59
PATH_TO_SAVE = 'LOGS'
DEFAULT_FILE_NAME = 'remaining'
class BaseJsonLogger(object):
"""
This is the base class that returns __dict__ of its own
it also returns the dicts of objects in the attributes that are list instances
"""
def dic(self):
# returns dicts of objects
out = {}
for k, v in self.__dict__.items():
if hasattr(v, 'dic'):
out[k] = v.dic()
elif isinstance(v, list):
out[k] = self.list(v)
else:
out[k] = v
return out
@staticmethod
def list(values):
# applies the dic method on items in the list
return [v.dic() if hasattr(v, 'dic') else v for v in values]
class Label(BaseJsonLogger):
"""
For each bounding box there are various categories with confidences. Label class keeps track of that information.
"""
def __init__(self, category: str, confidence: float):
self.category = category
self.confidence = confidence
class Bbox(BaseJsonLogger):
"""
    This class stores a single bounding box and its candidate labels; Frame collects these per frame.
Attributes:
labels (list): List of label module.
top (int):
left (int):
width (int):
height (int):
Args:
bbox_id (float):
top (int):
left (int):
width (int):
height (int):
References:
Check Label module for better understanding.
"""
def __init__(self, bbox_id, top, left, width, height):
self.labels = []
self.bbox_id = bbox_id
self.top = top
self.left = left
self.width = width
self.height = height
def add_label(self, category, confidence):
        # appends a label; the top_k quota is enforced by BboxToJsonLogger.add_label_to_bbox
self.labels.append(Label(category, confidence))
def labels_full(self, value):
return len(self.labels) == value
class Frame(BaseJsonLogger):
"""
    This class stores the information for each frame and is used by JsonParser.
Attributes:
timestamp (float): The elapsed time of captured frame
frame_id (int): The frame number of the captured video
bboxes (list of Bbox objects): Stores the list of bbox objects.
References:
Check Bbox class for better information
Args:
timestamp (float):
frame_id (int):
"""
def __init__(self, frame_id: int, timestamp: float = None):
self.frame_id = frame_id
self.timestamp = timestamp
self.bboxes = []
def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int):
bboxes_ids = [bbox.bbox_id for bbox in self.bboxes]
if bbox_id not in bboxes_ids:
self.bboxes.append(Bbox(bbox_id, top, left, width, height))
else:
raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id))
def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float):
        bboxes = {bbox.bbox_id: bbox for bbox in self.bboxes}
if bbox_id in bboxes.keys():
res = bboxes.get(bbox_id)
res.add_label(category, confidence)
else:
            raise ValueError('the bbox with id: {} does not exist!'.format(bbox_id))
class BboxToJsonLogger(BaseJsonLogger):
"""
    This class automates the task of logging detections as JSON. An example is shown
    below to illustrate the contents of the output file:
Example:
{
"video_details": {
"frame_width": 1920,
"frame_height": 1080,
"frame_rate": 20,
"video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi"
},
"frames": [
{
"frame_id": 329,
"timestamp": 3365.1254
"bboxes": [
{
"labels": [
{
"category": "pedestrian",
"confidence": 0.9
}
],
"bbox_id": 0,
"top": 1257,
"left": 138,
"width": 68,
"height": 109
}
]
}],
Attributes:
frames (dict): It's a dictionary that maps each frame_id to json attributes.
video_details (dict): information about video file.
top_k_labels (int): shows the allowed number of labels
start_time (datetime object): we use it to automate the json output by time.
Args:
top_k_labels (int): shows the allowed number of labels
"""
def __init__(self, top_k_labels: int = 1):
self.frames = {}
        self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None,
                                  video_name=None)
self.top_k_labels = top_k_labels
self.start_time = datetime.now()
def set_top_k(self, value):
self.top_k_labels = value
def frame_exists(self, frame_id: int) -> bool:
"""
Args:
frame_id (int):
Returns:
bool: true if frame_id is recognized
"""
return frame_id in self.frames.keys()
def add_frame(self, frame_id: int, timestamp: float = None) -> None:
"""
Args:
frame_id (int):
timestamp (float): opencv captured frame time property
Raises:
ValueError: if frame_id would not exist in class frames attribute
Returns:
None
"""
if not self.frame_exists(frame_id):
self.frames[frame_id] = Frame(frame_id, timestamp)
else:
raise ValueError("Frame id: {} already exists".format(frame_id))
def bbox_exists(self, frame_id: int, bbox_id: int) -> bool:
"""
Args:
frame_id:
bbox_id:
Returns:
bool: if bbox exists in frame bboxes list
"""
bboxes = []
if self.frame_exists(frame_id=frame_id):
bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes]
return bbox_id in bboxes
def find_bbox(self, frame_id: int, bbox_id: int):
"""
Args:
frame_id:
bbox_id:
Returns:
bbox_id (int):
Raises:
ValueError: if bbox_id does not exist in the bbox list of specific frame.
"""
if not self.bbox_exists(frame_id, bbox_id):
raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id))
bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes}
return bboxes.get(bbox_id)
def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None:
"""
Args:
frame_id (int):
bbox_id (int):
top (int):
left (int):
width (int):
height (int):
Returns:
None
Raises:
ValueError: if bbox_id already exist in frame information with frame_id
ValueError: if frame_id does not exist in frames attribute
"""
if self.frame_exists(frame_id):
frame = self.frames[frame_id]
if not self.bbox_exists(frame_id, bbox_id):
frame.add_bbox(bbox_id, top, left, width, height)
else:
raise ValueError(
"frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id))
else:
raise ValueError("frame with frame_id: {} does not exist".format(frame_id))
def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float):
"""
Args:
frame_id:
bbox_id:
category:
confidence: the confidence value returned from yolo detection
Returns:
None
Raises:
ValueError: if labels quota (top_k_labels) exceeds.
"""
bbox = self.find_bbox(frame_id, bbox_id)
if not bbox.labels_full(self.top_k_labels):
bbox.add_label(category, confidence)
else:
raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id))
def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None,
video_name: str = None):
self.video_details['frame_width'] = frame_width
self.video_details['frame_height'] = frame_height
self.video_details['frame_rate'] = frame_rate
self.video_details['video_name'] = video_name
def output(self):
output = {'video_details': self.video_details}
result = list(self.frames.values())
output['frames'] = [item.dic() for item in result]
return output
def json_output(self, output_name):
"""
Args:
output_name:
Returns:
None
Notes:
It creates the json output with `output_name` name.
"""
if not output_name.endswith('.json'):
output_name += '.json'
with open(output_name, 'w') as file:
json.dump(self.output(), file)
file.close()
def set_start(self):
self.start_time = datetime.now()
def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0,
seconds: int = 60) -> None:
"""
Notes:
Creates folder and then periodically stores the jsons on that address.
Args:
output_dir (str): the directory where output files will be stored
hours (int):
minutes (int):
seconds (int):
Returns:
None
"""
end = datetime.now()
interval = 0
interval += abs(min([hours, JsonMeta.HOURS]) * 3600)
interval += abs(min([minutes, JsonMeta.MINUTES]) * 60)
interval += abs(min([seconds, JsonMeta.SECONDS]))
diff = (end - self.start_time).seconds
if diff > interval:
output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json'
if not exists(output_dir):
makedirs(output_dir)
output = join(output_dir, output_name)
self.json_output(output_name=output)
self.frames = {}
self.start_time = datetime.now()
def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE):
"""
        Placeholder: intended to save the accumulated output once `frame_counter` exceeds `frames_quota`.
:param frames_quota:
:param frame_counter:
:param output_dir:
:return:
"""
pass
def flush(self, output_dir):
"""
Notes:
We use this function to output jsons whenever possible.
like the time that we exit the while loop of opencv.
Args:
output_dir:
Returns:
None
"""
filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json'
output = join(output_dir, filename)
self.json_output(output_name=output)
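Typical call sequence (import path assumed; the numbers mirror the example JSON in the class docstring):

from deep_sort.utils.json_logger import BboxToJsonLogger  # path assumed

logger = BboxToJsonLogger(top_k_labels=1)
logger.add_video_details(frame_width=1920, frame_height=1080, frame_rate=20,
                         video_name='camera1.avi')
logger.add_frame(frame_id=329, timestamp=3365.1254)
logger.add_bbox_to_frame(frame_id=329, bbox_id=0, top=1257, left=138, width=68, height=109)
logger.add_label_to_bbox(frame_id=329, bbox_id=0, category='pedestrian', confidence=0.9)
logger.json_output('demo')  # writes demo.json with the structure shown in the docstring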

17
deep_sort/utils/log.py Normal file
View File

@@ -0,0 +1,17 @@
import logging
def get_logger(name='root'):
formatter = logging.Formatter(
# fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger = logging.getLogger(name)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
return logger

38
deep_sort/utils/parser.py Normal file
View File

@@ -0,0 +1,38 @@
import os
import yaml
from easydict import EasyDict as edict
class YamlParser(edict):
"""
    A YAML parser based on EasyDict.
"""
def __init__(self, cfg_dict=None, config_file=None):
if cfg_dict is None:
cfg_dict = {}
if config_file is not None:
assert (os.path.isfile(config_file))
with open(config_file, 'r') as fo:
cfg_dict.update(yaml.safe_load(fo.read()))
super(YamlParser, self).__init__(cfg_dict)
def merge_from_file(self, config_file):
with open(config_file, 'r') as fo:
self.update(yaml.safe_load(fo.read()))
def merge_from_dict(self, config_dict):
self.update(config_dict)
def get_config(config_file=None):
return YamlParser(config_file=config_file)
if __name__ == "__main__":
cfg = YamlParser(config_file="../configs/yolov3.yaml")
cfg.merge_from_file("../configs/deep_sort.yaml")
import ipdb; ipdb.set_trace()

39
deep_sort/utils/tools.py Normal file
View File

@@ -0,0 +1,39 @@
from functools import wraps
from time import time
def is_video(ext: str):
"""
Returns true if ext exists in
allowed_exts for video files.
Args:
ext:
Returns:
"""
allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp')
return any((ext.endswith(x) for x in allowed_exts))
def tik_tok(func):
"""
keep track of time for each process.
Args:
func:
Returns:
"""
@wraps(func)
def _time_it(*args, **kwargs):
start = time()
try:
return func(*args, **kwargs)
finally:
end_ = time()
print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start)))
return _time_it
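Usage sketch of the decorator (import path assumed; the sleep stands in for real per-frame work):

from time import sleep
from deep_sort.utils.tools import tik_tok  # path assumed from this commit

@tik_tok
def process_frame():
    sleep(0.05)  # placeholder for actual per-frame processing

process_frame()  # prints roughly "time: 0.050s, fps: 19.9xx"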

View File

@ -17,7 +17,7 @@ from starlette.middleware.cors import CORSMiddleware
 from application import settings
 from application import urls
 from starlette.staticfiles import StaticFiles  # dependency: pip install aiofiles
-from core.docs import custom_api_docs
+from core.websocket_app import websocket_config
 from core.exception import register_exception
 import typer
 from scripts.initialize.initialize import InitializeData, Environment
@ -64,7 +64,9 @@ def create_app():
     for url in urls.urlpatterns:
         app.include_router(url["ApiRouter"], prefix=url["prefix"], tags=url["tags"])
     # configure static resources for the API docs
-    custom_api_docs(app)
+    # custom_api_docs(app)
+    # enable websocket
+    websocket_config(app)
     return app
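The hunk above replaces the custom API-docs mount with a websocket setup hook. `core/websocket_app.py` itself is not part of this diff; the following is a minimal sketch, assuming `websocket_config` simply registers a room-scoped endpoint backed by the `room_manager` from `utils/websocket_server.py` (shown below):

```python
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from utils.websocket_server import room_manager

def websocket_config(app: FastAPI) -> None:
    # Hypothetical sketch: one endpoint, one room per path segment.
    @app.websocket("/ws/{room}")
    async def websocket_endpoint(websocket: WebSocket, room: str):
        await websocket.accept()
        await room_manager.add_to_room(room, websocket)
        try:
            while True:
                message = await websocket.receive_text()
                await room_manager.broadcast_to_room(room, message, exclude_websocket=websocket)
        except WebSocketDisconnect:
            await room_manager.remove_from_room(room, websocket)
```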

Binary file not shown.

47
utils/websocket_server.py Normal file
View File

@ -0,0 +1,47 @@
from fastapi import WebSocket
class SocketManager:
    """Tracks websocket connections grouped into named rooms."""

    def __init__(self):
        self.rooms = {}

    async def add_to_room(self, room: str, websocket: WebSocket):
        if room not in self.rooms:
            self.rooms[room] = []
        self.rooms[room].append(websocket)

    async def remove_from_room(self, room: str, websocket: WebSocket):
        if room in self.rooms:
            self.rooms[room].remove(websocket)
            # Drop the room entirely once the last client leaves.
            if len(self.rooms[room]) == 0:
                del self.rooms[room]
                if room.startswith('detect_rtsp_'):
                    pass  # placeholder hook for cleaning up live RTSP detection rooms

    async def broadcast_to_room(self, room: str, message: str, exclude_websocket: WebSocket = None):
        if room in self.rooms:
            # Iterate over a copy: failed sends remove members from the list.
            for ws in list(self.rooms[room]):
                if ws != exclude_websocket:
                    try:
                        await ws.send_text(message)
                    except Exception:
                        await self.remove_from_room(room, ws)

    async def send_to_room(self, room: str, message: str):
        if room in self.rooms:
            for ws in self.rooms[room]:
                try:
                    await ws.send_text(message)
                except Exception as e:
                    print(e)

    async def send_stream_to_room(self, room: str, message):
        if room in self.rooms:
            for ws in self.rooms[room]:
                try:
                    await ws.send_bytes(message)
                except Exception as e:
                    print(e)


room_manager = SocketManager()
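A server-side push sketch using `send_stream_to_room`, assuming frames come from an OpenCV capture and that room names follow the `detect_rtsp_` prefix seen above (the room name and stream URL are hypothetical):

```python
import asyncio
import cv2

from utils.websocket_server import room_manager

async def push_frames(room: str = 'detect_rtsp_demo') -> None:
    cap = cv2.VideoCapture('rtsp://example.com/media.mp4')  # hypothetical stream
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        ok, buf = cv2.imencode('.jpg', frame)  # JPEG-encode the frame
        if ok:
            # Fan the bytes out to every websocket subscribed to the room.
            await room_manager.send_stream_to_room(room, buf.tobytes())
        await asyncio.sleep(0)  # yield to the event loop between frames
    cap.release()
```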

222
yolov5/.dockerignore Normal file
View File

@ -0,0 +1,222 @@
# Repo-specific DockerIgnore -------------------------------------------------------------------------------------------
.git
.cache
.idea
runs
output
coco
storage.googleapis.com
data/samples/*
**/results*.csv
*.jpg
# Neural Network weights -----------------------------------------------------------------------------------------------
**/*.pt
**/*.pth
**/*.onnx
**/*.engine
**/*.mlmodel
**/*.torchscript
**/*.torchscript.pt
**/*.tflite
**/*.h5
**/*.pb
*_saved_model/
*_web_model/
*_openvino_model/
# Below Copied From .gitignore -----------------------------------------------------------------------------------------
# GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
wandb/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv*
venv*/
ENV*/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
Icon?
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/*
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
.html # Bokeh Plots
.pg # TensorFlow Frozen Graphs
.avi # videos
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
cmake-build-release/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

2
yolov5/.gitattributes vendored Normal file
View File

@ -0,0 +1,2 @@
# this drop notebooks from GitHub language stats
*.ipynb linguist-vendored

258
yolov5/.gitignore vendored Normal file
View File

@ -0,0 +1,258 @@
# Repo-specific GitIgnore ----------------------------------------------------------------------------------------------
*.jpg
*.jpeg
*.png
*.bmp
*.tif
*.tiff
*.heic
*.JPG
*.JPEG
*.PNG
*.BMP
*.TIF
*.TIFF
*.HEIC
*.mp4
*.mov
*.MOV
*.avi
*.data
*.json
*.cfg
!setup.cfg
!cfg/yolov3*.cfg
storage.googleapis.com
runs/*
data/*
data/images/*
!data/*.yaml
!data/hyps
!data/scripts
!data/images
!data/images/zidane.jpg
!data/images/bus.jpg
!data/*.sh
results*.csv
# Datasets -------------------------------------------------------------------------------------------------------------
coco/
coco128/
VOC/
# MATLAB GitIgnore -----------------------------------------------------------------------------------------------------
*.m~
*.mat
!targets*.mat
# Neural Network weights -----------------------------------------------------------------------------------------------
*.weights
*.pt
*.pb
*.onnx
*.engine
*.mlmodel
*.mlpackage
*.torchscript
*.tflite
*.h5
*_saved_model/
*_web_model/
*_openvino_model/
*_paddle_model/
darknet53.conv.74
yolov3-tiny.conv.15
# GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
/wandb/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv*
venv*/
ENV*/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
Icon?
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/*
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
.html # Bokeh Plots
.pg # TensorFlow Frozen Graphs
.avi # videos
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
cmake-build-release/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

14
yolov5/CITATION.cff Normal file
View File

@ -0,0 +1,14 @@
cff-version: 1.2.0
preferred-citation:
type: software
message: If you use YOLOv5, please cite it as below.
authors:
- family-names: Jocher
given-names: Glenn
orcid: "https://orcid.org/0000-0001-5950-6979"
title: "YOLOv5 by Ultralytics"
version: 7.0
doi: 10.5281/zenodo.3908559
date-released: 2020-5-29
license: AGPL-3.0
url: "https://github.com/ultralytics/yolov5"

76
yolov5/CONTRIBUTING.md Normal file
View File

@ -0,0 +1,76 @@
## Contributing to YOLOv5 🚀
We love your input! We want to make contributing to YOLOv5 as easy and transparent as possible, whether it's:
- Reporting a bug
- Discussing the current state of the code
- Submitting a fix
- Proposing a new feature
- Becoming a maintainer
YOLOv5 works so well due to our combined community effort, and for every small improvement you contribute you will be helping push the frontiers of what's possible in AI 😃!
## Submitting a Pull Request (PR) 🛠️
Submitting a PR is easy! This example shows how to submit a PR for updating `requirements.txt` in 4 steps:
### 1. Select File to Update
Select `requirements.txt` to update by clicking on it in GitHub.
<p align="center"><img width="800" alt="PR_step1" src="https://user-images.githubusercontent.com/26833433/122260847-08be2600-ced4-11eb-828b-8287ace4136c.png"></p>
### 2. Click 'Edit this file'
The button is in the top-right corner.
<p align="center"><img width="800" alt="PR_step2" src="https://user-images.githubusercontent.com/26833433/122260844-06f46280-ced4-11eb-9eec-b8a24be519ca.png"></p>
### 3. Make Changes
Change the `matplotlib` version from `3.2.2` to `3.3`.
<p align="center"><img width="800" alt="PR_step3" src="https://user-images.githubusercontent.com/26833433/122260853-0a87e980-ced4-11eb-9fd2-3650fb6e0842.png"></p>
### 4. Preview Changes and Submit PR
Click on the **Preview changes** tab to verify your updates. At the bottom of the screen select 'Create a **new branch** for this commit', assign your branch a descriptive name such as `fix/matplotlib_version` and click the green **Propose changes** button. All done, your PR is now submitted to YOLOv5 for review and approval 😃!
<p align="center"><img width="800" alt="PR_step4" src="https://user-images.githubusercontent.com/26833433/122260856-0b208000-ced4-11eb-8e8e-77b6151cbcc3.png"></p>
### PR recommendations
To allow your work to be integrated as seamlessly as possible, we advise you to:
- ✅ Verify your PR is **up-to-date** with `ultralytics/yolov5` `master` branch. If your PR is behind you can update your code by clicking the 'Update branch' button or by running `git pull` and `git merge master` locally.
<p align="center"><img width="751" alt="Screenshot 2022-08-29 at 22 47 15" src="https://user-images.githubusercontent.com/26833433/187295893-50ed9f44-b2c9-4138-a614-de69bd1753d7.png"></p>
- ✅ Verify all YOLOv5 Continuous Integration (CI) **checks are passing**.
<p align="center"><img width="751" alt="Screenshot 2022-08-29 at 22 47 03" src="https://user-images.githubusercontent.com/26833433/187296922-545c5498-f64a-4d8c-8300-5fa764360da6.png"></p>
- ✅ Reduce changes to the absolute **minimum** required for your bug fix or feature addition. _"It is not daily increase but daily decrease, hack away the unessential. The closer to the source, the less wastage there is."_ — Bruce Lee
## Submitting a Bug Report 🐛
If you spot a problem with YOLOv5 please submit a Bug Report!
For us to start investigating a possible problem we need to be able to reproduce it ourselves first. We've created a few short guidelines below to help users provide what we need to get started.
When asking a question, people will be better able to provide help if you provide **code** that they can easily understand and use to **reproduce** the problem. This is referred to by community members as creating a [minimum reproducible example](https://docs.ultralytics.com/help/minimum_reproducible_example/). Your code that reproduces the problem should be:
- ✅ **Minimal** Use as little code as possible that still produces the same problem
- ✅ **Complete** Provide **all** parts someone else needs to reproduce your problem in the question itself
- ✅ **Reproducible** Test the code you're about to provide to make sure it reproduces the problem
In addition to the above requirements, for [Ultralytics](https://www.ultralytics.com/) to provide assistance your code should be:
- ✅ **Current** Verify that your code is up-to-date with the current GitHub [master](https://github.com/ultralytics/yolov5/tree/master), and if necessary `git pull` or `git clone` a new copy to ensure your problem has not already been resolved by previous commits.
- ✅ **Unmodified** Your problem must be reproducible without any modifications to the codebase in this repository. [Ultralytics](https://www.ultralytics.com/) does not provide support for custom code ⚠️.
If you believe your problem meets all of the above criteria, please close this issue and raise a new one using the 🐛 **Bug Report** [template](https://github.com/ultralytics/yolov5/issues/new/choose) and provide a [minimum reproducible example](https://docs.ultralytics.com/help/minimum_reproducible_example/) to help us better understand and diagnose your problem.
## License
By contributing, you agree that your contributions will be licensed under the [AGPL-3.0 license](https://choosealicense.com/licenses/agpl-3.0/)

661
yolov5/LICENSE Normal file
View File

@ -0,0 +1,661 @@
GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.
A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate. Many developers of free software are heartened and
encouraged by the resulting cooperation. However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.
The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community. It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server. Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.
An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals. This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU Affero General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Remote Network Interaction; Use with the GNU General Public License.
Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software. This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.

470
yolov5/README.md Normal file
View File

@ -0,0 +1,470 @@
<div align="center">
<p>
<a href="https://www.ultralytics.com/events/yolovision" target="_blank">
<img width="100%" src="https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png"></a>
</p>
[中文](https://docs.ultralytics.com/zh) | [한국어](https://docs.ultralytics.com/ko) | [日本語](https://docs.ultralytics.com/ja) | [Русский](https://docs.ultralytics.com/ru) | [Deutsch](https://docs.ultralytics.com/de) | [Français](https://docs.ultralytics.com/fr) | [Español](https://docs.ultralytics.com/es) | [Português](https://docs.ultralytics.com/pt) | [Türkçe](https://docs.ultralytics.com/tr) | [Tiếng Việt](https://docs.ultralytics.com/vi) | [العربية](https://docs.ultralytics.com/ar)
<div>
<a href="https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml"><img src="https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg" alt="YOLOv5 CI"></a>
<a href="https://zenodo.org/badge/latestdoi/264818686"><img src="https://zenodo.org/badge/264818686.svg" alt="YOLOv5 Citation"></a>
<a href="https://hub.docker.com/r/ultralytics/yolov5"><img src="https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker" alt="Docker Pulls"></a>
<a href="https://discord.com/invite/ultralytics"><img alt="Discord" src="https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue"></a> <a href="https://community.ultralytics.com/"><img alt="Ultralytics Forums" src="https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue"></a> <a href="https://reddit.com/r/ultralytics"><img alt="Ultralytics Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/ultralytics?style=flat&logo=reddit&logoColor=white&label=Reddit&color=blue"></a>
<br>
<a href="https://bit.ly/yolov5-paperspace-notebook"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Run on Gradient"></a>
<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
<a href="https://www.kaggle.com/models/ultralytics/yolov5"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"></a>
</div>
<br>
YOLOv5 🚀 is the world's most loved vision AI, representing <a href="https://www.ultralytics.com/">Ultralytics</a> open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development.
We hope that the resources here will help you get the most out of YOLOv5. Please browse the YOLOv5 <a href="https://docs.ultralytics.com/yolov5/">Docs</a> for details, raise an issue on <a href="https://github.com/ultralytics/yolov5/issues/new/choose">GitHub</a> for support, and join our <a href="https://discord.com/invite/ultralytics">Discord</a> community for questions and discussions!
To request an Enterprise License please complete the form at [Ultralytics Licensing](https://www.ultralytics.com/license).
<div align="center">
<a href="https://github.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-github.png" width="2%" alt="Ultralytics GitHub"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://www.linkedin.com/company/ultralytics/"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-linkedin.png" width="2%" alt="Ultralytics LinkedIn"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://twitter.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-twitter.png" width="2%" alt="Ultralytics Twitter"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://youtube.com/ultralytics?sub_confirmation=1"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-youtube.png" width="2%" alt="Ultralytics YouTube"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://www.tiktok.com/@ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-tiktok.png" width="2%" alt="Ultralytics TikTok"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://ultralytics.com/bilibili"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-bilibili.png" width="2%" alt="Ultralytics BiliBili"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://discord.com/invite/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-discord.png" width="2%" alt="Ultralytics Discord"></a>
</div>
</div>
<br>
## <div align="center">YOLO11 🚀 NEW</div>
We are excited to unveil the launch of Ultralytics YOLO11 🚀, the latest advancement in our state-of-the-art (SOTA) vision models! Available now at **[GitHub](https://github.com/ultralytics/ultralytics)**, YOLO11 builds on our legacy of speed, precision, and ease of use. Whether you're tackling object detection, image segmentation, or image classification, YOLO11 delivers the performance and versatility needed to excel in diverse applications.
Get started today and unlock the full potential of YOLO11! Visit the [Ultralytics Docs](https://docs.ultralytics.com/) for comprehensive guides and resources:
[![PyPI version](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![Downloads](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics)
```bash
pip install ultralytics
```
<div align="center">
<a href="https://www.ultralytics.com/yolo" target="_blank">
<img width="100%" src="https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/yolo/performance-comparison.png"></a>
</div>
## <div align="center">Documentation</div>
See the [YOLOv5 Docs](https://docs.ultralytics.com/yolov5/) for full documentation on training, testing and deployment. See below for quickstart examples.
<details open>
<summary>Install</summary>
Clone repo and install [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) in a [**Python>=3.8.0**](https://www.python.org/) environment, including [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/).
```bash
git clone https://github.com/ultralytics/yolov5 # clone
cd yolov5
pip install -r requirements.txt # install
```
</details>
<details>
<summary>Inference</summary>
YOLOv5 [PyTorch Hub](https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading/) inference. [Models](https://github.com/ultralytics/yolov5/tree/master/models) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases).
```python
import torch
# Model
model = torch.hub.load("ultralytics/yolov5", "yolov5s") # or yolov5n - yolov5x6, custom
# Images
img = "https://ultralytics.com/images/zidane.jpg" # or file, Path, PIL, OpenCV, numpy, list
# Inference
results = model(img)
# Results
results.print() # or .show(), .save(), .crop(), .pandas(), etc.
```
</details>
<details>
<summary>Inference with detect.py</summary>
`detect.py` runs inference on a variety of sources, downloading [models](https://github.com/ultralytics/yolov5/tree/master/models) automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases) and saving results to `runs/detect`.
```bash
python detect.py --weights yolov5s.pt --source 0                               # webcam
                                               img.jpg                         # image
                                               vid.mp4                         # video
                                               screen                          # screenshot
                                               path/                           # directory
                                               list.txt                        # list of images
                                               list.streams                    # list of streams
                                               'path/*.jpg'                    # glob
                                               'https://youtu.be/LNwODJXcvt4'  # YouTube
                                               'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
```
</details>
<details>
<summary>Training</summary>
The commands below reproduce YOLOv5 [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) results. [Models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). Training times for YOLOv5n/s/m/l/x are 1/2/4/6/8 days on a V100 GPU ([Multi-GPU](https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training/) training is proportionally faster). Use the largest `--batch-size` possible, or pass `--batch-size -1` for YOLOv5 [AutoBatch](https://github.com/ultralytics/yolov5/pull/5092). Batch sizes shown below are for V100-16GB.
```bash
python train.py --data coco.yaml --epochs 300 --weights '' --cfg yolov5n.yaml --batch-size 128
                                                                 yolov5s                     64
                                                                 yolov5m                     40
                                                                 yolov5l                     24
                                                                 yolov5x                     16
```
<img width="800" src="https://user-images.githubusercontent.com/26833433/90222759-949d8800-ddc1-11ea-9fa1-1c97eed2b963.png">
</details>
<details open>
<summary>Tutorials</summary>
- [Train Custom Data](https://docs.ultralytics.com/yolov5/tutorials/train_custom_data/) 🚀 RECOMMENDED
- [Tips for Best Training Results](https://docs.ultralytics.com/guides/model-training-tips/) ☘️
- [Multi-GPU Training](https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training/)
- [PyTorch Hub](https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading/) 🌟 NEW
- [TFLite, ONNX, CoreML, TensorRT Export](https://docs.ultralytics.com/yolov5/tutorials/model_export/) 🚀
- [NVIDIA Jetson platform Deployment](https://docs.ultralytics.com/yolov5/tutorials/running_on_jetson_nano/) 🌟 NEW
- [Test-Time Augmentation (TTA)](https://docs.ultralytics.com/yolov5/tutorials/test_time_augmentation/)
- [Model Ensembling](https://docs.ultralytics.com/yolov5/tutorials/model_ensembling/)
- [Model Pruning/Sparsity](https://docs.ultralytics.com/yolov5/tutorials/model_pruning_and_sparsity/)
- [Hyperparameter Evolution](https://docs.ultralytics.com/yolov5/tutorials/hyperparameter_evolution/)
- [Transfer Learning with Frozen Layers](https://docs.ultralytics.com/yolov5/tutorials/transfer_learning_with_frozen_layers/)
- [Architecture Summary](https://docs.ultralytics.com/yolov5/tutorials/architecture_description/) 🌟 NEW
- [Ultralytics HUB to train and deploy YOLO](https://www.ultralytics.com/hub) 🚀 RECOMMENDED
- [ClearML Logging](https://docs.ultralytics.com/yolov5/tutorials/clearml_logging_integration/)
- [YOLOv5 with Neural Magic's Deepsparse](https://docs.ultralytics.com/yolov5/tutorials/neural_magic_pruning_quantization/)
- [Comet Logging](https://docs.ultralytics.com/yolov5/tutorials/comet_logging_integration/) 🌟 NEW
</details>
## <div align="center">Integrations</div>
Our key integrations with leading AI platforms extend the functionality of Ultralytics' offerings, enhancing tasks like dataset labeling, training, visualization, and model management. Discover how Ultralytics, in collaboration with [W&B](https://docs.wandb.ai/guides/integrations/ultralytics/), [Comet](https://bit.ly/yolov8-readme-comet), [Roboflow](https://roboflow.com/?ref=ultralytics) and [OpenVINO](https://docs.ultralytics.com/integrations/openvino/), can optimize your AI workflow.
<br>
<a href="https://www.ultralytics.com/hub" target="_blank">
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png" alt="Ultralytics active learning integrations"></a>
<br>
<br>
<div align="center">
<a href="https://www.ultralytics.com/hub">
<img src="https://github.com/ultralytics/assets/raw/main/partners/logo-ultralytics-hub.png" width="10%" alt="Ultralytics HUB logo"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="15%" height="0" alt="space">
<a href="https://docs.wandb.ai/guides/integrations/ultralytics/">
<img src="https://github.com/ultralytics/assets/raw/main/partners/logo-wb.png" width="10%" alt="ClearML logo"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="15%" height="0" alt="space">
<a href="https://bit.ly/yolov8-readme-comet">
<img src="https://github.com/ultralytics/assets/raw/main/partners/logo-comet.png" width="10%" alt="Comet ML logo"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="15%" height="0" alt="space">
<a href="https://bit.ly/yolov5-neuralmagic">
<img src="https://github.com/ultralytics/assets/raw/main/partners/logo-neuralmagic.png" width="10%" alt="NeuralMagic logo"></a>
</div>
| Ultralytics HUB 🚀 | W&B | Comet ⭐ NEW | Neural Magic |
| :--------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: |
| Streamline YOLO workflows: Label, train, and deploy effortlessly with [Ultralytics HUB](https://www.ultralytics.com/hub). Try now! | Track experiments, hyperparameters, and results with [Weights & Biases](https://docs.wandb.ai/guides/integrations/ultralytics/) | Free forever, [Comet](https://bit.ly/yolov5-readme-comet) lets you save YOLOv5 models, resume training, and interactively visualize and debug predictions | Run YOLO11 inference up to 6x faster with [Neural Magic DeepSparse](https://bit.ly/yolov5-neuralmagic) |
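As one concrete example, the Comet integration for a YOLOv5 training run only needs the client installed and an API key exported before launching `train.py` (a sketch; `<your_api_key>` is a placeholder and the training arguments are illustrative):

```bash
pip install comet_ml                 # install the Comet client
export COMET_API_KEY=<your_api_key>  # placeholder: your personal Comet API key
python train.py --img 640 --batch 16 --epochs 5 --data coco128.yaml --weights yolov5s.pt  # metrics stream to Comet automatically
```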
## <div align="center">Ultralytics HUB</div>
Experience seamless AI with [Ultralytics HUB](https://www.ultralytics.com/hub) ⭐, the all-in-one solution for data visualization, YOLOv5 and YOLOv8 🚀 model training and deployment, without any coding. Transform images into actionable insights and bring your AI visions to life with ease using our cutting-edge platform and user-friendly [Ultralytics App](https://www.ultralytics.com/app-install). Start your journey for **Free** now!
<a align="center" href="https://www.ultralytics.com/hub" target="_blank">
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/im/ultralytics-hub.png"></a>
## <div align="center">Why YOLOv5</div>
YOLOv5 has been designed to be super easy to get started with and simple to learn. We prioritize real-world results.
<p align="left"><img width="800" src="https://user-images.githubusercontent.com/26833433/155040763-93c22a27-347c-4e3c-847a-8094621d3f4e.png"></p>
<details>
<summary>YOLOv5-P5 640 Figure</summary>
<p align="left"><img width="800" src="https://user-images.githubusercontent.com/26833433/155040757-ce0934a3-06a6-43dc-a979-2edbbd69ea0e.png"></p>
</details>
<details>
<summary>Figure Notes</summary>
- **COCO AP val** denotes the mAP@0.5:0.95 metric measured on the 5000-image [COCO val2017](http://cocodataset.org) dataset over various inference sizes from 256 to 1536.
- **GPU Speed** measures average inference time per image on the [COCO val2017](http://cocodataset.org) dataset using an [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p4/) V100 instance at batch-size 32.
- **EfficientDet** data from [google/automl](https://github.com/google/automl) at batch size 8.
- **Reproduce** by `python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n6.pt yolov5s6.pt yolov5m6.pt yolov5l6.pt yolov5x6.pt`
</details>
### Pretrained Checkpoints
| Model | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | mAP<sup>val<br>50 | Speed<br><sup>CPU b1<br>(ms) | Speed<br><sup>V100 b1<br>(ms) | Speed<br><sup>V100 b32<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>@640 (B) |
| ----------------------------------------------------------------------------------------------- | --------------------- | -------------------- | ----------------- | ---------------------------- | ----------------------------- | ------------------------------ | ------------------ | ---------------------- |
| [YOLOv5n](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n.pt) | 640 | 28.0 | 45.7 | **45** | **6.3** | **0.6** | **1.9** | **4.5** |
| [YOLOv5s](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt) | 640 | 37.4 | 56.8 | 98 | 6.4 | 0.9 | 7.2 | 16.5 |
| [YOLOv5m](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m.pt) | 640 | 45.4 | 64.1 | 224 | 8.2 | 1.7 | 21.2 | 49.0 |
| [YOLOv5l](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5l.pt) | 640 | 49.0 | 67.3 | 430 | 10.1 | 2.7 | 46.5 | 109.1 |
| [YOLOv5x](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x.pt) | 640 | 50.7 | 68.9 | 766 | 12.1 | 4.8 | 86.7 | 205.7 |
| | | | | | | | | |
| [YOLOv5n6](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n6.pt) | 1280 | 36.0 | 54.4 | 153 | 8.1 | 2.1 | 3.2 | 4.6 |
| [YOLOv5s6](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s6.pt) | 1280 | 44.8 | 63.7 | 385 | 8.2 | 3.6 | 12.6 | 16.8 |
| [YOLOv5m6](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m6.pt) | 1280 | 51.3 | 69.3 | 887 | 11.1 | 6.8 | 35.7 | 50.0 |
| [YOLOv5l6](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5l6.pt) | 1280 | 53.7 | 71.3 | 1784 | 15.8 | 10.5 | 76.8 | 111.4 |
| [YOLOv5x6](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x6.pt)<br>+ [TTA] | 1280<br>1536 | 55.0<br>**55.8** | 72.7<br>**72.7** | 3136<br>- | 26.2<br>- | 19.4<br>- | 140.7<br>- | 209.8<br>- |
<details>
<summary>Table Notes</summary>
- All checkpoints are trained to 300 epochs with default settings. Nano and Small models use [hyp.scratch-low.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-low.yaml) hyps, all others use [hyp.scratch-high.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-high.yaml).
- **mAP<sup>val</sup>** values are for single-model single-scale on the [COCO val2017](http://cocodataset.org) dataset.<br>Reproduce by `python val.py --data coco.yaml --img 640 --conf 0.001 --iou 0.65`
- **Speed** averaged over COCO val images using an [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p4/) instance. NMS times (~1 ms/img) not included.<br>Reproduce by `python val.py --data coco.yaml --img 640 --task speed --batch 1`
- **TTA** [Test Time Augmentation](https://docs.ultralytics.com/yolov5/tutorials/test_time_augmentation/) includes reflection and scale augmentations.<br>Reproduce by `python val.py --data coco.yaml --img 1536 --iou 0.7 --augment`
</details>
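To try the TTA setting from the table above at inference time, the same `--augment` flag also works with `detect.py` (a sketch; the weights, image size, and source shown are illustrative):

```bash
python detect.py --weights yolov5x.pt --img 832 --source data/images --augment  # inference with Test-Time Augmentation
```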
## <div align="center">Segmentation</div>
Our new YOLOv5 [release v7.0](https://github.com/ultralytics/yolov5/releases/v7.0) instance segmentation models are the fastest and most accurate in the world, beating all current [SOTA benchmarks](https://paperswithcode.com/sota/real-time-instance-segmentation-on-mscoco). We've made them super simple to train, validate and deploy. See full details in our [Release Notes](https://github.com/ultralytics/yolov5/releases/v7.0) and visit our [YOLOv5 Segmentation Colab Notebook](https://github.com/ultralytics/yolov5/blob/master/segment/tutorial.ipynb) for quickstart tutorials.
<details>
<summary>Segmentation Checkpoints</summary>
<div align="center">
<a align="center" href="https://www.ultralytics.com/yolo" target="_blank">
<img width="800" src="https://user-images.githubusercontent.com/61612323/204180385-84f3aca9-a5e9-43d8-a617-dda7ca12e54a.png"></a>
</div>
We trained YOLOv5 segmentation models on COCO for 300 epochs at image size 640 using A100 GPUs. We exported all models to ONNX FP32 for CPU speed tests and to TensorRT FP16 for GPU speed tests. We ran all speed tests on Google [Colab Pro](https://colab.research.google.com/signup) notebooks for easy reproducibility.
| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>mask<br>50-95 | Train time<br><sup>300 epochs<br>A100 (hours) | Speed<br><sup>ONNX CPU<br>(ms) | Speed<br><sup>TRT A100<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>@640 (B) |
| ------------------------------------------------------------------------------------------ | --------------------- | -------------------- | --------------------- | --------------------------------------------- | ------------------------------ | ------------------------------ | ------------------ | ---------------------- |
| [YOLOv5n-seg](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n-seg.pt) | 640 | 27.6 | 23.4 | 80:17 | **62.7** | **1.2** | **2.0** | **7.1** |
| [YOLOv5s-seg](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s-seg.pt) | 640 | 37.6 | 31.7 | 88:16 | 173.3 | 1.4 | 7.6 | 26.4 |
| [YOLOv5m-seg](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m-seg.pt) | 640 | 45.0 | 37.1 | 108:36 | 427.0 | 2.2 | 22.0 | 70.8 |
| [YOLOv5l-seg](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5l-seg.pt) | 640 | 49.0 | 39.9 | 66:43 (2x) | 857.4 | 2.9 | 47.9 | 147.7 |
| [YOLOv5x-seg](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x-seg.pt) | 640 | **50.7** | **41.4** | 62:56 (3x) | 1579.2 | 4.5 | 88.8 | 265.7 |
- All checkpoints are trained to 300 epochs with the SGD optimizer (`lr0=0.01`, `weight_decay=5e-5`) at image size 640 and all default settings.<br>Runs logged to https://wandb.ai/glenn-jocher/YOLOv5_v70_official
- **Accuracy** values are for single-model single-scale on the COCO dataset.<br>Reproduce by `python segment/val.py --data coco.yaml --weights yolov5s-seg.pt`
- **Speed** averaged over 100 inference images using a [Colab Pro](https://colab.research.google.com/signup) A100 High-RAM instance. Values indicate inference speed only (NMS adds about 1 ms per image).<br>Reproduce by `python segment/val.py --data coco.yaml --weights yolov5s-seg.pt --batch 1`
- **Export** to ONNX at FP32 and TensorRT at FP16 done with `export.py`.<br>Reproduce by `python export.py --weights yolov5s-seg.pt --include engine --device 0 --half`
</details>
<details>
<summary>Segmentation Usage Examples &nbsp;<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/segment/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a></summary>
### Train
YOLOv5 segmentation training supports auto-download of the COCO128-seg dataset via the `--data coco128-seg.yaml` argument. To download the full COCO-segments dataset manually, run `bash data/scripts/get_coco.sh --train --val --segments`, then start training with `python train.py --data coco.yaml`.
```bash
# Single-GPU
python segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640
# Multi-GPU DDP
python -m torch.distributed.run --nproc_per_node 4 --master_port 1 segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 --device 0,1,2,3
```
### Val
Validate YOLOv5s-seg mask mAP on COCO dataset:
```bash
bash data/scripts/get_coco.sh --val --segments # download COCO val segments split (780MB, 5000 images)
python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640 # validate
```
### Predict
Use pretrained YOLOv5m-seg.pt to predict bus.jpg:
```bash
python segment/predict.py --weights yolov5m-seg.pt --source data/images/bus.jpg
```
```python
import torch

model = torch.hub.load(
    "ultralytics/yolov5", "custom", "yolov5m-seg.pt"
)  # load from PyTorch Hub (WARNING: inference not yet supported)
```
| ![zidane](https://user-images.githubusercontent.com/26833433/203113421-decef4c4-183d-4a0a-a6c2-6435b33bc5d3.jpg) | ![bus](https://user-images.githubusercontent.com/26833433/203113416-11fe0025-69f7-4874-a0a6-65d0bfe2999a.jpg) |
| ---------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------- |
### Export
Export YOLOv5s-seg model to ONNX and TensorRT:
```bash
python export.py --weights yolov5s-seg.pt --include onnx engine --img 640 --device 0
```
</details>
## <div align="center">Classification</div>
YOLOv5 [release v6.2](https://github.com/ultralytics/yolov5/releases) brings support for classification model training, validation and deployment! See full details in our [Release Notes](https://github.com/ultralytics/yolov5/releases/v6.2) and visit our [YOLOv5 Classification Colab Notebook](https://github.com/ultralytics/yolov5/blob/master/classify/tutorial.ipynb) for quickstart tutorials.
<details>
<summary>Classification Checkpoints</summary>
<br>
We trained YOLOv5-cls classification models on ImageNet for 90 epochs using a 4xA100 instance, and we trained ResNet and EfficientNet models alongside them with the same default training settings for comparison. We exported all models to ONNX FP32 for CPU speed tests and to TensorRT FP16 for GPU speed tests. We ran all speed tests on Google [Colab Pro](https://colab.research.google.com/signup) for easy reproducibility.
| Model | size<br><sup>(pixels) | acc<br><sup>top1 | acc<br><sup>top5 | Training<br><sup>90 epochs<br>4xA100 (hours) | Speed<br><sup>ONNX CPU<br>(ms) | Speed<br><sup>TensorRT V100<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>@224 (B) |
| -------------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | -------------------------------------------- | ------------------------------ | ----------------------------------- | ------------------ | ---------------------- |
| [YOLOv5n-cls](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n-cls.pt) | 224 | 64.6 | 85.4 | 7:59 | **3.3** | **0.5** | **2.5** | **0.5** |
| [YOLOv5s-cls](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s-cls.pt) | 224 | 71.5 | 90.2 | 8:09 | 6.6 | 0.6 | 5.4 | 1.4 |
| [YOLOv5m-cls](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m-cls.pt) | 224 | 75.9 | 92.9 | 10:06 | 15.5 | 0.9 | 12.9 | 3.9 |
| [YOLOv5l-cls](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5l-cls.pt) | 224 | 78.0 | 94.0 | 11:56 | 26.9 | 1.4 | 26.5 | 8.5 |
| [YOLOv5x-cls](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x-cls.pt) | 224 | **79.0** | **94.4** | 15:04 | 54.3 | 1.8 | 48.1 | 15.9 |
| | | | | | | | | |
| [ResNet18](https://github.com/ultralytics/yolov5/releases/download/v7.0/resnet18.pt) | 224 | 70.3 | 89.5 | **6:47** | 11.2 | 0.5 | 11.7 | 3.7 |
| [ResNet34](https://github.com/ultralytics/yolov5/releases/download/v7.0/resnet34.pt) | 224 | 73.9 | 91.8 | 8:33 | 20.6 | 0.9 | 21.8 | 7.4 |
| [ResNet50](https://github.com/ultralytics/yolov5/releases/download/v7.0/resnet50.pt) | 224 | 76.8 | 93.4 | 11:10 | 23.4 | 1.0 | 25.6 | 8.5 |
| [ResNet101](https://github.com/ultralytics/yolov5/releases/download/v7.0/resnet101.pt) | 224 | 78.5 | 94.3 | 17:10 | 42.1 | 1.9 | 44.5 | 15.9 |
| | | | | | | | | |
| [EfficientNet_b0](https://github.com/ultralytics/yolov5/releases/download/v7.0/efficientnet_b0.pt) | 224 | 75.1 | 92.4 | 13:03 | 12.5 | 1.3 | 5.3 | 1.0 |
| [EfficientNet_b1](https://github.com/ultralytics/yolov5/releases/download/v7.0/efficientnet_b1.pt) | 224 | 76.4 | 93.2 | 17:04 | 14.9 | 1.6 | 7.8 | 1.5 |
| [EfficientNet_b2](https://github.com/ultralytics/yolov5/releases/download/v7.0/efficientnet_b2.pt) | 224 | 76.6 | 93.4 | 17:10 | 15.9 | 1.6 | 9.1 | 1.7 |
| [EfficientNet_b3](https://github.com/ultralytics/yolov5/releases/download/v7.0/efficientnet_b3.pt) | 224 | 77.7 | 94.0 | 19:19 | 18.9 | 1.9 | 12.2 | 2.4 |
<details>
<summary>Table Notes (click to expand)</summary>
- All checkpoints are trained to 90 epochs with the SGD optimizer (`lr0=0.001`, `weight_decay=5e-5`) at image size 224 and all default settings.<br>Runs logged to https://wandb.ai/glenn-jocher/YOLOv5-Classifier-v6-2
- **Accuracy** values are for single-model single-scale on the [ImageNet-1k](https://www.image-net.org/index.php) dataset.<br>Reproduce by `python classify/val.py --data ../datasets/imagenet --img 224`
- **Speed** averaged over 100 inference images using a Google [Colab Pro](https://colab.research.google.com/signup) V100 High-RAM instance.<br>Reproduce by `python classify/val.py --data ../datasets/imagenet --img 224 --batch 1`
- **Export** to ONNX at FP32 and TensorRT at FP16 done with `export.py`.<br>Reproduce by `python export.py --weights yolov5s-cls.pt --include engine onnx --imgsz 224`
</details>
</details>
<details>
<summary>Classification Usage Examples &nbsp;<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/classify/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a></summary>
### Train
YOLOv5 classification training supports auto-download of the MNIST, Fashion-MNIST, CIFAR10, CIFAR100, Imagenette, Imagewoof, and ImageNet datasets via the `--data` argument. To start training on MNIST, for example, use `--data mnist`.
```bash
# Single-GPU
python classify/train.py --model yolov5s-cls.pt --data cifar100 --epochs 5 --img 224 --batch 128
# Multi-GPU DDP
python -m torch.distributed.run --nproc_per_node 4 --master_port 1 classify/train.py --model yolov5s-cls.pt --data imagenet --epochs 5 --img 224 --device 0,1,2,3
```
### Val
Validate YOLOv5m-cls accuracy on ImageNet-1k dataset:
```bash
bash data/scripts/get_imagenet.sh --val # download ImageNet val split (6.3G, 50000 images)
python classify/val.py --weights yolov5m-cls.pt --data ../datasets/imagenet --img 224 # validate
```
### Predict
Use pretrained YOLOv5s-cls.pt to predict bus.jpg:
```bash
python classify/predict.py --weights yolov5s-cls.pt --source data/images/bus.jpg
```
```python
import torch

model = torch.hub.load("ultralytics/yolov5", "custom", "yolov5s-cls.pt")  # load from PyTorch Hub
```
### Export
Export a group of trained YOLOv5s-cls, ResNet and EfficientNet models to ONNX and TensorRT:
```bash
python export.py --weights yolov5s-cls.pt resnet50.pt efficientnet_b0.pt --include onnx engine --img 224
```
</details>
## <div align="center">Environments</div>
Get started in seconds with our verified environments. Click each icon below for details.
<div align="center">
<a href="https://bit.ly/yolov5-paperspace-notebook">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-gradient.png" width="10%" /></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="5%" alt="" />
<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-colab-small.png" width="10%" /></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="5%" alt="" />
<a href="https://www.kaggle.com/models/ultralytics/yolov5">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-kaggle-small.png" width="10%" /></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="5%" alt="" />
<a href="https://hub.docker.com/r/ultralytics/yolov5">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-docker-small.png" width="10%" /></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="5%" alt="" />
<a href="https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-aws-small.png" width="10%" /></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="5%" alt="" />
<a href="https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-gcp-small.png" width="10%" /></a>
</div>
## <div align="center">Contribute</div>
We love your input! We want to make contributing to YOLOv5 as easy and transparent as possible. Please see our [Contributing Guide](https://docs.ultralytics.com/help/contributing/) to get started, and fill out the [YOLOv5 Survey](https://www.ultralytics.com/survey?utm_source=github&utm_medium=social&utm_campaign=Survey) to send us feedback on your experiences. Thank you to all our contributors!
<!-- SVG image from https://opencollective.com/ultralytics/contributors.svg?width=990 -->
<a href="https://github.com/ultralytics/yolov5/graphs/contributors">
<img src="https://github.com/ultralytics/assets/raw/main/im/image-contributors.png" /></a>
## <div align="center">License</div>
Ultralytics offers two licensing options to accommodate diverse use cases:
- **AGPL-3.0 License**: This [OSI-approved](https://opensource.org/license) open-source license is ideal for students and enthusiasts, promoting open collaboration and knowledge sharing. See the [LICENSE](https://github.com/ultralytics/yolov5/blob/master/LICENSE) file for more details.
- **Enterprise License**: Designed for commercial use, this license permits seamless integration of Ultralytics software and AI models into commercial goods and services, bypassing the open-source requirements of AGPL-3.0. If your scenario involves embedding our solutions into a commercial offering, reach out through [Ultralytics Licensing](https://www.ultralytics.com/license).
## <div align="center">Contact</div>
For YOLOv5 bug reports and feature requests please visit [GitHub Issues](https://github.com/ultralytics/yolov5/issues), and join our [Discord](https://discord.com/invite/ultralytics) community for questions and discussions!
<br>
<div align="center">
<a href="https://github.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-github.png" width="3%" alt="Ultralytics GitHub"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://www.linkedin.com/company/ultralytics/"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-linkedin.png" width="3%" alt="Ultralytics LinkedIn"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://twitter.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-twitter.png" width="3%" alt="Ultralytics Twitter"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://youtube.com/ultralytics?sub_confirmation=1"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-youtube.png" width="3%" alt="Ultralytics YouTube"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://www.tiktok.com/@ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-tiktok.png" width="3%" alt="Ultralytics TikTok"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://ultralytics.com/bilibili"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-bilibili.png" width="3%" alt="Ultralytics BiliBili"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://discord.com/invite/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-discord.png" width="3%" alt="Ultralytics Discord"></a>
</div>
[tta]: https://docs.ultralytics.com/yolov5/tutorials/test_time_augmentation

470
yolov5/README.zh-CN.md Normal file
View File

@ -0,0 +1,470 @@
<div align="center">
<p>
<a href="https://www.ultralytics.com/events/yolovision" target="_blank">
<img width="100%" src="https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png"></a>
</p>
[中文](https://docs.ultralytics.com/zh) | [한국어](https://docs.ultralytics.com/ko) | [日本語](https://docs.ultralytics.com/ja) | [Русский](https://docs.ultralytics.com/ru) | [Deutsch](https://docs.ultralytics.com/de) | [Français](https://docs.ultralytics.com/fr) | [Español](https://docs.ultralytics.com/es) | [Português](https://docs.ultralytics.com/pt) | [Türkçe](https://docs.ultralytics.com/tr) | [Tiếng Việt](https://docs.ultralytics.com/vi) | [العربية](https://docs.ultralytics.com/ar)
<div>
<a href="https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml"><img src="https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg" alt="YOLOv5 CI"></a>
<a href="https://zenodo.org/badge/latestdoi/264818686"><img src="https://zenodo.org/badge/264818686.svg" alt="YOLOv5 Citation"></a>
<a href="https://hub.docker.com/r/ultralytics/yolov5"><img src="https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker" alt="Docker Pulls"></a>
<a href="https://discord.com/invite/ultralytics"><img alt="Discord" src="https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue"></a> <a href="https://community.ultralytics.com/"><img alt="Ultralytics Forums" src="https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue"></a> <a href="https://reddit.com/r/ultralytics"><img alt="Ultralytics Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/ultralytics?style=flat&logo=reddit&logoColor=white&label=Reddit&color=blue"></a>
<br>
<a href="https://bit.ly/yolov5-paperspace-notebook"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Run on Gradient"></a>
<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
<a href="https://www.kaggle.com/models/ultralytics/yolov5"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"></a>
</div>
<br>
YOLOv5 🚀 is the world's most loved vision AI, representing <a href="https://www.ultralytics.com/"> Ultralytics </a> open-source research into future vision AI methods, incorporating lessons learned and best practices evolved over thousands of hours of research and development.
We hope that the resources here will help you get the most out of YOLOv5. Please browse the YOLOv5 <a href="https://docs.ultralytics.com/yolov5/">Docs</a> for details, raise an issue on <a href="https://github.com/ultralytics/yolov5/issues/new/choose">GitHub</a> for support, and join our <a href="https://discord.com/invite/ultralytics">Discord</a> community for questions and discussions!
To request an Enterprise License please complete the form at [Ultralytics Licensing](https://www.ultralytics.com/license).
<div align="center">
<a href="https://github.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-github.png" width="2%" alt="Ultralytics GitHub"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://www.linkedin.com/company/ultralytics/"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-linkedin.png" width="2%" alt="Ultralytics LinkedIn"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://twitter.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-twitter.png" width="2%" alt="Ultralytics Twitter"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://youtube.com/ultralytics?sub_confirmation=1"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-youtube.png" width="2%" alt="Ultralytics YouTube"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://www.tiktok.com/@ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-tiktok.png" width="2%" alt="Ultralytics TikTok"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://ultralytics.com/bilibili"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-bilibili.png" width="2%" alt="Ultralytics BiliBili"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="2%">
<a href="https://discord.com/invite/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-discord.png" width="2%" alt="Ultralytics Discord"></a>
</div>
</div>
## <div align="center">YOLO11 🚀 全新发布</div>
我们很高兴宣布推出 Ultralytics YOLO11 🚀,这是我们最先进视觉模型的最新进展!现已在 **[GitHub](https://github.com/ultralytics/ultralytics)** 上发布。YOLO11 在速度、精度和易用性方面进一步提升无论是处理目标检测、图像分割还是图像分类任务YOLO11 都具备出色的性能和多功能性,助您在各种应用中脱颖而出。
立即开始,解锁 YOLO11 的全部潜力!访问 [Ultralytics 文档](https://docs.ultralytics.com/) 获取全面的指南和资源:
[![PyPI 版本](https://badge.fury.io/py/ultralytics.svg)](https://badge.fury.io/py/ultralytics) [![下载量](https://static.pepy.tech/badge/ultralytics)](https://www.pepy.tech/projects/ultralytics)
```bash
pip install ultralytics
```
<div align="center">
<a href="https://www.ultralytics.com/yolo" target="_blank">
<img width="100%" src="https://raw.githubusercontent.com/ultralytics/assets/refs/heads/main/yolo/performance-comparison.png"></a>
</div>
## <div align="center">文档</div>
有关训练、测试和部署的完整文档见[YOLOv5 文档](https://docs.ultralytics.com/yolov5/)。请参阅下面的快速入门示例。
<details open>
<summary>安装</summary>
克隆 repo并要求在 [**Python>=3.8.0**](https://www.python.org/) 环境中安装 [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) ,且要求 [**PyTorch>=1.8**](https://pytorch.org/get-started/locally/) 。
```bash
git clone https://github.com/ultralytics/yolov5 # clone
cd yolov5
pip install -r requirements.txt # install
```
</details>
<details>
<summary>Inference</summary>
YOLOv5 [PyTorch Hub](https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading/) inference. The latest [models](https://github.com/ultralytics/yolov5/tree/master/models) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases).
```python
import torch
# Model
model = torch.hub.load("ultralytics/yolov5", "yolov5s") # or yolov5n - yolov5x6, custom
# Images
img = "https://ultralytics.com/images/zidane.jpg" # or file, Path, PIL, OpenCV, numpy, list
# Inference
results = model(img)
# Results
results.print() # or .show(), .save(), .crop(), .pandas(), etc.
```
</details>
<details>
<summary>Inference with detect.py</summary>
`detect.py` runs inference on a variety of sources, downloading [models](https://github.com/ultralytics/yolov5/tree/master/models) automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases) and saving results to `runs/detect`.
```bash
python detect.py --weights yolov5s.pt --source 0                               # webcam
                                               img.jpg                         # image
                                               vid.mp4                         # video
                                               screen                          # screenshot
                                               path/                           # directory
                                               list.txt                        # list of images
                                               list.streams                    # list of streams
                                               'path/*.jpg'                    # glob
                                               'https://youtu.be/LNwODJXcvt4'  # YouTube
                                               'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
```
</details>
<details>
<summary>Training</summary>
The commands below reproduce YOLOv5 [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) results. The latest [models](https://github.com/ultralytics/yolov5/tree/master/models) and [datasets](https://github.com/ultralytics/yolov5/tree/master/data) download automatically from the latest YOLOv5 [release](https://github.com/ultralytics/yolov5/releases). Training times for YOLOv5n/s/m/l/x are 1/2/4/6/8 days on a V100 GPU ([Multi-GPU](https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training/) training is proportionally faster). Use the largest `--batch-size` possible, or pass `--batch-size -1` for YOLOv5 [AutoBatch](https://github.com/ultralytics/yolov5/pull/5092). Batch sizes shown below are for V100-16GB.
```bash
python train.py --data coco.yaml --epochs 300 --weights '' --cfg yolov5n.yaml --batch-size 128
                                                                 yolov5s                     64
                                                                 yolov5m                     40
                                                                 yolov5l                     24
                                                                 yolov5x                     16
```
<img width="800" src="https://user-images.githubusercontent.com/26833433/90222759-949d8800-ddc1-11ea-9fa1-1c97eed2b963.png">
</details>
<details open>
<summary>Tutorials</summary>
- [Train Custom Data](https://docs.ultralytics.com/yolov5/tutorials/train_custom_data/) 🚀 **RECOMMENDED**
- [Tips for Best Training Results](https://docs.ultralytics.com/guides/model-training-tips/) ☘️
- [Multi-GPU Training](https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training/)
- [PyTorch Hub](https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading/) 🌟 **NEW**
- [TFLite, ONNX, CoreML, TensorRT Export](https://docs.ultralytics.com/yolov5/tutorials/model_export/) 🚀
- [NVIDIA Jetson platform Deployment](https://docs.ultralytics.com/yolov5/tutorials/running_on_jetson_nano/) 🌟 **NEW**
- [Test-Time Augmentation (TTA)](https://docs.ultralytics.com/yolov5/tutorials/test_time_augmentation/)
- [Model Ensembling](https://docs.ultralytics.com/yolov5/tutorials/model_ensembling/)
- [Model Pruning/Sparsity](https://docs.ultralytics.com/yolov5/tutorials/model_pruning_and_sparsity/)
- [Hyperparameter Evolution](https://docs.ultralytics.com/yolov5/tutorials/hyperparameter_evolution/)
- [Transfer Learning with Frozen Layers](https://docs.ultralytics.com/yolov5/tutorials/transfer_learning_with_frozen_layers/)
- [Architecture Summary](https://docs.ultralytics.com/yolov5/tutorials/architecture_description/) 🌟 **NEW**
- [Ultralytics HUB to train and deploy YOLO](https://www.ultralytics.com/hub) 🚀 **RECOMMENDED**
- [ClearML Logging](https://docs.ultralytics.com/yolov5/tutorials/clearml_logging_integration/)
- [YOLOv5 with Neural Magic's Deepsparse](https://docs.ultralytics.com/yolov5/tutorials/neural_magic_pruning_quantization/)
- [Comet Logging](https://docs.ultralytics.com/yolov5/tutorials/comet_logging_integration/) 🌟 **NEW**
</details>
## <div align="center">集成</div>
我们与领先的 AI 平台的关键集成扩展了 Ultralytics 产品的功能,提升了数据集标注、训练、可视化和模型管理等任务。探索 Ultralytics 如何通过与 [W&B](https://docs.wandb.ai/guides/integrations/ultralytics/)、[Comet](https://bit.ly/yolov8-readme-comet)、[Roboflow](https://roboflow.com/?ref=ultralytics) 和 [OpenVINO](https://docs.ultralytics.com/integrations/openvino/) 的合作,优化您的 AI 工作流程。
<br>
<a href="https://www.ultralytics.com/hub" target="_blank">
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/yolov8/banner-integrations.png" alt="Ultralytics active learning integrations"></a>
<br>
<br>
<div align="center">
<a href="https://www.ultralytics.com/hub">
<img src="https://github.com/ultralytics/assets/raw/main/partners/logo-ultralytics-hub.png" width="10%" alt="Ultralytics HUB logo"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="15%" height="0" alt="space">
<a href="https://docs.wandb.ai/guides/integrations/ultralytics/">
<img src="https://github.com/ultralytics/assets/raw/main/partners/logo-wb.png" width="10%" alt="W&B logo"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="15%" height="0" alt="space">
<a href="https://bit.ly/yolov8-readme-comet">
<img src="https://github.com/ultralytics/assets/raw/main/partners/logo-comet.png" width="10%" alt="Comet ML logo"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="15%" height="0" alt="space">
<a href="https://bit.ly/yolov5-neuralmagic">
<img src="https://github.com/ultralytics/assets/raw/main/partners/logo-neuralmagic.png" width="10%" alt="NeuralMagic logo"></a>
</div>
| Ultralytics HUB 🚀 | W&B | Comet ⭐ NEW | Neural Magic |
| :----------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------: |
| Streamline YOLO workflows: Label, train, and deploy effortlessly with [Ultralytics HUB](https://www.ultralytics.com/hub). Try now! | Track experiments, hyperparameters, and results with [Weights & Biases](https://docs.wandb.ai/guides/integrations/ultralytics/) | Free forever, [Comet](https://bit.ly/yolov5-readme-comet) lets you save YOLO11 models, resume training, and interactively visualize and debug predictions | Run YOLO11 inference up to 6x faster with [Neural Magic DeepSparse](https://bit.ly/yolov5-neuralmagic) |
## <div align="center">Ultralytics HUB</div>
[Ultralytics HUB](https://www.ultralytics.com/hub) 是我们的⭐**新的**用于可视化数据集、训练 YOLOv5 🚀 模型并以无缝体验部署到现实世界的无代码解决方案。现在开始 **免费** 使用他!
<a align="center" href="https://www.ultralytics.com/hub" target="_blank">
<img width="100%" src="https://github.com/ultralytics/assets/raw/main/im/ultralytics-hub.png"></a>
## <div align="center">为什么选择 YOLOv5</div>
YOLOv5 超级容易上手,简单易学。我们优先考虑现实世界的结果。
<p align="left"><img width="800" src="https://user-images.githubusercontent.com/26833433/155040763-93c22a27-347c-4e3c-847a-8094621d3f4e.png"></p>
<details>
<summary>YOLOv5-P5 640 Figure</summary>
<p align="left"><img width="800" src="https://user-images.githubusercontent.com/26833433/155040757-ce0934a3-06a6-43dc-a979-2edbbd69ea0e.png"></p>
</details>
<details>
<summary>Figure Notes</summary>
- **COCO AP val** denotes the mAP@0.5:0.95 metric measured on the 5000-image [COCO val2017](http://cocodataset.org) dataset over various inference sizes from 256 to 1536.
- **GPU Speed** measures average inference time per image on the [COCO val2017](http://cocodataset.org) dataset using an [AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p4/) V100 instance at batch-size 32.
- **EfficientDet** data from [google/automl](https://github.com/google/automl) at batch size 32.
- **Reproduce** by `python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n6.pt yolov5s6.pt yolov5m6.pt yolov5l6.pt yolov5x6.pt`
</details>
### Pretrained Checkpoints
| Model | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | mAP<sup>val<br>50 | Speed<br><sup>CPU b1<br>(ms) | Speed<br><sup>V100 b1<br>(ms) | Speed<br><sup>V100 b32<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>@640 (B) |
| ---------------------------------------------------------------------------------------------- | --------------------- | -------------------- | ----------------- | --------------------------------- | ---------------------------------- | ------------------------------- | ------------------ | ---------------------- |
| [YOLOv5n](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n.pt) | 640 | 28.0 | 45.7 | **45** | **6.3** | **0.6** | **1.9** | **4.5** |
| [YOLOv5s](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt) | 640 | 37.4 | 56.8 | 98 | 6.4 | 0.9 | 7.2 | 16.5 |
| [YOLOv5m](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m.pt) | 640 | 45.4 | 64.1 | 224 | 8.2 | 1.7 | 21.2 | 49.0 |
| [YOLOv5l](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5l.pt) | 640 | 49.0 | 67.3 | 430 | 10.1 | 2.7 | 46.5 | 109.1 |
| [YOLOv5x](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x.pt) | 640 | 50.7 | 68.9 | 766 | 12.1 | 4.8 | 86.7 | 205.7 |
| | | | | | | | | |
| [YOLOv5n6](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n6.pt) | 1280 | 36.0 | 54.4 | 153 | 8.1 | 2.1 | 3.2 | 4.6 |
| [YOLOv5s6](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s6.pt) | 1280 | 44.8 | 63.7 | 385 | 8.2 | 3.6 | 12.6 | 16.8 |
| [YOLOv5m6](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m6.pt) | 1280 | 51.3 | 69.3 | 887 | 11.1 | 6.8 | 35.7 | 50.0 |
| [YOLOv5l6](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5l6.pt) | 1280 | 53.7 | 71.3 | 1784 | 15.8 | 10.5 | 76.8 | 111.4 |
| [YOLOv5x6](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x6.pt)<br>+[TTA] | 1280<br>1536 | 55.0<br>**55.8** | 72.7<br>**72.7** | 3136<br>- | 26.2<br>- | 19.4<br>- | 140.7<br>- | 209.8<br>- |
<details>
<summary>笔记</summary>
- 所有模型都使用默认配置,训练 300 epochs。n和s模型使用 [hyp.scratch-low.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-low.yaml) ,其他模型都使用 [hyp.scratch-high.yaml](https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.scratch-high.yaml) 。
- \*\*mAP<sup>val</sup>\*\*在单模型单尺度上计算,数据集使用 [COCO val2017](http://cocodataset.org) 。<br>复现命令 `python val.py --data coco.yaml --img 640 --conf 0.001 --iou 0.65`
- **推理速度**在 COCO val 图像总体时间上进行平均得到,测试环境使用[AWS p3.2xlarge](https://aws.amazon.com/ec2/instance-types/p4/)实例。 NMS 时间 (大约 1 ms/img) 不包括在内。<br>复现命令 `python val.py --data coco.yaml --img 640 --task speed --batch 1`
- **TTA** [测试时数据增强](https://docs.ultralytics.com/yolov5/tutorials/test_time_augmentation/) 包括反射和尺度变换。<br>复现命令 `python val.py --data coco.yaml --img 1536 --iou 0.7 --augment`
</details>
## <div align="center">实例分割模型 ⭐ 新</div>
我们新的 YOLOv5 [release v7.0](https://github.com/ultralytics/yolov5/releases/v7.0) 实例分割模型是世界上最快和最准确的模型,击败所有当前 [SOTA 基准](https://paperswithcode.com/sota/real-time-instance-segmentation-on-mscoco)。我们使它非常易于训练、验证和部署。更多细节请查看 [发行说明](https://github.com/ultralytics/yolov5/releases/v7.0) 或访问我们的 [YOLOv5 分割 Colab 笔记本](https://github.com/ultralytics/yolov5/blob/master/segment/tutorial.ipynb) 以快速入门。
<details>
<summary>Segmentation Checkpoints</summary>
<br>
<div align="center">
<a align="center" href="https://www.ultralytics.com/yolo" target="_blank">
<img width="800" src="https://user-images.githubusercontent.com/61612323/204180385-84f3aca9-a5e9-43d8-a617-dda7ca12e54a.png"></a>
</div>
We trained YOLOv5 segmentation models on COCO for 300 epochs at image size 640 using A100 GPUs. We exported all models to ONNX FP32 for CPU speed tests and to TensorRT FP16 for GPU speed tests. We ran all speed tests on Google [Colab Pro](https://colab.research.google.com/signup) notebooks for easy reproducibility.
| Model | size<br><sup>(pixels) | mAP<sup>box<br>50-95 | mAP<sup>mask<br>50-95 | Train time<br><sup>300 epochs<br>A100 (hours) | Speed<br><sup>ONNX CPU<br>(ms) | Speed<br><sup>TRT A100<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>@640 (B) |
| ------------------------------------------------------------------------------------------ | --------------------- | -------------------- | --------------------- | ----------------------------------------------- | ----------------------------------- | ----------------------------------- | ------------------ | ---------------------- |
| [YOLOv5n-seg](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n-seg.pt) | 640 | 27.6 | 23.4 | 80:17 | **62.7** | **1.2** | **2.0** | **7.1** |
| [YOLOv5s-seg](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s-seg.pt) | 640 | 37.6 | 31.7 | 88:16 | 173.3 | 1.4 | 7.6 | 26.4 |
| [YOLOv5m-seg](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m-seg.pt) | 640 | 45.0 | 37.1 | 108:36 | 427.0 | 2.2 | 22.0 | 70.8 |
| [YOLOv5l-seg](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5l-seg.pt) | 640 | 49.0 | 39.9 | 66:43 (2x) | 857.4 | 2.9 | 47.9 | 147.7 |
| [YOLOv5x-seg](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x-seg.pt) | 640 | **50.7** | **41.4** | 62:56 (3x) | 1579.2 | 4.5 | 88.8 | 265.7 |
- All checkpoints are trained to 300 epochs with the SGD optimizer (`lr0=0.01`, `weight_decay=5e-5`) at image size 640 and all default settings.<br>Runs logged to https://wandb.ai/glenn-jocher/YOLOv5_v70_official
- **Accuracy** values are for single-model single-scale on the COCO dataset.<br>Reproduce by `python segment/val.py --data coco.yaml --weights yolov5s-seg.pt`
- **Speed** averaged over 100 inference images using a [Colab Pro](https://colab.research.google.com/signup) A100 High-RAM instance. Values indicate inference speed only (NMS adds about 1 ms per image).<br>Reproduce by `python segment/val.py --data coco.yaml --weights yolov5s-seg.pt --batch 1`
- **Export** to ONNX at FP32 and TensorRT at FP16 done with `export.py`.<br>Reproduce by `python export.py --weights yolov5s-seg.pt --include engine --device 0 --half`
</details>
<details>
<summary>Segmentation Usage Examples &nbsp;<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/segment/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a></summary>
### Train
YOLOv5 segmentation training supports auto-download of the COCO128-seg dataset via the `--data coco128-seg.yaml` argument. To download the full COCO-segments dataset manually, run `bash data/scripts/get_coco.sh --train --val --segments`, then start training with `python train.py --data coco.yaml`.
```bash
# Single-GPU
python segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640
# Multi-GPU DDP
python -m torch.distributed.run --nproc_per_node 4 --master_port 1 segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 --device 0,1,2,3
```
### Val
Validate YOLOv5s-seg mask mAP on the COCO dataset:
```bash
bash data/scripts/get_coco.sh --val --segments # download COCO val segments split (780MB, 5000 images)
python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640 # validate
```
### Predict
Use pretrained YOLOv5m-seg.pt to predict bus.jpg:
```bash
python segment/predict.py --weights yolov5m-seg.pt --source data/images/bus.jpg
```
```python
import torch

model = torch.hub.load(
    "ultralytics/yolov5", "custom", "yolov5m-seg.pt"
)  # load from PyTorch Hub (WARNING: inference not yet supported)
```
| ![zidane](https://user-images.githubusercontent.com/26833433/203113421-decef4c4-183d-4a0a-a6c2-6435b33bc5d3.jpg) | ![bus](https://user-images.githubusercontent.com/26833433/203113416-11fe0025-69f7-4874-a0a6-65d0bfe2999a.jpg) |
| ---------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------- |
### Export
Export the YOLOv5s-seg model to ONNX and TensorRT:
```bash
python export.py --weights yolov5s-seg.pt --include onnx engine --img 640 --device 0
```
</details>
## <div align="center">分类网络 ⭐ 新</div>
YOLOv5 [release v6.2](https://github.com/ultralytics/yolov5/releases) 带来对分类模型训练、验证和部署的支持!详情请查看 [发行说明](https://github.com/ultralytics/yolov5/releases/v6.2) 或访问我们的 [YOLOv5 分类 Colab 笔记本](https://github.com/ultralytics/yolov5/blob/master/classify/tutorial.ipynb) 以快速入门。
<details>
<summary>Classification Checkpoints</summary>
<br>
We trained YOLOv5-cls classification models on ImageNet for 90 epochs using a 4xA100 instance, and we trained ResNet and EfficientNet models alongside them with the same default training settings for comparison. We exported all models to ONNX FP32 for CPU speed tests and to TensorRT FP16 for GPU speed tests. We ran all speed tests on Google [Colab Pro](https://colab.research.google.com/signup) for easy reproducibility.
| Model | size<br><sup>(pixels) | acc<br><sup>top1 | acc<br><sup>top5 | Training<br><sup>90 epochs<br>4xA100 (hours) | Speed<br><sup>ONNX CPU<br>(ms) | Speed<br><sup>TensorRT V100<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>@224 (B) |
| -------------------------------------------------------------------------------------------------- | --------------------- | ---------------- | ---------------- | -------------------------------------------- | ----------------------------------- | ---------------------------------------- | ---------------- | ---------------------- |
| [YOLOv5n-cls](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5n-cls.pt) | 224 | 64.6 | 85.4 | 7:59 | **3.3** | **0.5** | **2.5** | **0.5** |
| [YOLOv5s-cls](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s-cls.pt) | 224 | 71.5 | 90.2 | 8:09 | 6.6 | 0.6 | 5.4 | 1.4 |
| [YOLOv5m-cls](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5m-cls.pt) | 224 | 75.9 | 92.9 | 10:06 | 15.5 | 0.9 | 12.9 | 3.9 |
| [YOLOv5l-cls](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5l-cls.pt) | 224 | 78.0 | 94.0 | 11:56 | 26.9 | 1.4 | 26.5 | 8.5 |
| [YOLOv5x-cls](https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x-cls.pt) | 224 | **79.0** | **94.4** | 15:04 | 54.3 | 1.8 | 48.1 | 15.9 |
| | | | | | | | | |
| [ResNet18](https://github.com/ultralytics/yolov5/releases/download/v7.0/resnet18.pt) | 224 | 70.3 | 89.5 | **6:47** | 11.2 | 0.5 | 11.7 | 3.7 |
| [ResNet34](https://github.com/ultralytics/yolov5/releases/download/v7.0/resnet34.pt) | 224 | 73.9 | 91.8 | 8:33 | 20.6 | 0.9 | 21.8 | 7.4 |
| [ResNet50](https://github.com/ultralytics/yolov5/releases/download/v7.0/resnet50.pt) | 224 | 76.8 | 93.4 | 11:10 | 23.4 | 1.0 | 25.6 | 8.5 |
| [ResNet101](https://github.com/ultralytics/yolov5/releases/download/v7.0/resnet101.pt) | 224 | 78.5 | 94.3 | 17:10 | 42.1 | 1.9 | 44.5 | 15.9 |
| | | | | | | | | |
| [EfficientNet_b0](https://github.com/ultralytics/yolov5/releases/download/v7.0/efficientnet_b0.pt) | 224 | 75.1 | 92.4 | 13:03 | 12.5 | 1.3 | 5.3 | 1.0 |
| [EfficientNet_b1](https://github.com/ultralytics/yolov5/releases/download/v7.0/efficientnet_b1.pt) | 224 | 76.4 | 93.2 | 17:04 | 14.9 | 1.6 | 7.8 | 1.5 |
| [EfficientNet_b2](https://github.com/ultralytics/yolov5/releases/download/v7.0/efficientnet_b2.pt) | 224 | 76.6 | 93.4 | 17:10 | 15.9 | 1.6 | 9.1 | 1.7 |
| [EfficientNet_b3](https://github.com/ultralytics/yolov5/releases/download/v7.0/efficientnet_b3.pt) | 224 | 77.7 | 94.0 | 19:19 | 18.9 | 1.9 | 12.2 | 2.4 |
<details>
<summary>Table Notes (click to expand)</summary>
- All checkpoints are trained to 90 epochs with the SGD optimizer (`lr0=0.001`, `weight_decay=5e-5`) at image size 224 and all default settings.<br>Runs logged to https://wandb.ai/glenn-jocher/YOLOv5-Classifier-v6-2
- **Accuracy** values are for single-model single-scale on the [ImageNet-1k](https://www.image-net.org/index.php) dataset.<br>Reproduce by `python classify/val.py --data ../datasets/imagenet --img 224`
- **Speed** averaged over 100 inference images using a Google [Colab Pro](https://colab.research.google.com/signup) V100 High-RAM instance.<br>Reproduce by `python classify/val.py --data ../datasets/imagenet --img 224 --batch 1`
- **Export** to ONNX at FP32 and TensorRT at FP16 done with `export.py`.<br>Reproduce by `python export.py --weights yolov5s-cls.pt --include engine onnx --imgsz 224`
</details>
</details>
<details>
<summary>Classification Usage Examples &nbsp;<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/classify/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a></summary>
### Train
YOLOv5 classification training supports auto-download of the MNIST, Fashion-MNIST, CIFAR10, CIFAR100, Imagenette, Imagewoof, and ImageNet datasets via the `--data` argument. To start training on MNIST, for example, use `--data mnist`.
```bash
# Single-GPU
python classify/train.py --model yolov5s-cls.pt --data cifar100 --epochs 5 --img 224 --batch 128
# Multi-GPU DDP
python -m torch.distributed.run --nproc_per_node 4 --master_port 1 classify/train.py --model yolov5s-cls.pt --data imagenet --epochs 5 --img 224 --device 0,1,2,3
```
### Val
Validate YOLOv5m-cls accuracy on the ImageNet-1k dataset:
```bash
bash data/scripts/get_imagenet.sh --val # download ImageNet val split (6.3G, 50000 images)
python classify/val.py --weights yolov5m-cls.pt --data ../datasets/imagenet --img 224 # validate
```
### Predict
Use pretrained YOLOv5s-cls.pt to predict bus.jpg:
```bash
python classify/predict.py --weights yolov5s-cls.pt --source data/images/bus.jpg
```
```python
import torch

model = torch.hub.load("ultralytics/yolov5", "custom", "yolov5s-cls.pt")  # load from PyTorch Hub
```
### Export
Export a group of trained YOLOv5s-cls, ResNet and EfficientNet models to ONNX and TensorRT:
```bash
python export.py --weights yolov5s-cls.pt resnet50.pt efficientnet_b0.pt --include onnx engine --img 224
```
</details>
## <div align="center">环境</div>
使用下面我们经过验证的环境,在几秒钟内开始使用 YOLOv5 。单击下面的图标了解详细信息。
<div align="center">
<a href="https://bit.ly/yolov5-paperspace-notebook">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-gradient.png" width="10%" /></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="5%" alt="" />
<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-colab-small.png" width="10%" /></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="5%" alt="" />
<a href="https://www.kaggle.com/models/ultralytics/yolov5">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-kaggle-small.png" width="10%" /></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="5%" alt="" />
<a href="https://hub.docker.com/r/ultralytics/yolov5">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-docker-small.png" width="10%" /></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="5%" alt="" />
<a href="https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-aws-small.png" width="10%" /></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="5%" alt="" />
<a href="https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/">
<img src="https://github.com/ultralytics/assets/releases/download/v0.0.0/logo-gcp-small.png" width="10%" /></a>
</div>
## <div align="center">贡献</div>
我们喜欢您的意见或建议!我们希望尽可能简单和透明地为 YOLOv5 做出贡献。请看我们的 [投稿指南](https://docs.ultralytics.com/help/contributing/),并填写 [YOLOv5调查](https://www.ultralytics.com/survey?utm_source=github&utm_medium=social&utm_campaign=Survey) 向我们发送您的体验反馈。感谢我们所有的贡献者!
<!-- SVG image from https://opencollective.com/ultralytics/contributors.svg?width=990 -->
<a href="https://github.com/ultralytics/yolov5/graphs/contributors">
<img src="https://github.com/ultralytics/assets/raw/main/im/image-contributors.png" /></a>
## <div align="center">许可证</div>
Ultralytics 提供两种许可证选项以适应各种使用场景:
- **AGPL-3.0 许可证**:这个[OSI 批准](https://opensource.org/license)的开源许可证非常适合学生和爱好者,可以推动开放的协作和知识分享。请查看[LICENSE](https://github.com/ultralytics/yolov5/blob/master/LICENSE) 文件以了解更多细节。
- **企业许可证**:专为商业用途设计,该许可证允许将 Ultralytics 的软件和 AI 模型无缝集成到商业产品和服务中,从而绕过 AGPL-3.0 的开源要求。如果您的场景涉及将我们的解决方案嵌入到商业产品中,请通过 [Ultralytics Licensing](https://www.ultralytics.com/license)与我们联系。
## <div align="center">联系方式</div>
对于 Ultralytics 的错误报告和功能请求,请访问 [GitHub Issues](https://github.com/ultralytics/yolov5/issues),并加入我们的 [Discord](https://discord.com/invite/ultralytics) 社区进行问题和讨论!
<br>
<div align="center">
<a href="https://github.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-github.png" width="3%" alt="Ultralytics GitHub"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://www.linkedin.com/company/ultralytics/"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-linkedin.png" width="3%" alt="Ultralytics LinkedIn"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://twitter.com/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-twitter.png" width="3%" alt="Ultralytics Twitter"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://youtube.com/ultralytics?sub_confirmation=1"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-youtube.png" width="3%" alt="Ultralytics YouTube"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://www.tiktok.com/@ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-tiktok.png" width="3%" alt="Ultralytics TikTok"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://ultralytics.com/bilibili"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-bilibili.png" width="3%" alt="Ultralytics BiliBili"></a>
<img src="https://github.com/ultralytics/assets/raw/main/social/logo-transparent.png" width="3%">
<a href="https://discord.com/invite/ultralytics"><img src="https://github.com/ultralytics/assets/raw/main/social/logo-social-discord.png" width="3%" alt="Ultralytics Discord"></a>
</div>
[tta]: https://docs.ultralytics.com/yolov5/tutorials/test_time_augmentation
yolov5/benchmarks.py Normal file
@ -0,0 +1,294 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Run YOLOv5 benchmarks on all supported export formats.
Format | `export.py --include` | Model
--- | --- | ---
PyTorch | - | yolov5s.pt
TorchScript | `torchscript` | yolov5s.torchscript
ONNX | `onnx` | yolov5s.onnx
OpenVINO | `openvino` | yolov5s_openvino_model/
TensorRT | `engine` | yolov5s.engine
CoreML | `coreml` | yolov5s.mlpackage
TensorFlow SavedModel | `saved_model` | yolov5s_saved_model/
TensorFlow GraphDef | `pb` | yolov5s.pb
TensorFlow Lite | `tflite` | yolov5s.tflite
TensorFlow Edge TPU | `edgetpu` | yolov5s_edgetpu.tflite
TensorFlow.js | `tfjs` | yolov5s_web_model/
Requirements:
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU
$ pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com # TensorRT
Usage:
$ python benchmarks.py --weights yolov5s.pt --img 640
"""
import argparse
import platform
import sys
import time
from pathlib import Path
import pandas as pd
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
import export
from models.experimental import attempt_load
from models.yolo import SegmentationModel
from segment.val import run as val_seg
from utils import notebook_init
from utils.general import LOGGER, check_yaml, file_size, print_args
from utils.torch_utils import select_device
from val import run as val_det
def run(
weights=ROOT / "yolov5s.pt", # weights path
imgsz=640, # inference size (pixels)
batch_size=1, # batch size
data=ROOT / "data/coco128.yaml", # dataset.yaml path
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
half=False, # use FP16 half-precision inference
test=False, # test exports only
pt_only=False, # test PyTorch only
hard_fail=False, # throw error on benchmark failure
):
"""
Run YOLOv5 benchmarks on multiple export formats and log results for model performance evaluation.
Args:
weights (Path | str): Path to the model weights file (default: ROOT / "yolov5s.pt").
imgsz (int): Inference size in pixels (default: 640).
batch_size (int): Batch size for inference (default: 1).
data (Path | str): Path to the dataset.yaml file (default: ROOT / "data/coco128.yaml").
device (str): CUDA device, e.g., '0' or '0,1,2,3' or 'cpu' (default: "").
half (bool): Use FP16 half-precision inference (default: False).
test (bool): Test export formats only (default: False).
pt_only (bool): Test PyTorch format only (default: False).
hard_fail (bool): Throw an error on benchmark failure if True (default: False).
Returns:
None. Logs information about the benchmark results, including the format, size, mAP50-95, and inference time.
Notes:
Supported export formats and models include PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, CoreML,
TensorFlow SavedModel, TensorFlow GraphDef, TensorFlow Lite, and TensorFlow Edge TPU. Edge TPU and TF.js
are unsupported.
Example:
```bash
$ python benchmarks.py --weights yolov5s.pt --img 640
```
Usage:
Install required packages:
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU support
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU support
$ pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com # TensorRT
Run benchmarks:
$ python benchmarks.py --weights yolov5s.pt --img 640
"""
y, t = [], time.time()
device = select_device(device)
model_type = type(attempt_load(weights, fuse=False)) # DetectionModel, SegmentationModel, etc.
for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, CPU, GPU)
try:
assert i not in (9, 10), "inference not supported" # Edge TPU and TF.js are unsupported
assert i != 5 or platform.system() == "Darwin", "inference only supported on macOS>=10.13" # CoreML
if "cpu" in device.type:
assert cpu, "inference not supported on CPU"
if "cuda" in device.type:
assert gpu, "inference not supported on GPU"
# Export
if f == "-":
w = weights # PyTorch format
else:
w = export.run(
weights=weights, imgsz=[imgsz], include=[f], batch_size=batch_size, device=device, half=half
)[-1] # all others
assert suffix in str(w), "export failed"
# Validate
if model_type == SegmentationModel:
result = val_seg(data, w, batch_size, imgsz, plots=False, device=device, task="speed", half=half)
metric = result[0][7] # (box(p, r, map50, map), mask(p, r, map50, map), *loss(box, obj, cls))
else: # DetectionModel:
result = val_det(data, w, batch_size, imgsz, plots=False, device=device, task="speed", half=half)
metric = result[0][3] # (p, r, map50, map, *loss(box, obj, cls))
speed = result[2][1] # times (preprocess, inference, postprocess)
y.append([name, round(file_size(w), 1), round(metric, 4), round(speed, 2)]) # MB, mAP, t_inference
except Exception as e:
if hard_fail:
assert type(e) is AssertionError, f"Benchmark --hard-fail for {name}: {e}"
LOGGER.warning(f"WARNING ⚠️ Benchmark failure for {name}: {e}")
y.append([name, None, None, None]) # mAP, t_inference
if pt_only and i == 0:
break # break after PyTorch
# Print results
LOGGER.info("\n")
parse_opt()
notebook_init() # print system info
c = ["Format", "Size (MB)", "mAP50-95", "Inference time (ms)"]  # the original `if map` tested the always-truthy builtin, so these columns were always used
py = pd.DataFrame(y, columns=c)
LOGGER.info(f"\nBenchmarks complete ({time.time() - t:.2f}s)")
LOGGER.info(str(py))
if hard_fail and isinstance(hard_fail, str):
metrics = py["mAP50-95"].array # values to compare to floor
floor = eval(hard_fail) # minimum metric floor to pass, i.e. = 0.29 mAP for YOLOv5n
assert all(x > floor for x in metrics if pd.notna(x)), f"HARD FAIL: mAP50-95 < floor {floor}"
return py
def test(
weights=ROOT / "yolov5s.pt", # weights path
imgsz=640, # inference size (pixels)
batch_size=1, # batch size
data=ROOT / "data/coco128.yaml", # dataset.yaml path
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
half=False, # use FP16 half-precision inference
test=False, # test exports only
pt_only=False, # test PyTorch only
hard_fail=False, # throw error on benchmark failure
):
"""
Run YOLOv5 export tests for all supported formats and log the results, including export statuses.
Args:
weights (Path | str): Path to the model weights file (.pt format). Default is 'ROOT / "yolov5s.pt"'.
imgsz (int): Inference image size (in pixels). Default is 640.
batch_size (int): Batch size for testing. Default is 1.
data (Path | str): Path to the dataset configuration file (.yaml format). Default is 'ROOT / "data/coco128.yaml"'.
device (str): Device for running the tests, can be 'cpu' or a specific CUDA device ('0', '0,1,2,3', etc.). Default is an empty string.
half (bool): Use FP16 half-precision for inference if True. Default is False.
test (bool): Test export formats only without running inference. Default is False.
pt_only (bool): Test only the PyTorch model if True. Default is False.
hard_fail (bool): Raise error on export or test failure if True. Default is False.
Returns:
pd.DataFrame: DataFrame containing the results of the export tests, including format names and export statuses.
Examples:
```bash
$ python benchmarks.py --weights yolov5s.pt --img 640
```
Notes:
Supported export formats and models include PyTorch, TorchScript, ONNX, OpenVINO, TensorRT, CoreML, TensorFlow
SavedModel, TensorFlow GraphDef, TensorFlow Lite, and TensorFlow Edge TPU. Edge TPU and TF.js are unsupported.
Usage:
Install required packages:
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU support
$ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU support
$ pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com # TensorRT
Run export tests:
$ python benchmarks.py --weights yolov5s.pt --img 640
"""
y, t = [], time.time()
device = select_device(device)
for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows():  # index, (name, file, suffix, CPU, GPU); the table has five columns, so four-way unpacking would fail
try:
w = (
weights
if f == "-"
else export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1]
) # weights
assert suffix in str(w), "export failed"
y.append([name, True])
except Exception:
y.append([name, False]) # mAP, t_inference
# Print results
LOGGER.info("\n")
parse_opt()
notebook_init() # print system info
py = pd.DataFrame(y, columns=["Format", "Export"])
LOGGER.info(f"\nExports complete ({time.time() - t:.2f}s)")
LOGGER.info(str(py))
return py
def parse_opt():
"""
Parses command-line arguments for YOLOv5 model inference configuration.
Args:
weights (str): The path to the weights file. Defaults to 'ROOT / "yolov5s.pt"'.
imgsz (int): Inference size in pixels. Defaults to 640.
batch_size (int): Batch size. Defaults to 1.
data (str): Path to the dataset YAML file. Defaults to 'ROOT / "data/coco128.yaml"'.
device (str): CUDA device, e.g., '0' or '0,1,2,3' or 'cpu'. Defaults to an empty string (auto-select).
half (bool): Use FP16 half-precision inference. This is a flag and defaults to False.
test (bool): Test exports only. This is a flag and defaults to False.
pt_only (bool): Test PyTorch only. This is a flag and defaults to False.
hard_fail (bool | str): Throw an error on benchmark failure. Can be a boolean or a string representing a minimum
metric floor, e.g., '0.29'. Defaults to False.
Returns:
argparse.Namespace: Parsed command-line arguments encapsulated in an argparse Namespace object.
Notes:
The function modifies the 'opt.data' by checking and validating the YAML path using 'check_yaml()'.
The parsed arguments are printed for reference using 'print_args()'.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="weights path")
parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=640, help="inference size (pixels)")
parser.add_argument("--batch-size", type=int, default=1, help="batch size")
parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="dataset.yaml path")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
parser.add_argument("--test", action="store_true", help="test exports only")
parser.add_argument("--pt-only", action="store_true", help="test PyTorch only")
parser.add_argument("--hard-fail", nargs="?", const=True, default=False, help="Exception on error or < min metric")
opt = parser.parse_args()
opt.data = check_yaml(opt.data) # check YAML
print_args(vars(opt))
return opt
def main(opt):
"""
Executes YOLOv5 benchmark tests or main training/inference routines based on the provided command-line arguments.
Args:
opt (argparse.Namespace): Parsed command-line arguments including options for weights, image size, batch size, data
configuration, device, and other flags for inference settings.
Returns:
None: This function does not return any value. It leverages side-effects such as logging and running benchmarks.
Example:
```python
if __name__ == "__main__":
opt = parse_opt()
main(opt)
```
Notes:
- For a complete list of supported export formats and their respective requirements, refer to the
[Ultralytics YOLOv5 Export Formats](https://github.com/ultralytics/yolov5#export-formats).
- Ensure that you have installed all necessary dependencies by following the installation instructions detailed in
the [main repository](https://github.com/ultralytics/yolov5#installation).
```shell
# Running benchmarks on default weights and image size
$ python benchmarks.py --weights yolov5s.pt --img 640
```
"""
test(**vars(opt)) if opt.test else run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)
yolov5/classify/predict.py Normal file
@ -0,0 +1,241 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Run YOLOv5 classification inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
Usage - sources:
$ python classify/predict.py --weights yolov5s-cls.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
screen # screenshot
path/ # directory
list.txt # list of images
list.streams # list of streams
'path/*.jpg' # glob
'https://youtu.be/LNwODJXcvt4' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
Usage - formats:
$ python classify/predict.py --weights yolov5s-cls.pt # PyTorch
yolov5s-cls.torchscript # TorchScript
yolov5s-cls.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s-cls_openvino_model # OpenVINO
yolov5s-cls.engine # TensorRT
yolov5s-cls.mlmodel # CoreML (macOS-only)
yolov5s-cls_saved_model # TensorFlow SavedModel
yolov5s-cls.pb # TensorFlow GraphDef
yolov5s-cls.tflite # TensorFlow Lite
yolov5s-cls_edgetpu.tflite # TensorFlow Edge TPU
yolov5s-cls_paddle_model # PaddlePaddle
"""
import argparse
import os
import platform
import sys
from pathlib import Path
import torch
import torch.nn.functional as F
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from ultralytics.utils.plotting import Annotator
from models.common import DetectMultiBackend
from utils.augmentations import classify_transforms
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (
LOGGER,
Profile,
check_file,
check_img_size,
check_imshow,
check_requirements,
colorstr,
cv2,
increment_path,
print_args,
strip_optimizer,
)
from utils.torch_utils import select_device, smart_inference_mode
@smart_inference_mode()
def run(
weights=ROOT / "yolov5s-cls.pt", # model.pt path(s)
source=ROOT / "data/images", # file/dir/URL/glob/screen/0(webcam)
data=ROOT / "data/coco128.yaml", # dataset.yaml path
imgsz=(224, 224), # inference size (height, width)
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
nosave=False, # do not save images/videos
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project=ROOT / "runs/predict-cls", # save results to project/name
name="exp", # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
):
"""Conducts YOLOv5 classification inference on diverse input sources and saves results."""
source = str(source)
save_img = not nosave and not source.endswith(".txt") # save inference images
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
screenshot = source.lower().startswith("screen")
if is_url and is_file:
source = check_file(source) # download
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
bs = 1 # batch_size
if webcam:
view_img = check_imshow(warn=True)
dataset = LoadStreams(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
bs = len(dataset)
elif screenshot:
dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
else:
dataset = LoadImages(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device))
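# dt holds three Profile timers: dt[0] pre-process, dt[1] inference, dt[2] post-process (reported per image at the end)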
for path, im, im0s, vid_cap, s in dataset:
with dt[0]:
im = torch.Tensor(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
results = model(im)
# Post-process
with dt[2]:
pred = F.softmax(results, dim=1) # probabilities
# Process predictions
for i, prob in enumerate(pred): # per image
seen += 1
if webcam: # batch_size >= 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
s += f"{i}: "
else:
p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # im.jpg
txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") # im.txt
s += "{:g}x{:g} ".format(*im.shape[2:]) # print string
annotator = Annotator(im0, example=str(names), pil=True)
# Print results
top5i = prob.argsort(0, descending=True)[:5].tolist() # top 5 indices
s += f"{', '.join(f'{names[j]} {prob[j]:.2f}' for j in top5i)}, "
# Write results
text = "\n".join(f"{prob[j]:.2f} {names[j]}" for j in top5i)
if save_img or view_img: # Add bbox to image
annotator.text([32, 32], text, txt_color=(255, 255, 255))
if save_txt: # Write to file
with open(f"{txt_path}.txt", "a") as f:
f.write(text + "\n")
# Stream results
im0 = annotator.result()
if view_img:
if platform.system() == "Linux" and p not in windows:
windows.append(p)
cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
cv2.imshow(str(p), im0)
cv2.waitKey(1) # 1 millisecond
# Save results (image with detections)
if save_img:
if dataset.mode == "image":
cv2.imwrite(save_path, im0)
else: # 'video' or 'stream'
if vid_path[i] != save_path: # new video
vid_path[i] = save_path
if isinstance(vid_writer[i], cv2.VideoWriter):
vid_writer[i].release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path = str(Path(save_path).with_suffix(".mp4")) # force *.mp4 suffix on results videos
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
vid_writer[i].write(im0)
# Print time (inference-only)
LOGGER.info(f"{s}{dt[1].dt * 1e3:.1f}ms")
# Print results
t = tuple(x.t / seen * 1e3 for x in dt) # speeds per image
LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning)
def parse_opt():
"""Parses command line arguments for YOLOv5 inference settings including model, source, device, and image size."""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-cls.pt", help="model path(s)")
parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="(optional) dataset.yaml path")
parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[224], help="inference size h,w")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
parser.add_argument("--nosave", action="store_true", help="do not save images/videos")
parser.add_argument("--augment", action="store_true", help="augmented inference")
parser.add_argument("--visualize", action="store_true", help="visualize features")
parser.add_argument("--update", action="store_true", help="update all models")
parser.add_argument("--project", default=ROOT / "runs/predict-cls", help="save results to project/name")
parser.add_argument("--name", default="exp", help="save results to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride")
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
"""Executes YOLOv5 model inference with options for ONNX DNN and video frame-rate stride adjustments."""
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)
yolov5/classify/train.py Normal file
@ -0,0 +1,382 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Train a YOLOv5 classifier model on a classification dataset.
Usage - Single-GPU training:
$ python classify/train.py --model yolov5s-cls.pt --data imagenette160 --epochs 5 --img 224
Usage - Multi-GPU DDP training:
$ python -m torch.distributed.run --nproc_per_node 4 --master_port 2022 classify/train.py --model yolov5s-cls.pt --data imagenet --epochs 5 --img 224 --device 0,1,2,3
Datasets: --data mnist, fashion-mnist, cifar10, cifar100, imagenette, imagewoof, imagenet, or 'path/to/data'
YOLOv5-cls models: --model yolov5n-cls.pt, yolov5s-cls.pt, yolov5m-cls.pt, yolov5l-cls.pt, yolov5x-cls.pt
Torchvision models: --model resnet50, efficientnet_b0, etc. See https://pytorch.org/vision/stable/models.html
"""
import argparse
import os
import subprocess
import sys
import time
from copy import deepcopy
from datetime import datetime
from pathlib import Path
import torch
import torch.distributed as dist
import torch.hub as hub
import torch.optim.lr_scheduler as lr_scheduler
import torchvision
from torch.cuda import amp
from tqdm import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from classify import val as validate
from models.experimental import attempt_load
from models.yolo import ClassificationModel, DetectionModel
from utils.dataloaders import create_classification_dataloader
from utils.general import (
DATASETS_DIR,
LOGGER,
TQDM_BAR_FORMAT,
WorkingDirectory,
check_git_info,
check_git_status,
check_requirements,
colorstr,
download,
increment_path,
init_seeds,
print_args,
yaml_save,
)
from utils.loggers import GenericLogger
from utils.plots import imshow_cls
from utils.torch_utils import (
ModelEMA,
de_parallel,
model_info,
reshape_classifier_output,
select_device,
smart_DDP,
smart_optimizer,
smartCrossEntropyLoss,
torch_distributed_zero_first,
)
LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1)) # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv("RANK", -1))
WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1))
GIT_INFO = check_git_info()
def train(opt, device):
"""Trains a YOLOv5 model, managing datasets, model optimization, logging, and saving checkpoints."""
init_seeds(opt.seed + 1 + RANK, deterministic=True)
save_dir, data, bs, epochs, nw, imgsz, pretrained = (
opt.save_dir,
Path(opt.data),
opt.batch_size,
opt.epochs,
min(os.cpu_count() - 1, opt.workers),
opt.imgsz,
str(opt.pretrained).lower() == "true",
)
cuda = device.type != "cpu"
# Directories
wdir = save_dir / "weights"
wdir.mkdir(parents=True, exist_ok=True) # make dir
last, best = wdir / "last.pt", wdir / "best.pt"
# Save run settings
yaml_save(save_dir / "opt.yaml", vars(opt))
# Logger
logger = GenericLogger(opt=opt, console_logger=LOGGER) if RANK in {-1, 0} else None
# Download Dataset
with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
data_dir = data if data.is_dir() else (DATASETS_DIR / data)
if not data_dir.is_dir():
LOGGER.info(f"\nDataset not found ⚠️, missing path {data_dir}, attempting download...")
t = time.time()
if str(data) == "imagenet":
subprocess.run(["bash", str(ROOT / "data/scripts/get_imagenet.sh")], check=True)  # no shell=True: with an argument list it would silently drop the script path
else:
url = f"https://github.com/ultralytics/assets/releases/download/v0.0.0/{data}.zip"
download(url, dir=data_dir.parent)
s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
LOGGER.info(s)
# Dataloaders
nc = len([x for x in (data_dir / "train").glob("*") if x.is_dir()]) # number of classes
trainloader = create_classification_dataloader(
path=data_dir / "train",
imgsz=imgsz,
batch_size=bs // WORLD_SIZE,
augment=True,
cache=opt.cache,
rank=LOCAL_RANK,
workers=nw,
)
test_dir = data_dir / "test" if (data_dir / "test").exists() else data_dir / "val" # data/test or data/val
if RANK in {-1, 0}:
testloader = create_classification_dataloader(
path=test_dir,
imgsz=imgsz,
batch_size=bs // WORLD_SIZE * 2,
augment=False,
cache=opt.cache,
rank=-1,
workers=nw,
)
# Model
with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
if Path(opt.model).is_file() or opt.model.endswith(".pt"):
model = attempt_load(opt.model, device="cpu", fuse=False)
elif opt.model in torchvision.models.__dict__: # TorchVision models i.e. resnet50, efficientnet_b0
model = torchvision.models.__dict__[opt.model](weights="IMAGENET1K_V1" if pretrained else None)
else:
m = hub.list("ultralytics/yolov5") # + hub.list('pytorch/vision') # models
raise ModuleNotFoundError(f"--model {opt.model} not found. Available models are: \n" + "\n".join(m))
if isinstance(model, DetectionModel):
LOGGER.warning("WARNING ⚠️ pass YOLOv5 classifier model with '-cls' suffix, i.e. '--model yolov5s-cls.pt'")
model = ClassificationModel(model=model, nc=nc, cutoff=opt.cutoff or 10) # convert to classification model
reshape_classifier_output(model, nc) # update class count
for m in model.modules():
if not pretrained and hasattr(m, "reset_parameters"):
m.reset_parameters()
if isinstance(m, torch.nn.Dropout) and opt.dropout is not None:
m.p = opt.dropout # set dropout
for p in model.parameters():
p.requires_grad = True # for training
model = model.to(device)
# Info
if RANK in {-1, 0}:
model.names = trainloader.dataset.classes # attach class names
model.transforms = testloader.dataset.torch_transforms # attach inference transforms
model_info(model)
if opt.verbose:
LOGGER.info(model)
images, labels = next(iter(trainloader))
file = imshow_cls(images[:25], labels[:25], names=model.names, f=save_dir / "train_images.jpg")
logger.log_images(file, name="Train Examples")
logger.log_graph(model, imgsz) # log model
# Optimizer
optimizer = smart_optimizer(model, opt.optimizer, opt.lr0, momentum=0.9, decay=opt.decay)
# Scheduler
lrf = 0.01 # final lr (fraction of lr0)
# lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf # cosine
def lf(x):
"""Linear learning rate scheduler function, scaling learning rate from initial value to `lrf` over `epochs`."""
return (1 - x / epochs) * (1 - lrf) + lrf # linear
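# lf(0) == 1.0 and lf(epochs) == lrf, so the learning rate decays linearly from lr0 to lr0 * lrf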
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
# scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=lr0, total_steps=epochs, pct_start=0.1,
# final_div_factor=1 / 25 / lrf)
# EMA
ema = ModelEMA(model) if RANK in {-1, 0} else None
# DDP mode
if cuda and RANK != -1:
model = smart_DDP(model)
# Train
t0 = time.time()
criterion = smartCrossEntropyLoss(label_smoothing=opt.label_smoothing) # loss function
best_fitness = 0.0
scaler = amp.GradScaler(enabled=cuda)
val = test_dir.stem # 'val' or 'test'
LOGGER.info(
f"Image sizes {imgsz} train, {imgsz} test\n"
f"Using {nw * WORLD_SIZE} dataloader workers\n"
f"Logging results to {colorstr('bold', save_dir)}\n"
f"Starting {opt.model} training on {data} dataset with {nc} classes for {epochs} epochs...\n\n"
f"{'Epoch':>10}{'GPU_mem':>10}{'train_loss':>12}{f'{val}_loss':>12}{'top1_acc':>12}{'top5_acc':>12}"
)
for epoch in range(epochs): # loop over the dataset multiple times
tloss, vloss, fitness = 0.0, 0.0, 0.0 # train loss, val loss, fitness
model.train()
if RANK != -1:
trainloader.sampler.set_epoch(epoch)
pbar = enumerate(trainloader)
if RANK in {-1, 0}:
pbar = tqdm(enumerate(trainloader), total=len(trainloader), bar_format=TQDM_BAR_FORMAT)
for i, (images, labels) in pbar: # progress bar
images, labels = images.to(device, non_blocking=True), labels.to(device)
# Forward
with amp.autocast(enabled=cuda): # stability issues when enabled
loss = criterion(model(images), labels)
# Backward
scaler.scale(loss).backward()
# Optimize
scaler.unscale_(optimizer) # unscale gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients
scaler.step(optimizer)
scaler.update()
optimizer.zero_grad()
if ema:
ema.update(model)
if RANK in {-1, 0}:
# Print
tloss = (tloss * i + loss.item()) / (i + 1) # update mean losses
mem = "%.3gG" % (torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0) # (GB)
pbar.desc = f"{f'{epoch + 1}/{epochs}':>10}{mem:>10}{tloss:>12.3g}" + " " * 36
# Test
if i == len(pbar) - 1: # last batch
top1, top5, vloss = validate.run(
model=ema.ema, dataloader=testloader, criterion=criterion, pbar=pbar
) # test accuracy, loss
fitness = top1 # define fitness as top1 accuracy
# Scheduler
scheduler.step()
# Log metrics
if RANK in {-1, 0}:
# Best fitness
if fitness > best_fitness:
best_fitness = fitness
# Log
metrics = {
"train/loss": tloss,
f"{val}/loss": vloss,
"metrics/accuracy_top1": top1,
"metrics/accuracy_top5": top5,
"lr/0": optimizer.param_groups[0]["lr"],
} # learning rate
logger.log_metrics(metrics, epoch)
# Save model
final_epoch = epoch + 1 == epochs
if (not opt.nosave) or final_epoch:
ckpt = {
"epoch": epoch,
"best_fitness": best_fitness,
"model": deepcopy(ema.ema).half(), # deepcopy(de_parallel(model)).half(),
"ema": None, # deepcopy(ema.ema).half(),
"updates": ema.updates,
"optimizer": None, # optimizer.state_dict(),
"opt": vars(opt),
"git": GIT_INFO, # {remote, branch, commit} if a git repo
"date": datetime.now().isoformat(),
}
# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fitness:
torch.save(ckpt, best)
del ckpt
# Train complete
if RANK in {-1, 0} and final_epoch:
LOGGER.info(
f"\nTraining complete ({(time.time() - t0) / 3600:.3f} hours)"
f"\nResults saved to {colorstr('bold', save_dir)}"
f"\nPredict: python classify/predict.py --weights {best} --source im.jpg"
f"\nValidate: python classify/val.py --weights {best} --data {data_dir}"
f"\nExport: python export.py --weights {best} --include onnx"
f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{best}')"
f"\nVisualize: https://netron.app\n"
)
# Plot examples
images, labels = (x[:25] for x in next(iter(testloader))) # first 25 images and labels
pred = torch.max(ema.ema(images.to(device)), 1)[1]
file = imshow_cls(images, labels, pred, de_parallel(model).names, verbose=False, f=save_dir / "test_images.jpg")
# Log results
meta = {"epochs": epochs, "top1_acc": best_fitness, "date": datetime.now().isoformat()}
logger.log_images(file, name="Test Examples (true-predicted)", epoch=epoch)
logger.log_model(best, epochs, metadata=meta)
def parse_opt(known=False):
"""Parses command line arguments for YOLOv5 training including model path, dataset, epochs, and more, returning
parsed arguments.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, default="yolov5s-cls.pt", help="initial weights path")
parser.add_argument("--data", type=str, default="imagenette160", help="cifar10, cifar100, mnist, imagenet, ...")
parser.add_argument("--epochs", type=int, default=10, help="total training epochs")
parser.add_argument("--batch-size", type=int, default=64, help="total batch size for all GPUs")
parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=224, help="train, val image size (pixels)")
parser.add_argument("--nosave", action="store_true", help="only save final checkpoint")
parser.add_argument("--cache", type=str, nargs="?", const="ram", help='--cache images in "ram" (default) or "disk"')
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
parser.add_argument("--project", default=ROOT / "runs/train-cls", help="save to project/name")
parser.add_argument("--name", default="exp", help="save to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--pretrained", nargs="?", const=True, default=True, help="start from i.e. --pretrained False")
parser.add_argument("--optimizer", choices=["SGD", "Adam", "AdamW", "RMSProp"], default="Adam", help="optimizer")
parser.add_argument("--lr0", type=float, default=0.001, help="initial learning rate")
parser.add_argument("--decay", type=float, default=5e-5, help="weight decay")
parser.add_argument("--label-smoothing", type=float, default=0.1, help="Label smoothing epsilon")
parser.add_argument("--cutoff", type=int, default=None, help="Model layer cutoff index for Classify() head")
parser.add_argument("--dropout", type=float, default=None, help="Dropout (fraction)")
parser.add_argument("--verbose", action="store_true", help="Verbose mode")
parser.add_argument("--seed", type=int, default=0, help="Global training seed")
parser.add_argument("--local_rank", type=int, default=-1, help="Automatic DDP Multi-GPU argument, do not modify")
return parser.parse_known_args()[0] if known else parser.parse_args()
def main(opt):
"""Executes YOLOv5 training with given options, handling device setup and DDP mode; includes pre-training checks."""
if RANK in {-1, 0}:
print_args(vars(opt))
check_git_status()
check_requirements(ROOT / "requirements.txt")
# DDP mode
device = select_device(opt.device, batch_size=opt.batch_size)
if LOCAL_RANK != -1:
assert opt.batch_size != -1, "AutoBatch is coming soon for classification, please pass a valid --batch-size"
assert opt.batch_size % WORLD_SIZE == 0, f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE"
assert torch.cuda.device_count() > LOCAL_RANK, "insufficient CUDA devices for DDP command"
torch.cuda.set_device(LOCAL_RANK)
device = torch.device("cuda", LOCAL_RANK)
dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
# Parameters
opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok) # increment run
# Train
train(opt, device)
def run(**kwargs):
"""
Executes YOLOv5 model training or inference with specified parameters, returning updated options.
Example: from yolov5 import classify; classify.train.run(data='mnist', imgsz=320, model='yolov5m')
"""
opt = parse_opt(True)
for k, v in kwargs.items():
setattr(opt, k, v)
main(opt)
return opt
if __name__ == "__main__":
opt = parse_opt()
main(opt)
yolov5/classify/tutorial.ipynb vendored Normal file
File diff suppressed because it is too large
yolov5/classify/val.py Normal file
@ -0,0 +1,178 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Validate a trained YOLOv5 classification model on a classification dataset.
Usage:
$ bash data/scripts/get_imagenet.sh --val # download ImageNet val split (6.3G, 50000 images)
$ python classify/val.py --weights yolov5m-cls.pt --data ../datasets/imagenet --img 224 # validate ImageNet
Usage - formats:
$ python classify/val.py --weights yolov5s-cls.pt # PyTorch
yolov5s-cls.torchscript # TorchScript
yolov5s-cls.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s-cls_openvino_model # OpenVINO
yolov5s-cls.engine # TensorRT
yolov5s-cls.mlmodel # CoreML (macOS-only)
yolov5s-cls_saved_model # TensorFlow SavedModel
yolov5s-cls.pb # TensorFlow GraphDef
yolov5s-cls.tflite # TensorFlow Lite
yolov5s-cls_edgetpu.tflite # TensorFlow Edge TPU
yolov5s-cls_paddle_model # PaddlePaddle
"""
import argparse
import os
import sys
from pathlib import Path
import torch
from tqdm import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import DetectMultiBackend
from utils.dataloaders import create_classification_dataloader
from utils.general import (
LOGGER,
TQDM_BAR_FORMAT,
Profile,
check_img_size,
check_requirements,
colorstr,
increment_path,
print_args,
)
from utils.torch_utils import select_device, smart_inference_mode
@smart_inference_mode()
def run(
data=ROOT / "../datasets/mnist", # dataset dir
weights=ROOT / "yolov5s-cls.pt", # model.pt path(s)
batch_size=128, # batch size
imgsz=224, # inference size (pixels)
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
workers=8, # max dataloader workers (per RANK in DDP mode)
verbose=False, # verbose output
project=ROOT / "runs/val-cls", # save to project/name
name="exp", # save to project/name
exist_ok=False, # existing project/name ok, do not increment
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
model=None,
dataloader=None,
criterion=None,
pbar=None,
):
"""Validates a YOLOv5 classification model on a dataset, computing metrics like top1 and top5 accuracy."""
# Initialize/load model and set device
training = model is not None
if training: # called by train.py
device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model
half &= device.type != "cpu" # half precision only supported on CUDA
model.half() if half else model.float()
else: # called directly
device = select_device(device, batch_size=batch_size)
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
save_dir.mkdir(parents=True, exist_ok=True) # make dir
# Load model
model = DetectMultiBackend(weights, device=device, dnn=dnn, fp16=half)
stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
imgsz = check_img_size(imgsz, s=stride) # check image size
half = model.fp16 # FP16 supported on limited backends with CUDA
if engine:
batch_size = model.batch_size
else:
device = model.device
if not (pt or jit):
batch_size = 1 # export.py models default to batch-size 1
LOGGER.info(f"Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")
# Dataloader
data = Path(data)
test_dir = data / "test" if (data / "test").exists() else data / "val" # data/test or data/val
dataloader = create_classification_dataloader(
path=test_dir, imgsz=imgsz, batch_size=batch_size, augment=False, rank=-1, workers=workers
)
model.eval()
pred, targets, loss, dt = [], [], 0, (Profile(device=device), Profile(device=device), Profile(device=device))
n = len(dataloader) # number of batches
action = "validating" if dataloader.dataset.root.stem == "val" else "testing"
desc = f"{pbar.desc[:-36]}{action:>36}" if pbar else f"{action}"
bar = tqdm(dataloader, desc, n, not training, bar_format=TQDM_BAR_FORMAT, position=0)
with torch.cuda.amp.autocast(enabled=device.type != "cpu"):
for images, labels in bar:
with dt[0]:
images, labels = images.to(device, non_blocking=True), labels.to(device)
with dt[1]:
y = model(images)
with dt[2]:
pred.append(y.argsort(1, descending=True)[:, :5])
targets.append(labels)
if criterion:
loss += criterion(y, labels)
loss /= n
pred, targets = torch.cat(pred), torch.cat(targets)
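# Broadcast (N,1) targets against (N,5) top-5 predictions: correct[i, k] == 1.0 if prediction k matches target i,
# so column 0 alone gives top-1 accuracy and a match anywhere in the row gives top-5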
correct = (targets[:, None] == pred).float()
acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1) # (top1, top5) accuracy
top1, top5 = acc.mean(0).tolist()
if pbar:
pbar.desc = f"{pbar.desc[:-36]}{loss:>12.3g}{top1:>12.3g}{top5:>12.3g}"
if verbose: # all classes
LOGGER.info(f"{'Class':>24}{'Images':>12}{'top1_acc':>12}{'top5_acc':>12}")
LOGGER.info(f"{'all':>24}{targets.shape[0]:>12}{top1:>12.3g}{top5:>12.3g}")
for i, c in model.names.items():
acc_i = acc[targets == i]
top1i, top5i = acc_i.mean(0).tolist()
LOGGER.info(f"{c:>24}{acc_i.shape[0]:>12}{top1i:>12.3g}{top5i:>12.3g}")
# Print results
t = tuple(x.t / len(dataloader.dataset.samples) * 1e3 for x in dt) # speeds per image
shape = (1, 3, imgsz, imgsz)
LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms post-process per image at shape {shape}" % t)
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
return top1, top5, loss
def parse_opt():
"""Parses and returns command line arguments for YOLOv5 model evaluation and inference settings."""
parser = argparse.ArgumentParser()
parser.add_argument("--data", type=str, default=ROOT / "../datasets/mnist", help="dataset path")
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-cls.pt", help="model.pt path(s)")
parser.add_argument("--batch-size", type=int, default=128, help="batch size")
parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=224, help="inference size (pixels)")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
parser.add_argument("--verbose", nargs="?", const=True, default=True, help="verbose output")
parser.add_argument("--project", default=ROOT / "runs/val-cls", help="save to project/name")
parser.add_argument("--name", default="exp", help="save to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
opt = parser.parse_args()
print_args(vars(opt))
return opt
def main(opt):
"""Executes the YOLOv5 model prediction workflow, handling argument parsing and requirement checks."""
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)
yolov5/data/Argoverse.yaml Normal file
@ -0,0 +1,73 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
# Example usage: python train.py --data Argoverse.yaml
# parent
# ├── yolov5
# └── datasets
# └── Argoverse ← downloads here (31.3 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Argoverse # dataset root dir
train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: bus
5: truck
6: traffic_light
7: stop_sign
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json
from tqdm import tqdm
from utils.general import download, Path
def argoverse2yolo(set):
labels = {}
a = json.load(open(set, "rb"))
for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
img_id = annot['image_id']
img_name = a['images'][img_id]['name']
img_label_name = f'{img_name[:-3]}txt'
cls = annot['category_id'] # instance class id
x_center, y_center, width, height = annot['bbox']
x_center = (x_center + width / 2) / 1920.0 # offset and scale
y_center = (y_center + height / 2) / 1200.0 # offset and scale
width /= 1920.0 # scale
height /= 1200.0 # scale
img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
if not img_dir.exists():
img_dir.mkdir(parents=True, exist_ok=True)
k = str(img_dir / img_label_name)
if k not in labels:
labels[k] = []
labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
for k in labels:
with open(k, "w") as f:
f.writelines(labels[k])
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
download(urls, dir=dir, delete=False)
# Convert
annotations_dir = 'Argoverse-HD/annotations/'
(dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images'
for d in "train.json", "val.json":
argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels
yolov5/data/GlobalWheat2020.yaml Normal file
@ -0,0 +1,53 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan
# Example usage: python train.py --data GlobalWheat2020.yaml
# parent
# ├── yolov5
# └── datasets
# └── GlobalWheat2020 ← downloads here (7.0 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/GlobalWheat2020 # dataset root dir
train: # train images (relative to 'path') 3422 images
- images/arvalis_1
- images/arvalis_2
- images/arvalis_3
- images/ethz_1
- images/rres_1
- images/inrae_1
- images/usask_1
val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
- images/ethz_1
test: # test images (optional) 1276 images
- images/utokyo_1
- images/utokyo_2
- images/nau_1
- images/uq_1
# Classes
names:
0: wheat_head
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from utils.general import download, Path
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
'https://github.com/ultralytics/assets/releases/download/v0.0.0/GlobalWheat2020_labels.zip']
download(urls, dir=dir)
# Make Directories
for p in 'annotations', 'images', 'labels':
(dir / p).mkdir(parents=True, exist_ok=True)
# Move
for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
(dir / p).rename(dir / 'images' / p) # move to /images
f = (dir / p).with_suffix('.json') # json file
if f.exists():
f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations
yolov5/data/ImageNet.yaml Normal file
File diff suppressed because it is too large
yolov5/data/ImageNet10.yaml Normal file
@ -0,0 +1,31 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
# Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
# Example usage: python classify/train.py --data imagenet10
# parent
# ├── yolov5
# └── datasets
# └── imagenet10 ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/imagenet10 # dataset root dir
train: train # train images (relative to 'path')
val: val # val images (relative to 'path')
test: # test images (optional)
# Classes
names:
0: tench
1: goldfish
2: great white shark
3: tiger shark
4: hammerhead shark
5: electric ray
6: stingray
7: cock
8: hen
9: ostrich
# Download script/URL (optional)
download: data/scripts/get_imagenet10.sh
yolov5/data/ImageNet100.yaml Normal file
@ -0,0 +1,120 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
# Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
# Example usage: python classify/train.py --data imagenet100
# parent
# ├── yolov5
# └── datasets
# └── imagenet100 ← downloads here
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/imagenet100 # dataset root dir
train: train # train images (relative to 'path')
val: val # val images (relative to 'path')
test: # test images (optional)
# Classes
names:
0: tench
1: goldfish
2: great white shark
3: tiger shark
4: hammerhead shark
5: electric ray
6: stingray
7: cock
8: hen
9: ostrich
10: brambling
11: goldfinch
12: house finch
13: junco
14: indigo bunting
15: American robin
16: bulbul
17: jay
18: magpie
19: chickadee
20: American dipper
21: kite
22: bald eagle
23: vulture
24: great grey owl
25: fire salamander
26: smooth newt
27: newt
28: spotted salamander
29: axolotl
30: American bullfrog
31: tree frog
32: tailed frog
33: loggerhead sea turtle
34: leatherback sea turtle
35: mud turtle
36: terrapin
37: box turtle
38: banded gecko
39: green iguana
40: Carolina anole
41: desert grassland whiptail lizard
42: agama
43: frilled-necked lizard
44: alligator lizard
45: Gila monster
46: European green lizard
47: chameleon
48: Komodo dragon
49: Nile crocodile
50: American alligator
51: triceratops
52: worm snake
53: ring-necked snake
54: eastern hog-nosed snake
55: smooth green snake
56: kingsnake
57: garter snake
58: water snake
59: vine snake
60: night snake
61: boa constrictor
62: African rock python
63: Indian cobra
64: green mamba
65: sea snake
66: Saharan horned viper
67: eastern diamondback rattlesnake
68: sidewinder
69: trilobite
70: harvestman
71: scorpion
72: yellow garden spider
73: barn spider
74: European garden spider
75: southern black widow
76: tarantula
77: wolf spider
78: tick
79: centipede
80: black grouse
81: ptarmigan
82: ruffed grouse
83: prairie grouse
84: peacock
85: quail
86: partridge
87: grey parrot
88: macaw
89: sulphur-crested cockatoo
90: lorikeet
91: coucal
92: bee eater
93: hornbill
94: hummingbird
95: jacamar
96: toucan
97: duck
98: red-breasted merganser
99: goose
# Download script/URL (optional)
download: data/scripts/get_imagenet100.sh
File diff suppressed because it is too large
yolov5/data/Objects365.yaml Normal file
@ -0,0 +1,437 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Objects365 dataset https://www.objects365.org/ by Megvii
# Example usage: python train.py --data Objects365.yaml
# parent
# ├── yolov5
# └── datasets
# └── Objects365 ← downloads here (712 GB = 367G data + 345G zips)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/Objects365 # dataset root dir
train: images/train # train images (relative to 'path') 1742289 images
val: images/val # val images (relative to 'path') 80000 images
test: # test images (optional)
# Classes
names:
0: Person
1: Sneakers
2: Chair
3: Other Shoes
4: Hat
5: Car
6: Lamp
7: Glasses
8: Bottle
9: Desk
10: Cup
11: Street Lights
12: Cabinet/shelf
13: Handbag/Satchel
14: Bracelet
15: Plate
16: Picture/Frame
17: Helmet
18: Book
19: Gloves
20: Storage box
21: Boat
22: Leather Shoes
23: Flower
24: Bench
25: Potted Plant
26: Bowl/Basin
27: Flag
28: Pillow
29: Boots
30: Vase
31: Microphone
32: Necklace
33: Ring
34: SUV
35: Wine Glass
36: Belt
37: Monitor/TV
38: Backpack
39: Umbrella
40: Traffic Light
41: Speaker
42: Watch
43: Tie
44: Trash bin Can
45: Slippers
46: Bicycle
47: Stool
48: Barrel/bucket
49: Van
50: Couch
51: Sandals
52: Basket
53: Drum
54: Pen/Pencil
55: Bus
56: Wild Bird
57: High Heels
58: Motorcycle
59: Guitar
60: Carpet
61: Cell Phone
62: Bread
63: Camera
64: Canned
65: Truck
66: Traffic cone
67: Cymbal
68: Lifesaver
69: Towel
70: Stuffed Toy
71: Candle
72: Sailboat
73: Laptop
74: Awning
75: Bed
76: Faucet
77: Tent
78: Horse
79: Mirror
80: Power outlet
81: Sink
82: Apple
83: Air Conditioner
84: Knife
85: Hockey Stick
86: Paddle
87: Pickup Truck
88: Fork
89: Traffic Sign
90: Balloon
91: Tripod
92: Dog
93: Spoon
94: Clock
95: Pot
96: Cow
97: Cake
98: Dinning Table
99: Sheep
100: Hanger
101: Blackboard/Whiteboard
102: Napkin
103: Other Fish
104: Orange/Tangerine
105: Toiletry
106: Keyboard
107: Tomato
108: Lantern
109: Machinery Vehicle
110: Fan
111: Green Vegetables
112: Banana
113: Baseball Glove
114: Airplane
115: Mouse
116: Train
117: Pumpkin
118: Soccer
119: Skiboard
120: Luggage
121: Nightstand
122: Tea pot
123: Telephone
124: Trolley
125: Head Phone
126: Sports Car
127: Stop Sign
128: Dessert
129: Scooter
130: Stroller
131: Crane
132: Remote
133: Refrigerator
134: Oven
135: Lemon
136: Duck
137: Baseball Bat
138: Surveillance Camera
139: Cat
140: Jug
141: Broccoli
142: Piano
143: Pizza
144: Elephant
145: Skateboard
146: Surfboard
147: Gun
148: Skating and Skiing shoes
149: Gas stove
150: Donut
151: Bow Tie
152: Carrot
153: Toilet
154: Kite
155: Strawberry
156: Other Balls
157: Shovel
158: Pepper
159: Computer Box
160: Toilet Paper
161: Cleaning Products
162: Chopsticks
163: Microwave
164: Pigeon
165: Baseball
166: Cutting/chopping Board
167: Coffee Table
168: Side Table
169: Scissors
170: Marker
171: Pie
172: Ladder
173: Snowboard
174: Cookies
175: Radiator
176: Fire Hydrant
177: Basketball
178: Zebra
179: Grape
180: Giraffe
181: Potato
182: Sausage
183: Tricycle
184: Violin
185: Egg
186: Fire Extinguisher
187: Candy
188: Fire Truck
189: Billiards
190: Converter
191: Bathtub
192: Wheelchair
193: Golf Club
194: Briefcase
195: Cucumber
196: Cigar/Cigarette
197: Paint Brush
198: Pear
199: Heavy Truck
200: Hamburger
201: Extractor
202: Extension Cord
203: Tong
204: Tennis Racket
205: Folder
206: American Football
207: earphone
208: Mask
209: Kettle
210: Tennis
211: Ship
212: Swing
213: Coffee Machine
214: Slide
215: Carriage
216: Onion
217: Green beans
218: Projector
219: Frisbee
220: Washing Machine/Drying Machine
221: Chicken
222: Printer
223: Watermelon
224: Saxophone
225: Tissue
226: Toothbrush
227: Ice cream
228: Hot-air balloon
229: Cello
230: French Fries
231: Scale
232: Trophy
233: Cabbage
234: Hot dog
235: Blender
236: Peach
237: Rice
238: Wallet/Purse
239: Volleyball
240: Deer
241: Goose
242: Tape
243: Tablet
244: Cosmetics
245: Trumpet
246: Pineapple
247: Golf Ball
248: Ambulance
249: Parking meter
250: Mango
251: Key
252: Hurdle
253: Fishing Rod
254: Medal
255: Flute
256: Brush
257: Penguin
258: Megaphone
259: Corn
260: Lettuce
261: Garlic
262: Swan
263: Helicopter
264: Green Onion
265: Sandwich
266: Nuts
267: Speed Limit Sign
268: Induction Cooker
269: Broom
270: Trombone
271: Plum
272: Rickshaw
273: Goldfish
274: Kiwi fruit
275: Router/modem
276: Poker Card
277: Toaster
278: Shrimp
279: Sushi
280: Cheese
281: Notepaper
282: Cherry
283: Pliers
284: CD
285: Pasta
286: Hammer
287: Cue
288: Avocado
289: Hamimelon
290: Flask
291: Mushroom
292: Screwdriver
293: Soap
294: Recorder
295: Bear
296: Eggplant
297: Board Eraser
298: Coconut
299: Tape Measure/Ruler
300: Pig
301: Showerhead
302: Globe
303: Chips
304: Steak
305: Crosswalk Sign
306: Stapler
307: Camel
308: Formula 1
309: Pomegranate
310: Dishwasher
311: Crab
312: Hoverboard
313: Meat ball
314: Rice Cooker
315: Tuba
316: Calculator
317: Papaya
318: Antelope
319: Parrot
320: Seal
321: Butterfly
322: Dumbbell
323: Donkey
324: Lion
325: Urinal
326: Dolphin
327: Electric Drill
328: Hair Dryer
329: Egg tart
330: Jellyfish
331: Treadmill
332: Lighter
333: Grapefruit
334: Game board
335: Mop
336: Radish
337: Baozi
338: Target
339: French
340: Spring Rolls
341: Monkey
342: Rabbit
343: Pencil Case
344: Yak
345: Red Cabbage
346: Binoculars
347: Asparagus
348: Barbell
349: Scallop
350: Noddles
351: Comb
352: Dumpling
353: Oyster
354: Table Tennis paddle
355: Cosmetics Brush/Eyeliner Pencil
356: Chainsaw
357: Eraser
358: Lobster
359: Durian
360: Okra
361: Lipstick
362: Cosmetics Mirror
363: Curling
364: Table Tennis
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from tqdm import tqdm
from utils.general import Path, check_requirements, download, np, xyxy2xywhn
check_requirements('pycocotools>=2.0')
from pycocotools.coco import COCO
# Make Directories
dir = Path(yaml['path']) # dataset root dir
for p in 'images', 'labels':
(dir / p).mkdir(parents=True, exist_ok=True)
for q in 'train', 'val':
(dir / p / q).mkdir(parents=True, exist_ok=True)
# Train, Val Splits
for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
print(f"Processing {split} in {patches} patches ...")
images, labels = dir / 'images' / split, dir / 'labels' / split
# Download
url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
if split == 'train':
download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json
download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, delete=False, threads=8)
elif split == 'val':
download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json
download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, delete=False, threads=8)
download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, delete=False, threads=8)
# Move
for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
f.rename(images / f.name) # move to /images/{split}
# Labels
coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
for cid, cat in enumerate(names):
catIds = coco.getCatIds(catNms=[cat])
imgIds = coco.getImgIds(catIds=catIds)
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
width, height = im["width"], im["height"]
path = Path(im["file_name"]) # image filename
try:
with open(labels / path.with_suffix('.txt').name, 'a') as file:
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=False)
for a in coco.loadAnns(annIds):
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
xyxy = np.array([x, y, x + w, y + h])[None] # pixels(1,4)
x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0] # normalized and clipped
file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
except Exception as e:
print(e)

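The Objects365 script above leans on `xyxy2xywhn` to turn COCO-style corner boxes into the normalized center format that YOLO labels use. As a reference, here is a minimal standalone sketch of that conversion (an illustrative reimplementation, not the `utils.general` source):

```python
import numpy as np

def xyxy_to_xywhn(xyxy, w, h, clip=True):
    """(N, 4) pixel boxes [x1, y1, x2, y2] -> normalized [x_center, y_center, width, height]."""
    xyxy = np.asarray(xyxy, dtype=np.float64).copy()
    if clip:
        xyxy[:, [0, 2]] = xyxy[:, [0, 2]].clip(0, w)  # clamp x coordinates to the image
        xyxy[:, [1, 3]] = xyxy[:, [1, 3]].clip(0, h)  # clamp y coordinates to the image
    out = np.empty_like(xyxy)
    out[:, 0] = (xyxy[:, 0] + xyxy[:, 2]) / 2 / w  # x center
    out[:, 1] = (xyxy[:, 1] + xyxy[:, 3]) / 2 / h  # y center
    out[:, 2] = (xyxy[:, 2] - xyxy[:, 0]) / w      # width
    out[:, 3] = (xyxy[:, 3] - xyxy[:, 1]) / h      # height
    return out

# a 100x50 box with top-left corner (10, 20) in a 640x480 image
print(xyxy_to_xywhn([[10, 20, 110, 70]], w=640, h=480))  # ~[[0.0938 0.0938 0.1563 0.1042]]
```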
52
yolov5/data/SKU-110K.yaml Normal file
View File

@ -0,0 +1,52 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
# Example usage: python train.py --data SKU-110K.yaml
# parent
# ├── yolov5
# └── datasets
# └── SKU-110K ← downloads here (13.6 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/SKU-110K # dataset root dir
train: train.txt # train images (relative to 'path') 8219 images
val: val.txt # val images (relative to 'path') 588 images
test: test.txt # test images (optional) 2936 images
# Classes
names:
0: object
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import shutil
from tqdm import tqdm
from utils.general import np, pd, Path, download, xyxy2xywh
# Download
dir = Path(yaml['path']) # dataset root dir
parent = Path(dir.parent) # download dir
urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
download(urls, dir=parent, delete=False)
# Rename directories
if dir.exists():
shutil.rmtree(dir)
(parent / 'SKU110K_fixed').rename(dir) # rename dir
(dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir
# Convert labels
names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names
for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations
images, unique_images = x[:, 0], np.unique(x[:, 0])
with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f:
f.writelines(f'./images/{s}\n' for s in unique_images)
for im in tqdm(unique_images, desc=f'Converting {dir / d}'):
cls = 0 # single-class dataset
with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
for r in x[images == im]:
w, h = r[6], r[7] # image width, height
xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance
f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label

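Each converter in these yamls emits one `labels/<image>.txt` per image, with rows of `class x_center y_center width height`, all normalized. A small sketch for reading such a file back, useful for spot-checking the SKU-110K conversion (the helper name and example path are mine):

```python
from pathlib import Path

def read_yolo_labels(txt_path):
    """Parse one YOLO label file into (class_id, xc, yc, w, h) tuples, all in [0, 1]."""
    rows = []
    for line in Path(txt_path).read_text().splitlines():
        cls, xc, yc, w, h = line.split()
        rows.append((int(cls), float(xc), float(yc), float(w), float(h)))
    return rows

# e.g. rows = read_yolo_labels('../datasets/SKU-110K/labels/some_image.txt')  # hypothetical path
```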
99
yolov5/data/VOC.yaml Normal file
View File

@ -0,0 +1,99 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
# Example usage: python train.py --data VOC.yaml
# parent
# ├── yolov5
# └── datasets
# └── VOC ← downloads here (2.8 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VOC
train: # train images (relative to 'path') 16551 images
- images/train2012
- images/train2007
- images/val2012
- images/val2007
val: # val images (relative to 'path') 4952 images
- images/test2007
test: # test images (optional)
- images/test2007
# Classes
names:
0: aeroplane
1: bicycle
2: bird
3: boat
4: bottle
5: bus
6: car
7: cat
8: chair
9: cow
10: diningtable
11: dog
12: horse
13: motorbike
14: person
15: pottedplant
16: sheep
17: sofa
18: train
19: tvmonitor
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import xml.etree.ElementTree as ET
from tqdm import tqdm
from utils.general import download, Path
def convert_label(path, lb_path, year, image_id):
def convert_box(size, box):
dw, dh = 1. / size[0], 1. / size[1]
x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
return x * dw, y * dh, w * dw, h * dh
in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
out_file = open(lb_path, 'w')
tree = ET.parse(in_file)
root = tree.getroot()
size = root.find('size')
w = int(size.find('width').text)
h = int(size.find('height').text)
names = list(yaml['names'].values()) # names list
for obj in root.iter('object'):
cls = obj.find('name').text
if cls in names and int(obj.find('difficult').text) != 1:
xmlbox = obj.find('bndbox')
bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
cls_id = names.index(cls) # class id
out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
# Download
dir = Path(yaml['path']) # dataset root dir
url = 'https://github.com/ultralytics/assets/releases/download/v0.0.0/'
urls = [f'{url}VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images
f'{url}VOCtest_06-Nov-2007.zip', # 438MB, 4953 images
f'{url}VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images
download(urls, dir=dir / 'images', delete=False, curl=True, threads=3)
# Convert
path = dir / 'images/VOCdevkit'
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
imgs_path = dir / 'images' / f'{image_set}{year}'
lbs_path = dir / 'labels' / f'{image_set}{year}'
imgs_path.mkdir(exist_ok=True, parents=True)
lbs_path.mkdir(exist_ok=True, parents=True)
with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
image_ids = f.read().strip().split()
for id in tqdm(image_ids, desc=f'{image_set}{year}'):
f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path
lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path
f.rename(imgs_path / f.name) # move image
convert_label(path, lb_path, year, id) # convert labels to YOLO format

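Note that `convert_box` above receives the box as `(xmin, xmax, ymin, ymax)`, matching the `('xmin', 'xmax', 'ymin', 'ymax')` lookup order, not the usual xyxy layout. A standalone check of the arithmetic (same formula, illustrative values):

```python
def voc_to_yolo(size, box):
    # size = (img_width, img_height); box = (xmin, xmax, ymin, ymax) as in convert_box above
    dw, dh = 1.0 / size[0], 1.0 / size[1]
    x = (box[0] + box[1]) / 2.0 - 1  # center x in pixels (VOC coordinates are 1-based, hence -1)
    y = (box[2] + box[3]) / 2.0 - 1  # center y in pixels
    w = box[1] - box[0]
    h = box[3] - box[2]
    return x * dw, y * dh, w * dw, h * dh

# a 200x100 box spanning x in [100, 300] and y in [50, 150] inside a 500x400 image
print(voc_to_yolo((500, 400), (100.0, 300.0, 50.0, 150.0)))  # (0.398, 0.2475, 0.4, 0.25)
```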
69
yolov5/data/VisDrone.yaml Normal file
View File

@ -0,0 +1,69 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
# Example usage: python train.py --data VisDrone.yaml
# parent
# ├── yolov5
# └── datasets
# └── VisDrone ← downloads here (2.3 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/VisDrone # dataset root dir
train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
# Classes
names:
0: pedestrian
1: people
2: bicycle
3: car
4: van
5: truck
6: tricycle
7: awning-tricycle
8: bus
9: motor
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
from utils.general import download, os, Path
def visdrone2yolo(dir):
from PIL import Image
from tqdm import tqdm
def convert_box(size, box):
# Convert VisDrone box to YOLO xywh box
dw = 1. / size[0]
dh = 1. / size[1]
return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh
(dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory
pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}')
for f in pbar:
img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size
lines = []
with open(f, 'r') as file: # read annotation.txt
for row in [x.split(',') for x in file.read().strip().splitlines()]:
if row[4] == '0': # VisDrone 'ignored regions' class 0
continue
cls = int(row[5]) - 1
box = convert_box(img_size, tuple(map(int, row[:4])))
lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl:
fl.writelines(lines) # write label.txt
# Download
dir = Path(yaml['path']) # dataset root dir
urls = ['https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-train.zip',
'https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-val.zip',
'https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-test-dev.zip',
'https://github.com/ultralytics/assets/releases/download/v0.0.0/VisDrone2019-DET-test-challenge.zip']
download(urls, dir=dir, curl=True, threads=4)
# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
visdrone2yolo(dir / d) # convert VisDrone annotations to YOLO labels

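VisDrone annotations store boxes as top-left `(x, y, w, h)` in pixels, so `convert_box` above only has to shift to the box center and normalize. The same formula with concrete numbers:

```python
def visdrone_to_yolo(size, box):
    # size = (img_width, img_height); box = (x_topleft, y_topleft, w, h) in pixels
    dw, dh = 1.0 / size[0], 1.0 / size[1]
    return ((box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh)

# a 100x50 box at top-left (680, 380) in a 1360x765 VisDrone frame
print(visdrone_to_yolo((1360, 765), (680, 380, 100, 50)))  # ~(0.5368, 0.5294, 0.0735, 0.0654)
```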
115
yolov5/data/coco.yaml Normal file
View File

@ -0,0 +1,115 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# COCO 2017 dataset http://cocodataset.org by Microsoft
# Example usage: python train.py --data coco.yaml
# parent
# ├── yolov5
# └── datasets
# └── coco ← downloads here (20.1 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco # dataset root dir
train: train2017.txt # train images (relative to 'path') 118287 images
val: val2017.txt # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: |
from utils.general import download, Path
# Download labels
segments = False # segment or box labels
dir = Path(yaml['path']) # dataset root dir
url = 'https://github.com/ultralytics/assets/releases/download/v0.0.0/'
urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels
download(urls, dir=dir.parent)
# Download data
urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images
'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images
'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional)
download(urls, dir=dir / 'images', threads=3)

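For context on how these `download:` blocks run: when the dataset paths are missing, the trainer executes the multi-line string as Python with the parsed yaml dict bound to a local name `yaml`, which is why the scripts can write `Path(yaml['path'])`. A rough, simplified sketch of that mechanism (assumed behavior, not the exact `check_dataset` source):

```python
import yaml as pyyaml  # PyYAML

with open("yolov5/data/coco.yaml", encoding="utf-8") as f:  # path assumed
    data = pyyaml.safe_load(f)

script = data.get("download", "")
if isinstance(script, str) and script.startswith(("http://", "https://")):
    print(f"would fetch a single archive: {script}")
else:
    exec(script, {"yaml": data})  # the script body sees the parsed dict as `yaml`
```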
100
yolov5/data/coco128-seg.yaml Normal file
View File

@ -0,0 +1,100 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# COCO128-seg dataset https://www.kaggle.com/datasets/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Example usage: python train.py --data coco128-seg.yaml
# parent
# ├── yolov5
# └── datasets
# └── coco128-seg ← downloads here (7 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128-seg # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://github.com/ultralytics/assets/releases/download/v0.0.0/coco128-seg.zip

100
yolov5/data/coco128.yaml Normal file
View File

@ -0,0 +1,100 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# COCO128 dataset https://www.kaggle.com/datasets/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
# Example usage: python train.py --data coco128.yaml
# parent
# ├── yolov5
# └── datasets
# └── coco128 ← downloads here (7 MB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/coco128 # dataset root dir
train: images/train2017 # train images (relative to 'path') 128 images
val: images/train2017 # val images (relative to 'path') 128 images
test: # test images (optional)
# Classes
names:
0: person
1: bicycle
2: car
3: motorcycle
4: airplane
5: bus
6: train
7: truck
8: boat
9: traffic light
10: fire hydrant
11: stop sign
12: parking meter
13: bench
14: bird
15: cat
16: dog
17: horse
18: sheep
19: cow
20: elephant
21: bear
22: zebra
23: giraffe
24: backpack
25: umbrella
26: handbag
27: tie
28: suitcase
29: frisbee
30: skis
31: snowboard
32: sports ball
33: kite
34: baseball bat
35: baseball glove
36: skateboard
37: surfboard
38: tennis racket
39: bottle
40: wine glass
41: cup
42: fork
43: knife
44: spoon
45: bowl
46: banana
47: apple
48: sandwich
49: orange
50: broccoli
51: carrot
52: hot dog
53: pizza
54: donut
55: cake
56: chair
57: couch
58: potted plant
59: bed
60: dining table
61: toilet
62: tv
63: laptop
64: mouse
65: remote
66: keyboard
67: cell phone
68: microwave
69: oven
70: toaster
71: sink
72: refrigerator
73: book
74: clock
75: vase
76: scissors
77: teddy bear
78: hair drier
79: toothbrush
# Download script/URL (optional)
download: https://github.com/ultralytics/assets/releases/download/v0.0.0/coco128.zip

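coco128 deliberately points `train` and `val` at the same 128 images; it is a quick overfit/sanity-check set rather than a benchmark. A small check that its class map is a contiguous 0..79 mapping (file path assumed from this diff):

```python
import yaml

with open("yolov5/data/coco128.yaml", encoding="utf-8") as f:
    names = yaml.safe_load(f)["names"]

assert sorted(names) == list(range(80)), "class ids must be contiguous from 0"
print(len(names), "classes, e.g.", names[0], "...", names[79])  # person ... toothbrush
```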
35
yolov5/data/hyps/hyp.Objects365.yaml Normal file
View File

@ -0,0 +1,35 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Hyperparameters for Objects365 training
# python train.py --weights yolov5m.pt --data Objects365.yaml --evolve
# See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials
lr0: 0.00258
lrf: 0.17
momentum: 0.779
weight_decay: 0.00058
warmup_epochs: 1.33
warmup_momentum: 0.86
warmup_bias_lr: 0.0711
box: 0.0539
cls: 0.299
cls_pw: 0.825
obj: 0.632
obj_pw: 1.0
iou_t: 0.2
anchor_t: 3.44
anchors: 3.2
fl_gamma: 0.0
hsv_h: 0.0188
hsv_s: 0.704
hsv_v: 0.36
degrees: 0.0
translate: 0.0902
scale: 0.491
shear: 0.0
perspective: 0.0
flipud: 0.0
fliplr: 0.5
mosaic: 1.0
mixup: 0.0
copy_paste: 0.0

41
yolov5/data/hyps/hyp.VOC.yaml Normal file
View File

@ -0,0 +1,41 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Hyperparameters for VOC training
# python train.py --batch 128 --weights yolov5m6.pt --data VOC.yaml --epochs 50 --img 512 --hyp hyp.scratch-med.yaml --evolve
# See Hyperparameter Evolution tutorial for details https://github.com/ultralytics/yolov5#tutorials
# YOLOv5 Hyperparameter Evolution Results
# Best generation: 467
# Last generation: 996
# metrics/precision, metrics/recall, metrics/mAP_0.5, metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss
# 0.87729, 0.85125, 0.91286, 0.72664, 0.0076739, 0.0042529, 0.0013865
lr0: 0.00334
lrf: 0.15135
momentum: 0.74832
weight_decay: 0.00025
warmup_epochs: 3.3835
warmup_momentum: 0.59462
warmup_bias_lr: 0.18657
box: 0.02
cls: 0.21638
cls_pw: 0.5
obj: 0.51728
obj_pw: 0.67198
iou_t: 0.2
anchor_t: 3.3744
fl_gamma: 0.0
hsv_h: 0.01041
hsv_s: 0.54703
hsv_v: 0.27739
degrees: 0.0
translate: 0.04591
scale: 0.75544
shear: 0.0
perspective: 0.0
flipud: 0.0
fliplr: 0.5
mosaic: 0.85834
mixup: 0.04266
copy_paste: 0.0
anchors: 3.412

36
yolov5/data/hyps/hyp.no-augmentation.yaml Normal file
View File

@ -0,0 +1,36 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Hyperparameters for training with augmentation delegated to the Albumentations framework
# python train.py --hyp hyp.no-augmentation.yaml
# See https://github.com/ultralytics/yolov5/pull/3882 for YOLOv5 + Albumentations Usage examples
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 0.05 # box loss gain
cls: 0.3 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 0.7 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
# anchors: 3 # anchors per output layer (0 to ignore)
# these parameters are all zero because augmentation is delegated to the Albumentations framework
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0 # image HSV-Hue augmentation (fraction)
hsv_s: 0 # image HSV-Saturation augmentation (fraction)
hsv_v: 0 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0 # image translation (+/- fraction)
scale: 0 # image scale (+/- gain)
shear: 0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.0 # image flip left-right (probability)
mosaic: 0.0 # image mosaic (probability)
mixup: 0.0 # image mixup (probability)
copy_paste: 0.0 # segment copy-paste (probability)

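Hyperparameter files like the one above are flat yaml dicts that `train.py --hyp` loads. A minimal sketch of loading one and confirming the in-loop augmentation knobs really are off (file path assumed from this diff):

```python
import yaml

with open("yolov5/data/hyps/hyp.no-augmentation.yaml", encoding="utf-8") as f:
    hyp = yaml.safe_load(f)

off = ("hsv_h", "hsv_s", "hsv_v", "degrees", "translate", "scale", "shear",
       "perspective", "flipud", "fliplr", "mosaic", "mixup", "copy_paste")
assert all(hyp[k] == 0 for k in off), "in-loop augmentation should be disabled in this file"
print({k: hyp[k] for k in ("lr0", "momentum", "weight_decay")})  # optimizer settings stay active
```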
35
yolov5/data/hyps/hyp.scratch-high.yaml Normal file
View File

@ -0,0 +1,35 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Hyperparameters for high-augmentation COCO training from scratch
# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 0.05 # box loss gain
cls: 0.3 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 0.7 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
# anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.9 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.1 # image mixup (probability)
copy_paste: 0.1 # segment copy-paste (probability)

35
yolov5/data/hyps/hyp.scratch-low.yaml Normal file
View File

@ -0,0 +1,35 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Hyperparameters for low-augmentation COCO training from scratch
# python train.py --batch 64 --cfg yolov5n6.yaml --weights '' --data coco.yaml --img 640 --epochs 300 --linear
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 0.05 # box loss gain
cls: 0.5 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 1.0 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
# anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.5 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.0 # image mixup (probability)
copy_paste: 0.0 # segment copy-paste (probability)

35
yolov5/data/hyps/hyp.scratch-med.yaml Normal file
View File

@ -0,0 +1,35 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Hyperparameters for medium-augmentation COCO training from scratch
# python train.py --batch 32 --cfg yolov5m6.yaml --weights '' --data coco.yaml --img 1280 --epochs 300
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
box: 0.05 # box loss gain
cls: 0.3 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 0.7 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.20 # IoU training threshold
anchor_t: 4.0 # anchor-multiple threshold
# anchors: 3 # anchors per output layer (0 to ignore)
fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4 # image HSV-Value augmentation (fraction)
degrees: 0.0 # image rotation (+/- deg)
translate: 0.1 # image translation (+/- fraction)
scale: 0.9 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.0 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability)
mosaic: 1.0 # image mosaic (probability)
mixup: 0.1 # image mixup (probability)
copy_paste: 0.0 # segment copy-paste (probability)

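The three scratch presets above share most values and differ mainly in loss gains and augmentation strength. A small helper to diff any two of them (the helper name is mine):

```python
import yaml

def diff_hyps(a_path, b_path):
    """Return {key: (a_value, b_value)} for every hyperparameter that differs."""
    with open(a_path, encoding="utf-8") as fa, open(b_path, encoding="utf-8") as fb:
        a, b = yaml.safe_load(fa), yaml.safe_load(fb)
    return {k: (a.get(k), b.get(k)) for k in sorted(set(a) | set(b)) if a.get(k) != b.get(k)}

# diff_hyps('hyp.scratch-low.yaml', 'hyp.scratch-high.yaml') should report, per the files above:
# lrf, cls, obj, scale, mixup, copy_paste
```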
BIN
yolov5/data/images/bus.jpg Normal file

Binary file not shown.

Size: 476 KiB

BIN
yolov5/data/images/zidane.jpg Normal file

Binary file not shown.

Size: 165 KiB

22
yolov5/data/scripts/download_weights.sh Normal file
View File

@ -0,0 +1,22 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Download latest models from https://github.com/ultralytics/yolov5/releases
# Example usage: bash data/scripts/download_weights.sh
# parent
# └── yolov5
# ├── yolov5s.pt ← downloads here
# ├── yolov5m.pt
# └── ...
python - <<EOF
from utils.downloads import attempt_download
p5 = list('nsmlx') # P5 models
p6 = [f'{x}6' for x in p5] # P6 models
cls = [f'{x}-cls' for x in p5] # classification models
seg = [f'{x}-seg' for x in p5] # segmentation models
for x in p5 + p6 + cls + seg:
attempt_download(f'weights/yolov5{x}.pt')
EOF

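The heredoc above expands to twenty checkpoint names: five sizes, each in P5, P6, classification, and segmentation variants. The naming scheme can be reproduced offline:

```python
# enumerate the checkpoint filenames download_weights.sh fetches (no network access needed)
p5 = list("nsmlx")                 # P5 models: n, s, m, l, x
p6 = [f"{x}6" for x in p5]         # P6 models
cls = [f"{x}-cls" for x in p5]     # classification models
seg = [f"{x}-seg" for x in p5]     # segmentation models
names = [f"yolov5{x}.pt" for x in p5 + p6 + cls + seg]
print(len(names), "weights:", ", ".join(names[:4]), "...")  # 20 weights: yolov5n.pt, ...
```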
56
yolov5/data/scripts/get_coco.sh Normal file
View File

@ -0,0 +1,56 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Download COCO 2017 dataset http://cocodataset.org
# Example usage: bash data/scripts/get_coco.sh
# parent
# ├── yolov5
# └── datasets
# └── coco ← downloads here
# Arguments (optional) Usage: bash data/scripts/get_coco.sh --train --val --test --segments
if [ "$#" -gt 0 ]; then
for opt in "$@"; do
case "${opt}" in
--train) train=true ;;
--val) val=true ;;
--test) test=true ;;
--segments) segments=true ;;
esac
done
else
train=true
val=true
test=false
segments=false
fi
# Download/unzip labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
if [ "$segments" == "true" ]; then
f='coco2017labels-segments.zip' # 168 MB
else
f='coco2017labels.zip' # 46 MB
fi
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
# Download/unzip images
d='../datasets/coco/images' # unzip directory
url=http://images.cocodataset.org/zips/
if [ "$train" == "true" ]; then
f='train2017.zip' # 19G, 118k images
echo 'Downloading' $url$f '...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
if [ "$val" == "true" ]; then
f='val2017.zip' # 1G, 5k images
echo 'Downloading' $url$f '...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
if [ "$test" == "true" ]; then
f='test2017.zip' # 7G, 41k images (optional)
echo 'Downloading' $url$f '...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
fi
wait # finish background tasks

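get_coco.sh backgrounds each curl/unzip pipeline with `&` and joins them with `wait`, so labels and image splits download in parallel. Roughly the same flow is available from Python via the repo's `download` helper used throughout the dataset yamls above. A sketch (run from the yolov5 root, assuming the helper's defaults unzip the archives):

```python
from pathlib import Path

from utils.general import download  # the same helper the dataset yamls call

d = Path("../datasets")
download(["https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels.zip"], dir=d)
download(
    ["http://images.cocodataset.org/zips/train2017.zip",
     "http://images.cocodataset.org/zips/val2017.zip"],
    dir=d / "coco" / "images",
    threads=2,  # fetch both archives concurrently
)
```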
17
yolov5/data/scripts/get_coco128.sh Normal file
View File

@ -0,0 +1,17 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Download COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017)
# Example usage: bash data/scripts/get_coco128.sh
# parent
# ├── yolov5
# └── datasets
# └── coco128 ← downloads here
# Download/unzip images and labels
d='../datasets' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco128.zip' # or 'coco128-segments.zip', 68 MB
echo 'Downloading' $url$f ' ...'
curl -L $url$f -o $f -# && unzip -q $f -d $d && rm $f &
wait # finish background tasks

51
yolov5/data/scripts/get_imagenet.sh Normal file
View File

@ -0,0 +1,51 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Download ILSVRC2012 ImageNet dataset https://image-net.org
# Example usage: bash data/scripts/get_imagenet.sh
# parent
# ├── yolov5
# └── datasets
# └── imagenet ← downloads here
# Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val
if [ "$#" -gt 0 ]; then
for opt in "$@"; do
case "${opt}" in
--train) train=true ;;
--val) val=true ;;
esac
done
else
train=true
val=true
fi
# Make dir
d='../datasets/imagenet' # unzip directory
mkdir -p $d && cd $d
# Download/unzip train
if [ "$train" == "true" ]; then
wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar # download 138G, 1281167 images
mkdir train && mv ILSVRC2012_img_train.tar train/ && cd train
tar -xf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar
find . -name "*.tar" | while read NAME; do
mkdir -p "${NAME%.tar}"
tar -xf "${NAME}" -C "${NAME%.tar}"
rm -f "${NAME}"
done
cd ..
fi
# Download/unzip val
if [ "$val" == "true" ]; then
wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar # download 6.3G, 50000 images
mkdir val && mv ILSVRC2012_img_val.tar val/ && cd val && tar -xf ILSVRC2012_img_val.tar
wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash # move into subdirs
fi
# Delete corrupted image (optional: PNG under JPEG name that may cause dataloaders to fail)
# rm train/n04266014/n04266014_10835.JPEG
# TFRecords (optional)
# wget https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_lsvrc_2015_synsets.txt

29
yolov5/data/scripts/get_imagenet10.sh Normal file
View File

@ -0,0 +1,29 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Download the imagenet10 subset of ILSVRC2012 ImageNet https://image-net.org
# Example usage: bash data/scripts/get_imagenet10.sh
# parent
# ├── yolov5
# └── datasets
# └── imagenet ← downloads here
# Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val
if [ "$#" -gt 0 ]; then
for opt in "$@"; do
case "${opt}" in
--train) train=true ;;
--val) val=true ;;
esac
done
else
train=true
val=true
fi
# Make dir
d='../datasets/imagenet10' # unzip directory
mkdir -p $d && cd $d
# Download/unzip imagenet10
wget https://github.com/ultralytics/yolov5/releases/download/v1.0/imagenet10.zip
unzip imagenet10.zip && rm imagenet10.zip

29
yolov5/data/scripts/get_imagenet100.sh Normal file
View File

@ -0,0 +1,29 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Download the imagenet100 subset of ILSVRC2012 ImageNet https://image-net.org
# Example usage: bash data/scripts/get_imagenet100.sh
# parent
# ├── yolov5
# └── datasets
# └── imagenet ← downloads here
# Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val
if [ "$#" -gt 0 ]; then
for opt in "$@"; do
case "${opt}" in
--train) train=true ;;
--val) val=true ;;
esac
done
else
train=true
val=true
fi
# Make dir
d='../datasets/imagenet100' # unzip directory
mkdir -p $d && cd $d
# Download/unzip imagenet100
wget https://github.com/ultralytics/yolov5/releases/download/v1.0/imagenet100.zip
unzip imagenet100.zip && rm imagenet100.zip

29
yolov5/data/scripts/get_imagenet1000.sh Normal file
View File

@ -0,0 +1,29 @@
#!/bin/bash
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Download the imagenet1000 subset of ILSVRC2012 ImageNet https://image-net.org
# Example usage: bash data/scripts/get_imagenet1000.sh
# parent
# ├── yolov5
# └── datasets
# └── imagenet ← downloads here
# Arguments (optional) Usage: bash data/scripts/get_imagenet.sh --train --val
if [ "$#" -gt 0 ]; then
for opt in "$@"; do
case "${opt}" in
--train) train=true ;;
--val) val=true ;;
esac
done
else
train=true
val=true
fi
# Make dir
d='../datasets/imagenet1000' # unzip directory
mkdir -p $d && cd $d
# Download/unzip imagenet1000
wget https://github.com/ultralytics/yolov5/releases/download/v1.0/imagenet1000.zip
unzip imagenet1000.zip && rm imagenet1000.zip

152
yolov5/data/xView.yaml Normal file
View File

@ -0,0 +1,152 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
# -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! --------
# Example usage: python train.py --data xView.yaml
# parent
# ├── yolov5
# └── datasets
# └── xView ← downloads here (20.7 GB)
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: ../datasets/xView # dataset root dir
train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
# Classes
names:
0: Fixed-wing Aircraft
1: Small Aircraft
2: Cargo Plane
3: Helicopter
4: Passenger Vehicle
5: Small Car
6: Bus
7: Pickup Truck
8: Utility Truck
9: Truck
10: Cargo Truck
11: Truck w/Box
12: Truck Tractor
13: Trailer
14: Truck w/Flatbed
15: Truck w/Liquid
16: Crane Truck
17: Railway Vehicle
18: Passenger Car
19: Cargo Car
20: Flat Car
21: Tank car
22: Locomotive
23: Maritime Vessel
24: Motorboat
25: Sailboat
26: Tugboat
27: Barge
28: Fishing Vessel
29: Ferry
30: Yacht
31: Container Ship
32: Oil Tanker
33: Engineering Vehicle
34: Tower crane
35: Container Crane
36: Reach Stacker
37: Straddle Carrier
38: Mobile Crane
39: Dump Truck
40: Haul Truck
41: Scraper/Tractor
42: Front loader/Bulldozer
43: Excavator
44: Cement Mixer
45: Ground Grader
46: Hut/Tent
47: Shed
48: Building
49: Aircraft Hangar
50: Damaged Building
51: Facility
52: Construction Site
53: Vehicle Lot
54: Helipad
55: Storage Tank
56: Shipping container lot
57: Shipping Container
58: Pylon
59: Tower
# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
import json
import os
from pathlib import Path
import numpy as np
from PIL import Image
from tqdm import tqdm
from utils.dataloaders import autosplit
from utils.general import download, xyxy2xywhn
def convert_labels(fname=Path('xView/xView_train.geojson')):
# Convert xView geoJSON labels to YOLO format
path = fname.parent
with open(fname) as f:
print(f'Loading {fname}...')
data = json.load(f)
# Make dirs
labels = Path(path / 'labels' / 'train')
os.system(f'rm -rf {labels}')
labels.mkdir(parents=True, exist_ok=True)
# xView classes 11-94 to 0-59
xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]
shapes = {}
for feature in tqdm(data['features'], desc=f'Converting {fname}'):
p = feature['properties']
if p['bounds_imcoords']:
id = p['image_id']
file = path / 'train_images' / id
if file.exists(): # 1395.tif missing
try:
box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
cls = p['type_id']
cls = xview_class2index[int(cls)] # remap xView class 11-94 to 0-59
assert 59 >= cls >= 0, f'incorrect class index {cls}'
# Write YOLO label
if id not in shapes:
shapes[id] = Image.open(file).size
box = xyxy2xywhn(box[None].astype(np.float64), w=shapes[id][0], h=shapes[id][1], clip=True)  # np.float alias removed in NumPy 1.24+
with open((labels / id).with_suffix('.txt'), 'a') as f:
f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n") # write label.txt
except Exception as e:
print(f'WARNING: skipping one label for {file}: {e}')
# Download manually from https://challenge.xviewdataset.org
dir = Path(yaml['path']) # dataset root dir
# urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels
# 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images
# 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels)
# download(urls, dir=dir, delete=False)
# Convert labels
convert_labels(dir / 'xView_train.geojson')
# Move images
images = Path(dir / 'images')
images.mkdir(parents=True, exist_ok=True)
Path(dir / 'train_images').rename(dir / 'images' / 'train')
Path(dir / 'val_images').rename(dir / 'images' / 'val')
# Split
autosplit(dir / 'images' / 'train')

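The `xview_class2index` table above is the load-bearing piece of the converter: xView `type_id`s run from 11 to 94 with gaps, and the table collapses them onto contiguous ids 0..59, with unused ids mapped to -1. A quick standalone sanity check of that property, using the table verbatim:

```python
xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
                     12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
                     29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
                     47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]

mapped = [c for c in xview_class2index if c != -1]
assert sorted(mapped) == list(range(60)), "every class id 0..59 should appear exactly once"
print(f"{len(mapped)} classes mapped from {len(xview_class2index)} raw type_ids")
```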
438
yolov5/detect.py Normal file
View File

@ -0,0 +1,438 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
Usage - sources:
$ python detect.py --weights yolov5s.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
screen # screenshot
path/ # directory
list.txt # list of images
list.streams # list of streams
'path/*.jpg' # glob
'https://youtu.be/LNwODJXcvt4' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
Usage - formats:
$ python detect.py --weights yolov5s.pt # PyTorch
yolov5s.torchscript # TorchScript
yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s_openvino_model # OpenVINO
yolov5s.engine # TensorRT
yolov5s.mlpackage # CoreML (macOS-only)
yolov5s_saved_model # TensorFlow SavedModel
yolov5s.pb # TensorFlow GraphDef
yolov5s.tflite # TensorFlow Lite
yolov5s_edgetpu.tflite # TensorFlow Edge TPU
yolov5s_paddle_model # PaddlePaddle
"""
import argparse
import csv
import os
import platform
import sys
from pathlib import Path
import torch
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from ultralytics.utils.plotting import Annotator, colors, save_one_box
from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (
LOGGER,
Profile,
check_file,
check_img_size,
check_imshow,
check_requirements,
colorstr,
cv2,
increment_path,
non_max_suppression,
print_args,
scale_boxes,
strip_optimizer,
xyxy2xywh,
)
from utils.torch_utils import select_device, smart_inference_mode
@smart_inference_mode()
def run(
weights=ROOT / "yolov5s.pt", # model path or triton URL
source=ROOT / "data/images", # file/dir/URL/glob/screen/0(webcam)
data=ROOT / "data/coco128.yaml", # dataset.yaml path
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
save_format=0, # save boxes coordinates in YOLO format or Pascal-VOC format (0 for YOLO and 1 for Pascal-VOC)
save_csv=False, # save results in CSV format
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
nosave=False, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project=ROOT / "runs/detect", # save results to project/name
name="exp", # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
):
"""
Runs YOLOv5 detection inference on various sources like images, videos, directories, streams, etc.
Args:
weights (str | Path): Path to the model weights file or a Triton URL. Default is 'yolov5s.pt'.
source (str | Path): Input source, which can be a file, directory, URL, glob pattern, screen capture, or webcam
index. Default is 'data/images'.
data (str | Path): Path to the dataset YAML file. Default is 'data/coco128.yaml'.
imgsz (tuple[int, int]): Inference image size as a tuple (height, width). Default is (640, 640).
conf_thres (float): Confidence threshold for detections. Default is 0.25.
iou_thres (float): Intersection Over Union (IOU) threshold for non-max suppression. Default is 0.45.
max_det (int): Maximum number of detections per image. Default is 1000.
device (str): CUDA device identifier (e.g., '0' or '0,1,2,3') or 'cpu'. Default is an empty string, which uses the
best available device.
view_img (bool): If True, display inference results using OpenCV. Default is False.
save_txt (bool): If True, save results in a text file. Default is False.
save_csv (bool): If True, save results in a CSV file. Default is False.
save_conf (bool): If True, include confidence scores in the saved results. Default is False.
save_crop (bool): If True, save cropped prediction boxes. Default is False.
nosave (bool): If True, do not save inference images or videos. Default is False.
classes (list[int]): List of class indices to filter detections by. Default is None.
agnostic_nms (bool): If True, perform class-agnostic non-max suppression. Default is False.
augment (bool): If True, use augmented inference. Default is False.
visualize (bool): If True, visualize feature maps. Default is False.
update (bool): If True, update all models' weights. Default is False.
project (str | Path): Directory to save results. Default is 'runs/detect'.
name (str): Name of the current experiment; used to create a subdirectory within 'project'. Default is 'exp'.
exist_ok (bool): If True, existing directories with the same name are reused instead of being incremented. Default is
False.
line_thickness (int): Thickness of bounding box lines in pixels. Default is 3.
hide_labels (bool): If True, do not display labels on bounding boxes. Default is False.
hide_conf (bool): If True, do not display confidence scores on bounding boxes. Default is False.
half (bool): If True, use FP16 half-precision inference. Default is False.
dnn (bool): If True, use OpenCV DNN backend for ONNX inference. Default is False.
vid_stride (int): Stride for processing video frames, to skip frames between processing. Default is 1.
Returns:
None
Examples:
```python
from detect import run
# Run inference on an image
run(source='data/images/example.jpg', weights='yolov5s.pt', device='0')
# Run inference on a video with specific confidence threshold
run(source='data/videos/example.mp4', weights='yolov5s.pt', conf_thres=0.4, device='0')
```
"""
source = str(source)
save_img = not nosave and not source.endswith(".txt") # save inference images
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
screenshot = source.lower().startswith("screen")
if is_url and is_file:
source = check_file(source) # download
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
bs = 1 # batch_size
if webcam:
view_img = check_imshow(warn=True)
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
bs = len(dataset)
elif screenshot:
dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup
seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device))
for path, im, im0s, vid_cap, s in dataset:
with dt[0]:
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
if model.xml and im.shape[0] > 1:
ims = torch.chunk(im, im.shape[0], 0)
# Inference
with dt[1]:
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
if model.xml and im.shape[0] > 1:
pred = None
for image in ims:
if pred is None:
pred = model(image, augment=augment, visualize=visualize).unsqueeze(0)
else:
pred = torch.cat((pred, model(image, augment=augment, visualize=visualize).unsqueeze(0)), dim=0)
pred = [pred, None]
else:
pred = model(im, augment=augment, visualize=visualize)
# NMS
with dt[2]:
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
# Second-stage classifier (optional)
# pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
# Define the path for the CSV file
csv_path = save_dir / "predictions.csv"
# Create or append to the CSV file
def write_to_csv(image_name, prediction, confidence):
"""Writes prediction data for an image to a CSV file, appending if the file exists."""
data = {"Image Name": image_name, "Prediction": prediction, "Confidence": confidence}
file_exists = os.path.isfile(csv_path)
with open(csv_path, mode="a", newline="") as f:
writer = csv.DictWriter(f, fieldnames=data.keys())
if not file_exists:
writer.writeheader()
writer.writerow(data)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
if webcam: # batch_size >= 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
s += f"{i}: "
else:
p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # im.jpg
txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") # im.txt
s += "{:g}x{:g} ".format(*im.shape[2:]) # print string
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
imc = im0.copy() if save_crop else im0 # for save_crop
annotator = Annotator(im0, line_width=line_thickness, example=str(names))
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, 5].unique():
n = (det[:, 5] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
# Write results
for *xyxy, conf, cls in reversed(det):
c = int(cls) # integer class
label = names[c] if hide_conf else f"{names[c]}"
confidence = float(conf)
confidence_str = f"{confidence:.2f}"
if save_csv:
write_to_csv(p.name, label, confidence_str)
if save_txt: # Write to file
if save_format == 0:
coords = (
(xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
) # normalized xywh
else:
coords = (torch.tensor(xyxy).view(1, 4) / gn).view(-1).tolist() # xyxy
line = (cls, *coords, conf) if save_conf else (cls, *coords) # label format
with open(f"{txt_path}.txt", "a") as f:
f.write(("%g " * len(line)).rstrip() % line + "\n")
if save_img or save_crop or view_img: # Add bbox to image
c = int(cls) # integer class
label = None if hide_labels else (names[c] if hide_conf else f"{names[c]} {conf:.2f}")
annotator.box_label(xyxy, label, color=colors(c, True))
if save_crop:
save_one_box(xyxy, imc, file=save_dir / "crops" / names[c] / f"{p.stem}.jpg", BGR=True)
# Stream results
im0 = annotator.result()
if view_img:
if platform.system() == "Linux" and p not in windows:
windows.append(p)
cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
cv2.imshow(str(p), im0)
cv2.waitKey(1) # 1 millisecond
# Save results (image with detections)
if save_img:
if dataset.mode == "image":
cv2.imwrite(save_path, im0)
else: # 'video' or 'stream'
if vid_path[i] != save_path: # new video
vid_path[i] = save_path
if isinstance(vid_writer[i], cv2.VideoWriter):
vid_writer[i].release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path = str(Path(save_path).with_suffix(".mp4")) # force *.mp4 suffix on results videos
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
vid_writer[i].write(im0)
# Print time (inference-only)
LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1e3:.1f}ms")
# Print results
t = tuple(x.t / seen * 1e3 for x in dt) # speeds per image
LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning)
def parse_opt():
"""
Parse command-line arguments for YOLOv5 detection, allowing custom inference options and model configurations.
Args:
--weights (str | list[str], optional): Model path or Triton URL. Defaults to ROOT / 'yolov5s.pt'.
--source (str, optional): File/dir/URL/glob/screen/0(webcam). Defaults to ROOT / 'data/images'.
--data (str, optional): Dataset YAML path. Provides dataset configuration information.
--imgsz (list[int], optional): Inference size (height, width). Defaults to [640].
--conf-thres (float, optional): Confidence threshold. Defaults to 0.25.
--iou-thres (float, optional): NMS IoU threshold. Defaults to 0.45.
--max-det (int, optional): Maximum number of detections per image. Defaults to 1000.
--device (str, optional): CUDA device, i.e., '0' or '0,1,2,3' or 'cpu'. Defaults to "".
--view-img (bool, optional): Flag to display results. Defaults to False.
--save-txt (bool, optional): Flag to save results to *.txt files. Defaults to False.
--save-csv (bool, optional): Flag to save results in CSV format. Defaults to False.
--save-conf (bool, optional): Flag to save confidences in labels saved via --save-txt. Defaults to False.
--save-crop (bool, optional): Flag to save cropped prediction boxes. Defaults to False.
--nosave (bool, optional): Flag to prevent saving images/videos. Defaults to False.
--classes (list[int], optional): List of classes to filter results by, e.g., '--classes 0 2 3'. Defaults to None.
--agnostic-nms (bool, optional): Flag for class-agnostic NMS. Defaults to False.
--augment (bool, optional): Flag for augmented inference. Defaults to False.
--visualize (bool, optional): Flag for visualizing features. Defaults to False.
--update (bool, optional): Flag to update all models in the model directory. Defaults to False.
--project (str, optional): Directory to save results. Defaults to ROOT / 'runs/detect'.
--name (str, optional): Sub-directory name for saving results within --project. Defaults to 'exp'.
--exist-ok (bool, optional): Flag to allow overwriting if the project/name already exists. Defaults to False.
--line-thickness (int, optional): Thickness (in pixels) of bounding boxes. Defaults to 3.
--hide-labels (bool, optional): Flag to hide labels in the output. Defaults to False.
--hide-conf (bool, optional): Flag to hide confidences in the output. Defaults to False.
--half (bool, optional): Flag to use FP16 half-precision inference. Defaults to False.
--dnn (bool, optional): Flag to use OpenCV DNN for ONNX inference. Defaults to False.
--vid-stride (int, optional): Video frame-rate stride, determining the number of frames to skip in between
consecutive frames. Defaults to 1.
Returns:
argparse.Namespace: Parsed command-line arguments as an argparse.Namespace object.
Example:
```python
from detect import parse_opt
args = parse_opt()
```
"""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s.pt", help="model path or triton URL")
parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="(optional) dataset.yaml path")
parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold")
parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")
parser.add_argument("--max-det", type=int, default=1000, help="maximum detections per image")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
parser.add_argument(
"--save-format",
type=int,
default=0,
help="whether to save boxes coordinates in YOLO format or Pascal-VOC format when save-txt is True, 0 for YOLO and 1 for Pascal-VOC",
)
parser.add_argument("--save-csv", action="store_true", help="save results in CSV format")
parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")
parser.add_argument("--save-crop", action="store_true", help="save cropped prediction boxes")
parser.add_argument("--nosave", action="store_true", help="do not save images/videos")
parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3")
parser.add_argument("--agnostic-nms", action="store_true", help="class-agnostic NMS")
parser.add_argument("--augment", action="store_true", help="augmented inference")
parser.add_argument("--visualize", action="store_true", help="visualize features")
parser.add_argument("--update", action="store_true", help="update all models")
parser.add_argument("--project", default=ROOT / "runs/detect", help="save results to project/name")
parser.add_argument("--name", default="exp", help="save results to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--line-thickness", default=3, type=int, help="bounding box thickness (pixels)")
parser.add_argument("--hide-labels", default=False, action="store_true", help="hide labels")
parser.add_argument("--hide-conf", default=False, action="store_true", help="hide confidences")
parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride")
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
"""
Executes YOLOv5 model inference based on provided command-line arguments, validating dependencies before running.
Args:
opt (argparse.Namespace): Command-line arguments for YOLOv5 detection. See function `parse_opt` for details.
Returns:
None
Note:
This function performs essential pre-execution checks and initiates the YOLOv5 detection process based on user-specified
options. Refer to the usage guide and examples for more information about different sources and formats at:
https://github.com/ultralytics/ultralytics
Example usage:
```python
if __name__ == "__main__":
opt = parse_opt()
main(opt)
```
"""
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)

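One detail in detect.py worth calling out: with `--save-txt`, `--save-format 0` writes normalized xywh (YOLO) while `1` writes normalized xyxy (the Pascal-VOC-style path), and the normalization gain `gn` is simply `[w, h, w, h]`. A standalone sketch of both branches (same math, illustrative values; `xyxy2xywh` is reimplemented here to mirror the helper detect.py imports):

```python
import torch

def xyxy2xywh(x):
    """(x1, y1, x2, y2) -> (x_center, y_center, w, h)."""
    y = x.clone()
    y[..., 0] = (x[..., 0] + x[..., 2]) / 2
    y[..., 1] = (x[..., 1] + x[..., 3]) / 2
    y[..., 2] = x[..., 2] - x[..., 0]
    y[..., 3] = x[..., 3] - x[..., 1]
    return y

xyxy = torch.tensor([100.0, 50.0, 300.0, 150.0])  # one detection in pixels
gn = torch.tensor([640.0, 480.0, 640.0, 480.0])   # image w, h, w, h
yolo_row = (xyxy2xywh(xyxy.view(1, 4)) / gn).view(-1).tolist()  # save_format == 0
voc_row = (xyxy.view(1, 4) / gn).view(-1).tolist()              # save_format == 1
print(yolo_row)  # [0.3125, 0.2083..., 0.3125, 0.2083...]
print(voc_row)   # [0.15625, 0.1041..., 0.46875, 0.3125]
```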
1546
yolov5/export.py Normal file

File diff suppressed because it is too large

510
yolov5/hubconf.py Normal file
View File

@ -0,0 +1,510 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
PyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5.
Usage:
import torch
model = torch.hub.load('ultralytics/yolov5', 'yolov5s') # official model
model = torch.hub.load('ultralytics/yolov5:master', 'yolov5s') # from branch
model = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5s.pt') # custom/local model
model = torch.hub.load('.', 'custom', 'yolov5s.pt', source='local') # local repo
"""

import torch

def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
"""
Creates or loads a YOLOv5 model, with options for pretrained weights and model customization.
Args:
name (str): Model name (e.g., 'yolov5s') or path to the model checkpoint (e.g., 'path/to/best.pt').
pretrained (bool, optional): If True, loads pretrained weights into the model. Defaults to True.
channels (int, optional): Number of input channels the model expects. Defaults to 3.
classes (int, optional): Number of classes the model is expected to detect. Defaults to 80.
autoshape (bool, optional): If True, applies the YOLOv5 .autoshape() wrapper for various input formats. Defaults to True.
verbose (bool, optional): If True, prints detailed information during the model creation/loading process. Defaults to True.
device (str | torch.device | None, optional): Device to use for model parameters (e.g., 'cpu', 'cuda'). If None, selects
the best available device. Defaults to None.
Returns:
(DetectMultiBackend | AutoShape): The loaded YOLOv5 model, potentially wrapped with AutoShape if specified.
Examples:
```python
import torch
from hubconf import _create  # note: _create is defined in yolov5/hubconf.py, not the ultralytics package
# Load an official YOLOv5s model with pretrained weights
model = _create('yolov5s')
# Load a custom model from a local checkpoint
model = _create('path/to/custom_model.pt', pretrained=False)
# Load a model with specific input channels and classes
model = _create('yolov5s', channels=1, classes=10)
```
Notes:
For more information on model loading and customization, visit the
[YOLOv5 PyTorch Hub Documentation](https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading).
"""
from pathlib import Path
from models.common import AutoShape, DetectMultiBackend
from models.experimental import attempt_load
from models.yolo import ClassificationModel, DetectionModel, SegmentationModel
from utils.downloads import attempt_download
from utils.general import LOGGER, ROOT, check_requirements, intersect_dicts, logging
from utils.torch_utils import select_device
if not verbose:
LOGGER.setLevel(logging.WARNING)
check_requirements(ROOT / "requirements.txt", exclude=("opencv-python", "tensorboard", "thop"))
name = Path(name)
path = name.with_suffix(".pt") if name.suffix == "" and not name.is_dir() else name # checkpoint path
try:
device = select_device(device)
if pretrained and channels == 3 and classes == 80:
try:
model = DetectMultiBackend(path, device=device, fuse=autoshape) # detection model
if autoshape:
if model.pt and isinstance(model.model, ClassificationModel):
LOGGER.warning(
"WARNING ⚠️ YOLOv5 ClassificationModel is not yet AutoShape compatible. "
"You must pass torch tensors in BCHW to this model, i.e. shape(1,3,224,224)."
)
elif model.pt and isinstance(model.model, SegmentationModel):
LOGGER.warning(
"WARNING ⚠️ YOLOv5 SegmentationModel is not yet AutoShape compatible. "
"You will not be able to run inference with this model."
)
else:
model = AutoShape(model) # for file/URI/PIL/cv2/np inputs and NMS
except Exception:
model = attempt_load(path, device=device, fuse=False) # arbitrary model
else:
cfg = list((Path(__file__).parent / "models").rglob(f"{path.stem}.yaml"))[0] # model.yaml path
model = DetectionModel(cfg, channels, classes) # create model
if pretrained:
ckpt = torch.load(attempt_download(path), map_location=device) # load
csd = ckpt["model"].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=["anchors"]) # intersect
model.load_state_dict(csd, strict=False) # load
if len(ckpt["model"].names) == classes:
model.names = ckpt["model"].names # set class names attribute
if not verbose:
LOGGER.setLevel(logging.INFO) # reset to default
return model.to(device)
except Exception as e:
help_url = "https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading"
s = f"{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help."
raise Exception(s) from e
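
# Minimal sketch of calling _create directly (assumes the yolov5 repo root is the
# working directory, since _create is defined in this file, hubconf.py):
#   from hubconf import _create
#   model = _create("yolov5s")                                # pretrained COCO detector
#   model = _create("yolov5s", pretrained=False, classes=10)  # fresh 10-class model built from yolov5s.yaml
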
def custom(path="path/to/model.pt", autoshape=True, _verbose=True, device=None):
"""
Loads a custom or local YOLOv5 model from a given path with optional autoshaping and device specification.
Args:
path (str): Path to the custom model file (e.g., 'path/to/model.pt').
autoshape (bool): Apply YOLOv5 .autoshape() wrapper to model if True, enabling compatibility with various input
types (default is True).
_verbose (bool): If True, prints all informational messages to the screen; otherwise, operates silently
(default is True).
device (str | torch.device | None): Device to load the model on, e.g., 'cpu', 'cuda', torch.device('cuda:0'), etc.
(default is None, which automatically selects the best available device).
Returns:
torch.nn.Module: A YOLOv5 model loaded with the specified parameters.
Notes:
For more details on loading models from PyTorch Hub:
https://docs.ultralytics.com/yolov5/tutorials/pytorch_hub_model_loading
Examples:
```python
# Load model from a given path with autoshape enabled on the best available device
model = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5s.pt')
# Load model from a local path without autoshape on the CPU device
model = torch.hub.load('.', 'custom', 'yolov5s.pt', source='local', autoshape=False, device='cpu')
```
"""
return _create(path, autoshape=autoshape, verbose=_verbose, device=device)
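
# e.g. loading a locally trained checkpoint through custom() (path is illustrative):
#   model = custom(path="runs/train/exp/weights/best.pt", device="cpu")
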
def yolov5n(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
"""
Instantiates the YOLOv5-nano model with options for pretraining, input channels, class count, autoshaping,
verbosity, and device.
Args:
pretrained (bool): If True, loads pretrained weights into the model. Defaults to True.
channels (int): Number of input channels for the model. Defaults to 3.
classes (int): Number of classes for object detection. Defaults to 80.
autoshape (bool): If True, applies the YOLOv5 .autoshape() wrapper to the model for various formats (file/URI/PIL/
cv2/np) and non-maximum suppression (NMS) during inference. Defaults to True.
_verbose (bool): If True, prints detailed information to the screen. Defaults to True.
device (str | torch.device | None): Specifies the device to use for model computation. If None, uses the best device
available (i.e., GPU if available, otherwise CPU). Defaults to None.
Returns:
DetectionModel | ClassificationModel | SegmentationModel: The instantiated YOLOv5-nano model, potentially with
pretrained weights and autoshaping applied.
Notes:
For further details on loading models from PyTorch Hub, refer to [PyTorch Hub models](https://pytorch.org/hub/ultralytics_yolov5).
Examples:
```python
import torch
from hubconf import yolov5n  # note: defined in yolov5/hubconf.py, not the ultralytics package
# Load the YOLOv5-nano model with defaults
model = yolov5n()
# Load the YOLOv5-nano model with a specific device
model = yolov5n(device='cuda')
```
"""
return _create("yolov5n", pretrained, channels, classes, autoshape, _verbose, device)

def yolov5s(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
"""
Create a YOLOv5-small (yolov5s) model with options for pretraining, input channels, class count, autoshaping,
verbosity, and device configuration.
Args:
pretrained (bool, optional): Flag to load pretrained weights into the model. Defaults to True.
channels (int, optional): Number of input channels. Defaults to 3.
classes (int, optional): Number of model classes. Defaults to 80.
autoshape (bool, optional): Whether to wrap the model with YOLOv5's .autoshape() for handling various input formats.
Defaults to True.
_verbose (bool, optional): Flag to print detailed information regarding model loading. Defaults to True.
device (str | torch.device | None, optional): Device to use for model computation, can be 'cpu', 'cuda', or
torch.device instances. If None, automatically selects the best available device. Defaults to None.
Returns:
torch.nn.Module: The YOLOv5-small model configured and loaded according to the specified parameters.
Example:
```python
import torch
# Load the official YOLOv5-small model with pretrained weights
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
# Load the YOLOv5-small model from a specific branch
model = torch.hub.load('ultralytics/yolov5:master', 'yolov5s')
# Load a custom YOLOv5-small model from a local checkpoint
model = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5s.pt')
# Load a local YOLOv5-small model specifying source as local repository
model = torch.hub.load('.', 'custom', 'yolov5s.pt', source='local')
```
Notes:
For more details on model loading and customization, visit
the [YOLOv5 PyTorch Hub Documentation](https://pytorch.org/hub/ultralytics_yolov5).
"""
return _create("yolov5s", pretrained, channels, classes, autoshape, _verbose, device)

def yolov5m(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
"""
Instantiates the YOLOv5-medium model with customizable pretraining, channel count, class count, autoshaping,
verbosity, and device.
Args:
pretrained (bool, optional): Whether to load pretrained weights into the model. Default is True.
channels (int, optional): Number of input channels. Default is 3.
classes (int, optional): Number of model classes. Default is 80.
autoshape (bool, optional): Apply YOLOv5 .autoshape() wrapper to the model for handling various input formats.
Default is True.
_verbose (bool, optional): Whether to print detailed information to the screen. Default is True.
device (str | torch.device | None, optional): Device specification to use for model parameters (e.g., 'cpu', 'cuda').
Default is None.
Returns:
torch.nn.Module: The instantiated YOLOv5-medium model.
Usage Example:
```python
import torch
model = torch.hub.load('ultralytics/yolov5', 'yolov5m') # Load YOLOv5-medium from Ultralytics repository
model = torch.hub.load('ultralytics/yolov5:master', 'yolov5m') # Load from the master branch
model = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5m.pt') # Load a custom/local YOLOv5-medium model
model = torch.hub.load('.', 'custom', 'yolov5m.pt', source='local') # Load from a local repository
```
For more information, visit https://pytorch.org/hub/ultralytics_yolov5.
"""
return _create("yolov5m", pretrained, channels, classes, autoshape, _verbose, device)

def yolov5l(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
"""
Creates YOLOv5-large model with options for pretraining, channels, classes, autoshaping, verbosity, and device
selection.
Args:
pretrained (bool): Load pretrained weights into the model. Default is True.
channels (int): Number of input channels. Default is 3.
classes (int): Number of model classes. Default is 80.
autoshape (bool): Apply YOLOv5 .autoshape() wrapper to model. Default is True.
_verbose (bool): Print all information to screen. Default is True.
device (str | torch.device | None): Device to use for model parameters, e.g., 'cpu', 'cuda', or a torch.device instance.
Default is None.
Returns:
YOLOv5 model (torch.nn.Module): The YOLOv5-large model instantiated with specified configurations and possibly
pretrained weights.
Examples:
```python
import torch
model = torch.hub.load('ultralytics/yolov5', 'yolov5l')
```
Notes:
For additional details, refer to the PyTorch Hub models documentation:
https://pytorch.org/hub/ultralytics_yolov5
"""
return _create("yolov5l", pretrained, channels, classes, autoshape, _verbose, device)

def yolov5x(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
"""
Creates the YOLOv5-xlarge model with options for pretraining, input channels, class count, autoshaping,
verbosity, and device specification.
Args:
pretrained (bool): If True, loads pretrained weights into the model. Defaults to True.
channels (int): Number of input channels for the model. Defaults to 3.
classes (int): Number of model classes for object detection. Defaults to 80.
autoshape (bool): If True, applies the YOLOv5 .autoshape() wrapper for handling different input formats. Defaults to
True.
_verbose (bool): If True, prints detailed information during model loading. Defaults to True.
device (str | torch.device | None): Device specification for computing the model, e.g., 'cpu', 'cuda:0', torch.device('cuda').
Defaults to None.
Returns:
torch.nn.Module: The YOLOv5-xlarge model loaded with the specified parameters, optionally with pretrained weights and
autoshaping applied.
Example:
```python
import torch
model = torch.hub.load('ultralytics/yolov5', 'yolov5x')
```
For additional details, refer to the official YOLOv5 PyTorch Hub models documentation:
https://pytorch.org/hub/ultralytics_yolov5
"""
return _create("yolov5x", pretrained, channels, classes, autoshape, _verbose, device)

def yolov5n6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
"""
Creates YOLOv5-nano-P6 model with options for pretraining, channels, classes, autoshaping, verbosity, and device.
Args:
pretrained (bool, optional): If True, loads pretrained weights into the model. Default is True.
channels (int, optional): Number of input channels. Default is 3.
classes (int, optional): Number of model classes. Default is 80.
autoshape (bool, optional): If True, applies the YOLOv5 .autoshape() wrapper to the model. Default is True.
_verbose (bool, optional): If True, prints all information to screen. Default is True.
device (str | torch.device | None, optional): Device to use for model parameters. Can be 'cpu', 'cuda', or None.
Default is None.
Returns:
torch.nn.Module: YOLOv5-nano-P6 model loaded with the specified configurations.
Example:
```python
import torch
model = yolov5n6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device='cuda')
```
Notes:
For more information on PyTorch Hub models, visit: https://pytorch.org/hub/ultralytics_yolov5
"""
return _create("yolov5n6", pretrained, channels, classes, autoshape, _verbose, device)

def yolov5s6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
"""
Instantiate the YOLOv5-small-P6 model with options for pretraining, input channels, number of classes, autoshaping,
verbosity, and device selection.
Args:
pretrained (bool): If True, loads pretrained weights. Default is True.
channels (int): Number of input channels. Default is 3.
classes (int): Number of object detection classes. Default is 80.
autoshape (bool): If True, applies YOLOv5 .autoshape() wrapper to the model, allowing for varied input formats.
Default is True.
_verbose (bool): If True, prints detailed information during model loading. Default is True.
device (str | torch.device | None): Device specification for model parameters (e.g., 'cpu', 'cuda', or torch.device).
Default is None, which selects an available device automatically.
Returns:
torch.nn.Module: The YOLOv5-small-P6 model instance.
Usage:
```python
import torch
model = torch.hub.load('ultralytics/yolov5', 'yolov5s6')
model = torch.hub.load('ultralytics/yolov5:master', 'yolov5s6') # load from a specific branch
model = torch.hub.load('ultralytics/yolov5', 'custom', 'path/to/yolov5s6.pt') # custom/local model
model = torch.hub.load('.', 'custom', 'path/to/yolov5s6.pt', source='local') # local repo model
```
Notes:
- For more information, refer to the PyTorch Hub models documentation at https://pytorch.org/hub/ultralytics_yolov5
Raises:
Exception: If there is an error during model creation or loading, with a suggestion to visit the YOLOv5
tutorials for help.
"""
return _create("yolov5s6", pretrained, channels, classes, autoshape, _verbose, device)

def yolov5m6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
"""
Create YOLOv5-medium-P6 model with options for pretraining, channel count, class count, autoshaping, verbosity, and
device.
Args:
pretrained (bool): If True, loads pretrained weights. Default is True.
channels (int): Number of input channels. Default is 3.
classes (int): Number of model classes. Default is 80.
autoshape (bool): Apply YOLOv5 .autoshape() wrapper to the model for file/URI/PIL/cv2/np inputs and NMS.
Default is True.
_verbose (bool): If True, prints detailed information to the screen. Default is True.
device (str | torch.device | None): Device to use for model parameters. Default is None, which uses the
best available device.
Returns:
torch.nn.Module: The YOLOv5-medium-P6 model.
Refer to the PyTorch Hub models documentation: https://pytorch.org/hub/ultralytics_yolov5 for additional details.
Example:
```python
import torch
# Load YOLOv5-medium-P6 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5m6')
```
Notes:
- The model can be loaded with pre-trained weights for better performance on specific tasks.
- The autoshape feature simplifies input handling by allowing various popular data formats.
"""
return _create("yolov5m6", pretrained, channels, classes, autoshape, _verbose, device)

def yolov5l6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
"""
Instantiate the YOLOv5-large-P6 model with options for pretraining, channel and class counts, autoshaping,
verbosity, and device selection.
Args:
pretrained (bool, optional): If True, load pretrained weights into the model. Default is True.
channels (int, optional): Number of input channels. Default is 3.
classes (int, optional): Number of model classes. Default is 80.
autoshape (bool, optional): If True, apply YOLOv5 .autoshape() wrapper to the model for input flexibility. Default is True.
_verbose (bool, optional): If True, print all information to the screen. Default is True.
device (str | torch.device | None, optional): Device to use for model parameters, e.g., 'cpu', 'cuda', or torch.device.
If None, automatically selects the best available device. Default is None.
Returns:
torch.nn.Module: The instantiated YOLOv5-large-P6 model.
Example:
```python
import torch
model = torch.hub.load('ultralytics/yolov5', 'yolov5l6') # official model
model = torch.hub.load('ultralytics/yolov5:master', 'yolov5l6') # from specific branch
model = torch.hub.load('ultralytics/yolov5', 'custom', 'path/to/yolov5l6.pt') # custom/local model
model = torch.hub.load('.', 'custom', 'path/to/yolov5l6.pt', source='local') # local repository
```
Note:
Refer to [PyTorch Hub Documentation](https://pytorch.org/hub/ultralytics_yolov5) for additional usage instructions.
"""
return _create("yolov5l6", pretrained, channels, classes, autoshape, _verbose, device)

def yolov5x6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
"""
Creates the YOLOv5-xlarge-P6 model with options for pretraining, number of input channels, class count, autoshaping,
verbosity, and device selection.
Args:
pretrained (bool): If True, loads pretrained weights into the model. Default is True.
channels (int): Number of input channels. Default is 3.
classes (int): Number of model classes. Default is 80.
autoshape (bool): If True, applies YOLOv5 .autoshape() wrapper to the model. Default is True.
_verbose (bool): If True, prints all information to the screen. Default is True.
device (str | torch.device | None): Device to use for model parameters, can be a string, torch.device object, or
None for default device selection. Default is None.
Returns:
torch.nn.Module: The instantiated YOLOv5-xlarge-P6 model.
Example:
```python
import torch
model = torch.hub.load('ultralytics/yolov5', 'yolov5x6') # load the YOLOv5-xlarge-P6 model
```
Note:
For more information on YOLOv5 models, visit the official documentation:
https://docs.ultralytics.com/yolov5
"""
return _create("yolov5x6", pretrained, channels, classes, autoshape, _verbose, device)
if __name__ == "__main__":
import argparse
from pathlib import Path
import numpy as np
from PIL import Image
from utils.general import cv2, print_args
# Argparser
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, default="yolov5s", help="model name")
opt = parser.parse_args()
print_args(vars(opt))
# Model
model = _create(name=opt.model, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True)
# model = custom(path='path/to/model.pt') # custom
# Images
imgs = [
"data/images/zidane.jpg", # filename
Path("data/images/zidane.jpg"), # Path
"https://ultralytics.com/images/zidane.jpg", # URI
cv2.imread("data/images/bus.jpg")[:, :, ::-1], # OpenCV
Image.open("data/images/bus.jpg"), # PIL
np.zeros((320, 640, 3)),  # numpy
]
# Inference
results = model(imgs, size=320) # batched inference
# Results
results.print()
results.save()
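# Other ways to consume the Detections object returned by AutoShape inference
# (shown for reference; these are existing YOLOv5 results helpers):
# results.show()            # display annotated images
# results.xyxy[0]           # image 0 predictions as a tensor: [x1, y1, x2, y2, conf, cls]
# results.pandas().xyxy[0]  # the same predictions as a pandas DataFrame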

Some files were not shown because too many files have changed in this diff