4 Commits

Author SHA1 Message Date
2976f7686c deep_sort 提升版本 2025-06-13 09:29:27 +08:00
5d6ac1f7c2 deep_sort 提升版本 2025-06-12 14:06:55 +08:00
ca245e4cec deep_sort 提升版本 2025-06-11 16:19:38 +08:00
9e99b08d13 提高版本到yolov11 2025-06-09 15:27:45 +08:00
264 changed files with 272 additions and 46660 deletions

2
algo/__init__.py Normal file
View File

@ -0,0 +1,2 @@
from .yolov11 import YoloModel
from .deep_sort import DeepSortModel

82
algo/deep_sort.py Normal file
View File

@ -0,0 +1,82 @@
import cv2
import time
import asyncio
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from utils.websocket_server import room_manager
class DeepSortModel:
def __init__(self, pt_url):
self.model = YOLO(pt_url)
self.tracker = DeepSort()
def sort_video(self, url, detect_id: int, idx_to_class: {}):
"""
对文件夹中的视频或rtsp的视频流进行目标追踪
"""
room_name = 'deep_sort_' + str(detect_id)
room_count = 'deep_sort_count_' + str(detect_id)
count_result = {}
for key in idx_to_class.keys():
count_result[key] = set()
cap = cv2.VideoCapture(url)
start_time = time.time()
while cap.isOpened():
# 检查是否已经超过10分钟600秒
elapsed_time = time.time() - start_time
if elapsed_time > 600: # 600 seconds = 10 minutes
break
ret, frame = cap.read()
if not ret:
break
# YOLO 推理GPU 加速)
results = self.model(frame, device=0, conf=0.6, verbose=False)
# 获取检测框数据
detections = results[0].boxes.data.cpu().numpy()
# DeepSORT 格式转换:[(bbox_x, y, w, h), confidence, class_id]
tracker_inputs = []
for det in detections:
x1, y1, x2, y2, conf, cls = det
bbox = [x1, y1, x2 - x1, y2 - y1] # (x, y, w, h)
tracker_inputs.append((bbox, conf, int(cls)))
# 更新跟踪器
tracks = self.tracker.update_tracks(tracker_inputs, frame=frame)
# 获取所有被确认过的追踪目标
active_tracks = []
# 绘制跟踪结果
for track in tracks:
if not track.is_confirmed():
active_tracks.append(track)
continue
track_id = track.track_id
track_cls = str(track.det_class)
ltrb = track.to_ltrb()
x1, y1, x2, y2 = map(int, ltrb)
# 绘制矩形框和ID标签
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(
frame,
f"{idx_to_class[track_cls]} {track_id}",
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 0),
2,
)
for tark in active_tracks:
class_id = str(tark.det_class)
count_result[class_id].add(tark.track_id)
# 对应每个label进行计数
result = {}
for key in count_result.keys():
result[idx_to_class[key]] = len(count_result[key])
# 将帧编码为 JPEG
ret, jpeg = cv2.imencode('.jpg', frame)
if ret:
jpeg_bytes = jpeg.tobytes()
asyncio.run(room_manager.send_stream_to_room(room_name, jpeg_bytes))
asyncio.run(room_manager.send_to_room(room_count, str(result)))

115
algo/yolov11.py Normal file
View File

@ -0,0 +1,115 @@
import cv2
import time
import asyncio
from pathlib import Path
from ultralytics import YOLO
from utils.websocket_server import room_manager
# 开始训练回调
def on_train_start(trainer):
full_path = trainer.save_dir
p = Path(full_path)
room_name = 'train_' + p.parent.name
asyncio.run(room_manager.send_to_room(room_name, 'start'))
# 结束训练回调
def on_train_end(trainer):
full_path = trainer.save_dir
p = Path(full_path)
room_name = 'train_' + p.parent.name
asyncio.run(room_manager.send_to_room(room_name, 'end'))
# 每轮训练结束回调函数
def on_train_epoch_end(trainer):
full_path = trainer.save_dir
p = Path(full_path)
room_name = 'train_' + p.parent.name
asyncio.run(room_manager.send_to_room(room_name, trainer.epoch))
# 预测开始回调
def on_predict_start(predictor):
full_path = predictor.save_dir
p = Path(full_path)
room_name = 'detect_' + p.parent.parent.name
asyncio.run(room_manager.send_to_room(room_name, 'start'))
# 每个批次预测结束回调
def on_predict_batch_end(predictor):
full_path = predictor.save_dir
p = Path(full_path)
room_name = 'detect_' + p.parent.parent.name
asyncio.run(room_manager.send_to_room(room_name, predictor.seen))
# 预测结束回调
def on_predict_end(predictor):
full_path = predictor.save_dir
p = Path(full_path)
room_name = 'detect_' + p.parent.parent.name
asyncio.run(room_manager.send_to_room(room_name, 'end'))
class YoloModel:
def __init__(self, pt_url=None):
if pt_url:
self.model = YOLO(pt_url)
else:
self.model = YOLO('yolo11n.pt')
def train(self, data, epochs, project, name, patience):
"""
模型训练
"""
self.model.add_callback('on_train_start', on_train_start)
self.model.add_callback('on_train_end', on_train_end)
self.model.add_callback('on_train_epoch_end', on_train_epoch_end)
self.model.train(
data=data,
epochs=epochs,
imgsz=640,
device=0,
project=project,
name=name,
patience=patience,
verbose=False
)
def predict_folder(self, source, name, project):
"""
对文件夹中的内容(图片或者视频)进行预测
"""
self.model.add_callback('on_predict_start', on_predict_start)
self.model.add_callback('on_predict_batch_end', on_predict_batch_end)
self.model.add_callback('on_predict_end', on_predict_end)
self.model.predict(
source=source,
name=name,
project=project,
save=True,
save_txt=True,
device='0',
conf=0.6
)
def predict_rtsp(self, source, room_name):
"""
对rtsp视频流进行预测
"""
start_time = time.time()
for result in self.model.predict(source=source, stream=True, device='0', conf=0.6):
# 检查是否已经超过10分钟600秒
elapsed_time = time.time() - start_time
if elapsed_time > 600: # 600 seconds = 10 minutes
break
frame = result.plot()
ret, jpeg = cv2.imencode('.jpg', frame)
if ret:
frame_data = jpeg.tobytes()
asyncio.run(room_manager.send_stream_to_room(room_name, frame_data))

View File

@ -1,135 +1,15 @@
from utils.websocket_server import room_manager
import time
import torch
from deep_sort.deep_sort import DeepSort
from deep_sort.utils.draw import draw_boxes
from utils.yolov5.models.common import DetectMultiBackend
from utils.yolov5.utils.torch_utils import select_device
from utils.yolov5.utils.dataloaders import LoadImages, LoadStreams
from utils.yolov5.utils.general import check_img_size, non_max_suppression, cv2, scale_coords, xyxy2xywh
from algo import DeepSortModel
def yolov5_to_deepsort_format(pred):
"""
将YOLOv5的预测结果转换为Deep SORT所需的格式
:param pred: YOLOv5的预测结果
:return: 转换后的bbox_xywh, confs, class_ids
"""
pred[:, :4] = xyxy2xywh(pred[:, :4])
xywh = pred[:, :4].cpu().numpy()
conf = pred[:, 4].cpu().numpy()
cls = pred[:, 5].cpu().numpy()
return xywh, conf, cls
async def run_deepsort(
def run_deepsort(
detect_id: int,
weights_pt: str,
data: str,
idx_to_class: {},
sort_type: str = 'video',
video_path: str = None,
rtsp_url: str = None
url
):
"""
deep_sort追踪先经过yolov5对目标进行识别,
deep_sort追踪先经过yolo对目标进行识别
再调用deepsort对目标进行追踪
"""
room = 'deep_sort_' + str(detect_id)
room_count = 'deep_sort_count_' + str(detect_id)
# 选择设备CPU 或 GPU
device = select_device('cuda:0')
model = DetectMultiBackend(weights_pt, device=device, dnn=False, data=data, fp16=False)
deepsort = DeepSort(
model_path="deep_sort/deep/checkpoint/ckpt.t7", # ReID 模型路径
max_dist=0.2, # 外观特征匹配阈值(越小越严格)
max_iou_distance=0.7, # 最大IoU距离阈值
max_age=70, # 目标最大存活帧数(未匹配时保留的帧数)
n_init=3 # 初始确认帧数连续匹配到n_init次后确认跟踪
)
stride, names, pt = model.stride, model.names, model.pt
img_sz = check_img_size((640, 640), s=stride) # check image size
if sort_type == 'video':
dataset = LoadImages(video_path, img_size=img_sz, stride=stride, auto=pt, vid_stride=1)
else:
dataset = LoadStreams(rtsp_url, img_size=img_sz, stride=stride, auto=pt, vid_stride=1)
bs = len(dataset)
count_result = {}
for key in idx_to_class.keys():
count_result[key] = set()
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *img_sz))
time.sleep(3) # 等待3s等待websocket进入
start_time = time.time()
for path, im, im0s, vid_cap, s in dataset:
# 检查是否已经超过10分钟600秒
elapsed_time = time.time() - start_time
if elapsed_time > 600: # 600 seconds = 10 minutes
print(room, "已达到最大执行时间,结束推理。")
break
if room_manager.rooms.get(room):
im0 = im0s[0]
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
pred = model(im, augment=False, visualize=False)
# NMS
pred = non_max_suppression(pred, 0.25, 0.45, None, False, max_det=1000)[0]
pred[:, :4] = scale_coords(im.shape[2:], pred[:, :4], im0.shape).round()
# 使用YOLOv5进行检测后得到的pred
bbox_xywh, cls_conf, cls_ids = yolov5_to_deepsort_format(pred)
mask = cls_ids == 0
bbox_xywh = bbox_xywh[mask]
bbox_xywh[:, 2:] *= 1.2
cls_conf = cls_conf[mask]
cls_ids = cls_ids[mask]
# 调用Deep SORT更新方法
outputs, _ = deepsort.update(bbox_xywh, cls_conf, cls_ids, im0)
if len(outputs) > 0:
bbox_xyxy = outputs[:, :4]
identities = outputs[:, -1]
cls = outputs[:, -2]
names = [idx_to_class[str(label)] for label in cls]
# 开始画框
ori_img = draw_boxes(im0, bbox_xyxy, names, identities, None)
# 获取所有被确认过的追踪目标
active_tracks = [
track for track in deepsort.tracker.tracks
if track.is_confirmed()
]
for tark in active_tracks:
class_id = str(tark.cls)
count_result[class_id].add(tark.track_id)
# 对应每个label进行计数
result = {}
for key in count_result.keys():
result[idx_to_class[key]] = len(count_result[key])
# 将帧编码为 JPEG
ret, jpeg = cv2.imencode('.jpg', ori_img)
if ret:
jpeg_bytes = jpeg.tobytes()
await room_manager.send_stream_to_room(room, jpeg_bytes)
await room_manager.send_to_room(room_count, str(result))
else:
print(room, '结束追踪')
break
deep_sort = DeepSortModel(weights_pt)
deep_sort.sort_video(url=url, detect_id=detect_id, idx_to_class=idx_to_class)

View File

@ -1,19 +1,11 @@
from algo import YoloModel
from utils import os_utils as os
from . import models, crud, schemas
from utils.websocket_server import room_manager
from application.settings import yolo_url, detect_url
from application.settings import detect_url
from apps.business.train import models as train_models
from utils.yolov5.utils.dataloaders import LoadStreams
from utils.yolov5.utils.torch_utils import select_device
from ultralytics.utils.plotting import Annotator, colors
from utils.yolov5.models.common import DetectMultiBackend
from apps.business.deepsort import service as deepsort_service
from utils.yolov5.utils.general import check_img_size, non_max_suppression, cv2, scale_boxes
import time
import torch
import asyncio
import subprocess
from sqlalchemy.ext.asyncio import AsyncSession
@ -59,69 +51,25 @@ async def before_detect(
return detect_log
def run_img_loop(
def run_detect_folder(
weights: str,
source: str,
project: str,
name: str,
detect_id: int,
is_gpu: str):
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# 运行异步函数
loop.run_until_complete(run_detect_img(weights, source, project, name, detect_id, is_gpu))
# 可选: 关闭循环
loop.close()
async def run_detect_img(
weights: str,
source: str,
project: str,
name: str,
detect_id: int,
is_gpu: str):
name: str):
"""
执行yolov5的推理
:param weights: 权重文件
:param source: 图片所在文件
:param project: 推理完成的文件位置
:param name: 版本名称
:param log_id: 日志id
:param detect_id: 推理集合id
:param db: 数据库session
:param is_gpu: 是否gpu加速
:return:
"""
yolo_path = os.file_path(yolo_url, 'detect.py')
room = 'detect_' + str(detect_id)
await room_manager.send_to_room(room, f"AiCheck: 模型训练开始,请稍等。。。\n")
commend = ["python", '-u', yolo_path, "--weights", weights, "--source", source, "--name", name, "--project",
project, "--save-txt", "--conf-thres", "0.6"]
# 判断是否存在cuda版本
if is_gpu == 'True':
commend.append("--device=0")
# 启动子进程
with subprocess.Popen(
commend,
bufsize=1, # bufsize=0时为不缓存bufsize=1时按行缓存bufsize为其他正整数时为按照近似该正整数的字节数缓存
shell=False,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, # 这里可以显示yolov5训练过程中出现的进度条等信息
text=True, # 缓存内容为文本,避免后续编码显示问题
encoding='utf-8',
) as process:
while process.poll() is None:
line = process.stdout.readline()
process.stdout.flush() # 刷新缓存,防止缓存过多造成卡死
if line != '\n' and 'yolo' not in line:
await room_manager.send_to_room(room, line + '\n')
# 等待进程结束并获取返回码
return_code = process.wait()
if return_code != 0:
await room_manager.send_to_room(room, 'error')
else:
await room_manager.send_to_room(room, 'success')
model = YoloModel(weights)
model.predict_folder(
source=source,
project=project,
name=name
)
async def update_sql(db: AsyncSession, detect_id: int, log_id: int, project, name):
@ -148,121 +96,13 @@ async def update_sql(db: AsyncSession, detect_id: int, log_id: int, project, nam
await crud.ProjectDetectLogFileDal(db).create_models(detect_log_files)
async def run_detect_rtsp(weights_pt: str, rtsp_url: str, data: str, detect_id: int, is_gpu: str):
def run_detect_rtsp(weights_pt: str, rtsp_url: str, room_name: str):
"""
rtsp 视频流推理
:param detect_id: 训练集的id
:param room_name: websocket链接名称
:param weights_pt: 权重文件
:param rtsp_url: 视频流地址
:param data: yaml文件
:param is_gpu: 是否启用加速
:return:
"""
room = 'detect_rtsp_' + str(detect_id)
# 选择设备CPU 或 GPU
device = select_device('cpu')
# 判断是否存在cuda版本
if is_gpu == 'True':
device = select_device('cuda:0')
# 加载模型
model = DetectMultiBackend(weights_pt, device=device, dnn=False, data=data, fp16=False)
stride, names, pt = model.stride, model.names, model.pt
img_sz = check_img_size((640, 640), s=stride) # check image size
dataset = LoadStreams(rtsp_url, img_size=img_sz, stride=stride, auto=pt, vid_stride=1)
bs = len(dataset)
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *img_sz))
time.sleep(3) # 等待3s等待websocket进入
start_time = time.time()
for path, im, im0s, vid_cap, s in dataset:
# 检查是否已经超过10分钟600秒
elapsed_time = time.time() - start_time
if elapsed_time > 600: # 600 seconds = 10 minutes
print(room, "已达到最大执行时间,结束推理。")
break
if room_manager.rooms.get(room):
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
pred = model(im, augment=False, visualize=False)
# NMS
pred = non_max_suppression(pred, 0.25, 0.45, None, False, max_det=1000)
# Process predictions
for i, det in enumerate(pred): # per image
p, im0, frame = path[i], im0s[i].copy(), dataset.count
annotator = Annotator(im0, line_width=3, example=str(names))
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
# Write results
for *xyxy, conf, cls in reversed(det):
c = int(cls) # integer class
label = None if False else (names[c] if False else f"{names[c]} {conf:.2f}")
annotator.box_label(xyxy, label, color=colors(c, True))
# Stream results
im0 = annotator.result()
# 将帧编码为 JPEG
ret, jpeg = cv2.imencode('.jpg', im0)
if ret:
frame_data = jpeg.tobytes()
await room_manager.send_stream_to_room(room, frame_data)
else:
print(room, '结束推理')
break
def run_rtsp_loop(weights_pt: str, rtsp_url: str, data: str, detect_id: int, is_gpu: str):
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# 运行异步函数
loop.run_until_complete(
run_detect_rtsp(
weights_pt,
rtsp_url,
data,
detect_id,
is_gpu
)
)
# 可选: 关闭循环
loop.close()
def run_deepsort_loop(
detect_id: int,
weights_pt: str,
data: str,
idx_to_class: {},
sort_type: str = 'video',
video_path: str = None,
rtsp_url: str = None
):
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# 运行异步函数
loop.run_until_complete(
deepsort_service.run_deepsort(
detect_id,
weights_pt,
data,
idx_to_class,
sort_type,
video_path,
rtsp_url
)
)
# 可选: 关闭循环
loop.close()
model = YoloModel(weights_pt)
model.predict_rtsp(rtsp_url, room_name)

View File

@ -8,19 +8,18 @@
from utils import os_utils as osu
from core.dependencies import IdList
from core.database import redis_getter
from utils.websocket_server import room_manager
from . import schemas, crud, params, service, models
from apps.business.train.crud import ProjectTrainDal
from apps.business.project.crud import ProjectInfoDal, ProjectLabelDal
from apps.vadmin.auth.utils.current import AllUserAuth
from apps.vadmin.auth.utils.validation.auth import Auth
from utils.response import SuccessResponse, ErrorResponse
from apps.business.deepsort import service as deep_sort_service
from apps.business.project.crud import ProjectInfoDal, ProjectLabelDal
import os
import shutil
import threading
from redis.asyncio import Redis
from fastapi.responses import FileResponse
from fastapi import Depends, APIRouter, Form, UploadFile, BackgroundTasks
@ -106,8 +105,7 @@ async def delete_file(
@app.post("/start", summary="开始推理")
async def run_detect_yolo(
detect_log_in: schemas.ProjectDetectLogIn,
auth: Auth = Depends(AllUserAuth()),
rd: Redis = Depends(redis_getter)):
auth: Auth = Depends(AllUserAuth())):
detect_dal = crud.ProjectDetectDal(auth.db)
train_dal = ProjectTrainDal(auth.db)
detect = await detect_dal.get_data(detect_log_in.detect_id)
@ -119,21 +117,29 @@ async def run_detect_yolo(
file_count = await crud.ProjectDetectFileDal(auth.db).file_count(detect_log_in.detect_id)
if file_count == 0 and detect.rtsp_url is None:
return ErrorResponse("推理集合中没有文件,请先到推理集合中上传推理内容")
is_gpu = await rd.get('is_gpu')
# 判断一下是单纯的推理项目还是跟踪项目
project_info = await ProjectInfoDal(auth.db).get_data(data_id=detect.project_id)
if project_info.type_code == 'yolo':
if detect.file_type == 'img' or detect.file_type == 'video':
detect_log = await service.before_detect(detect_log_in, detect, train, auth.db, auth.user.id)
thread_train = threading.Thread(target=service.run_img_loop,
args=(detect_log.pt_url, detect_log.folder_url,
detect_log.detect_folder_url, detect_log.detect_version,
detect_log.detect_id, is_gpu))
thread_train = threading.Thread(
target=service.run_detect_folder,
args=(
detect_log.pt_url,
detect_log.folder_url,
detect_log.detect_folder_url,
detect_log.detect_version
)
)
thread_train.start()
await service.update_sql(
auth.db, detect_log.detect_id,
detect_log.id, detect_log.detect_folder_url,
detect_log.detect_version)
auth.db,
detect_log.detect_id,
detect_log.id,
detect_log.detect_folder_url,
detect_log.detect_version
)
return SuccessResponse(msg="执行成功", data=file_count)
elif detect.file_type == 'rtsp':
room = 'detect_rtsp_' + str(detect.id)
if not room_manager.rooms.get(room):
@ -141,32 +147,33 @@ async def run_detect_yolo(
weights_pt = train.best_pt
else:
weights_pt = train.last_pt
thread_train = threading.Thread(target=service.run_rtsp_loop,
args=(weights_pt, detect.rtsp_url, train.train_data, detect.id, is_gpu,))
thread_train = threading.Thread(
target=service.run_detect_rtsp,
args=(
weights_pt,
detect.rtsp_url,
room
)
)
thread_train.start()
return SuccessResponse(msg="执行成功")
elif project_info.type_code == 'deepsort':
room = 'deep_sort_' + str(detect.id)
if not room_manager.rooms.get(room):
# 查询项目所属标签,返回两个 idname一一对应的数组
label_id_list, label_name_list = await ProjectLabelDal(auth.db).get_label_for_train(project_info.id)
idx_to_class = {str(i): name for i, name in enumerate(label_name_list)}
# if detect_log_in.pt_type == 'best':
# weights_pt = train.best_pt
# else:
# weights_pt = train.last_pt
if detect.file_type == 'rtsp':
threading_main = threading.Thread(
target=service.run_deepsort_loop,
args=(detect.id, train.best_pt, train.train_data,
idx_to_class, 'rtsp', None, detect.rtsp_url))
threading_main.start()
elif detect.file_type == 'video':
threading_main = threading.Thread(
target=service.run_deepsort_loop,
args=(detect.id, train.best_pt, train.train_data,
idx_to_class, 'video', detect.folder_url, None))
threading_main.start()
return SuccessResponse(msg="执行成功")
threading_main = threading.Thread(
target=deep_sort_service.run_deepsort,
args=(
detect.id,
train.best_pt,
idx_to_class,
detect.rtsp_url
)
)
threading_main.start()
return SuccessResponse(msg="执行成功")
###########################################################

View File

@ -18,7 +18,7 @@ from typing import Optional
class ProjectTrainIn(BaseModel):
project_id: Optional[int] = Field(..., description="项目id")
weights_id: Optional[str] = Field(None, description="权重文件")
weights_id: Optional[int] = Field(None, description="权重文件")
weights_name: Optional[str] = Field(None, description="权重文件名称")
epochs: Optional[int] = Field(50, description="训练轮数")
patience: Optional[int] = Field(20, description="早停的耐心值")

View File

@ -1,13 +1,12 @@
from algo import YoloModel
from utils import os_utils as osu
from application.settings import *
from . import schemas, models, crud
from utils.websocket_server import room_manager
from apps.business.project import models as proj_models, crud as proj_crud
import yaml
import asyncio
import subprocess
from sqlalchemy.ext.asyncio import AsyncSession
@ -118,19 +117,16 @@ def run_event_loop(
# 运行异步函数,开始训练
loop_run = asyncio.new_event_loop()
asyncio.set_event_loop(loop_run)
loop_run.run_until_complete(run_commend(data, project, name, train_in.epochs, train_in.patience,
project_id, train_info, is_gup))
loop_run.run_until_complete(run_commend(data, project, name, train_in.epochs, train_in.patience, train_info))
async def run_commend(
def run_commend(
data: str,
project: str,
name: str,
epochs: int,
patience: int,
project_id: int,
train_info: models.ProjectTrain,
is_gpu: str):
train_info: models.ProjectTrain):
"""
执行训练
:param data: 训练数据集
@ -138,47 +134,14 @@ async def run_commend(
:param name: 实验名称
:param epochs: 训练轮数
:param patience: 早停耐心值
:param weights: 权重文件
:param project_id: 项目id
:param train_info: 训练信息
:param is_gpu: 是否是gpu环境
:return:
"""
yolo_path = osu.file_path(yolo_url, 'train.py')
room = 'train_' + str(project_id)
await room_manager.send_to_room(room, f"AiCheckV2.0: 模型训练开始,请稍等。。。\n")
commend = ["python", '-u', yolo_path, "--data=" + data, "--project=" + project, "--name=" + name,
"--epochs=" + str(epochs), "--batch-size=8", "--exist-ok", "--patience=" + str(patience)]
# 增加权重文件,在之前训练的基础上重新训练
if train_info is not None:
commend.append("--weights=" + train_info.best_pt)
# 判断是否存在cuda版本
if is_gpu == 'True':
commend.append("--device=0")
# 启动子进程
with subprocess.Popen(
commend,
bufsize=1, # bufsize=0时为不缓存bufsize=1时按行缓存bufsize为其他正整数时为按照近似该正整数的字节数缓存
shell=False,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, # 这里可以显示yolov5训练过程中出现的进度条等信息
text=True, # 缓存内容为文本,避免后续编码显示问题
encoding='utf-8',
) as process:
while process.poll() is None:
line = process.stdout.readline()
process.stdout.flush() # 刷新缓存,防止缓存过多造成卡死
if line != '\n' and '0%' not in line and 'yolo' not in line:
await room_manager.send_to_room(room, line + '\n')
# 等待进程结束并获取返回码
return_code = process.wait()
if return_code != 0:
await room_manager.send_to_room(room, 'error')
else:
await room_manager.send_to_room(room, 'success')
if train_info is None:
model = YoloModel()
else:
model = YoloModel(train_info.best_pt)
model.train(data=data, epochs=epochs, project=project, name=name, patience=patience)
async def add_train(

View File

@ -25,8 +25,7 @@ app = APIRouter()
@app.post("/start", summary="执行训练")
async def run_train(
train_in: schemas.ProjectTrainIn,
auth: Auth = Depends(AllUserAuth()),
rd: Redis = Depends(redis_getter)):
auth: Auth = Depends(AllUserAuth())):
proj_id = train_in.project_id
proj_dal = ProjectInfoDal(auth.db)
proj_img_dal = ProjectImageDal(auth.db)
@ -48,14 +47,14 @@ async def run_train(
if val_label_count > 0:
return ErrorResponse("验证图片中存在未标注的图片")
data, project, name = await service.before_train(proj_info, auth.db)
is_gpu = await rd.get('is_gpu')
train_info = None
if train_in.weights_id is not None:
train_info = await crud.ProjectTrainDal(auth.db).get_data(train_in.weights_id)
# 异步执行操作操作过程通过websocket进行同步
thread_train = threading.Thread(
target=service.run_event_loop,
args=(data, project, name, train_in, proj_id, train_info, is_gpu))
target=service.run_commend,
args=(data, project, name, train_in.epochs, train_in.patience, train_info)
)
thread_train.start()
await service.add_train(auth.db, proj_id, name, project, data, train_in, auth.user.id)
return SuccessResponse(msg="执行成功")

View File

@ -1,19 +0,0 @@
from .deep_sort import DeepSort
__all__ = ['DeepSort', 'build_tracker']
def build_tracker(cfg, use_cuda):
if cfg.USE_FASTREID:
return DeepSort(model_path=cfg.FASTREID.CHECKPOINT, model_config=cfg.FASTREID.CFG,
max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
use_cuda=use_cuda)
else:
return DeepSort(model_path=cfg.DEEPSORT.REID_CKPT,
max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
use_cuda=use_cuda)

View File

@ -1,10 +0,0 @@
DEEPSORT:
REID_CKPT: "./deep_sort/deep/checkpoint/ckpt.t7"
MAX_DIST: 0.2
MIN_CONFIDENCE: 0.5
NMS_MAX_OVERLAP: 0.5
MAX_IOU_DISTANCE: 0.7
MAX_AGE: 70
N_INIT: 3
NN_BUDGET: 100

View File

@ -1,3 +0,0 @@
FASTREID:
CFG: "thirdparty/fast-reid/configs/Market1501/bagtricks_R50.yml"
CHECKPOINT: "deep_sort/deep/checkpoint/market_bot_R50.pth"

View File

@ -1,6 +0,0 @@
MASKRCNN:
LABEL: "./coco_classes.json"
WEIGHT: "./detector/Mask_RCNN/save_weights/maskrcnn_resnet50_fpn_coco.pth"
NUM_CLASSES: 90
BOX_THRESH: 0.5

View File

@ -1,5 +0,0 @@
MMDET:
CFG: "thirdparty/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py"
CHECKPOINT: "detector/MMDet/weight/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth"
SCORE_THRESH: 0.5

View File

@ -1,82 +0,0 @@
In deepsort algorithm, appearance feature extraction network used to extract features from **image_crops** for matching purpose.The original model used in paper is in `model.py`, and its parameter here [ckpt.t7](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6). This repository also provides a `resnet.py` script and its pre-training weights on Imagenet here.
```
# resnet18
https://download.pytorch.org/models/resnet18-5c106cde.pth
# resnet34
https://download.pytorch.org/models/resnet34-333f7ec4.pth
# resnet50
https://download.pytorch.org/models/resnet50-19c8e357.pth
# resnext50_32x4d
https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
```
## Dataset PrePare
To train the model, first you need download [Market1501](http://www.liangzheng.com.cn/Project/project_reid.html) dataset or [Mars](http://www.liangzheng.com.cn/Project/project_mars.html) dataset.
If you want to train on your **own dataset**, assuming you have already downloaded the dataset.The dataset should be arranged in the following way.
```
├── dataset_root: The root dir of the dataset.
├── class1: Category 1 is located in the folder dir.
├── xxx1.jpg: Image belonging to category 1.
├── xxx2.jpg: Image belonging to category 1.
├── class2: Category 2 is located in the folder dir.
├── xxx3.jpg: Image belonging to category 2.
├── xxx4.jpg: Image belonging to category 2.
├── class3: Category 3 is located in the folder dir.
...
...
```
## Training the RE-ID model
Assuming you have already prepare the dataset. Then you can use the following command to start your training progress.
#### training on a single GPU
```python
usage: train.py [--data-dir]
[--epochs]
[--batch_size]
[--lr]
[--lrf]
[--weights]
[--freeze-layers]
[--gpu_id]
# default use cuda:0, use Net in `model.py`
python train.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path]
# you can use `--freeze-layers` option to freeze full convolutional layer parameters except fc layers parameters
python train.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path] --freeze-layers
```
#### training on multiple GPU
```python
usage: train_multiGPU.py [--data-dir]
[--epochs]
[--batch_size]
[--lr]
[--lrf]
[--syncBN]
[--weights]
[--freeze-layers]
# not change the following parameters, the system will automatically assignment
[--device]
[--world_size]
[--dist_url]
# default use cuda:0, cuda:1, cuda:2, cuda:3, use resnet18 in `resnet.py`
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 train_multiGPU.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path]
# you can use `--freeze-layers` option to freeze full convolutional layer parameters except fc layers parameters
CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 train_multiGPU.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path] --freeze-layers
```
An example of training progress is as follows:
![train.jpg](./train.jpg)
The last, you can evaluate it using [test.py](deep_sort/deep/test.py) and [evaluate.py](deep_sort/deep/evalute.py).

Binary file not shown.

View File

@ -1,92 +0,0 @@
import json
import os
import random
import cv2
from PIL import Image
import torch
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
class ClsDataset(Dataset):
def __init__(self, images_path, images_labels, transform=None):
self.images_path = images_path
self.images_labels = images_labels
self.transform = transform
def __len__(self):
return len(self.images_path)
def __getitem__(self, idx):
img = cv2.imread(self.images_path[idx])
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = Image.fromarray(img)
label = self.images_labels[idx]
if self.transform is not None:
img = self.transform(img)
return img, label
@staticmethod
def collate_fn(batch):
images, labels = tuple(zip(*batch))
images = torch.stack(images, dim=0)
labels = torch.as_tensor(labels)
return images, labels
def read_split_data(root, valid_rate=0.2):
assert os.path.exists(root), 'dataset root: {} does not exist.'.format(root)
class_names = [cls for cls in os.listdir(root) if os.path.isdir(os.path.join(root, cls))]
class_names.sort()
class_indices = {name: i for i, name in enumerate(class_names)}
json_str = json.dumps({v: k for k, v in class_indices.items()}, indent=4)
with open('class_indices.json', 'w') as f:
f.write(json_str)
train_images_path = []
train_labels = []
val_images_path = []
val_labels = []
per_class_num = []
supported = ['.jpg', '.JPG', '.png', '.PNG']
for cls in class_names:
cls_path = os.path.join(root, cls)
images_path = [os.path.join(cls_path, i) for i in os.listdir(cls_path)
if os.path.splitext(i)[-1] in supported]
images_label = class_indices[cls]
per_class_num.append(len(images_path))
val_path = random.sample(images_path, int(len(images_path) * valid_rate))
for img_path in images_path:
if img_path in val_path:
val_images_path.append(img_path)
val_labels.append(images_label)
else:
train_images_path.append(img_path)
train_labels.append(images_label)
print("{} images were found in the dataset.".format(sum(per_class_num)))
print("{} images for training.".format(len(train_images_path)))
print("{} images for validation.".format(len(val_images_path)))
assert len(train_images_path) > 0, "number of training images must greater than zero"
assert len(val_images_path) > 0, "number of validation images must greater than zero"
plot_distribution = False
if plot_distribution:
plt.bar(range(len(class_names)), per_class_num, align='center')
plt.xticks(range(len(class_names)), class_names)
for i, v in enumerate(per_class_num):
plt.text(x=i, y=v + 5, s=str(v), ha='center')
plt.xlabel('classes')
plt.ylabel('numbers')
plt.title('the distribution of dataset')
plt.show()
return [train_images_path, train_labels], [val_images_path, val_labels], len(class_names)

View File

@ -1,15 +0,0 @@
import torch
features = torch.load("features.pth")
qf = features["qf"]
ql = features["ql"]
gf = features["gf"]
gl = features["gl"]
scores = qf.mm(gf.t())
res = scores.topk(5, dim=1)[1][:,0]
top1correct = gl[res].eq(ql).sum().item()
print("Acc top1:{:.3f}".format(top1correct/ql.size(0)))

View File

@ -1,93 +0,0 @@
import torch
import torchvision.transforms as transforms
import numpy as np
import cv2
import logging
from .model import Net
from .resnet import resnet18
# from fastreid.config import get_cfg
# from fastreid.engine import DefaultTrainer
# from fastreid.utils.checkpoint import Checkpointer
class Extractor(object):
def __init__(self, model_path, use_cuda=True):
self.net = Net(reid=True)
# self.net = resnet18(reid=True)
self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
self.net.load_state_dict(state_dict if 'net_dict' not in state_dict else state_dict['net_dict'], strict=False)
logger = logging.getLogger("root.tracker")
logger.info("Loading weights from {}... Done!".format(model_path))
self.net.to(self.device)
self.size = (64, 128)
self.norm = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def _preprocess(self, im_crops):
"""
TODO:
1. to float with scale from 0 to 1
2. resize to (64, 128) as Market1501 dataset did
3. concatenate to a numpy array
3. to torch Tensor
4. normalize
"""
def _resize(im, size):
return cv2.resize(im.astype(np.float32) / 255., size)
im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float()
return im_batch
def __call__(self, im_crops):
im_batch = self._preprocess(im_crops)
with torch.no_grad():
im_batch = im_batch.to(self.device)
features = self.net(im_batch)
return features.cpu().numpy()
class FastReIDExtractor(object):
def __init__(self, model_config, model_path, use_cuda=True):
cfg = get_cfg()
cfg.merge_from_file(model_config)
cfg.MODEL.BACKBONE.PRETRAIN = False
self.net = DefaultTrainer.build_model(cfg)
self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
Checkpointer(self.net).load(model_path)
logger = logging.getLogger("root.tracker")
logger.info("Loading weights from {}... Done!".format(model_path))
self.net.to(self.device)
self.net.eval()
height, width = cfg.INPUT.SIZE_TEST
self.size = (width, height)
self.norm = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def _preprocess(self, im_crops):
def _resize(im, size):
return cv2.resize(im.astype(np.float32) / 255., size)
im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float()
return im_batch
def __call__(self, im_crops):
im_batch = self._preprocess(im_crops)
with torch.no_grad():
im_batch = im_batch.to(self.device)
features = self.net(im_batch)
return features.cpu().numpy()
if __name__ == '__main__':
img = cv2.imread("demo.jpg")[:, :, (2, 1, 0)]
extr = Extractor("checkpoint/ckpt.t7")
feature = extr(img)
print(feature.shape)

View File

@ -1,105 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
def __init__(self, c_in, c_out, is_downsample=False):
super(BasicBlock, self).__init__()
self.is_downsample = is_downsample
if is_downsample:
self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
else:
self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(c_out)
self.relu = nn.ReLU(True)
self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(c_out)
if is_downsample:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
nn.BatchNorm2d(c_out)
)
elif c_in != c_out:
self.downsample = nn.Sequential(
nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
nn.BatchNorm2d(c_out)
)
self.is_downsample = True
def forward(self, x):
y = self.conv1(x)
y = self.bn1(y)
y = self.relu(y)
y = self.conv2(y)
y = self.bn2(y)
if self.is_downsample:
x = self.downsample(x)
return F.relu(x.add(y), True)
def make_layers(c_in, c_out, repeat_times, is_downsample=False):
blocks = []
for i in range(repeat_times):
if i == 0:
blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
else:
blocks += [BasicBlock(c_out, c_out), ]
return nn.Sequential(*blocks)
class Net(nn.Module):
def __init__(self, num_classes=751, reid=False):
super(Net, self).__init__()
# 3 128 64
self.conv = nn.Sequential(
nn.Conv2d(3, 64, 3, stride=1, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
# nn.Conv2d(32,32,3,stride=1,padding=1),
# nn.BatchNorm2d(32),
# nn.ReLU(inplace=True),
nn.MaxPool2d(3, 2, padding=1),
)
# 32 64 32
self.layer1 = make_layers(64, 64, 2, False)
# 32 64 32
self.layer2 = make_layers(64, 128, 2, True)
# 64 32 16
self.layer3 = make_layers(128, 256, 2, True)
# 128 16 8
self.layer4 = make_layers(256, 512, 2, True)
# 256 8 4
self.avgpool = nn.AdaptiveAvgPool2d(1)
# 256 1 1
self.reid = reid
self.classifier = nn.Sequential(
nn.Linear(512, 256),
nn.BatchNorm1d(256),
nn.ReLU(inplace=True),
nn.Dropout(),
nn.Linear(256, num_classes),
)
def forward(self, x):
x = self.conv(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
# B x 128
if self.reid:
x = x.div(x.norm(p=2, dim=1, keepdim=True))
return x
# classifier
x = self.classifier(x)
return x
if __name__ == '__main__':
net = Net()
x = torch.randn(4, 3, 128, 64)
y = net(x)

View File

@ -1,67 +0,0 @@
import os
import torch
import torch.distributed as dist
def init_distributed_mode(args):
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
args.rank = int(os.environ['RANK'])
args.world_size = int(os.environ['WORLD_SIZE'])
args.gpu = int(os.environ['LOCAL_RANK'])
elif 'SLURM_PROCID' in os.environ:
args.rank = int(os.environ['SLURM_PROCID'])
args.gpu = args.rank % torch.cuda.device_count()
else:
print("Not using distributed mode")
args.distributed = False
return
args.distributed = True
torch.cuda.set_device(args.gpu)
args.dist_backend = 'nccl'
print('| distributed init (rank {}): {}'.format(args.rank, args.dist_url), flush=True)
dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
dist.barrier()
def cleanup():
dist.destroy_process_group()
def is_dist_avail_and_initialized():
if not dist.is_available():
return False
if not dist.is_initialized():
return False
return True
def get_world_size():
if not is_dist_avail_and_initialized():
return 1
return dist.get_world_size()
def get_rank():
if not is_dist_avail_and_initialized():
return 0
return dist.get_rank()
def is_main_process():
return get_rank() == 0
def reduce_value(value, average=True):
world_size = get_world_size()
if world_size < 2:
return value
with torch.no_grad():
dist.all_reduce(value)
if average:
value /= world_size
return value

View File

@ -1,90 +0,0 @@
import sys
from tqdm import tqdm
import torch
from .distributed_utils import reduce_value, is_main_process
def load_model(state_dict, model_state_dict, model):
for k in state_dict:
if k in model_state_dict:
if state_dict[k].shape != model_state_dict[k].shape:
print('Skip loading parameter {}, required shape {}, ' \
'loaded shape {}.'.format(
k, model_state_dict[k].shape, state_dict[k].shape))
state_dict[k] = model_state_dict[k]
else:
print('Drop parameter {}.'.format(k))
for k in model_state_dict:
if not (k in state_dict):
print('No param {}.'.format(k))
state_dict[k] = model_state_dict[k]
model.load_state_dict(state_dict, strict=False)
return model
def train_one_epoch(model, optimizer, data_loader, device, epoch):
model.train()
criterion = torch.nn.CrossEntropyLoss()
mean_loss = torch.zeros(1).to(device)
sum_num = torch.zeros(1).to(device)
optimizer.zero_grad()
if is_main_process():
data_loader = tqdm(data_loader, file=sys.stdout)
for idx, (images, labels) in enumerate(data_loader):
# forward
images, labels = images.to(device), labels.to(device)
outputs = model(images)
loss = criterion(outputs, labels)
# backward
loss.backward()
loss = reduce_value(loss, average=True)
mean_loss = (mean_loss * idx + loss.detach()) / (idx + 1)
pred = torch.max(outputs, dim=1)[1]
sum_num += torch.eq(pred, labels).sum()
if is_main_process():
data_loader.desc = '[epoch {}] mean loss {}'.format(epoch, mean_loss.item())
if not torch.isfinite(loss):
print('loss is infinite, ending training')
sys.exit(1)
optimizer.step()
optimizer.zero_grad()
if device != torch.device('cpu'):
torch.cuda.synchronize(device)
sum_num = reduce_value(sum_num, average=False)
return sum_num.item(), mean_loss.item()
@torch.no_grad()
def evaluate(model, data_loader, device):
model.eval()
criterion = torch.nn.CrossEntropyLoss()
test_loss = torch.zeros(1).to(device)
sum_num = torch.zeros(1).to(device)
if is_main_process():
data_loader = tqdm(data_loader, file=sys.stdout)
for idx, (inputs, labels) in enumerate(data_loader):
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
loss = criterion(outputs, labels)
loss = reduce_value(loss, average=True)
test_loss = (test_loss * idx + loss.detach()) / (idx + 1)
pred = torch.max(outputs, dim=1)[1]
sum_num += torch.eq(pred, labels).sum()
if device != torch.device('cpu'):
torch.cuda.synchronize(device)
sum_num = reduce_value(sum_num, average=False)
return sum_num.item(), test_loss.item()

View File

@ -1,173 +0,0 @@
import torch.nn as nn
import torch
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=3,
stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channel)
self.relu = nn.ReLU()
self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel, kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channel)
self.downsample = downsample
def forward(self, x):
identity = x
if self.downsample is not None:
identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, in_channel, out_channel, stride=1, downsample=None,
groups=1, width_per_group=64):
super(Bottleneck, self).__init__()
width = int(out_channel * (width_per_group / 64.)) * groups
self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width, kernel_size=1,
stride=1, bias=False)
self.bn1 = nn.BatchNorm2d(width)
self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, kernel_size=3,
stride=stride, padding=1, bias=False, groups=groups)
self.bn2 = nn.BatchNorm2d(width)
self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel * self.expansion,
kernel_size=1, stride=1, bias=False)
self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
def forward(self, x):
identity = x
if self.downsample is not None:
identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, blocks_num, reid=False, num_classes=1000, groups=1, width_per_group=64):
super(ResNet, self).__init__()
self.reid = reid
self.in_channel = 64
self.groups = groups
self.width_per_group = width_per_group
self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
padding=3, bias=False)
self.bn1 = nn.BatchNorm2d(self.in_channel)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layers(block, 64, blocks_num[0])
self.layer2 = self._make_layers(block, 128, blocks_num[1], stride=2)
self.layer3 = self._make_layers(block, 256, blocks_num[2], stride=2)
# self.layer4 = self._make_layers(block, 512, blocks_num[3], stride=1)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(256 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def _make_layers(self, block, channel, block_num, stride=1):
downsample = None
if stride != 1 or self.in_channel != channel * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(channel * block.expansion)
)
layers = []
layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride,
groups=self.groups, width_per_group=self.width_per_group))
self.in_channel = channel * block.expansion
for _ in range(1, block_num):
layers.append(block(self.in_channel, channel, groups=self.groups, width_per_group=self.width_per_group))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
# x = self.layer4(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
# B x 512
if self.reid:
x = x.div(x.norm(p=2, dim=1, keepdim=True))
return x
# classifier
x = self.fc(x)
return x
def resnet18(num_classes=1000, reid=False):
# https://download.pytorch.org/models/resnet18-5c106cde.pth
return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes, reid=reid)
def resnet34(num_classes=1000, reid=False):
# https://download.pytorch.org/models/resnet34-333f7ec4.pth
return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, reid=reid)
def resnet50(num_classes=1000, reid=False):
# https://download.pytorch.org/models/resnet50-19c8e357.pth
return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, reid=reid)
def resnext50_32x4d(num_classes=1000, reid=False):
# https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
groups = 32
width_per_group = 4
return ResNet(Bottleneck, [3, 4, 6, 3], reid=reid,
num_classes=num_classes, groups=groups, width_per_group=width_per_group)
if __name__ == '__main__':
net = resnet18(reid=True)
x = torch.randn(4, 3, 128, 64)
y = net(x)

View File

@ -1,77 +0,0 @@
import torch
import torch.backends.cudnn as cudnn
import torchvision
import argparse
import os
from model import Net
parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir", default='data', type=str)
parser.add_argument("--no-cuda", action="store_true")
parser.add_argument("--gpu-id", default=0, type=int)
args = parser.parse_args()
# device
device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
if torch.cuda.is_available() and not args.no_cuda:
cudnn.benchmark = True
# data loader
root = args.data_dir
query_dir = os.path.join(root, "query")
gallery_dir = os.path.join(root, "gallery")
transform = torchvision.transforms.Compose([
torchvision.transforms.Resize((128, 64)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
queryloader = torch.utils.data.DataLoader(
torchvision.datasets.ImageFolder(query_dir, transform=transform),
batch_size=64, shuffle=False
)
galleryloader = torch.utils.data.DataLoader(
torchvision.datas0ets.ImageFolder(gallery_dir, transform=transform),
batch_size=64, shuffle=False
)
# net definition
net = Net(reid=True)
assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
print('Loading from checkpoint/ckpt.t7')
checkpoint = torch.load("./checkpoint/ckpt.t7")
net_dict = checkpoint['net_dict']
net.load_state_dict(net_dict, strict=False)
net.eval()
net.to(device)
# compute features
query_features = torch.tensor([]).float()
query_labels = torch.tensor([]).long()
gallery_features = torch.tensor([]).float()
gallery_labels = torch.tensor([]).long()
with torch.no_grad():
for idx, (inputs, labels) in enumerate(queryloader):
inputs = inputs.to(device)
features = net(inputs).cpu()
query_features = torch.cat((query_features, features), dim=0)
query_labels = torch.cat((query_labels, labels))
for idx, (inputs, labels) in enumerate(galleryloader):
inputs = inputs.to(device)
features = net(inputs).cpu()
gallery_features = torch.cat((gallery_features, features), dim=0)
gallery_labels = torch.cat((gallery_labels, labels))
gallery_labels -= 2
# save features
features = {
"qf": query_features,
"ql": query_labels,
"gf": gallery_features,
"gl": gallery_labels
}
torch.save(features, "features.pth")

Binary file not shown.

Before

Width:  |  Height:  |  Size: 59 KiB

View File

@ -1,151 +0,0 @@
import argparse
import os
import tempfile
import math
import warnings
import matplotlib.pyplot as plt
import torch
import torchvision
from torch.optim import lr_scheduler
from multi_train_utils.distributed_utils import init_distributed_mode, cleanup
from multi_train_utils.train_eval_utils import train_one_epoch, evaluate, load_model
import torch.distributed as dist
from datasets import ClsDataset, read_split_data
from model import Net
from resnet import resnet18
# plot figure
x_epoch = []
record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []}
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1_err")
def draw_curve(epoch, train_loss, train_err, test_loss, test_err):
global record
record['train_loss'].append(train_loss)
record['train_err'].append(train_err)
record['test_loss'].append(test_loss)
record['test_err'].append(test_err)
x_epoch.append(epoch)
ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train')
ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val')
ax1.plot(x_epoch, record['train_err'], 'bo-', label='train')
ax1.plot(x_epoch, record['test_err'], 'ro-', label='val')
if epoch == 0:
ax0.legend()
ax1.legend()
fig.savefig("train.jpg")
def main(args):
batch_size = args.batch_size
device = 'cuda:{}'.format(args.gpu_id) if torch.cuda.is_available() else 'cpu'
train_info, val_info, num_classes = read_split_data(args.data_dir, valid_rate=0.2)
train_images_path, train_labels = train_info
val_images_path, val_labels = val_info
transform_train = torchvision.transforms.Compose([
torchvision.transforms.RandomCrop((128, 64), padding=4),
torchvision.transforms.RandomHorizontalFlip(),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transform_val = torchvision.transforms.Compose([
torchvision.transforms.Resize((128, 64)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
train_dataset = ClsDataset(
images_path=train_images_path,
images_labels=train_labels,
transform=transform_train
)
val_dataset = ClsDataset(
images_path=val_images_path,
images_labels=val_labels,
transform=transform_val
)
number_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
print('Using {} dataloader workers every process'.format(number_workers))
train_loader = torch.utils.data.DataLoader(
train_dataset,
batch_size=batch_size,
shuffle=True,
pin_memory=True,
num_workers=number_workers
)
val_loader = torch.utils.data.DataLoader(
val_dataset,
batch_size=batch_size,
shuffle=False,
pin_memory=True,
num_workers=number_workers,
)
# net definition
start_epoch = 0
net = Net(num_classes=num_classes)
if args.weights:
print('Loading from ', args.weights)
checkpoint = torch.load(args.weights, map_location='cpu')
net_dict = checkpoint if 'net_dict' not in checkpoint else checkpoint['net_dict']
start_epoch = checkpoint['epoch'] if 'epoch' in checkpoint else start_epoch
net = load_model(net_dict, net.state_dict(), net)
if args.freeze_layers:
for name, param in net.named_parameters():
if 'classifier' not in name:
param.requires_grad = False
net.to(device)
# loss and optimizer
pg = [p for p in net.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(pg, args.lr, momentum=0.9, weight_decay=5e-4)
lr = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lr)
for epoch in range(start_epoch, start_epoch + args.epochs):
train_positive, train_loss = train_one_epoch(net, optimizer, train_loader, device, epoch)
train_acc = train_positive / len(train_dataset)
scheduler.step()
test_positive, test_loss = evaluate(net, val_loader, device)
test_acc = test_positive / len(val_dataset)
print('[epoch {}] accuracy: {}'.format(epoch, test_acc))
state_dict = {
'net_dict': net.state_dict(),
'acc': test_acc,
'epoch': epoch
}
torch.save(state_dict, './checkpoint/model_{}.pth'.format(epoch))
draw_curve(epoch, train_loss, 1 - train_acc, test_loss, 1 - test_acc)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir", default='data', type=str)
parser.add_argument('--epochs', type=int, default=40)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument("--lr", default=0.001, type=float)
parser.add_argument('--lrf', default=0.1, type=float)
parser.add_argument('--weights', type=str, default='./checkpoint/resnet18.pth')
parser.add_argument('--freeze-layers', action='store_true')
parser.add_argument('--gpu_id', default='0', help='gpu id')
args = parser.parse_args()
main(args)

View File

@ -1,189 +0,0 @@
import argparse
import os
import tempfile
import math
import warnings
import matplotlib.pyplot as plt
import torch
import torchvision
from torch.optim import lr_scheduler
from multi_train_utils.distributed_utils import init_distributed_mode, cleanup
from multi_train_utils.train_eval_utils import train_one_epoch, evaluate, load_model
import torch.distributed as dist
from datasets import ClsDataset, read_split_data
from resnet import resnet18
# plot figure
x_epoch = []
record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []}
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1_err")
def draw_curve(epoch, train_loss, train_err, test_loss, test_err):
global record
record['train_loss'].append(train_loss)
record['train_err'].append(train_err)
record['test_loss'].append(test_loss)
record['test_err'].append(test_err)
x_epoch.append(epoch)
ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train')
ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val')
ax1.plot(x_epoch, record['train_err'], 'bo-', label='train')
ax1.plot(x_epoch, record['test_err'], 'ro-', label='val')
if epoch == 0:
ax0.legend()
ax1.legend()
fig.savefig("train.jpg")
def main(args):
init_distributed_mode(args)
rank = args.rank
device = torch.device(args.device)
batch_size = args.batch_size
weights_path = args.weights
args.lr *= args.world_size
checkpoint_path = ''
if rank == 0:
print(args)
if os.path.exists('./checkpoint') is False:
os.mkdir('./checkpoint')
train_info, val_info, num_classes = read_split_data(args.data_dir, valid_rate=0.2)
train_images_path, train_labels = train_info
val_images_path, val_labels = val_info
transform_train = torchvision.transforms.Compose([
torchvision.transforms.RandomCrop((128, 64), padding=4),
torchvision.transforms.RandomHorizontalFlip(),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transform_val = torchvision.transforms.Compose([
torchvision.transforms.Resize((128, 64)),
torchvision.transforms.ToTensor(),
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
train_dataset = ClsDataset(
images_path=train_images_path,
images_labels=train_labels,
transform=transform_train
)
val_dataset = ClsDataset(
images_path=val_images_path,
images_labels=val_labels,
transform=transform_val
)
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, batch_size, drop_last=True)
number_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
if rank == 0:
print('Using {} dataloader workers every process'.format(number_workers))
train_loader = torch.utils.data.DataLoader(
train_dataset,
batch_sampler=train_batch_sampler,
pin_memory=True,
num_workers=number_workers
)
val_loader = torch.utils.data.DataLoader(
val_dataset,
sampler=val_sampler,
batch_size=batch_size,
pin_memory=True,
num_workers=number_workers,
)
# net definition
start_epoch = 0
net = resnet18(num_classes=num_classes)
if args.weights:
print('Loading from ', args.weights)
checkpoint = torch.load(args.weights, map_location='cpu')
net_dict = checkpoint if 'net_dict' not in checkpoint else checkpoint['net_dict']
start_epoch = checkpoint['epoch'] if 'epoch' in checkpoint else start_epoch
net = load_model(net_dict, net.state_dict(), net)
else:
warnings.warn("better providing pretraining weights")
checkpoint_path = os.path.join(tempfile.gettempdir(), 'initial_weights.pth')
if rank == 0:
torch.save(net.state_dict(), checkpoint_path)
dist.barrier()
net.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
if args.freeze_layers:
for name, param in net.named_parameters():
if 'fc' not in name:
param.requires_grad = False
else:
if args.syncBN:
net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net)
net.to(device)
net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.gpu])
# loss and optimizer
pg = [p for p in net.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(pg, args.lr, momentum=0.9, weight_decay=5e-4)
lr = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lr)
for epoch in range(start_epoch, start_epoch + args.epochs):
train_positive, train_loss = train_one_epoch(net, optimizer, train_loader, device, epoch)
train_acc = train_positive / len(train_dataset)
scheduler.step()
test_positive, test_loss = evaluate(net, val_loader, device)
test_acc = test_positive / len(val_dataset)
if rank == 0:
print('[epoch {}] accuracy: {}'.format(epoch, test_acc))
state_dict = {
'net_dict': net.module.state_dict(),
'acc': test_acc,
'epoch': epoch
}
torch.save(state_dict, './checkpoint/model_{}.pth'.format(epoch))
draw_curve(epoch, train_loss, 1 - train_acc, test_loss, 1 - test_acc)
if rank == 0:
if os.path.exists(checkpoint_path) is True:
os.remove(checkpoint_path)
cleanup()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir", default='data', type=str)
parser.add_argument('--epochs', type=int, default=40)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument("--lr", default=0.001, type=float)
parser.add_argument('--lrf', default=0.1, type=float)
parser.add_argument('--syncBN', type=bool, default=True)
parser.add_argument('--weights', type=str, default='./checkpoint/resnet18.pth')
parser.add_argument('--freeze-layers', action='store_true')
# not change the following parameters, the system will automatically assignment
parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0, 1 or cpu)')
parser.add_argument('--world_size', default=4, type=int, help='number of distributed processes')
parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
args = parser.parse_args()
main(args)

View File

@ -1,121 +0,0 @@
import numpy as np
import torch
from .deep.feature_extractor import Extractor, FastReIDExtractor
from .sort.nn_matching import NearestNeighborDistanceMetric
from .sort.preprocessing import non_max_suppression
from .sort.detection import Detection
from .sort.tracker import Tracker
__all__ = ['DeepSort']
class DeepSort(object):
def __init__(self, model_path, model_config=None, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0,
max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True):
self.min_confidence = min_confidence
self.nms_max_overlap = nms_max_overlap
if model_config is None:
self.extractor = Extractor(model_path, use_cuda=use_cuda)
else:
self.extractor = FastReIDExtractor(model_config, model_path, use_cuda=use_cuda)
max_cosine_distance = max_dist
metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init)
def update(self, bbox_xywh, confidences, classes, ori_img, masks=None):
self.height, self.width = ori_img.shape[:2]
# generate detections
features = self._get_features(bbox_xywh, ori_img)
bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)
detections = [Detection(bbox_tlwh[i], conf, label, features[i], None if masks is None else masks[i])
for i, (conf, label) in enumerate(zip(confidences, classes))
if conf > self.min_confidence]
# run on non-maximum supression
boxes = np.array([d.tlwh for d in detections])
scores = np.array([d.confidence for d in detections])
indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
detections = [detections[i] for i in indices]
# update tracker
self.tracker.predict()
self.tracker.update(detections)
# output bbox identities
outputs = []
mask_outputs = []
for track in self.tracker.tracks:
if not track.is_confirmed() or track.time_since_update > 1:
continue
box = track.to_tlwh()
x1, y1, x2, y2 = self._tlwh_to_xyxy(box)
track_id = track.track_id
track_cls = track.cls
outputs.append(np.array([x1, y1, x2, y2, track_cls, track_id], dtype=np.int32))
if track.mask is not None:
mask_outputs.append(track.mask)
if len(outputs) > 0:
outputs = np.stack(outputs, axis=0)
return outputs, mask_outputs
"""
TODO:
Convert bbox from xc_yc_w_h to xtl_ytl_w_h
Thanks JieChen91@github.com for reporting this bug!
"""
@staticmethod
def _xywh_to_tlwh(bbox_xywh):
if isinstance(bbox_xywh, np.ndarray):
bbox_tlwh = bbox_xywh.copy()
elif isinstance(bbox_xywh, torch.Tensor):
bbox_tlwh = bbox_xywh.clone()
bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2.
bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2.
return bbox_tlwh
def _xywh_to_xyxy(self, bbox_xywh):
x, y, w, h = bbox_xywh
x1 = max(int(x - w / 2), 0)
x2 = min(int(x + w / 2), self.width - 1)
y1 = max(int(y - h / 2), 0)
y2 = min(int(y + h / 2), self.height - 1)
return x1, y1, x2, y2
def _tlwh_to_xyxy(self, bbox_tlwh):
"""
TODO:
Convert bbox from xtl_ytl_w_h to xc_yc_w_h
Thanks JieChen91@github.com for reporting this bug!
"""
x, y, w, h = bbox_tlwh
x1 = max(int(x), 0)
x2 = min(int(x + w), self.width - 1)
y1 = max(int(y), 0)
y2 = min(int(y + h), self.height - 1)
return x1, y1, x2, y2
@staticmethod
def _xyxy_to_tlwh(bbox_xyxy):
x1, y1, x2, y2 = bbox_xyxy
t = x1
l = y1
w = int(x2 - x1)
h = int(y2 - y1)
return t, l, w, h
def _get_features(self, bbox_xywh, ori_img):
im_crops = []
for box in bbox_xywh:
x1, y1, x2, y2 = self._xywh_to_xyxy(box)
im = ori_img[y1:y2, x1:x2]
im_crops.append(im)
if im_crops:
features = self.extractor(im_crops)
else:
features = np.array([])
return features

View File

@ -1,51 +0,0 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
class Detection(object):
"""
This class represents a bounding box detection in a single image.
Parameters
----------
tlwh : array_like
Bounding box in format `(x, y, w, h)`.
confidence : float
Detector confidence score.
feature : array_like
A feature vector that describes the object contained in this image.
Attributes
----------
tlwh : ndarray
Bounding box in format `(top left x, top left y, width, height)`.
confidence : ndarray
Detector confidence score.
feature : ndarray | NoneType
A feature vector that describes the object contained in this image.
"""
def __init__(self, tlwh, confidence, label, feature, mask=None):
self.tlwh = np.asarray(tlwh, dtype=np.float32)
self.confidence = float(confidence)
self.cls = int(label)
self.feature = np.asarray(feature, dtype=np.float32)
self.mask = mask
def to_tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
def to_xyah(self):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = self.tlwh.copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret

View File

@ -1,81 +0,0 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import linear_assignment
def iou(bbox, candidates):
"""Computer intersection over union.
Parameters
----------
bbox : ndarray
A bounding box in format `(top left x, top left y, width, height)`.
candidates : ndarray
A matrix of candidate bounding boxes (one per row) in the same format
as `bbox`.
Returns
-------
ndarray
The intersection over union in [0, 1] between the `bbox` and each
candidate. A higher score means a larger fraction of the `bbox` is
occluded by the candidate.
"""
bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
candidates_tl = candidates[:, :2]
candidates_br = candidates[:, :2] + candidates[:, 2:]
tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
wh = np.maximum(0., br - tl)
area_intersection = wh.prod(axis=1)
area_bbox = bbox[2:].prod()
area_candidates = candidates[:, 2:].prod(axis=1)
return area_intersection / (area_bbox + area_candidates - area_intersection)
def iou_cost(tracks, detections, track_indices=None,
detection_indices=None):
"""An intersection over union distance metric.
Parameters
----------
tracks : List[deep_sort.track.Track]
A list of tracks.
detections : List[deep_sort.detection.Detection]
A list of detections.
track_indices : Optional[List[int]]
A list of indices to tracks that should be matched. Defaults to
all `tracks`.
detection_indices : Optional[List[int]]
A list of indices to detections that should be matched. Defaults
to all `detections`.
Returns
-------
ndarray
Returns a cost matrix of shape
len(track_indices), len(detection_indices) where entry (i, j) is
`1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
for row, track_idx in enumerate(track_indices):
if tracks[track_idx].time_since_update > 1:
cost_matrix[row, :] = linear_assignment.INFTY_COST
continue
bbox = tracks[track_idx].to_tlwh()
candidates = np.asarray([detections[i].tlwh for i in detection_indices])
cost_matrix[row, :] = 1. - iou(bbox, candidates)
return cost_matrix

View File

@ -1,231 +0,0 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919}
class KalmanFilter(object):
"""
A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space
x, y, a, h, vx, vy, va, vh
contains the bounding box center position (x, y), aspect ratio a, height h,
and their respective velocities.
Object motion follows a constant velocity model. The bounding box location
(x, y, a, h) is taken as direct observation of the state space (linear
observation model).
"""
def __init__(self):
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
self._motion_mat = np.eye(2 * ndim, 2 * ndim)
for i in range(ndim):
self._motion_mat[i, ndim + i] = dt
self._update_mat = np.eye(ndim, 2 * ndim)
# Motion and observation uncertainty are chosen relative to the current
# state estimate. These weights control the amount of uncertainty in
# the model. This is a bit hacky.
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initiate(self, measurement):
"""Create track from unassociated measurement.
Parameters
----------
measurement : ndarray
Bounding box coordinates (x, y, a, h) with center position (x, y),
aspect ratio a, and height h.
Returns
-------
(ndarray, ndarray)
Returns the mean vector (8 dimensional) and covariance matrix (8x8
dimensional) of the new track. Unobserved velocities are initialized
to 0 mean.
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
mean = np.r_[mean_pos, mean_vel]
std = [
2 * self._std_weight_position * measurement[3],
2 * self._std_weight_position * measurement[3],
1e-2,
2 * self._std_weight_position * measurement[3],
10 * self._std_weight_velocity * measurement[3],
10 * self._std_weight_velocity * measurement[3],
1e-5,
10 * self._std_weight_velocity * measurement[3]]
covariance = np.diag(np.square(std))
return mean, covariance
def predict(self, mean, covariance):
"""Run Kalman filter prediction step.
Parameters
----------
mean : ndarray
The 8 dimensional mean vector of the object state at the previous
time step.
covariance : ndarray
The 8x8 dimensional covariance matrix of the object state at the
previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted
state. Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[3],
self._std_weight_position * mean[3],
1e-2,
self._std_weight_position * mean[3]]
std_vel = [
self._std_weight_velocity * mean[3],
self._std_weight_velocity * mean[3],
1e-5,
self._std_weight_velocity * mean[3]]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
mean = np.dot(self._motion_mat, mean)
covariance = np.linalg.multi_dot((
self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
return mean, covariance
def project(self, mean, covariance):
"""Project state distribution to measurement space.
Parameters
----------
mean : ndarray
The state's mean vector (8 dimensional array).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
Returns
-------
(ndarray, ndarray)
Returns the projected mean and covariance matrix of the given state
estimate.
"""
std = [
self._std_weight_position * mean[3],
self._std_weight_position * mean[3],
1e-1,
self._std_weight_position * mean[3]]
innovation_cov = np.diag(np.square(std))
mean = np.dot(self._update_mat, mean)
covariance = np.linalg.multi_dot((
self._update_mat, covariance, self._update_mat.T))
return mean, covariance + innovation_cov
def update(self, mean, covariance, measurement):
"""Run Kalman filter correction step.
Parameters
----------
mean : ndarray
The predicted state's mean vector (8 dimensional).
covariance : ndarray
The state's covariance matrix (8x8 dimensional).
measurement : ndarray
The 4 dimensional measurement vector (x, y, a, h), where (x, y)
is the center position, a the aspect ratio, and h the height of the
bounding box.
Returns
-------
(ndarray, ndarray)
Returns the measurement-corrected state distribution.
"""
projected_mean, projected_cov = self.project(mean, covariance)
chol_factor, lower = scipy.linalg.cho_factor(
projected_cov, lower=True, check_finite=False)
kalman_gain = scipy.linalg.cho_solve(
(chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
check_finite=False).T
innovation = measurement - projected_mean
new_mean = mean + np.dot(innovation, kalman_gain.T)
# new_covariance = covariance - np.linalg.multi_dot((
# kalman_gain, projected_cov, kalman_gain.T))
new_covariance = covariance - np.linalg.multi_dot((
kalman_gain, self._update_mat, covariance))
return new_mean, new_covariance
def gating_distance(self, mean, covariance, measurements,
only_position=False):
"""Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Parameters
----------
mean : ndarray
Mean vector over the state distribution (8 dimensional).
covariance : ndarray
Covariance of the state distribution (8x8 dimensional).
measurements : ndarray
An Nx4 dimensional matrix of N measurements, each in
format (x, y, a, h) where (x, y) is the bounding box center
position, a the aspect ratio, and h the height.
only_position : Optional[bool]
If True, distance computation is done with respect to the bounding
box center position only.
Returns
-------
ndarray
Returns an array of length N, where the i-th element contains the
squared Mahalanobis distance between (mean, covariance) and
`measurements[i]`.
"""
mean, covariance = self.project(mean, covariance)
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
cholesky_factor = np.linalg.cholesky(covariance)
d = measurements - mean
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha

View File

@ -1,192 +0,0 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
# from sklearn.utils.linear_assignment_ import linear_assignment
from scipy.optimize import linear_sum_assignment as linear_assignment
from . import kalman_filter
INFTY_COST = 1e+5
def min_cost_matching(
distance_metric, max_distance, tracks, detections, track_indices=None,
detection_indices=None):
"""Solve linear assignment problem.
Parameters
----------
distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
The distance metric is given a list of tracks and detections as well as
a list of N track indices and M detection indices. The metric should
return the NxM dimensional cost matrix, where element (i, j) is the
association cost between the i-th track in the given track indices and
the j-th detection in the given detection_indices.
max_distance : float
Gating threshold. Associations with cost larger than this value are
disregarded.
tracks : List[track.Track]
A list of predicted tracks at the current time step.
detections : List[detection.Detection]
A list of detections at the current time step.
track_indices : List[int]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above).
detection_indices : List[int]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above).
Returns
-------
(List[(int, int)], List[int], List[int])
Returns a tuple with the following three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
if len(detection_indices) == 0 or len(track_indices) == 0:
return [], track_indices, detection_indices # Nothing to match.
cost_matrix = distance_metric(
tracks, detections, track_indices, detection_indices)
cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
row_indices, col_indices = linear_assignment(cost_matrix)
matches, unmatched_tracks, unmatched_detections = [], [], []
for col, detection_idx in enumerate(detection_indices):
if col not in col_indices:
unmatched_detections.append(detection_idx)
for row, track_idx in enumerate(track_indices):
if row not in row_indices:
unmatched_tracks.append(track_idx)
for row, col in zip(row_indices, col_indices):
track_idx = track_indices[row]
detection_idx = detection_indices[col]
if cost_matrix[row, col] > max_distance:
unmatched_tracks.append(track_idx)
unmatched_detections.append(detection_idx)
else:
matches.append((track_idx, detection_idx))
return matches, unmatched_tracks, unmatched_detections
def matching_cascade(
distance_metric, max_distance, cascade_depth, tracks, detections,
track_indices=None, detection_indices=None):
"""Run matching cascade.
Parameters
----------
distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
The distance metric is given a list of tracks and detections as well as
a list of N track indices and M detection indices. The metric should
return the NxM dimensional cost matrix, where element (i, j) is the
association cost between the i-th track in the given track indices and
the j-th detection in the given detection indices.
max_distance : float
Gating threshold. Associations with cost larger than this value are
disregarded.
cascade_depth: int
The cascade depth, should be se to the maximum track age.
tracks : List[track.Track]
A list of predicted tracks at the current time step.
detections : List[detection.Detection]
A list of detections at the current time step.
track_indices : Optional[List[int]]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above). Defaults to all tracks.
detection_indices : Optional[List[int]]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above). Defaults to all
detections.
Returns
-------
(List[(int, int)], List[int], List[int])
Returns a tuple with the following three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = list(range(len(tracks)))
if detection_indices is None:
detection_indices = list(range(len(detections)))
unmatched_detections = detection_indices
matches = []
for level in range(cascade_depth):
if len(unmatched_detections) == 0: # No detections left
break
track_indices_l = [
k for k in track_indices
if tracks[k].time_since_update == 1 + level
]
if len(track_indices_l) == 0: # Nothing to match at this level
continue
matches_l, _, unmatched_detections = \
min_cost_matching(
distance_metric, max_distance, tracks, detections,
track_indices_l, unmatched_detections)
matches += matches_l
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
return matches, unmatched_tracks, unmatched_detections
def gate_cost_matrix(
kf, cost_matrix, tracks, detections, track_indices, detection_indices,
gated_cost=INFTY_COST, only_position=False):
"""Invalidate infeasible entries in cost matrix based on the state
distributions obtained by Kalman filtering.
Parameters
----------
kf : The Kalman filter.
cost_matrix : ndarray
The NxM dimensional cost matrix, where N is the number of track indices
and M is the number of detection indices, such that entry (i, j) is the
association cost between `tracks[track_indices[i]]` and
`detections[detection_indices[j]]`.
tracks : List[track.Track]
A list of predicted tracks at the current time step.
detections : List[detection.Detection]
A list of detections at the current time step.
track_indices : List[int]
List of track indices that maps rows in `cost_matrix` to tracks in
`tracks` (see description above).
detection_indices : List[int]
List of detection indices that maps columns in `cost_matrix` to
detections in `detections` (see description above).
gated_cost : Optional[float]
Entries in the cost matrix corresponding to infeasible associations are
set this value. Defaults to a very large value.
only_position : Optional[bool]
If True, only the x, y position of the state distribution is considered
during gating. Defaults to False.
Returns
-------
ndarray
Returns the modified cost matrix.
"""
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray(
[detections[i].to_xyah() for i in detection_indices])
for row, track_idx in enumerate(track_indices):
track = tracks[track_idx]
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position)
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
return cost_matrix

View File

@ -1,176 +0,0 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
def _pdist(a, b):
"""Compute pair-wise squared distance between points in `a` and `b`.
Parameters
----------
a : array_like
An NxM matrix of N samples of dimensionality M.
b : array_like
An LxM matrix of L samples of dimensionality M.
Returns
-------
ndarray
Returns a matrix of size len(a), len(b) such that eleement (i, j)
contains the squared distance between `a[i]` and `b[j]`.
"""
a, b = np.asarray(a), np.asarray(b)
if len(a) == 0 or len(b) == 0:
return np.zeros((len(a), len(b)))
a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
r2 = np.clip(r2, 0., float(np.inf))
return r2
def _cosine_distance(a, b, data_is_normalized=False):
"""Compute pair-wise cosine distance between points in `a` and `b`.
Parameters
----------
a : array_like
An NxM matrix of N samples of dimensionality M.
b : array_like
An LxM matrix of L samples of dimensionality M.
data_is_normalized : Optional[bool]
If True, assumes rows in a and b are unit length vectors.
Otherwise, a and b are explicitly normalized to lenght 1.
Returns
-------
ndarray
Returns a matrix of size len(a), len(b) such that eleement (i, j)
contains the squared distance between `a[i]` and `b[j]`.
"""
if not data_is_normalized:
a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
return 1. - np.dot(a, b.T)
def _nn_euclidean_distance(x, y):
""" Helper function for nearest neighbor distance metric (Euclidean).
Parameters
----------
x : ndarray
A matrix of N row-vectors (sample points).
y : ndarray
A matrix of M row-vectors (query points).
Returns
-------
ndarray
A vector of length M that contains for each entry in `y` the
smallest Euclidean distance to a sample in `x`.
"""
distances = _pdist(x, y)
return np.maximum(0.0, distances.min(axis=0))
def _nn_cosine_distance(x, y):
""" Helper function for nearest neighbor distance metric (cosine).
Parameters
----------
x : ndarray
A matrix of N row-vectors (sample points).
y : ndarray
A matrix of M row-vectors (query points).
Returns
-------
ndarray
A vector of length M that contains for each entry in `y` the
smallest cosine distance to a sample in `x`.
"""
distances = _cosine_distance(x, y)
return distances.min(axis=0)
class NearestNeighborDistanceMetric(object):
"""
A nearest neighbor distance metric that, for each target, returns
the closest distance to any sample that has been observed so far.
Parameters
----------
metric : str
Either "euclidean" or "cosine".
matching_threshold: float
The matching threshold. Samples with larger distance are considered an
invalid match.
budget : Optional[int]
If not None, fix samples per class to at most this number. Removes
the oldest samples when the budget is reached.
Attributes
----------
samples : Dict[int -> List[ndarray]]
A dictionary that maps from target identities to the list of samples
that have been observed so far.
"""
def __init__(self, metric, matching_threshold, budget=None):
if metric == "euclidean":
self._metric = _nn_euclidean_distance
elif metric == "cosine":
self._metric = _nn_cosine_distance
else:
raise ValueError(
"Invalid metric; must be either 'euclidean' or 'cosine'")
self.matching_threshold = matching_threshold
self.budget = budget
self.samples = {}
def partial_fit(self, features, targets, active_targets):
"""Update the distance metric with new data.
Parameters
----------
features : ndarray
An NxM matrix of N features of dimensionality M.
targets : ndarray
An integer array of associated target identities.
active_targets : List[int]
A list of targets that are currently present in the scene.
"""
for feature, target in zip(features, targets):
self.samples.setdefault(target, []).append(feature)
if self.budget is not None:
self.samples[target] = self.samples[target][-self.budget:]
self.samples = {k: self.samples[k] for k in active_targets}
def distance(self, features, targets):
"""Compute distance between features and targets.
Parameters
----------
features : ndarray
An NxM matrix of N features of dimensionality M.
targets : List[int]
A list of targets to match the given `features` against.
Returns
-------
ndarray
Returns a cost matrix of shape len(targets), len(features), where
element (i, j) contains the closest squared distance between
`targets[i]` and `features[j]`.
"""
cost_matrix = np.zeros((len(targets), len(features)))
for i, target in enumerate(targets):
cost_matrix[i, :] = self._metric(self.samples[target], features)
return cost_matrix

View File

@ -1,73 +0,0 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import cv2
def non_max_suppression(boxes, max_bbox_overlap, scores=None):
"""Suppress overlapping detections.
Original code from [1]_ has been adapted to include confidence score.
.. [1] http://www.pyimagesearch.com/2015/02/16/
faster-non-maximum-suppression-python/
Examples
--------
>>> boxes = [d.roi for d in detections]
>>> scores = [d.confidence for d in detections]
>>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
>>> detections = [detections[i] for i in indices]
Parameters
----------
boxes : ndarray
Array of ROIs (x, y, width, height).
max_bbox_overlap : float
ROIs that overlap more than this values are suppressed.
scores : Optional[array_like]
Detector confidence score.
Returns
-------
List[int]
Returns indices of detections that have survived non-maxima suppression.
"""
if len(boxes) == 0:
return []
boxes = boxes.astype(np.float32)
pick = []
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2] + boxes[:, 0]
y2 = boxes[:, 3] + boxes[:, 1]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
if scores is not None:
idxs = np.argsort(scores)
else:
idxs = np.argsort(y2)
while len(idxs) > 0:
last = len(idxs) - 1
i = idxs[last]
pick.append(i)
xx1 = np.maximum(x1[i], x1[idxs[:last]])
yy1 = np.maximum(y1[i], y1[idxs[:last]])
xx2 = np.minimum(x2[i], x2[idxs[:last]])
yy2 = np.minimum(y2[i], y2[idxs[:last]])
w = np.maximum(0, xx2 - xx1 + 1)
h = np.maximum(0, yy2 - yy1 + 1)
overlap = (w * h) / (area[idxs[:last]] + area[idxs[last]] - w * h)
idxs = np.delete(
idxs, np.concatenate(
([last], np.where(overlap > max_bbox_overlap)[0])))
return pick

View File

@ -1,169 +0,0 @@
# vim: expandtab:ts=4:sw=4
class TrackState:
"""
Enumeration type for the single target track state. Newly created tracks are
classified as `tentative` until enough evidence has been collected. Then,
the track state is changed to `confirmed`. Tracks that are no longer alive
are classified as `deleted` to mark them for removal from the set of active
tracks.
"""
Tentative = 1
Confirmed = 2
Deleted = 3
class Track:
"""
A single target track with state space `(x, y, a, h)` and associated
velocities, where `(x, y)` is the center of the bounding box, `a` is the
aspect ratio and `h` is the height.
Parameters
----------
mean : ndarray
Mean vector of the initial state distribution.
covariance : ndarray
Covariance matrix of the initial state distribution.
track_id : int
A unique track identifier.
n_init : int
Number of consecutive detections before the track is confirmed. The
track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
max_age : int
The maximum number of consecutive misses before the track state is
set to `Deleted`.
feature : Optional[ndarray]
Feature vector of the detection this track originates from. If not None,
this feature is added to the `features` cache.
Attributes
----------
mean : ndarray
Mean vector of the initial state distribution.
covariance : ndarray
Covariance matrix of the initial state distribution.
track_id : int
A unique track identifier.
hits : int
Total number of measurement updates.
age : int
Total number of frames since first occurance.
time_since_update : int
Total number of frames since last measurement update.
state : TrackState
The current track state.
features : List[ndarray]
A cache of features. On each measurement update, the associated feature
vector is added to this list.
"""
def __init__(self, mean, covariance, track_id, n_init, max_age,
feature=None, cls=None, mask=None):
self.mean = mean
self.covariance = covariance
self.track_id = track_id
self.hits = 1
self.age = 1
self.time_since_update = 0
self.state = TrackState.Tentative
self.cls = cls
self.mask = mask
self.features = []
if feature is not None:
self.features.append(feature)
self._n_init = n_init
self._max_age = max_age
def to_tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
Returns
-------
ndarray
The bounding box.
"""
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
def to_tlbr(self):
"""Get current position in bounding box format `(min x, miny, max x,
max y)`.
Returns
-------
ndarray
The bounding box.
"""
ret = self.to_tlwh()
ret[2:] = ret[:2] + ret[2:]
return ret
def predict(self, kf):
"""Propagate the state distribution to the current time step using a
Kalman filter prediction step.
Parameters
----------
kf : kalman_filter.KalmanFilter
The Kalman filter.
"""
self.mean, self.covariance = kf.predict(self.mean, self.covariance)
self.age += 1
self.time_since_update += 1
def update(self, kf, detection):
"""Perform Kalman filter measurement update step and update the feature
cache.
Parameters
----------
kf : kalman_filter.KalmanFilter
The Kalman filter.
detection : Detection
The associated detection.
"""
self.mask = detection.mask
self.mean, self.covariance = kf.update(
self.mean, self.covariance, detection.to_xyah())
self.features.append(detection.feature)
self.hits += 1
self.time_since_update = 0
if self.state == TrackState.Tentative and self.hits >= self._n_init:
self.state = TrackState.Confirmed
def mark_missed(self):
"""Mark this track as missed (no association at the current time step).
"""
if self.state == TrackState.Tentative:
self.state = TrackState.Deleted
elif self.time_since_update > self._max_age:
self.state = TrackState.Deleted
def is_tentative(self):
"""Returns True if this track is tentative (unconfirmed).
"""
return self.state == TrackState.Tentative
def is_confirmed(self):
"""Returns True if this track is confirmed."""
return self.state == TrackState.Confirmed
def is_deleted(self):
"""Returns True if this track is dead and should be deleted."""
return self.state == TrackState.Deleted

View File

@ -1,138 +0,0 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import kalman_filter
from . import linear_assignment
from . import iou_matching
from .track import Track
class Tracker:
"""
This is the multi-target tracker.
Parameters
----------
metric : nn_matching.NearestNeighborDistanceMetric
A distance metric for measurement-to-track association.
max_age : int
Maximum number of missed misses before a track is deleted.
n_init : int
Number of consecutive detections before the track is confirmed. The
track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
Attributes
----------
metric : nn_matching.NearestNeighborDistanceMetric
The distance metric used for measurement to track association.
max_age : int
Maximum number of missed misses before a track is deleted.
n_init : int
Number of frames that a track remains in initialization phase.
kf : kalman_filter.KalmanFilter
A Kalman filter to filter target trajectories in image space.
tracks : List[Track]
The list of active tracks at the current time step.
"""
def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3):
self.metric = metric
self.max_iou_distance = max_iou_distance
self.max_age = max_age
self.n_init = n_init
self.kf = kalman_filter.KalmanFilter()
self.tracks = []
self._next_id = 1
def predict(self):
"""Propagate track state distributions one time step forward.
This function should be called once every time step, before `update`.
"""
for track in self.tracks:
track.predict(self.kf)
def update(self, detections):
"""Perform measurement update and track management.
Parameters
----------
detections : List[deep_sort.detection.Detection]
A list of detections at the current time step.
"""
# Run matching cascade.
matches, unmatched_tracks, unmatched_detections = \
self._match(detections)
# Update track set.
for track_idx, detection_idx in matches:
self.tracks[track_idx].update(
self.kf, detections[detection_idx])
for track_idx in unmatched_tracks:
self.tracks[track_idx].mark_missed()
for detection_idx in unmatched_detections:
self._initiate_track(detections[detection_idx])
self.tracks = [t for t in self.tracks if not t.is_deleted()]
# Update distance metric.
active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
features, targets = [], []
for track in self.tracks:
if not track.is_confirmed():
continue
features += track.features
targets += [track.track_id for _ in track.features]
track.features = []
self.metric.partial_fit(
np.asarray(features), np.asarray(targets), active_targets)
def _match(self, detections):
def gated_metric(tracks, dets, track_indices, detection_indices):
features = np.array([dets[i].feature for i in detection_indices])
targets = np.array([tracks[i].track_id for i in track_indices])
cost_matrix = self.metric.distance(features, targets)
cost_matrix = linear_assignment.gate_cost_matrix(
self.kf, cost_matrix, tracks, dets, track_indices,
detection_indices)
return cost_matrix
# Split track set into confirmed and unconfirmed tracks.
confirmed_tracks = [
i for i, t in enumerate(self.tracks) if t.is_confirmed()]
unconfirmed_tracks = [
i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
# Associate confirmed tracks using appearance features.
matches_a, unmatched_tracks_a, unmatched_detections = \
linear_assignment.matching_cascade(
gated_metric, self.metric.matching_threshold, self.max_age,
self.tracks, detections, confirmed_tracks)
# Associate remaining tracks together with unconfirmed tracks using IOU.
iou_track_candidates = unconfirmed_tracks + [
k for k in unmatched_tracks_a if
self.tracks[k].time_since_update == 1]
unmatched_tracks_a = [
k for k in unmatched_tracks_a if
self.tracks[k].time_since_update != 1]
matches_b, unmatched_tracks_b, unmatched_detections = \
linear_assignment.min_cost_matching(
iou_matching.iou_cost, self.max_iou_distance, self.tracks,
detections, iou_track_candidates, unmatched_detections)
matches = matches_a + matches_b
unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
return matches, unmatched_tracks, unmatched_detections
def _initiate_track(self, detection):
mean, covariance = self.kf.initiate(detection.to_xyah())
self.tracks.append(Track(
mean, covariance, self._next_id, self.n_init, self.max_age,
detection.feature, detection.cls, detection.mask))
self._next_id += 1

View File

@ -1,2 +0,0 @@
def datasets():
return None

View File

@ -1,13 +0,0 @@
from os import environ
def assert_in(file, files_to_check):
if file not in files_to_check:
raise AssertionError("{} does not exist in the list".format(str(file)))
return True
def assert_in_env(check_list: list):
for item in check_list:
assert_in(item, environ.keys())
return True

View File

@ -1,51 +0,0 @@
import numpy as np
import cv2
palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)
def compute_color_for_labels(label):
"""
Simple function that adds fixed color depending on the class
"""
color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
return tuple(color)
def draw_masks(image, mask, color, thresh: float = 0.7, alpha: float = 0.5):
np_image = np.asarray(image)
mask = mask > thresh
color = np.asarray(color)
img_to_draw = np.copy(np_image)
# TODO: There might be a way to vectorize this
img_to_draw[mask] = color
out = np_image * (1 - alpha) + img_to_draw * alpha
return out.astype(np.uint8)
def draw_boxes(img, bbox, names=None, identities=None, masks=None, offset=(0, 0)):
for i, box in enumerate(bbox):
x1, y1, x2, y2 = [int(i) for i in box]
x1 += offset[0]
x2 += offset[0]
y1 += offset[1]
y2 += offset[1]
# box text and bar
id = int(identities[i]) if identities is not None else 0
color = compute_color_for_labels(id)
label = '{:}{:d}'.format(names[i], id)
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
if masks is not None:
mask = masks[i]
img = draw_masks(img, mask, color)
cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1)
cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)
return img
if __name__ == '__main__':
for i in range(82):
print(compute_color_for_labels(i))

View File

@ -1,103 +0,0 @@
import os
import numpy as np
import copy
import motmetrics as mm
mm.lap.default_solver = 'lap'
from utils.io import read_results, unzip_objs
class Evaluator(object):
def __init__(self, data_root, seq_name, data_type):
self.data_root = data_root
self.seq_name = seq_name
self.data_type = data_type
self.load_annotations()
self.reset_accumulator()
def load_annotations(self):
assert self.data_type == 'mot'
gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)
def reset_accumulator(self):
self.acc = mm.MOTAccumulator(auto_id=True)
def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
# results
trk_tlwhs = np.copy(trk_tlwhs)
trk_ids = np.copy(trk_ids)
# gts
gt_objs = self.gt_frame_dict.get(frame_id, [])
gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
# ignore boxes
ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
ignore_tlwhs = unzip_objs(ignore_objs)[0]
# remove ignored results
keep = np.ones(len(trk_tlwhs), dtype=bool)
iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
if len(iou_distance) > 0:
match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
match_ious = iou_distance[match_is, match_js]
match_js = np.asarray(match_js, dtype=int)
match_js = match_js[np.logical_not(np.isnan(match_ious))]
keep[match_js] = False
trk_tlwhs = trk_tlwhs[keep]
trk_ids = trk_ids[keep]
# get distance matrix
iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
# acc
self.acc.update(gt_ids, trk_ids, iou_distance)
if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):
events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics
else:
events = None
return events
def eval_file(self, filename):
self.reset_accumulator()
result_frame_dict = read_results(filename, self.data_type, is_gt=False)
frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))
for frame_id in frames:
trk_objs = result_frame_dict.get(frame_id, [])
trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
return self.acc
@staticmethod
def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
names = copy.deepcopy(names)
if metrics is None:
metrics = mm.metrics.motchallenge_metrics
metrics = copy.deepcopy(metrics)
mh = mm.metrics.create()
summary = mh.compute_many(
accs,
metrics=metrics,
names=names,
generate_overall=True
)
return summary
@staticmethod
def save_summary(summary, filename):
import pandas as pd
writer = pd.ExcelWriter(filename)
summary.to_excel(writer)
writer.save()

View File

@ -1,133 +0,0 @@
import os
from typing import Dict
import numpy as np
# from utils.log import get_logger
def write_results(filename, results, data_type):
if data_type == 'mot':
save_format = '{frame},{id},{cls},{x1},{y1},{w},{h},-1,-1,-1,-1\n'
elif data_type == 'kitti':
save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
else:
raise ValueError(data_type)
with open(filename, 'w') as f:
for frame_id, tlwhs, track_ids, classes in results:
if data_type == 'kitti':
frame_id -= 1
for tlwh, track_id, cls_id in zip(tlwhs, track_ids, classes):
if track_id < 0:
continue
x1, y1, w, h = tlwh
x2, y2 = x1 + w, y1 + h
line = save_format.format(frame=frame_id, id=track_id, cls=cls_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)
f.write(line)
# def write_results(filename, results_dict: Dict, data_type: str):
# if not filename:
# return
# path = os.path.dirname(filename)
# if not os.path.exists(path):
# os.makedirs(path)
# if data_type in ('mot', 'mcmot', 'lab'):
# save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
# elif data_type == 'kitti':
# save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n'
# else:
# raise ValueError(data_type)
# with open(filename, 'w') as f:
# for frame_id, frame_data in results_dict.items():
# if data_type == 'kitti':
# frame_id -= 1
# for tlwh, track_id in frame_data:
# if track_id < 0:
# continue
# x1, y1, w, h = tlwh
# x2, y2 = x1 + w, y1 + h
# line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)
# f.write(line)
# logger.info('Save results to {}'.format(filename))
def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
if data_type in ('mot', 'lab'):
read_fun = read_mot_results
else:
raise ValueError('Unknown data type: {}'.format(data_type))
return read_fun(filename, is_gt, is_ignore)
"""
labels={'ped', ... % 1
'person_on_vhcl', ... % 2
'car', ... % 3
'bicycle', ... % 4
'mbike', ... % 5
'non_mot_vhcl', ... % 6
'static_person', ... % 7
'distractor', ... % 8
'occluder', ... % 9
'occluder_on_grnd', ... %10
'occluder_full', ... % 11
'reflection', ... % 12
'crowd' ... % 13
};
"""
def read_mot_results(filename, is_gt, is_ignore):
valid_labels = {1}
ignore_labels = {2, 7, 8, 12}
results_dict = dict()
if os.path.isfile(filename):
with open(filename, 'r') as f:
for line in f.readlines():
linelist = line.split(',')
if len(linelist) < 7:
continue
fid = int(linelist[0])
if fid < 1:
continue
results_dict.setdefault(fid, list())
if is_gt:
if 'MOT16-' in filename or 'MOT17-' in filename:
label = int(float(linelist[7]))
mark = int(float(linelist[6]))
if mark == 0 or label not in valid_labels:
continue
score = 1
elif is_ignore:
if 'MOT16-' in filename or 'MOT17-' in filename:
label = int(float(linelist[7]))
vis_ratio = float(linelist[8])
if label not in ignore_labels and vis_ratio >= 0:
continue
else:
continue
score = 1
else:
score = float(linelist[6])
tlwh = tuple(map(float, linelist[2:6]))
target_id = int(linelist[1])
results_dict[fid].append((tlwh, target_id, score))
return results_dict
def unzip_objs(objs):
if len(objs) > 0:
tlwhs, ids, scores = zip(*objs)
else:
tlwhs, ids, scores = [], [], []
tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
return tlwhs, ids, scores

View File

@ -1,383 +0,0 @@
"""
References:
https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f
"""
import json
from os import makedirs
from os.path import exists, join
from datetime import datetime
class JsonMeta(object):
HOURS = 3
MINUTES = 59
SECONDS = 59
PATH_TO_SAVE = 'LOGS'
DEFAULT_FILE_NAME = 'remaining'
class BaseJsonLogger(object):
"""
This is the base class that returns __dict__ of its own
it also returns the dicts of objects in the attributes that are list instances
"""
def dic(self):
# returns dicts of objects
out = {}
for k, v in self.__dict__.items():
if hasattr(v, 'dic'):
out[k] = v.dic()
elif isinstance(v, list):
out[k] = self.list(v)
else:
out[k] = v
return out
@staticmethod
def list(values):
# applies the dic method on items in the list
return [v.dic() if hasattr(v, 'dic') else v for v in values]
class Label(BaseJsonLogger):
"""
For each bounding box there are various categories with confidences. Label class keeps track of that information.
"""
def __init__(self, category: str, confidence: float):
self.category = category
self.confidence = confidence
class Bbox(BaseJsonLogger):
"""
This module stores the information for each frame and use them in JsonParser
Attributes:
labels (list): List of label module.
top (int):
left (int):
width (int):
height (int):
Args:
bbox_id (float):
top (int):
left (int):
width (int):
height (int):
References:
Check Label module for better understanding.
"""
def __init__(self, bbox_id, top, left, width, height):
self.labels = []
self.bbox_id = bbox_id
self.top = top
self.left = left
self.width = width
self.height = height
def add_label(self, category, confidence):
# adds category and confidence only if top_k is not exceeded.
self.labels.append(Label(category, confidence))
def labels_full(self, value):
return len(self.labels) == value
class Frame(BaseJsonLogger):
"""
This module stores the information for each frame and use them in JsonParser
Attributes:
timestamp (float): The elapsed time of captured frame
frame_id (int): The frame number of the captured video
bboxes (list of Bbox objects): Stores the list of bbox objects.
References:
Check Bbox class for better information
Args:
timestamp (float):
frame_id (int):
"""
def __init__(self, frame_id: int, timestamp: float = None):
self.frame_id = frame_id
self.timestamp = timestamp
self.bboxes = []
def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int):
bboxes_ids = [bbox.bbox_id for bbox in self.bboxes]
if bbox_id not in bboxes_ids:
self.bboxes.append(Bbox(bbox_id, top, left, width, height))
else:
raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id))
def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float):
bboxes = {bbox.id: bbox for bbox in self.bboxes}
if bbox_id in bboxes.keys():
res = bboxes.get(bbox_id)
res.add_label(category, confidence)
else:
raise ValueError('the bbox with id: {} does not exists!'.format(bbox_id))
class BboxToJsonLogger(BaseJsonLogger):
"""
ُ This module is designed to automate the task of logging jsons. An example json is used
to show the contents of json file shortly
Example:
{
"video_details": {
"frame_width": 1920,
"frame_height": 1080,
"frame_rate": 20,
"video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi"
},
"frames": [
{
"frame_id": 329,
"timestamp": 3365.1254
"bboxes": [
{
"labels": [
{
"category": "pedestrian",
"confidence": 0.9
}
],
"bbox_id": 0,
"top": 1257,
"left": 138,
"width": 68,
"height": 109
}
]
}],
Attributes:
frames (dict): It's a dictionary that maps each frame_id to json attributes.
video_details (dict): information about video file.
top_k_labels (int): shows the allowed number of labels
start_time (datetime object): we use it to automate the json output by time.
Args:
top_k_labels (int): shows the allowed number of labels
"""
def __init__(self, top_k_labels: int = 1):
self.frames = {}
self.video_details = self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None,
video_name=None)
self.top_k_labels = top_k_labels
self.start_time = datetime.now()
def set_top_k(self, value):
self.top_k_labels = value
def frame_exists(self, frame_id: int) -> bool:
"""
Args:
frame_id (int):
Returns:
bool: true if frame_id is recognized
"""
return frame_id in self.frames.keys()
def add_frame(self, frame_id: int, timestamp: float = None) -> None:
"""
Args:
frame_id (int):
timestamp (float): opencv captured frame time property
Raises:
ValueError: if frame_id would not exist in class frames attribute
Returns:
None
"""
if not self.frame_exists(frame_id):
self.frames[frame_id] = Frame(frame_id, timestamp)
else:
raise ValueError("Frame id: {} already exists".format(frame_id))
def bbox_exists(self, frame_id: int, bbox_id: int) -> bool:
"""
Args:
frame_id:
bbox_id:
Returns:
bool: if bbox exists in frame bboxes list
"""
bboxes = []
if self.frame_exists(frame_id=frame_id):
bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes]
return bbox_id in bboxes
def find_bbox(self, frame_id: int, bbox_id: int):
"""
Args:
frame_id:
bbox_id:
Returns:
bbox_id (int):
Raises:
ValueError: if bbox_id does not exist in the bbox list of specific frame.
"""
if not self.bbox_exists(frame_id, bbox_id):
raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id))
bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes}
return bboxes.get(bbox_id)
def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None:
"""
Args:
frame_id (int):
bbox_id (int):
top (int):
left (int):
width (int):
height (int):
Returns:
None
Raises:
ValueError: if bbox_id already exist in frame information with frame_id
ValueError: if frame_id does not exist in frames attribute
"""
if self.frame_exists(frame_id):
frame = self.frames[frame_id]
if not self.bbox_exists(frame_id, bbox_id):
frame.add_bbox(bbox_id, top, left, width, height)
else:
raise ValueError(
"frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id))
else:
raise ValueError("frame with frame_id: {} does not exist".format(frame_id))
def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float):
"""
Args:
frame_id:
bbox_id:
category:
confidence: the confidence value returned from yolo detection
Returns:
None
Raises:
ValueError: if labels quota (top_k_labels) exceeds.
"""
bbox = self.find_bbox(frame_id, bbox_id)
if not bbox.labels_full(self.top_k_labels):
bbox.add_label(category, confidence)
else:
raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id))
def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None,
video_name: str = None):
self.video_details['frame_width'] = frame_width
self.video_details['frame_height'] = frame_height
self.video_details['frame_rate'] = frame_rate
self.video_details['video_name'] = video_name
def output(self):
output = {'video_details': self.video_details}
result = list(self.frames.values())
output['frames'] = [item.dic() for item in result]
return output
def json_output(self, output_name):
"""
Args:
output_name:
Returns:
None
Notes:
It creates the json output with `output_name` name.
"""
if not output_name.endswith('.json'):
output_name += '.json'
with open(output_name, 'w') as file:
json.dump(self.output(), file)
file.close()
def set_start(self):
self.start_time = datetime.now()
def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0,
seconds: int = 60) -> None:
"""
Notes:
Creates folder and then periodically stores the jsons on that address.
Args:
output_dir (str): the directory where output files will be stored
hours (int):
minutes (int):
seconds (int):
Returns:
None
"""
end = datetime.now()
interval = 0
interval += abs(min([hours, JsonMeta.HOURS]) * 3600)
interval += abs(min([minutes, JsonMeta.MINUTES]) * 60)
interval += abs(min([seconds, JsonMeta.SECONDS]))
diff = (end - self.start_time).seconds
if diff > interval:
output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json'
if not exists(output_dir):
makedirs(output_dir)
output = join(output_dir, output_name)
self.json_output(output_name=output)
self.frames = {}
self.start_time = datetime.now()
def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE):
"""
saves as the number of frames quota increases higher.
:param frames_quota:
:param frame_counter:
:param output_dir:
:return:
"""
pass
def flush(self, output_dir):
"""
Notes:
We use this function to output jsons whenever possible.
like the time that we exit the while loop of opencv.
Args:
output_dir:
Returns:
None
"""
filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json'
output = join(output_dir, filename)
self.json_output(output_name=output)

View File

@ -1,17 +0,0 @@
import logging
def get_logger(name='root'):
formatter = logging.Formatter(
# fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger = logging.getLogger(name)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
return logger

View File

@ -1,38 +0,0 @@
import os
import yaml
from easydict import EasyDict as edict
class YamlParser(edict):
"""
This is yaml parser based on EasyDict.
"""
def __init__(self, cfg_dict=None, config_file=None):
if cfg_dict is None:
cfg_dict = {}
if config_file is not None:
assert (os.path.isfile(config_file))
with open(config_file, 'r') as fo:
cfg_dict.update(yaml.safe_load(fo.read()))
super(YamlParser, self).__init__(cfg_dict)
def merge_from_file(self, config_file):
with open(config_file, 'r') as fo:
self.update(yaml.safe_load(fo.read()))
def merge_from_dict(self, config_dict):
self.update(config_dict)
def get_config(config_file=None):
return YamlParser(config_file=config_file)
if __name__ == "__main__":
cfg = YamlParser(config_file="../configs/yolov3.yaml")
cfg.merge_from_file("../configs/deep_sort.yaml")
import ipdb; ipdb.set_trace()

View File

@ -1,39 +0,0 @@
from functools import wraps
from time import time
def is_video(ext: str):
"""
Returns true if ext exists in
allowed_exts for video files.
Args:
ext:
Returns:
"""
allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp')
return any((ext.endswith(x) for x in allowed_exts))
def tik_tok(func):
"""
keep track of time for each process.
Args:
func:
Returns:
"""
@wraps(func)
def _time_it(*args, **kwargs):
start = time()
try:
return func(*args, **kwargs)
finally:
end_ = time()
print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start)))
return _time_it

Binary file not shown.

View File

@ -1,181 +0,0 @@
import os
import torch
from yolov5.models.common import DetectMultiBackend
from yolov5.utils.torch_utils import select_device
from yolov5.utils.dataloaders import LoadImages
from yolov5.utils.general import check_img_size, non_max_suppression, cv2, scale_coords, xyxy2xywh
from deep_sort.deep_sort import DeepSort
class VideoTracker(object):
def __init__(self, weights_pt, data, video_path, save_path, idx_to_class):
self.video_path = video_path
self.save_path = save_path
self.idx_to_class = idx_to_class
# 选择设备CPU 或 GPU
device = select_device('cpu')
self.vdo = cv2.VideoCapture()
self.detector = DetectMultiBackend(weights_pt, device=device, dnn=False, data=data, fp16=False)
self.deepsort = DeepSort(
model_path="deep_sort/deep/checkpoint/ckpt.t7", # ReID 模型路径
max_dist=0.2, # 外观特征匹配阈值(越小越严格)
max_iou_distance=0.7, # 最大IoU距离阈值
max_age=70, # 目标最大存活帧数(未匹配时保留的帧数)
n_init=3 # 初始确认帧数连续匹配到n_init次后确认跟踪
)
self.class_names = self.detector.class_names
def __enter__(self):
self.vdo.open(self.video_path)
self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
assert self.vdo.isOpened()
if self.save_path:
os.makedirs(self.args.save_path, exist_ok=True)
# path of saved video and results
self.save_video_path = os.path.join(self.save_path, "results.avi")
self.save_results_path = os.path.join(self.save_path, "results.txt")
# create video writer
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
self.writer = cv2.VideoWriter(self.save_video_path, fourcc, 20, (self.im_width, self.im_height))
return self
def __exit__(self, exc_type, exc_value, exc_traceback):
if exc_type:
print(exc_type, exc_value, exc_traceback)
def run(self):
stride, names, pt = self.model.stride, self.model.names, self.model.pt
imgsz = check_img_size((640, 640), s=stride) # check image size
dataset = LoadImages(self.video_path, img_size=imgsz, stride=stride, auto=pt, vid_stride=1)
bs = len(dataset)
self.model.warmup(imgsz=(1 if pt or self.model.triton else bs, 3, *imgsz))
for path, im, im0s, vid_cap, s in dataset:
im = torch.from_numpy(im).to(self.model.device)
im = im.half() if self.model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
if self.model.xml and im.shape[0] > 1:
ims = torch.chunk(im, im.shape[0], 0)
# Inference
if self.model.xml and im.shape[0] > 1:
pred = None
for image in ims:
if pred is None:
pred = self.model(image, augment=False, visualize=False).unsqueeze(0)
else:
pred = torch.cat(
(pred, self.model(image, augment=False, visualize=False).unsqueeze(0)),
dim=0
)
pred = [pred, None]
else:
pred = self.model(im, augment=False, visualize=False)
# NMS
pred = non_max_suppression(pred, 0.40, 0.45, None, False, max_det=1000)[0]
image = im0s[0]
pred[:, :4] = scale_coords(im.shape[2:], pred[:, :4], image.shape).round()
# 使用YOLOv5进行检测后得到的pred
bbox_xywh, cls_conf, cls_ids = yolov5_to_deepsort_format(pred)
# select person class
mask = cls_ids == 0
bbox_xywh = bbox_xywh[mask]
# bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector
bbox_xywh[:, 2:] *= 1.2
cls_conf = cls_conf[mask]
cls_ids = cls_ids[mask]
# 调用Deep SORT更新方法
outputs, _ = self.deepsort.update(bbox_xywh, cls_conf, cls_ids, image)
count_result = {}
for key in self.idx_to_class.keys():
count_result[key] = set()
# draw boxes for visualization
if len(outputs) > 0:
bbox_xyxy = outputs[:, :4] # 这个是检测所在框的坐标的数组
identities = outputs[:, -1] # 这个是每个元素的计数的数组
cls = outputs[:, -2] # 这个是标签数组id的数组
names = [self.idx_to_class[str(label)] for label in cls]
image = draw_boxes(image, bbox_xyxy, names, identities)
for i in range(len(cls)):
count_result[str(cls[i])].add(identities[i])
def draw_boxes(img, bbox, names=None, identities=None, offset=(0, 0)):
for i, box in enumerate(bbox):
x1, y1, x2, y2 = [int(i) for i in box]
x1 += offset[0]
x2 += offset[0]
y1 += offset[1]
y2 += offset[1]
# box text and bar
id = int(identities[i]) if identities is not None else 0
color = compute_color_for_labels(id)
label = '{:}{:d}'.format(names[i], id)
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1)
cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)
return img
def compute_color_for_labels(label):
"""
Simple function that adds fixed color depending on the class
"""
color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
return tuple(color)
def yolov5_to_deepsort_format(pred):
"""
将YOLOv5的预测结果转换为Deep SORT所需的格式
:param pred: YOLOv5的预测结果
:return: 转换后的bbox_xywh, confs, class_ids
"""
pred[:, :4] = xyxy2xywh(pred[:, :4])
xywh = pred[:, :4].cpu().numpy()
conf = pred[:, 4].cpu().numpy()
cls = pred[:, 5].cpu().numpy()
return xywh, conf, cls
if __name__ == "__main__":
args = parse_args()
cfg = get_config()
if args.segment:
cfg.USE_SEGMENT = True
else:
cfg.USE_SEGMENT = False
if args.mmdet:
cfg.merge_from_file(args.config_mmdetection)
cfg.USE_MMDET = True
else:
cfg.merge_from_file(args.config_detection)
cfg.USE_MMDET = False
cfg.merge_from_file(args.config_deepsort)
if args.fastreid:
cfg.merge_from_file(args.config_fastreid)
cfg.USE_FASTREID = True
else:
cfg.USE_FASTREID = False
with VideoTracker(cfg, args, video_path=args.VIDEO_PATH) as vdo_trk:
vdo_trk.run()

File diff suppressed because it is too large Load Diff

View File

@ -1,130 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Experimental modules."""
import math
import numpy as np
import torch
import torch.nn as nn
from utils.yolov5.utils.downloads import attempt_download
class Sum(nn.Module):
"""Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070."""
def __init__(self, n, weight=False):
"""Initializes a module to sum outputs of layers with number of inputs `n` and optional weighting, supporting 2+
inputs.
"""
super().__init__()
self.weight = weight # apply weights boolean
self.iter = range(n - 1) # iter object
if weight:
self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
def forward(self, x):
"""Processes input through a customizable weighted sum of `n` inputs, optionally applying learned weights."""
y = x[0] # no weight
if self.weight:
w = torch.sigmoid(self.w) * 2
for i in self.iter:
y = y + x[i + 1] * w[i]
else:
for i in self.iter:
y = y + x[i + 1]
return y
class MixConv2d(nn.Module):
"""Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595."""
def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
"""Initializes MixConv2d with mixed depth-wise convolutional layers, taking input and output channels (c1, c2),
kernel sizes (k), stride (s), and channel distribution strategy (equal_ch).
"""
super().__init__()
n = len(k) # number of convolutions
if equal_ch: # equal c_ per group
i = torch.linspace(0, n - 1e-6, c2).floor() # c2 indices
c_ = [(i == g).sum() for g in range(n)] # intermediate channels
else: # equal weight.numel() per group
b = [c2] + [0] * n
a = np.eye(n + 1, n, k=-1)
a -= np.roll(a, 1, axis=1)
a *= np.array(k) ** 2
a[0] = 1
c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
self.m = nn.ModuleList(
[nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]
)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.SiLU()
def forward(self, x):
"""Performs forward pass by applying SiLU activation on batch-normalized concatenated convolutional layer
outputs.
"""
return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
class Ensemble(nn.ModuleList):
"""Ensemble of models."""
def __init__(self):
"""Initializes an ensemble of models to be used for aggregated predictions."""
super().__init__()
def forward(self, x, augment=False, profile=False, visualize=False):
"""Performs forward pass aggregating outputs from an ensemble of models.."""
y = [module(x, augment, profile, visualize)[0] for module in self]
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.stack(y).mean(0) # mean ensemble
y = torch.cat(y, 1) # nms ensemble
return y, None # inference, train output
def attempt_load(weights, device=None, inplace=True, fuse=True):
"""
Loads and fuses an ensemble or single YOLOv5 model from weights, handling device placement and model adjustments.
Example inputs: weights=[a,b,c] or a single model weights=[a] or weights=a.
"""
from utils.yolov5.models.yolo import Detect, Model
model = Ensemble()
for w in weights if isinstance(weights, list) else [weights]:
ckpt = torch.load(attempt_download(w), map_location="cpu") # load
ckpt = (ckpt.get("ema") or ckpt["model"]).to(device).float() # FP32 model
# Model compatibility updates
if not hasattr(ckpt, "stride"):
ckpt.stride = torch.tensor([32.0])
if hasattr(ckpt, "names") and isinstance(ckpt.names, (list, tuple)):
ckpt.names = dict(enumerate(ckpt.names)) # convert to dict
model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, "fuse") else ckpt.eval()) # model in eval mode
# Module updates
for m in model.modules():
t = type(m)
if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
m.inplace = inplace
if t is Detect and not isinstance(m.anchor_grid, list):
delattr(m, "anchor_grid")
setattr(m, "anchor_grid", [torch.zeros(1)] * m.nl)
elif t is nn.Upsample and not hasattr(m, "recompute_scale_factor"):
m.recompute_scale_factor = None # torch 1.11.0 compatibility
# Return model
if len(model) == 1:
return model[-1]
# Return detection ensemble
print(f"Ensemble created with {weights}\n")
for k in "names", "nc", "yaml":
setattr(model, k, getattr(model[0], k))
model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
assert all(model[0].nc == m.nc for m in model), f"Models have different class counts: {[m.nc for m in model]}"
return model

View File

@ -1,57 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Default anchors for COCO data
# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [9, 11, 21, 19, 17, 41] # P3/8
- [43, 32, 39, 70, 86, 64] # P4/16
- [65, 131, 134, 130, 120, 265] # P5/32
- [282, 180, 247, 354, 512, 387] # P6/64
# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [19, 27, 44, 40, 38, 94] # P3/8
- [96, 68, 86, 152, 180, 137] # P4/16
- [140, 301, 303, 264, 238, 542] # P5/32
- [436, 615, 739, 380, 925, 792] # P6/64
# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [28, 41, 67, 59, 57, 141] # P3/8
- [144, 103, 129, 227, 270, 205] # P4/16
- [209, 452, 455, 396, 358, 812] # P5/32
- [653, 922, 1109, 570, 1387, 1187] # P6/64
# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [11, 11, 13, 30, 29, 20] # P3/8
- [30, 46, 61, 38, 39, 92] # P4/16
- [78, 80, 146, 66, 79, 163] # P5/32
- [149, 150, 321, 143, 157, 303] # P6/64
- [257, 402, 359, 290, 524, 372] # P7/128
# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [19, 22, 54, 36, 32, 77] # P3/8
- [70, 83, 138, 71, 75, 173] # P4/16
- [165, 159, 148, 334, 375, 151] # P5/32
- [334, 317, 251, 626, 499, 474] # P6/64
- [750, 326, 534, 814, 1079, 818] # P7/128
# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [29, 34, 81, 55, 47, 115] # P3/8
- [105, 124, 207, 107, 113, 259] # P4/16
- [247, 238, 222, 500, 563, 227] # P5/32
- [501, 476, 376, 939, 749, 711] # P6/64
- [1126, 489, 801, 1222, 1618, 1227] # P7/128

View File

@ -1,52 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3-SPP head
head: [
[-1, 1, Bottleneck, [1024, False]],
[-1, 1, SPP, [512, [5, 9, 13]]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,42 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10, 14, 23, 27, 37, 58] # P4/16
- [81, 82, 135, 169, 344, 319] # P5/32
# YOLOv3-tiny backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [16, 3, 1]], # 0
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
[-1, 1, Conv, [32, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
[-1, 1, Conv, [64, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
[-1, 1, Conv, [128, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]
# YOLOv3-tiny head
head: [
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
[[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
]

View File

@ -1,52 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# darknet53 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [32, 3, 1]], # 0
[-1, 1, Conv, [64, 3, 2]], # 1-P1/2
[-1, 1, Bottleneck, [64]],
[-1, 1, Conv, [128, 3, 2]], # 3-P2/4
[-1, 2, Bottleneck, [128]],
[-1, 1, Conv, [256, 3, 2]], # 5-P3/8
[-1, 8, Bottleneck, [256]],
[-1, 1, Conv, [512, 3, 2]], # 7-P4/16
[-1, 8, Bottleneck, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
[-1, 4, Bottleneck, [1024]], # 10
]
# YOLOv3 head
head: [
[-1, 1, Bottleneck, [1024, False]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]],
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
[-2, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P4
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Bottleneck, [512, False]],
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
[-2, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P3
[-1, 1, Bottleneck, [256, False]],
[-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
[[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 BiFPN head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,43 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 FPN head
head: [
[-1, 3, C3, [1024, False]], # 10 (P5/32-large)
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 1, Conv, [512, 1, 1]],
[-1, 3, C3, [512, False]], # 14 (P4/16-medium)
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 1, Conv, [256, 1, 1]],
[-1, 3, C3, [256, False]], # 18 (P3/8-small)
[[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,55 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [128, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 2], 1, Concat, [1]], # cat backbone P2
[-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
[-1, 1, Conv, [128, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P3
[-1, 3, C3, [256, False]], # 24 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 27 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 30 (P5/32-large)
[[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
]

View File

@ -1,42 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head with (P3, P4) outputs
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[[17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4)
]

View File

@ -1,57 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
head: [
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -1,68 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
[-1, 3, C3, [1280]],
[-1, 1, SPPF, [1280, 5]], # 13
]
# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
head: [
[-1, 1, Conv, [1024, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 10], 1, Concat, [1]], # cat backbone P6
[-1, 3, C3, [1024, False]], # 17
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 21
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 25
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 29 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 26], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 32 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 22], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 35 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 18], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
[-1, 1, Conv, [1024, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P7
[-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
[[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 PANet head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,61 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [19, 27, 44, 40, 38, 94] # P3/8
- [96, 68, 86, 152, 180, 137] # P4/16
- [140, 301, 303, 264, 238, 542] # P5/32
- [436, 615, 739, 380, 925, 792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -1,61 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [19, 27, 44, 40, 38, 94] # P3/8
- [96, 68, 86, 152, 180, 137] # P4/16
- [140, 301, 303, 264, 238, 542] # P5/32
- [436, 615, 739, 380, 925, 792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -1,61 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [19, 27, 44, 40, 38, 94] # P3/8
- [96, 68, 86, 152, 180, 137] # P4/16
- [140, 301, 303, 264, 238, 542] # P5/32
- [436, 615, 739, 380, 925, 792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -1,50 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3Ghost, [128]],
[-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3Ghost, [256]],
[-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3Ghost, [512]],
[-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3Ghost, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, GhostConv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3Ghost, [512, False]], # 13
[-1, 1, GhostConv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
[-1, 1, GhostConv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
[-1, 1, GhostConv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3TR, [1024]], # 9 <--- C3TR() Transformer module
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,61 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [19, 27, 44, 40, 38, 94] # P3/8
- [96, 68, 86, 152, 180, 137] # P4/16
- [140, 301, 303, 264, 238, 542] # P5/32
- [436, 615, 739, 380, 925, 792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -1,61 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [19, 27, 44, 40, 38, 94] # P3/8
- [96, 68, 86, 152, 180, 137] # P4/16
- [140, 301, 303, 264, 238, 542] # P5/32
- [436, 615, 739, 380, 925, 792] # P6/64
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [768, 3, 2]], # 7-P5/32
[-1, 3, C3, [768]],
[-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 11
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [768, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 8], 1, Concat, [1]], # cat backbone P5
[-1, 3, C3, [768, False]], # 15
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 19
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 23 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 20], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 26 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 16], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [768, False]], # 29 (P5/32-large)
[-1, 1, Conv, [768, 3, 2]],
[[-1, 12], 1, Concat, [1]], # cat head P6
[-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
[[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.5 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
]

View File

@ -1,797 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
TensorFlow, Keras and TFLite versions of YOLOv5
Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127.
Usage:
$ python models/tf.py --weights yolov5s.pt
Export:
$ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
"""
import argparse
import sys
from copy import deepcopy
from pathlib import Path
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd()) # relative
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from tensorflow import keras
from models.common import (
C3,
SPP,
SPPF,
Bottleneck,
BottleneckCSP,
C3x,
Concat,
Conv,
CrossConv,
DWConv,
DWConvTranspose2d,
Focus,
autopad,
)
from models.experimental import MixConv2d, attempt_load
from models.yolo import Detect, Segment
from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args
class TFBN(keras.layers.Layer):
"""TensorFlow BatchNormalization wrapper for initializing with optional pretrained weights."""
def __init__(self, w=None):
"""Initializes a TensorFlow BatchNormalization layer with optional pretrained weights."""
super().__init__()
self.bn = keras.layers.BatchNormalization(
beta_initializer=keras.initializers.Constant(w.bias.numpy()),
gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
epsilon=w.eps,
)
def call(self, inputs):
"""Applies batch normalization to the inputs."""
return self.bn(inputs)
class TFPad(keras.layers.Layer):
"""Pads input tensors in spatial dimensions 1 and 2 with specified integer or tuple padding values."""
def __init__(self, pad):
"""
Initializes a padding layer for spatial dimensions 1 and 2 with specified padding, supporting both int and tuple
inputs.
Inputs are
"""
super().__init__()
if isinstance(pad, int):
self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
else: # tuple/list
self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
def call(self, inputs):
"""Pads input tensor with zeros using specified padding, suitable for int and tuple pad dimensions."""
return tf.pad(inputs, self.pad, mode="constant", constant_values=0)
class TFConv(keras.layers.Layer):
"""Implements a standard convolutional layer with optional batch normalization and activation for TensorFlow."""
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
"""
Initializes a standard convolution layer with optional batch normalization and activation; supports only
group=1.
Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
"""
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
# TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
# see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
conv = keras.layers.Conv2D(
filters=c2,
kernel_size=k,
strides=s,
padding="SAME" if s == 1 else "VALID",
use_bias=not hasattr(w, "bn"),
kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer="zeros" if hasattr(w, "bn") else keras.initializers.Constant(w.conv.bias.numpy()),
)
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, "bn") else tf.identity
self.act = activations(w.act) if act else tf.identity
def call(self, inputs):
"""Applies convolution, batch normalization, and activation function to input tensors."""
return self.act(self.bn(self.conv(inputs)))
class TFDWConv(keras.layers.Layer):
"""Initializes a depthwise convolution layer with optional batch normalization and activation for TensorFlow."""
def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
"""
Initializes a depthwise convolution layer with optional batch normalization and activation for TensorFlow
models.
Input are ch_in, ch_out, weights, kernel, stride, padding, groups.
"""
super().__init__()
assert c2 % c1 == 0, f"TFDWConv() output={c2} must be a multiple of input={c1} channels"
conv = keras.layers.DepthwiseConv2D(
kernel_size=k,
depth_multiplier=c2 // c1,
strides=s,
padding="SAME" if s == 1 else "VALID",
use_bias=not hasattr(w, "bn"),
depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer="zeros" if hasattr(w, "bn") else keras.initializers.Constant(w.conv.bias.numpy()),
)
self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
self.bn = TFBN(w.bn) if hasattr(w, "bn") else tf.identity
self.act = activations(w.act) if act else tf.identity
def call(self, inputs):
"""Applies convolution, batch normalization, and activation function to input tensors."""
return self.act(self.bn(self.conv(inputs)))
class TFDWConvTranspose2d(keras.layers.Layer):
"""Implements a depthwise ConvTranspose2D layer for TensorFlow with specific settings."""
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
"""
Initializes depthwise ConvTranspose2D layer with specific channel, kernel, stride, and padding settings.
Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
"""
super().__init__()
assert c1 == c2, f"TFDWConv() output={c2} must be equal to input={c1} channels"
assert k == 4 and p1 == 1, "TFDWConv() only valid for k=4 and p1=1"
weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
self.c1 = c1
self.conv = [
keras.layers.Conv2DTranspose(
filters=1,
kernel_size=k,
strides=s,
padding="VALID",
output_padding=p2,
use_bias=True,
kernel_initializer=keras.initializers.Constant(weight[..., i : i + 1]),
bias_initializer=keras.initializers.Constant(bias[i]),
)
for i in range(c1)
]
def call(self, inputs):
"""Processes input through parallel convolutions and concatenates results, trimming border pixels."""
return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
class TFFocus(keras.layers.Layer):
"""Focuses spatial information into channel space using pixel shuffling and convolution for TensorFlow models."""
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
"""
Initializes TFFocus layer to focus width and height information into channel space with custom convolution
parameters.
Inputs are ch_in, ch_out, kernel, stride, padding, groups.
"""
super().__init__()
self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
def call(self, inputs):
"""
Performs pixel shuffling and convolution on input tensor, downsampling by 2 and expanding channels by 4.
Example x(b,w,h,c) -> y(b,w/2,h/2,4c).
"""
inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
return self.conv(tf.concat(inputs, 3))
class TFBottleneck(keras.layers.Layer):
"""Implements a TensorFlow bottleneck layer with optional shortcut connections for efficient feature extraction."""
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):
"""
Initializes a standard bottleneck layer for TensorFlow models, expanding and contracting channels with optional
shortcut.
Arguments are ch_in, ch_out, shortcut, groups, expansion.
"""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
"""Performs forward pass; if shortcut is True & input/output channels match, adds input to the convolution
result.
"""
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFCrossConv(keras.layers.Layer):
"""Implements a cross convolutional layer with optional expansion, grouping, and shortcut for TensorFlow."""
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
"""Initializes cross convolution layer with optional expansion, grouping, and shortcut addition capabilities."""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
self.add = shortcut and c1 == c2
def call(self, inputs):
"""Passes input through two convolutions optionally adding the input if channel dimensions match."""
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
class TFConv2d(keras.layers.Layer):
"""Implements a TensorFlow 2D convolution layer, mimicking PyTorch's nn.Conv2D for specified filters and stride."""
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
"""Initializes a TensorFlow 2D convolution layer, mimicking PyTorch's nn.Conv2D functionality for given filter
sizes and stride.
"""
super().__init__()
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
self.conv = keras.layers.Conv2D(
filters=c2,
kernel_size=k,
strides=s,
padding="VALID",
use_bias=bias,
kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()),
bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None,
)
def call(self, inputs):
"""Applies a convolution operation to the inputs and returns the result."""
return self.conv(inputs)
class TFBottleneckCSP(keras.layers.Layer):
"""Implements a CSP bottleneck layer for TensorFlow models to enhance gradient flow and efficiency."""
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
"""
Initializes CSP bottleneck layer with specified channel sizes, count, shortcut option, groups, and expansion
ratio.
Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
"""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
self.bn = TFBN(w.bn)
self.act = lambda x: keras.activations.swish(x)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
"""Processes input through the model layers, concatenates, normalizes, activates, and reduces the output
dimensions.
"""
y1 = self.cv3(self.m(self.cv1(inputs)))
y2 = self.cv2(inputs)
return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
class TFC3(keras.layers.Layer):
"""CSP bottleneck layer with 3 convolutions for TensorFlow, supporting optional shortcuts and group convolutions."""
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
"""
Initializes CSP Bottleneck with 3 convolutions, supporting optional shortcuts and group convolutions.
Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
"""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
def call(self, inputs):
"""
Processes input through a sequence of transformations for object detection (YOLOv5).
See https://github.com/ultralytics/yolov5.
"""
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFC3x(keras.layers.Layer):
"""A TensorFlow layer for enhanced feature extraction using cross-convolutions in object detection models."""
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
"""
Initializes layer with cross-convolutions for enhanced feature extraction in object detection models.
Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
"""
super().__init__()
c_ = int(c2 * e) # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
self.m = keras.Sequential(
[TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)]
)
def call(self, inputs):
"""Processes input through cascaded convolutions and merges features, returning the final tensor output."""
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
class TFSPP(keras.layers.Layer):
"""Implements spatial pyramid pooling for YOLOv3-SPP with specific channels and kernel sizes."""
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
"""Initializes a YOLOv3-SPP layer with specific input/output channels and kernel sizes for pooling."""
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding="SAME") for x in k]
def call(self, inputs):
"""Processes input through two TFConv layers and concatenates with max-pooled outputs at intermediate stage."""
x = self.cv1(inputs)
return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
class TFSPPF(keras.layers.Layer):
"""Implements a fast spatial pyramid pooling layer for TensorFlow with optimized feature extraction."""
def __init__(self, c1, c2, k=5, w=None):
"""Initializes a fast spatial pyramid pooling layer with customizable in/out channels, kernel size, and
weights.
"""
super().__init__()
c_ = c1 // 2 # hidden channels
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding="SAME")
def call(self, inputs):
"""Executes the model's forward pass, concatenating input features with three max-pooled versions before final
convolution.
"""
x = self.cv1(inputs)
y1 = self.m(x)
y2 = self.m(y1)
return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
class TFDetect(keras.layers.Layer):
"""Implements YOLOv5 object detection layer in TensorFlow for predicting bounding boxes and class probabilities."""
def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):
"""Initializes YOLOv5 detection layer for TensorFlow with configurable classes, anchors, channels, and image
size.
"""
super().__init__()
self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [tf.zeros(1)] * self.nl # init grid
self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
self.training = False # set to False after building model
self.imgsz = imgsz
for i in range(self.nl):
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
self.grid[i] = self._make_grid(nx, ny)
def call(self, inputs):
"""Performs forward pass through the model layers to predict object bounding boxes and classifications."""
z = [] # inference output
x = []
for i in range(self.nl):
x.append(self.m[i](inputs[i]))
# x(bs,20,20,255) to x(bs,3,20,20,85)
ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
if not self.training: # inference
y = x[i]
grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy
wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
# Normalize xywh to 0-1 to reduce calibration error
xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
y = tf.concat([xy, wh, tf.sigmoid(y[..., 4 : 5 + self.nc]), y[..., 5 + self.nc :]], -1)
z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)
@staticmethod
def _make_grid(nx=20, ny=20):
"""Generates a 2D grid of coordinates in (x, y) format with shape [1, 1, ny*nx, 2]."""
# return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFSegment(TFDetect):
"""YOLOv5 segmentation head for TensorFlow, combining detection and segmentation."""
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
"""Initializes YOLOv5 Segment head with specified channel depths, anchors, and input size for segmentation
models.
"""
super().__init__(nc, anchors, ch, imgsz, w)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv
self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos
self.detect = TFDetect.call
def call(self, x):
"""Applies detection and proto layers on input, returning detections and optionally protos if training."""
p = self.proto(x[0])
# p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0])) # (optional) full-size protos
p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p)
class TFProto(keras.layers.Layer):
"""Implements convolutional and upsampling layers for feature extraction in YOLOv5 segmentation."""
def __init__(self, c1, c_=256, c2=32, w=None):
"""Initializes TFProto layer with convolutional and upsampling layers for feature extraction and
transformation.
"""
super().__init__()
self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
self.upsample = TFUpsample(None, scale_factor=2, mode="nearest")
self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
self.cv3 = TFConv(c_, c2, w=w.cv3)
def call(self, inputs):
"""Performs forward pass through the model, applying convolutions and upscaling on input tensor."""
return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
class TFUpsample(keras.layers.Layer):
"""Implements a TensorFlow upsampling layer with specified size, scale factor, and interpolation mode."""
def __init__(self, size, scale_factor, mode, w=None):
"""
Initializes a TensorFlow upsampling layer with specified size, scale_factor, and mode, ensuring scale_factor is
even.
Warning: all arguments needed including 'w'
"""
super().__init__()
assert scale_factor % 2 == 0, "scale_factor must be multiple of 2"
self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
# self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
# with default arguments: align_corners=False, half_pixel_centers=False
# self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
# size=(x.shape[1] * 2, x.shape[2] * 2))
def call(self, inputs):
"""Applies upsample operation to inputs using nearest neighbor interpolation."""
return self.upsample(inputs)
class TFConcat(keras.layers.Layer):
"""Implements TensorFlow's version of torch.concat() for concatenating tensors along the last dimension."""
def __init__(self, dimension=1, w=None):
"""Initializes a TensorFlow layer for NCHW to NHWC concatenation, requiring dimension=1."""
super().__init__()
assert dimension == 1, "convert only NCHW to NHWC concat"
self.d = 3
def call(self, inputs):
"""Concatenates a list of tensors along the last dimension, used for NCHW to NHWC conversion."""
return tf.concat(inputs, self.d)
def parse_model(d, ch, model, imgsz):
"""Parses a model definition dict `d` to create YOLOv5 model layers, including dynamic channel adjustments."""
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw, ch_mul = (
d["anchors"],
d["nc"],
d["depth_multiple"],
d["width_multiple"],
d.get("channel_multiple"),
)
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
if not ch_mul:
ch_mul = 8
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
m_str = m
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
except NameError:
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [
nn.Conv2d,
Conv,
DWConv,
DWConvTranspose2d,
Bottleneck,
SPP,
SPPF,
MixConv2d,
Focus,
CrossConv,
BottleneckCSP,
C3,
C3x,
]:
c1, c2 = ch[f], args[0]
c2 = make_divisible(c2 * gw, ch_mul) if c2 != no else c2
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3, C3x]:
args.insert(2, n)
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
elif m in [Detect, Segment]:
args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, ch_mul)
args.append(imgsz)
else:
c2 = ch[f]
tf_m = eval("TF" + m_str.replace("nn.", ""))
m_ = (
keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)])
if n > 1
else tf_m(*args, w=model.model[i])
) # module
torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace("__main__.", "") # module type
np = sum(x.numel() for x in torch_m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f"{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}") # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
ch.append(c2)
return keras.Sequential(layers), sorted(save)
class TFModel:
"""Implements YOLOv5 model in TensorFlow, supporting TensorFlow, Keras, and TFLite formats for object detection."""
def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)):
"""Initializes TF YOLOv5 model with specified configuration, channels, classes, model instance, and input
size.
"""
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg) as f:
self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
# Define model
if nc and nc != self.yaml["nc"]:
LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
self.yaml["nc"] = nc # override yaml value
self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
def predict(
self,
inputs,
tf_nms=False,
agnostic_nms=False,
topk_per_class=100,
topk_all=100,
iou_thres=0.45,
conf_thres=0.25,
):
"""Runs inference on input data, with an option for TensorFlow NMS."""
y = [] # outputs
x = inputs
for m in self.model.layers:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
x = m(x) # run
y.append(x if m.i in self.savelist else None) # save output
# Add TensorFlow NMS
if tf_nms:
boxes = self._xywh2xyxy(x[0][..., :4])
probs = x[0][:, :, 4:5]
classes = x[0][:, :, 5:]
scores = probs * classes
if agnostic_nms:
nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
else:
boxes = tf.expand_dims(boxes, 2)
nms = tf.image.combined_non_max_suppression(
boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False
)
return (nms,)
return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
# x = x[0] # [x(1,6300,85), ...] to x(6300,85)
# xywh = x[..., :4] # x(6300,4) boxes
# conf = x[..., 4:5] # x(6300,1) confidences
# cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
# return tf.concat([conf, cls, xywh], 1)
@staticmethod
def _xywh2xyxy(xywh):
"""Converts bounding box format from [x, y, w, h] to [x1, y1, x2, y2], where xy1=top-left and xy2=bottom-
right.
"""
x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
class AgnosticNMS(keras.layers.Layer):
"""Performs agnostic non-maximum suppression (NMS) on detected objects using IoU and confidence thresholds."""
def call(self, input, topk_all, iou_thres, conf_thres):
"""Performs agnostic NMS on input tensors using given thresholds and top-K selection."""
return tf.map_fn(
lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
input,
fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
name="agnostic_nms",
)
@staticmethod
def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):
"""Performs agnostic non-maximum suppression (NMS) on detected objects, filtering based on IoU and confidence
thresholds.
"""
boxes, classes, scores = x
class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
scores_inp = tf.reduce_max(scores, -1)
selected_inds = tf.image.non_max_suppression(
boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres
)
selected_boxes = tf.gather(boxes, selected_inds)
padded_boxes = tf.pad(
selected_boxes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
mode="CONSTANT",
constant_values=0.0,
)
selected_scores = tf.gather(scores_inp, selected_inds)
padded_scores = tf.pad(
selected_scores,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode="CONSTANT",
constant_values=-1.0,
)
selected_classes = tf.gather(class_inds, selected_inds)
padded_classes = tf.pad(
selected_classes,
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
mode="CONSTANT",
constant_values=-1.0,
)
valid_detections = tf.shape(selected_inds)[0]
return padded_boxes, padded_scores, padded_classes, valid_detections
def activations(act=nn.SiLU):
"""Converts PyTorch activations to TensorFlow equivalents, supporting LeakyReLU, Hardswish, and SiLU/Swish."""
if isinstance(act, nn.LeakyReLU):
return lambda x: keras.activations.relu(x, alpha=0.1)
elif isinstance(act, nn.Hardswish):
return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
elif isinstance(act, (nn.SiLU, SiLU)):
return lambda x: keras.activations.swish(x)
else:
raise Exception(f"no matching TensorFlow activation found for PyTorch activation {act}")
def representative_dataset_gen(dataset, ncalib=100):
"""Generates a representative dataset for calibration by yielding transformed numpy arrays from the input
dataset.
"""
for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
im = np.transpose(img, [1, 2, 0])
im = np.expand_dims(im, axis=0).astype(np.float32)
im /= 255
yield [im]
if n >= ncalib:
break
def run(
weights=ROOT / "yolov5s.pt", # weights path
imgsz=(640, 640), # inference size h,w
batch_size=1, # batch size
dynamic=False, # dynamic batch size
):
# PyTorch model
"""Exports YOLOv5 model from PyTorch to TensorFlow and Keras formats, performing inference for validation."""
im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
model = attempt_load(weights, device=torch.device("cpu"), inplace=True, fuse=False)
_ = model(im) # inference
model.info()
# TensorFlow model
im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
_ = tf_model.predict(im) # inference
# Keras model
im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
keras_model.summary()
LOGGER.info("PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.")
def parse_opt():
"""Parses and returns command-line options for model inference, including weights path, image size, batch size, and
dynamic batching.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="weights path")
parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
parser.add_argument("--batch-size", type=int, default=1, help="batch size")
parser.add_argument("--dynamic", action="store_true", help="dynamic batch size")
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
"""Executes the YOLOv5 model run function with parsed command line options."""
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)

View File

@ -1,495 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
YOLO-specific modules.
Usage:
$ python models/yolo.py --cfg yolov5s.yaml
"""
import argparse
import contextlib
import math
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path
import torch
import torch.nn as nn
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if platform.system() != "Windows":
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from utils.yolov5.models.common import (
C3,
C3SPP,
C3TR,
SPP,
SPPF,
Bottleneck,
BottleneckCSP,
C3Ghost,
C3x,
Classify,
Concat,
Contract,
Conv,
CrossConv,
DetectMultiBackend,
DWConv,
DWConvTranspose2d,
Expand,
Focus,
GhostBottleneck,
GhostConv,
Proto,
)
from utils.yolov5.models.experimental import MixConv2d
from utils.yolov5.utils.autoanchor import check_anchor_order
from utils.yolov5.utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args
from utils.yolov5.utils.plots import feature_visualization
from utils.yolov5.utils.torch_utils import (
fuse_conv_and_bn,
initialize_weights,
model_info,
profile,
scale_img,
select_device,
time_sync,
)
try:
import thop # for FLOPs computation
except ImportError:
thop = None
class Detect(nn.Module):
"""YOLOv5 Detect head for processing input tensors and generating detection outputs in object detection models."""
stride = None # strides computed during build
dynamic = False # force grid reconstruction
export = False # export mode
def __init__(self, nc=80, anchors=(), ch=(), inplace=True):
"""Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations."""
super().__init__()
self.nc = nc # number of classes
self.no = nc + 5 # number of outputs per anchor
self.nl = len(anchors) # number of detection layers
self.na = len(anchors[0]) // 2 # number of anchors
self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid
self.register_buffer("anchors", torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.inplace = inplace # use inplace ops (e.g. slice assignment)
def forward(self, x):
"""Processes input through YOLOv5 layers, altering shape for detection: `x(bs, 3, ny, nx, 85)`."""
z = [] # inference output
for i in range(self.nl):
x[i] = self.m[i](x[i]) # conv
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
if not self.training: # inference
if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
if isinstance(self, Segment): # (boxes + masks)
xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
else: # Detect (boxes only)
xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf), 4)
z.append(y.view(bs, self.na * nx * ny, self.no))
return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, "1.10.0")):
"""Generates a mesh grid for anchor boxes with optional compatibility for torch versions < 1.10."""
d = self.anchors[i].device
t = self.anchors[i].dtype
shape = 1, self.na, ny, nx, 2 # grid shape
y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
yv, xv = torch.meshgrid(y, x, indexing="ij") if torch_1_10 else torch.meshgrid(y, x) # torch>=0.7 compatibility
grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
return grid, anchor_grid
class Segment(Detect):
"""YOLOv5 Segment head for segmentation models, extending Detect with mask and prototype layers."""
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
"""Initializes YOLOv5 Segment head with options for mask count, protos, and channel adjustments."""
super().__init__(nc, anchors, ch, inplace)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.proto = Proto(ch[0], self.npr, self.nm) # protos
self.detect = Detect.forward
def forward(self, x):
"""Processes input through the network, returning detections and prototypes; adjusts output based on
training/export mode.
"""
p = self.proto(x[0])
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
class BaseModel(nn.Module):
"""YOLOv5 base model."""
def forward(self, x, profile=False, visualize=False):
"""Executes a single-scale inference or training pass on the YOLOv5 base model, with options for profiling and
visualization.
"""
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_once(self, x, profile=False, visualize=False):
"""Performs a forward pass on the YOLOv5 model, enabling profiling and feature visualization options."""
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
self._profile_one_layer(m, x, dt)
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
return x
def _profile_one_layer(self, m, x, dt):
"""Profiles a single layer's performance by computing GFLOPs, execution time, and parameters."""
c = m == self.model[-1] # is final layer, copy input as inplace fix
o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1e9 * 2 if thop else 0 # FLOPs
t = time_sync()
for _ in range(10):
m(x.copy() if c else x)
dt.append((time_sync() - t) * 100)
if m == self.model[0]:
LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
LOGGER.info(f"{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}")
if c:
LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
def fuse(self):
"""Fuses Conv2d() and BatchNorm2d() layers in the model to improve inference speed."""
LOGGER.info("Fusing layers... ")
for m in self.model.modules():
if isinstance(m, (Conv, DWConv)) and hasattr(m, "bn"):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, "bn") # remove batchnorm
m.forward = m.forward_fuse # update forward
self.info()
return self
def info(self, verbose=False, img_size=640):
"""Prints model information given verbosity and image size, e.g., `info(verbose=True, img_size=640)`."""
model_info(self, verbose, img_size)
def _apply(self, fn):
"""Applies transformations like to(), cpu(), cuda(), half() to model tensors excluding parameters or registered
buffers.
"""
self = super()._apply(fn)
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
class DetectionModel(BaseModel):
"""YOLOv5 detection model class for object detection tasks, supporting custom configurations and anchors."""
def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):
"""Initializes YOLOv5 model with configuration file, input channels, number of classes, and custom anchors."""
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg, encoding="ascii", errors="ignore") as f:
self.yaml = yaml.safe_load(f) # model dict
# Define model
ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels
if nc and nc != self.yaml["nc"]:
LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml["nc"] = nc # override yaml value
if anchors:
LOGGER.info(f"Overriding model.yaml anchors with anchors={anchors}")
self.yaml["anchors"] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
self.names = [str(i) for i in range(self.yaml["nc"])] # default names
self.inplace = self.yaml.get("inplace", True)
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
def _forward(x):
"""Passes the input 'x' through the model and returns the processed output."""
return self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
s = 256 # 2x min stride
m.inplace = self.inplace
m.stride = torch.tensor([s / x.shape[-2] for x in _forward(torch.zeros(1, ch, s, s))]) # forward
check_anchor_order(m)
m.anchors /= m.stride.view(-1, 1, 1)
self.stride = m.stride
self._initialize_biases() # only run once
# Init weights, biases
initialize_weights(self)
self.info()
LOGGER.info("")
def forward(self, x, augment=False, profile=False, visualize=False):
"""Performs single-scale or augmented inference and may include profiling or visualization."""
if augment:
return self._forward_augment(x) # augmented inference, None
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_augment(self, x):
"""Performs augmented inference across different scales and flips, returning combined detections."""
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self._forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi = self._descale_pred(yi, fi, si, img_size)
y.append(yi)
y = self._clip_augmented(y) # clip augmented tails
return torch.cat(y, 1), None # augmented inference, train
def _descale_pred(self, p, flips, scale, img_size):
"""De-scales predictions from augmented inference, adjusting for flips and image size."""
if self.inplace:
p[..., :4] /= scale # de-scale
if flips == 2:
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
elif flips == 3:
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
else:
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
if flips == 2:
y = img_size[0] - y # de-flip ud
elif flips == 3:
x = img_size[1] - x # de-flip lr
p = torch.cat((x, y, wh, p[..., 4:]), -1)
return p
def _clip_augmented(self, y):
"""Clips augmented inference tails for YOLOv5 models, affecting first and last tensors based on grid points and
layer counts.
"""
nl = self.model[-1].nl # number of detection layers (P3-P5)
g = sum(4**x for x in range(nl)) # grid points
e = 1 # exclude layer count
i = (y[0].shape[1] // g) * sum(4**x for x in range(e)) # indices
y[0] = y[0][:, :-i] # large
i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
y[-1] = y[-1][:, i:] # small
return y
def _initialize_biases(self, cf=None):
"""
Initializes biases for YOLOv5's Detect() module, optionally using class frequencies (cf).
For details see https://arxiv.org/abs/1708.02002 section 3.3.
"""
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5 : 5 + m.nc] += (
math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())
) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
class SegmentationModel(DetectionModel):
"""YOLOv5 segmentation model for object detection and segmentation tasks with configurable parameters."""
def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
"""Initializes a YOLOv5 segmentation model with configurable params: cfg (str) for configuration, ch (int) for channels, nc (int) for num classes, anchors (list)."""
super().__init__(cfg, ch, nc, anchors)
class ClassificationModel(BaseModel):
"""YOLOv5 classification model for image classification tasks, initialized with a config file or detection model."""
def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):
"""Initializes YOLOv5 model with config file `cfg`, input channels `ch`, number of classes `nc`, and `cuttoff`
index.
"""
super().__init__()
self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
def _from_detection_model(self, model, nc=1000, cutoff=10):
"""Creates a classification model from a YOLOv5 detection model, slicing at `cutoff` and adding a classification
layer.
"""
if isinstance(model, DetectMultiBackend):
model = model.model # unwrap DetectMultiBackend
model.model = model.model[:cutoff] # backbone
m = model.model[-1] # last layer
ch = m.conv.in_channels if hasattr(m, "conv") else m.cv1.conv.in_channels # ch into module
c = Classify(ch, nc) # Classify()
c.i, c.f, c.type = m.i, m.f, "models.common.Classify" # index, from, type
model.model[-1] = c # replace
self.model = model.model
self.stride = model.stride
self.save = []
self.nc = nc
def _from_yaml(self, cfg):
"""Creates a YOLOv5 classification model from a specified *.yaml configuration file."""
self.model = None
def parse_model(d, ch):
"""Parses a YOLOv5 model from a dict `d`, configuring layers based on input channels `ch` and model architecture."""
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
anchors, nc, gd, gw, act, ch_mul = (
d["anchors"],
d["nc"],
d["depth_multiple"],
d["width_multiple"],
d.get("activation"),
d.get("channel_multiple"),
)
if act:
Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
LOGGER.info(f"{colorstr('activation:')} {act}") # print
if not ch_mul:
ch_mul = 8
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
with contextlib.suppress(NameError):
args[j] = eval(a) if isinstance(a, str) else a # eval strings
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in {
Conv,
GhostConv,
Bottleneck,
GhostBottleneck,
SPP,
SPPF,
DWConv,
MixConv2d,
Focus,
CrossConv,
BottleneckCSP,
C3,
C3TR,
C3SPP,
C3Ghost,
nn.ConvTranspose2d,
DWConvTranspose2d,
C3x,
}:
c1, c2 = ch[f], args[0]
if c2 != no: # if not output
c2 = make_divisible(c2 * gw, ch_mul)
args = [c1, c2, *args[1:]]
if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum(ch[x] for x in f)
# TODO: channel, gw, gd
elif m in {Detect, Segment}:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
if m is Segment:
args[3] = make_divisible(args[3] * gw, ch_mul)
elif m is Contract:
c2 = ch[f] * args[0] ** 2
elif m is Expand:
c2 = ch[f] // args[0] ** 2
else:
c2 = ch[f]
m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace("__main__.", "") # module type
np = sum(x.numel() for x in m_.parameters()) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
LOGGER.info(f"{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}") # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="yolov5s.yaml", help="model.yaml")
parser.add_argument("--batch-size", type=int, default=1, help="total batch size for all GPUs")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--profile", action="store_true", help="profile model speed")
parser.add_argument("--line-profile", action="store_true", help="profile model speed layer by layer")
parser.add_argument("--test", action="store_true", help="test all yolo*.yaml")
opt = parser.parse_args()
opt.cfg = check_yaml(opt.cfg) # check YAML
print_args(vars(opt))
device = select_device(opt.device)
# Create model
im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
model = Model(opt.cfg).to(device)
# Options
if opt.line_profile: # profile layer by layer
model(im, profile=True)
elif opt.profile: # profile forward-backward
results = profile(input=im, ops=[model], n=3)
elif opt.test: # test all models
for cfg in Path(ROOT / "models").rglob("yolo*.yaml"):
try:
_ = Model(cfg)
except Exception as e:
print(f"Error in {cfg}: {e}")
else: # report fused model summary
model.fuse()

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,49 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
- [10, 13, 16, 30, 33, 23] # P3/8
- [30, 61, 62, 45, 59, 119] # P4/16
- [116, 90, 156, 198, 373, 326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[
[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]],
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
head: [
[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, C3, [512, False]], # 13
[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, "nearest"]],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, C3, [256, False]], # 17 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)
[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

View File

@ -1,97 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""utils/initialization."""
import contextlib
import platform
import threading
def emojis(str=""):
"""Returns an emoji-safe version of a string, stripped of emojis on Windows platforms."""
return str.encode().decode("ascii", "ignore") if platform.system() == "Windows" else str
class TryExcept(contextlib.ContextDecorator):
"""A context manager and decorator for error handling that prints an optional message with emojis on exception."""
def __init__(self, msg=""):
"""Initializes TryExcept with an optional message, used as a decorator or context manager for error handling."""
self.msg = msg
def __enter__(self):
"""Enter the runtime context related to this object for error handling with an optional message."""
pass
def __exit__(self, exc_type, value, traceback):
"""Context manager exit method that prints an error message with emojis if an exception occurred, always returns
True.
"""
if value:
print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}"))
return True
def threaded(func):
"""Decorator @threaded to run a function in a separate thread, returning the thread instance."""
def wrapper(*args, **kwargs):
"""Runs the decorated function in a separate daemon thread and returns the thread instance."""
thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
thread.start()
return thread
return wrapper
def join_threads(verbose=False):
"""
Joins all daemon threads, optionally printing their names if verbose is True.
Example: atexit.register(lambda: join_threads())
"""
main_thread = threading.current_thread()
for t in threading.enumerate():
if t is not main_thread:
if verbose:
print(f"Joining thread {t.name}")
t.join()
def notebook_init(verbose=True):
"""Initializes notebook environment by checking requirements, cleaning up, and displaying system info."""
print("Checking setup...")
import os
import shutil
from ultralytics.utils.checks import check_requirements
from utils.general import check_font, is_colab
from utils.torch_utils import select_device # imports
check_font()
import psutil
if check_requirements("wandb", install=False):
os.system("pip uninstall -y wandb") # eliminate unexpected account creation prompt with infinite hang
if is_colab():
shutil.rmtree("/content/sample_data", ignore_errors=True) # remove colab /sample_data directory
# System info
display = None
if verbose:
gb = 1 << 30 # bytes to GiB (1024 ** 3)
ram = psutil.virtual_memory().total
total, used, free = shutil.disk_usage("/")
with contextlib.suppress(Exception): # clear display if ipython is installed
from IPython import display
display.clear_output()
s = f"({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)"
else:
s = ""
select_device(newline=False)
print(emojis(f"Setup complete ✅ {s}"))
return display

View File

@ -1,134 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Activation functions."""
import torch
import torch.nn as nn
import torch.nn.functional as F
class SiLU(nn.Module):
"""Applies the Sigmoid-weighted Linear Unit (SiLU) activation function, also known as Swish."""
@staticmethod
def forward(x):
"""
Applies the Sigmoid-weighted Linear Unit (SiLU) activation function.
https://arxiv.org/pdf/1606.08415.pdf.
"""
return x * torch.sigmoid(x)
class Hardswish(nn.Module):
"""Applies the Hardswish activation function, which is efficient for mobile and embedded devices."""
@staticmethod
def forward(x):
"""
Applies the Hardswish activation function, compatible with TorchScript, CoreML, and ONNX.
Equivalent to x * F.hardsigmoid(x)
"""
return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for TorchScript, CoreML and ONNX
class Mish(nn.Module):
"""Mish activation https://github.com/digantamisra98/Mish."""
@staticmethod
def forward(x):
"""Applies the Mish activation function, a smooth alternative to ReLU."""
return x * F.softplus(x).tanh()
class MemoryEfficientMish(nn.Module):
"""Efficiently applies the Mish activation function using custom autograd for reduced memory usage."""
class F(torch.autograd.Function):
"""Implements a custom autograd function for memory-efficient Mish activation."""
@staticmethod
def forward(ctx, x):
"""Applies the Mish activation function, a smooth ReLU alternative, to the input tensor `x`."""
ctx.save_for_backward(x)
return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
@staticmethod
def backward(ctx, grad_output):
"""Computes the gradient of the Mish activation function with respect to input `x`."""
x = ctx.saved_tensors[0]
sx = torch.sigmoid(x)
fx = F.softplus(x).tanh()
return grad_output * (fx + x * sx * (1 - fx * fx))
def forward(self, x):
"""Applies the Mish activation function to the input tensor `x`."""
return self.F.apply(x)
class FReLU(nn.Module):
"""FReLU activation https://arxiv.org/abs/2007.11824."""
def __init__(self, c1, k=3): # ch_in, kernel
"""Initializes FReLU activation with channel `c1` and kernel size `k`."""
super().__init__()
self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
self.bn = nn.BatchNorm2d(c1)
def forward(self, x):
"""
Applies FReLU activation with max operation between input and BN-convolved input.
https://arxiv.org/abs/2007.11824
"""
return torch.max(x, self.bn(self.conv(x)))
class AconC(nn.Module):
"""
ACON activation (activate or not) function.
AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
See "Activate or Not: Learning Customized Activation" https://arxiv.org/pdf/2009.04759.pdf.
"""
def __init__(self, c1):
"""Initializes AconC with learnable parameters p1, p2, and beta for channel-wise activation control."""
super().__init__()
self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.beta = nn.Parameter(torch.ones(1, c1, 1, 1))
def forward(self, x):
"""Applies AconC activation function with learnable parameters for channel-wise control on input tensor x."""
dpx = (self.p1 - self.p2) * x
return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x
class MetaAconC(nn.Module):
"""
ACON activation (activate or not) function.
AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
See "Activate or Not: Learning Customized Activation" https://arxiv.org/pdf/2009.04759.pdf.
"""
def __init__(self, c1, k=1, s=1, r=16):
"""Initializes MetaAconC with params: channel_in (c1), kernel size (k=1), stride (s=1), reduction (r=16)."""
super().__init__()
c2 = max(r, c1 // r)
self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True)
self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True)
# self.bn1 = nn.BatchNorm2d(c2)
# self.bn2 = nn.BatchNorm2d(c1)
def forward(self, x):
"""Applies a forward pass transforming input `x` using learnable parameters and sigmoid activation."""
y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
# batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891
# beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable
beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed
dpx = (self.p1 - self.p2) * x
return dpx * torch.sigmoid(beta * dpx) + self.p2 * x

View File

@ -1,440 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Image augmentation functions."""
import math
import random
import cv2
import numpy as np
import torch
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from utils.yolov5.utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy
from utils.yolov5.utils.metrics import bbox_ioa
IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean
IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation
class Albumentations:
"""Provides optional data augmentation for YOLOv5 using Albumentations library if installed."""
def __init__(self, size=640):
"""Initializes Albumentations class for optional data augmentation in YOLOv5 with specified input size."""
self.transform = None
prefix = colorstr("albumentations: ")
try:
import albumentations as A
check_version(A.__version__, "1.0.3", hard=True) # version requirement
T = [
A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
A.Blur(p=0.01),
A.MedianBlur(p=0.01),
A.ToGray(p=0.01),
A.CLAHE(p=0.01),
A.RandomBrightnessContrast(p=0.0),
A.RandomGamma(p=0.0),
A.ImageCompression(quality_lower=75, p=0.0),
] # transforms
self.transform = A.Compose(T, bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]))
LOGGER.info(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p))
except ImportError: # package not installed, skip
pass
except Exception as e:
LOGGER.info(f"{prefix}{e}")
def __call__(self, im, labels, p=1.0):
"""Applies transformations to an image and labels with probability `p`, returning updated image and labels."""
if self.transform and random.random() < p:
new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed
im, labels = new["image"], np.array([[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])])
return im, labels
def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
"""
Applies ImageNet normalization to RGB images in BCHW format, modifying them in-place if specified.
Example: y = (x - mean) / std
"""
return TF.normalize(x, mean, std, inplace=inplace)
def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
"""Reverses ImageNet normalization for BCHW format RGB images by applying `x = x * std + mean`."""
for i in range(3):
x[:, i] = x[:, i] * std[i] + mean[i]
return x
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
"""Applies HSV color-space augmentation to an image with random gains for hue, saturation, and value."""
if hgain or sgain or vgain:
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
dtype = im.dtype # uint8
x = np.arange(0, 256, dtype=r.dtype)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed
def hist_equalize(im, clahe=True, bgr=False):
"""Equalizes image histogram, with optional CLAHE, for BGR or RGB image with shape (n,m,3) and range 0-255."""
yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
if clahe:
c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
yuv[:, :, 0] = c.apply(yuv[:, :, 0])
else:
yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram
return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB
def replicate(im, labels):
"""
Replicates half of the smallest object labels in an image for data augmentation.
Returns augmented image and labels.
"""
h, w = im.shape[:2]
boxes = labels[:, 1:].astype(int)
x1, y1, x2, y2 = boxes.T
s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
for i in s.argsort()[: round(s.size * 0.5)]: # smallest indices
x1b, y1b, x2b, y2b = boxes[i]
bh, bw = y2b - y1b, x2b - x1b
yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax]
labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
return im, labels
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
"""Resizes and pads image to new_shape with stride-multiple constraints, returns resized image, ratio, padding."""
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, ratio, (dw, dh)
def random_perspective(
im, targets=(), segments=(), degrees=10, translate=0.1, scale=0.1, shear=10, perspective=0.0, border=(0, 0)
):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
"""Applies random perspective transformation to an image, modifying the image and corresponding labels."""
height = im.shape[0] + border[0] * 2 # shape(h,w,c)
width = im.shape[1] + border[1] * 2
# Center
C = np.eye(3)
C[0, 2] = -im.shape[1] / 2 # x translation (pixels)
C[1, 2] = -im.shape[0] / 2 # y translation (pixels)
# Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
# Rotation and Scale
R = np.eye(3)
a = random.uniform(-degrees, degrees)
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
s = random.uniform(1 - scale, 1 + scale)
# s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
# Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
# Translation
T = np.eye(3)
T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
# Combined rotation matrix
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
if perspective:
im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
else: # affine
im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
if n := len(targets):
use_segments = any(x.any() for x in segments) and len(segments) == n
new = np.zeros((n, 4))
if use_segments: # warp segments
segments = resample_segments(segments) # upsample
for i, segment in enumerate(segments):
xy = np.ones((len(segment), 3))
xy[:, :2] = segment
xy = xy @ M.T # transform
xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine
# clip
new[i] = segment2box(xy, width, height)
else: # warp boxes
xy = np.ones((n * 4, 3))
xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip
new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
# filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
targets = targets[i]
targets[:, 1:5] = new[i]
return im, targets
def copy_paste(im, labels, segments, p=0.5):
"""
Applies Copy-Paste augmentation by flipping and merging segments and labels on an image.
Details at https://arxiv.org/abs/2012.07177.
"""
n = len(segments)
if p and n:
h, w, c = im.shape # height, width, channels
im_new = np.zeros(im.shape, np.uint8)
for j in random.sample(range(n), k=round(p * n)):
l, s = labels[j], segments[j]
box = w - l[3], l[2], w - l[1], l[4]
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
if (ioa < 0.30).all(): # allow 30% obscuration of existing labels
labels = np.concatenate((labels, [[l[0], *box]]), 0)
segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED)
result = cv2.flip(im, 1) # augment segments (flip left-right)
i = cv2.flip(im_new, 1).astype(bool)
im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug
return im, labels, segments
def cutout(im, labels, p=0.5):
"""
Applies cutout augmentation to an image with optional label adjustment, using random masks of varying sizes.
Details at https://arxiv.org/abs/1708.04552.
"""
if random.random() < p:
h, w = im.shape[:2]
scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
for s in scales:
mask_h = random.randint(1, int(h * s)) # create random masks
mask_w = random.randint(1, int(w * s))
# box
xmin = max(0, random.randint(0, w) - mask_w // 2)
ymin = max(0, random.randint(0, h) - mask_h // 2)
xmax = min(w, xmin + mask_w)
ymax = min(h, ymin + mask_h)
# apply random color mask
im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
# return unobscured labels
if len(labels) and s > 0.03:
box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h)) # intersection over area
labels = labels[ioa < 0.60] # remove >60% obscured labels
return labels
def mixup(im, labels, im2, labels2):
"""
Applies MixUp augmentation by blending images and labels.
See https://arxiv.org/pdf/1710.09412.pdf for details.
"""
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
im = (im * r + im2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
return im, labels
def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
"""
Filters bounding box candidates by minimum width-height threshold `wh_thr` (pixels), aspect ratio threshold
`ar_thr`, and area ratio threshold `area_thr`.
box1(4,n) is before augmentation, box2(4,n) is after augmentation.
"""
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates
def classify_albumentations(
augment=True,
size=224,
scale=(0.08, 1.0),
ratio=(0.75, 1.0 / 0.75), # 0.75, 1.33
hflip=0.5,
vflip=0.0,
jitter=0.4,
mean=IMAGENET_MEAN,
std=IMAGENET_STD,
auto_aug=False,
):
# YOLOv5 classification Albumentations (optional, only used if package is installed)
"""Sets up and returns Albumentations transforms for YOLOv5 classification tasks depending on augmentation
settings.
"""
prefix = colorstr("albumentations: ")
try:
import albumentations as A
from albumentations.pytorch import ToTensorV2
check_version(A.__version__, "1.0.3", hard=True) # version requirement
if augment: # Resize and crop
T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
if auto_aug:
# TODO: implement AugMix, AutoAug & RandAug in albumentation
LOGGER.info(f"{prefix}auto augmentations are currently not supported")
else:
if hflip > 0:
T += [A.HorizontalFlip(p=hflip)]
if vflip > 0:
T += [A.VerticalFlip(p=vflip)]
if jitter > 0:
color_jitter = (float(jitter),) * 3 # repeat value for brightness, contrast, saturation, 0 hue
T += [A.ColorJitter(*color_jitter, 0)]
else: # Use fixed crop for eval set (reproducibility)
T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)]
T += [A.Normalize(mean=mean, std=std), ToTensorV2()] # Normalize and convert to Tensor
LOGGER.info(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p))
return A.Compose(T)
except ImportError: # package not installed, skip
LOGGER.warning(f"{prefix}⚠️ not found, install with `pip install albumentations` (recommended)")
except Exception as e:
LOGGER.info(f"{prefix}{e}")
def classify_transforms(size=224):
"""Applies a series of transformations including center crop, ToTensor, and normalization for classification."""
assert isinstance(size, int), f"ERROR: classify_transforms size {size} must be integer, not (list, tuple)"
# T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
class LetterBox:
"""Resizes and pads images to specified dimensions while maintaining aspect ratio for YOLOv5 preprocessing."""
def __init__(self, size=(640, 640), auto=False, stride=32):
"""Initializes a LetterBox object for YOLOv5 image preprocessing with optional auto sizing and stride
adjustment.
"""
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
self.auto = auto # pass max size integer, automatically solve for short side using stride
self.stride = stride # used with auto
def __call__(self, im):
"""
Resizes and pads input image `im` (HWC format) to specified dimensions, maintaining aspect ratio.
im = np.array HWC
"""
imh, imw = im.shape[:2]
r = min(self.h / imh, self.w / imw) # ratio of new/old
h, w = round(imh * r), round(imw * r) # resized image
hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else self.h, self.w
top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype)
im_out[top : top + h, left : left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
return im_out
class CenterCrop:
"""Applies center crop to an image, resizing it to the specified size while maintaining aspect ratio."""
def __init__(self, size=640):
"""Initializes CenterCrop for image preprocessing, accepting single int or tuple for size, defaults to 640."""
super().__init__()
self.h, self.w = (size, size) if isinstance(size, int) else size
def __call__(self, im):
"""
Applies center crop to the input image and resizes it to a specified size, maintaining aspect ratio.
im = np.array HWC
"""
imh, imw = im.shape[:2]
m = min(imh, imw) # min dimension
top, left = (imh - m) // 2, (imw - m) // 2
return cv2.resize(im[top : top + m, left : left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR)
class ToTensor:
"""Converts BGR np.array image from HWC to RGB CHW format, normalizes to [0, 1], and supports FP16 if half=True."""
def __init__(self, half=False):
"""Initializes ToTensor for YOLOv5 image preprocessing, with optional half precision (half=True for FP16)."""
super().__init__()
self.half = half
def __call__(self, im):
"""
Converts BGR np.array image from HWC to RGB CHW format, and normalizes to [0, 1], with support for FP16 if
`half=True`.
im = np.array HWC in BGR order
"""
im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous
im = torch.from_numpy(im) # to torch
im = im.half() if self.half else im.float() # uint8 to fp16/32
im /= 255.0 # 0-255 to 0.0-1.0
return im

View File

@ -1,175 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""AutoAnchor utils."""
import random
import numpy as np
import torch
import yaml
from tqdm import tqdm
from utils.yolov5.utils import TryExcept
from utils.yolov5.utils.general import LOGGER, TQDM_BAR_FORMAT, colorstr
PREFIX = colorstr("AutoAnchor: ")
def check_anchor_order(m):
"""Checks and corrects anchor order against stride in YOLOv5 Detect() module if necessary."""
a = m.anchors.prod(-1).mean(-1).view(-1) # mean anchor area per output layer
da = a[-1] - a[0] # delta a
ds = m.stride[-1] - m.stride[0] # delta s
if da and (da.sign() != ds.sign()): # same order
LOGGER.info(f"{PREFIX}Reversing anchor order")
m.anchors[:] = m.anchors.flip(0)
@TryExcept(f"{PREFIX}ERROR")
def check_anchors(dataset, model, thr=4.0, imgsz=640):
"""Evaluates anchor fit to dataset and adjusts if necessary, supporting customizable threshold and image size."""
m = model.module.model[-1] if hasattr(model, "module") else model.model[-1] # Detect()
shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh
def metric(k): # compute metric
"""Computes ratio metric, anchors above threshold, and best possible recall for YOLOv5 anchor evaluation."""
r = wh[:, None] / k[None]
x = torch.min(r, 1 / r).min(2)[0] # ratio metric
best = x.max(1)[0] # best_x
aat = (x > 1 / thr).float().sum(1).mean() # anchors above threshold
bpr = (best > 1 / thr).float().mean() # best possible recall
return bpr, aat
stride = m.stride.to(m.anchors.device).view(-1, 1, 1) # model strides
anchors = m.anchors.clone() * stride # current anchors
bpr, aat = metric(anchors.cpu().view(-1, 2))
s = f"\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). "
if bpr > 0.98: # threshold to recompute
LOGGER.info(f"{s}Current anchors are a good fit to dataset ✅")
else:
LOGGER.info(f"{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...")
na = m.anchors.numel() // 2 # number of anchors
anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
new_bpr = metric(anchors)[0]
if new_bpr > bpr: # replace anchors
anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
m.anchors[:] = anchors.clone().view_as(m.anchors)
check_anchor_order(m) # must be in pixel-space (not grid-space)
m.anchors /= stride
s = f"{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)"
else:
s = f"{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)"
LOGGER.info(s)
def kmean_anchors(dataset="./data/coco128.yaml", n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
"""
Creates kmeans-evolved anchors from training dataset.
Arguments:
dataset: path to data.yaml, or a loaded dataset
n: number of anchors
img_size: image size used for training
thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
gen: generations to evolve anchors using genetic algorithm
verbose: print all results
Return:
k: kmeans evolved anchors
Usage:
from utils.autoanchor import *; _ = kmean_anchors()
"""
from scipy.cluster.vq import kmeans
npr = np.random
thr = 1 / thr
def metric(k, wh): # compute metrics
"""Computes ratio metric, anchors above threshold, and best possible recall for YOLOv5 anchor evaluation."""
r = wh[:, None] / k[None]
x = torch.min(r, 1 / r).min(2)[0] # ratio metric
# x = wh_iou(wh, torch.tensor(k)) # iou metric
return x, x.max(1)[0] # x, best_x
def anchor_fitness(k): # mutation fitness
"""Evaluates fitness of YOLOv5 anchors by computing recall and ratio metrics for an anchor evolution process."""
_, best = metric(torch.tensor(k, dtype=torch.float32), wh)
return (best * (best > thr).float()).mean() # fitness
def print_results(k, verbose=True):
"""Sorts and logs kmeans-evolved anchor metrics and best possible recall values for YOLOv5 anchor evaluation."""
k = k[np.argsort(k.prod(1))] # sort small to large
x, best = metric(k, wh0)
bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
s = (
f"{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n"
f"{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, "
f"past_thr={x[x > thr].mean():.3f}-mean: "
)
for x in k:
s += "%i,%i, " % (round(x[0]), round(x[1]))
if verbose:
LOGGER.info(s[:-2])
return k
if isinstance(dataset, str): # *.yaml file
with open(dataset, errors="ignore") as f:
data_dict = yaml.safe_load(f) # model dict
from utils.dataloaders import LoadImagesAndLabels
dataset = LoadImagesAndLabels(data_dict["train"], augment=True, rect=True)
# Get label wh
shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
# Filter
i = (wh0 < 3.0).any(1).sum()
if i:
LOGGER.info(f"{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size")
wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32) # filter > 2 pixels
# wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
# Kmeans init
try:
LOGGER.info(f"{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...")
assert n <= len(wh) # apply overdetermined constraint
s = wh.std(0) # sigmas for whitening
k = kmeans(wh / s, n, iter=30)[0] * s # points
assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar
except Exception:
LOGGER.warning(f"{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init")
k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init
wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0))
k = print_results(k, verbose=False)
# Plot
# k, d = [None] * 20, [None] * 20
# for i in tqdm(range(1, 21)):
# k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance
# fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
# ax = ax.ravel()
# ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
# fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh
# ax[0].hist(wh[wh[:, 0]<100, 0],400)
# ax[1].hist(wh[wh[:, 1]<100, 1],400)
# fig.savefig('wh.png', dpi=200)
# Evolve
f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma
pbar = tqdm(range(gen), bar_format=TQDM_BAR_FORMAT) # progress bar
for _ in pbar:
v = np.ones(sh)
while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
kg = (k.copy() * v).clip(min=2.0)
fg = anchor_fitness(kg)
if fg > f:
f, k = fg, kg.copy()
pbar.desc = f"{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}"
if verbose:
print_results(k, verbose)
return print_results(k).astype(np.float32)

View File

@ -1,70 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Auto-batch utils."""
from copy import deepcopy
import numpy as np
import torch
from utils.yolov5.utils.general import LOGGER, colorstr
from utils.yolov5.utils.torch_utils import profile
def check_train_batch_size(model, imgsz=640, amp=True):
"""Checks and computes optimal training batch size for YOLOv5 model, given image size and AMP setting."""
with torch.cuda.amp.autocast(amp):
return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size
def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
"""Estimates optimal YOLOv5 batch size using `fraction` of CUDA memory."""
# Usage:
# import torch
# from utils.autobatch import autobatch
# model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
# print(autobatch(model))
# Check device
prefix = colorstr("AutoBatch: ")
LOGGER.info(f"{prefix}Computing optimal batch size for --imgsz {imgsz}")
device = next(model.parameters()).device # get model device
if device.type == "cpu":
LOGGER.info(f"{prefix}CUDA not detected, using default CPU batch-size {batch_size}")
return batch_size
if torch.backends.cudnn.benchmark:
LOGGER.info(f"{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}")
return batch_size
# Inspect CUDA memory
gb = 1 << 30 # bytes to GiB (1024 ** 3)
d = str(device).upper() # 'CUDA:0'
properties = torch.cuda.get_device_properties(device) # device properties
t = properties.total_memory / gb # GiB total
r = torch.cuda.memory_reserved(device) / gb # GiB reserved
a = torch.cuda.memory_allocated(device) / gb # GiB allocated
f = t - (r + a) # GiB free
LOGGER.info(f"{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free")
# Profile batch sizes
batch_sizes = [1, 2, 4, 8, 16]
try:
img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
results = profile(img, model, n=3, device=device)
except Exception as e:
LOGGER.warning(f"{prefix}{e}")
# Fit a solution
y = [x[2] for x in results if x] # memory [2]
p = np.polyfit(batch_sizes[: len(y)], y, deg=1) # first degree polynomial fit
b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size)
if None in results: # some sizes failed
i = results.index(None) # first fail index
if b >= batch_sizes[i]: # y intercept above failure point
b = batch_sizes[max(i - 1, 0)] # select prior safe point
if b < 1 or b > 1024: # b outside of safe range
b = batch_size
LOGGER.warning(f"{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.")
fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted
LOGGER.info(f"{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅")
return b

View File

@ -1 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

View File

@ -1,26 +0,0 @@
# AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
# This script will run on every instance restart, not only on first start
# --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---
Content-Type: multipart/mixed; boundary="//"
MIME-Version: 1.0
--//
Content-Type: text/cloud-config; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="cloud-config.txt"
#cloud-config
cloud_final_modules:
- [scripts-user, always]
--//
Content-Type: text/x-shellscript; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="userdata.txt"
#!/bin/bash
# --- paste contents of userdata.sh here ---
--//

View File

@ -1,42 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# Resume all interrupted trainings in yolov5/ dir including DDP trainings
# Usage: $ python utils/aws/resume.py
import os
import sys
from pathlib import Path
import torch
import yaml
FILE = Path(__file__).resolve()
ROOT = FILE.parents[2] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
port = 0 # --master_port
path = Path("").resolve()
for last in path.rglob("*/**/last.pt"):
ckpt = torch.load(last)
if ckpt["optimizer"] is None:
continue
# Load opt.yaml
with open(last.parent.parent / "opt.yaml", errors="ignore") as f:
opt = yaml.safe_load(f)
# Get device count
d = opt["device"].split(",") # devices
nd = len(d) # number of devices
ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel
if ddp: # multi-GPU
port += 1
cmd = f"python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}"
else: # single-GPU
cmd = f"python train.py --resume {last}"
cmd += " > /dev/null 2>&1 &" # redirect output to dev/null and run in daemon thread
print(cmd)
os.system(cmd)

View File

@ -1,27 +0,0 @@
#!/bin/bash
# AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
# This script will run only once on first instance start (for a re-start script see mime.sh)
# /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
# Use >300 GB SSD
cd home/ubuntu
if [ ! -d yolov5 ]; then
echo "Running first-time script." # install dependencies, download COCO, pull Docker
git clone https://github.com/ultralytics/yolov5 -b master && sudo chmod -R 777 yolov5
cd yolov5
bash data/scripts/get_coco.sh && echo "COCO done." &
sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
wait && echo "All tasks done." # finish background tasks
else
echo "Running re-start script." # resume interrupted runs
i=0
list=$(sudo docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
while IFS= read -r id; do
((i++))
echo "restarting container $i: $id"
sudo docker start $id
# sudo docker exec -it $id python train.py --resume # single-GPU
sudo docker exec -d $id python utils/aws/resume.py # multi-scenario
done <<<"$list"
fi

View File

@ -1,72 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Callback utils."""
import threading
class Callbacks:
"""Handles all registered callbacks for YOLOv5 Hooks."""
def __init__(self):
"""Initializes a Callbacks object to manage registered YOLOv5 training event hooks."""
self._callbacks = {
"on_pretrain_routine_start": [],
"on_pretrain_routine_end": [],
"on_train_start": [],
"on_train_epoch_start": [],
"on_train_batch_start": [],
"optimizer_step": [],
"on_before_zero_grad": [],
"on_train_batch_end": [],
"on_train_epoch_end": [],
"on_val_start": [],
"on_val_batch_start": [],
"on_val_image_end": [],
"on_val_batch_end": [],
"on_val_end": [],
"on_fit_epoch_end": [], # fit = train + val
"on_model_save": [],
"on_train_end": [],
"on_params_update": [],
"teardown": [],
}
self.stop_training = False # set True to interrupt training
def register_action(self, hook, name="", callback=None):
"""
Register a new action to a callback hook.
Args:
hook: The callback hook name to register the action to
name: The name of the action for later reference
callback: The callback to fire
"""
assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
assert callable(callback), f"callback '{callback}' is not callable"
self._callbacks[hook].append({"name": name, "callback": callback})
def get_registered_actions(self, hook=None):
"""
Returns all the registered actions by callback hook.
Args:
hook: The name of the hook to check, defaults to all
"""
return self._callbacks[hook] if hook else self._callbacks
def run(self, hook, *args, thread=False, **kwargs):
"""
Loop through the registered actions and fire all callbacks on main thread.
Args:
hook: The name of the hook to check, defaults to all
args: Arguments to receive from YOLOv5
thread: (boolean) Run callbacks in daemon thread
kwargs: Keyword Arguments to receive from YOLOv5
"""
assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
for logger in self._callbacks[hook]:
if thread:
threading.Thread(target=logger["callback"], args=args, kwargs=kwargs, daemon=True).start()
else:
logger["callback"](*args, **kwargs)

File diff suppressed because it is too large Load Diff

View File

@ -1,73 +0,0 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Builds ultralytics/yolov5:latest image on DockerHub https://hub.docker.com/r/ultralytics/yolov5
# Image is CUDA-optimized for YOLOv5 single/multi-GPU training and inference
# Start FROM PyTorch image https://hub.docker.com/r/pytorch/pytorch
FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
# Downloads to user config dir
ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
# Install linux packages
ENV DEBIAN_FRONTEND noninteractive
RUN apt update
RUN TZ=Etc/UTC apt install -y tzdata
RUN apt install --no-install-recommends -y gcc git zip curl htop libgl1 libglib2.0-0 libpython3-dev gnupg
# RUN alias python=python3
# Security updates
# https://security.snyk.io/vuln/SNYK-UBUNTU1804-OPENSSL-3314796
RUN apt upgrade --no-install-recommends -y openssl
# Create working directory
RUN rm -rf /usr/src/app && mkdir -p /usr/src/app
WORKDIR /usr/src/app
# Copy contents
COPY . /usr/src/app
# Install pip packages
COPY requirements.txt .
RUN python3 -m pip install --upgrade pip wheel
RUN pip install --no-cache -r requirements.txt albumentations comet gsutil notebook \
coremltools onnx onnx-simplifier onnxruntime 'openvino-dev>=2023.0'
# tensorflow tensorflowjs \
# Set environment variables
ENV OMP_NUM_THREADS=1
# Cleanup
ENV DEBIAN_FRONTEND teletype
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
# t=ultralytics/yolov5:latest && sudo docker build -f utils/docker/Dockerfile -t $t . && sudo docker push $t
# Pull and Run
# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all $t
# Pull and Run with local directory access
# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/datasets:/usr/src/datasets $t
# Kill all
# sudo docker kill $(sudo docker ps -q)
# Kill all image-based
# sudo docker kill $(sudo docker ps -qa --filter ancestor=ultralytics/yolov5:latest)
# DockerHub tag update
# t=ultralytics/yolov5:latest tnew=ultralytics/yolov5:v6.2 && sudo docker pull $t && sudo docker tag $t $tnew && sudo docker push $tnew
# Clean up
# sudo docker system prune -a --volumes
# Update Ubuntu drivers
# https://www.maketecheasier.com/install-nvidia-drivers-ubuntu/
# DDP test
# python -m torch.distributed.run --nproc_per_node 2 --master_port 1 train.py --epochs 3
# GCP VM from Image
# docker.io/ultralytics/yolov5:latest

View File

@ -1,40 +0,0 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Builds ultralytics/yolov5:latest-arm64 image on DockerHub https://hub.docker.com/r/ultralytics/yolov5
# Image is aarch64-compatible for Apple M1 and other ARM architectures i.e. Jetson Nano and Raspberry Pi
# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu
FROM arm64v8/ubuntu:22.10
# Downloads to user config dir
ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
# Install linux packages
ENV DEBIAN_FRONTEND noninteractive
RUN apt update
RUN TZ=Etc/UTC apt install -y tzdata
RUN apt install --no-install-recommends -y python3-pip git zip curl htop gcc libgl1 libglib2.0-0 libpython3-dev
# RUN alias python=python3
# Install pip packages
COPY requirements.txt .
RUN python3 -m pip install --upgrade pip wheel
RUN pip install --no-cache -r requirements.txt albumentations gsutil notebook \
coremltools onnx onnxruntime
# tensorflow-aarch64 tensorflowjs \
# Create working directory
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app
# Copy contents
COPY . /usr/src/app
ENV DEBIAN_FRONTEND teletype
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
# t=ultralytics/yolov5:latest-arm64 && sudo docker build --platform linux/arm64 -f utils/docker/Dockerfile-arm64 -t $t . && sudo docker push $t
# Pull and Run
# t=ultralytics/yolov5:latest-arm64 && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t

View File

@ -1,42 +0,0 @@
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# Builds ultralytics/yolov5:latest-cpu image on DockerHub https://hub.docker.com/r/ultralytics/yolov5
# Image is CPU-optimized for ONNX, OpenVINO and PyTorch YOLOv5 deployments
# Start FROM Ubuntu image https://hub.docker.com/_/ubuntu
FROM ubuntu:23.10
# Downloads to user config dir
ADD https://ultralytics.com/assets/Arial.ttf https://ultralytics.com/assets/Arial.Unicode.ttf /root/.config/Ultralytics/
# Install linux packages
# g++ required to build 'tflite_support' and 'lap' packages, libusb-1.0-0 required for 'tflite_support' package
RUN apt update \
&& apt install --no-install-recommends -y python3-pip git zip curl htop libgl1 libglib2.0-0 libpython3-dev gnupg g++ libusb-1.0-0
# RUN alias python=python3
# Remove python3.11/EXTERNALLY-MANAGED or use 'pip install --break-system-packages' avoid 'externally-managed-environment' Ubuntu nightly error
RUN rm -rf /usr/lib/python3.11/EXTERNALLY-MANAGED
# Install pip packages
COPY requirements.txt .
RUN python3 -m pip install --upgrade pip wheel
RUN pip install --no-cache -r requirements.txt albumentations gsutil notebook \
coremltools onnx onnx-simplifier onnxruntime 'openvino-dev>=2023.0' \
# tensorflow tensorflowjs \
--extra-index-url https://download.pytorch.org/whl/cpu
# Create working directory
RUN mkdir -p /usr/src/app
WORKDIR /usr/src/app
# Copy contents
COPY . /usr/src/app
# Usage Examples -------------------------------------------------------------------------------------------------------
# Build and Push
# t=ultralytics/yolov5:latest-cpu && sudo docker build -f utils/docker/Dockerfile-cpu -t $t . && sudo docker push $t
# Pull and Run
# t=ultralytics/yolov5:latest-cpu && sudo docker pull $t && sudo docker run -it --ipc=host -v "$(pwd)"/datasets:/usr/src/datasets $t

View File

@ -1,136 +0,0 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Download utils."""
import logging
import subprocess
import urllib
from pathlib import Path
import requests
import torch
def is_url(url, check=True):
"""Determines if a string is a URL and optionally checks its existence online, returning a boolean."""
try:
url = str(url)
result = urllib.parse.urlparse(url)
assert all([result.scheme, result.netloc]) # check if is url
return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online
except (AssertionError, urllib.request.HTTPError):
return False
def gsutil_getsize(url=""):
"""
Returns the size in bytes of a file at a Google Cloud Storage URL using `gsutil du`.
Returns 0 if the command fails or output is empty.
"""
output = subprocess.check_output(["gsutil", "du", url], shell=True, encoding="utf-8")
return int(output.split()[0]) if output else 0
def url_getsize(url="https://ultralytics.com/images/bus.jpg"):
"""Returns the size in bytes of a downloadable file at a given URL; defaults to -1 if not found."""
response = requests.head(url, allow_redirects=True)
return int(response.headers.get("content-length", -1))
def curl_download(url, filename, *, silent: bool = False) -> bool:
"""Download a file from a url to a filename using curl."""
silent_option = "sS" if silent else "" # silent
proc = subprocess.run(
[
"curl",
"-#",
f"-{silent_option}L",
url,
"--output",
filename,
"--retry",
"9",
"-C",
"-",
]
)
return proc.returncode == 0
def safe_download(file, url, url2=None, min_bytes=1e0, error_msg=""):
"""
Downloads a file from a URL (or alternate URL) to a specified path if file is above a minimum size.
Removes incomplete downloads.
"""
from utils.yolov5.utils.general import LOGGER
file = Path(file)
assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
try: # url1
LOGGER.info(f"Downloading {url} to {file}...")
torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)
assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
except Exception as e: # url2
if file.exists():
file.unlink() # remove partial downloads
LOGGER.info(f"ERROR: {e}\nRe-attempting {url2 or url} to {file}...")
# curl download, retry and resume on fail
curl_download(url2 or url, file)
finally:
if not file.exists() or file.stat().st_size < min_bytes: # check
if file.exists():
file.unlink() # remove partial downloads
LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}")
LOGGER.info("")
def attempt_download(file, repo="ultralytics/yolov5", release="v7.0"):
"""Downloads a file from GitHub release assets or via direct URL if not found locally, supporting backup
versions.
"""
from utils.yolov5.utils.general import LOGGER
def github_assets(repository, version="latest"):
"""Fetches GitHub repository release tag and asset names using the GitHub API."""
if version != "latest":
version = f"tags/{version}" # i.e. tags/v7.0
response = requests.get(f"https://api.github.com/repos/{repository}/releases/{version}").json() # github api
return response["tag_name"], [x["name"] for x in response["assets"]] # tag, assets
file = Path(str(file).strip().replace("'", ""))
if not file.exists():
# URL specified
name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
if str(file).startswith(("http:/", "https:/")): # download
url = str(file).replace(":/", "://") # Pathlib turns :// -> :/
file = name.split("?")[0] # parse authentication https://url.com/file.txt?auth...
if Path(file).is_file():
LOGGER.info(f"Found {url} locally at {file}") # file already exists
else:
safe_download(file=file, url=url, min_bytes=1e5)
return file
# GitHub assets
assets = [f"yolov5{size}{suffix}.pt" for size in "nsmlx" for suffix in ("", "6", "-cls", "-seg")] # default
try:
tag, assets = github_assets(repo, release)
except Exception:
try:
tag, assets = github_assets(repo) # latest release
except Exception:
try:
tag = subprocess.check_output("git tag", shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
except Exception:
tag = release
if name in assets:
file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)
safe_download(
file,
url=f"https://github.com/{repo}/releases/download/{tag}/{name}",
min_bytes=1e5,
error_msg=f"{file} missing, try downloading from https://github.com/{repo}/releases/{tag}",
)
return str(file)

View File

@ -1,70 +0,0 @@
# Flask REST API
[REST](https://en.wikipedia.org/wiki/Representational_state_transfer) [API](https://en.wikipedia.org/wiki/API)s are commonly used to expose Machine Learning (ML) models to other services. This folder contains an example REST API created using Flask to expose the YOLOv5s model from [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/).
## Requirements
[Flask](https://palletsprojects.com/projects/flask/) is required. Install with:
```shell
$ pip install Flask
```
## Run
After Flask installation run:
```shell
$ python3 restapi.py --port 5000
```
Then use [curl](https://curl.se/) to perform a request:
```shell
$ curl -X POST -F image=@zidane.jpg 'http://localhost:5000/v1/object-detection/yolov5s'
```
The model inference results are returned as a JSON response:
```json
[
{
"class": 0,
"confidence": 0.8900438547,
"height": 0.9318675399,
"name": "person",
"width": 0.3264600933,
"xcenter": 0.7438579798,
"ycenter": 0.5207948685
},
{
"class": 0,
"confidence": 0.8440024257,
"height": 0.7155083418,
"name": "person",
"width": 0.6546785235,
"xcenter": 0.427829951,
"ycenter": 0.6334488392
},
{
"class": 27,
"confidence": 0.3771208823,
"height": 0.3902671337,
"name": "tie",
"width": 0.0696444362,
"xcenter": 0.3675483763,
"ycenter": 0.7991207838
},
{
"class": 27,
"confidence": 0.3527112305,
"height": 0.1540903747,
"name": "tie",
"width": 0.0336618312,
"xcenter": 0.7814827561,
"ycenter": 0.5065554976
}
]
```
An example python script to perform inference using [requests](https://docs.python-requests.org/en/master/) is given in `example_request.py`

Some files were not shown because too many files have changed in this diff Show More