完成训练模块的转移

This commit is contained in:
2025-04-17 11:03:05 +08:00
parent 4439687870
commit 74e8f0d415
188 changed files with 32931 additions and 70 deletions

307
yolov5/segment/predict.py Normal file
View File

@ -0,0 +1,307 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Run YOLOv5 segmentation inference on images, videos, directories, streams, etc.
Usage - sources:
$ python segment/predict.py --weights yolov5s-seg.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
screen # screenshot
path/ # directory
list.txt # list of images
list.streams # list of streams
'path/*.jpg' # glob
'https://youtu.be/LNwODJXcvt4' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
Usage - formats:
$ python segment/predict.py --weights yolov5s-seg.pt # PyTorch
yolov5s-seg.torchscript # TorchScript
yolov5s-seg.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s-seg_openvino_model # OpenVINO
yolov5s-seg.engine # TensorRT
yolov5s-seg.mlmodel # CoreML (macOS-only)
yolov5s-seg_saved_model # TensorFlow SavedModel
yolov5s-seg.pb # TensorFlow GraphDef
yolov5s-seg.tflite # TensorFlow Lite
yolov5s-seg_edgetpu.tflite # TensorFlow Edge TPU
yolov5s-seg_paddle_model # PaddlePaddle
"""
import argparse
import os
import platform
import sys
from pathlib import Path
import torch
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from ultralytics.utils.plotting import Annotator, colors, save_one_box
from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (
LOGGER,
Profile,
check_file,
check_img_size,
check_imshow,
check_requirements,
colorstr,
cv2,
increment_path,
non_max_suppression,
print_args,
scale_boxes,
scale_segments,
strip_optimizer,
)
from utils.segment.general import masks2segments, process_mask, process_mask_native
from utils.torch_utils import select_device, smart_inference_mode
@smart_inference_mode()
def run(
weights=ROOT / "yolov5s-seg.pt", # model.pt path(s)
source=ROOT / "data/images", # file/dir/URL/glob/screen/0(webcam)
data=ROOT / "data/coco128.yaml", # dataset.yaml path
imgsz=(640, 640), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
nosave=False, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project=ROOT / "runs/predict-seg", # save results to project/name
name="exp", # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=1, # video frame-rate stride
retina_masks=False,
):
"""Run YOLOv5 segmentation inference on diverse sources including images, videos, directories, and streams."""
source = str(source)
save_img = not nosave and not source.endswith(".txt") # save inference images
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
screenshot = source.lower().startswith("screen")
if is_url and is_file:
source = check_file(source) # download
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride) # check image size
# Dataloader
bs = 1 # batch_size
if webcam:
view_img = check_imshow(warn=True)
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
bs = len(dataset)
elif screenshot:
dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
vid_path, vid_writer = [None] * bs, [None] * bs
# Run inference
model.warmup(imgsz=(1 if pt else bs, 3, *imgsz)) # warmup
seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device))
for path, im, im0s, vid_cap, s in dataset:
with dt[0]:
im = torch.from_numpy(im).to(model.device)
im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
# Inference
with dt[1]:
visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
pred, proto = model(im, augment=augment, visualize=visualize)[:2]
# NMS
with dt[2]:
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32)
# Second-stage classifier (optional)
# pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
# Process predictions
for i, det in enumerate(pred): # per image
seen += 1
if webcam: # batch_size >= 1
p, im0, frame = path[i], im0s[i].copy(), dataset.count
s += f"{i}: "
else:
p, im0, frame = path, im0s.copy(), getattr(dataset, "frame", 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # im.jpg
txt_path = str(save_dir / "labels" / p.stem) + ("" if dataset.mode == "image" else f"_{frame}") # im.txt
s += "{:g}x{:g} ".format(*im.shape[2:]) # print string
imc = im0.copy() if save_crop else im0 # for save_crop
annotator = Annotator(im0, line_width=line_thickness, example=str(names))
if len(det):
if retina_masks:
# scale bbox first the crop masks
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() # rescale boxes to im0 size
masks = process_mask_native(proto[i], det[:, 6:], det[:, :4], im0.shape[:2]) # HWC
else:
masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True) # HWC
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round() # rescale boxes to im0 size
# Segments
if save_txt:
segments = [
scale_segments(im0.shape if retina_masks else im.shape[2:], x, im0.shape, normalize=True)
for x in reversed(masks2segments(masks))
]
# Print results
for c in det[:, 5].unique():
n = (det[:, 5] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
# Mask plotting
annotator.masks(
masks,
colors=[colors(x, True) for x in det[:, 5]],
im_gpu=torch.as_tensor(im0, dtype=torch.float16).to(device).permute(2, 0, 1).flip(0).contiguous()
/ 255
if retina_masks
else im[i],
)
# Write results
for j, (*xyxy, conf, cls) in enumerate(reversed(det[:, :6])):
if save_txt: # Write to file
seg = segments[j].reshape(-1) # (n,2) to (n*2)
line = (cls, *seg, conf) if save_conf else (cls, *seg) # label format
with open(f"{txt_path}.txt", "a") as f:
f.write(("%g " * len(line)).rstrip() % line + "\n")
if save_img or save_crop or view_img: # Add bbox to image
c = int(cls) # integer class
label = None if hide_labels else (names[c] if hide_conf else f"{names[c]} {conf:.2f}")
annotator.box_label(xyxy, label, color=colors(c, True))
# annotator.draw.polygon(segments[j], outline=colors(c, True), width=3)
if save_crop:
save_one_box(xyxy, imc, file=save_dir / "crops" / names[c] / f"{p.stem}.jpg", BGR=True)
# Stream results
im0 = annotator.result()
if view_img:
if platform.system() == "Linux" and p not in windows:
windows.append(p)
cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
cv2.imshow(str(p), im0)
if cv2.waitKey(1) == ord("q"): # 1 millisecond
exit()
# Save results (image with detections)
if save_img:
if dataset.mode == "image":
cv2.imwrite(save_path, im0)
else: # 'video' or 'stream'
if vid_path[i] != save_path: # new video
vid_path[i] = save_path
if isinstance(vid_writer[i], cv2.VideoWriter):
vid_writer[i].release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path = str(Path(save_path).with_suffix(".mp4")) # force *.mp4 suffix on results videos
vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
vid_writer[i].write(im0)
# Print time (inference-only)
LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1e3:.1f}ms")
# Print results
t = tuple(x.t / seen * 1e3 for x in dt) # speeds per image
LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}" % t)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
if update:
strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning)
def parse_opt():
"""Parses command-line options for YOLOv5 inference including model paths, data sources, inference settings, and
output preferences.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-seg.pt", help="model path(s)")
parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="(optional) dataset.yaml path")
parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold")
parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")
parser.add_argument("--max-det", type=int, default=1000, help="maximum detections per image")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")
parser.add_argument("--save-crop", action="store_true", help="save cropped prediction boxes")
parser.add_argument("--nosave", action="store_true", help="do not save images/videos")
parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3")
parser.add_argument("--agnostic-nms", action="store_true", help="class-agnostic NMS")
parser.add_argument("--augment", action="store_true", help="augmented inference")
parser.add_argument("--visualize", action="store_true", help="visualize features")
parser.add_argument("--update", action="store_true", help="update all models")
parser.add_argument("--project", default=ROOT / "runs/predict-seg", help="save results to project/name")
parser.add_argument("--name", default="exp", help="save results to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--line-thickness", default=3, type=int, help="bounding box thickness (pixels)")
parser.add_argument("--hide-labels", default=False, action="store_true", help="hide labels")
parser.add_argument("--hide-conf", default=False, action="store_true", help="hide confidences")
parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride")
parser.add_argument("--retina-masks", action="store_true", help="whether to plot masks in native resolution")
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
"""Executes YOLOv5 model inference with given options, checking for requirements before launching."""
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
run(**vars(opt))
if __name__ == "__main__":
opt = parse_opt()
main(opt)

764
yolov5/segment/train.py Normal file
View File

@ -0,0 +1,764 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Train a YOLOv5 segment model on a segment dataset Models and datasets download automatically from the latest YOLOv5
release.
Usage - Single-GPU training:
$ python segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 # from pretrained (recommended)
$ python segment/train.py --data coco128-seg.yaml --weights '' --cfg yolov5s-seg.yaml --img 640 # from scratch
Usage - Multi-GPU DDP training:
$ python -m torch.distributed.run --nproc_per_node 4 --master_port 1 segment/train.py --data coco128-seg.yaml --weights yolov5s-seg.pt --img 640 --device 0,1,2,3
Models: https://github.com/ultralytics/yolov5/tree/master/models
Datasets: https://github.com/ultralytics/yolov5/tree/master/data
Tutorial: https://docs.ultralytics.com/yolov5/tutorials/train_custom_data
"""
import argparse
import math
import os
import random
import subprocess
import sys
import time
from copy import deepcopy
from datetime import datetime
from pathlib import Path
import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
import yaml
from torch.optim import lr_scheduler
from tqdm import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
import segment.val as validate # for end-of-epoch mAP
from models.experimental import attempt_load
from models.yolo import SegmentationModel
from utils.autoanchor import check_anchors
from utils.autobatch import check_train_batch_size
from utils.callbacks import Callbacks
from utils.downloads import attempt_download, is_url
from utils.general import (
LOGGER,
TQDM_BAR_FORMAT,
check_amp,
check_dataset,
check_file,
check_git_info,
check_git_status,
check_img_size,
check_requirements,
check_suffix,
check_yaml,
colorstr,
get_latest_run,
increment_path,
init_seeds,
intersect_dicts,
labels_to_class_weights,
labels_to_image_weights,
one_cycle,
print_args,
print_mutation,
strip_optimizer,
yaml_save,
)
from utils.loggers import GenericLogger
from utils.plots import plot_evolve, plot_labels
from utils.segment.dataloaders import create_dataloader
from utils.segment.loss import ComputeLoss
from utils.segment.metrics import KEYS, fitness
from utils.segment.plots import plot_images_and_masks, plot_results_with_masks
from utils.torch_utils import (
EarlyStopping,
ModelEMA,
de_parallel,
select_device,
smart_DDP,
smart_optimizer,
smart_resume,
torch_distributed_zero_first,
)
LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1)) # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv("RANK", -1))
WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1))
GIT_INFO = check_git_info()
def train(hyp, opt, device, callbacks):
"""
Trains the YOLOv5 model on a dataset, managing hyperparameters, model optimization, logging, and validation.
`hyp` is path/to/hyp.yaml or hyp dictionary.
"""
(
save_dir,
epochs,
batch_size,
weights,
single_cls,
evolve,
data,
cfg,
resume,
noval,
nosave,
workers,
freeze,
mask_ratio,
) = (
Path(opt.save_dir),
opt.epochs,
opt.batch_size,
opt.weights,
opt.single_cls,
opt.evolve,
opt.data,
opt.cfg,
opt.resume,
opt.noval,
opt.nosave,
opt.workers,
opt.freeze,
opt.mask_ratio,
)
# callbacks.run('on_pretrain_routine_start')
# Directories
w = save_dir / "weights" # weights dir
(w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir
last, best = w / "last.pt", w / "best.pt"
# Hyperparameters
if isinstance(hyp, str):
with open(hyp, errors="ignore") as f:
hyp = yaml.safe_load(f) # load hyps dict
LOGGER.info(colorstr("hyperparameters: ") + ", ".join(f"{k}={v}" for k, v in hyp.items()))
opt.hyp = hyp.copy() # for saving hyps to checkpoints
# Save run settings
if not evolve:
yaml_save(save_dir / "hyp.yaml", hyp)
yaml_save(save_dir / "opt.yaml", vars(opt))
# Loggers
data_dict = None
if RANK in {-1, 0}:
logger = GenericLogger(opt=opt, console_logger=LOGGER)
# Config
plots = not evolve and not opt.noplots # create plots
overlap = not opt.no_overlap
cuda = device.type != "cpu"
init_seeds(opt.seed + 1 + RANK, deterministic=True)
with torch_distributed_zero_first(LOCAL_RANK):
data_dict = data_dict or check_dataset(data) # check if None
train_path, val_path = data_dict["train"], data_dict["val"]
nc = 1 if single_cls else int(data_dict["nc"]) # number of classes
names = {0: "item"} if single_cls and len(data_dict["names"]) != 1 else data_dict["names"] # class names
is_coco = isinstance(val_path, str) and val_path.endswith("coco/val2017.txt") # COCO dataset
# Model
check_suffix(weights, ".pt") # check weights
pretrained = weights.endswith(".pt")
if pretrained:
with torch_distributed_zero_first(LOCAL_RANK):
weights = attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location="cpu") # load checkpoint to CPU to avoid CUDA memory leak
model = SegmentationModel(cfg or ckpt["model"].yaml, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)
exclude = ["anchor"] if (cfg or hyp.get("anchors")) and not resume else [] # exclude keys
csd = ckpt["model"].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect
model.load_state_dict(csd, strict=False) # load
LOGGER.info(f"Transferred {len(csd)}/{len(model.state_dict())} items from {weights}") # report
else:
model = SegmentationModel(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device) # create
amp = check_amp(model) # check AMP
# Freeze
freeze = [f"model.{x}." for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze
for k, v in model.named_parameters():
v.requires_grad = True # train all layers
# v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results)
if any(x in k for x in freeze):
LOGGER.info(f"freezing {k}")
v.requires_grad = False
# Image size
gs = max(int(model.stride.max()), 32) # grid size (max stride)
imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple
# Batch size
if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size
batch_size = check_train_batch_size(model, imgsz, amp)
logger.update_params({"batch_size": batch_size})
# loggers.on_params_update({"batch_size": batch_size})
# Optimizer
nbs = 64 # nominal batch size
accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
hyp["weight_decay"] *= batch_size * accumulate / nbs # scale weight_decay
optimizer = smart_optimizer(model, opt.optimizer, hyp["lr0"], hyp["momentum"], hyp["weight_decay"])
# Scheduler
if opt.cos_lr:
lf = one_cycle(1, hyp["lrf"], epochs) # cosine 1->hyp['lrf']
else:
def lf(x):
"""Linear learning rate scheduler decreasing from 1 to hyp['lrf'] over 'epochs'."""
return (1 - x / epochs) * (1.0 - hyp["lrf"]) + hyp["lrf"] # linear
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs)
# EMA
ema = ModelEMA(model) if RANK in {-1, 0} else None
# Resume
best_fitness, start_epoch = 0.0, 0
if pretrained:
if resume:
best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
del ckpt, csd
# DP mode
if cuda and RANK == -1 and torch.cuda.device_count() > 1:
LOGGER.warning(
"WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n"
"See Multi-GPU Tutorial at https://docs.ultralytics.com/yolov5/tutorials/multi_gpu_training to get started."
)
model = torch.nn.DataParallel(model)
# SyncBatchNorm
if opt.sync_bn and cuda and RANK != -1:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
LOGGER.info("Using SyncBatchNorm()")
# Trainloader
train_loader, dataset = create_dataloader(
train_path,
imgsz,
batch_size // WORLD_SIZE,
gs,
single_cls,
hyp=hyp,
augment=True,
cache=None if opt.cache == "val" else opt.cache,
rect=opt.rect,
rank=LOCAL_RANK,
workers=workers,
image_weights=opt.image_weights,
quad=opt.quad,
prefix=colorstr("train: "),
shuffle=True,
mask_downsample_ratio=mask_ratio,
overlap_mask=overlap,
)
labels = np.concatenate(dataset.labels, 0)
mlc = int(labels[:, 0].max()) # max label class
assert mlc < nc, f"Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}"
# Process 0
if RANK in {-1, 0}:
val_loader = create_dataloader(
val_path,
imgsz,
batch_size // WORLD_SIZE * 2,
gs,
single_cls,
hyp=hyp,
cache=None if noval else opt.cache,
rect=True,
rank=-1,
workers=workers * 2,
pad=0.5,
mask_downsample_ratio=mask_ratio,
overlap_mask=overlap,
prefix=colorstr("val: "),
)[0]
if not resume:
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp["anchor_t"], imgsz=imgsz) # run AutoAnchor
model.half().float() # pre-reduce anchor precision
if plots:
plot_labels(labels, names, save_dir)
# callbacks.run('on_pretrain_routine_end', labels, names)
# DDP mode
if cuda and RANK != -1:
model = smart_DDP(model)
# Model attributes
nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps)
hyp["box"] *= 3 / nl # scale to layers
hyp["cls"] *= nc / 80 * 3 / nl # scale to classes and layers
hyp["obj"] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
hyp["label_smoothing"] = opt.label_smoothing
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights
model.names = names
# Start training
t0 = time.time()
nb = len(train_loader) # number of batches
nw = max(round(hyp["warmup_epochs"] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
last_opt_step = -1
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
scheduler.last_epoch = start_epoch - 1 # do not move
scaler = torch.cuda.amp.GradScaler(enabled=amp)
stopper, stop = EarlyStopping(patience=opt.patience), False
compute_loss = ComputeLoss(model, overlap=overlap) # init loss class
# callbacks.run('on_train_start')
LOGGER.info(
f"Image sizes {imgsz} train, {imgsz} val\n"
f"Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n"
f"Logging results to {colorstr('bold', save_dir)}\n"
f"Starting training for {epochs} epochs..."
)
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
# callbacks.run('on_train_epoch_start')
model.train()
# Update image weights (optional, single-GPU only)
if opt.image_weights:
cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights
iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights
dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx
# Update mosaic border (optional)
# b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
# dataset.mosaic_border = [b - imgsz, -b] # height, width borders
mloss = torch.zeros(4, device=device) # mean losses
if RANK != -1:
train_loader.sampler.set_epoch(epoch)
pbar = enumerate(train_loader)
LOGGER.info(
("\n" + "%11s" * 8)
% ("Epoch", "GPU_mem", "box_loss", "seg_loss", "obj_loss", "cls_loss", "Instances", "Size")
)
if RANK in {-1, 0}:
pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT) # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------
# callbacks.run('on_train_batch_start')
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
xi = [0, nw] # x interp
# compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x["lr"] = np.interp(ni, xi, [hyp["warmup_bias_lr"] if j == 0 else 0.0, x["initial_lr"] * lf(epoch)])
if "momentum" in x:
x["momentum"] = np.interp(ni, xi, [hyp["warmup_momentum"], hyp["momentum"]])
# Multi-scale
if opt.multi_scale:
sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False)
# Forward
with torch.cuda.amp.autocast(amp):
pred = model(imgs) # forward
loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float())
if RANK != -1:
loss *= WORLD_SIZE # gradient averaged between devices in DDP mode
if opt.quad:
loss *= 4.0
# Backward
scaler.scale(loss).backward()
# Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
if ni - last_opt_step >= accumulate:
scaler.unscale_(optimizer) # unscale gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
if ema:
ema.update(model)
last_opt_step = ni
# Log
if RANK in {-1, 0}:
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = f"{torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0:.3g}G" # (GB)
pbar.set_description(
("%11s" * 2 + "%11.4g" * 6)
% (f"{epoch}/{epochs - 1}", mem, *mloss, targets.shape[0], imgs.shape[-1])
)
# callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths)
# if callbacks.stop_training:
# return
# Mosaic plots
if plots:
if ni < 3:
plot_images_and_masks(imgs, targets, masks, paths, save_dir / f"train_batch{ni}.jpg")
if ni == 10:
files = sorted(save_dir.glob("train*.jpg"))
logger.log_images(files, "Mosaics", epoch)
# end batch ------------------------------------------------------------------------------------------------
# Scheduler
lr = [x["lr"] for x in optimizer.param_groups] # for loggers
scheduler.step()
if RANK in {-1, 0}:
# mAP
# callbacks.run('on_train_epoch_end', epoch=epoch)
ema.update_attr(model, include=["yaml", "nc", "hyp", "names", "stride", "class_weights"])
final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
if not noval or final_epoch: # Calculate mAP
results, maps, _ = validate.run(
data_dict,
batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz,
half=amp,
model=ema.ema,
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
plots=False,
callbacks=callbacks,
compute_loss=compute_loss,
mask_downsample_ratio=mask_ratio,
overlap=overlap,
)
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
stop = stopper(epoch=epoch, fitness=fi) # early stop check
if fi > best_fitness:
best_fitness = fi
log_vals = list(mloss) + list(results) + lr
# callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)
# Log val metrics and media
metrics_dict = dict(zip(KEYS, log_vals))
logger.log_metrics(metrics_dict, epoch)
# Save model
if (not nosave) or (final_epoch and not evolve): # if save
ckpt = {
"epoch": epoch,
"best_fitness": best_fitness,
"model": deepcopy(de_parallel(model)).half(),
"ema": deepcopy(ema.ema).half(),
"updates": ema.updates,
"optimizer": optimizer.state_dict(),
"opt": vars(opt),
"git": GIT_INFO, # {remote, branch, commit} if a git repo
"date": datetime.now().isoformat(),
}
# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
if opt.save_period > 0 and epoch % opt.save_period == 0:
torch.save(ckpt, w / f"epoch{epoch}.pt")
logger.log_model(w / f"epoch{epoch}.pt")
del ckpt
# callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)
# EarlyStopping
if RANK != -1: # if DDP training
broadcast_list = [stop if RANK == 0 else None]
dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks
if RANK != 0:
stop = broadcast_list[0]
if stop:
break # must break all DDP ranks
# end epoch ----------------------------------------------------------------------------------------------------
# end training -----------------------------------------------------------------------------------------------------
if RANK in {-1, 0}:
LOGGER.info(f"\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.")
for f in last, best:
if f.exists():
strip_optimizer(f) # strip optimizers
if f is best:
LOGGER.info(f"\nValidating {f}...")
results, _, _ = validate.run(
data_dict,
batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz,
model=attempt_load(f, device).half(),
iou_thres=0.65 if is_coco else 0.60, # best pycocotools at iou 0.65
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
save_json=is_coco,
verbose=True,
plots=plots,
callbacks=callbacks,
compute_loss=compute_loss,
mask_downsample_ratio=mask_ratio,
overlap=overlap,
) # val best model with plots
if is_coco:
# callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)
metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr))
logger.log_metrics(metrics_dict, epoch)
# callbacks.run('on_train_end', last, best, epoch, results)
# on train end callback using genericLogger
logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs)
if not opt.evolve:
logger.log_model(best, epoch)
if plots:
plot_results_with_masks(file=save_dir / "results.csv") # save results.png
files = ["results.png", "confusion_matrix.png", *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R"))]
files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")
logger.log_images(files, "Results", epoch + 1)
logger.log_images(sorted(save_dir.glob("val*.jpg")), "Validation", epoch + 1)
torch.cuda.empty_cache()
return results
def parse_opt(known=False):
"""
Parses command line arguments for training configurations, returning parsed arguments.
Supports both known and unknown args.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s-seg.pt", help="initial weights path")
parser.add_argument("--cfg", type=str, default="", help="model.yaml path")
parser.add_argument("--data", type=str, default=ROOT / "data/coco128-seg.yaml", help="dataset.yaml path")
parser.add_argument("--hyp", type=str, default=ROOT / "data/hyps/hyp.scratch-low.yaml", help="hyperparameters path")
parser.add_argument("--epochs", type=int, default=100, help="total training epochs")
parser.add_argument("--batch-size", type=int, default=16, help="total batch size for all GPUs, -1 for autobatch")
parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=640, help="train, val image size (pixels)")
parser.add_argument("--rect", action="store_true", help="rectangular training")
parser.add_argument("--resume", nargs="?", const=True, default=False, help="resume most recent training")
parser.add_argument("--nosave", action="store_true", help="only save final checkpoint")
parser.add_argument("--noval", action="store_true", help="only validate final epoch")
parser.add_argument("--noautoanchor", action="store_true", help="disable AutoAnchor")
parser.add_argument("--noplots", action="store_true", help="save no plot files")
parser.add_argument("--evolve", type=int, nargs="?", const=300, help="evolve hyperparameters for x generations")
parser.add_argument("--bucket", type=str, default="", help="gsutil bucket")
parser.add_argument("--cache", type=str, nargs="?", const="ram", help="image --cache ram/disk")
parser.add_argument("--image-weights", action="store_true", help="use weighted image selection for training")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--multi-scale", action="store_true", help="vary img-size +/- 50%%")
parser.add_argument("--single-cls", action="store_true", help="train multi-class data as single-class")
parser.add_argument("--optimizer", type=str, choices=["SGD", "Adam", "AdamW"], default="SGD", help="optimizer")
parser.add_argument("--sync-bn", action="store_true", help="use SyncBatchNorm, only available in DDP mode")
parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
parser.add_argument("--project", default=ROOT / "runs/train-seg", help="save to project/name")
parser.add_argument("--name", default="exp", help="save to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--quad", action="store_true", help="quad dataloader")
parser.add_argument("--cos-lr", action="store_true", help="cosine LR scheduler")
parser.add_argument("--label-smoothing", type=float, default=0.0, help="Label smoothing epsilon")
parser.add_argument("--patience", type=int, default=100, help="EarlyStopping patience (epochs without improvement)")
parser.add_argument("--freeze", nargs="+", type=int, default=[0], help="Freeze layers: backbone=10, first3=0 1 2")
parser.add_argument("--save-period", type=int, default=-1, help="Save checkpoint every x epochs (disabled if < 1)")
parser.add_argument("--seed", type=int, default=0, help="Global training seed")
parser.add_argument("--local_rank", type=int, default=-1, help="Automatic DDP Multi-GPU argument, do not modify")
# Instance Segmentation Args
parser.add_argument("--mask-ratio", type=int, default=4, help="Downsample the truth masks to saving memory")
parser.add_argument("--no-overlap", action="store_true", help="Overlap masks train faster at slightly less mAP")
return parser.parse_known_args()[0] if known else parser.parse_args()
def main(opt, callbacks=Callbacks()):
"""Initializes training or evolution of YOLOv5 models based on provided configuration and options."""
if RANK in {-1, 0}:
print_args(vars(opt))
check_git_status()
check_requirements(ROOT / "requirements.txt")
# Resume
if opt.resume and not opt.evolve: # resume from specified or most recent last.pt
last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
opt_yaml = last.parent.parent / "opt.yaml" # train options yaml
opt_data = opt.data # original dataset
if opt_yaml.is_file():
with open(opt_yaml, errors="ignore") as f:
d = yaml.safe_load(f)
else:
d = torch.load(last, map_location="cpu")["opt"]
opt = argparse.Namespace(**d) # replace
opt.cfg, opt.weights, opt.resume = "", str(last), True # reinstate
if is_url(opt_data):
opt.data = check_file(opt_data) # avoid HUB resume auth timeout
else:
opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = (
check_file(opt.data),
check_yaml(opt.cfg),
check_yaml(opt.hyp),
str(opt.weights),
str(opt.project),
) # checks
assert len(opt.cfg) or len(opt.weights), "either --cfg or --weights must be specified"
if opt.evolve:
if opt.project == str(ROOT / "runs/train-seg"): # if default project name, rename to runs/evolve-seg
opt.project = str(ROOT / "runs/evolve-seg")
opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume
if opt.name == "cfg":
opt.name = Path(opt.cfg).stem # use model.yaml as name
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
# DDP mode
device = select_device(opt.device, batch_size=opt.batch_size)
if LOCAL_RANK != -1:
msg = "is not compatible with YOLOv5 Multi-GPU DDP training"
assert not opt.image_weights, f"--image-weights {msg}"
assert not opt.evolve, f"--evolve {msg}"
assert opt.batch_size != -1, f"AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size"
assert opt.batch_size % WORLD_SIZE == 0, f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE"
assert torch.cuda.device_count() > LOCAL_RANK, "insufficient CUDA devices for DDP command"
torch.cuda.set_device(LOCAL_RANK)
device = torch.device("cuda", LOCAL_RANK)
dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
# Train
if not opt.evolve:
train(opt.hyp, opt, device, callbacks)
# Evolve hyperparameters (optional)
else:
# Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
meta = {
"lr0": (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
"lrf": (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
"momentum": (0.3, 0.6, 0.98), # SGD momentum/Adam beta1
"weight_decay": (1, 0.0, 0.001), # optimizer weight decay
"warmup_epochs": (1, 0.0, 5.0), # warmup epochs (fractions ok)
"warmup_momentum": (1, 0.0, 0.95), # warmup initial momentum
"warmup_bias_lr": (1, 0.0, 0.2), # warmup initial bias lr
"box": (1, 0.02, 0.2), # box loss gain
"cls": (1, 0.2, 4.0), # cls loss gain
"cls_pw": (1, 0.5, 2.0), # cls BCELoss positive_weight
"obj": (1, 0.2, 4.0), # obj loss gain (scale with pixels)
"obj_pw": (1, 0.5, 2.0), # obj BCELoss positive_weight
"iou_t": (0, 0.1, 0.7), # IoU training threshold
"anchor_t": (1, 2.0, 8.0), # anchor-multiple threshold
"anchors": (2, 2.0, 10.0), # anchors per output grid (0 to ignore)
"fl_gamma": (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
"hsv_h": (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
"hsv_s": (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
"hsv_v": (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
"degrees": (1, 0.0, 45.0), # image rotation (+/- deg)
"translate": (1, 0.0, 0.9), # image translation (+/- fraction)
"scale": (1, 0.0, 0.9), # image scale (+/- gain)
"shear": (1, 0.0, 10.0), # image shear (+/- deg)
"perspective": (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
"flipud": (1, 0.0, 1.0), # image flip up-down (probability)
"fliplr": (0, 0.0, 1.0), # image flip left-right (probability)
"mosaic": (1, 0.0, 1.0), # image mixup (probability)
"mixup": (1, 0.0, 1.0), # image mixup (probability)
"copy_paste": (1, 0.0, 1.0),
} # segment copy-paste (probability)
with open(opt.hyp, errors="ignore") as f:
hyp = yaml.safe_load(f) # load hyps dict
if "anchors" not in hyp: # anchors commented in hyp.yaml
hyp["anchors"] = 3
if opt.noautoanchor:
del hyp["anchors"], meta["anchors"]
opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
evolve_yaml, evolve_csv = save_dir / "hyp_evolve.yaml", save_dir / "evolve.csv"
if opt.bucket:
# download evolve.csv if exists
subprocess.run(
[
"gsutil",
"cp",
f"gs://{opt.bucket}/evolve.csv",
str(evolve_csv),
]
)
for _ in range(opt.evolve): # generations to evolve
if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate
# Select parent(s)
parent = "single" # parent selection method: 'single' or 'weighted'
x = np.loadtxt(evolve_csv, ndmin=2, delimiter=",", skiprows=1)
n = min(5, len(x)) # number of previous results to consider
x = x[np.argsort(-fitness(x))][:n] # top n mutations
w = fitness(x) - fitness(x).min() + 1e-6 # weights (sum > 0)
if parent == "single" or len(x) == 1:
# x = x[random.randint(0, n - 1)] # random selection
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
elif parent == "weighted":
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination
# Mutate
mp, s = 0.8, 0.2 # mutation probability, sigma
npr = np.random
npr.seed(int(time.time()))
g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1
ng = len(meta)
v = np.ones(ng)
while all(v == 1): # mutate until a change occurs (prevent duplicates)
v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
hyp[k] = float(x[i + 12] * v[i]) # mutate
# Constrain to limits
for k, v in meta.items():
hyp[k] = max(hyp[k], v[1]) # lower limit
hyp[k] = min(hyp[k], v[2]) # upper limit
hyp[k] = round(hyp[k], 5) # significant digits
# Train mutation
results = train(hyp.copy(), opt, device, callbacks)
callbacks = Callbacks()
# Write mutation results
print_mutation(KEYS[4:16], results, hyp.copy(), save_dir, opt.bucket)
# Plot results
plot_evolve(evolve_csv)
LOGGER.info(
f"Hyperparameter evolution finished {opt.evolve} generations\n"
f"Results saved to {colorstr('bold', save_dir)}\n"
f"Usage example: $ python train.py --hyp {evolve_yaml}"
)
def run(**kwargs):
"""
Executes YOLOv5 training with given parameters, altering options programmatically; returns updated options.
Example: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
"""
opt = parse_opt(True)
for k, v in kwargs.items():
setattr(opt, k, v)
main(opt)
return opt
if __name__ == "__main__":
opt = parse_opt()
main(opt)

602
yolov5/segment/tutorial.ipynb vendored Normal file
View File

@ -0,0 +1,602 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "t6MPjfT5NrKQ"
},
"source": [
"<div align=\"center\">\n",
"\n",
" <a href=\"https://ultralytics.com/yolov5\" target=\"_blank\">\n",
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov5/v70/splash.png\"></a>\n",
"\n",
"\n",
"<br>\n",
" <a href=\"https://bit.ly/yolov5-paperspace-notebook\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"></a>\n",
" <a href=\"https://colab.research.google.com/github/ultralytics/yolov5/blob/master/segment/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
" <a href=\"https://www.kaggle.com/models/ultralytics/yolov5\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
"<br>\n",
"\n",
"This <a href=\"https://github.com/ultralytics/yolov5\">YOLOv5</a> 🚀 notebook by <a href=\"https://ultralytics.com\">Ultralytics</a> presents simple train, validate and predict examples to help start your AI adventure.<br>See <a href=\"https://github.com/ultralytics/yolov5/issues/new/choose\">GitHub</a> for community support or <a href=\"https://ultralytics.com/contact\">contact us</a> for professional support.\n",
"\n",
"</div>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7mGmQbAO5pQb"
},
"source": [
"# Setup\n",
"\n",
"Clone GitHub [repository](https://github.com/ultralytics/yolov5), install [dependencies](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) and check PyTorch and GPU."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wbvMlHd_QwMG",
"outputId": "171b23f0-71b9-4cbf-b666-6fa2ecef70c8"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Setup complete ✅ (2 CPUs, 12.7 GB RAM, 22.6/78.2 GB disk)\n"
]
}
],
"source": [
"!git clone https://github.com/ultralytics/yolov5 # clone\n",
"%cd yolov5\n",
"%pip install -qr requirements.txt comet_ml # install\n",
"\n",
"import torch\n",
"\n",
"import utils\n",
"\n",
"display = utils.notebook_init() # checks"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4JnkELT0cIJg"
},
"source": [
"# 1. Predict\n",
"\n",
"`segment/predict.py` runs YOLOv5 instance segmentation inference on a variety of sources, downloading models automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and saving results to `runs/predict`. Example inference sources are:\n",
"\n",
"```shell\n",
"python segment/predict.py --source 0 # webcam\n",
" img.jpg # image \n",
" vid.mp4 # video\n",
" screen # screenshot\n",
" path/ # directory\n",
" 'path/*.jpg' # glob\n",
" 'https://youtu.be/LNwODJXcvt4' # YouTube\n",
" 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zR9ZbuQCH7FX",
"outputId": "3f67f1c7-f15e-4fa5-d251-967c3b77eaad"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1msegment/predict: \u001b[0mweights=['yolov5s-seg.pt'], source=data/images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/predict-seg, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1, retina_masks=False\n",
"YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n",
"\n",
"Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s-seg.pt to yolov5s-seg.pt...\n",
"100% 14.9M/14.9M [00:01<00:00, 12.0MB/s]\n",
"\n",
"Fusing layers... \n",
"YOLOv5s-seg summary: 224 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs\n",
"image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 bus, 18.2ms\n",
"image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 1 tie, 13.4ms\n",
"Speed: 0.5ms pre-process, 15.8ms inference, 18.5ms NMS per image at shape (1, 3, 640, 640)\n",
"Results saved to \u001b[1mruns/predict-seg/exp\u001b[0m\n"
]
}
],
"source": [
"!python segment/predict.py --weights yolov5s-seg.pt --img 640 --conf 0.25 --source data/images\n",
"# display.Image(filename='runs/predict-seg/exp/zidane.jpg', width=600)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hkAzDWJ7cWTr"
},
"source": [
"&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\n",
"<img align=\"left\" src=\"https://user-images.githubusercontent.com/26833433/199030123-08c72f8d-6871-4116-8ed3-c373642cf28e.jpg\" width=\"600\">"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "0eq1SMWl6Sfn"
},
"source": [
"# 2. Validate\n",
"Validate a model's accuracy on the [COCO](https://cocodataset.org/#home) dataset's `val` or `test` splits. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases). To show results by class use the `--verbose` flag."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WQPtK1QYVaD_",
"outputId": "9d751d8c-bee8-4339-cf30-9854ca530449"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/coco2017labels-segments.zip ...\n",
"Downloading http://images.cocodataset.org/zips/val2017.zip ...\n",
"######################################################################## 100.0%\n",
"######################################################################## 100.0%\n"
]
}
],
"source": [
"# Download COCO val\n",
"!bash data/scripts/get_coco.sh --val --segments # download (780M - 5000 images)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "X58w8JLpMnjH",
"outputId": "a140d67a-02da-479e-9ddb-7d54bf9e407a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1msegment/val: \u001b[0mdata=/content/yolov5/data/coco.yaml, weights=['yolov5s-seg.pt'], batch_size=32, imgsz=640, conf_thres=0.001, iou_thres=0.6, max_det=300, task=val, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs/val-seg, name=exp, exist_ok=False, half=True, dnn=False\n",
"YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n",
"\n",
"Fusing layers... \n",
"YOLOv5s-seg summary: 224 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco/val2017... 4952 images, 48 backgrounds, 0 corrupt: 100% 5000/5000 [00:03<00:00, 1361.31it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mNew cache created: /content/datasets/coco/val2017.cache\n",
" Class Images Instances Box(P R mAP50 mAP50-95) Mask(P R mAP50 mAP50-95): 100% 157/157 [01:54<00:00, 1.37it/s]\n",
" all 5000 36335 0.673 0.517 0.566 0.373 0.672 0.49 0.532 0.319\n",
"Speed: 0.6ms pre-process, 4.4ms inference, 2.9ms NMS per image at shape (32, 3, 640, 640)\n",
"Results saved to \u001b[1mruns/val-seg/exp\u001b[0m\n"
]
}
],
"source": [
"# Validate YOLOv5s-seg on COCO val\n",
"!python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640 --half"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZY2VXXXu74w5"
},
"source": [
"# 3. Train\n",
"\n",
"<p align=\"\"><a href=\"https://roboflow.com/?ref=ultralytics\"><img width=\"1000\" src=\"https://github.com/ultralytics/assets/raw/main/im/integrations-loop.png\"/></a></p>\n",
"Close the active learning loop by sampling images from your inference conditions with the `roboflow` pip package\n",
"<br><br>\n",
"\n",
"Train a YOLOv5s-seg model on the [COCO128](https://www.kaggle.com/datasets/ultralytics/coco128) dataset with `--data coco128-seg.yaml`, starting from pretrained `--weights yolov5s-seg.pt`, or from randomly initialized `--weights '' --cfg yolov5s-seg.yaml`.\n",
"\n",
"- **Pretrained [Models](https://github.com/ultralytics/yolov5/tree/master/models)** are downloaded\n",
"automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases)\n",
"- **[Datasets](https://github.com/ultralytics/yolov5/tree/master/data)** available for autodownload include: [COCO](https://github.com/ultralytics/yolov5/blob/master/data/coco.yaml), [COCO128](https://github.com/ultralytics/yolov5/blob/master/data/coco128.yaml), [VOC](https://github.com/ultralytics/yolov5/blob/master/data/VOC.yaml), [Argoverse](https://github.com/ultralytics/yolov5/blob/master/data/Argoverse.yaml), [VisDrone](https://github.com/ultralytics/yolov5/blob/master/data/VisDrone.yaml), [GlobalWheat](https://github.com/ultralytics/yolov5/blob/master/data/GlobalWheat2020.yaml), [xView](https://github.com/ultralytics/yolov5/blob/master/data/xView.yaml), [Objects365](https://github.com/ultralytics/yolov5/blob/master/data/Objects365.yaml), [SKU-110K](https://github.com/ultralytics/yolov5/blob/master/data/SKU-110K.yaml).\n",
"- **Training Results** are saved to `runs/train-seg/` with incrementing run directories, i.e. `runs/train-seg/exp2`, `runs/train-seg/exp3` etc.\n",
"<br><br>\n",
"\n",
"A **Mosaic Dataloader** is used for training which combines 4 images into 1 mosaic.\n",
"\n",
"## Train on Custom Data with Roboflow 🌟 NEW\n",
"\n",
"[Roboflow](https://roboflow.com/?ref=ultralytics) enables you to easily **organize, label, and prepare** a high quality dataset with your own custom data. Roboflow also makes it easy to establish an active learning pipeline, collaborate with your team on dataset improvement, and integrate directly into your model building workflow with the `roboflow` pip package.\n",
"\n",
"- Custom Training Example: [https://blog.roboflow.com/train-yolov5-instance-segmentation-custom-dataset/](https://blog.roboflow.com/train-yolov5-instance-segmentation-custom-dataset/?ref=ultralytics)\n",
"- Custom Training Notebook: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1JTz7kpmHsg-5qwVz2d2IH3AaenI1tv0N?usp=sharing)\n",
"<br>\n",
"\n",
"<p align=\"\"><a href=\"https://roboflow.com/?ref=ultralytics\"><img width=\"480\" src=\"https://robflow-public-assets.s3.amazonaws.com/how-to-train-yolov5-segmentation-annotation.gif\"/></a></p>Label images lightning fast (including with model-assisted labeling)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "i3oKtE4g-aNn"
},
"outputs": [],
"source": [
"# @title Select YOLOv5 🚀 logger {run: 'auto'}\n",
"logger = \"Comet\" # @param ['Comet', 'ClearML', 'TensorBoard']\n",
"\n",
"if logger == \"Comet\":\n",
" %pip install -q comet_ml\n",
" import comet_ml\n",
"\n",
" comet_ml.init()\n",
"elif logger == \"ClearML\":\n",
" %pip install -q clearml\n",
" import clearml\n",
"\n",
" clearml.browser_login()\n",
"elif logger == \"TensorBoard\":\n",
" %load_ext tensorboard\n",
" %tensorboard --logdir runs/train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1NcFxRcFdJ_O",
"outputId": "3a3e0cf7-e79c-47a5-c8e7-2d26eeeab988"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1msegment/train: \u001b[0mweights=yolov5s-seg.pt, cfg=, data=coco128-seg.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=3, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train-seg, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, mask_ratio=4, no_overlap=False\n",
"\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov5 ✅\n",
"YOLOv5 🚀 v7.0-2-gc9d47ae Python-3.7.15 torch-1.12.1+cu113 CUDA:0 (Tesla T4, 15110MiB)\n",
"\n",
"\u001b[34m\u001b[1mhyperparameters: \u001b[0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0\n",
"\u001b[34m\u001b[1mTensorBoard: \u001b[0mStart with 'tensorboard --logdir runs/train-seg', view at http://localhost:6006/\n",
"\n",
"Dataset not found ⚠️, missing paths ['/content/datasets/coco128-seg/images/train2017']\n",
"Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/coco128-seg.zip to coco128-seg.zip...\n",
"100% 6.79M/6.79M [00:01<00:00, 6.73MB/s]\n",
"Dataset download success ✅ (1.9s), saved to \u001b[1m/content/datasets\u001b[0m\n",
"\n",
" from n params module arguments \n",
" 0 -1 1 3520 models.common.Conv [3, 32, 6, 2, 2] \n",
" 1 -1 1 18560 models.common.Conv [32, 64, 3, 2] \n",
" 2 -1 1 18816 models.common.C3 [64, 64, 1] \n",
" 3 -1 1 73984 models.common.Conv [64, 128, 3, 2] \n",
" 4 -1 2 115712 models.common.C3 [128, 128, 2] \n",
" 5 -1 1 295424 models.common.Conv [128, 256, 3, 2] \n",
" 6 -1 3 625152 models.common.C3 [256, 256, 3] \n",
" 7 -1 1 1180672 models.common.Conv [256, 512, 3, 2] \n",
" 8 -1 1 1182720 models.common.C3 [512, 512, 1] \n",
" 9 -1 1 656896 models.common.SPPF [512, 512, 5] \n",
" 10 -1 1 131584 models.common.Conv [512, 256, 1, 1] \n",
" 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
" 12 [-1, 6] 1 0 models.common.Concat [1] \n",
" 13 -1 1 361984 models.common.C3 [512, 256, 1, False] \n",
" 14 -1 1 33024 models.common.Conv [256, 128, 1, 1] \n",
" 15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
" 16 [-1, 4] 1 0 models.common.Concat [1] \n",
" 17 -1 1 90880 models.common.C3 [256, 128, 1, False] \n",
" 18 -1 1 147712 models.common.Conv [128, 128, 3, 2] \n",
" 19 [-1, 14] 1 0 models.common.Concat [1] \n",
" 20 -1 1 296448 models.common.C3 [256, 256, 1, False] \n",
" 21 -1 1 590336 models.common.Conv [256, 256, 3, 2] \n",
" 22 [-1, 10] 1 0 models.common.Concat [1] \n",
" 23 -1 1 1182720 models.common.C3 [512, 512, 1, False] \n",
" 24 [17, 20, 23] 1 615133 models.yolo.Segment [80, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], 32, 128, [128, 256, 512]]\n",
"Model summary: 225 layers, 7621277 parameters, 7621277 gradients, 26.6 GFLOPs\n",
"\n",
"Transferred 367/367 items from yolov5s-seg.pt\n",
"\u001b[34m\u001b[1mAMP: \u001b[0mchecks passed ✅\n",
"\u001b[34m\u001b[1moptimizer:\u001b[0m SGD(lr=0.01) with parameter groups 60 weight(decay=0.0), 63 weight(decay=0.0005), 63 bias\n",
"\u001b[34m\u001b[1malbumentations: \u001b[0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mScanning /content/datasets/coco128-seg/labels/train2017... 126 images, 2 backgrounds, 0 corrupt: 100% 128/128 [00:00<00:00, 1389.59it/s]\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: /content/datasets/coco128-seg/labels/train2017.cache\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB ram): 100% 128/128 [00:00<00:00, 238.86it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning /content/datasets/coco128-seg/labels/train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100% 128/128 [00:00<?, ?it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB ram): 100% 128/128 [00:01<00:00, 98.90it/s]\n",
"\n",
"\u001b[34m\u001b[1mAutoAnchor: \u001b[0m4.27 anchors/target, 0.994 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅\n",
"Plotting labels to runs/train-seg/exp/labels.jpg... \n",
"Image sizes 640 train, 640 val\n",
"Using 2 dataloader workers\n",
"Logging results to \u001b[1mruns/train-seg/exp\u001b[0m\n",
"Starting training for 3 epochs...\n",
"\n",
" Epoch GPU_mem box_loss seg_loss obj_loss cls_loss Instances Size\n",
" 0/2 4.92G 0.0417 0.04646 0.06066 0.02126 192 640: 100% 8/8 [00:08<00:00, 1.10s/it]\n",
" Class Images Instances Box(P R mAP50 mAP50-95) Mask(P R mAP50 mAP50-95): 100% 4/4 [00:02<00:00, 1.81it/s]\n",
" all 128 929 0.737 0.649 0.715 0.492 0.719 0.617 0.658 0.408\n",
"\n",
" Epoch GPU_mem box_loss seg_loss obj_loss cls_loss Instances Size\n",
" 1/2 6.29G 0.04157 0.04503 0.05772 0.01777 208 640: 100% 8/8 [00:09<00:00, 1.21s/it]\n",
" Class Images Instances Box(P R mAP50 mAP50-95) Mask(P R mAP50 mAP50-95): 100% 4/4 [00:02<00:00, 1.87it/s]\n",
" all 128 929 0.756 0.674 0.738 0.506 0.725 0.64 0.68 0.422\n",
"\n",
" Epoch GPU_mem box_loss seg_loss obj_loss cls_loss Instances Size\n",
" 2/2 6.29G 0.0425 0.04793 0.06784 0.01863 161 640: 100% 8/8 [00:03<00:00, 2.02it/s]\n",
" Class Images Instances Box(P R mAP50 mAP50-95) Mask(P R mAP50 mAP50-95): 100% 4/4 [00:02<00:00, 1.88it/s]\n",
" all 128 929 0.736 0.694 0.747 0.522 0.769 0.622 0.683 0.427\n",
"\n",
"3 epochs completed in 0.009 hours.\n",
"Optimizer stripped from runs/train-seg/exp/weights/last.pt, 15.6MB\n",
"Optimizer stripped from runs/train-seg/exp/weights/best.pt, 15.6MB\n",
"\n",
"Validating runs/train-seg/exp/weights/best.pt...\n",
"Fusing layers... \n",
"Model summary: 165 layers, 7611485 parameters, 0 gradients, 26.4 GFLOPs\n",
" Class Images Instances Box(P R mAP50 mAP50-95) Mask(P R mAP50 mAP50-95): 100% 4/4 [00:06<00:00, 1.59s/it]\n",
" all 128 929 0.738 0.694 0.746 0.522 0.759 0.625 0.682 0.426\n",
" person 128 254 0.845 0.756 0.836 0.55 0.861 0.669 0.759 0.407\n",
" bicycle 128 6 0.475 0.333 0.549 0.341 0.711 0.333 0.526 0.322\n",
" car 128 46 0.612 0.565 0.539 0.257 0.555 0.435 0.477 0.171\n",
" motorcycle 128 5 0.73 0.8 0.752 0.571 0.747 0.8 0.752 0.42\n",
" airplane 128 6 1 0.943 0.995 0.732 0.92 0.833 0.839 0.555\n",
" bus 128 7 0.677 0.714 0.722 0.653 0.711 0.714 0.722 0.593\n",
" train 128 3 1 0.951 0.995 0.551 1 0.884 0.995 0.781\n",
" truck 128 12 0.555 0.417 0.457 0.285 0.624 0.417 0.397 0.277\n",
" boat 128 6 0.624 0.5 0.584 0.186 1 0.326 0.412 0.133\n",
" traffic light 128 14 0.513 0.302 0.411 0.247 0.435 0.214 0.376 0.251\n",
" stop sign 128 2 0.824 1 0.995 0.796 0.906 1 0.995 0.747\n",
" bench 128 9 0.75 0.667 0.763 0.367 0.724 0.585 0.698 0.209\n",
" bird 128 16 0.961 1 0.995 0.686 0.918 0.938 0.91 0.525\n",
" cat 128 4 0.771 0.857 0.945 0.752 0.76 0.8 0.945 0.728\n",
" dog 128 9 0.987 0.778 0.963 0.681 1 0.705 0.89 0.574\n",
" horse 128 2 0.703 1 0.995 0.697 0.759 1 0.995 0.249\n",
" elephant 128 17 0.916 0.882 0.93 0.691 0.811 0.765 0.829 0.537\n",
" bear 128 1 0.664 1 0.995 0.995 0.701 1 0.995 0.895\n",
" zebra 128 4 0.864 1 0.995 0.921 0.879 1 0.995 0.804\n",
" giraffe 128 9 0.883 0.889 0.94 0.683 0.845 0.778 0.78 0.463\n",
" backpack 128 6 1 0.59 0.701 0.372 1 0.474 0.52 0.252\n",
" umbrella 128 18 0.654 0.839 0.887 0.52 0.517 0.556 0.427 0.229\n",
" handbag 128 19 0.54 0.211 0.408 0.221 0.796 0.206 0.396 0.196\n",
" tie 128 7 0.864 0.857 0.857 0.577 0.925 0.857 0.857 0.534\n",
" suitcase 128 4 0.716 1 0.945 0.647 0.767 1 0.945 0.634\n",
" frisbee 128 5 0.708 0.8 0.761 0.643 0.737 0.8 0.761 0.501\n",
" skis 128 1 0.691 1 0.995 0.796 0.761 1 0.995 0.199\n",
" snowboard 128 7 0.918 0.857 0.904 0.604 0.32 0.286 0.235 0.137\n",
" sports ball 128 6 0.902 0.667 0.701 0.466 0.727 0.5 0.497 0.471\n",
" kite 128 10 0.586 0.4 0.511 0.231 0.663 0.394 0.417 0.139\n",
" baseball bat 128 4 0.359 0.5 0.401 0.169 0.631 0.5 0.526 0.133\n",
" baseball glove 128 7 1 0.519 0.58 0.327 0.687 0.286 0.455 0.328\n",
" skateboard 128 5 0.729 0.8 0.862 0.631 0.599 0.6 0.604 0.379\n",
" tennis racket 128 7 0.57 0.714 0.645 0.448 0.608 0.714 0.645 0.412\n",
" bottle 128 18 0.469 0.393 0.537 0.357 0.661 0.389 0.543 0.349\n",
" wine glass 128 16 0.677 0.938 0.866 0.441 0.53 0.625 0.67 0.334\n",
" cup 128 36 0.777 0.722 0.812 0.466 0.725 0.583 0.762 0.467\n",
" fork 128 6 0.948 0.333 0.425 0.27 0.527 0.167 0.18 0.102\n",
" knife 128 16 0.757 0.587 0.669 0.458 0.79 0.5 0.552 0.34\n",
" spoon 128 22 0.74 0.364 0.559 0.269 0.925 0.364 0.513 0.213\n",
" bowl 128 28 0.766 0.714 0.725 0.559 0.803 0.584 0.665 0.353\n",
" banana 128 1 0.408 1 0.995 0.398 0.539 1 0.995 0.497\n",
" sandwich 128 2 1 0 0.695 0.536 1 0 0.498 0.448\n",
" orange 128 4 0.467 1 0.995 0.693 0.518 1 0.995 0.663\n",
" broccoli 128 11 0.462 0.455 0.383 0.259 0.548 0.455 0.384 0.256\n",
" carrot 128 24 0.631 0.875 0.77 0.533 0.757 0.909 0.853 0.499\n",
" hot dog 128 2 0.555 1 0.995 0.995 0.578 1 0.995 0.796\n",
" pizza 128 5 0.89 0.8 0.962 0.796 1 0.778 0.962 0.766\n",
" donut 128 14 0.695 1 0.893 0.772 0.704 1 0.893 0.696\n",
" cake 128 4 0.826 1 0.995 0.92 0.862 1 0.995 0.846\n",
" chair 128 35 0.53 0.571 0.613 0.336 0.67 0.6 0.538 0.271\n",
" couch 128 6 0.972 0.667 0.833 0.627 1 0.62 0.696 0.394\n",
" potted plant 128 14 0.7 0.857 0.883 0.552 0.836 0.857 0.883 0.473\n",
" bed 128 3 0.979 0.667 0.83 0.366 1 0 0.83 0.373\n",
" dining table 128 13 0.775 0.308 0.505 0.364 0.644 0.231 0.25 0.0804\n",
" toilet 128 2 0.836 1 0.995 0.846 0.887 1 0.995 0.797\n",
" tv 128 2 0.6 1 0.995 0.846 0.655 1 0.995 0.896\n",
" laptop 128 3 0.822 0.333 0.445 0.307 1 0 0.392 0.12\n",
" mouse 128 2 1 0 0 0 1 0 0 0\n",
" remote 128 8 0.745 0.5 0.62 0.459 0.821 0.5 0.624 0.449\n",
" cell phone 128 8 0.686 0.375 0.502 0.272 0.488 0.25 0.28 0.132\n",
" microwave 128 3 0.831 1 0.995 0.722 0.867 1 0.995 0.592\n",
" oven 128 5 0.439 0.4 0.435 0.294 0.823 0.6 0.645 0.418\n",
" sink 128 6 0.677 0.5 0.565 0.448 0.722 0.5 0.46 0.362\n",
" refrigerator 128 5 0.533 0.8 0.783 0.524 0.558 0.8 0.783 0.527\n",
" book 128 29 0.732 0.379 0.423 0.196 0.69 0.207 0.38 0.131\n",
" clock 128 9 0.889 0.778 0.917 0.677 0.908 0.778 0.875 0.604\n",
" vase 128 2 0.375 1 0.995 0.995 0.455 1 0.995 0.796\n",
" scissors 128 1 1 0 0.0166 0.00166 1 0 0 0\n",
" teddy bear 128 21 0.813 0.829 0.841 0.457 0.826 0.678 0.786 0.422\n",
" toothbrush 128 5 0.806 1 0.995 0.733 0.991 1 0.995 0.628\n",
"Results saved to \u001b[1mruns/train-seg/exp\u001b[0m\n"
]
}
],
"source": [
"# Train YOLOv5s on COCO128 for 3 epochs\n",
"!python segment/train.py --img 640 --batch 16 --epochs 3 --data coco128-seg.yaml --weights yolov5s-seg.pt --cache"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "15glLzbQx5u0"
},
"source": [
"# 4. Visualize"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nWOsI5wJR1o3"
},
"source": [
"## Comet Logging and Visualization 🌟 NEW\n",
"\n",
"[Comet](https://www.comet.com/site/lp/yolov5-with-comet/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab) is now fully integrated with YOLOv5. Track and visualize model metrics in real time, save your hyperparameters, datasets, and model checkpoints, and visualize your model predictions with [Comet Custom Panels](https://www.comet.com/docs/v2/guides/comet-dashboard/code-panels/about-panels/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab)! Comet makes sure you never lose track of your work and makes it easy to share results and collaborate across teams of all sizes!\n",
"\n",
"Getting started is easy:\n",
"```shell\n",
"pip install comet_ml # 1. install\n",
"export COMET_API_KEY=<Your API Key> # 2. paste API key\n",
"python train.py --img 640 --epochs 3 --data coco128.yaml --weights yolov5s.pt # 3. train\n",
"```\n",
"To learn more about all of the supported Comet features for this integration, check out the [Comet Tutorial](https://docs.ultralytics.com/yolov5/tutorials/comet_logging_integration). If you'd like to learn more about Comet, head over to our [documentation](https://www.comet.com/docs/v2/?utm_source=yolov5&utm_medium=partner&utm_campaign=partner_yolov5_2022&utm_content=yolov5_colab). Get started by trying out the Comet Colab Notebook:\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1RG0WOQyxlDlo5Km8GogJpIEJlg_5lyYO?usp=sharing)\n",
"\n",
"<a href=\"https://bit.ly/yolov5-readme-comet2\">\n",
"<img alt=\"Comet Dashboard\" src=\"https://user-images.githubusercontent.com/26833433/202851203-164e94e1-2238-46dd-91f8-de020e9d6b41.png\" width=\"1280\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Lay2WsTjNJzP"
},
"source": [
"## ClearML Logging and Automation 🌟 NEW\n",
"\n",
"[ClearML](https://cutt.ly/yolov5-notebook-clearml) is completely integrated into YOLOv5 to track your experimentation, manage dataset versions and even remotely execute training runs. To enable ClearML (check cells above):\n",
"\n",
"- `pip install clearml`\n",
"- run `clearml-init` to connect to a ClearML server (**deploy your own [open-source server](https://github.com/allegroai/clearml-server)**, or use our [free hosted server](https://cutt.ly/yolov5-notebook-clearml))\n",
"\n",
"You'll get all the great expected features from an experiment manager: live updates, model upload, experiment comparison etc. but ClearML also tracks uncommitted changes and installed packages for example. Thanks to that ClearML Tasks (which is what we call experiments) are also reproducible on different machines! With only 1 extra line, we can schedule a YOLOv5 training task on a queue to be executed by any number of ClearML Agents (workers).\n",
"\n",
"You can use ClearML Data to version your dataset and then pass it to YOLOv5 simply using its unique ID. This will help you keep track of your data without adding extra hassle. Explore the [ClearML Tutorial](https://docs.ultralytics.com/yolov5/tutorials/clearml_logging_integration) for details!\n",
"\n",
"<a href=\"https://cutt.ly/yolov5-notebook-clearml\">\n",
"<img alt=\"ClearML Experiment Management UI\" src=\"https://github.com/thepycoder/clearml_screenshots/raw/main/scalars.jpg\" width=\"1280\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-WPvRbS5Swl6"
},
"source": [
"## Local Logging\n",
"\n",
"Training results are automatically logged with [Tensorboard](https://www.tensorflow.org/tensorboard) and [CSV](https://github.com/ultralytics/yolov5/pull/4148) loggers to `runs/train`, with a new experiment directory created for each new training as `runs/train/exp2`, `runs/train/exp3`, etc.\n",
"\n",
"This directory contains train and val statistics, mosaics, labels, predictions and augmentated mosaics, as well as metrics and charts including precision-recall (PR) curves and confusion matrices. \n",
"\n",
"<img alt=\"Local logging results\" src=\"https://user-images.githubusercontent.com/26833433/183222430-e1abd1b7-782c-4cde-b04d-ad52926bf818.jpg\" width=\"1280\"/>\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Zelyeqbyt3GD"
},
"source": [
"# Environments\n",
"\n",
"YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):\n",
"\n",
"- **Notebooks** with free GPU: <a href=\"https://bit.ly/yolov5-paperspace-notebook\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"></a> <a href=\"https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a> <a href=\"https://www.kaggle.com/models/ultralytics/yolov5\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
"- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/google_cloud_quickstart_tutorial/)\n",
"- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/aws_quickstart_tutorial/)\n",
"- **Docker Image**. See [Docker Quickstart Guide](https://docs.ultralytics.com/yolov5/environments/docker_image_quickstart_tutorial/) <a href=\"https://hub.docker.com/r/ultralytics/yolov5\"><img src=\"https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker\" alt=\"Docker Pulls\"></a>\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6Qu7Iesl0p54"
},
"source": [
"# Status\n",
"\n",
"![YOLOv5 CI](https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg)\n",
"\n",
"If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), testing ([val.py](https://github.com/ultralytics/yolov5/blob/master/val.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/export.py)) on macOS, Windows, and Ubuntu every 24 hours and on every commit.\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IEijrePND_2I"
},
"source": [
"# Appendix\n",
"\n",
"Additional content below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GMusP4OAxFu6"
},
"outputs": [],
"source": [
"# YOLOv5 PyTorch HUB Inference (DetectionModels only)\n",
"\n",
"model = torch.hub.load(\n",
" \"ultralytics/yolov5\", \"yolov5s-seg\", force_reload=True, trust_repo=True\n",
") # or yolov5n - yolov5x6 or custom\n",
"im = \"https://ultralytics.com/images/zidane.jpg\" # file, Path, PIL.Image, OpenCV, nparray, list\n",
"results = model(im) # inference\n",
"results.print() # or .show(), .save(), .crop(), .pandas(), etc."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"name": "YOLOv5 Segmentation Tutorial",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

522
yolov5/segment/val.py Normal file
View File

@ -0,0 +1,522 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
Validate a trained YOLOv5 segment model on a segment dataset.
Usage:
$ bash data/scripts/get_coco.sh --val --segments # download COCO-segments val split (1G, 5000 images)
$ python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640 # validate COCO-segments
Usage - formats:
$ python segment/val.py --weights yolov5s-seg.pt # PyTorch
yolov5s-seg.torchscript # TorchScript
yolov5s-seg.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s-seg_openvino_label # OpenVINO
yolov5s-seg.engine # TensorRT
yolov5s-seg.mlmodel # CoreML (macOS-only)
yolov5s-seg_saved_model # TensorFlow SavedModel
yolov5s-seg.pb # TensorFlow GraphDef
yolov5s-seg.tflite # TensorFlow Lite
yolov5s-seg_edgetpu.tflite # TensorFlow Edge TPU
yolov5s-seg_paddle_model # PaddlePaddle
"""
import argparse
import json
import os
import subprocess
import sys
from multiprocessing.pool import ThreadPool
from pathlib import Path
import numpy as np
import torch
from tqdm import tqdm
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
import torch.nn.functional as F
from models.common import DetectMultiBackend
from models.yolo import SegmentationModel
from utils.callbacks import Callbacks
from utils.general import (
LOGGER,
NUM_THREADS,
TQDM_BAR_FORMAT,
Profile,
check_dataset,
check_img_size,
check_requirements,
check_yaml,
coco80_to_coco91_class,
colorstr,
increment_path,
non_max_suppression,
print_args,
scale_boxes,
xywh2xyxy,
xyxy2xywh,
)
from utils.metrics import ConfusionMatrix, box_iou
from utils.plots import output_to_target, plot_val_study
from utils.segment.dataloaders import create_dataloader
from utils.segment.general import mask_iou, process_mask, process_mask_native, scale_image
from utils.segment.metrics import Metrics, ap_per_class_box_and_mask
from utils.segment.plots import plot_images_and_masks
from utils.torch_utils import de_parallel, select_device, smart_inference_mode
def save_one_txt(predn, save_conf, shape, file):
"""Saves detection results in txt format; includes class, xywh (normalized), optionally confidence if `save_conf` is
True.
"""
gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh
for *xyxy, conf, cls in predn.tolist():
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
with open(file, "a") as f:
f.write(("%g " * len(line)).rstrip() % line + "\n")
def save_one_json(predn, jdict, path, class_map, pred_masks):
"""
Saves a JSON file with detection results including bounding boxes, category IDs, scores, and segmentation masks.
Example JSON result: {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}.
"""
from pycocotools.mask import encode
def single_encode(x):
"""Encodes binary mask arrays into RLE (Run-Length Encoding) format for JSON serialization."""
rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0]
rle["counts"] = rle["counts"].decode("utf-8")
return rle
image_id = int(path.stem) if path.stem.isnumeric() else path.stem
box = xyxy2xywh(predn[:, :4]) # xywh
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
pred_masks = np.transpose(pred_masks, (2, 0, 1))
with ThreadPool(NUM_THREADS) as pool:
rles = pool.map(single_encode, pred_masks)
for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
jdict.append(
{
"image_id": image_id,
"category_id": class_map[int(p[5])],
"bbox": [round(x, 3) for x in b],
"score": round(p[4], 5),
"segmentation": rles[i],
}
)
def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
"""
Return correct prediction matrix
Arguments:
detections (array[N, 6]), x1, y1, x2, y2, conf, class
labels (array[M, 5]), class, x1, y1, x2, y2
Returns:
correct (array[N, 10]), for 10 IoU levels.
"""
if masks:
if overlap:
nl = len(labels)
index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
gt_masks = gt_masks.repeat(nl, 1, 1) # shape(1,640,640) -> (n,640,640)
gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
if gt_masks.shape[1:] != pred_masks.shape[1:]:
gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
gt_masks = gt_masks.gt_(0.5)
iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
else: # boxes
iou = box_iou(labels[:, 1:], detections[:, :4])
correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
correct_class = labels[:, 0:1] == detections[:, 5]
for i in range(len(iouv)):
x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match
if x[0].shape[0]:
matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou]
if x[0].shape[0] > 1:
matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
# matches = matches[matches[:, 2].argsort()[::-1]]
matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
correct[matches[:, 1].astype(int), i] = True
return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
@smart_inference_mode()
def run(
data,
weights=None, # model.pt path(s)
batch_size=32, # batch size
imgsz=640, # inference size (pixels)
conf_thres=0.001, # confidence threshold
iou_thres=0.6, # NMS IoU threshold
max_det=300, # maximum detections per image
task="val", # train, val, test, speed or study
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
workers=8, # max dataloader workers (per RANK in DDP mode)
single_cls=False, # treat as single-class dataset
augment=False, # augmented inference
verbose=False, # verbose output
save_txt=False, # save results to *.txt
save_hybrid=False, # save label+prediction hybrid results to *.txt
save_conf=False, # save confidences in --save-txt labels
save_json=False, # save a COCO-JSON results file
project=ROOT / "runs/val-seg", # save to project/name
name="exp", # save to project/name
exist_ok=False, # existing project/name ok, do not increment
half=True, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
model=None,
dataloader=None,
save_dir=Path(""),
plots=True,
overlap=False,
mask_downsample_ratio=1,
compute_loss=None,
callbacks=Callbacks(),
):
"""Validates a YOLOv5 segmentation model on specified dataset, producing metrics, plots, and optional JSON
output.
"""
if save_json:
check_requirements("pycocotools>=2.0.6")
process = process_mask_native # more accurate
else:
process = process_mask # faster
# Initialize/load model and set device
training = model is not None
if training: # called by train.py
device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model
half &= device.type != "cpu" # half precision only supported on CUDA
model.half() if half else model.float()
nm = de_parallel(model).model[-1].nm # number of masks
else: # called directly
device = select_device(device, batch_size=batch_size)
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
imgsz = check_img_size(imgsz, s=stride) # check image size
half = model.fp16 # FP16 supported on limited backends with CUDA
nm = de_parallel(model).model.model[-1].nm if isinstance(model, SegmentationModel) else 32 # number of masks
if engine:
batch_size = model.batch_size
else:
device = model.device
if not (pt or jit):
batch_size = 1 # export.py models default to batch-size 1
LOGGER.info(f"Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")
# Data
data = check_dataset(data) # check
# Configure
model.eval()
cuda = device.type != "cpu"
is_coco = isinstance(data.get("val"), str) and data["val"].endswith(f"coco{os.sep}val2017.txt") # COCO dataset
nc = 1 if single_cls else int(data["nc"]) # number of classes
iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95
niou = iouv.numel()
# Dataloader
if not training:
if pt and not single_cls: # check --weights are trained on --data
ncm = model.model.nc
assert ncm == nc, (
f"{weights} ({ncm} classes) trained on different --data than what you passed ({nc} "
f"classes). Pass correct combination of --weights and --data that are trained together."
)
model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup
pad, rect = (0.0, False) if task == "speed" else (0.5, pt) # square inference for benchmarks
task = task if task in ("train", "val", "test") else "val" # path to train/val/test images
dataloader = create_dataloader(
data[task],
imgsz,
batch_size,
stride,
single_cls,
pad=pad,
rect=rect,
workers=workers,
prefix=colorstr(f"{task}: "),
overlap_mask=overlap,
mask_downsample_ratio=mask_downsample_ratio,
)[0]
seen = 0
confusion_matrix = ConfusionMatrix(nc=nc)
names = model.names if hasattr(model, "names") else model.module.names # get class names
if isinstance(names, (list, tuple)): # old format
names = dict(enumerate(names))
class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
s = ("%22s" + "%11s" * 10) % (
"Class",
"Images",
"Instances",
"Box(P",
"R",
"mAP50",
"mAP50-95)",
"Mask(P",
"R",
"mAP50",
"mAP50-95)",
)
dt = Profile(device=device), Profile(device=device), Profile(device=device)
metrics = Metrics()
loss = torch.zeros(4, device=device)
jdict, stats = [], []
# callbacks.run('on_val_start')
pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT) # progress bar
for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar):
# callbacks.run('on_val_batch_start')
with dt[0]:
if cuda:
im = im.to(device, non_blocking=True)
targets = targets.to(device)
masks = masks.to(device)
masks = masks.float()
im = im.half() if half else im.float() # uint8 to fp16/32
im /= 255 # 0 - 255 to 0.0 - 1.0
nb, _, height, width = im.shape # batch size, channels, height, width
# Inference
with dt[1]:
preds, protos, train_out = model(im) if compute_loss else (*model(im, augment=augment)[:2], None)
# Loss
if compute_loss:
loss += compute_loss((train_out, protos), targets, masks)[1] # box, obj, cls
# NMS
targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels
lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
with dt[2]:
preds = non_max_suppression(
preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det, nm=nm
)
# Metrics
plot_masks = [] # masks for plotting
for si, (pred, proto) in enumerate(zip(preds, protos)):
labels = targets[targets[:, 0] == si, 1:]
nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions
path, shape = Path(paths[si]), shapes[si][0]
correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init
correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init
seen += 1
if npr == 0:
if nl:
stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0]))
if plots:
confusion_matrix.process_batch(detections=None, labels=labels[:, 0])
continue
# Masks
midx = [si] if overlap else targets[:, 0] == si
gt_masks = masks[midx]
pred_masks = process(proto, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:])
# Predictions
if single_cls:
pred[:, 5] = 0
predn = pred.clone()
scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred
# Evaluate
if nl:
tbox = xywh2xyxy(labels[:, 1:5]) # target boxes
scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels
labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels
correct_bboxes = process_batch(predn, labelsn, iouv)
correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=overlap, masks=True)
if plots:
confusion_matrix.process_batch(predn, labelsn)
stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (conf, pcls, tcls)
pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
if plots and batch_i < 3:
plot_masks.append(pred_masks[:15]) # filter top 15 to plot
# Save/log
if save_txt:
save_one_txt(predn, save_conf, shape, file=save_dir / "labels" / f"{path.stem}.txt")
if save_json:
pred_masks = scale_image(
im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]
)
save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary
# callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
# Plot images
if plots and batch_i < 3:
if len(plot_masks):
plot_masks = torch.cat(plot_masks, dim=0)
plot_images_and_masks(im, targets, masks, paths, save_dir / f"val_batch{batch_i}_labels.jpg", names)
plot_images_and_masks(
im,
output_to_target(preds, max_det=15),
plot_masks,
paths,
save_dir / f"val_batch{batch_i}_pred.jpg",
names,
) # pred
# callbacks.run('on_val_batch_end')
# Compute metrics
stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy
if len(stats) and stats[0].any():
results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names)
metrics.update(results)
nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class
# Print results
pf = "%22s" + "%11i" * 2 + "%11.3g" * 8 # print format
LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results()))
if nt.sum() == 0:
LOGGER.warning(f"WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels")
# Print results per class
if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
for i, c in enumerate(metrics.ap_class_index):
LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i)))
# Print speeds
t = tuple(x.t / seen * 1e3 for x in dt) # speeds per image
if not training:
shape = (batch_size, 3, imgsz, imgsz)
LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t)
# Plots
if plots:
confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
# callbacks.run('on_val_end')
mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask = metrics.mean_results()
# Save JSON
if save_json and len(jdict):
w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else "" # weights
anno_json = str(Path("../datasets/coco/annotations/instances_val2017.json")) # annotations
pred_json = str(save_dir / f"{w}_predictions.json") # predictions
LOGGER.info(f"\nEvaluating pycocotools mAP... saving {pred_json}...")
with open(pred_json, "w") as f:
json.dump(jdict, f)
try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
anno = COCO(anno_json) # init annotations api
pred = anno.loadRes(pred_json) # init predictions api
results = []
for eval in COCOeval(anno, pred, "bbox"), COCOeval(anno, pred, "segm"):
if is_coco:
eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # img ID to evaluate
eval.evaluate()
eval.accumulate()
eval.summarize()
results.extend(eval.stats[:2]) # update results (mAP@0.5:0.95, mAP@0.5)
map_bbox, map50_bbox, map_mask, map50_mask = results
except Exception as e:
LOGGER.info(f"pycocotools unable to run: {e}")
# Return results
model.float() # for training
if not training:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
final_metric = mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask
return (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), metrics.get_maps(nc), t
def parse_opt():
"""Parses command line arguments for configuring YOLOv5 options like dataset path, weights, batch size, and
inference settings.
"""
parser = argparse.ArgumentParser()
parser.add_argument("--data", type=str, default=ROOT / "data/coco128-seg.yaml", help="dataset.yaml path")
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-seg.pt", help="model path(s)")
parser.add_argument("--batch-size", type=int, default=32, help="batch size")
parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=640, help="inference size (pixels)")
parser.add_argument("--conf-thres", type=float, default=0.001, help="confidence threshold")
parser.add_argument("--iou-thres", type=float, default=0.6, help="NMS IoU threshold")
parser.add_argument("--max-det", type=int, default=300, help="maximum detections per image")
parser.add_argument("--task", default="val", help="train, val, test, speed or study")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
parser.add_argument("--single-cls", action="store_true", help="treat as single-class dataset")
parser.add_argument("--augment", action="store_true", help="augmented inference")
parser.add_argument("--verbose", action="store_true", help="report mAP by class")
parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
parser.add_argument("--save-hybrid", action="store_true", help="save label+prediction hybrid results to *.txt")
parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")
parser.add_argument("--save-json", action="store_true", help="save a COCO-JSON results file")
parser.add_argument("--project", default=ROOT / "runs/val-seg", help="save results to project/name")
parser.add_argument("--name", default="exp", help="save to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
opt = parser.parse_args()
opt.data = check_yaml(opt.data) # check YAML
# opt.save_json |= opt.data.endswith('coco.yaml')
opt.save_txt |= opt.save_hybrid
print_args(vars(opt))
return opt
def main(opt):
"""Executes YOLOv5 tasks including training, validation, testing, speed, and study with configurable options."""
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
if opt.task in ("train", "val", "test"): # run normally
if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466
LOGGER.warning(f"WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results")
if opt.save_hybrid:
LOGGER.warning("WARNING ⚠️ --save-hybrid returns high mAP from hybrid labels, not from predictions alone")
run(**vars(opt))
else:
weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]
opt.half = torch.cuda.is_available() and opt.device != "cpu" # FP16 for fastest results
if opt.task == "speed": # speed benchmarks
# python val.py --task speed --data coco.yaml --batch 1 --weights yolov5n.pt yolov5s.pt...
opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
for opt.weights in weights:
run(**vars(opt), plots=False)
elif opt.task == "study": # speed vs mAP benchmarks
# python val.py --task study --data coco.yaml --iou 0.7 --weights yolov5n.pt yolov5s.pt...
for opt.weights in weights:
f = f"study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt" # filename to save to
x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis
for opt.imgsz in x: # img-size
LOGGER.info(f"\nRunning {f} --imgsz {opt.imgsz}...")
r, _, t = run(**vars(opt), plots=False)
y.append(r + t) # results and times
np.savetxt(f, y, fmt="%10.4g") # save
subprocess.run(["zip", "-r", "study.zip", "study_*.txt"])
plot_val_study(x=x) # plot
else:
raise NotImplementedError(f'--task {opt.task} not in ("train", "val", "test", "speed", "study")')
if __name__ == "__main__":
opt = parse_opt()
main(opt)