diff --git a/app/service/project_detect_service.py b/app/service/project_detect_service.py index b6ea6d7..e8801ef 100644 --- a/app/service/project_detect_service.py +++ b/app/service/project_detect_service.py @@ -12,7 +12,8 @@ from ultralytics.utils.plotting import Annotator, colors from app.model.crud import project_detect_crud as pdc from app.model.schemas.project_detect_schemas import ProjectDetectIn, ProjectDetectOut, ProjectDetectLogIn -from app.model.bussiness_model import ProjectDetect, ProjectDetectImg, ProjectTrain, ProjectDetectLog, ProjectDetectLogImg +from app.model.bussiness_model import ProjectDetect, ProjectDetectImg, ProjectTrain, ProjectDetectLog, \ + ProjectDetectLogImg from app.util.random_utils import random_str from app.config.config_reader import detect_url from app.util import os_utils as os @@ -146,7 +147,8 @@ def run_detect_yolo(detect_in: ProjectDetectLogIn, detect: ProjectDetect, train: return detect_log -async def run_detect_img(weights: str, source: str, project: str, name: str, log_id: int, detect_id: int, session: Session): +async def run_detect_img(weights: str, source: str, project: str, name: str, log_id: int, detect_id: int, + session: Session): """ 执行yolov5的推理 :param weights: 权重文件 @@ -161,7 +163,8 @@ async def run_detect_img(weights: str, source: str, project: str, name: str, log yolo_path = os.file_path(yolo_url, 'detect.py') room = 'detect_' + str(detect_id) await room_manager.send_to_room(room, f"AiCheck: 模型训练开始,请稍等。。。\n") - commend = ["python", '-u', yolo_path, "--weights", weights, "--source", source, "--name", name, "--project", project, "--save-txt", "--conf-thres", "0.4"] + commend = ["python", '-u', yolo_path, "--weights", weights, "--source", source, "--name", name, "--project", + project, "--save-txt", "--conf-thres", "0.4"] is_gpu = redis_conn.get('is_gpu') # 判断是否存在cuda版本 if is_gpu == 'True': @@ -231,7 +234,7 @@ async def run_detect_rtsp(weights_pt: str, rtsp_url: str, data: str, detect_id: seen, windows, dt = 0, [], (Profile(device=device), Profile(device=device), Profile(device=device)) - time.sleep(3)# 等待3s,等待websocket进入 + time.sleep(3) # 等待3s,等待websocket进入 for path, im, im0s, vid_cap, s in dataset: if room_manager.rooms.get(room): @@ -283,8 +286,5 @@ async def run_detect_rtsp(weights_pt: str, rtsp_url: str, data: str, detect_id: frame_data = jpeg.tobytes() await room_manager.send_stream_to_room(room, frame_data) else: + print(room, '结束推理'); break - - - - diff --git a/app/service/project_sort_service.py b/app/service/project_sort_service.py new file mode 100644 index 0000000..9be2e80 --- /dev/null +++ b/app/service/project_sort_service.py @@ -0,0 +1,152 @@ +import time +import torch +from app.util.yolov5.models.common import DetectMultiBackend +from app.util.yolov5.utils.torch_utils import select_device +from app.util.yolov5.utils.dataloaders import LoadStreams +from app.util.yolov5.utils.general import check_img_size, non_max_suppression, cv2, scale_coords, xyxy2xywh + +from app.websocket.web_socket_server import room_manager +from app.common.redis_cli import redis_conn +from deep_sort.deep_sort import DeepSort + +palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) + +# 初始化 DeepSORT 跟踪器 +deepsort = DeepSort( + model_path="deep_sort/deep/checkpoint/ckpt.t7", # ReID 模型路径 + max_dist=0.2, # 外观特征匹配阈值(越小越严格) + max_iou_distance=0.7, # 最大IoU距离阈值 + max_age=70, # 目标最大存活帧数(未匹配时保留的帧数) + n_init=3 # 初始确认帧数(连续匹配到n_init次后确认跟踪) +) + + +async def run_deepsort_rtsp(weights_pt: str, rtsp_url: str, data: str, detect_id: int, idx_to_class: dict): + """ + rtsp 
video-stream inference with DeepSORT tracking
+    :param detect_id: detection task ID (used to build the websocket room name)
+    :param weights_pt: weights file
+    :param rtsp_url: RTSP stream URL
+    :param data: dataset yaml file
+    :param idx_to_class: dict mapping class indices to class names
+    :return:
+    """
+    room = 'deepsort_rtsp_' + str(detect_id)
+    # select the device (CPU or GPU)
+    device = select_device('cpu')
+    is_gpu = redis_conn.get('is_gpu')
+    # check whether a CUDA build is available
+    if is_gpu == 'True':
+        device = select_device('cuda:0')
+
+    # load the model
+    model = DetectMultiBackend(weights_pt, device=device, dnn=False, data=data, fp16=False)
+
+    stride, names, pt = model.stride, model.names, model.pt
+    imgsz = check_img_size((640, 640), s=stride)  # check image size
+
+    dataset = LoadStreams(rtsp_url, img_size=imgsz, stride=stride, auto=pt, vid_stride=1)
+    bs = len(dataset)
+
+    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))
+
+    time.sleep(3)  # wait 3 s for the websocket client to join the room
+
+    for path, im, im0s, vid_cap, s in dataset:
+        if room_manager.rooms.get(room):
+            im = torch.from_numpy(im).to(model.device)
+            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
+            im /= 255  # 0 - 255 to 0.0 - 1.0
+            if len(im.shape) == 3:
+                im = im[None]  # expand for batch dim
+            if model.xml and im.shape[0] > 1:
+                ims = torch.chunk(im, im.shape[0], 0)
+
+            # Inference
+            if model.xml and im.shape[0] > 1:
+                pred = None
+                for image in ims:
+                    if pred is None:
+                        pred = model(image, augment=False, visualize=False).unsqueeze(0)
+                    else:
+                        pred = torch.cat((pred, model(image, augment=False, visualize=False).unsqueeze(0)),
+                                         dim=0)
+                pred = [pred, None]
+            else:
+                pred = model(im, augment=False, visualize=False)
+            # NMS
+            pred = non_max_suppression(pred, 0.45, 0.45, None, False, max_det=1000)
+            # non_max_suppression returns one tensor per image; a single RTSP stream yields one entry
+            pred = pred[0]
+
+            image = im0s[0]
+
+            pred[:, :4] = scale_coords(im.shape[2:], pred[:, :4], image.shape).round()
+
+            # convert the YOLOv5 detections into the format expected by Deep SORT
+            bbox_xywh, cls_conf, cls_ids = yolo_to_deepsort_format(pred)
+
+            # select person class
+            mask = cls_ids == 0
+
+            bbox_xywh = bbox_xywh[mask]
+            # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector
+            bbox_xywh[:, 2:] *= 1.2
+            cls_conf = cls_conf[mask]
+            cls_ids = cls_ids[mask]
+
+            # run the Deep SORT update step
+            outputs, _ = deepsort.update(bbox_xywh, cls_conf, cls_ids, image)
+
+            # draw boxes for visualization
+            if len(outputs) > 0:
+                bbox_xyxy = outputs[:, :4]
+                identities = outputs[:, -1]
+                cls = outputs[:, -2]
+                names = [idx_to_class[str(label)] for label in cls]
+                image = draw_boxes(image, bbox_xyxy, names, identities)
+            # encode the frame as JPEG
+            ret, jpeg = cv2.imencode('.jpg', image)
+            if ret:
+                frame_data = jpeg.tobytes()
+                await room_manager.send_stream_to_room(room, frame_data)
+        else:
+            print(room, 'tracking stopped')
+            break
+
+
+def draw_boxes(img, bbox, names=None, identities=None, offset=(0, 0)):
+    for i, box in enumerate(bbox):
+        x1, y1, x2, y2 = [int(i) for i in box]
+        x1 += offset[0]
+        x2 += offset[0]
+        y1 += offset[1]
+        y2 += offset[1]
+        # box text and bar
+        id = int(identities[i]) if identities is not None else 0
+        color = compute_color_for_labels(id)
+        label = '{:}{:d}'.format(names[i], id)
+        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
+        cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
+        cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1)
+        cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)
+    return img
+
+
+def compute_color_for_labels(label):
+    """
+    Simple function that adds a fixed color depending on the class
+    """
+    color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
+    return tuple(color)
+
+
+def yolo_to_deepsort_format(pred):
+    """
+
将YOLOv5的预测结果转换为Deep SORT所需的格式 + :param pred: YOLOv5的预测结果 + :return: xywh, conf, cls + """ + pred[:, :4] = xyxy2xywh(pred[:, :4]) + xywh = pred[:, :4].cpu().numpy() + conf = pred[:, 4].cpu().numpy() + cls = pred[:, 5].cpu().numpy() + return xywh, conf, cls diff --git a/app/service/project_train_service.py b/app/service/project_train_service.py index 8c0d9ec..7dc4f83 100644 --- a/app/service/project_train_service.py +++ b/app/service/project_train_service.py @@ -218,12 +218,12 @@ async def run_commend(data: str, project: str, name: str, epochs: int, patience: stdout=subprocess.PIPE, stderr=subprocess.STDOUT, # 这里可以显示yolov5训练过程中出现的进度条等信息 text=True, # 缓存内容为文本,避免后续编码显示问题 - encoding='latin1', + encoding='utf-8', ) as process: while process.poll() is None: line = process.stdout.readline() process.stdout.flush() # 刷新缓存,防止缓存过多造成卡死 - if line != '\n': + if line != '\n' and '0%' not in line: await room_manager.send_to_room(room, line + '\n') # 等待进程结束并获取返回码 diff --git a/deep_sort/__init__.py b/deep_sort/__init__.py new file mode 100644 index 0000000..be3976f --- /dev/null +++ b/deep_sort/__init__.py @@ -0,0 +1,19 @@ +from .deep_sort import DeepSort + +__all__ = ['DeepSort', 'build_tracker'] + + +def build_tracker(cfg, use_cuda): + if cfg.USE_FASTREID: + return DeepSort(model_path=cfg.FASTREID.CHECKPOINT, model_config=cfg.FASTREID.CFG, + max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, + nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, + max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, + use_cuda=use_cuda) + + else: + return DeepSort(model_path=cfg.DEEPSORT.REID_CKPT, + max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, + nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, + max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, + use_cuda=use_cuda) diff --git a/deep_sort/configs/deep_sort.yaml b/deep_sort/configs/deep_sort.yaml new file mode 100644 index 0000000..1145809 --- /dev/null +++ b/deep_sort/configs/deep_sort.yaml @@ -0,0 +1,10 @@ +DEEPSORT: + REID_CKPT: "./deep_sort/deep/checkpoint/ckpt.t7" + MAX_DIST: 0.2 + MIN_CONFIDENCE: 0.5 + NMS_MAX_OVERLAP: 0.5 + MAX_IOU_DISTANCE: 0.7 + MAX_AGE: 70 + N_INIT: 3 + NN_BUDGET: 100 + \ No newline at end of file diff --git a/deep_sort/configs/fastreid.yaml b/deep_sort/configs/fastreid.yaml new file mode 100644 index 0000000..60b37f7 --- /dev/null +++ b/deep_sort/configs/fastreid.yaml @@ -0,0 +1,3 @@ +FASTREID: + CFG: "thirdparty/fast-reid/configs/Market1501/bagtricks_R50.yml" + CHECKPOINT: "deep_sort/deep/checkpoint/market_bot_R50.pth" \ No newline at end of file diff --git a/deep_sort/configs/mask_rcnn.yaml b/deep_sort/configs/mask_rcnn.yaml new file mode 100644 index 0000000..cb3c736 --- /dev/null +++ b/deep_sort/configs/mask_rcnn.yaml @@ -0,0 +1,6 @@ +MASKRCNN: + LABEL: "./coco_classes.json" + WEIGHT: "./detector/Mask_RCNN/save_weights/maskrcnn_resnet50_fpn_coco.pth" + + NUM_CLASSES: 90 + BOX_THRESH: 0.5 \ No newline at end of file diff --git a/deep_sort/configs/mmdet.yaml b/deep_sort/configs/mmdet.yaml new file mode 100644 index 0000000..e1b7e58 --- /dev/null +++ b/deep_sort/configs/mmdet.yaml @@ -0,0 +1,5 @@ +MMDET: + CFG: "thirdparty/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py" + CHECKPOINT: "detector/MMDet/weight/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth" + + SCORE_THRESH: 0.5 \ No newline at end of file diff 
--git a/deep_sort/deep/GETTING_STARTED.md b/deep_sort/deep/GETTING_STARTED.md
new file mode 100644
index 0000000..b55ef24
--- /dev/null
+++ b/deep_sort/deep/GETTING_STARTED.md
@@ -0,0 +1,82 @@
+In the DeepSORT algorithm, an appearance feature extraction network is used to extract features from **image_crops** for matching. The original model used in the paper is in `model.py`, and its parameters are available here: [ckpt.t7](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6). This repository also provides a `resnet.py` script and its ImageNet pre-trained weights at the URLs below.
+
+```
+# resnet18
+https://download.pytorch.org/models/resnet18-5c106cde.pth
+# resnet34
+https://download.pytorch.org/models/resnet34-333f7ec4.pth
+# resnet50
+https://download.pytorch.org/models/resnet50-19c8e357.pth
+# resnext50_32x4d
+https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
+```
+
+## Dataset Preparation
+
+To train the model, first download the [Market1501](http://www.liangzheng.com.cn/Project/project_reid.html) or [Mars](http://www.liangzheng.com.cn/Project/project_mars.html) dataset.
+
+If you want to train on your **own dataset**, arrange it in the following way once it is downloaded:
+
+```
+├── dataset_root: The root dir of the dataset.
+    ├── class1: Category 1 is located in the folder dir.
+        ├── xxx1.jpg: Image belonging to category 1.
+        ├── xxx2.jpg: Image belonging to category 1.
+    ├── class2: Category 2 is located in the folder dir.
+        ├── xxx3.jpg: Image belonging to category 2.
+        ├── xxx4.jpg: Image belonging to category 2.
+    ├── class3: Category 3 is located in the folder dir.
+    ...
+    ...
+```
+
+## Training the RE-ID model
+
+Assuming you have already prepared the dataset, you can use the following commands to start training.
+
+#### Training on a single GPU
+
+```python
+usage: train.py [--data-dir]
+                [--epochs]
+                [--batch_size]
+                [--lr]
+                [--lrf]
+                [--weights]
+                [--freeze-layers]
+                [--gpu_id]
+
+# defaults to cuda:0 and uses Net from `model.py`
+python train.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path]
+# the `--freeze-layers` option freezes all convolutional layer parameters and trains only the fc layers
+python train.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path] --freeze-layers
+```
+
+#### Training on multiple GPUs
+
+```python
+usage: train_multiGPU.py [--data-dir]
+                         [--epochs]
+                         [--batch_size]
+                         [--lr]
+                         [--lrf]
+                         [--syncBN]
+                         [--weights]
+                         [--freeze-layers]
+                         # do not change the following parameters; they are assigned automatically
+                         [--device]
+                         [--world_size]
+                         [--dist_url]
+
+# defaults to cuda:0, cuda:1, cuda:2, cuda:3 and uses resnet18 from `resnet.py`
+CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 train_multiGPU.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path]
+# the `--freeze-layers` option freezes all convolutional layer parameters and trains only the fc layers
+CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 train_multiGPU.py --data-dir [dataset/root/path] --weights [(optional)pre-train/weight/path] --freeze-layers
+```
+
+An example of training progress is as follows:
+
+![train.jpg](./train.jpg)
+
+Finally, you can evaluate the model using [test.py](deep_sort/deep/test.py) and [evaluate.py](deep_sort/deep/evaluate.py).
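+
+Once a checkpoint exists, it is consumed through the `DeepSort` class added in `deep_sort/deep_sort.py`. Below is a minimal sketch of feeding one frame of YOLO-style detections to the tracker; the frame and detection values are dummy placeholders, and the checkpoint path is assumed to be resolved from the repository root.
+
+```python
+import numpy as np
+from deep_sort.deep_sort import DeepSort
+
+# build the tracker from the trained ReID checkpoint
+tracker = DeepSort(model_path="deep_sort/deep/checkpoint/ckpt.t7",
+                   max_dist=0.2, max_iou_distance=0.7, max_age=70, n_init=3,
+                   use_cuda=False)
+
+frame = np.zeros((480, 640, 3), dtype=np.uint8)      # placeholder BGR frame
+bbox_xywh = np.array([[320.0, 240.0, 40.0, 80.0]])   # one box: center x, center y, width, height
+confidences = np.array([0.9])
+classes = np.array([0])                              # class 0 = person
+
+for _ in range(5):
+    # each row of outputs is [x1, y1, x2, y2, class, track_id];
+    # a track id only appears after n_init consecutive matches confirm the track
+    outputs, _ = tracker.update(bbox_xywh, confidences, classes, frame)
+
+print(outputs)
+```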
+ diff --git a/deep_sort/deep/__init__.py b/deep_sort/deep/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/deep_sort/deep/checkpoint/.gitkeep b/deep_sort/deep/checkpoint/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/deep_sort/deep/checkpoint/ckpt.t7 b/deep_sort/deep/checkpoint/ckpt.t7 new file mode 100644 index 0000000..d253aae Binary files /dev/null and b/deep_sort/deep/checkpoint/ckpt.t7 differ diff --git a/deep_sort/deep/datasets.py b/deep_sort/deep/datasets.py new file mode 100644 index 0000000..9e83b37 --- /dev/null +++ b/deep_sort/deep/datasets.py @@ -0,0 +1,92 @@ +import json +import os +import random + +import cv2 +from PIL import Image +import torch +from torch.utils.data import Dataset +import matplotlib.pyplot as plt + + +class ClsDataset(Dataset): + def __init__(self, images_path, images_labels, transform=None): + self.images_path = images_path + self.images_labels = images_labels + self.transform = transform + + def __len__(self): + return len(self.images_path) + + def __getitem__(self, idx): + img = cv2.imread(self.images_path[idx]) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = Image.fromarray(img) + label = self.images_labels[idx] + + if self.transform is not None: + img = self.transform(img) + return img, label + + @staticmethod + def collate_fn(batch): + images, labels = tuple(zip(*batch)) + images = torch.stack(images, dim=0) + labels = torch.as_tensor(labels) + return images, labels + + +def read_split_data(root, valid_rate=0.2): + assert os.path.exists(root), 'dataset root: {} does not exist.'.format(root) + + class_names = [cls for cls in os.listdir(root) if os.path.isdir(os.path.join(root, cls))] + class_names.sort() + + class_indices = {name: i for i, name in enumerate(class_names)} + json_str = json.dumps({v: k for k, v in class_indices.items()}, indent=4) + with open('class_indices.json', 'w') as f: + f.write(json_str) + + train_images_path = [] + train_labels = [] + val_images_path = [] + val_labels = [] + per_class_num = [] + + supported = ['.jpg', '.JPG', '.png', '.PNG'] + for cls in class_names: + cls_path = os.path.join(root, cls) + images_path = [os.path.join(cls_path, i) for i in os.listdir(cls_path) + if os.path.splitext(i)[-1] in supported] + images_label = class_indices[cls] + per_class_num.append(len(images_path)) + + val_path = random.sample(images_path, int(len(images_path) * valid_rate)) + for img_path in images_path: + if img_path in val_path: + val_images_path.append(img_path) + val_labels.append(images_label) + else: + train_images_path.append(img_path) + train_labels.append(images_label) + + print("{} images were found in the dataset.".format(sum(per_class_num))) + print("{} images for training.".format(len(train_images_path))) + print("{} images for validation.".format(len(val_images_path))) + + assert len(train_images_path) > 0, "number of training images must greater than zero" + assert len(val_images_path) > 0, "number of validation images must greater than zero" + + plot_distribution = False + if plot_distribution: + plt.bar(range(len(class_names)), per_class_num, align='center') + plt.xticks(range(len(class_names)), class_names) + + for i, v in enumerate(per_class_num): + plt.text(x=i, y=v + 5, s=str(v), ha='center') + + plt.xlabel('classes') + plt.ylabel('numbers') + plt.title('the distribution of dataset') + plt.show() + return [train_images_path, train_labels], [val_images_path, val_labels], len(class_names) diff --git a/deep_sort/deep/evaluate.py b/deep_sort/deep/evaluate.py new file mode 
100644 index 0000000..31c40a4 --- /dev/null +++ b/deep_sort/deep/evaluate.py @@ -0,0 +1,15 @@ +import torch + +features = torch.load("features.pth") +qf = features["qf"] +ql = features["ql"] +gf = features["gf"] +gl = features["gl"] + +scores = qf.mm(gf.t()) +res = scores.topk(5, dim=1)[1][:,0] +top1correct = gl[res].eq(ql).sum().item() + +print("Acc top1:{:.3f}".format(top1correct/ql.size(0))) + + diff --git a/deep_sort/deep/feature_extractor.py b/deep_sort/deep/feature_extractor.py new file mode 100644 index 0000000..b01cdf4 --- /dev/null +++ b/deep_sort/deep/feature_extractor.py @@ -0,0 +1,93 @@ +import torch +import torchvision.transforms as transforms +import numpy as np +import cv2 +import logging + +from .model import Net +from .resnet import resnet18 +# from fastreid.config import get_cfg +# from fastreid.engine import DefaultTrainer +# from fastreid.utils.checkpoint import Checkpointer + + +class Extractor(object): + def __init__(self, model_path, use_cuda=True): + self.net = Net(reid=True) + # self.net = resnet18(reid=True) + self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" + state_dict = torch.load(model_path, map_location=lambda storage, loc: storage) + self.net.load_state_dict(state_dict if 'net_dict' not in state_dict else state_dict['net_dict'], strict=False) + logger = logging.getLogger("root.tracker") + logger.info("Loading weights from {}... Done!".format(model_path)) + self.net.to(self.device) + self.size = (64, 128) + self.norm = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ]) + + def _preprocess(self, im_crops): + """ + TODO: + 1. to float with scale from 0 to 1 + 2. resize to (64, 128) as Market1501 dataset did + 3. concatenate to a numpy array + 3. to torch Tensor + 4. normalize + """ + + def _resize(im, size): + return cv2.resize(im.astype(np.float32) / 255., size) + + im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float() + return im_batch + + def __call__(self, im_crops): + im_batch = self._preprocess(im_crops) + with torch.no_grad(): + im_batch = im_batch.to(self.device) + features = self.net(im_batch) + return features.cpu().numpy() + + +class FastReIDExtractor(object): + def __init__(self, model_config, model_path, use_cuda=True): + cfg = get_cfg() + cfg.merge_from_file(model_config) + cfg.MODEL.BACKBONE.PRETRAIN = False + self.net = DefaultTrainer.build_model(cfg) + self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" + + Checkpointer(self.net).load(model_path) + logger = logging.getLogger("root.tracker") + logger.info("Loading weights from {}... 
Done!".format(model_path)) + self.net.to(self.device) + self.net.eval() + height, width = cfg.INPUT.SIZE_TEST + self.size = (width, height) + self.norm = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ]) + + def _preprocess(self, im_crops): + def _resize(im, size): + return cv2.resize(im.astype(np.float32) / 255., size) + + im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float() + return im_batch + + def __call__(self, im_crops): + im_batch = self._preprocess(im_crops) + with torch.no_grad(): + im_batch = im_batch.to(self.device) + features = self.net(im_batch) + return features.cpu().numpy() + + +if __name__ == '__main__': + img = cv2.imread("demo.jpg")[:, :, (2, 1, 0)] + extr = Extractor("checkpoint/ckpt.t7") + feature = extr(img) + print(feature.shape) diff --git a/deep_sort/deep/model.py b/deep_sort/deep/model.py new file mode 100644 index 0000000..9a0f291 --- /dev/null +++ b/deep_sort/deep/model.py @@ -0,0 +1,105 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class BasicBlock(nn.Module): + def __init__(self, c_in, c_out, is_downsample=False): + super(BasicBlock, self).__init__() + self.is_downsample = is_downsample + if is_downsample: + self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) + else: + self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(c_out) + self.relu = nn.ReLU(True) + self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(c_out) + if is_downsample: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), + nn.BatchNorm2d(c_out) + ) + elif c_in != c_out: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), + nn.BatchNorm2d(c_out) + ) + self.is_downsample = True + + def forward(self, x): + y = self.conv1(x) + y = self.bn1(y) + y = self.relu(y) + y = self.conv2(y) + y = self.bn2(y) + if self.is_downsample: + x = self.downsample(x) + return F.relu(x.add(y), True) + + +def make_layers(c_in, c_out, repeat_times, is_downsample=False): + blocks = [] + for i in range(repeat_times): + if i == 0: + blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ] + else: + blocks += [BasicBlock(c_out, c_out), ] + return nn.Sequential(*blocks) + + +class Net(nn.Module): + def __init__(self, num_classes=751, reid=False): + super(Net, self).__init__() + # 3 128 64 + self.conv = nn.Sequential( + nn.Conv2d(3, 64, 3, stride=1, padding=1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + # nn.Conv2d(32,32,3,stride=1,padding=1), + # nn.BatchNorm2d(32), + # nn.ReLU(inplace=True), + nn.MaxPool2d(3, 2, padding=1), + ) + # 32 64 32 + self.layer1 = make_layers(64, 64, 2, False) + # 32 64 32 + self.layer2 = make_layers(64, 128, 2, True) + # 64 32 16 + self.layer3 = make_layers(128, 256, 2, True) + # 128 16 8 + self.layer4 = make_layers(256, 512, 2, True) + # 256 8 4 + self.avgpool = nn.AdaptiveAvgPool2d(1) + # 256 1 1 + self.reid = reid + self.classifier = nn.Sequential( + nn.Linear(512, 256), + nn.BatchNorm1d(256), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(256, num_classes), + ) + + def forward(self, x): + x = self.conv(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = x.view(x.size(0), -1) + # B x 128 + if self.reid: + x = x.div(x.norm(p=2, dim=1, keepdim=True)) + return x + # classifier + x = 
self.classifier(x) + return x + + +if __name__ == '__main__': + net = Net() + x = torch.randn(4, 3, 128, 64) + y = net(x) + diff --git a/deep_sort/deep/multi_train_utils/distributed_utils.py b/deep_sort/deep/multi_train_utils/distributed_utils.py new file mode 100644 index 0000000..4cfc813 --- /dev/null +++ b/deep_sort/deep/multi_train_utils/distributed_utils.py @@ -0,0 +1,67 @@ +import os + +import torch +import torch.distributed as dist + + +def init_distributed_mode(args): + if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: + args.rank = int(os.environ['RANK']) + args.world_size = int(os.environ['WORLD_SIZE']) + args.gpu = int(os.environ['LOCAL_RANK']) + elif 'SLURM_PROCID' in os.environ: + args.rank = int(os.environ['SLURM_PROCID']) + args.gpu = args.rank % torch.cuda.device_count() + else: + print("Not using distributed mode") + args.distributed = False + return + + args.distributed = True + + torch.cuda.set_device(args.gpu) + args.dist_backend = 'nccl' + print('| distributed init (rank {}): {}'.format(args.rank, args.dist_url), flush=True) + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + dist.barrier() + + +def cleanup(): + dist.destroy_process_group() + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True + + +def get_world_size(): + if not is_dist_avail_and_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not is_dist_avail_and_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def reduce_value(value, average=True): + world_size = get_world_size() + if world_size < 2: + return value + with torch.no_grad(): + dist.all_reduce(value) + if average: + value /= world_size + + return value diff --git a/deep_sort/deep/multi_train_utils/train_eval_utils.py b/deep_sort/deep/multi_train_utils/train_eval_utils.py new file mode 100644 index 0000000..fdc073b --- /dev/null +++ b/deep_sort/deep/multi_train_utils/train_eval_utils.py @@ -0,0 +1,90 @@ +import sys + +from tqdm import tqdm +import torch + +from .distributed_utils import reduce_value, is_main_process + + +def load_model(state_dict, model_state_dict, model): + for k in state_dict: + if k in model_state_dict: + if state_dict[k].shape != model_state_dict[k].shape: + print('Skip loading parameter {}, required shape {}, ' \ + 'loaded shape {}.'.format( + k, model_state_dict[k].shape, state_dict[k].shape)) + state_dict[k] = model_state_dict[k] + else: + print('Drop parameter {}.'.format(k)) + for k in model_state_dict: + if not (k in state_dict): + print('No param {}.'.format(k)) + state_dict[k] = model_state_dict[k] + model.load_state_dict(state_dict, strict=False) + return model + + +def train_one_epoch(model, optimizer, data_loader, device, epoch): + model.train() + criterion = torch.nn.CrossEntropyLoss() + mean_loss = torch.zeros(1).to(device) + sum_num = torch.zeros(1).to(device) + optimizer.zero_grad() + + if is_main_process(): + data_loader = tqdm(data_loader, file=sys.stdout) + + for idx, (images, labels) in enumerate(data_loader): + # forward + images, labels = images.to(device), labels.to(device) + outputs = model(images) + loss = criterion(outputs, labels) + + # backward + loss.backward() + loss = reduce_value(loss, average=True) + mean_loss = (mean_loss * idx + loss.detach()) / (idx + 1) + pred = torch.max(outputs, dim=1)[1] + sum_num += torch.eq(pred, labels).sum() 
+ + if is_main_process(): + data_loader.desc = '[epoch {}] mean loss {}'.format(epoch, mean_loss.item()) + + if not torch.isfinite(loss): + print('loss is infinite, ending training') + sys.exit(1) + + optimizer.step() + optimizer.zero_grad() + if device != torch.device('cpu'): + torch.cuda.synchronize(device) + sum_num = reduce_value(sum_num, average=False) + + return sum_num.item(), mean_loss.item() + + +@torch.no_grad() +def evaluate(model, data_loader, device): + model.eval() + criterion = torch.nn.CrossEntropyLoss() + test_loss = torch.zeros(1).to(device) + sum_num = torch.zeros(1).to(device) + if is_main_process(): + data_loader = tqdm(data_loader, file=sys.stdout) + + for idx, (inputs, labels) in enumerate(data_loader): + inputs, labels = inputs.to(device), labels.to(device) + outputs = model(inputs) + loss = criterion(outputs, labels) + loss = reduce_value(loss, average=True) + + test_loss = (test_loss * idx + loss.detach()) / (idx + 1) + pred = torch.max(outputs, dim=1)[1] + sum_num += torch.eq(pred, labels).sum() + + if device != torch.device('cpu'): + torch.cuda.synchronize(device) + + sum_num = reduce_value(sum_num, average=False) + + return sum_num.item(), test_loss.item() diff --git a/deep_sort/deep/resnet.py b/deep_sort/deep/resnet.py new file mode 100644 index 0000000..6912b13 --- /dev/null +++ b/deep_sort/deep/resnet.py @@ -0,0 +1,173 @@ +import torch.nn as nn +import torch + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs): + super(BasicBlock, self).__init__() + self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=3, + stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(out_channel) + self.relu = nn.ReLU() + self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel, kernel_size=3, + stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(out_channel) + self.downsample = downsample + + def forward(self, x): + identity = x + if self.downsample is not None: + identity = self.downsample(x) + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + out += identity + out = self.relu(out) + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_channel, out_channel, stride=1, downsample=None, + groups=1, width_per_group=64): + super(Bottleneck, self).__init__() + width = int(out_channel * (width_per_group / 64.)) * groups + + self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=width, kernel_size=1, + stride=1, bias=False) + self.bn1 = nn.BatchNorm2d(width) + self.conv2 = nn.Conv2d(in_channels=width, out_channels=width, kernel_size=3, + stride=stride, padding=1, bias=False, groups=groups) + self.bn2 = nn.BatchNorm2d(width) + self.conv3 = nn.Conv2d(in_channels=width, out_channels=out_channel * self.expansion, + kernel_size=1, stride=1, bias=False) + self.bn3 = nn.BatchNorm2d(out_channel * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + + def forward(self, x): + identity = x + if self.downsample is not None: + identity = self.downsample(x) + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, blocks_num, reid=False, num_classes=1000, groups=1, 
width_per_group=64): + super(ResNet, self).__init__() + self.reid = reid + self.in_channel = 64 + + self.groups = groups + self.width_per_group = width_per_group + + self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2, + padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(self.in_channel) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layers(block, 64, blocks_num[0]) + self.layer2 = self._make_layers(block, 128, blocks_num[1], stride=2) + self.layer3 = self._make_layers(block, 256, blocks_num[2], stride=2) + # self.layer4 = self._make_layers(block, 512, blocks_num[3], stride=1) + + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(256 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _make_layers(self, block, channel, block_num, stride=1): + downsample = None + if stride != 1 or self.in_channel != channel * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(channel * block.expansion) + ) + layers = [] + layers.append(block(self.in_channel, channel, downsample=downsample, stride=stride, + groups=self.groups, width_per_group=self.width_per_group)) + self.in_channel = channel * block.expansion + + for _ in range(1, block_num): + layers.append(block(self.in_channel, channel, groups=self.groups, width_per_group=self.width_per_group)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + # x = self.layer4(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + + # B x 512 + if self.reid: + x = x.div(x.norm(p=2, dim=1, keepdim=True)) + return x + # classifier + x = self.fc(x) + return x + + +def resnet18(num_classes=1000, reid=False): + # https://download.pytorch.org/models/resnet18-5c106cde.pth + return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes, reid=reid) + + +def resnet34(num_classes=1000, reid=False): + # https://download.pytorch.org/models/resnet34-333f7ec4.pth + return ResNet(BasicBlock, [3, 4, 6, 3], num_classes=num_classes, reid=reid) + + +def resnet50(num_classes=1000, reid=False): + # https://download.pytorch.org/models/resnet50-19c8e357.pth + return ResNet(Bottleneck, [3, 4, 6, 3], num_classes=num_classes, reid=reid) + + +def resnext50_32x4d(num_classes=1000, reid=False): + # https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth + groups = 32 + width_per_group = 4 + return ResNet(Bottleneck, [3, 4, 6, 3], reid=reid, + num_classes=num_classes, groups=groups, width_per_group=width_per_group) + + +if __name__ == '__main__': + net = resnet18(reid=True) + x = torch.randn(4, 3, 128, 64) + y = net(x) diff --git a/deep_sort/deep/test.py b/deep_sort/deep/test.py new file mode 100644 index 0000000..eb40bac --- /dev/null +++ b/deep_sort/deep/test.py @@ -0,0 +1,77 @@ +import torch +import torch.backends.cudnn as cudnn +import torchvision + +import argparse +import os + +from model import Net + +parser = argparse.ArgumentParser(description="Train on market1501") +parser.add_argument("--data-dir", default='data', type=str) +parser.add_argument("--no-cuda", action="store_true") 
+parser.add_argument("--gpu-id", default=0, type=int) +args = parser.parse_args() + +# device +device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" +if torch.cuda.is_available() and not args.no_cuda: + cudnn.benchmark = True + +# data loader +root = args.data_dir +query_dir = os.path.join(root, "query") +gallery_dir = os.path.join(root, "gallery") +transform = torchvision.transforms.Compose([ + torchvision.transforms.Resize((128, 64)), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) +]) +queryloader = torch.utils.data.DataLoader( + torchvision.datasets.ImageFolder(query_dir, transform=transform), + batch_size=64, shuffle=False +) +galleryloader = torch.utils.data.DataLoader( + torchvision.datas0ets.ImageFolder(gallery_dir, transform=transform), + batch_size=64, shuffle=False +) + +# net definition +net = Net(reid=True) +assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" +print('Loading from checkpoint/ckpt.t7') +checkpoint = torch.load("./checkpoint/ckpt.t7") +net_dict = checkpoint['net_dict'] +net.load_state_dict(net_dict, strict=False) +net.eval() +net.to(device) + +# compute features +query_features = torch.tensor([]).float() +query_labels = torch.tensor([]).long() +gallery_features = torch.tensor([]).float() +gallery_labels = torch.tensor([]).long() + +with torch.no_grad(): + for idx, (inputs, labels) in enumerate(queryloader): + inputs = inputs.to(device) + features = net(inputs).cpu() + query_features = torch.cat((query_features, features), dim=0) + query_labels = torch.cat((query_labels, labels)) + + for idx, (inputs, labels) in enumerate(galleryloader): + inputs = inputs.to(device) + features = net(inputs).cpu() + gallery_features = torch.cat((gallery_features, features), dim=0) + gallery_labels = torch.cat((gallery_labels, labels)) + +gallery_labels -= 2 + +# save features +features = { + "qf": query_features, + "ql": query_labels, + "gf": gallery_features, + "gl": gallery_labels +} +torch.save(features, "features.pth") diff --git a/deep_sort/deep/train.jpg b/deep_sort/deep/train.jpg new file mode 100644 index 0000000..3635a61 Binary files /dev/null and b/deep_sort/deep/train.jpg differ diff --git a/deep_sort/deep/train.py b/deep_sort/deep/train.py new file mode 100644 index 0000000..9922038 --- /dev/null +++ b/deep_sort/deep/train.py @@ -0,0 +1,151 @@ +import argparse +import os +import tempfile + +import math +import warnings +import matplotlib.pyplot as plt +import torch +import torchvision +from torch.optim import lr_scheduler + +from multi_train_utils.distributed_utils import init_distributed_mode, cleanup +from multi_train_utils.train_eval_utils import train_one_epoch, evaluate, load_model +import torch.distributed as dist +from datasets import ClsDataset, read_split_data + +from model import Net +from resnet import resnet18 + +# plot figure +x_epoch = [] +record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []} +fig = plt.figure() +ax0 = fig.add_subplot(121, title="loss") +ax1 = fig.add_subplot(122, title="top1_err") + + +def draw_curve(epoch, train_loss, train_err, test_loss, test_err): + global record + record['train_loss'].append(train_loss) + record['train_err'].append(train_err) + record['test_loss'].append(test_loss) + record['test_err'].append(test_err) + + x_epoch.append(epoch) + ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train') + ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val') 
+ ax1.plot(x_epoch, record['train_err'], 'bo-', label='train') + ax1.plot(x_epoch, record['test_err'], 'ro-', label='val') + if epoch == 0: + ax0.legend() + ax1.legend() + fig.savefig("train.jpg") + + +def main(args): + batch_size = args.batch_size + device = 'cuda:{}'.format(args.gpu_id) if torch.cuda.is_available() else 'cpu' + + train_info, val_info, num_classes = read_split_data(args.data_dir, valid_rate=0.2) + train_images_path, train_labels = train_info + val_images_path, val_labels = val_info + + transform_train = torchvision.transforms.Compose([ + torchvision.transforms.RandomCrop((128, 64), padding=4), + torchvision.transforms.RandomHorizontalFlip(), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + transform_val = torchvision.transforms.Compose([ + torchvision.transforms.Resize((128, 64)), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + train_dataset = ClsDataset( + images_path=train_images_path, + images_labels=train_labels, + transform=transform_train + ) + val_dataset = ClsDataset( + images_path=val_images_path, + images_labels=val_labels, + transform=transform_val + ) + + number_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) + print('Using {} dataloader workers every process'.format(number_workers)) + + train_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=batch_size, + shuffle=True, + pin_memory=True, + num_workers=number_workers + ) + val_loader = torch.utils.data.DataLoader( + val_dataset, + batch_size=batch_size, + shuffle=False, + pin_memory=True, + num_workers=number_workers, + ) + + # net definition + start_epoch = 0 + net = Net(num_classes=num_classes) + if args.weights: + print('Loading from ', args.weights) + checkpoint = torch.load(args.weights, map_location='cpu') + net_dict = checkpoint if 'net_dict' not in checkpoint else checkpoint['net_dict'] + start_epoch = checkpoint['epoch'] if 'epoch' in checkpoint else start_epoch + net = load_model(net_dict, net.state_dict(), net) + + if args.freeze_layers: + for name, param in net.named_parameters(): + if 'classifier' not in name: + param.requires_grad = False + + net.to(device) + + # loss and optimizer + pg = [p for p in net.parameters() if p.requires_grad] + optimizer = torch.optim.SGD(pg, args.lr, momentum=0.9, weight_decay=5e-4) + + lr = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lr) + for epoch in range(start_epoch, start_epoch + args.epochs): + train_positive, train_loss = train_one_epoch(net, optimizer, train_loader, device, epoch) + train_acc = train_positive / len(train_dataset) + scheduler.step() + + test_positive, test_loss = evaluate(net, val_loader, device) + test_acc = test_positive / len(val_dataset) + + print('[epoch {}] accuracy: {}'.format(epoch, test_acc)) + + state_dict = { + 'net_dict': net.state_dict(), + 'acc': test_acc, + 'epoch': epoch + } + torch.save(state_dict, './checkpoint/model_{}.pth'.format(epoch)) + draw_curve(epoch, train_loss, 1 - train_acc, test_loss, 1 - test_acc) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Train on market1501") + parser.add_argument("--data-dir", default='data', type=str) + parser.add_argument('--epochs', type=int, default=40) + parser.add_argument('--batch_size', type=int, default=32) + parser.add_argument("--lr", default=0.001, 
type=float) + parser.add_argument('--lrf', default=0.1, type=float) + + parser.add_argument('--weights', type=str, default='./checkpoint/resnet18.pth') + parser.add_argument('--freeze-layers', action='store_true') + + parser.add_argument('--gpu_id', default='0', help='gpu id') + args = parser.parse_args() + + main(args) diff --git a/deep_sort/deep/train_multiGPU.py b/deep_sort/deep/train_multiGPU.py new file mode 100644 index 0000000..27d5ab7 --- /dev/null +++ b/deep_sort/deep/train_multiGPU.py @@ -0,0 +1,189 @@ +import argparse +import os +import tempfile + +import math +import warnings +import matplotlib.pyplot as plt +import torch +import torchvision +from torch.optim import lr_scheduler + +from multi_train_utils.distributed_utils import init_distributed_mode, cleanup +from multi_train_utils.train_eval_utils import train_one_epoch, evaluate, load_model +import torch.distributed as dist +from datasets import ClsDataset, read_split_data + +from resnet import resnet18 + + +# plot figure +x_epoch = [] +record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []} +fig = plt.figure() +ax0 = fig.add_subplot(121, title="loss") +ax1 = fig.add_subplot(122, title="top1_err") + + +def draw_curve(epoch, train_loss, train_err, test_loss, test_err): + global record + record['train_loss'].append(train_loss) + record['train_err'].append(train_err) + record['test_loss'].append(test_loss) + record['test_err'].append(test_err) + + x_epoch.append(epoch) + ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train') + ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val') + ax1.plot(x_epoch, record['train_err'], 'bo-', label='train') + ax1.plot(x_epoch, record['test_err'], 'ro-', label='val') + if epoch == 0: + ax0.legend() + ax1.legend() + fig.savefig("train.jpg") + + +def main(args): + init_distributed_mode(args) + + rank = args.rank + device = torch.device(args.device) + batch_size = args.batch_size + weights_path = args.weights + args.lr *= args.world_size + checkpoint_path = '' + + if rank == 0: + print(args) + if os.path.exists('./checkpoint') is False: + os.mkdir('./checkpoint') + + train_info, val_info, num_classes = read_split_data(args.data_dir, valid_rate=0.2) + train_images_path, train_labels = train_info + val_images_path, val_labels = val_info + + transform_train = torchvision.transforms.Compose([ + torchvision.transforms.RandomCrop((128, 64), padding=4), + torchvision.transforms.RandomHorizontalFlip(), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + transform_val = torchvision.transforms.Compose([ + torchvision.transforms.Resize((128, 64)), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + train_dataset = ClsDataset( + images_path=train_images_path, + images_labels=train_labels, + transform=transform_train + ) + val_dataset = ClsDataset( + images_path=val_images_path, + images_labels=val_labels, + transform=transform_val + ) + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset) + + train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, batch_size, drop_last=True) + + number_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) + + if rank == 0: + print('Using {} dataloader workers every process'.format(number_workers)) + + train_loader = torch.utils.data.DataLoader( + train_dataset, + 
batch_sampler=train_batch_sampler, + pin_memory=True, + num_workers=number_workers + ) + val_loader = torch.utils.data.DataLoader( + val_dataset, + sampler=val_sampler, + batch_size=batch_size, + pin_memory=True, + num_workers=number_workers, + ) + + # net definition + start_epoch = 0 + net = resnet18(num_classes=num_classes) + if args.weights: + print('Loading from ', args.weights) + checkpoint = torch.load(args.weights, map_location='cpu') + net_dict = checkpoint if 'net_dict' not in checkpoint else checkpoint['net_dict'] + start_epoch = checkpoint['epoch'] if 'epoch' in checkpoint else start_epoch + net = load_model(net_dict, net.state_dict(), net) + else: + warnings.warn("better providing pretraining weights") + checkpoint_path = os.path.join(tempfile.gettempdir(), 'initial_weights.pth') + if rank == 0: + torch.save(net.state_dict(), checkpoint_path) + + dist.barrier() + net.load_state_dict(torch.load(checkpoint_path, map_location='cpu')) + + if args.freeze_layers: + for name, param in net.named_parameters(): + if 'fc' not in name: + param.requires_grad = False + else: + if args.syncBN: + net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net) + net.to(device) + + net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.gpu]) + + # loss and optimizer + pg = [p for p in net.parameters() if p.requires_grad] + optimizer = torch.optim.SGD(pg, args.lr, momentum=0.9, weight_decay=5e-4) + + lr = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lr) + for epoch in range(start_epoch, start_epoch + args.epochs): + train_positive, train_loss = train_one_epoch(net, optimizer, train_loader, device, epoch) + train_acc = train_positive / len(train_dataset) + scheduler.step() + + test_positive, test_loss = evaluate(net, val_loader, device) + test_acc = test_positive / len(val_dataset) + + if rank == 0: + print('[epoch {}] accuracy: {}'.format(epoch, test_acc)) + + state_dict = { + 'net_dict': net.module.state_dict(), + 'acc': test_acc, + 'epoch': epoch + } + torch.save(state_dict, './checkpoint/model_{}.pth'.format(epoch)) + draw_curve(epoch, train_loss, 1 - train_acc, test_loss, 1 - test_acc) + + if rank == 0: + if os.path.exists(checkpoint_path) is True: + os.remove(checkpoint_path) + cleanup() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Train on market1501") + parser.add_argument("--data-dir", default='data', type=str) + parser.add_argument('--epochs', type=int, default=40) + parser.add_argument('--batch_size', type=int, default=32) + parser.add_argument("--lr", default=0.001, type=float) + parser.add_argument('--lrf', default=0.1, type=float) + parser.add_argument('--syncBN', type=bool, default=True) + + parser.add_argument('--weights', type=str, default='./checkpoint/resnet18.pth') + parser.add_argument('--freeze-layers', action='store_true') + + # not change the following parameters, the system will automatically assignment + parser.add_argument('--device', default='cuda', help='device id (i.e. 
0 or 0, 1 or cpu)') + parser.add_argument('--world_size', default=4, type=int, help='number of distributed processes') + parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training') + args = parser.parse_args() + + main(args) diff --git a/deep_sort/deep_sort.py b/deep_sort/deep_sort.py new file mode 100644 index 0000000..ffca7b3 --- /dev/null +++ b/deep_sort/deep_sort.py @@ -0,0 +1,121 @@ +import numpy as np +import torch + +from .deep.feature_extractor import Extractor, FastReIDExtractor +from .sort.nn_matching import NearestNeighborDistanceMetric +from .sort.preprocessing import non_max_suppression +from .sort.detection import Detection +from .sort.tracker import Tracker + +__all__ = ['DeepSort'] + + +class DeepSort(object): + def __init__(self, model_path, model_config=None, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, + max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True): + self.min_confidence = min_confidence + self.nms_max_overlap = nms_max_overlap + + if model_config is None: + self.extractor = Extractor(model_path, use_cuda=use_cuda) + else: + self.extractor = FastReIDExtractor(model_config, model_path, use_cuda=use_cuda) + + max_cosine_distance = max_dist + metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) + self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) + + def update(self, bbox_xywh, confidences, classes, ori_img, masks=None): + self.height, self.width = ori_img.shape[:2] + # generate detections + features = self._get_features(bbox_xywh, ori_img) + bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) + detections = [Detection(bbox_tlwh[i], conf, label, features[i], None if masks is None else masks[i]) + for i, (conf, label) in enumerate(zip(confidences, classes)) + if conf > self.min_confidence] + + # run on non-maximum supression + boxes = np.array([d.tlwh for d in detections]) + scores = np.array([d.confidence for d in detections]) + indices = non_max_suppression(boxes, self.nms_max_overlap, scores) + detections = [detections[i] for i in indices] + + # update tracker + self.tracker.predict() + self.tracker.update(detections) + + # output bbox identities + outputs = [] + mask_outputs = [] + for track in self.tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + box = track.to_tlwh() + x1, y1, x2, y2 = self._tlwh_to_xyxy(box) + track_id = track.track_id + track_cls = track.cls + outputs.append(np.array([x1, y1, x2, y2, track_cls, track_id], dtype=np.int32)) + if track.mask is not None: + mask_outputs.append(track.mask) + if len(outputs) > 0: + outputs = np.stack(outputs, axis=0) + return outputs, mask_outputs + + """ + TODO: + Convert bbox from xc_yc_w_h to xtl_ytl_w_h + Thanks JieChen91@github.com for reporting this bug! + """ + + @staticmethod + def _xywh_to_tlwh(bbox_xywh): + if isinstance(bbox_xywh, np.ndarray): + bbox_tlwh = bbox_xywh.copy() + elif isinstance(bbox_xywh, torch.Tensor): + bbox_tlwh = bbox_xywh.clone() + bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. + bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. 
+ return bbox_tlwh + + def _xywh_to_xyxy(self, bbox_xywh): + x, y, w, h = bbox_xywh + x1 = max(int(x - w / 2), 0) + x2 = min(int(x + w / 2), self.width - 1) + y1 = max(int(y - h / 2), 0) + y2 = min(int(y + h / 2), self.height - 1) + return x1, y1, x2, y2 + + def _tlwh_to_xyxy(self, bbox_tlwh): + """ + TODO: + Convert bbox from xtl_ytl_w_h to xc_yc_w_h + Thanks JieChen91@github.com for reporting this bug! + """ + x, y, w, h = bbox_tlwh + x1 = max(int(x), 0) + x2 = min(int(x + w), self.width - 1) + y1 = max(int(y), 0) + y2 = min(int(y + h), self.height - 1) + return x1, y1, x2, y2 + + @staticmethod + def _xyxy_to_tlwh(bbox_xyxy): + x1, y1, x2, y2 = bbox_xyxy + + t = x1 + l = y1 + w = int(x2 - x1) + h = int(y2 - y1) + return t, l, w, h + + def _get_features(self, bbox_xywh, ori_img): + im_crops = [] + for box in bbox_xywh: + x1, y1, x2, y2 = self._xywh_to_xyxy(box) + im = ori_img[y1:y2, x1:x2] + im_crops.append(im) + if im_crops: + features = self.extractor(im_crops) + else: + features = np.array([]) + return features diff --git a/deep_sort/sort/__init__.py b/deep_sort/sort/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/deep_sort/sort/detection.py b/deep_sort/sort/detection.py new file mode 100644 index 0000000..fa33467 --- /dev/null +++ b/deep_sort/sort/detection.py @@ -0,0 +1,51 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np + + +class Detection(object): + """ + This class represents a bounding box detection in a single image. + + Parameters + ---------- + tlwh : array_like + Bounding box in format `(x, y, w, h)`. + confidence : float + Detector confidence score. + feature : array_like + A feature vector that describes the object contained in this image. + + Attributes + ---------- + tlwh : ndarray + Bounding box in format `(top left x, top left y, width, height)`. + confidence : ndarray + Detector confidence score. + feature : ndarray | NoneType + A feature vector that describes the object contained in this image. + + """ + + def __init__(self, tlwh, confidence, label, feature, mask=None): + self.tlwh = np.asarray(tlwh, dtype=np.float32) + self.confidence = float(confidence) + self.cls = int(label) + self.feature = np.asarray(feature, dtype=np.float32) + self.mask = mask + + def to_tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + def to_xyah(self): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret diff --git a/deep_sort/sort/iou_matching.py b/deep_sort/sort/iou_matching.py new file mode 100644 index 0000000..c4dd0b8 --- /dev/null +++ b/deep_sort/sort/iou_matching.py @@ -0,0 +1,81 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +from . import linear_assignment + + +def iou(bbox, candidates): + """Computer intersection over union. + + Parameters + ---------- + bbox : ndarray + A bounding box in format `(top left x, top left y, width, height)`. + candidates : ndarray + A matrix of candidate bounding boxes (one per row) in the same format + as `bbox`. + + Returns + ------- + ndarray + The intersection over union in [0, 1] between the `bbox` and each + candidate. A higher score means a larger fraction of the `bbox` is + occluded by the candidate. 
+ + """ + bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, :2] + candidates[:, 2:] + + tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] + br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] + wh = np.maximum(0., br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = bbox[2:].prod() + area_candidates = candidates[:, 2:].prod(axis=1) + return area_intersection / (area_bbox + area_candidates - area_intersection) + + +def iou_cost(tracks, detections, track_indices=None, + detection_indices=None): + """An intersection over union distance metric. + + Parameters + ---------- + tracks : List[deep_sort.track.Track] + A list of tracks. + detections : List[deep_sort.detection.Detection] + A list of detections. + track_indices : Optional[List[int]] + A list of indices to tracks that should be matched. Defaults to + all `tracks`. + detection_indices : Optional[List[int]] + A list of indices to detections that should be matched. Defaults + to all `detections`. + + Returns + ------- + ndarray + Returns a cost matrix of shape + len(track_indices), len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. + + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = linear_assignment.INFTY_COST + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray([detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1. - iou(bbox, candidates) + return cost_matrix diff --git a/deep_sort/sort/kalman_filter.py b/deep_sort/sort/kalman_filter.py new file mode 100644 index 0000000..dbd56da --- /dev/null +++ b/deep_sort/sort/kalman_filter.py @@ -0,0 +1,231 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. 
/ 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + mean = np.dot(self._motion_mat, mean) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. 
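+
+        Notes
+        -----
+        The Kalman gain is obtained from a Cholesky factorization of the
+        projected covariance (``scipy.linalg.cho_factor`` / ``cho_solve``)
+        rather than an explicit matrix inverse.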
+ + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + # new_covariance = covariance - np.linalg.multi_dot(( + # kalman_gain, projected_cov, kalman_gain.T)) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, self._update_mat, covariance)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False): + """Compute gating distance between state distribution and measurements. + + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. + + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + cholesky_factor = np.linalg.cholesky(covariance) + d = measurements - mean + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha diff --git a/deep_sort/sort/linear_assignment.py b/deep_sort/sort/linear_assignment.py new file mode 100644 index 0000000..858b71a --- /dev/null +++ b/deep_sort/sort/linear_assignment.py @@ -0,0 +1,192 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +# from sklearn.utils.linear_assignment_ import linear_assignment +from scipy.optimize import linear_sum_assignment as linear_assignment +from . import kalman_filter + + +INFTY_COST = 1e+5 + + +def min_cost_matching( + distance_metric, max_distance, tracks, detections, track_indices=None, + detection_indices=None): + """Solve linear assignment problem. + + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection_indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. 
+ track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. + + cost_matrix = distance_metric( + tracks, detections, track_indices, detection_indices) + cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 + + row_indices, col_indices = linear_assignment(cost_matrix) + + matches, unmatched_tracks, unmatched_detections = [], [], [] + for col, detection_idx in enumerate(detection_indices): + if col not in col_indices: + unmatched_detections.append(detection_idx) + for row, track_idx in enumerate(track_indices): + if row not in row_indices: + unmatched_tracks.append(track_idx) + for row, col in zip(row_indices, col_indices): + track_idx = track_indices[row] + detection_idx = detection_indices[col] + if cost_matrix[row, col] > max_distance: + unmatched_tracks.append(track_idx) + unmatched_detections.append(detection_idx) + else: + matches.append((track_idx, detection_idx)) + return matches, unmatched_tracks, unmatched_detections + + +def matching_cascade( + distance_metric, max_distance, cascade_depth, tracks, detections, + track_indices=None, detection_indices=None): + """Run matching cascade. + + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + cascade_depth: int + The cascade depth, should be se to the maximum track age. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : Optional[List[int]] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). Defaults to all tracks. + detection_indices : Optional[List[int]] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). Defaults to all + detections. + + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. 
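+
+    Notes
+    -----
+    Matching proceeds level by level: at level ``l`` only tracks with
+    ``time_since_update == 1 + l`` are considered, so recently updated
+    tracks are matched before older ones.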
+ + """ + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + unmatched_detections = detection_indices + matches = [] + for level in range(cascade_depth): + if len(unmatched_detections) == 0: # No detections left + break + + track_indices_l = [ + k for k in track_indices + if tracks[k].time_since_update == 1 + level + ] + if len(track_indices_l) == 0: # Nothing to match at this level + continue + + matches_l, _, unmatched_detections = \ + min_cost_matching( + distance_metric, max_distance, tracks, detections, + track_indices_l, unmatched_detections) + matches += matches_l + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) + return matches, unmatched_tracks, unmatched_detections + + +def gate_cost_matrix( + kf, cost_matrix, tracks, detections, track_indices, detection_indices, + gated_cost=INFTY_COST, only_position=False): + """Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. + + Parameters + ---------- + kf : The Kalman filter. + cost_matrix : ndarray + The NxM dimensional cost matrix, where N is the number of track indices + and M is the number of detection indices, such that entry (i, j) is the + association cost between `tracks[track_indices[i]]` and + `detections[detection_indices[j]]`. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + gated_cost : Optional[float] + Entries in the cost matrix corresponding to infeasible associations are + set this value. Defaults to a very large value. + only_position : Optional[bool] + If True, only the x, y position of the state distribution is considered + during gating. Defaults to False. + + Returns + ------- + ndarray + Returns the modified cost matrix. + + """ + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray( + [detections[i].to_xyah() for i in detection_indices]) + for row, track_idx in enumerate(track_indices): + track = tracks[track_idx] + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = gated_cost + return cost_matrix diff --git a/deep_sort/sort/nn_matching.py b/deep_sort/sort/nn_matching.py new file mode 100644 index 0000000..21e5b4f --- /dev/null +++ b/deep_sort/sort/nn_matching.py @@ -0,0 +1,176 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np + + +def _pdist(a, b): + """Compute pair-wise squared distance between points in `a` and `b`. + + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. + + """ + a, b = np.asarray(a), np.asarray(b) + if len(a) == 0 or len(b) == 0: + return np.zeros((len(a), len(b))) + a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) + r2 = -2. 
* np.dot(a, b.T) + a2[:, None] + b2[None, :] + r2 = np.clip(r2, 0., float(np.inf)) + return r2 + + +def _cosine_distance(a, b, data_is_normalized=False): + """Compute pair-wise cosine distance between points in `a` and `b`. + + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + data_is_normalized : Optional[bool] + If True, assumes rows in a and b are unit length vectors. + Otherwise, a and b are explicitly normalized to lenght 1. + + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. + + """ + if not data_is_normalized: + a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) + b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) + return 1. - np.dot(a, b.T) + + +def _nn_euclidean_distance(x, y): + """ Helper function for nearest neighbor distance metric (Euclidean). + + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest Euclidean distance to a sample in `x`. + + """ + distances = _pdist(x, y) + return np.maximum(0.0, distances.min(axis=0)) + + +def _nn_cosine_distance(x, y): + """ Helper function for nearest neighbor distance metric (cosine). + + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest cosine distance to a sample in `x`. + + """ + distances = _cosine_distance(x, y) + return distances.min(axis=0) + + +class NearestNeighborDistanceMetric(object): + """ + A nearest neighbor distance metric that, for each target, returns + the closest distance to any sample that has been observed so far. + + Parameters + ---------- + metric : str + Either "euclidean" or "cosine". + matching_threshold: float + The matching threshold. Samples with larger distance are considered an + invalid match. + budget : Optional[int] + If not None, fix samples per class to at most this number. Removes + the oldest samples when the budget is reached. + + Attributes + ---------- + samples : Dict[int -> List[ndarray]] + A dictionary that maps from target identities to the list of samples + that have been observed so far. + + """ + + def __init__(self, metric, matching_threshold, budget=None): + + if metric == "euclidean": + self._metric = _nn_euclidean_distance + elif metric == "cosine": + self._metric = _nn_cosine_distance + else: + raise ValueError( + "Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + """Update the distance metric with new data. + + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : ndarray + An integer array of associated target identities. + active_targets : List[int] + A list of targets that are currently present in the scene. 
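+
+        Notes
+        -----
+        Samples of targets not listed in `active_targets` are discarded, and
+        when a `budget` is set each target keeps only its most recent
+        `budget` feature samples.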
+ + """ + for feature, target in zip(features, targets): + self.samples.setdefault(target, []).append(feature) + if self.budget is not None: + self.samples[target] = self.samples[target][-self.budget:] + self.samples = {k: self.samples[k] for k in active_targets} + + def distance(self, features, targets): + """Compute distance between features and targets. + + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : List[int] + A list of targets to match the given `features` against. + + Returns + ------- + ndarray + Returns a cost matrix of shape len(targets), len(features), where + element (i, j) contains the closest squared distance between + `targets[i]` and `features[j]`. + + """ + cost_matrix = np.zeros((len(targets), len(features))) + for i, target in enumerate(targets): + cost_matrix[i, :] = self._metric(self.samples[target], features) + return cost_matrix diff --git a/deep_sort/sort/preprocessing.py b/deep_sort/sort/preprocessing.py new file mode 100644 index 0000000..0062cda --- /dev/null +++ b/deep_sort/sort/preprocessing.py @@ -0,0 +1,73 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import cv2 + + +def non_max_suppression(boxes, max_bbox_overlap, scores=None): + """Suppress overlapping detections. + + Original code from [1]_ has been adapted to include confidence score. + + .. [1] http://www.pyimagesearch.com/2015/02/16/ + faster-non-maximum-suppression-python/ + + Examples + -------- + + >>> boxes = [d.roi for d in detections] + >>> scores = [d.confidence for d in detections] + >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) + >>> detections = [detections[i] for i in indices] + + Parameters + ---------- + boxes : ndarray + Array of ROIs (x, y, width, height). + max_bbox_overlap : float + ROIs that overlap more than this values are suppressed. + scores : Optional[array_like] + Detector confidence score. + + Returns + ------- + List[int] + Returns indices of detections that have survived non-maxima suppression. + + """ + if len(boxes) == 0: + return [] + + boxes = boxes.astype(np.float32) + pick = [] + + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + boxes[:, 0] + y2 = boxes[:, 3] + boxes[:, 1] + + area = (x2 - x1 + 1) * (y2 - y1 + 1) + if scores is not None: + idxs = np.argsort(scores) + else: + idxs = np.argsort(y2) + + while len(idxs) > 0: + last = len(idxs) - 1 + i = idxs[last] + pick.append(i) + + xx1 = np.maximum(x1[i], x1[idxs[:last]]) + yy1 = np.maximum(y1[i], y1[idxs[:last]]) + xx2 = np.minimum(x2[i], x2[idxs[:last]]) + yy2 = np.minimum(y2[i], y2[idxs[:last]]) + + w = np.maximum(0, xx2 - xx1 + 1) + h = np.maximum(0, yy2 - yy1 + 1) + + overlap = (w * h) / (area[idxs[:last]] + area[idxs[last]] - w * h) + + idxs = np.delete( + idxs, np.concatenate( + ([last], np.where(overlap > max_bbox_overlap)[0]))) + + return pick diff --git a/deep_sort/sort/track.py b/deep_sort/sort/track.py new file mode 100644 index 0000000..5dd7839 --- /dev/null +++ b/deep_sort/sort/track.py @@ -0,0 +1,169 @@ +# vim: expandtab:ts=4:sw=4 + + +class TrackState: + """ + Enumeration type for the single target track state. Newly created tracks are + classified as `tentative` until enough evidence has been collected. Then, + the track state is changed to `confirmed`. Tracks that are no longer alive + are classified as `deleted` to mark them for removal from the set of active + tracks. 
+ + """ + + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Track: + """ + A single target track with state space `(x, y, a, h)` and associated + velocities, where `(x, y)` is the center of the bounding box, `a` is the + aspect ratio and `h` is the height. + + Parameters + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + max_age : int + The maximum number of consecutive misses before the track state is + set to `Deleted`. + feature : Optional[ndarray] + Feature vector of the detection this track originates from. If not None, + this feature is added to the `features` cache. + + Attributes + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + hits : int + Total number of measurement updates. + age : int + Total number of frames since first occurance. + time_since_update : int + Total number of frames since last measurement update. + state : TrackState + The current track state. + features : List[ndarray] + A cache of features. On each measurement update, the associated feature + vector is added to this list. + + """ + + def __init__(self, mean, covariance, track_id, n_init, max_age, + feature=None, cls=None, mask=None): + self.mean = mean + self.covariance = covariance + self.track_id = track_id + self.hits = 1 + self.age = 1 + self.time_since_update = 0 + + self.state = TrackState.Tentative + self.cls = cls + self.mask = mask + self.features = [] + if feature is not None: + self.features.append(feature) + + self._n_init = n_init + self._max_age = max_age + + def to_tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + + Returns + ------- + ndarray + The bounding box. + + """ + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + """Get current position in bounding box format `(min x, miny, max x, + max y)`. + + Returns + ------- + ndarray + The bounding box. + + """ + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def predict(self, kf): + """Propagate the state distribution to the current time step using a + Kalman filter prediction step. + + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + + """ + self.mean, self.covariance = kf.predict(self.mean, self.covariance) + self.age += 1 + self.time_since_update += 1 + + def update(self, kf, detection): + """Perform Kalman filter measurement update step and update the feature + cache. + + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + detection : Detection + The associated detection. + + """ + self.mask = detection.mask + self.mean, self.covariance = kf.update( + self.mean, self.covariance, detection.to_xyah()) + self.features.append(detection.feature) + + self.hits += 1 + self.time_since_update = 0 + if self.state == TrackState.Tentative and self.hits >= self._n_init: + self.state = TrackState.Confirmed + + def mark_missed(self): + """Mark this track as missed (no association at the current time step). 
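+
+        Tentative tracks are deleted immediately; confirmed tracks are
+        deleted once `time_since_update` exceeds the maximum age.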
+ """ + if self.state == TrackState.Tentative: + self.state = TrackState.Deleted + elif self.time_since_update > self._max_age: + self.state = TrackState.Deleted + + def is_tentative(self): + """Returns True if this track is tentative (unconfirmed). + """ + return self.state == TrackState.Tentative + + def is_confirmed(self): + """Returns True if this track is confirmed.""" + return self.state == TrackState.Confirmed + + def is_deleted(self): + """Returns True if this track is dead and should be deleted.""" + return self.state == TrackState.Deleted diff --git a/deep_sort/sort/tracker.py b/deep_sort/sort/tracker.py new file mode 100644 index 0000000..2a9d3c2 --- /dev/null +++ b/deep_sort/sort/tracker.py @@ -0,0 +1,138 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +from . import kalman_filter +from . import linear_assignment +from . import iou_matching +from .track import Track + + +class Tracker: + """ + This is the multi-target tracker. + + Parameters + ---------- + metric : nn_matching.NearestNeighborDistanceMetric + A distance metric for measurement-to-track association. + max_age : int + Maximum number of missed misses before a track is deleted. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + + Attributes + ---------- + metric : nn_matching.NearestNeighborDistanceMetric + The distance metric used for measurement to track association. + max_age : int + Maximum number of missed misses before a track is deleted. + n_init : int + Number of frames that a track remains in initialization phase. + kf : kalman_filter.KalmanFilter + A Kalman filter to filter target trajectories in image space. + tracks : List[Track] + The list of active tracks at the current time step. + + """ + + def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): + self.metric = metric + self.max_iou_distance = max_iou_distance + self.max_age = max_age + self.n_init = n_init + + self.kf = kalman_filter.KalmanFilter() + self.tracks = [] + self._next_id = 1 + + def predict(self): + """Propagate track state distributions one time step forward. + + This function should be called once every time step, before `update`. + """ + for track in self.tracks: + track.predict(self.kf) + + def update(self, detections): + """Perform measurement update and track management. + + Parameters + ---------- + detections : List[deep_sort.detection.Detection] + A list of detections at the current time step. + + """ + # Run matching cascade. + matches, unmatched_tracks, unmatched_detections = \ + self._match(detections) + + # Update track set. + for track_idx, detection_idx in matches: + self.tracks[track_idx].update( + self.kf, detections[detection_idx]) + for track_idx in unmatched_tracks: + self.tracks[track_idx].mark_missed() + for detection_idx in unmatched_detections: + self._initiate_track(detections[detection_idx]) + self.tracks = [t for t in self.tracks if not t.is_deleted()] + + # Update distance metric. 
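+        # Confirmed tracks contribute their cached appearance features to the
+        # nearest-neighbor metric; each track's feature cache is cleared
+        # after the metric has been refitted.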
+ active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] + features, targets = [], [] + for track in self.tracks: + if not track.is_confirmed(): + continue + features += track.features + targets += [track.track_id for _ in track.features] + track.features = [] + self.metric.partial_fit( + np.asarray(features), np.asarray(targets), active_targets) + + def _match(self, detections): + + def gated_metric(tracks, dets, track_indices, detection_indices): + features = np.array([dets[i].feature for i in detection_indices]) + targets = np.array([tracks[i].track_id for i in track_indices]) + cost_matrix = self.metric.distance(features, targets) + cost_matrix = linear_assignment.gate_cost_matrix( + self.kf, cost_matrix, tracks, dets, track_indices, + detection_indices) + + return cost_matrix + + # Split track set into confirmed and unconfirmed tracks. + confirmed_tracks = [ + i for i, t in enumerate(self.tracks) if t.is_confirmed()] + unconfirmed_tracks = [ + i for i, t in enumerate(self.tracks) if not t.is_confirmed()] + + # Associate confirmed tracks using appearance features. + matches_a, unmatched_tracks_a, unmatched_detections = \ + linear_assignment.matching_cascade( + gated_metric, self.metric.matching_threshold, self.max_age, + self.tracks, detections, confirmed_tracks) + + # Associate remaining tracks together with unconfirmed tracks using IOU. + iou_track_candidates = unconfirmed_tracks + [ + k for k in unmatched_tracks_a if + self.tracks[k].time_since_update == 1] + unmatched_tracks_a = [ + k for k in unmatched_tracks_a if + self.tracks[k].time_since_update != 1] + matches_b, unmatched_tracks_b, unmatched_detections = \ + linear_assignment.min_cost_matching( + iou_matching.iou_cost, self.max_iou_distance, self.tracks, + detections, iou_track_candidates, unmatched_detections) + + matches = matches_a + matches_b + unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) + return matches, unmatched_tracks, unmatched_detections + + def _initiate_track(self, detection): + mean, covariance = self.kf.initiate(detection.to_xyah()) + self.tracks.append(Track( + mean, covariance, self._next_id, self.n_init, self.max_age, + detection.feature, detection.cls, detection.mask)) + self._next_id += 1 diff --git a/deep_sort/utils/__init__.py b/deep_sort/utils/__init__.py new file mode 100644 index 0000000..fa9f7c7 --- /dev/null +++ b/deep_sort/utils/__init__.py @@ -0,0 +1,2 @@ +def datasets(): + return None \ No newline at end of file diff --git a/deep_sort/utils/asserts.py b/deep_sort/utils/asserts.py new file mode 100644 index 0000000..59a73cc --- /dev/null +++ b/deep_sort/utils/asserts.py @@ -0,0 +1,13 @@ +from os import environ + + +def assert_in(file, files_to_check): + if file not in files_to_check: + raise AssertionError("{} does not exist in the list".format(str(file))) + return True + + +def assert_in_env(check_list: list): + for item in check_list: + assert_in(item, environ.keys()) + return True diff --git a/deep_sort/utils/draw.py b/deep_sort/utils/draw.py new file mode 100644 index 0000000..9ab2061 --- /dev/null +++ b/deep_sort/utils/draw.py @@ -0,0 +1,51 @@ +import numpy as np +import cv2 + +palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) + + +def compute_color_for_labels(label): + """ + Simple function that adds fixed color depending on the class + """ + color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] + return tuple(color) + + +def draw_masks(image, mask, color, thresh: float = 0.7, alpha: float = 0.5): + np_image = np.asarray(image) + mask = 
mask > thresh + + color = np.asarray(color) + img_to_draw = np.copy(np_image) + # TODO: There might be a way to vectorize this + img_to_draw[mask] = color + + out = np_image * (1 - alpha) + img_to_draw * alpha + return out.astype(np.uint8) + + +def draw_boxes(img, bbox, names=None, identities=None, masks=None, offset=(0, 0)): + for i, box in enumerate(bbox): + x1, y1, x2, y2 = [int(i) for i in box] + x1 += offset[0] + x2 += offset[0] + y1 += offset[1] + y2 += offset[1] + # box text and bar + id = int(identities[i]) if identities is not None else 0 + color = compute_color_for_labels(id) + label = '{:}{:d}'.format(names[i], id) + t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] + if masks is not None: + mask = masks[i] + img = draw_masks(img, mask, color) + cv2.rectangle(img, (x1, y1), (x2, y2), color, 3) + cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1) + cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2) + return img + + +if __name__ == '__main__': + for i in range(82): + print(compute_color_for_labels(i)) diff --git a/deep_sort/utils/evaluation.py b/deep_sort/utils/evaluation.py new file mode 100644 index 0000000..1001794 --- /dev/null +++ b/deep_sort/utils/evaluation.py @@ -0,0 +1,103 @@ +import os +import numpy as np +import copy +import motmetrics as mm +mm.lap.default_solver = 'lap' +from utils.io import read_results, unzip_objs + + +class Evaluator(object): + + def __init__(self, data_root, seq_name, data_type): + self.data_root = data_root + self.seq_name = seq_name + self.data_type = data_type + + self.load_annotations() + self.reset_accumulator() + + def load_annotations(self): + assert self.data_type == 'mot' + + gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') + self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) + self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) + + def reset_accumulator(self): + self.acc = mm.MOTAccumulator(auto_id=True) + + def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): + # results + trk_tlwhs = np.copy(trk_tlwhs) + trk_ids = np.copy(trk_ids) + + # gts + gt_objs = self.gt_frame_dict.get(frame_id, []) + gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] + + # ignore boxes + ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) + ignore_tlwhs = unzip_objs(ignore_objs)[0] + + + # remove ignored results + keep = np.ones(len(trk_tlwhs), dtype=bool) + iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) + if len(iou_distance) > 0: + match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) + match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) + match_ious = iou_distance[match_is, match_js] + + match_js = np.asarray(match_js, dtype=int) + match_js = match_js[np.logical_not(np.isnan(match_ious))] + keep[match_js] = False + trk_tlwhs = trk_tlwhs[keep] + trk_ids = trk_ids[keep] + + # get distance matrix + iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) + + # acc + self.acc.update(gt_ids, trk_ids, iou_distance) + + if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): + events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics + else: + events = None + return events + + def eval_file(self, filename): + self.reset_accumulator() + + result_frame_dict = read_results(filename, self.data_type, is_gt=False) + frames = 
sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) + for frame_id in frames: + trk_objs = result_frame_dict.get(frame_id, []) + trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] + self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) + + return self.acc + + @staticmethod + def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): + names = copy.deepcopy(names) + if metrics is None: + metrics = mm.metrics.motchallenge_metrics + metrics = copy.deepcopy(metrics) + + mh = mm.metrics.create() + summary = mh.compute_many( + accs, + metrics=metrics, + names=names, + generate_overall=True + ) + + return summary + + @staticmethod + def save_summary(summary, filename): + import pandas as pd + writer = pd.ExcelWriter(filename) + summary.to_excel(writer) + writer.save() diff --git a/deep_sort/utils/io.py b/deep_sort/utils/io.py new file mode 100644 index 0000000..35bec62 --- /dev/null +++ b/deep_sort/utils/io.py @@ -0,0 +1,133 @@ +import os +from typing import Dict +import numpy as np + +# from utils.log import get_logger + + +def write_results(filename, results, data_type): + if data_type == 'mot': + save_format = '{frame},{id},{cls},{x1},{y1},{w},{h},-1,-1,-1,-1\n' + elif data_type == 'kitti': + save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' + else: + raise ValueError(data_type) + + with open(filename, 'w') as f: + for frame_id, tlwhs, track_ids, classes in results: + if data_type == 'kitti': + frame_id -= 1 + for tlwh, track_id, cls_id in zip(tlwhs, track_ids, classes): + if track_id < 0: + continue + x1, y1, w, h = tlwh + x2, y2 = x1 + w, y1 + h + line = save_format.format(frame=frame_id, id=track_id, cls=cls_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) + f.write(line) + + +# def write_results(filename, results_dict: Dict, data_type: str): +# if not filename: +# return +# path = os.path.dirname(filename) +# if not os.path.exists(path): +# os.makedirs(path) + +# if data_type in ('mot', 'mcmot', 'lab'): +# save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' +# elif data_type == 'kitti': +# save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' +# else: +# raise ValueError(data_type) + +# with open(filename, 'w') as f: +# for frame_id, frame_data in results_dict.items(): +# if data_type == 'kitti': +# frame_id -= 1 +# for tlwh, track_id in frame_data: +# if track_id < 0: +# continue +# x1, y1, w, h = tlwh +# x2, y2 = x1 + w, y1 + h +# line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) +# f.write(line) +# logger.info('Save results to {}'.format(filename)) + + +def read_results(filename, data_type: str, is_gt=False, is_ignore=False): + if data_type in ('mot', 'lab'): + read_fun = read_mot_results + else: + raise ValueError('Unknown data type: {}'.format(data_type)) + + return read_fun(filename, is_gt, is_ignore) + + +""" +labels={'ped', ... % 1 +'person_on_vhcl', ... % 2 +'car', ... % 3 +'bicycle', ... % 4 +'mbike', ... % 5 +'non_mot_vhcl', ... % 6 +'static_person', ... % 7 +'distractor', ... % 8 +'occluder', ... % 9 +'occluder_on_grnd', ... %10 +'occluder_full', ... % 11 +'reflection', ... % 12 +'crowd' ... 
% 13 +}; +""" + + +def read_mot_results(filename, is_gt, is_ignore): + valid_labels = {1} + ignore_labels = {2, 7, 8, 12} + results_dict = dict() + if os.path.isfile(filename): + with open(filename, 'r') as f: + for line in f.readlines(): + linelist = line.split(',') + if len(linelist) < 7: + continue + fid = int(linelist[0]) + if fid < 1: + continue + results_dict.setdefault(fid, list()) + + if is_gt: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + mark = int(float(linelist[6])) + if mark == 0 or label not in valid_labels: + continue + score = 1 + elif is_ignore: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + vis_ratio = float(linelist[8]) + if label not in ignore_labels and vis_ratio >= 0: + continue + else: + continue + score = 1 + else: + score = float(linelist[6]) + + tlwh = tuple(map(float, linelist[2:6])) + target_id = int(linelist[1]) + + results_dict[fid].append((tlwh, target_id, score)) + + return results_dict + + +def unzip_objs(objs): + if len(objs) > 0: + tlwhs, ids, scores = zip(*objs) + else: + tlwhs, ids, scores = [], [], [] + tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) + + return tlwhs, ids, scores \ No newline at end of file diff --git a/deep_sort/utils/json_logger.py b/deep_sort/utils/json_logger.py new file mode 100644 index 0000000..0afd0b4 --- /dev/null +++ b/deep_sort/utils/json_logger.py @@ -0,0 +1,383 @@ +""" +References: + https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f +""" +import json +from os import makedirs +from os.path import exists, join +from datetime import datetime + + +class JsonMeta(object): + HOURS = 3 + MINUTES = 59 + SECONDS = 59 + PATH_TO_SAVE = 'LOGS' + DEFAULT_FILE_NAME = 'remaining' + + +class BaseJsonLogger(object): + """ + This is the base class that returns __dict__ of its own + it also returns the dicts of objects in the attributes that are list instances + + """ + + def dic(self): + # returns dicts of objects + out = {} + for k, v in self.__dict__.items(): + if hasattr(v, 'dic'): + out[k] = v.dic() + elif isinstance(v, list): + out[k] = self.list(v) + else: + out[k] = v + return out + + @staticmethod + def list(values): + # applies the dic method on items in the list + return [v.dic() if hasattr(v, 'dic') else v for v in values] + + +class Label(BaseJsonLogger): + """ + For each bounding box there are various categories with confidences. Label class keeps track of that information. + """ + + def __init__(self, category: str, confidence: float): + self.category = category + self.confidence = confidence + + +class Bbox(BaseJsonLogger): + """ + This module stores the information for each frame and use them in JsonParser + Attributes: + labels (list): List of label module. + top (int): + left (int): + width (int): + height (int): + + Args: + bbox_id (float): + top (int): + left (int): + width (int): + height (int): + + References: + Check Label module for better understanding. + + + """ + + def __init__(self, bbox_id, top, left, width, height): + self.labels = [] + self.bbox_id = bbox_id + self.top = top + self.left = left + self.width = width + self.height = height + + def add_label(self, category, confidence): + # adds category and confidence only if top_k is not exceeded. 
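+        # the top_k limit itself is enforced by the caller
+        # (see BboxToJsonLogger.add_label_to_bbox / labels_full)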
+ self.labels.append(Label(category, confidence)) + + def labels_full(self, value): + return len(self.labels) == value + + +class Frame(BaseJsonLogger): + """ + This module stores the information for each frame and use them in JsonParser + Attributes: + timestamp (float): The elapsed time of captured frame + frame_id (int): The frame number of the captured video + bboxes (list of Bbox objects): Stores the list of bbox objects. + + References: + Check Bbox class for better information + + Args: + timestamp (float): + frame_id (int): + + """ + + def __init__(self, frame_id: int, timestamp: float = None): + self.frame_id = frame_id + self.timestamp = timestamp + self.bboxes = [] + + def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int): + bboxes_ids = [bbox.bbox_id for bbox in self.bboxes] + if bbox_id not in bboxes_ids: + self.bboxes.append(Bbox(bbox_id, top, left, width, height)) + else: + raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id)) + + def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float): + bboxes = {bbox.id: bbox for bbox in self.bboxes} + if bbox_id in bboxes.keys(): + res = bboxes.get(bbox_id) + res.add_label(category, confidence) + else: + raise ValueError('the bbox with id: {} does not exists!'.format(bbox_id)) + + +class BboxToJsonLogger(BaseJsonLogger): + """ + ُ This module is designed to automate the task of logging jsons. An example json is used + to show the contents of json file shortly + Example: + { + "video_details": { + "frame_width": 1920, + "frame_height": 1080, + "frame_rate": 20, + "video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi" + }, + "frames": [ + { + "frame_id": 329, + "timestamp": 3365.1254 + "bboxes": [ + { + "labels": [ + { + "category": "pedestrian", + "confidence": 0.9 + } + ], + "bbox_id": 0, + "top": 1257, + "left": 138, + "width": 68, + "height": 109 + } + ] + }], + + Attributes: + frames (dict): It's a dictionary that maps each frame_id to json attributes. + video_details (dict): information about video file. + top_k_labels (int): shows the allowed number of labels + start_time (datetime object): we use it to automate the json output by time. 
+ + Args: + top_k_labels (int): shows the allowed number of labels + + """ + + def __init__(self, top_k_labels: int = 1): + self.frames = {} + self.video_details = self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None, + video_name=None) + self.top_k_labels = top_k_labels + self.start_time = datetime.now() + + def set_top_k(self, value): + self.top_k_labels = value + + def frame_exists(self, frame_id: int) -> bool: + """ + Args: + frame_id (int): + + Returns: + bool: true if frame_id is recognized + """ + return frame_id in self.frames.keys() + + def add_frame(self, frame_id: int, timestamp: float = None) -> None: + """ + Args: + frame_id (int): + timestamp (float): opencv captured frame time property + + Raises: + ValueError: if frame_id would not exist in class frames attribute + + Returns: + None + + """ + if not self.frame_exists(frame_id): + self.frames[frame_id] = Frame(frame_id, timestamp) + else: + raise ValueError("Frame id: {} already exists".format(frame_id)) + + def bbox_exists(self, frame_id: int, bbox_id: int) -> bool: + """ + Args: + frame_id: + bbox_id: + + Returns: + bool: if bbox exists in frame bboxes list + """ + bboxes = [] + if self.frame_exists(frame_id=frame_id): + bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes] + return bbox_id in bboxes + + def find_bbox(self, frame_id: int, bbox_id: int): + """ + + Args: + frame_id: + bbox_id: + + Returns: + bbox_id (int): + + Raises: + ValueError: if bbox_id does not exist in the bbox list of specific frame. + """ + if not self.bbox_exists(frame_id, bbox_id): + raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id)) + bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes} + return bboxes.get(bbox_id) + + def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None: + """ + + Args: + frame_id (int): + bbox_id (int): + top (int): + left (int): + width (int): + height (int): + + Returns: + None + + Raises: + ValueError: if bbox_id already exist in frame information with frame_id + ValueError: if frame_id does not exist in frames attribute + """ + if self.frame_exists(frame_id): + frame = self.frames[frame_id] + if not self.bbox_exists(frame_id, bbox_id): + frame.add_bbox(bbox_id, top, left, width, height) + else: + raise ValueError( + "frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id)) + else: + raise ValueError("frame with frame_id: {} does not exist".format(frame_id)) + + def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float): + """ + Args: + frame_id: + bbox_id: + category: + confidence: the confidence value returned from yolo detection + + Returns: + None + + Raises: + ValueError: if labels quota (top_k_labels) exceeds. 
+ """ + bbox = self.find_bbox(frame_id, bbox_id) + if not bbox.labels_full(self.top_k_labels): + bbox.add_label(category, confidence) + else: + raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id)) + + def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None, + video_name: str = None): + self.video_details['frame_width'] = frame_width + self.video_details['frame_height'] = frame_height + self.video_details['frame_rate'] = frame_rate + self.video_details['video_name'] = video_name + + def output(self): + output = {'video_details': self.video_details} + result = list(self.frames.values()) + output['frames'] = [item.dic() for item in result] + return output + + def json_output(self, output_name): + """ + Args: + output_name: + + Returns: + None + + Notes: + It creates the json output with `output_name` name. + """ + if not output_name.endswith('.json'): + output_name += '.json' + with open(output_name, 'w') as file: + json.dump(self.output(), file) + file.close() + + def set_start(self): + self.start_time = datetime.now() + + def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0, + seconds: int = 60) -> None: + """ + Notes: + Creates folder and then periodically stores the jsons on that address. + + Args: + output_dir (str): the directory where output files will be stored + hours (int): + minutes (int): + seconds (int): + + Returns: + None + + """ + end = datetime.now() + interval = 0 + interval += abs(min([hours, JsonMeta.HOURS]) * 3600) + interval += abs(min([minutes, JsonMeta.MINUTES]) * 60) + interval += abs(min([seconds, JsonMeta.SECONDS])) + diff = (end - self.start_time).seconds + + if diff > interval: + output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json' + if not exists(output_dir): + makedirs(output_dir) + output = join(output_dir, output_name) + self.json_output(output_name=output) + self.frames = {} + self.start_time = datetime.now() + + def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE): + """ + saves as the number of frames quota increases higher. + :param frames_quota: + :param frame_counter: + :param output_dir: + :return: + """ + pass + + def flush(self, output_dir): + """ + Notes: + We use this function to output jsons whenever possible. + like the time that we exit the while loop of opencv. 
+ + Args: + output_dir: + + Returns: + None + + """ + filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json' + output = join(output_dir, filename) + self.json_output(output_name=output) diff --git a/deep_sort/utils/log.py b/deep_sort/utils/log.py new file mode 100644 index 0000000..0d48757 --- /dev/null +++ b/deep_sort/utils/log.py @@ -0,0 +1,17 @@ +import logging + + +def get_logger(name='root'): + formatter = logging.Formatter( + # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') + fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') + + handler = logging.StreamHandler() + handler.setFormatter(formatter) + + logger = logging.getLogger(name) + logger.setLevel(logging.INFO) + logger.addHandler(handler) + return logger + + diff --git a/deep_sort/utils/parser.py b/deep_sort/utils/parser.py new file mode 100644 index 0000000..c9b13d8 --- /dev/null +++ b/deep_sort/utils/parser.py @@ -0,0 +1,38 @@ +import os +import yaml +from easydict import EasyDict as edict + +class YamlParser(edict): + """ + This is yaml parser based on EasyDict. + """ + def __init__(self, cfg_dict=None, config_file=None): + if cfg_dict is None: + cfg_dict = {} + + if config_file is not None: + assert (os.path.isfile(config_file)) + with open(config_file, 'r') as fo: + cfg_dict.update(yaml.safe_load(fo.read())) + + super(YamlParser, self).__init__(cfg_dict) + + + def merge_from_file(self, config_file): + with open(config_file, 'r') as fo: + self.update(yaml.safe_load(fo.read())) + + + def merge_from_dict(self, config_dict): + self.update(config_dict) + + +def get_config(config_file=None): + return YamlParser(config_file=config_file) + + +if __name__ == "__main__": + cfg = YamlParser(config_file="../configs/yolov3.yaml") + cfg.merge_from_file("../configs/deep_sort.yaml") + + import ipdb; ipdb.set_trace() diff --git a/deep_sort/utils/tools.py b/deep_sort/utils/tools.py new file mode 100644 index 0000000..965fb69 --- /dev/null +++ b/deep_sort/utils/tools.py @@ -0,0 +1,39 @@ +from functools import wraps +from time import time + + +def is_video(ext: str): + """ + Returns true if ext exists in + allowed_exts for video files. + + Args: + ext: + + Returns: + + """ + + allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') + return any((ext.endswith(x) for x in allowed_exts)) + + +def tik_tok(func): + """ + keep track of time for each process. 
+ Args: + func: + + Returns: + + """ + @wraps(func) + def _time_it(*args, **kwargs): + start = time() + try: + return func(*args, **kwargs) + finally: + end_ = time() + print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) + + return _time_it diff --git a/requirements_cpu.txt b/requirements_cpu.txt index 5ccb352..4ba3cc3 100644 --- a/requirements_cpu.txt +++ b/requirements_cpu.txt @@ -9,6 +9,7 @@ PyJWT~=2.3.0 python-multipart==0.0.5 redis~=4.1.4 SQLAlchemy~=2.0.34 +cryptography==44.0.2 uvicorn~=0.17.5 uvicorn[standard] loguru~=0.6.0 diff --git a/requirements_deepsort.txt b/requirements_deepsort.txt new file mode 100644 index 0000000..074e025 --- /dev/null +++ b/requirements_deepsort.txt @@ -0,0 +1,38 @@ +# deep_sort ----------------------这个暂时还不需要 +atomicwrites==1.3.0 +attrs==19.3.0 +colorama==0.4.3 +easydict~=1.13 +entrypoints==0.3 +et-xmlfile==1.0.1 +flake8==3.7.9 +flake8-import-order==0.18.1 +importlib-metadata==1.6.0 +jdcal==1.4.1 +joblib==1.2.0 +lap==0.4.0 +mccabe==0.6.1 +more-itertools==8.2.0 +motmetrics==1.2.0 +openpyxl==3.0.3 +packaging==20.3 +pluggy==0.13.1 +py==1.10.0 +py-cpuinfo==5.0.0 +pycodestyle==2.5.0 +pyflakes==2.1.1 +pyparsing==2.4.7 +pytest==5.4.1 +pytest-benchmark==3.2.3 +python-dateutil==2.8.1 +pytz==2019.3 +scikit-learn==1.6.1 +six==1.14.0 +sklearn==0.0 +Vizer==0.1.5 +wcwidth==0.1.9 +xmltodict==0.12.0 +zipp==3.1.0 +mmdet~=3.0.0 +pycocotools~=2.0.6 +python-dotenv~=0.21.0 \ No newline at end of file diff --git a/requirements_gpu.txt b/requirements_gpu.txt index 8870527..e711add 100644 --- a/requirements_gpu.txt +++ b/requirements_gpu.txt @@ -7,8 +7,9 @@ pydantic~=1.9.0 # pydantic email-validator==1.1.3 PyJWT~=2.3.0 python-multipart==0.0.5 -redis~=4.1.4 +redis==4.1.4 SQLAlchemy~=2.0.34 +cryptography==44.0.2 uvicorn~=0.17.5 uvicorn[standard] loguru~=0.6.0 @@ -18,10 +19,10 @@ pymysql==1.0.2 pynvml==12.0.0 requests-toolbelt==1.0.0 python-socketio == 5.12.1 +setuptools==75.8.0 # YOLOV5 ---------------------------------------------------------------------- -# BASE ------------------------------------------------------------------------ gitpython>=3.1.30 matplotlib==3.7.0 numpy==1.24.0 # cuda版本的numpy需要降一下版本 @@ -35,7 +36,5 @@ thop>=0.1.1 # FLOPs computation # torchvision==0.21.0+cu124 # 本地安装 tqdm>=4.66.3 ultralytics==8.3.75 # https://ultralytics.com - -# Plotting -------------------------------------------------------------------- pandas==2.2.3 -seaborn==0.11.2 # 对应的这个依赖也需要降 \ No newline at end of file +seaborn==0.11.2 # 对应的这个依赖也需要降 diff --git a/yolov5/models/common.py b/yolov5/models/common.py index ea893db..a168fd6 100644 --- a/yolov5/models/common.py +++ b/yolov5/models/common.py @@ -891,7 +891,7 @@ class AutoShape(nn.Module): x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32 - with amp.autocast(autocast): + with torch.amp.autocast(device_type='cuda', enabled=autocast): # Inference with dt[1]: y = self.model(x, augment=augment) # forward diff --git a/yolov5/train.py b/yolov5/train.py index d65fee7..b10992e 100644 --- a/yolov5/train.py +++ b/yolov5/train.py @@ -352,7 +352,7 @@ def train(hyp, opt, device, callbacks): maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move - scaler = torch.cuda.amp.GradScaler(enabled=amp) + scaler = torch.amp.GradScaler(device='cuda', enabled=amp) stopper, stop = 
EarlyStopping(patience=opt.patience), False compute_loss = ComputeLoss(model) # init loss class callbacks.run("on_train_start")