添加注释
This commit is contained in:
BIN
torch2trt/imgs/yolov5l-face.jpg
Normal file
BIN
torch2trt/imgs/yolov5l-face.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 176 KiB |
BIN
torch2trt/imgs/yolov5m-face.jpg
Normal file
BIN
torch2trt/imgs/yolov5m-face.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 177 KiB |
BIN
torch2trt/imgs/yolov5n-0.5.jpg
Normal file
BIN
torch2trt/imgs/yolov5n-0.5.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 177 KiB |
BIN
torch2trt/imgs/yolov5n-face.jpg
Normal file
BIN
torch2trt/imgs/yolov5n-face.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 177 KiB |
BIN
torch2trt/imgs/yolov5s-face.jpg
Normal file
BIN
torch2trt/imgs/yolov5s-face.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 176 KiB |
98
torch2trt/main.py
Normal file
98
torch2trt/main.py
Normal file
@ -0,0 +1,98 @@
|
||||
import os
|
||||
import sys
|
||||
import cv2
|
||||
import copy
|
||||
import torch
|
||||
import argparse
|
||||
root_path=os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # project root: parent of the directory that contains this file
|
||||
sys.path.append(root_path) # add the project root to the module search path
|
||||
from utils.general import check_img_size,non_max_suppression_face,scale_coords,xyxy2xywh
|
||||
from utils.datasets import letterbox
|
||||
from detect_plate import scale_coords_landmarks,show_results
|
||||
from torch2trt.trt_model import TrtModel
|
||||
cur_path=os.path.abspath(os.path.dirname(__file__))
|
||||
def img_process(img_path,long_side=640,stride_max=32):
    '''
    Load an image and convert it into a normalised, batched CHW float tensor.

    The image is resized so that its longer side equals ``long_side``,
    letterboxed to the size returned by
    ``check_img_size(long_side, s=stride_max)``, converted from BGR/HWC to
    RGB/CHW, scaled from 0-255 to 0.0-1.0 and given a leading batch dimension.

    :param img_path: path of the image read with ``cv2.imread``
    :param long_side: target length of the longer image side
    :param stride_max: stride handed to ``check_img_size``
    :return: ``(img, orgimg)`` - the preprocessed tensor and the unmodified
             image returned by ``cv2.imread``
    :raises FileNotFoundError: if ``cv2.imread`` cannot read ``img_path``
    '''
    orgimg = cv2.imread(img_path)
    if orgimg is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with a clear message instead of on ``.shape`` below.
        raise FileNotFoundError('cannot read image: %s' % img_path)

    img0 = copy.deepcopy(orgimg)
    h0, w0 = orgimg.shape[:2]  # original height, width
    r = long_side / max(h0, w0)  # scale that maps the longer side to long_side
    if r != 1:
        # INTER_AREA when shrinking, INTER_LINEAR when enlarging
        interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
        img0 = cv2.resize(img0, (int(w0 * r), int(h0 * r)), interpolation=interp)

    imgsz = check_img_size(long_side, s=stride_max)  # check img_size

    # auto=True pads to the minimal rectangle, auto=False to the fixed size
    img = letterbox(img0, new_shape=imgsz, auto=False)[0]

    # Convert: BGR -> RGB and HWC -> CHW; copy() makes the array contiguous
    img = img[:, :, ::-1].transpose(2, 0, 1).copy()
    img = torch.from_numpy(img)
    img = img.float()  # uint8 -> float32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension
    return img, orgimg
|
||||
|
||||
def img_vis(img,orgimg,pred,vis_thres = 0.6):
    '''
    Draw the detections in ``pred`` on ``orgimg`` and save the result image.

    Box coordinates (columns 0-3) and landmark coordinates (columns 5-14) of
    every detection tensor are rescaled in place from the network input size
    ``img.shape[2:]`` to the size of ``orgimg``. Rows whose confidence
    (column 4) is below ``vis_thres`` are skipped; the others are drawn with
    ``show_results``. The annotated image is written to ``result.jpg`` in the
    directory of this script.

    :param img: preprocessed network input; only its shape is used
    :param orgimg: original image the detections are drawn on
    :param pred: iterable of per-image detection tensors
    :param vis_thres: minimum confidence for a detection to be drawn
    '''
    print('img.shape: ', img.shape)
    print('orgimg.shape: ', orgimg.shape)

    # Process detections
    for det in pred:  # detections per image
        if not len(det):
            continue

        org_shape = torch.tensor(orgimg.shape)
        gn = org_shape[[1, 0, 1, 0]]  # normalization gain whwh
        gn_lks = org_shape[[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]]  # normalization gain landmarks

        # Rescale boxes and landmarks from img_size to the original image size
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], orgimg.shape).round()
        det[:, 5:15] = scale_coords_landmarks(img.shape[2:], det[:, 5:15], orgimg.shape).round()

        for row in det:
            conf = row[4].cpu().numpy()
            if conf < vis_thres:
                continue

            xywh = (xyxy2xywh(row[:4].view(1, 4)) / gn).view(-1).tolist()
            landmarks = (row[5:15].view(1, 10) / gn_lks).view(-1).tolist()
            class_num = row[15].cpu().numpy()
            orgimg = show_results(orgimg, xywh, conf, landmarks, class_num)

    result_path = cur_path+'/result.jpg'
    cv2.imwrite(result_path, orgimg)
    print('result save in '+result_path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Pipeline: image preprocessing -> TensorRT inference -> NMS -> visualization
    parser = argparse.ArgumentParser()
    parser.add_argument('--img_path', type=str, default=cur_path+"/sample.jpg", help='img path')
    parser.add_argument('--trt_path', type=str, required=True, help='trt_path')
    # type=list would split the argument string into single characters, so the
    # dimensions are parsed as separate ints: --output_shape 1 25200 16
    parser.add_argument('--output_shape', type=int, nargs='+', default=[1,25200,16], help='input[1,3,640,640] -> output[1,25200,16]')
    opt = parser.parse_args()

    img,orgimg=img_process(opt.img_path)
    model=TrtModel(opt.trt_path)
    try:
        pred=model(img.numpy()).reshape(opt.output_shape) # forward
    finally:
        # release the model even when inference or the reshape fails
        model.destroy()

    # Apply NMS
    pred = non_max_suppression_face(torch.from_numpy(pred), conf_thres=0.3, iou_thres=0.5)

    # ============ visualization ================
    img_vis(img,orgimg,pred)
|
||||
|
||||
|
68
torch2trt/readme.md
Normal file
68
torch2trt/readme.md
Normal file
@ -0,0 +1,68 @@
|
||||
English | [简体中文](readme_CN.md)
|
||||
|
||||
|
||||
|
||||
# Overall process
|
||||
|
||||
## 1.Pytorch->TensorRT
|
||||
|
||||
```shell
|
||||
python export.py --weights "torch's path" --onnx2trt --fp16_trt
|
||||
```
|
||||
|
||||
|
||||
## 2.TensorRT inference
|
||||
```shell
|
||||
python torch2trt/main.py --trt_path "trt's path"
|
||||
```
|
||||
Image preprocessing -> TensorRT inference -> visualization
|
||||
|
||||
|
||||
|
||||
# Inference time comparison
|
||||
|
||||
| Backbone |Pytorch(ms) |TensorRT_FP16(ms) |
|
||||
|:---:|:----:|:----:|
|
||||
|yolov5n-0.5| 7.7 | 2.1 |
|
||||
|yolov5n-face| 7.7 | 2.4 |
|
||||
|yolov5s-face| 5.6 | 2.2 |
|
||||
|yolov5m-face| 9.9 | 3.3 |
|
||||
|yolov5l-face| 15.9 | 4.5 |
|
||||
|
||||
> Pytorch=1.10.0+cu102 TensorRT=8.2.0.6 Hardware=rtx2080ti
|
||||
|
||||
```shell
|
||||
python torch2trt/speed.py --torch_path "torch's path" --trt_path "trt's path"
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Visualization
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>yolov5n-0.5</th>
|
||||
<th>yolov5n-face</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><img src="./imgs/yolov5n-0.5.jpg" /></td>
|
||||
<td><img src="./imgs/yolov5n-face.jpg" /></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>yolov5s-face</th>
|
||||
<th>yolov5m-face</th>
|
||||
<th>yolov5l-face</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><img src="./imgs/yolov5s-face.jpg" /></td>
|
||||
<td><img src="./imgs/yolov5m-face.jpg" /></td>
|
||||
<td><img src="./imgs/yolov5l-face.jpg" /></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
||||
|
||||
|
65
torch2trt/readme_CN.md
Normal file
65
torch2trt/readme_CN.md
Normal file
@ -0,0 +1,65 @@
|
||||
|
||||
|
||||
# 整体流程
|
||||
|
||||
## 1.Pytorch->TensorRT
|
||||
|
||||
```shell
|
||||
python export.py --weights "torch权重路径" --onnx2trt --fp16_trt
|
||||
```
|
||||
|
||||
|
||||
## 2.TensorRT推理
|
||||
```shell
|
||||
python torch2trt/main.py --trt_path "trt权重路径"
|
||||
```
|
||||
|
||||
图像预处理 -> TensorRT推理 -> 可视化结果
|
||||
|
||||
|
||||
|
||||
# 耗时对比
|
||||
|
||||
| | Pytorch(ms) | TensorRT_FP16(ms) |
|
||||
|:---:|:----:|:----:|
|
||||
| yolov5n-0.5 | 7.7 | 2.1 |
|
||||
| yolov5n-face | 7.7 | 2.4 |
|
||||
| yolov5s-face | 5.6 | 2.2 |
|
||||
| yolov5m-face | 9.9 | 3.3 |
|
||||
| yolov5l-face | 15.9 | 4.5 |
|
||||
|
||||
> Pytorch=1.10.0+cu102 TensorRT=8.2.0.6 Hardware=rtx2080ti
|
||||
|
||||
```shell
|
||||
python torch2trt/speed.py --torch_path "torch权重路径" --trt_path "trt权重路径"
|
||||
```
|
||||
|
||||
|
||||
|
||||
# 可视化
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>yolov5n-0.5</th>
|
||||
<th>yolov5n-face</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><img src="./imgs/yolov5n-0.5.jpg" /></td>
|
||||
<td><img src="./imgs/yolov5n-face.jpg" /></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>yolov5s-face</th>
|
||||
<th>yolov5m-face</th>
|
||||
<th>yolov5l-face</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><img src="./imgs/yolov5s-face.jpg" /></td>
|
||||
<td><img src="./imgs/yolov5m-face.jpg" /></td>
|
||||
<td><img src="./imgs/yolov5l-face.jpg" /></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
BIN
torch2trt/sample.jpg
Normal file
BIN
torch2trt/sample.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 85 KiB |
49
torch2trt/speed.py
Normal file
49
torch2trt/speed.py
Normal file
@ -0,0 +1,49 @@
|
||||
from models.experimental import attempt_load
|
||||
from torch2trt.trt_model import TrtModel
|
||||
import argparse
|
||||
import torch
|
||||
import time
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def run(model,img,warmup_iter,iter):
    '''
    Measure the average wall-clock time of ``model(img)`` in milliseconds.

    :param model: callable invoked as ``model(img)``
    :param img: input passed unchanged to every call
    :param warmup_iter: number of untimed calls executed first
    :param iter: number of timed calls (must be positive)
    :return: mean time per timed call, in milliseconds
    :raises ValueError: if ``iter`` is not positive
    '''
    if iter <= 0:
        # fail before doing any work instead of dividing by zero at the end
        raise ValueError('iter must be a positive integer')

    # Inference-only benchmark: disable autograd so a PyTorch model does not
    # spend time recording gradient history during the measured calls.
    with torch.no_grad():
        print('start warm up...')
        for _ in tqdm(range(warmup_iter)):
            model(img)

        print('start calculate...')
        torch.cuda.synchronize()  # make sure warm-up work has finished
        start = time.perf_counter()  # monotonic, high-resolution clock
        for _ in tqdm(range(iter)):
            model(img)
        torch.cuda.synchronize()  # wait for queued GPU work before stopping the clock
        end = time.perf_counter()

    return ((end - start) * 1000)/float(iter)
|
||||
|
||||
if __name__ == '__main__':
    # Benchmark the PyTorch model and the TensorRT engine on the same
    # all-zero input and print the average time per call for each.
    parser = argparse.ArgumentParser()
    parser.add_argument('--torch_path', type=str,required=True, help='torch weights path')
    parser.add_argument('--trt_path', type=str,required=True, help='tensorrt weights path')

    parser.add_argument('--device', type=int,default=0, help='cuda device')
    # type=list would split the argument string into single characters, so the
    # dimensions are parsed as separate ints: --img_shape 1 3 640 640
    parser.add_argument('--img_shape', type=int, nargs='+', default=[1,3,640,640], help='input tensor shape, e.g. 1 3 640 640')
    parser.add_argument('--warmup_iter', type=int, default=100,help='warm up iter')
    parser.add_argument('--iter', type=int, default=300,help='average elapsed time of iterations')
    opt = parser.parse_args()

    # -----------------------torch-----------------------------------------
    img = torch.zeros(opt.img_shape)
    model = attempt_load(opt.torch_path, map_location=torch.device('cpu')) # load FP32 model
    model.eval()
    total_time=run(model.to(opt.device),img.to(opt.device),opt.warmup_iter,opt.iter)
    print('Pytorch is %.2f ms/img'%total_time)

    # -----------------------tensorrt-----------------------------------------
    model=TrtModel(opt.trt_path)
    try:
        total_time=run(model,img.numpy(),opt.warmup_iter,opt.iter)
    finally:
        # release the model even when the benchmark fails
        model.destroy()
    print('TensorRT is %.2f ms/img'%total_time)
|
118
torch2trt/trt_model.py
Normal file
118
torch2trt/trt_model.py
Normal file
@ -0,0 +1,118 @@
|
||||
import pycuda.autoinit
|
||||
import pycuda.driver as cuda
|
||||
import tensorrt as trt
|
||||
import numpy as np
|
||||
|
||||
EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
|
||||
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
|
||||
def GiB(val):
    """Return the number of bytes in ``val`` gibibytes as an int."""
    # Parenthesised on purpose: ``val * 1 << 30`` parses as ``(val * 1) << 30``
    # and therefore raises TypeError for fractional sizes such as 0.5.
    return int(val * (1 << 30))
|
||||
|
||||
def ONNX_to_TRT(onnx_model_path=None,trt_engine_path=None,fp16_mode=False):
    """
    Build a TensorRT engine from an ONNX model and save it to disk.

    Written for the TensorRT V8 API. Only fixed input sizes are supported.

    fp16_mode: if True, enable the FP16 builder flag
    onnx_model_path: path of the ONNX model to load
    trt_engine_path: path the serialized engine is written to

    Raises RuntimeError if the ONNX model cannot be parsed or the engine
    cannot be built.
    """
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(EXPLICIT_BATCH)
    parser = trt.OnnxParser(network, TRT_LOGGER)

    config = builder.create_builder_config()
    config.max_workspace_size=GiB(1)
    if fp16_mode:
        config.set_flag(trt.BuilderFlag.FP16)

    with open(onnx_model_path, 'rb') as model:
        parsed = parser.parse(model.read())
    if not parsed:
        # An ``assert`` here would be stripped under ``python -O`` together
        # with the parse call itself, so check the result explicitly.
        errors = [str(parser.get_error(i)) for i in range(parser.num_errors)]
        raise RuntimeError('failed to parse ONNX model %s: %s'
                           % (onnx_model_path, '; '.join(errors)))

    serialized_engine=builder.build_serialized_network(network, config)
    if serialized_engine is None:
        # Fail before opening the output file so that a failed build does
        # not leave an empty engine file behind.
        raise RuntimeError('TensorRT failed to build an engine from %s'
                           % onnx_model_path)

    with open(trt_engine_path, 'wb') as f:
        f.write(serialized_engine)  # serialize

    print('TensorRT file in ' + trt_engine_path)
    print('============ONNX->TensorRT SUCCESS============')
|
||||
|
||||
class TrtModel():
    '''
    TensorRT inference wrapper built on PyCUDA.

    The constructor creates a CUDA context on device 0, deserializes the
    engine file and allocates one page-locked host buffer and one device
    buffer per engine binding. Calling the instance copies the input to the
    first input buffer, executes the engine and returns the first output
    buffer. ``destroy()`` must be called when the model is no longer needed.
    '''
    def __init__(self,trt_path):
        '''
        :param trt_path: path of the serialized TensorRT engine file
        :raises RuntimeError: if the engine cannot be deserialized
        '''
        self.ctx=cuda.Device(0).make_context()
        try:
            stream = cuda.Stream()
            logger = trt.Logger(trt.Logger.INFO)
            runtime = trt.Runtime(logger)

            # Deserialize the engine from file
            with open(trt_path, "rb") as f:
                engine = runtime.deserialize_cuda_engine(f.read())
            if engine is None:
                raise RuntimeError('failed to deserialize TensorRT engine: %s' % trt_path)
            context = engine.create_execution_context()

            host_inputs = []
            cuda_inputs = []
            host_outputs = []
            cuda_outputs = []
            bindings = []

            for binding in engine:
                binding_shape = engine.get_binding_shape(binding)
                print('bingding:', binding, binding_shape)
                size = trt.volume(binding_shape) * engine.max_batch_size
                dtype = trt.nptype(engine.get_binding_dtype(binding))
                # Allocate host and device buffers
                host_mem = cuda.pagelocked_empty(size, dtype)
                cuda_mem = cuda.mem_alloc(host_mem.nbytes)
                # Append the device buffer to device bindings.
                bindings.append(int(cuda_mem))
                # Append to the appropriate list.
                if engine.binding_is_input(binding):
                    self.input_w = binding_shape[-1]
                    self.input_h = binding_shape[-2]
                    host_inputs.append(host_mem)
                    cuda_inputs.append(cuda_mem)
                else:
                    host_outputs.append(host_mem)
                    cuda_outputs.append(cuda_mem)
        except Exception:
            # The caller never receives the instance when construction fails,
            # so destroy() cannot be called: release the context here.
            self.ctx.pop()
            raise

        # Store
        self.stream = stream
        self.context = context
        self.engine = engine
        self.host_inputs = host_inputs
        self.cuda_inputs = cuda_inputs
        self.host_outputs = host_outputs
        self.cuda_outputs = cuda_outputs
        self.bindings = bindings
        self.batch_size = engine.max_batch_size

    def __call__(self,img_np_nchw):
        '''
        Run TensorRT inference.

        :param img_np_nchw: input image as a numpy array; it is flattened and
                            copied into the first input buffer
        :return: the first host output buffer as a flat array. The same
                 buffer object is returned by every call and is overwritten
                 by the next one, so copy it if it must be kept.
        '''
        self.ctx.push()
        try:
            stream = self.stream
            np.copyto(self.host_inputs[0], img_np_nchw.ravel())
            cuda.memcpy_htod_async(self.cuda_inputs[0], self.host_inputs[0], stream)
            # NOTE(review): execute_async takes a batch size, while engines
            # built by ONNX_to_TRT use EXPLICIT_BATCH - consider
            # execute_async_v2; confirm against the installed TensorRT version.
            self.context.execute_async(batch_size=self.batch_size, bindings=self.bindings, stream_handle=stream.handle)
            cuda.memcpy_dtoh_async(self.host_outputs[0], self.cuda_outputs[0], stream)
            stream.synchronize()
        finally:
            # keep the context stack balanced even when inference raises
            self.ctx.pop()
        return self.host_outputs[0]

    def destroy(self):
        '''Release the CUDA context created in the constructor.'''
        # Remove any context from the top of the context stack, deactivating it.
        self.ctx.pop()
|
Reference in New Issue
Block a user