测试代码
另外说明一下：cv2 绘制不了中文，但可以用其他包（如 PIL/Pillow）实现。
from pathlib import Path

import cv2
import torch

from models.common import DetectMultiBackend
from utils.dataloaders import LoadImages
from utils.general import Profile, increment_path, non_max_suppression, scale_boxes
from utils.plots import Annotator
from utils.torch_utils import select_device

# --- Configuration -----------------------------------------------------------
# Raw strings (r'...') keep Windows backslashes literal; the original plain
# strings only worked because '\P', '\w', '\c', '\d' happen not to be escapes.
weights = r'D:\PycharmProjects\swallow\wights\yolov5s.pt'
data = r'D:\PycharmProjects\swallow\config\coco128.yaml'
source = r'D:\PycharmProjects\swallow\data\images'

device = select_device('cpu')
dnn = False                 # use OpenCV DNN backend for ONNX inference
half = False                # FP16 half-precision inference
imgsz = (640, 640)          # inference size (height, width)
stride = 32                 # model stride
pt = True                   # weights are a native PyTorch *.pt
vid_stride = 1              # video frame-rate stride
bs = 1                      # batch_size
conf_thres = 0.25           # confidence threshold
iou_thres = 0.45            # NMS IOU threshold
classes = [0, 1, 2, 3, 4]   # keep only these class indices after NMS
agnostic_nms = False        # class-agnostic NMS
max_det = 1000              # maximum detections per image

model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup

seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
for path, im, im0s, vid_cap, s in dataset:
    # Pre-process: HWC uint8 ndarray -> normalized NCHW float tensor
    with dt[0]:
        im = torch.from_numpy(im).to(model.device)
        im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim

    # Inference
    with dt[1]:
        pred = model(im, augment=True, visualize=False)

    # NMS
    with dt[2]:
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
    print(f'预测数据:{pred}')

    # NOTE: renamed the inner loop variable (was `i`, shadowing an outer index)
    for det in pred:  # per image
        p, im0, frame = Path(path), im0s.copy(), getattr(dataset, 'frame', 0)
        if len(det):  # guard: skip rescale/draw when nothing was detected
            # Rescale boxes from inference size back to the original image size
            det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
            for d in det:
                # det rows are (x1, y1, x2, y2, conf, cls); draw box in red (BGR)
                cv2.rectangle(im0, (int(d[0]), int(d[1])), (int(d[2]), int(d[3])), (0, 0, 255), 2)
        cv2.imshow('name', im0)
        cv2.waitKey(0)  # wait for a key press before showing the next image
cv2.destroyAllWindows()  # release HighGUI windows when done
DetectMultiBackend:
支持各种模型推理:
# Usage:
# PyTorch: weights = *.pt
# TorchScript: *.torchscript
# ONNX Runtime: *.onnx
# ONNX OpenCV DNN: *.onnx --dnn
# OpenVINO: *_openvino_model
# CoreML: *.mlmodel
# TensorRT: *.engine
# TensorFlow SavedModel: *_saved_model
# TensorFlow GraphDef: *.pb
# TensorFlow Lite: *.tflite
# TensorFlow Edge TPU: *_edgetpu.tflite
# PaddlePaddle: *_paddle_model
1.首先根据文件后缀判断文件类型。
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
2:初始化模型
elif jit: # TorchScript
LOGGER.info(f'Loading {w} for TorchScript inference...')
extra_files = {'config.txt': ''} # model metadata
model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
model.half() if fp16 else model.float()
if extra_files['config.txt']: # load metadata dict
d = json.loads(extra_files['config.txt'],
object_hook=lambda d: {int(k) if k.isdigit() else k: v
for k, v in d.items()})
stride, names = int(d['stride']), d['names']
3:forward调用模型
elif self.jit: # TorchScript
y = self.model(im)
结合export.py 工具,可以导出不同的模型,运行不同形式的模型。
Detect:
训练时候的损失函数:
https://mp.csdn.net/mp_blog/creation/editor/128985650
pxy = pxy.sigmoid() * 2 - 0.5
pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
推理还原代码:
self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
y = torch.cat((xy, wh, conf), 4)
解释:
yolo模型是基于特征金字塔。比如原始图片大小(640, 480)，那么它会按步长(8, 16, 32)下采样得到新的三张特征图 [(80, 60), (40, 30), (20, 15)]。那么还原回去是不是也应该乘以步长？其实从损失函数可以看出，模型预测的只是一个偏移，所以还原回去，按照原定方式还原就行了。
模型输出:
z.append(y.view(bs, self.na * nx * ny, self.no))
本来应该是(1, 3, 80, 60, 85)，含义是：有一张图片，把它分成 (80, 60) 的网格，每个网格有 3 个先验框。每个先验框预测 box (x, y, w, h) 的 4 个值 + 置信度 (1) + 类别独热编码 (80)。
推理的时候我们只关心,预测的物体。所以view了一下。含义为:预测了几张图片,总共预测了多少物体(其中大部分是背景,因为存在3张特征图,预测量是非常恐怖的)
nms:
1: 根据置信度,过滤大量的背景或者不符合的预测值
xc = prediction[..., 4] > conf_thres # candidates
for xi, x in enumerate(prediction): # image index, image inference
x = x[xc[xi]] # confidence
2:box坐标转换
box = xywh2xyxy(x[:, :4])
3: 计算得分,得到预测类别最高得分, 过滤掉不符合的类别
类别的得分,是置信度 * 类别概率的综合分数。但是判别标准还是置信度阈值。
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
conf, j = x[:, 5:mi].max(1, keepdim=True)
x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
4:根据置信度排序
x = x[x[:, 4].argsort(descending=True)] # sort by confidence
5:计算nms
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
参考资料
NMS(非极大值抑制)_zouxiaolv的博客-CSDN博客_非极大值抑制