如果您想要一个快速简单的解决方案,您可以使用更轻量级的 yolo 文件之一。您可以从此网站获取权重和配置文件(它们成对出现并且必须一起使用):https://pjreddie.com/darknet/yolo/ (别担心,这个网站看起来比较简陋,但内容没有问题)
使用较小的网络可以获得更高的 fps,但精度也会较差。如果这是您愿意接受的权衡,那么这是最容易做的事情。
这是一些用于检测牙刷的代码。第一个文件只是一个类文件,有助于使 Yolo 网络的使用更加无缝。第二个是“主”文件,用于打开 VideoCapture 并向网络提供图像。
yolo.py
import cv2
import numpy as np
class Yolo:
def __init__(self, cfg, weights, names, conf_thresh, nms_thresh, use_cuda = False):
# save thresholds
self.ct = conf_thresh;
self.nmst = nms_thresh;
# create net
self.net = cv2.dnn.readNet(weights, cfg);
print("Finished: " + str(weights));
self.classes = [];
file = open(names, 'r');
for line in file:
self.classes.append(line.strip());
# use gpu + CUDA to speed up detections
if use_cuda:
self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA);
self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA);
# get output names
layer_names = self.net.getLayerNames();
self.output_layers = [layer_names[i[0]-1] for i in self.net.getUnconnectedOutLayers()];
# runs detection on the image and draws on it
def detect(self, img, target_id):
# get detection stuff
b, c, ids, idxs = self.get_detection_data(img, target_id);
# draw result
img = self.draw(img, b, c, ids, idxs);
return img, len(idxs);
# returns boxes, confidences, class_ids, and indexes (indices?)
def get_detection_data(self, img, target_id):
# get output
layer_outputs = self.get_inf(img);
# get dims
height, width = img.shape[:2];
# filter thresholds and target
b, c, ids, idxs = self.thresh(layer_outputs, width, height, target_id);
return b, c, ids, idxs;
# runs the network on an image
def get_inf(self, img):
# construct a blob
blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416,416), swapRB=True, crop=False);
# get response
self.net.setInput(blob);
layer_outputs = self.net.forward(self.output_layers);
return layer_outputs;
# filters the layer output by conf, nms and id
def thresh(self, layer_outputs, width, height, target_id):
# some lists
boxes = [];
confidences = [];
class_ids = [];
# each layer outputs
for output in layer_outputs:
for detection in output:
# get id and confidence
scores = detection[5:];
class_id = np.argmax(scores);
confidence = scores[class_id];
# filter out low confidence
if confidence > self.ct and class_id == target_id:
# scale bounding box back to the image size
box = detection[0:4] * np.array([width, height, width, height]);
(cx, cy, w, h) = box.astype('int');
# grab the top-left corner of the box
tx = int(cx - (w / 2));
ty = int(cy - (h / 2));
# update lists
boxes.append([tx,ty,int(w),int(h)]);
confidences.append(float(confidence));
class_ids.append(class_id);
# apply NMS
idxs = cv2.dnn.NMSBoxes(boxes, confidences, self.ct, self.nmst);
return boxes, confidences, class_ids, idxs;
# draw detections on image
def draw(self, img, boxes, confidences, class_ids, idxs):
# check for zero
if len(idxs) > 0:
# loop over indices
for i in idxs.flatten():
# extract the bounding box coords
(x,y) = (boxes[i][0], boxes[i][1]);
(w,h) = (boxes[i][2], boxes[i][3]);
# draw a box
cv2.rectangle(img, (x,y), (x+w,y+h), (0,0,255), 2);
# draw text
text = "{}: {:.4}".format(self.classes[class_ids[i]], confidences[i]);
cv2.putText(img, text, (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 2);
return img;
main.py
import cv2
import numpy as np
# this is the "yolo.py" file, I assume it's in the same folder as this program
from yolo import Yolo
# these are the filepaths of the yolo files
weights = "yolov3-tiny.weights";
config = "yolov3-tiny.cfg";
labels = "yolov3.txt";
# init yolo network
target_class_id = 79; # toothbrush
conf_thresh = 0.4; # less == more boxes (but more false positives)
nms_thresh = 0.4; # less == more boxes (but more overlap)
net = Yolo(config, weights, labels, conf_thresh, nms_thresh);
# open video capture
cap = cv2.VideoCapture(0);
# loop
done = False;
while not done:
# get frame
ret, frame = cap.read();
if not ret:
done = cv2.waitKey(1) == ord('q');
continue;
# do detection
frame, _ = net.detect(frame, target_class_id);
# show
cv2.imshow("Marked", frame);
done = cv2.waitKey(1) == ord('q');
如果您不想使用较轻量的模型文件,还有其他几个选项可供您选择。
如果您有 Nvidia GPU,则可以使用 CUDA 大幅提高帧率。即使是入门级的 Nvidia GPU,也比仅在 CPU 上运行快好几倍。
绕过持续运行检测的成本的常见策略是仅使用它来最初捕获目标。您可以使用神经网络的检测来初始化对象跟踪器,类似于人在对象周围绘制边界框。对象跟踪器速度更快,并且无需不断地对每一帧进行全面检测。
如果您在单独的线程中运行 Yolo 和对象跟踪,那么您可以以接近相机的帧率运行。您需要保存一段帧历史记录,这样当 Yolo 线程处理完某一帧时,您可以检查那一帧旧画面,确认是否已经在跟踪该对象;如果还没有,就在对应的旧帧上初始化对象跟踪器,再快进(fast-forward)后续帧让它赶上当前画面。这个程序并不简单,您需要确保正确管理线程之间的数据。不过,这是熟悉多线程编程的一个很好的练习,也是编程能力上的一大进步。