论文地址:[YOLO] [YOLOv2/YOLO9000] [YOLOv3] [YOLOv4]
YOLO系列权重、配置文件下载地址:https://github.com/AlexeyAB/darknet
代码解读:[Deep Learning based Object Detection using YOLOv3 with OpenCV ( Python / C++ ) ][中文翻译]
代码下载:这里有一份可以运行YOLOv3、YOLOv4、YOLO-Fastest、YOLObile四种网络的[C++代码][参考博客],只需在主函数中修改参数即可切换网络,强烈推荐。
运行平台:i7 9700+VS2017+opencv4.4.0(opencv版本不宜过低,之前使用opencv3.4.9,只能运行YOLOv3和YOLO-Fastest,运行剩下两个会在加载网络处报错,应该是不支持YOLOv4的激活函数)
链接:https://pan.baidu.com/s/1EJRMypMR0SSEGGjCpyYskg
提取码:560s
可能遇到的报错:找不到opencv440d.dll。解决方法:到opencv安装目录下的bin文件夹(例如E:\opencv4.4.0\build\x64\vc15\bin),将opencv440d.dll文件复制到C:\Windows\System32下即可。
模型可视化网址:https://netron.app/
一、网络输出
YOLOv3输出3个不同尺度的特征图,从而提升对小目标的检测能力。
this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
outs是一个vector<Mat>,其中每个元素是一个输出层对应的二维矩阵;矩阵的每一行对应一个包围框,是一个包含85个元素的行向量。以红色方框所在那一行为例:
前4维:归一化后的包围框参数,分别对应中心点横坐标、中心点纵坐标、宽度、高度(网络原始输出到这些数值的换算见原文公式;在下文postprocess中,只需将它们分别乘以图像的宽、高即可得到像素坐标)
第5维:表示该包围框包含目标的概率(按我个人的粗浅理解,这个数值类似于YOLOv1中提到的预测包围框与ground truth之间的IOU)
后80维:代表80个类别对应的置信度/Score
二、代码注释笔记
main_yolo.cpp
#include "yolo.h"
/// Builds a detector from one network configuration entry.
///
/// Copies the thresholds, input size and network name out of `config`, reads
/// the class names (one per line) from `config.classesFile`, then loads the
/// Darknet model and selects the OpenCV backend with the CPU target.
///
/// @param config  Network description providing netname, confThreshold,
///                nmsThreshold, inpWidth, inpHeight, classesFile,
///                modelConfiguration and modelWeights.
YOLO::YOLO(Net_config config)
{
    cout << "Net use " << config.netname << endl;

    this->confThreshold = config.confThreshold;
    this->nmsThreshold = config.nmsThreshold;
    this->inpWidth = config.inpWidth;
    this->inpHeight = config.inpHeight;
    strcpy_s(this->netname, config.netname.c_str());

    ifstream ifs(config.classesFile.c_str());
    if (!ifs.is_open())
    {
        // Not fatal: drawPred only prepends a class name when `classes` is
        // non-empty, so detection still works and labels show the bare score.
        // Warn so that the missing names are not a silent surprise.
        std::cerr << "Warning: cannot open classes file: " << config.classesFile << std::endl;
    }
    string line;
    while (getline(ifs, line))
    {
        this->classes.push_back(line);
    }

    this->net = readNetFromDarknet(config.modelConfiguration, config.modelWeights);
    this->net.setPreferableBackend(DNN_BACKEND_OPENCV);
    this->net.setPreferableTarget(DNN_TARGET_CPU);
}
/// Turns raw network outputs into final detections and draws them on `frame`.
///
/// Every row of every output matrix is treated as one candidate box:
/// columns 0-3 are multiplied by the frame size to obtain centre x, centre y,
/// width and height in pixels, and columns 5..end hold the per-class scores.
/// A candidate is kept when its best class score exceeds `confThreshold`;
/// the survivors go through non-maximum suppression and are drawn via drawPred.
///
/// @param frame  Image the network was run on; annotated in place.
/// @param outs   Output matrices produced by Net::forward.
///               NOTE(review): rows are read as float — assumes CV_32F 2-D
///               matrices; confirm for any new model.
void YOLO::postprocess(Mat& frame, const vector<Mat>& outs)
{
    vector<int> classIds;
    vector<float> confidences;
    vector<Rect> boxes;

    for (const Mat& out : outs)
    {
        for (int row = 0; row < out.rows; ++row)
        {
            // ptr<float>(row) honours the matrix row step, unlike striding a
            // raw pointer by `cols`, so it is also correct for non-continuous data.
            const float* data = out.ptr<float>(row);

            // Class scores start at column 5; pick the best-scoring class.
            const Mat scores = out.row(row).colRange(5, out.cols);
            Point classIdPoint;
            double confidence = 0.0;
            minMaxLoc(scores, nullptr, &confidence, nullptr, &classIdPoint);
            if (confidence <= this->confThreshold)
            {
                continue;
            }

            const int centerX = static_cast<int>(data[0] * frame.cols);
            const int centerY = static_cast<int>(data[1] * frame.rows);
            const int width = static_cast<int>(data[2] * frame.cols);
            const int height = static_cast<int>(data[3] * frame.rows);
            const int left = centerX - width / 2;
            const int top = centerY - height / 2;

            classIds.push_back(classIdPoint.x);
            confidences.push_back(static_cast<float>(confidence));
            boxes.push_back(Rect(left, top, width, height));
        }
    }

    // Suppress overlapping boxes; `indices` receives the positions of the kept ones.
    vector<int> indices;
    NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
    for (const int idx : indices)
    {
        const Rect& box = boxes[idx];
        this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
                       box.x + box.width, box.y + box.height, frame);
    }
}
void YOLO::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
{
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3);
string label = format("%.2f", conf);
if (!this->classes.empty())
{
CV_Assert(classId < (int)this->classes.size());
label = this->classes[classId] + ":" + label;
}
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
}
/// Runs the network on `frame`, draws the detections, and overlays the
/// inference time reported by the network in the top-left corner.
///
/// @param frame  Input image; annotated in place.
void YOLO::detect(Mat& frame)
{
    // Preprocess: scale pixels by 1/255, resize to the network input size,
    // zero mean, swapRB = true, crop = false.
    Mat inputBlob;
    const Size netInputSize(this->inpWidth, this->inpHeight);
    blobFromImage(frame, inputBlob, 1 / 255.0, netInputSize, Scalar(0, 0, 0), true, false);
    this->net.setInput(inputBlob);

    // Forward pass over all unconnected output layers, then decode and draw.
    vector<Mat> layerOutputs;
    this->net.forward(layerOutputs, this->net.getUnconnectedOutLayersNames());
    this->postprocess(frame, layerOutputs);

    // getPerfProfile returns ticks; dividing by ticks-per-millisecond gives ms.
    vector<double> perLayerTimes;
    const double ticksPerMs = getTickFrequency() / 1000;
    const double elapsedMs = this->net.getPerfProfile(perLayerTimes) / ticksPerMs;

    const string timingText = format("%s Inference time : %.2f ms", this->netname, elapsedMs);
    putText(frame, timingText, Point(0, 30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 2);
}
int main()
{
YOLO yolo_model(yolo_nets[0]);
string imgpath = "person.jpg";
Mat srcimg = imread(imgpath);
yolo_model.detect(srcimg);
static const string kWinName = "Deep learning object detection in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
imshow(kWinName, srcimg);
waitKey(10);
destroyAllWindows();
}
三、检测结果
检测速度还算可以,用GPU可能会更快一些
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)