如何使用 OpenCV 调用 YOLOv5 ONNX 模型?
实测性能(Release 构建,RTX 3090 24G):使用 yolov5n 约 90 FPS,使用 yolov5s 约 20 FPS。
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
/*!
* use ONNX model
* */
class YoloV5
{
public:
struct Data{
int classIndex;
float confidence;
cv::Rect box;
};
bool init(const std::string & model,bool isUseGPU = true) {
try{
net = cv::dnn::readNetFromONNX(model);
if(isUseGPU) {
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
std::cout << "SET DNN_BACKEND_CUDA & DNN_TARGET_CUDA" << std::endl;
}else {
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
std::cout << "SET DNN_BACKEND_OPENCV & DNN_TARGET_CPU" << std::endl;
}
}catch (const cv::Exception &e){
std::cerr << e.what() << std::endl;
return false;
}
return true;
}
void detect(cv::Mat & frame,const std::vector<std::string> &labels,std::vector<YoloV5::Data> &output,float confidenceThreshold = 0.45,float scoreThreshold = 0.5,float nmsThreshold = 0.45) {
cv::Mat blob;
cv::dnn::blobFromImage(frame, blob, 1./255., cv::Size(width, height), cv::Scalar(), true, false);
net.setInput(blob);
std::vector<cv::Mat> predictions;
net.forward(predictions, net.getUnconnectedOutLayersNames());
float x_factor = frame.cols / static_cast<float>(width);
float y_factor = frame.rows / static_cast<float>(height);
float *data = (float *)predictions[0].data;
const int dimensions = 85;
const int rows = 25200;
std::vector<int> predClassIds;
std::vector<float> predConfidences;
std::vector<cv::Rect> predBoxes;
for (int i = 0; i < rows; ++i) {
float confidence = data[4];
if (confidence >= confidenceThreshold) {
float * classes_scores = data + 5;
cv::Mat scores(1, labels.size(), CV_32FC1, classes_scores);
cv::Point class_id;
double max_class_score;
cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
if (max_class_score > scoreThreshold) {
predConfidences.push_back(confidence);
predClassIds.push_back(class_id.x);
float x = data[0];
float y = data[1];
float w = data[2];
float h = data[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
predBoxes.push_back(cv::Rect(left, top, width, height));
}
}
data += dimensions;
}
std::vector<int> indices;
cv::dnn::NMSBoxes(predBoxes, predConfidences, scoreThreshold, nmsThreshold, indices);
for (int i = 0; i < indices.size(); i++) {
int idx = indices[i];
output.push_back({predClassIds[idx],predConfidences[idx],predBoxes[idx]});
}
}
void detectWithDrawBox(const std::vector<std::string> &labels,cv::Mat & mat,float confidenceThreshold = 0.45,float scoreThreshold = 0.5,float nmsThreshold = 0.45) {
std::vector<Data> result;
auto begin = std::chrono::steady_clock::now();
detect(mat, labels,result , confidenceThreshold, scoreThreshold,nmsThreshold);
auto cost = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - begin).count() / 1000.0;
std::stringstream cost_text;
cost_text << std::fixed << std::setprecision(2) << "cost:" << cost << " fps:" << 1000.0 / cost << " number:" << result.size();
cv::rectangle(mat,{10,5,268,20},{255,157,51},-1);
cv::putText(mat, cost_text.str(), {10, 20}, cv::FONT_HERSHEY_PLAIN, 1, {255, 255, 255});
const std::vector<cv::Scalar> colors = {cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0)};
for (auto i = 0; i < result.size(); ++i) {
auto &data = result[i];
const auto color = colors[data.classIndex % colors.size()];
std::stringstream text;
text << std::fixed << std::setprecision(2) << "class " << labels[data.classIndex] << " rank " << data.confidence;
cv::rectangle(mat, data.box, color, 1);
auto pos = data.box.tl();
auto fontSize = cv::getTextSize(text.str(),cv::FONT_HERSHEY_PLAIN,1,2, nullptr);
cv::rectangle(mat,{pos.x, pos.y - fontSize.height,fontSize.width,fontSize.height + 1},color,-1);
cv::putText(mat, text.str(), {pos.x, pos.y}, cv::FONT_HERSHEY_PLAIN, 1, {0,0,0});
}
}
private:
cv::dnn::Net net;
int width = 640.0;
int height = 640.0;
};
/*!
 * Demo entry point: open a video file, run YOLOv5 detection on each frame
 * (resized to 640x640), draw the results, and display until the stream ends
 * or the user presses 'q'.
 * @return 0 on normal exit, 1 when the model or the video source cannot be opened
 */
int main(int argc, char *argv[]) {
    // COCO class names in model output order; index i maps to class id i.
    std::vector<std::string> _labels{"person", "bicycle", "car", "motorbike", "aeroplane",
        "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign",
        "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
        "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase",
        "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
        "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
        "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
        "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
        "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard",
        "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock",
        "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
    std::string _source = R"(sample.mp4)";
    std::string _model = R"(yolov5s.onnx)";
    YoloV5 v5;
    // Bail out early instead of crashing later in forward() on an empty net.
    if (!v5.init(_model)) {
        std::cerr << "failed to load model: " << _model << std::endl;
        return 1;
    }
    cv::VideoCapture cap;
    if (!cap.open(_source)) {
        std::cerr << "failed to open video source: " << _source << std::endl;
        return 1;
    }
    cv::Mat mat(640, 640, CV_8UC3, {200, 200, 200});
    do {
        cap >> mat;
        if (mat.empty()) {
            break; // end of stream
        }
        // Square the frame to the network input so drawn boxes line up 1:1.
        cv::resize(mat, mat, {640, 640});
        v5.detectWithDrawBox(_labels, mat, 0.45, 0.45, 0.5);
        cv::imshow("win | press key q quit", mat);
        if (cv::waitKey(1) == 'q') {
            break;
        }
    } while (true);
    cv::waitKey(); // keep the last frame on screen until any key is pressed
    return 0;
}
参考文章: https://learnopencv.com/object-detection-using-yolov5-and-opencv-dnn-in-c-and-python/
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)