如图,有如下字母表:
现尝试采用K最近邻(KNN)算法:取前10列字符作为训练数据,然后识别字母表中的所有字符。
创建训练数据
首先通过获取前10列字符的轮廓外接矩形,将字符裁剪出作为训练样本建立图库。
#include <string>
#include <iostream>
#include <opencv2\opencv.hpp>
#include<numeric>
using namespace cv;
int main()
{
Mat src, dst;
src = imread("E:/image/image/letters.png");
if (src.empty())
{
std::cout<<"can not load image \n"<<std::endl;
return -1;
}
imshow("input", src);
dst = src.clone();
cvtColor(src, src, COLOR_BGR2GRAY);
Mat bin = src.clone();
//创建感兴趣区域,选取左侧10列作为训练数据
Mat ROI = src(Rect(0,0,400,src.size().height));
blur(ROI, ROI, Size(9,9));
threshold(ROI, ROI, 210, 255, THRESH_BINARY);
Canny(ROI, ROI, 20, 80, 3, false);
std::vector<std::vector<Point>> contours;
std::vector<Vec4i>hierarchy;
findContours(ROI, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE, Point(0,0));
RNG rng(0);
std::vector<RotatedRect> minRects(contours.size());
std::vector<float> height, width;
for (int i = 0; i < contours.size(); i++)
{
Scalar color = Scalar(rng.uniform(0,255), rng.uniform(0,255), rng.uniform(0,255));
Rect rect = boundingRect(contours[i]);
height.push_back(rect.height);
width.push_back(rect.width);
}
//获取字符外接矩形宽高的最大值
std::vector<float>::iterator h = std::max_element(std::begin(height), std::end(height));
std::vector<float>::iterator w = std::max_element(std::begin(width), std::end(width));
//获得轮廓的外接矩形
std::vector<Rect>rects;
//将轮廓外接矩形按纵坐标排序
std::vector<float>sequence;
for (int i = 0; i < contours.size(); i++)
{
Scalar color = Scalar(0, rng.uniform(0,255), rng.uniform(0,255));
Rect rect = boundingRect(contours[i]);
Rect reRect = Rect(Point(rect.x + rect.width/2.0 - *w/2.0,rect.y + rect.height/2.0 - *h/2.0 + 2), Point(rect.x + rect.width/2.0 + *w/2.0, rect.y + rect.height/2.0 + *h/2.0 - 2));
rectangle(dst, reRect, color, 2);
rects.push_back(reRect);
sequence.push_back(reRect.y);
}
//按纵坐标排序
sort(sequence.begin(), sequence.end());
threshold(bin, bin, 0, 255, CV_THRESH_BINARY|CV_THRESH_OTSU);
int n = 0;
for (int i = 0; i < rects.size(); i++)
{
for (int j = 0; j < 26; j++)
{
std::string outPath = "E:/image/letters/";
char label = 'a';
char temp[256];
if ((rects[i].y > sequence[j*10]-*h/2.0) && (rects[i].y < sequence[j*10]+*h/2.0))
{
label = label + j;
sprintf_s(temp, "%d", n);
outPath = outPath + label + "/" + temp + ".jpg";
//std::cout<<outPath<<std::endl;
imwrite(outPath,bin(rects[i]));
n++;
}
}
}
imshow("output", dst);
waitKey();
return 0;
}
被选中的字符:
裁剪后得到的图库:
利用KNN识别字符
利用KNN识别出图中的字符,并将识别结果显示在图中对应的字符旁边。(同种字符用同一种颜色显示)
#include <io.h>
#include <string>
#include <iostream>
#include <opencv2\opencv.hpp>
#include <opencv2\ml.hpp>
#include<numeric>
using namespace cv;
using namespace ml;
int main()
{
===============================读取训练数据===============================
//图片共有10类
const int classSum = 26;
//每类共50张图片
const int imagesSum = 10;
//图片尺寸
const int imageRows = 33;
const int imageCols = 26;
//每一行一个训练图片
float trainingData[classSum*imagesSum][imageRows*imageCols] = {{0}};
//训练样本标签
float labels[classSum*imagesSum]={0};
for (int i = 0; i < classSum; i++)
{
//目标文件夹路径
std::string inPath = "E:\\image\\letters\\";
char label = 'a';
int k = 0;
label = label + i;
inPath = inPath + label + "\\*.jpg";
//用于查找的句柄
long handle;
struct _finddata_t fileinfo;
//第一次查找
handle = _findfirst(inPath.c_str(),&fileinfo);
if(handle == -1)
return -1;
do
{
//找到的文件的文件名
std::string imgname = "E:/image/letters/";
imgname = imgname + label + "/" + fileinfo.name;
//std::cout<<imgname<<std::endl;
Mat src = imread(imgname, 0);
if (src.empty())
{
std::cout<<"can not load image \n"<<std::endl;
return -1;
}
//序列化后放入作为样本矩阵的一行
for(int j = 0; j<imageRows*imageCols; j++)
{
trainingData[i*imagesSum + k][j] = (float)src.data[j];
}
// 设置样本标签
labels[i*imagesSum + k] = label;
k++;
} while (!_findnext(handle, &fileinfo));
_findclose(handle);
}
//训练样本数据及对应标签
Mat trainingDataMat(classSum*imagesSum, imageRows*imageCols, CV_32FC1, trainingData);
Mat labelsMat(classSum*imagesSum, 1, CV_32FC1, labels);
//std::cout<<trainingDataMat<<std::endl;
//std::cout<<labelsMat<<std::endl;
===============================创建KNN模型===============================
Ptr<KNearest>model = KNearest::create();
model->setDefaultK(classSum);
model->setIsClassifier(true);
Ptr<TrainData>trainData = TrainData::create(trainingDataMat, ROW_SAMPLE, labelsMat);
model->train(trainData);
//model->save("E:/image/KNearestModel.xml");
===============================预测部分===============================
Mat src, dst;
src = imread("E:/image/image/letters.png");
if (src.empty())
{
std::cout<<"can not load image \n"<<std::endl;
return -1;
}
dst = src.clone();
//创建感兴趣区域,选取右侧10列作为预测数据
cvtColor(src, src, COLOR_BGR2GRAY);
blur(src, src, Size(9,9));
threshold(src, src, 210, 255, THRESH_BINARY);
Canny(src, src, 20, 80, 3, false);
std::vector<std::vector<Point>> contours;
std::vector<Vec4i>hierarchy;
findContours(src, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE, Point(0,0));
for (int i = 0; i < contours.size(); i++)
{
Rect rect = boundingRect(contours[i]);
//以矩形中心及指定的宽高作为字符区域
Rect reRect = Rect(Point(rect.x + rect.width/2.0 - imageCols/2.0,rect.y + rect.height/2.0 - imageRows/2.0), Point(rect.x + rect.width/2.0 + imageCols/2.0, rect.y + rect.height/2.0 + imageRows/2.0));
Mat sampleImg;
cvtColor(dst, sampleImg, COLOR_BGR2GRAY);
threshold(sampleImg, sampleImg, 0, 255, CV_THRESH_BINARY|CV_THRESH_OTSU);
Mat sample = Mat::zeros(Size(imageCols,imageRows), sampleImg.type());
float sampleData[imageRows*imageCols];
int nub = 0;
for (int r = 0; r < imageRows; r++)
{
for (int c = 0; c < imageCols; c++)
{
sampleData[nub] = sampleImg.at<uchar>(reRect.y+r,reRect.x+c);
nub++;
}
}
Mat sampleDataMat(1, imageRows*imageCols, CV_32FC1, sampleData);
char f;
f = model->predict(sampleDataMat);
char temp[256];
sprintf_s(temp, "%c", f);
std::cout<<temp<<"\n"<<std::endl;
std::string text(temp);
RNG rng(f);
Scalar color = Scalar(rng.uniform(0,255), rng.uniform(0,255), rng.uniform(0,255));
putText(dst, text, Point(reRect.x,reRect.y + reRect.height),1,1.5,color,2);
}
imshow("output",dst);
waitKey();
return 0;
}
显示识别结果:
相关链接:
图片及代码下载