最近有个项目里要整理缩略图和原图是否匹配,然后就去找工具和方法做了,之后很好奇图像相似度是怎么算的,google了下貌似很深奥,而且找不到最简单的java示例源码可供下载,估计图形学的人不屑用java吧。
个人从来没有研究过图像学,也没看过什么论文或者相关文档,写这个完全是靠google和百度,自己写了个实验了下,测试用例也少,估计有大BUG的存在,所以看的人权当学习交流,切勿生产使用。
思路:
1,二值化,(不做颜色比对了,生成黑白图)
2 ,取黑色的矩形阵列,(不管图片大小,只取同颜色的矩形区域,判断规则下文讲)
3,排序取前几个矩阵,(取最大矩阵的 %d 面积以上的矩阵,%d作为参数)。
4,取得2张图的矩阵,对比矩阵距离面积等,根据面积权重,得到相似度。
如这张图,取的矩阵就是红色区域(没画全)。
好了,开始了,下面的代码要是有人发现问题PM我,我去改正:
1,二值化:
package cn.my.image.Compare.test;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import javax.imageio.ImageIO;
/**
 * Turns an image into a binary matrix (indexed {@code [x][y]}) by thresholding a
 * 3x3-smoothed grayscale version of it.
 *
 * <p>Cells are {@code 1} where the smoothed gray level is strictly above the threshold
 * ("white") and {@code 0} otherwise ("black").
 *
 * <p>Configuration (channel weights and threshold) is held per instance. When the threshold
 * is left at {@code 0}, each call to {@link #toBinarization(InputStream)} derives its own
 * threshold from the mean gray level of the image being processed; the derived value is NOT
 * remembered, so one instance can safely be reused for several images.
 *
 * <p>Not thread-safe: the last image's width/height are stored on the instance.
 */
public class Binarization {
    /** Threshold value meaning "use the mean gray level of each image". */
    private static final int AUTO_THRESHOLD = 0;

    // Channel weights used to collapse RGB into one gray value (default: roughly equal thirds).
    private float rp = 0.333f;
    private float gp = 0.334f;
    private float bp = 0.333f;

    // Configured threshold; AUTO_THRESHOLD (0) means "compute per image".
    private int threshold = AUTO_THRESHOLD;

    // Dimensions of the most recently processed image (0 before the first call).
    private int h;
    private int w;

    /** Creates a binarizer with equal channel weights and a per-image automatic threshold. */
    public Binarization() {
    }

    /**
     * Creates a binarizer with a fixed black/white threshold.
     *
     * @param threshold gray level (e.g. 160) above which a pixel is white; {@code 0} selects
     *                  the automatic per-image mean
     */
    public Binarization(int threshold) {
        this.threshold = threshold;
    }

    /**
     * Creates a binarizer with custom R/G/B weights and a fixed threshold.
     *
     * @param r         weight of the red channel
     * @param g         weight of the green channel
     * @param b         weight of the blue channel
     * @param threshold gray level above which a pixel is white; {@code 0} selects the
     *                  automatic per-image mean
     */
    public Binarization(float r, float g, float b, int threshold) {
        this.rp = r;
        this.gp = g;
        this.bp = b;
        this.threshold = threshold;
    }

    /** @return height in pixels of the last processed image */
    public int getH() {
        return h;
    }

    /** @return width in pixels of the last processed image */
    public int getW() {
        return w;
    }

    /** @return pixel count (width * height) of the last processed image */
    public int getArea() {
        return w * h;
    }

    /**
     * Reads an image from {@code stream} and binarizes it.
     *
     * <p>The stream is read but not closed; closing it is the caller's responsibility.
     *
     * @param stream encoded image data readable by {@link ImageIO}
     * @return matrix indexed {@code [x][y]}; {@code 1} for white, {@code 0} for black
     * @throws IOException if reading fails or the data is not a decodable image
     */
    public int[][] toBinarization(InputStream stream) throws IOException {
        BufferedImage bi = ImageIO.read(stream);
        if (bi == null) {
            // ImageIO.read signals "unknown format" with null rather than an exception.
            throw new IOException("No registered ImageReader could decode the stream");
        }
        h = bi.getHeight();
        w = bi.getWidth();

        // long accumulator: w*h*255 can exceed Integer.MAX_VALUE for very large images.
        long graySum = 0;
        int[][] gray = new int[w][h];
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < h; y++) {
                gray[x][y] = getGray(bi.getRGB(x, y));
                graySum += gray[x][y];
            }
        }

        // Resolve the threshold locally so an automatic value never leaks into later calls.
        int effectiveThreshold = threshold;
        if (effectiveThreshold == AUTO_THRESHOLD) {
            effectiveThreshold = (int) (graySum / ((long) h * w));
        }

        int[][] binary = new int[w][h];
        for (int x = 0; x < w; x++) {
            for (int y = 0; y < h; y++) {
                binary[x][y] = getAverageColor(gray, x, y, w, h) > effectiveThreshold ? 1 : 0;
            }
        }
        return binary;
    }

    /** Collapses a packed RGB pixel into one gray level using the configured weights. */
    private int getGray(int rgb) {
        int r = (rgb & 0xFF0000) >> 16;
        int g = (rgb & 0x00FF00) >> 8;
        int b = rgb & 0x0000FF;
        return (int) (r * rp + g * gp + b * bp);
    }

    /**
     * Mean gray level of the 3x3 neighbourhood centred on ({@code x}, {@code y}).
     * Neighbours that fall outside the image are counted as 255 (white).
     */
    private int getAverageColor(int[][] gray, int x, int y, int w, int h) {
        int sum = 0;
        for (int dx = -1; dx <= 1; dx++) {
            for (int dy = -1; dy <= 1; dy++) {
                int nx = x + dx;
                int ny = y + dy;
                boolean inside = nx >= 0 && nx < w && ny >= 0 && ny < h;
                sum += inside ? gray[nx][ny] : 255;
            }
        }
        return sum / 9;
    }
}
2.取得矩阵
package cn.my.image.Compare.test;
import java.util.ArrayList;
/**
 * Extracts a list of squares from a binary matrix (indexed {@code [x][y]}).
 *
 * <p>Squares are grown over cells whose value is {@code 1}. Note that the Binarization class
 * in this file labels {@code 1} as white, while the names in this class call those cells
 * "black"; the names are kept, but the value actually measured is {@code 1}.
 *
 * <p>The caller's matrix is never modified: all bookkeeping happens on a private copy.
 */
public class Distribution {
    // When false, the matrix is inverted (0 <-> 1) before squares are extracted.
    private boolean isBlackSquare = true;
    // Dimensions of the last matrix given to toDistribution (0 before the first call).
    private int picW;
    private int picH;

    /** Creates an extractor that measures the cells equal to 1 as given. */
    public Distribution() {
    }

    /**
     * Creates an extractor that optionally inverts the matrix first, e.g. to compare against
     * an inverted-colour picture.
     *
     * @param isBlackSquare {@code true} to measure cells equal to 1 as given; {@code false}
     *                      to swap 0 and 1 before measuring
     */
    public Distribution(boolean isBlackSquare) {
        this.isBlackSquare = isBlackSquare;
    }

    /**
     * Computes the square "fingerprint" of a binary matrix.
     *
     * @param binary matrix indexed {@code [x][y]} holding 0/1 values; left untouched
     * @return squares whose side length exceeds 1% of the larger image dimension; empty when
     *         the matrix has no columns or no rows
     */
    public ArrayList<Square> toDistribution(int binary[][]) {
        picW = binary.length;
        picH = picW == 0 ? 0 : binary[0].length;
        if (picW == 0 || picH == 0) {
            return new ArrayList<Square>();
        }

        // Work on a deep copy: extraction overwrites visited cells with -1 and anti() flips
        // values in place, either of which would corrupt the caller's matrix for later reuse.
        int[][] work = new int[picW][];
        for (int x = 0; x < picW; x++) {
            work[x] = binary[x].clone();
        }

        if (!isBlackSquare) {
            anti(work);
        }
        return getSquareArray(work);
    }

    /**
     * Returns the dimensions of the last processed matrix.
     *
     * @return two-element array: {@code [0]} is the width, {@code [1]} the height; both 0
     *         before the first call to {@link #toDistribution(int[][])}
     */
    public int[] getWH() {
        int[] xy = new int[2];
        xy[0] = picW;
        xy[1] = picH;
        return xy;
    }

    /**
     * Repeatedly finds the next unvisited cell equal to 1 (row by row, starting at the row
     * of the previous hit) and grows a square from it.
     *
     * @param binary working matrix; visited cells are overwritten with -1
     * @return the squares whose side length is above the minimum size
     */
    private ArrayList<Square> getSquareArray(int binary[][]) {
        ArrayList<Square> squareList = new ArrayList<Square>();
        int x = 0;
        int y = 0;
        // Squares with a side of at most 1% of the larger dimension are discarded.
        int minSide = (picW > picH ? picW : picH) / 100;
        boolean goon = true;
        while (goon) {
            // Locate the next start cell; x,y keep their old values when nothing is found.
            search:
            for (int j = y; j < picH; j++) {
                for (int i = 0; i < picW; i++) {
                    if (binary[i][j] == 1) {
                        x = i;
                        y = j;
                        break search;
                    }
                }
            }
            // Stop when the scan found nothing new (old start cell is already marked -1)
            // or when the hit is the very last cell of the image.
            if (binary[x][y] != 1 || (x == picW - 1 && y == picH - 1)) {
                break;
            }
            Square square = getSquare(binary, x, y);
            if (square.l > minSide) {
                squareList.add(square);
            }
            // A square reaching both the right and the bottom border ends the extraction.
            if (x + square.l >= picW && y + square.l >= picH) {
                goon = false;
            }
        }
        return squareList;
    }

    /**
     * Grows a square from ({@code x}, {@code y}) one L-shaped layer (bottom row + right
     * column) at a time. Growth stops at the image border, or once the accumulated count of
     * non-1 cells is no longer smaller than the number of 1 cells on the newest layer.
     *
     * <p>NOTE(review): the layer at offset {@code length} holds {@code 2*length + 1} cells,
     * but the non-1 count is computed as {@code 2*length - 1 - black}, i.e. two lower per
     * layer. This is kept as written because it acts as the heuristic's tolerance; confirm
     * whether it is intentional.
     *
     * @param binary working matrix; every cell of the grown square is overwritten with -1
     * @param x      column of the square's top-left corner
     * @param y      row of the square's top-left corner
     * @return a Square starting at ({@code x}, {@code y}) with {@code l} set to the side length
     */
    private Square getSquare(int binary[][], int x, int y) {
        Square square = new Square(x, y);
        int length = 0;
        int writeSum = 0;
        boolean goon = true;
        while (goon) {
            int black = getSidePoint(binary, x, y, length);
            int write = 2 * length - 1 - black;
            writeSum = writeSum + write;
            length++;
            // The bounds checks also guarantee the next layer's cells are inside the matrix.
            if (x + length >= picW || y + length >= picH || writeSum >= black) {
                goon = false;
            }
        }
        square.l = length;
        return square;
    }

    /**
     * Counts the cells equal to 1 on the L-shaped layer at offset {@code length} from the
     * start point (row {@code start_Y + length} and column {@code start_X + length}), and
     * marks every cell of that layer as visited (-1).
     *
     * @param binary  working matrix
     * @param start_X column of the square's top-left corner
     * @param start_Y row of the square's top-left corner
     * @param length  offset of the layer from the start point
     * @return number of cells on the layer that were equal to 1
     */
    private int getSidePoint(int binary[][], int start_X, int start_Y, int length) {
        int diffPoint = 0;
        // The corner cell belongs to both the row and the column and is therefore counted
        // twice by the loop below; compensate up front.
        if (binary[start_X + length][start_Y + length] == 1) {
            diffPoint--;
        }
        for (int i = 0; i <= length; i++) {
            int rowCell = binary[start_X + i][start_Y + length];
            int colCell = binary[start_X + length][start_Y + i];
            if (rowCell == 1) {
                diffPoint++;
            }
            if (colCell == 1) {
                diffPoint++;
            }
            // Mark as read only after both cells were sampled (they coincide at the corner).
            binary[start_X + i][start_Y + length] = -1;
            binary[start_X + length][start_Y + i] = -1;
        }
        return diffPoint;
    }

    /**
     * Inverts the matrix in place: cells equal to 1 become 0, every other cell becomes 1.
     *
     * @param binary matrix to invert
     */
    private void anti(int binary[][]) {
        for (int x = 0; x < picW; x++) {
            for (int y = 0; y < picH; y++) {
                binary[x][y] = binary[x][y] == 1 ? 0 : 1;
            }
        }
    }
}
3.对比
package cn.my.image.Compare.test;
import java.util.ArrayList;
/**
 * Scores the similarity of two images from their square lists (as produced by Distribution).
 *
 * <p>Only the largest squares of each list take part, selected by the "particle" setting.
 * The input lists are never modified, so the same lists can be compared repeatedly and in
 * either argument order.
 */
public class Compare {
    // Selection ratio in [0, 1]: 1 keeps only the largest square, 0 keeps all (default 0.5).
    private float pr = 0.5f;
    // Image area used to normalise centre distances; set on every toCompare call.
    private double area = 0;

    /** Creates a comparer with the default particle of 0.5. */
    public Compare() {
    }

    /**
     * Creates a comparer with a custom particle.
     *
     * @param particle value in [0, 1]; 0 uses all squares, 1 uses only the largest one.
     *                 Values outside the range are ignored and the default 0.5 is kept.
     */
    public Compare(float particle) {
        if (particle <= 1 && particle >= 0) {
            this.pr = particle;
        }
    }

    /**
     * Computes an area-weighted similarity between two square lists.
     *
     * @param s1   squares of the first image; not modified
     * @param s2   squares of the second image; not modified
     * @param area image area in pixels, used to normalise distances
     * @return weighted mean of the per-square scores of the shorter selected list; 0 when
     *         that list is empty. Identical inputs yield 1 (up to floating-point rounding).
     */
    public double toCompare(ArrayList<Square> s1, ArrayList<Square> s2, int area) {
        this.area = area;
        ArrayList<Square> top1 = Sort(s1);
        ArrayList<Square> top2 = Sort(s2);
        ArrayList<Square> small = top1.size() < top2.size() ? top1 : top2;
        ArrayList<Square> large = top1.size() >= top2.size() ? top1 : top2;

        int tsize = 0;
        for (Square s : small) {
            tsize = tsize + s.getArea();
        }
        double finalRatio = 0;
        for (Square s : small) {
            // Each square contributes in proportion to its share of the total area.
            finalRatio = finalRatio + (ratioFrom(s, large) * s.getArea() / tsize);
        }
        return finalRatio;
    }

    /**
     * Scores one square against the other image's squares.
     *
     * <p>The square is matched with every square of {@code large} whose centre lies inside
     * it; if there is none, with the single square whose centre is nearest (Manhattan
     * distance). The score is 1 minus the mean of a relative size difference and a
     * normalised centre distance.
     *
     * @param s1    square being scored
     * @param large candidate squares from the other image
     * @return similarity score; 1 for a perfect match, 0 when {@code large} is empty
     */
    private double ratioFrom(Square s1, ArrayList<Square> large) {
        if (large.isEmpty()) {
            return 0;
        }
        int xend = s1.x + s1.l;
        int yend = s1.y + s1.l;
        int xm = s1.getCenterX();
        int ym = s1.getCenterY();

        ArrayList<Square> matched = new ArrayList<Square>();
        // Track the best distance explicitly; comparing against the current candidate's
        // side length would mix two unrelated quantities.
        Square nearest = null;
        double nearestDistance = Double.MAX_VALUE;
        for (Square s2 : large) {
            int x1 = s2.getCenterX();
            int y1 = s2.getCenterY();
            if (x1 <= xend && x1 >= s1.x && y1 <= yend && y1 >= s1.y) {
                matched.add(s2);
            }
            double distance = getAbsolute(x1 - xm) + getAbsolute(y1 - ym);
            if (distance < nearestDistance) {
                nearestDistance = distance;
                nearest = s2;
            }
        }
        if (matched.isEmpty()) {
            matched.add(nearest);
        }

        double matchedArea = 0;
        double sumX = 0;
        double sumY = 0;
        for (Square s : matched) {
            // Multiply in double so very large side lengths cannot overflow int.
            matchedArea = matchedArea + (double) s.l * s.l;
            sumX = sumX + s.x + s.l / 2;
            sumY = sumY + s.y + s.l / 2;
        }
        double ownArea = (double) s1.l * s1.l;
        double sizeratio = getAbsolute(matchedArea - ownArea) / ((matchedArea + ownArea) / 2);

        double dx = sumX / matched.size() - xm;
        double dy = sumY / matched.size() - ym;
        // Distance between centres, normalised by the side of a square with the image's area.
        double ratio = Math.sqrt(dx * dx + dy * dy) / Math.sqrt(area);
        ratio = (ratio + sizeratio) / 2;
        return 1 - ratio;
    }

    /** @return the absolute value of {@code i} */
    private double getAbsolute(double i) {
        return i > 0 ? i : -i;
    }

    /**
     * Selects the largest squares in descending order of side length.
     *
     * <p>Selection stops right after the first square whose side is at most
     * {@code pr} times the largest side (that square is still included), or when all
     * squares were taken.
     *
     * @param squares input list; not modified
     * @return a new list holding the selected squares
     */
    private ArrayList<Square> Sort(ArrayList<Square> squares) {
        ArrayList<Square> newsquares = new ArrayList<Square>();
        if (squares.size() == 0) {
            return newsquares;
        }
        // Consume a copy: removing from the caller's list would break any later comparison
        // that reuses it.
        ArrayList<Square> remaining = new ArrayList<Square>(squares);
        while (true) {
            Square temp = new Square(-1, -1);
            for (Square s : remaining) {
                if (s.l > temp.l) {
                    temp = s;
                }
            }
            newsquares.add(temp);
            if (temp.l <= newsquares.get(0).l * pr) {
                break;
            }
            remaining.remove(temp);
            if (remaining.isEmpty()) {
                break;
            }
        }
        return newsquares;
    }
}
OK:
测试:像素1900*2000的
// Usage example: binarize two images, extract their squares and compare them both ways.
Binarization b = new Binarization();
FileInputStream is = new FileInputStream(new File("d:/1.jpg"));
int a[][];
try {
    a = b.toBinarization(is);
} finally {
    is.close(); // toBinarization does not close the stream it is given
}
Distribution d = new Distribution();
ArrayList<Square> s = d.toDistribution(a);
System.out.println(s.size());
FileInputStream is2 = new FileInputStream(new File("d:/3.jpg"));
int a2[][];
try {
    a2 = b.toBinarization(is2);
} finally {
    is2.close();
}
d = new Distribution();
ArrayList<Square> s2 = d.toDistribution(a2);
System.out.println(s2.size()); // size of the SECOND list
Compare c= new Compare();
// Defensive copies keep s and s2 intact for the second comparison even if toCompare
// modifies the lists it receives.
double m1 = c.toCompare(new ArrayList<Square>(s), new ArrayList<Square>(s2), b.getArea());
double m2 = c.toCompare(new ArrayList<Square>(s2), new ArrayList<Square>(s), b.getArea());
System.out.println("'''''" +m1 + "," +m2 +"," );
结果 :
'''''0.8107870909493387,0.7989862217705622, 相似度约 0.80,平均有 80% 吧
同一张图测试对比
结果
'''''1.0,1.0000000000000002, 基本就是1,多出来的那一点(末尾的 2)应该是程序里 double 浮点运算、开方等累积的舍入误差,不是逻辑 bug,可以忽略。
'''''1.0,1.0000000000000002,
同一张图,反色对比
'''''0.21948688097837563,0.3277072822487256,
相似图。设置颗粒度0.1f,貌似就出问题了
'''''0.8188462582289785,0.6802030848688854,
完全不一致啊,平均75%相似度,但正反比的结果不一样,说明算法稳定性很差,要改进啊。
同一张图。设置颗粒度0.1f。
'''''0.24759013614791678,0.0, 相似度0.这还差不多,要的就是0不过你也给太夸张了。。0!FT。。。
反正还有问题了,有待改进~!