kmeans算法网上资料很多,其原理简单来说就选取k个聚类中心,然后把剩余的点归类到与其相距最近的聚类中心,是一种无监督算法。缺点和不足有两个,第一个是k需要指定,第二个是对于聚类中心种子点的选取很敏感。本文将以yolov4算法使用kmeans算法生成anchors为例进行说明。
1、计算多个点的kmeans
import numpy as np

# Generate fake data: 50 points in 2-D, uniform on [0, 1).
data = np.random.rand(50, 2)
print(data.shape)

# Pick 5 distinct data points as the initial cluster centers.
centers = data[np.random.choice(data.shape[0], 5, replace=False)]
print(centers.shape)

# Lloyd-style k-means iteration using the L1 (Manhattan) distance.
# BUG FIX: the original copied new_centers into centers BEFORE the while
# condition was re-checked, so `centers != new_centers` was always False
# and the loop stopped after a single iteration. Iterate until the
# centers stop moving, with a safety cap against rare oscillation.
new_centers = centers.copy()
for _ in range(300):
    # distance[i, j] = L1 distance from point i to center j (broadcast).
    distance = np.sum(np.abs(data[:, None, :] - centers), axis=-1)
    index = np.argmin(distance, axis=-1)
    new_centers = centers.copy()
    for i in range(centers.shape[0]):
        members = data[index == i]
        # Keep the old center if a cluster goes empty (mean of an empty
        # slice would produce NaN).
        if len(members):
            new_centers[i] = members.mean(axis=0)
    if np.array_equal(centers, new_centers):
        break
    centers = new_centers
print(new_centers)
(50, 2)
(5, 2)
[[0.33028589 0.17096868]
[0.37237028 0.88923208]
[0.49766799 0.553117 ]
[0.04129898 0.4883419 ]
[0.88315816 0.29498035]]
2、获取所有的标注框长宽
如以下代码,-filelist可以获得所有训练数据的路径,比如使用VOC格式,那么train.txt文件中存的路径就是/path/VOCdevkit/JPEGimages/XXXX.jpg,对应的标注文件在/path/VOCdevkit/Annotations/XXXX.xml中,yolo格式的标注文件在/path/VOCdevkit/labels/XXXX.txt,XXXX是图片名称。参见以下代码及注释。
def main(argv):
    """Collect every (width, height) box from a YOLO-format dataset and
    run k-means on them to generate anchor files.

    Command-line options:
      -filelist      text file listing one image path per line (VOC layout)
      -output_dir    directory where anchorsN.txt files are written
      -num_clusters  number of anchors; 0 means try every k from 1 to 10
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-filelist', default = '\\path\\to\\voc\\filelist\\train.txt',
                        help='path to filelist\n' )
    parser.add_argument('-output_dir', default = 'generated_anchors/anchors', type = str,
                        help='Output anchor directory\n' )
    parser.add_argument('-num_clusters', default = 0, type = int,
                        help='number of clusters\n' )
    args = parser.parse_args()

    # Directory that will hold the generated anchor files.
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    # Read every image path, one per line (with-block closes the handle).
    with open(args.filelist) as f:
        lines = [line.rstrip('\n') for line in f.readlines()]

    # Accumulate the (w, h) of every annotation box across all images.
    annotation_dims = []
    for line in lines:
        # Map the image path to its YOLO .txt label file.
        #line = line.replace('images','labels')
        #line = line.replace('img1','labels')
        line = line.replace('JPEGImages', 'labels')
        line = line.replace('.jpg', '.txt')
        line = line.replace('.png', '.txt')
        print(line)
        with open(line) as f2:
            for row in f2.readlines():
                # YOLO label format: class x_center y_center width height.
                # split() (no argument) tolerates repeated/trailing spaces,
                # unlike the original split(' ').
                w, h = row.rstrip('\n').split()[3:]
                annotation_dims.append(tuple(map(float, (w, h))))
    # Final shape is (N, 2): one normalized (w, h) pair per box.
    annotation_dims = np.array(annotation_dims)

    eps = 0.005
    if args.num_clusters == 0:
        # No k specified: generate anchor files for k = 1 through 10.
        for num_clusters in range(1, 11):
            anchor_file = join(args.output_dir, 'anchors%d.txt' % (num_clusters))
            indices = [random.randrange(annotation_dims.shape[0]) for i in range(num_clusters)]
            centroids = annotation_dims[indices]
            kmeans(annotation_dims, centroids, eps, anchor_file)
            print('centroids.shape', centroids.shape)
    else:
        # A specific k was requested: run k-means exactly once.
        anchor_file = join(args.output_dir, 'anchors%d.txt' % (args.num_clusters))
        indices = [random.randrange(annotation_dims.shape[0]) for i in range(args.num_clusters)]
        centroids = annotation_dims[indices]
        kmeans(annotation_dims, centroids, eps, anchor_file)
        print('centroids.shape', centroids.shape)
以上部分就是获得所有标注的宽和高,下面是核心算法kmeans的实现,请读一下代码,我加了注释。
3、求kmeans
# Network input resolution taken from the darknet .cfg file. Anchors are
# written in units of the stride-32 feature-map cell, so normalized (w, h)
# values are multiplied by input_size / 32 before being saved.
width_in_cfg_file = 416.
height_in_cfg_file = 416.
def IOU(x, centroids):
    """Return the IoU between box `x` and every centroid, assuming all
    boxes share a common center (only width/height matter).

    x:         (w, h) of one annotation box
    centroids: iterable of (w, h) cluster centers
    returns:   np.ndarray of shape (k,), one IoU per centroid
    """
    similarities = []
    for centroid in centroids:
        c_w, c_h = centroid
        w, h = x
        if c_w >= w and c_h >= h:
            # Box fits entirely inside the centroid box.
            similarity = w * h / (c_w * c_h)
        elif c_w >= w and c_h <= h:
            # Intersection = w*c_h; union = w*h + (c_w - w)*c_h.
            similarity = w * c_h / (w * h + (c_w - w) * c_h)
        elif c_w <= w and c_h >= h:
            # Symmetric case: intersection = c_w*h.
            similarity = c_w * h / (w * h + c_w * (c_h - h))
        else:
            # Centroid fits entirely inside the box.
            similarity = (c_w * c_h) / (w * h)
        similarities.append(similarity)  # becomes shape (k,)
    return np.array(similarities)
def avg_IOU(X, centroids):
    """Mean best-IoU clustering score: for each box in X take its IoU with
    the closest centroid, then average over all n boxes.

    X:         (n, 2) array of annotation (w, h) pairs
    centroids: (k, 2) cluster centers
    """
    n = X.shape[0]
    # The original shadowed the builtin `sum`; use a distinct name.
    total = 0.
    for i in range(n):
        # IOU() returns one value per centroid; keep only the best match.
        total += max(IOU(X[i], centroids))
    return total / n
def write_anchors_to_file(centroids, X, anchor_file):
    """Scale the centroids to stride-32 feature-map units, sort them by
    width and write them plus the average best-IoU to `anchor_file`.

    centroids:   (k, 2) cluster centers in normalized (w, h)
    X:           (N, 2) all annotation (w, h) pairs (for the IoU score)
    anchor_file: output path
    """
    anchors = centroids.copy()
    print(anchors.shape)
    # Convert from normalized units to cells of the stride-32 feature map.
    anchors[:, 0] *= width_in_cfg_file / 32.
    anchors[:, 1] *= height_in_cfg_file / 32.

    sorted_indices = np.argsort(anchors[:, 0])
    print('Anchors = ', anchors[sorted_indices])

    # BUG FIX: `with` guarantees the file is flushed and closed (original
    # leaked the handle), and the last anchor is indexed with a scalar
    # (sorted_indices[-1]) instead of a 1-element slice, which newer NumPy
    # refuses to format with %f.
    with open(anchor_file, 'w') as f:
        for i in sorted_indices[:-1]:
            f.write('%0.2f,%0.2f, ' % (anchors[i, 0], anchors[i, 1]))
        # No comma after the last anchor.
        last = sorted_indices[-1]
        f.write('%0.2f,%0.2f\n' % (anchors[last, 0], anchors[last, 1]))
        f.write('%f\n' % (avg_IOU(X, centroids)))
    print()
def kmeans(X, centroids, eps, anchor_file):
    """Run k-means on the (N, 2) box dimensions X using 1 - IoU as the
    distance; on convergence, write the final anchors to `anchor_file`.

    X:           (N, 2) annotation (w, h) pairs
    centroids:   (k, 2) initial cluster centers (mutated in place)
    eps:         kept for interface compatibility but unused — the loop
                 stops only when the assignments no longer change
    anchor_file: path passed through to write_anchors_to_file()
    """
    N = X.shape[0]
    k, dim = centroids.shape
    prev_assignments = np.ones(N) * (-1)
    iteration = 0  # renamed from `iter`, which shadowed the builtin
    old_D = np.zeros((N, k))

    while True:
        iteration += 1
        # D[i, j] = 1 - IoU(box i, centroid j); smaller means closer.
        D = np.array([1 - IOU(X[i], centroids) for i in range(N)])  # (N, k)
        print("iter {}: dists = {}".format(iteration, np.sum(np.abs(old_D - D))))

        # Assign each box to its nearest centroid; unchanged assignments
        # mean the clustering has converged.
        assignments = np.argmin(D, axis=1)
        if (assignments == prev_assignments).all():
            print("Centroids = ", centroids)
            write_anchors_to_file(centroids, X, anchor_file)
            return

        # Recompute each centroid as the mean of its member boxes.
        # BUG FIX: np.float was removed in NumPy 1.24 — use the builtin.
        centroid_sums = np.zeros((k, dim), float)
        for i in range(N):
            centroid_sums[assignments[i]] += X[i]
        for j in range(k):
            count = np.sum(assignments == j)
            # Guard against an empty cluster (original divided by zero,
            # turning that centroid into NaN); keep the old centroid.
            if count:
                centroids[j] = centroid_sums[j] / count

        prev_assignments = assignments.copy()
        old_D = D.copy()
将以上两部分组合,即可进行kmeans聚类从而获得anchors.