K-means 算法及其代码
- K-means算法介绍
- K-means 伪代码
- K-means python 代码
K-means算法介绍
链接: 模式识别—聚类分析
K-means 伪代码
- 计算两个点之间的欧氏距离
def calcluate_distance(core: tuple, dot: tuple):
"""
Compute the Euclidean distance between two points.
:param core: centroid coordinates (x, y), given as a tuple
:param dot: the point (m, n) whose distance is measured, given as a tuple
:return: the distance dist, a float
"""
# Pseudocode outline: the statement that computes dist is elided here.
return dist
- 计算给定点应分配到哪一个质心
def calculate_cluster(dot: tuple, cores: list):
"""
Determine which centroid the given point should be assigned to.
:param dot: the point to process
:param cores: list of centroids
:return: index of the centroid the point should be assigned to
"""
distance_list = []
for core in cores:
# Pseudocode outline: the loop body that fills distance_list is elided here.
# The result is the position of the smallest entry in distance_list.
min_dist = min(distance_list)
put_to_index = distance_list.index(min_dist)
return put_to_index
- 将点分配到最近的质心的簇
def put_dot_into_clusters(row_data: list, k: int, cores: list):
"""
Assign each point to the cluster of its nearest centroid.
:param cores:
:param row_data:
:param k:
:return: the clusters with their points assigned
"""
clusters = []
# Pseudocode outline: the bodies of both loops below are elided here.
for each in range(k):
for every_data in row_data:
return clusters
- 计算当前簇的下一个质心
def re_calculate_core(cluster: set):
"""
Compute the next centroid of the current cluster.
:param cluster:
:return: new_core
"""
all_x = []
all_y = []
for each_dot in cluster:
# Pseudocode outline: the loop body and the computation of avg_x / avg_y
# are elided here. The new centroid is rounded to 2 decimal places.
new_core = (round(avg_x, 2), round(avg_y, 2))
return new_core
- 初始化数据点
# Pseudocode outline: the statement that builds adot is elided here.
for num in range(10):
data_list.append(adot)
K-means python 代码
IDE: PyCharm
Version:Python 3.7.3
from random import random, sample
from math import pow
def calcluate_distance(core: tuple, dot: tuple):
    """
    Return the straight-line (Euclidean) distance between two 2-D points.

    Only the first two components of each argument are used.

    :param core: centroid coordinates (x, y)
    :param dot: point (m, n) whose distance from the centroid is wanted
    :return: the distance as a float
    """
    core_x, core_y = core[0], core[1]
    dot_x, dot_y = dot[0], dot[1]
    squared_sum = (dot_x - core_x) ** 2 + (dot_y - core_y) ** 2
    # Raising to the power 0.5 takes the square root of the summed squares.
    return pow(squared_sum, 0.5)
def calculate_cluster(dot: tuple, cores: list):
    """
    Find the centroid closest to a point.

    :param dot: the point to classify
    :param cores: list of centroids
    :return: index into ``cores`` of the nearest centroid; on a tie the
        lowest index wins
    """
    distances = [calcluate_distance(centroid, dot) for centroid in cores]
    # list.index returns the first match, so ties resolve to the lowest index.
    return distances.index(min(distances))
def initiation_cores(row_data: list, k: int):
    """
    Pick the initial centroids by drawing ``k`` entries from the data.

    :param row_data: the original data points
    :param k: number of centroids to draw
    :return: list of ``k`` centroids sampled without replacement
    """
    return sample(row_data, k)
def put_dot_into_clusters(row_data: list, k: int, cores: list):
    """
    Distribute every point into the cluster of its nearest centroid.

    :param row_data: the data points to distribute
    :param k: number of clusters to create
    :param cores: current centroids, passed to ``calculate_cluster``
    :return: list of ``k`` sets; set ``i`` holds the points whose nearest
        centroid index is ``i``
    """
    clusters = [set() for _ in range(k)]
    for point in row_data:
        clusters[calculate_cluster(point, cores)].add(point)
    return clusters
def re_calculate_core(cluster: set):
    """
    Compute the next centroid of a cluster as the mean of its points.

    :param cluster: the points (x, y) currently assigned to the cluster
    :return: the new centroid (mean_x, mean_y), each rounded to 2 decimals
    """
    # Snapshot the points once so both coordinate sums see the same order.
    points = list(cluster)
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    mean_x = sum(xs) / len(xs)
    mean_y = sum(ys) / len(ys)
    return (round(mean_x, 2), round(mean_y, 2))
if __name__ == '__main__':
    # Synthetic 2-D data set. Each row is (count, x_span, x_min, y_span, y_min);
    # every coordinate is random() * span + min, rounded to 2 decimals.
    point_groups = [
        (10, 20, -100, 20, -100),
        (100, 100, 100, 50, 150),
        (50, 20, 0, 20, 0),
        (50, 100, 100, 20, 0),
        (100, 200, 0, 200, 0),
    ]
    data_list = []
    for count, x_span, x_min, y_span, y_min in point_groups:
        for num in range(count):
            adot = (round(random() * x_span + x_min, 2),
                    round(random() * y_span + y_min, 2))
            data_list.append(adot)

    k = 4
    # Safety cap: centroids are rounded to 2 decimals, so guard against the
    # (unlikely) case where the assignment never settles.
    max_rounds = 1000
    my_cores = initiation_cores(data_list, k)
    for roundx in range(1, max_rounds + 1):
        cl = put_dot_into_clusters(data_list, k, my_cores)
        # An empty cluster has no mean (re_calculate_core would divide by
        # zero), so such a cluster keeps its previous centroid.
        new_cores = [
            re_calculate_core(cl[index]) if cl[index] else my_cores[index]
            for index in range(k)
        ]
        if new_cores == my_cores:
            # Centroids stopped moving: converged.
            break
        my_cores = new_cores
    else:
        # Cap reached without convergence: re-assign once so the plotted
        # clusters match the final centroids.
        cl = put_dot_into_clusters(data_list, k, my_cores)

    import matplotlib.pyplot as plt

    colors = ['#0000FF', '#FF0000', '#00FF00', '#666666', '#FFFF00']
    for index in range(k):
        # Cycle through the palette if k exceeds the number of colours.
        color = colors[index % len(colors)]
        if cl[index]:
            # One scatter call per cluster instead of one per point.
            xs = [every_dot[0] for every_dot in cl[index]]
            ys = [every_dot[1] for every_dot in cl[index]]
            plt.scatter(xs, ys, c=color, alpha=0.53)
        # Mark the cluster's centroid with a large black plus sign.
        plt.scatter(my_cores[index][0], my_cores[index][1], marker='+', c='#000000', s=180)
    plt.show()
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)