(The decision-tree experiment isn't done yet; I'll upload it once it's finished. Things have been a bit busy lately.)
1. KNN Algorithm
That is, the k-nearest-neighbors algorithm (not to be confused with the similarly named K-means clustering algorithm).
1.1 How KNN Works:
KNN is a lazy learning method: it does not build a model from the training samples.
For a new sample, compute its distance to every sample in the training set, take the k samples with the smallest distances as its neighbors, and predict the class that appears most often among those k neighbors.
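As a minimal sketch of this procedure on toy data (the points, labels, and choice of k below are made up purely for illustration):

import numpy as np

# toy training set: four 2-D points in two classes
X_train = np.array([[1.0, 1.0], [1.2, 0.8], [4.0, 4.2], [3.8, 4.0]])
y_train = np.array([0, 0, 1, 1])

def knn_predict_one(x_new, k=3):
    dists = np.linalg.norm(X_train - x_new, axis=1)  # distance to every training point
    nearest = np.argsort(dists)[:k]                  # indices of the k closest points
    return np.argmax(np.bincount(y_train[nearest]))  # majority vote among their labels

print(knn_predict_one(np.array([1.1, 0.9]), k=3))  # lies near class 0, so prints 0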
1.2 Advantages of KNN
- A classic algorithm: simple and easy to implement.
- Relatively scalable: since there is no training phase, new training samples can be added at any time without refitting a model.
1.3 Disadvantages of KNN
- Different choices of k may lead to different results
- Sensitive to noisy data and outliers
- Requires the training samples to faithfully reflect the true data distribution
Because KNN's predictions depend entirely on the training data, it never learns a model; it only measures distances between samples.
So as soon as the training samples fail to reflect the true data distribution, the predictions become inaccurate!
Likewise, if the training and test data do not come from the same distribution, predictions will also be inaccurate.
1.4 What KNN Does (Goal):
Predict, for a new sample, the class shared by the majority of its k nearest neighbors.
1.5 Implementation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


class KNearestNeighbor(object):
    def __init__(self):
        pass

    def loadData(self, path):
        data = pd.read_csv(path, header=None)
        data.columns = ['sepal length', 'sepal width',
                        'petal length', 'petal width',
                        'species']  # feature and class-label column names
        X = data.iloc[0:150, 0:4].values
        y = data.iloc[0:150, 4].values
        # encode Iris-setosa as label 0
        y[y == 'Iris-setosa'] = 0
        # encode Iris-versicolor as label 1
        y[y == 'Iris-versicolor'] = 1
        # encode Iris-virginica as label 2
        y[y == 'Iris-virginica'] = 2
        # cast from an object array to int so np.bincount accepts the labels
        y = y.astype(int)
        # Iris-setosa: 4 features
        self.X_setosa, self.y_setosa = X[0:50], y[0:50]
        # Iris-versicolor: 4 features
        self.X_versicolor, self.y_versicolor = X[50:100], y[50:100]
        # Iris-virginica: 4 features
        self.X_virginica, self.y_virginica = X[100:150], y[100:150]
        # training set: the first 30 samples of each class
        self.X_setosa_train = self.X_setosa[:30, :]
        self.y_setosa_train = self.y_setosa[:30]
        self.X_versicolor_train = self.X_versicolor[:30, :]
        self.y_versicolor_train = self.y_versicolor[:30]
        self.X_virginica_train = self.X_virginica[:30, :]
        self.y_virginica_train = self.y_virginica[:30]
        self.X_train = np.vstack([self.X_setosa_train, self.X_versicolor_train, self.X_virginica_train])
        self.y_train = np.hstack([self.y_setosa_train, self.y_versicolor_train, self.y_virginica_train])
        # test set: the remaining 20 samples of each class
        self.X_setosa_test = self.X_setosa[30:50, :]
        self.y_setosa_test = self.y_setosa[30:50]
        self.X_versicolor_test = self.X_versicolor[30:50, :]
        self.y_versicolor_test = self.y_versicolor[30:50]
        self.X_virginica_test = self.X_virginica[30:50, :]
        self.y_virginica_test = self.y_virginica[30:50]
        self.X_test = np.vstack([self.X_setosa_test, self.X_versicolor_test, self.X_virginica_test])
        self.y_test = np.hstack([self.y_setosa_test, self.y_versicolor_test, self.y_virginica_test])

    # visualize the data with matplotlib
    def showData(self):
        # plot only two features (sepal length and petal length) so the data fit a 2-D figure
        # training set
        plt.scatter(self.X_setosa_train[:, 0], self.X_setosa_train[:, 2], color='red', marker='o', label='setosa_train')
        plt.scatter(self.X_versicolor_train[:, 0], self.X_versicolor_train[:, 2], color='blue', marker='^',
                    label='versicolor_train')
        plt.scatter(self.X_virginica_train[:, 0], self.X_virginica_train[:, 2], color='green', marker='s',
                    label='virginica_train')
        # test set (all drawn in yellow)
        plt.scatter(self.X_setosa_test[:, 0], self.X_setosa_test[:, 2], color='y', marker='o', label='setosa_test')
        plt.scatter(self.X_versicolor_test[:, 0], self.X_versicolor_test[:, 2], color='y', marker='^',
                    label='versicolor_test')
        plt.scatter(self.X_virginica_test[:, 0], self.X_virginica_test[:, 2], color='y', marker='s',
                    label='virginica_test')
        plt.xlabel('sepal length')
        plt.ylabel('petal length')
        plt.legend(loc=4)
        plt.show()

    # prediction
    def predict(self, X, k=1):
        # pairwise Euclidean distances via the expansion
        # ||x - x'||^2 = ||x||^2 - 2*x.x' + ||x'||^2
        num_test = X.shape[0]
        d1 = -2 * np.dot(X, self.X_train.T)               # -2*x.x' term, shape (num_test, num_train)
        d2 = np.sum(np.square(X), axis=1, keepdims=True)  # ||x||^2 term, shape (num_test, 1)
        d3 = np.sum(np.square(self.X_train), axis=1)      # ||x'||^2 term, shape (num_train,)
        # clamp tiny negatives caused by floating-point round-off before the sqrt
        dist = np.sqrt(np.maximum(d1 + d2 + d3, 0))
        # vote among the k nearest neighbors
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            dist_k_min = np.argsort(dist[i])[:k]  # positions of the k nearest training samples
            y_kclose = self.y_train[dist_k_min]   # labels of those k samples
            # the most frequent label among the k neighbors becomes the prediction
            y_pred[i] = np.argmax(np.bincount(y_kclose))
        return y_pred


if __name__ == "__main__":
    path = "iris.txt"
    knn = KNearestNeighbor()
    knn.loadData(path)
    knn.showData()
    print(knn.X_train)
    for k in range(1, 11):
        y_pred = knn.predict(X=knn.X_test, k=k)
        accuracy = np.mean(y_pred == knn.y_test)
        print(f'k={k}: test-set accuracy {accuracy}')
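As a quick cross-check of the implementation above (a sketch assuming scikit-learn is installed; KNeighborsClassifier is its standard KNN classifier):

from sklearn.neighbors import KNeighborsClassifier

knn = KNearestNeighbor()
knn.loadData("iris.txt")
for k in range(1, 11):
    clf = KNeighborsClassifier(n_neighbors=k)  # same k as the hand-rolled version
    clf.fit(knn.X_train, knn.y_train)
    print(f'k={k}: sklearn test-set accuracy {clf.score(knn.X_test, knn.y_test)}')

The two should report very similar accuracies; small differences can come from how distance ties are broken.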
2. Building an Artificial Neural Network
2.1 Background
Implement artificial neural networks (ANNs) in Python, including a perceptron and a feed-forward network trained with backpropagation (the BP algorithm).
I wrote two separate posts to study and summarize these:
Perceptron
Backpropagation (this one is really thorough)
2.2 Experiment Code
import numpy as np
import matplotlib.pyplot as plt

# Perceptron
# load the dataset: each line is "x1 x2 label", with label +1 or -1
data_set = []    # feature vectors
data_label = []  # labels
file = open('ann_Perceptron.txt')
for line in file:
    line = line.split(' ')
    for i in range(len(line)):
        line[i] = float(line[i])
    data_set.append(line[0:2])
    data_label.append(int(line[-1]))
file.close()
data = np.array(data_set)
label = np.array(data_label)
# initialize the weights w, bias b, and learning rate alpha
w = np.array([0.0, 0.0])
b = 0
alpha = 1
# functional margin y * (w.x + b); non-positive entries are misclassified
f = (np.dot(data, w.T) + b) * label
idx = np.where(f <= 0)
# train with stochastic gradient descent: repeatedly pick a random
# misclassified point and update w and b until none remain
iteration = 1
while f[idx].size != 0:
    point = np.random.randint(f[idx].shape[0])
    x = data[idx[0][point]]
    y = label[idx[0][point]]
    w = w + alpha * y * x
    b = b + alpha * y
    print('Iteration:%d w:%s b:%s' % (iteration, w, b))
    f = (np.dot(data, w.T) + b) * label
    idx = np.where(f <= 0)
    iteration = iteration + 1
# plot the two classes and the learned separating line w.x + b = 0
x1 = np.arange(0, 6, 0.1)
x2 = (w[0] * x1 + b) / (-w[1])
idx_p = np.where(label == 1)
idx_n = np.where(label != 1)
data_p = data[idx_p]
data_n = data[idx_n]
plt.scatter(data_p[:, 0], data_p[:, 1], color='red')
plt.scatter(data_n[:, 0], data_n[:, 1], color='blue')
plt.plot(x1, x2)
plt.show()
print('\nPerceptron learning algorithm is over')
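# Note: 'ann_Perceptron.txt' is not included with this post. As a sketch, a
# linearly separable file in the format the loader above assumes
# ("x1 x2 label" per line, label +1 or -1) could be generated beforehand with:
#
# rng = np.random.default_rng(0)
# with open('ann_Perceptron.txt', 'w') as out:
#     for p in rng.uniform(3, 5, size=(20, 2)):   # positive class, label +1
#         out.write(f'{p[0]:.2f} {p[1]:.2f} 1\n')
#     for q in rng.uniform(0, 2, size=(20, 2)):   # negative class, label -1
#         out.write(f'{q[0]:.2f} {q[1]:.2f} -1\n')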
# BP network
def BP(x, y, num):
    epochs = 20000       # maximum number of training iterations
    learning_rate = 0.3
    I_num = x.shape[0]   # input layer size
    H_num = num          # hidden layer size
    O_num = y.shape[0]   # output layer size
    V = np.random.rand(I_num, H_num) - 0.5   # input-to-hidden weights
    dV = np.random.rand(I_num, H_num)
    W = np.random.rand(H_num, O_num) - 0.5   # hidden-to-output weights
    dW = np.random.rand(H_num, O_num)
    mse_record = np.ones((1, epochs))        # training-error history
    for step in range(epochs):
        # forward pass
        H_in = np.dot(x, V)      # 1 x H_num
        H_out = sigmoid(H_in)    # 1 x H_num
        O_in = np.dot(H_out, W)  # 1 x O_num
        O_out = O_in             # linear output layer, 1 x O_num
        error = (O_out - y)      # 1 x O_num
        mse = np.average(np.square(error))
        mse_record[0, step] = mse
        if mse < 4e-4:
            break
        # backward pass
        # gradient for W: the output layer is linear, so its delta is just (y - o)
        for h in range(H_num):
            for j in range(O_num):
                dW[h, j] = (y[j] - O_out[j]) * H_out[h]
        # gradient for V: backpropagate the output deltas through W, then
        # apply the sigmoid derivative at the hidden pre-activation
        for i in range(I_num):
            for h in range(H_num):
                delta = 0
                for j in range(O_num):
                    delta = delta + (y[j] - O_out[j]) * W[h, j]
                dV[i, h] = delta * sigmoid_derivative(H_in[h]) * x[i]
        W = W + learning_rate * dW
        V = V + learning_rate * dV
    print(f"epoch:{step} output:{O_out}")
# sigmoid activation function
def sigmoid(x):
    s = 1 / (1 + np.exp(-x))
    return s

# derivative of the sigmoid: s'(x) = s(x) * (1 - s(x))
def sigmoid_derivative(x):
    s = 1 / (1 + np.exp(-x))
    ds = s * (1 - s)
    return ds
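# A quick numerical sanity check of sigmoid_derivative against a central
# finite difference (a sketch; the test point 0.5 and eps are arbitrary):
#
# eps = 1e-6
# numeric = (sigmoid(0.5 + eps) - sigmoid(0.5 - eps)) / (2 * eps)
# assert abs(sigmoid_derivative(0.5) - numeric) < 1e-8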
# main
if __name__ == '__main__':
    x = np.array([2, 5], dtype=float)
    y_true = np.array([3, 6, 4], dtype=float)
    BP(x, y_true, 10)
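The triple loop inside BP computes the gradients one entry at a time. A vectorized equivalent (a sketch under the same assumptions: 1-D x and y, a linear output layer, and the sigmoid helpers defined above) replaces the loops with outer products:

def bp_gradients(x, y, V, W):
    H_in = np.dot(x, V)       # hidden pre-activations
    H_out = sigmoid(H_in)     # hidden activations
    O_out = np.dot(H_out, W)  # linear output layer
    err = y - O_out           # output-layer error
    dW = np.outer(H_out, err)                             # same values the dW loop fills in
    delta_h = np.dot(W, err) * sigmoid_derivative(H_in)   # backpropagated hidden deltas
    dV = np.outer(x, delta_h)                             # same values the dV loops fill in
    return dW, dV

For a network this small the loops are fine, but the vectorized form makes the structure of backpropagation easier to see: each layer's gradient is an outer product of its input with its delta.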