# -*- coding:utf-8 -*-
"""Logistic regression on two-feature samples, trained by gradient descent.

The module offers three trainers (batch, per-sample, and per-sample with a
decaying step size and shuffled visiting order), a helper that plots the
samples together with the learned decision boundary, and a ``main`` entry
point that reads a whitespace-separated ``x1 x2 label`` file.
"""
import random

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # only data_plot needs matplotlib; training works without it
    plt = None

# Default sample file read by main(); one "x1 x2 label" triple per sample.
DATA_PATH = "/Users/chenzu/Documents/code-machine-learning/data/LR"


def text2num(string):
    """Parse every whitespace-separated token of *string* as a float.

    Args:
        string: Text holding numbers separated by spaces, tabs or newlines.

    Returns:
        A list of floats in the order the tokens appear (empty for blank text).

    Raises:
        ValueError: If a token is not a valid float literal.
    """
    # str.split() with no argument drops empty tokens and handles tabs and
    # "\r\n" line endings in a single pass.
    return [float(token) for token in string.split()]


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Evaluated as 0.5 * (1 + tanh(x / 2)), which is mathematically identical
    but cannot overflow for large negative inputs.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * x))


def _prepare(data_mat, label_mat):
    """Return (data, labels) as float matrices of shape (m, n) and (m, 1)."""
    data = np.asmatrix(data_mat, dtype=float)
    # Force a column so that flat label sequences cannot broadcast the error
    # term to an (m, m) matrix.
    labels = np.asmatrix(np.asarray(label_mat, dtype=float).reshape(-1, 1))
    if labels.shape[0] != data.shape[0]:
        raise ValueError(
            "got %d labels for %d samples" % (labels.shape[0], data.shape[0])
        )
    return data, labels


def data_plot(data_list, weight):
    """Scatter the two classes and draw the line w0 + w1*x + w2*y = 0.

    Args:
        data_list: Rows of ``[x1, x2, label]``; label 0.0 is drawn in red,
            label 1.0 in green.
        weight: The three coefficients ``[w0, w1, w2]`` in any array-like
            shape (for example the (3, 1) matrix returned by the trainers).

    Raises:
        ImportError: If matplotlib is not installed.
    """
    if plt is None:
        raise ImportError("matplotlib is required to plot the samples")

    samples = np.asarray(data_list, dtype=float)
    if samples.size == 0:
        samples = samples.reshape(0, 3)
    labels = samples[:, 2]
    # Boolean masks keep a (k, 2) shape even when a class has no samples.
    x_data = samples[labels == 0.0, 0:2]
    y_data = samples[labels == 1.0, 0:2]

    # Flatten so plain arrays and (3, 1) matrices are handled alike.
    w = np.asarray(weight, dtype=float).ravel()
    linear_x = np.arange(-4, 4, 1)
    linear_y = (-w[0] - w[1] * linear_x) / w[2]
    print(linear_y)

    plt.figure(1)
    plt.scatter(x_data[:, 0], x_data[:, 1], c='r')
    plt.scatter(y_data[:, 0], y_data[:, 1], c='g')
    print(linear_x)
    print(linear_y.tolist())
    plt.plot(linear_x, linear_y)
    plt.show()


def grad_desc(data_mat, label_mat, rate, times):
    """Fit logistic-regression weights with batch gradient descent.

    Args:
        data_mat: (m, n) samples, one row per sample.
        label_mat: m labels, given flat or as an (m, 1) column.
        rate: Step size applied to every update.
        times: Number of full-batch updates.

    Returns:
        An (n, 1) ``np.matrix`` of weights; all ones when ``times`` is 0.

    Raises:
        ValueError: If the number of labels differs from the number of rows.
    """
    data, labels = _prepare(data_mat, label_mat)
    weight = np.asmatrix(np.ones((data.shape[1], 1)))
    for _ in range(times):
        error = sigmoid(data * weight) - labels
        weight = weight - rate * data.T * error
    return weight


def random_grad_desc(data_mat, label_mat, rate, times):
    """Fit the weights by updating on one sample at a time, in file order.

    Args:
        data_mat: (m, n) samples, one row per sample.
        label_mat: m labels, given flat or as an (m, 1) column.
        rate: Step size applied to every update.
        times: Number of passes over the whole data set.

    Returns:
        An (n, 1) ``np.matrix`` of weights; all ones when ``times`` is 0.

    Raises:
        ValueError: If the number of labels differs from the number of rows.
    """
    data, labels = _prepare(data_mat, label_mat)
    m, n = data.shape
    weight = np.asmatrix(np.ones((n, 1)))
    for _ in range(times):
        for j in range(m):
            error = sigmoid(data[j] * weight) - labels[j]
            weight = weight - rate * data[j].T * error
    return weight


def improve_random_grad_desc(data_mat, label_mat, times):
    """Fit the weights by per-sample updates in random order with a decaying step.

    In pass ``i`` the ``j``-th update uses the step ``0.0001 + 4 / (i + j + 1)``
    and every sample is visited exactly once per pass. The order is drawn
    from the ``random`` module, so seed it for reproducible results.

    Args:
        data_mat: (m, n) samples, one row per sample.
        label_mat: m labels, given flat or as an (m, 1) column.
        times: Number of passes over the whole data set.

    Returns:
        An (n, 1) ``np.matrix`` of weights; all ones when ``times`` is 0.

    Raises:
        ValueError: If the number of labels differs from the number of rows.
    """
    data, labels = _prepare(data_mat, label_mat)
    m, n = data.shape
    weight = np.asmatrix(np.ones((n, 1)))
    for i in range(times):
        # One shuffled order per pass replaces the repeated sample-and-remove.
        order = random.sample(range(m), m)
        for j, index in enumerate(order):
            # 4.0 keeps the division floating-point on every Python version.
            rate = 0.0001 + 4.0 / (i + j + 1)
            error = sigmoid(data[index] * weight) - labels[index]
            weight = weight - rate * data[index].T * error
    return weight


def main(path=DATA_PATH):
    """Train on the sample file at *path* and plot the decision boundary.

    Args:
        path: File holding whitespace-separated ``x1 x2 label`` triples,
            encoded as UTF-8.

    Raises:
        ValueError: If the file does not hold a multiple of three values.
    """
    with open(path, "rb") as data_file:
        file_lines = data_file.read().decode("UTF-8")

    data_list = text2num(file_lines)
    if len(data_list) % 3 != 0:
        raise ValueError(
            "expected 3 values per sample, got %d values" % len(data_list)
        )
    data_list = np.reshape(data_list, (len(data_list) // 3, 3))

    # Prepend the constant 1 that multiplies the bias weight.
    data_mat = [[1, row[0], row[1]] for row in data_list[:, 0:2]]
    print(data_mat)
    label_mat = data_list[:, 2:3]

    # Estimate the parameters with gradient descent.
    weight = improve_random_grad_desc(data_mat, label_mat, 500)
    print(weight)
    data_plot(data_list, weight)


if __name__ == '__main__':
    main()