描述:采用数据挖掘中的人工神经网络算法:首先用 Adaptive-Lasso 方法筛选出与目标相关性最大的因素,再根据现有数据建立神经网络模型,最后对未知属性做出预测。
代码:
import pandas as pd
def Data_pro(feature, data, train_years=range(1994, 2014)):
    """Standardise the training rows and split them into inputs and target.

    Args:
        feature: Column labels used as model inputs.
        data: DataFrame whose index labels are years and which contains
            every column in ``feature`` plus the target column ``'y'``.
        train_years: Index labels of the rows used for fitting. Defaults to
            1994-2013, i.e. the data before 2014.

    Returns:
        Tuple ``(data_mean, data_std, x_train, y_train)``: the per-column
        mean and standard deviation of the training rows, followed by the
        standardised feature matrix and target vector as NumPy arrays.
    """
    data_train = data.loc[list(train_years)].copy()
    data_mean = data_train.mean()
    data_std = data_train.std()
    # z-score every column using statistics from the training rows only.
    data_train = (data_train - data_mean) / data_std
    # ``as_matrix()`` was removed in pandas 1.0; ``.values`` yields the same
    # ndarray and is available in both old and new pandas releases.
    x_train = data_train[feature].values
    y_train = data_train['y'].values
    return data_mean, data_std, x_train, y_train
def Build_Net(data, data_mean, data_std, x_train, y_train, feature,
              epochs=10000, batch_size=16):
    """Fit a one-hidden-layer regression network and predict ``y`` for all rows.

    Args:
        data: DataFrame holding every column in ``feature``; a ``'y_pred'``
            column is added to it in place.
        data_mean: Per-column means used to standardise the inputs and to
            de-standardise the prediction (must include ``'y'``).
        data_std: Per-column standard deviations, used the same way.
        x_train: Standardised feature matrix for training.
        y_train: Standardised target vector for training.
        feature: Column labels used as model inputs.
        epochs: Number of training epochs (the original script used 10000).
        batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        The same ``data`` object, now with the ``'y_pred'`` column.
    """
    # ``keras.layers`` exposes these in every Keras release, whereas
    # ``keras.layers.core`` is not importable in newer ones.
    from keras.layers import Activation, Dense
    from keras.models import Sequential

    model = Sequential()
    # The input width follows the feature list instead of a hard-coded 6, and
    # the unit count is passed positionally so both the legacy ``output_dim``
    # and the current ``units`` spelling are satisfied.
    model.add(Dense(12, input_dim=len(feature)))
    # ReLU activation; the original author notes it greatly improves accuracy.
    model.add(Activation('relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # ``epochs`` is the Keras 2+ name of the former ``nb_epoch`` argument.
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)
    # Persisting the weights (model.save_weights) is intentionally left out,
    # as it was disabled in the original script.

    # Standardise all rows with the training statistics, predict, then map
    # the output back to the original scale of ``y``.
    x = ((data[feature] - data_mean[feature]) / data_std[feature]).values
    # predict() returns shape (n, 1); flatten it before storing as a column.
    scaled_pred = model.predict(x).ravel()
    data[u'y_pred'] = scaled_pred * data_std['y'] + data_mean['y']
    print(data[u'y_pred'])
    # Exporting the result (data.to_excel) is likewise left to the caller.
    return data
def Draw_result(data):
    """Plot the ``y`` and ``y_pred`` columns of ``data``, one subplot each."""
    from matplotlib import pyplot

    columns = ['y', 'y_pred']
    line_styles = ['b-o', 'r-*']
    data[columns].plot(subplots=True, style=line_styles)
    pyplot.show()
def main(
    inputfile='F:/Python/IDLE--python/BigDataAnalyze/chapter13-data/data1_GM11.xls',
):
    """Run the pipeline: load the data, train the network, plot the result.

    Args:
        inputfile: Path of the Excel sheet saved after the grey-prediction
            step. Defaults to the location used by the original script.
    """
    # Columns used as model inputs.
    feature = ['x1', 'x2', 'x3', 'x4', 'x5', 'x7']
    # NOTE(review): Data_pro selects rows by year label, so the sheet must
    # load with the years as its index -- confirm against the data file.
    data = pd.read_excel(inputfile)
    # Standardise the training rows and extract the training arrays.
    data_mean, data_std, x_train, y_train = Data_pro(feature, data)
    # Build and train the model, then add the predictions to the frame.
    data = Build_Net(data, data_mean, data_std, x_train, y_train, feature)
    # Show actual versus predicted values.
    Draw_result(data)
# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()