# (二)使用GBDT预测新能源汽车充电桩的故障检测问题(55分)
# 请你用训练数据构建相应的模型,并将模型进行保存
import pickle
import warnings

import matplotlib.pyplot as plt
import pandas
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier

# Let matplotlib render Chinese glyphs and the minus sign correctly.
plt.rcParams['font.family'] = 'SimHei'
plt.rcParams['axes.unicode_minus'] = False

# Suppress ignorable warnings.
warnings.filterwarnings('ignore')

# Load both datasets.
test_data = pandas.read_csv(r'data_test.csv', delimiter=',')
train_data = pandas.read_csv(r'data_train.csv', delimiter=',')

# Training split: every column except the last is a feature, the last column
# is the label. The label is taken as a 1-D Series (``-1`` rather than
# ``-1:``) because scikit-learn estimators expect a 1-D target.
xtrain_x = train_data.iloc[:, :-1]
xtrain_y = train_data.iloc[:, -1]

# Test split: taken from the held-out test file so that the reported test
# accuracy is not simply the training accuracy again.
# NOTE(review): assumes data_test.csv has the same column layout as
# data_train.csv (label in the last column) -- confirm against the data.
test_x = test_data.iloc[:, :-1]
test_y = test_data.iloc[:, -1]

# Fit the scikit-learn GBDT classifier on the training split only.
model_Gra = GradientBoostingClassifier()
model_Gra.fit(xtrain_x, xtrain_y)

# Save the fitted model; the context manager guarantees the handle is closed.
with open("tree_model.pkl", "wb") as model_file:
    pickle.dump(model_Gra, model_file)

# Load the model back from disk.
# NOTE: pickle.load executes arbitrary code from the file; this is only safe
# because the file was written by this very script a moment ago.
with open("tree_model.pkl", "rb") as model_file:
    loaded_model = pickle.load(model_file)

# The loaded model is already trained, so it is used as-is. Refitting it here
# would discard the persisted state and, on the test split, leak test labels.

# Predict on the training set.
h = loaded_model.predict(xtrain_x)
print('预测训练集:', h)

# Predict on the test set.
h_test = loaded_model.predict(test_x)
print('测试集预测:', h_test)

# Training-set accuracy.
score = loaded_model.score(xtrain_x, xtrain_y)
print('训练集精度:', score)

# Test-set accuracy.
score = loaded_model.score(test_x, test_y)
print('测试集精度:', score)

# Save the predictions to CSV files.
train_predict = {"train": h}
predict_D = pd.DataFrame(train_predict)
predict_D.to_csv(r"train_predict.csv", index=False)
test_predict = {"test": h_test}
predict_E = pd.DataFrame(test_predict)
predict_E.to_csv(r"test_predict.csv", index=False)