# 1. Data preprocessing: handling multi-feature samples
import pandas as pd
import torch
from torch import nn
# Load the dataset; every column except the last is a feature and the
# last column is the label.
data = pd.read_csv('./....csv')
# Split the data: the first iloc[] argument selects rows, the second columns.
X = data.iloc[:, :-1]
# Labels are encoded as -1/1; map -1 to 0 so they are valid BCELoss targets.
Y = data.iloc[:, -1].replace(-1, 0)
# Convert the NumPy arrays to float32 tensors.
X = torch.from_numpy(X.values).float()
# reshape(-1, 1) turns the label vector into a column so it lines up with
# the model's (N, 1) output.
Y = torch.from_numpy(Y.values.reshape(-1, 1)).float()
# 2. Build the model
# nn.Sequential chains the layers: a single linear unit followed by a
# sigmoid, i.e. logistic regression producing a probability in (0, 1).
model = nn.Sequential(
    nn.Linear(15, 1),  # 15 input features -> 1 logit; must match X's column count
    nn.Sigmoid(),  # activation: squash the logit to a probability
)
loss_fun = nn.BCELoss()  # binary cross-entropy loss (expects probabilities)
opt = torch.optim.Adam(model.parameters(), lr=0.0001)  # optimizer
# 3. Train
# Mini-batch training: every epoch walks X/Y in consecutive slices of
# `batches` rows and takes one optimizer step per slice.
batches = 16  # rows per mini-batch (i.e. the batch size)
# Derive the batch count from the data instead of hard-coding the row count.
# Floor division means a trailing partial batch is skipped, as before.
num_of_batch = len(X) // batches
epoches = 1000
for epoch in range(epoches):
    for batch in range(num_of_batch):
        start = batches * batch
        end = start + batches
        x = X[start:end]
        y = Y[start:end]
        y_pred = model(x)
        loss = loss_fun(y_pred, y)
        # Gradients accumulate across backward() calls, so clear them first.
        opt.zero_grad()
        loss.backward()
        opt.step()
# 4. Evaluation
# Run the model over all of X without tracking gradients, then threshold
# the predicted probabilities at 0.5 to obtain hard 0/1 class labels.
with torch.no_grad():
    probabilities = model(X)
predictions = (probabilities.numpy() > 0.5).astype('int')