通过 CNN 卷积神经网络对食物图片进行分类。
训练集与验证集中图片的命名格式为 "[类别]_[编号].jpg",即文件名以下划线分隔类别与编号。
#Import 需要的套件
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time
#Read image 利用 OpenCV (cv2) 讀入照片並存放在 numpy array 中
#label是一个布尔变量,代表需不需要回传y值
def readfile(path, label):
    """Read every image under `path` into a numpy array.

    Args:
        path: directory of images named "[class]_[index].jpg".
        label: boolean; when True, also parse and return the class id
            taken from each filename.

    Returns:
        (x, y) when label is True, otherwise x alone.
        x: uint8 array of shape (N, 128, 128, 3); channels are in BGR
           order (cv2.imread convention).
        y: uint8 array of N class ids.
    """
    image_dir = sorted(os.listdir(path))
    # Pre-allocate: N images of 128x128 with 3 channels.
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    # y holds the class label of each image.
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        # Resize every image to 128x128.
        x[i, :, :] = cv2.resize(img, (128, 128))
        if label:
            # Training images are named "[class]_[index].jpg"; the class
            # id is the part before the underscore.
            # BUG FIX: the original `file.split("")` raises
            # "ValueError: empty separator" — str.split does not accept
            # an empty string; the separator must be "_".
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    else:
        return x
调用 readfile 函数依次读取三个数据集:
# Load the training / validation / testing sets with readfile().
# NOTE: the original paste used curly quotes (‘ ’ “ ”), which are a
# Python syntax error; they are restored to straight quotes here.
workspace_dir = './food-11'
print("Reading data")
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
print("Size of training data = {}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), True)
print("Size of validation data = {}".format(len(val_x)))
test_x = readfile(os.path.join(workspace_dir, "testing"), False)
print("Size of Testing data = {}".format(len(test_x)))
Out:
Reading data
Size of training data = 9866
Size of validation data = 3430
Size of Testing data = 3347
Dataset
在 Pytorch 中,我們可以利用 torch.utils.data 的 Dataset 及 DataLoader 來"包裝" data,使後續的 training 及 testing 更為方便。
Dataset 需要 overload 兩個函數:len 及 getitem
len 必須要回傳 dataset 的大小,而 getitem 則定義了當程式利用 [ ] 取值時,dataset 應該要怎麼回傳資料。
實際上我們並不會直接使用到這兩個函數,但是使用 DataLoader 在 enumerate Dataset 時會使用到,沒有實做的話會在程式運行階段出現 error。
这里还对图片进行了数据增强。transforms 表示对图片的预处理方式。
# Training-time data augmentation: random horizontal flip and random
# rotation make the model less sensitive to orientation.
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),  # randomly mirror the image left/right
    transforms.RandomRotation(15),      # randomly rotate by up to 15 degrees
    transforms.ToTensor(),              # to CHW float tensor, values scaled into [0, 1]
])

# No augmentation at test time — only the tensor conversion.
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])
class ImgDataset(Dataset):
    """Wrap image arrays (and optional labels) for use with DataLoader.

    Dataset requires __len__ (dataset size) and __getitem__ (what `[]`
    returns); DataLoader calls both while enumerating.

    Args:
        x: indexable collection of images (e.g. a numpy array).
        y: optional integer labels.
        transform: optional callable applied to each image on access.
    """
    # NOTE: the original paste lost the double underscores — `init`,
    # `len` and `getitem` must be the dunder methods restored below.
    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = y
        if y is not None:
            # CrossEntropyLoss expects LongTensor targets.
            self.y = torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X
batch_size = 128
# Wrap the arrays in Dataset objects: augmentation for training,
# plain tensor conversion for validation.
train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)
# Only the training data is shuffled between epochs.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=batch_size, shuffle=False)
定义模型
先是一个卷积神经网络,再是一个全连接的前向传播神经网络。
卷积神经网络的一级卷积层由卷积层cov+批标准化batchnorm+激活函数ReLU+最大池化MaxPool构成。
class Classifier(nn.Module):
    """CNN classifier: five Conv-BN-ReLU-MaxPool blocks followed by a
    three-layer fully connected head producing 11 class logits.

    Input:  tensor of shape [N, 3, 128, 128]
    Output: tensor of shape [N, 11] (raw scores, no softmax)
    """
    def __init__(self):
        # NOTE: the paste lost the dunder underscores; `init` must be
        # `__init__` and `super(...).init()` must call `__init__`.
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # Input shape: [3, 128, 128]
        self.cnn = nn.Sequential(
            # 3 input (BGR) channels -> 64 filters of 3x3, stride 1, pad 1;
            # each filter produces one output channel.
            nn.Conv2d(3, 64, 3, 1, 1),    # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1),  # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [512, 8, 8]

            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [512, 4, 4]
        )
        # Flattened conv output is 512*4*4 = 8192 features; the head
        # narrows down to the 11 food classes.
        # BUG FIX: the original `nn.Linear(51244, 1024)` lost the `*`s
        # in markdown — the correct in-features is 512*4*4.
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)  # flatten to [N, 512*4*4]
        return self.fc(out)
在train set 上训练,参考val set上的结果调参
train set和val set中的数据都有标签,我们先在train set上训练模型,并对比模型在train set和val set上预测的正确率。
如果模型在val set上的正确率不高,说明模型的泛化性能不好。
需要调整cnn的参数
使用训练集training set进行训练,并使用验证集validation set来选择最好的参数。
如果遇到out of memory的报错,应该调小上面的batch_size = 128。
# Train on the training set; monitor accuracy on the validation set to
# pick hyper-parameters. (Indentation below is restored — the pasted
# original had all loop bodies flattened to column 0.)
model = Classifier().cuda()  # move model to GPU
loss = nn.CrossEntropyLoss()  # classification task -> cross-entropy
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epoch = 30  # number of training epochs

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train()  # training mode (enables Dropout, BatchNorm updates)
    for i, data in enumerate(train_loader):
        optimizer.zero_grad()               # clear accumulated gradients
        train_pred = model(data[0].cuda())  # forward pass (calls forward())
        # Prediction and label must be on the same device.
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()               # back-propagate gradients
        optimizer.step()                    # update parameters
        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

    # Evaluate on the validation set without tracking gradients.
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())
            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

    # Report per-epoch statistics (averaged over dataset size).
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_set), train_loss / len(train_set),
           val_acc / len(val_set), val_loss / len(val_set)))
Out:
[001/030] 30.54 sec(s) Train Acc: 0.219542 Loss: 0.018593 | Val Acc: 0.223032 loss: 0.017266
[002/030] 30.32 sec(s) Train Acc: 0.323333 Loss: 0.015177 | Val Acc: 0.309621 loss: 0.015568
[003/030] 30.32 sec(s) Train Acc: 0.387188 Loss: 0.013808 | Val Acc: 0.377843 loss: 0.014594
[004/030] 30.36 sec(s) Train Acc: 0.443037 Loss: 0.012711 | Val Acc: 0.431778 loss: 0.012918
[005/030] 30.42 sec(s) Train Acc: 0.472431 Loss: 0.012017 | Val Acc: 0.211953 loss: 0.022984
[006/030] 30.44 sec(s) Train Acc: 0.502331 Loss: 0.011441 | Val Acc: 0.432945 loss: 0.013718
[007/030] 30.51 sec(s) Train Acc: 0.519562 Loss: 0.010774 | Val Acc: 0.358892 loss: 0.016559
[008/030] 30.51 sec(s) Train Acc: 0.550172 Loss: 0.010292 | Val Acc: 0.449271 loss: 0.013144
[009/030] 30.52 sec(s) Train Acc: 0.577336 Loss: 0.009582 | Val Acc: 0.427697 loss: 0.013619
[010/030] 30.57 sec(s) Train Acc: 0.597405 Loss: 0.009059 | Val Acc: 0.484548 loss: 0.012584
[011/030] 30.60 sec(s) Train Acc: 0.621224 Loss: 0.008519 | Val Acc: 0.505831 loss: 0.012606
[012/030] 30.59 sec(s) Train Acc: 0.647983 Loss: 0.007928 | Val Acc: 0.262974 loss: 0.028745
[013/030] 30.61 sec(s) Train Acc: 0.654875 Loss: 0.007988 | Val Acc: 0.560350 loss: 0.010429
[014/030] 30.62 sec(s) Train Acc: 0.667241 Loss: 0.007576 | Val Acc: 0.466472 loss: 0.013659
[015/030] 30.67 sec(s) Train Acc: 0.692581 Loss: 0.006917 | Val Acc: 0.568222 loss: 0.010538
[016/030] 30.69 sec(s) Train Acc: 0.701196 Loss: 0.006740 | Val Acc: 0.468805 loss: 0.014866
[017/030] 30.70 sec(s) Train Acc: 0.717920 Loss: 0.006458 | Val Acc: 0.495044 loss: 0.014071
[018/030] 30.70 sec(s) Train Acc: 0.733732 Loss: 0.006045 | Val Acc: 0.610787 loss: 0.009511
[019/030] 30.66 sec(s) Train Acc: 0.742347 Loss: 0.005843 | Val Acc: 0.611370 loss: 0.009757
[020/030] 30.65 sec(s) Train Acc: 0.741942 Loss: 0.005834 | Val Acc: 0.619534 loss: 0.009403
[021/030] 30.76 sec(s) Train Acc: 0.757653 Loss: 0.005436 | Val Acc: 0.648980 loss: 0.009385
[022/030] 30.75 sec(s) Train Acc: 0.777721 Loss: 0.005044 | Val Acc: 0.602915 loss: 0.010735
[023/030] 30.70 sec(s) Train Acc: 0.787046 Loss: 0.004846 | Val Acc: 0.633236 loss: 0.009841
[024/030] 30.67 sec(s) Train Acc: 0.785830 Loss: 0.004880 | Val Acc: 0.518950 loss: 0.014643
[025/030] 30.65 sec(s) Train Acc: 0.792621 Loss: 0.004609 | Val Acc: 0.644898 loss: 0.009813
[026/030] 30.71 sec(s) Train Acc: 0.816136 Loss: 0.004233 | Val Acc: 0.616327 loss: 0.011062
[027/030] 30.71 sec(s) Train Acc: 0.826373 Loss: 0.003974 | Val Acc: 0.593878 loss: 0.012730
[028/030] 30.66 sec(s) Train Acc: 0.819785 Loss: 0.004078 | Val Acc: 0.647813 loss: 0.009875
[029/030] 30.71 sec(s) Train Acc: 0.847456 Loss: 0.003512 | Val Acc: 0.596501 loss: 0.013376
[030/030] 30.64 sec(s) Train Acc: 0.849078 Loss: 0.003442 | Val Acc: 0.613703 loss: 0.010566
得到好的参数后,我们使用training set和validation set共同训练(数据量变多,模型效果更好)
在总的训练集上训练
在确保val set,train set上正确率都不错后
将两个训练集合为一个总的训练集
这里优化的就是权重w了
# With hyper-parameters chosen, retrain from scratch on the combined
# training + validation data (more data -> a better final model).
train_val_x = np.concatenate((train_x, val_x), axis=0)  # concatenate images
train_val_y = np.concatenate((train_y, val_y), axis=0)  # concatenate labels
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

model_best = Classifier().cuda()  # fresh model on GPU
loss = nn.CrossEntropyLoss()      # classification task -> cross-entropy
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001)
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for i, data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_pred = model_best(data[0].cuda())
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()
        optimizer.step()
        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

    # Report per-epoch statistics.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' %
          (epoch + 1, num_epoch, time.time() - epoch_start_time,
           train_acc / len(train_val_set), train_loss / len(train_val_set)))
Testing
利用剛剛 train 好的 model 進行 prediction
Pytorch中的 model.train() 和 model.eval() 模式
model.train() :启用 BatchNormalization 和 Dropout
model.eval() :不启用 BatchNormalization 和 Dropout
# Predict on the test set with the retrained model and save a CSV.
# model.eval() disables Dropout and freezes BatchNorm statistics.
# NOTE: curly quotes in the original open()/write() calls were a
# syntax error; restored to straight quotes here.
test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

model_best.eval()
prediction = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model_best(data.cuda())
        # argmax over the 11 class scores gives the predicted label.
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)

# Write the predictions to a csv file: one "Id,Category" row per image.
with open("predict.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in enumerate(prediction):
        f.write('{},{}\n'.format(i, y))