Image Classifier: Fixing the Overfitting Problem

After implementing the classifier with a simple model, overfitting appeared. Next, let's experiment with a few techniques to address it.

Loading Data and Configuring the Device

import torch
import torchvision
import torchvision.transforms as transforms


transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
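
As a quick sanity check (my addition, not part of the original post), you can confirm the datasets loaded correctly and inspect one batch. Note that download=False assumes the CIFAR-10 data already exists under ./data; set download=True on the first run.

# Optional sanity check: verify dataset sizes and one batch's shape.
print(len(trainset), len(testset))    # expected: 50000 10000

images, labels = next(iter(trainloader))
print(images.shape, labels.shape)     # torch.Size([4, 3, 32, 32]) torch.Size([4])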
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(device)
cuda:0

Building the Neural Network Model

The model keeps the same structure as before, with dropout layers added.

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self) -> None:
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.conv2_drop = nn.Dropout2d()          # dropout on conv2 feature maps
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2_drop(self.conv2(x))))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.dropout(x, training=self.training)  # only active in training mode
        x = self.fc3(x)
        return x

net = Net()
net = net.to(device)
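
A minimal sketch (my addition) to verify the architecture: push a dummy CIFAR-10-sized batch through the network and check that the 16*5*5 flatten size works and that the dropout train/eval switch behaves as expected.

# Dummy forward pass: CIFAR-10 images are 3x32x32.
dummy = torch.randn(4, 3, 32, 32).to(device)

net.train()              # dropout active, outputs vary between calls
print(net(dummy).shape)  # expected: torch.Size([4, 10])

net.eval()               # Dropout2d and F.dropout both disabled
print(net(dummy).shape)  # same shape, deterministic outputs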

Defining the Loss Function and Optimizer

Switch the optimizer to Adam, apply weight decay (L2 regularization) to the parameters, and add a learning-rate schedule.

import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-5)  # weight_decay applies L2 regularization
scheduler = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)
# optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

An overly large initial learning rate badly hurts the early stages of training: with lr = 1e-2, accuracy stayed around 0.1 (chance level for 10 classes) for the first few epochs.
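
To see what StepLR(step_size=4, gamma=0.1) actually does, here is a small sketch (my addition) that previews the decay on a throwaway optimizer so the real training state is untouched: the learning rate drops tenfold every 4 epochs.

# Preview the StepLR schedule without touching the real optimizer.
preview_opt = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-5)
preview_sched = lr_scheduler.StepLR(preview_opt, step_size=4, gamma=0.1)

for epoch in range(12):
    print(epoch, preview_sched.get_last_lr())  # [0.001] -> [0.0001] -> [1e-05]
    preview_opt.step()    # step the optimizer first to avoid a PyTorch warning
    preview_sched.step()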

Training and Validation

dataloaders = {
    'train': trainloader,
    'valid': testloader
}
dataset_sizes = {
    'train': trainset.data.shape[0],
    'valid': testset.data.shape[0]
}

for epoch in range(40):

    print('Epoch {}/{}'.format(epoch, 39))
    print('-' * 10)

    for phase in ['train', 'valid']:
        if phase == 'train':
            net.train()   # enable dropout layers
        else:
            net.eval()    # disable dropout for validation

        running_loss = 0.0
        running_corrects = 0

        for data in dataloaders[phase]:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            # only track gradients during the training phase
            with torch.set_grad_enabled(phase == 'train'):
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # criterion returns the batch mean, so scale by batch size
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects / dataset_sizes[phase]

        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

    scheduler.step()

Although training is slower, the overfitting is much improved.
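
One way to see the improvement concretely (a sketch of my own, not from the original post) is to record the per-epoch accuracies inside the loop above into a hypothetical history dict and plot the train/valid gap; a shrinking gap is the signature of reduced overfitting.

import matplotlib.pyplot as plt

# 'history' is a hypothetical dict you would fill inside the training loop, e.g.
#   history = {'train': [], 'valid': []}
#   ... after computing epoch_acc:  history[phase].append(epoch_acc)
def plot_accuracy_gap(history):
    epochs = range(len(history['train']))
    plt.plot(epochs, history['train'], label='train acc')
    plt.plot(epochs, history['valid'], label='valid acc')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()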