ResNet Training and Result Analysis

The BaseLine class

BaseLine is a plain (non-residual) network: the same stacks of 3x3 conv-BN-ReLU layers as ResNet, but without shortcut connections. It serves as the control for the degradation comparison later in this post.


import torch
import torch.nn as nn
import torch.nn.functional as F

class block(nn.Module):
    """Plain (non-residual) block: two 3x3 conv-BN-ReLU layers, no shortcut."""
    def __init__(self, inplane, plane, stride=1):
        super(block, self).__init__()
        self.conv1 = nn.Conv2d(inplane, plane, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(plane)
        self.conv2 = nn.Conv2d(plane, plane, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(plane)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        return out

class BaseLine(nn.Module):
    """Plain 6n+2-layer network for 32x32 CIFAR-10 inputs, no shortcuts."""
    def __init__(self, block, layers, num_classes=10):
        super(BaseLine, self).__init__()
        self.inplane = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        # three stages of n blocks each; feature maps shrink 32x32 -> 16x16 -> 8x8
        self.layer1 = self._make_layer(block, 16, layers[0], 1)
        self.layer2 = self._make_layer(block, 32, layers[1], 2)
        self.layer3 = self._make_layer(block, 64, layers[2], 2)
        self.GAP = nn.AdaptiveAvgPool2d((1, 1))
        self.linear = nn.Linear(64, num_classes)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.GAP(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

    def _make_layer(self, block, plane, num_blocks, stride):
        # only the first block of a stage downsamples; the rest keep stride 1
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.inplane, plane, stride))
            self.inplane = plane
        return nn.Sequential(*layers)

if __name__ == "__main__":
    # quick sanity check: two 32x32 RGB images in, ten class logits each out
    x = torch.ones((2, 3, 32, 32))
    model = BaseLine(block, [3, 3, 3])
    y = model(x)
    print(y.shape)  # torch.Size([2, 10])
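
The training script in the next section imports Resnet and BasicBlock from a separate resnet.py that this post does not list. For reference, here is a minimal sketch of that residual counterpart, assuming it mirrors BaseLine and only adds a skip connection around each block; the class name BasicBlock matches the import, while the 1x1 projection shortcut is my assumption:

import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    """Sketch of a residual block: the same two 3x3 convs as `block`,
    plus a shortcut that adds the input back to the output."""
    def __init__(self, inplane, plane, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplane, plane, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(plane)
        self.conv2 = nn.Conv2d(plane, plane, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(plane)
        self.shortcut = nn.Sequential()  # identity when shapes already match
        if stride != 1 or inplane != plane:
            # assumed: a 1x1 projection so the input can be added to the output
            self.shortcut = nn.Sequential(
                nn.Conv2d(inplane, plane, kernel_size=1, stride=stride),
                nn.BatchNorm2d(plane))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return F.relu(out + self.shortcut(x))  # the skip connection

A matching Resnet class can reuse the BaseLine structure unchanged, passing BasicBlock instead of block to _make_layer.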

Training script


from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch
from resnet import *    # local module defining Resnet and BasicBlock (not shown; see sketch above)
from baseline import *  # local module defining BaseLine and block (listed above)
import pickle

# per-channel normalization (ImageNet statistics)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
# standard CIFAR-10 augmentation: random flip plus 4-pixel padded crop
transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                transforms.RandomCrop(32, 4),
                                transforms.ToTensor(),
                                normalize])

train_data = datasets.CIFAR10("./data", train=True, transform=transform, download=True)
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)

val_data = datasets.CIFAR10(root='./data', train=False, transform=transforms.Compose([
    transforms.ToTensor(),
    normalize,
]), download=True)
val_loader = DataLoader(val_data, batch_size=128, shuffle=False)  # no need to shuffle for evaluation

def test(model, test_loader):
    correct = 0
    size = 0
    model.eval()  # freeze BN statistics during evaluation
    with torch.no_grad():  # no gradients needed for validation
        for input, target in test_loader:
            input, target = input.to(device), target.to(device)
            output = model(input)
            correct += torch.sum(torch.argmax(output, dim=1) == target)
            size += len(input)
    print("precision: {}%".format(correct / size * 100))
    return (correct / size * 100).item()

if __name__ == "__main__":
    losses = []
    precisions = []
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # model = Resnet(BasicBlock, [4, 4, 4])
    model = BaseLine(block, [9, 9, 9])  # 56-layer plain network (6*9+2)
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0001)
    for epoch in range(100):
        model.train()
        for i, (input, target) in enumerate(train_loader):
            input, target = input.to(device), target.to(device)
            output = model(input)
            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())  # record once per batch
            if i % 30 == 0:
                print("epoch {}, batch {}, loss: {:.4f}".format(epoch, i, loss.item()))
        precisions.append(test(model, val_loader))  # validation accuracy per epoch
    # save the training curves for the comparison plots below
    with open("./lossesB9.pkl", "wb") as f:
        pickle.dump(losses, f)
    with open("./precisionB9.pkl", "wb") as f:
        pickle.dump(precisions, f)

[Figure: accuracy comparison of BaseLine32 and BaseLine56]


As the network gets deeper, the plain network exhibits obvious degradation: BaseLine56 converges to a lower accuracy than the shallower BaseLine32.

[Figure: accuracy comparison of ResNet56 and BaseLine56]

ResNet resolves this degradation well: at the same depth, ResNet56 trains to noticeably higher accuracy than BaseLine56.
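
To regenerate the comparison curves, load the pickled per-epoch accuracies and plot them together. A minimal sketch, assuming matplotlib is available and that the other runs were saved under analogous names (only precisionB9.pkl is produced by the script above; precisionB5.pkl for BaseLine32 and precisionR9.pkl for ResNet56 are hypothetical):

import pickle
import matplotlib.pyplot as plt

# only precisionB9.pkl is guaranteed by the script above;
# the other filenames are assumed and should match your own runs
runs = {
    "BaseLine32": "./precisionB5.pkl",
    "BaseLine56": "./precisionB9.pkl",
    "ResNet56": "./precisionR9.pkl",
}

for name, path in runs.items():
    with open(path, "rb") as f:
        precision = pickle.load(f)  # one validation accuracy per epoch
    plt.plot(range(1, len(precision) + 1), precision, label=name)

plt.xlabel("epoch")
plt.ylabel("validation accuracy (%)")
plt.legend()
plt.show()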
