Pytorch训练图像分类网络

1,390 阅读5分钟

1 数据来源是什么?

图像分类领域常用的公共数据集有 ImageNet、CIFAR-10、CIFAR-100 和 MNIST,这里简单介绍一下。

  • ImageNet  ImageNet数据集是目前深度学习图像领域应用得非常多的一个数据集,关于图像分类、定位、检测等研究工作大多基于此数据集展开。而且文档详细,有专门的团队维护,使用非常方便,在计算机视觉领域研究论文中应用非常广,几乎成为了目前深度学习图像领域算法性能检验的“标准”数据集。ImageNet数据集有1400多万幅图片,涵盖2万多个类别。
  • CIFAR-10 该数据集共有60000张彩色图像,每张图像的大小为32*32,分为10个类,每类6000张图。

  • CIFAR-100 此数据集与CIFAR-10类似,不同之处在于它有100个类,每个类包含600个图像。每类分为500个训练图像和100个测试图像。其中100个类分为20个大类。每个图像都带有一个“精细”标签(它所属的类)和一个“粗略”标签(它所属的大类)。
  • MNIST MNIST 数据集来自美国国家标准与技术研究所, National Institute of Standards and Technology (NIST). 训练集 (training set) 由来自 250 个不同人手写的数字构成, 其中 50% 是高中学生, 50% 来自人口普查局 (the Census Bureau) 的工作人员. 测试集(test set) 也是同样比例的手写数字数据.


这里我们采用CIFAR-10数据集

2 怎么导入数据

方法一

使用opencv,pillow将图像导入成numpy的array,再转tensor

方法二

使用PyTorch的配套包torchvision

3 训练步骤

  1. 使用torchvision导入数据

    # Preprocessing pipeline: ToTensor turns each image into a tensor scaled
    # to [0, 1]; Normalize then computes (value - mean) / std per channel,
    # which with mean = std = 0.5 maps the values into [-1, 1].
    transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        ]
    )

    # Training split of CIFAR-10, downloaded into ./data when it is missing.
    trainset = torchvision.datasets.CIFAR10(
        root='./data',
        train=True,
        download=True,
        transform=transform,
    )

    # The loader serves trainset in mini-batches of 4 samples; shuffle=True
    # asks it to reshuffle the samples at every epoch.
    trainLoader = torch.utils.data.DataLoader(
        trainset,
        batch_size=4,
        shuffle=True,
        num_workers=0,
    )

    # Test split, preprocessed exactly like the training split.
    testSet = torchvision.datasets.CIFAR10(
        root="./data",
        train=False,
        download=True,
        transform=transform,
    )

    # Evaluation does not need shuffling, so the order is kept fixed.
    testLoader = torch.utils.data.DataLoader(
        testSet,
        batch_size=4,
        shuffle=False,
        num_workers=0,
    )

    # Human-readable class names, indexed by the integer label.
    classes = (
        'plane', 'car', 'bird', 'cat', 'deer',
        'dog', 'frog', 'horse', 'ship', 'truck',
    )
    

  2. 定义一个神经网络

    class Net(nn.Module):
        """Small convolutional network producing 10 class scores per image.

        Two conv -> ReLU -> 2x2 max-pool stages are followed by three fully
        connected layers. The first linear layer expects 16 * 5 * 5 features,
        which is what a 3-channel 32x32 input yields after both stages.
        """

        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
            # One pooling module is shared by both convolution stages.
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
            self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
            self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
            self.fc2 = nn.Linear(in_features=120, out_features=84)
            self.fc3 = nn.Linear(in_features=84, out_features=10)

        def forward(self, x):
            """Return the raw (un-normalized) class scores for the batch ``x``."""
            features = self.pool(F.relu(self.conv1(x)))
            features = self.pool(F.relu(self.conv2(features)))
            # Collapse channels and spatial dims into one feature vector per sample.
            flat = features.view(-1, 16 * 5 * 5)
            hidden = F.relu(self.fc1(flat))
            hidden = F.relu(self.fc2(hidden))
            return self.fc3(hidden)
    

  3. 定义损失函数和优化器

    # Instantiate the model, the loss function, and the optimizer.
    net = Net()
    # Cross-entropy loss computed from the network's raw class scores.
    criterion = nn.CrossEntropyLoss()
    # Stochastic gradient descent with momentum over every network parameter.
    optimizer = optim.SGD(
        params=net.parameters(),
        lr=0.001,
        momentum=0.9,
    )
    

  4. 训练神经网络

    # Two passes over the training data; the mean loss of every 2000
    # mini-batches is printed and the accumulator is then reset.
    for epoch in range(2):
        runningLoss = 0.0
        for i, data in enumerate(trainLoader):
            inputs, labels = data

            # Reset accumulated gradients, run forward and backward passes,
            # then let the optimizer update the weights.
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            runningLoss = runningLoss + loss.item()
            if (i + 1) % 2000 != 0:
                continue
            print('[%d , %d] loss: %0.3f' % (epoch + 1, i + 1, runningLoss / 2000))
            runningLoss = 0.0

    print("train finished")
    

  5. 测试训练结果

# 对一个batch进行测试****************************************************************************************************************
# 这段代码用于展示图片
def imshow(img):
    """Show an image tensor in a matplotlib window.

    Args:
        img: Tensor whose axis 0 is moved to the last position before
            plotting. It is assumed to have been normalized with mean 0.5
            and std 0.5, which the first step undoes.
    """
    # Undo the normalization: x / 2 + 0.5 is the inverse of (x - 0.5) / 0.5.
    restored = img / 2 + 0.5
    # Reorder the axes from (0, 1, 2) to (1, 2, 0) for plt.imshow.
    channels_last = restored.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()

# Fetch one batch of held-out images. The batch comes from testLoader so the
# predictions below are shown on data the network was not trained on, and the
# built-in next() is used because DataLoader iterators in current PyTorch
# releases do not offer the Python-2-style .next() method.
dataiter = iter(testLoader)
images, labels = next(dataiter)

# The batch is a single stacked tensor, so tile it into one image for display.
imshow(torchvision.utils.make_grid(images))

# '%.5s' formats str(value) truncated to at most five characters.
# Iterating over the labels themselves keeps this independent of batch size.
print(' '.join('%.5s' % classes[label] for label in labels.tolist()))

# Rebuild the network and restore the weights stored at PATH, i.e. the file
# written with torch.save(net.state_dict(), PATH) after training (see the
# complete listing in section 5).
net = Net()
net.load_state_dict(torch.load(PATH))
outputs = net(images)
# torch.max over dim 1 returns (values, indices); the indices are the classes.
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[index]
                              for index in predicted.tolist()))
# 对于所有的测试集进行测试******************************************************************************************************
# Count the correctly classified samples over the whole test loader.
total = 0
correct = 0
with torch.no_grad():  # evaluation only, so gradient tracking is disabled
    for data in testLoader:
        images, labels = data
        outputs = net(images)
        # Element [1] of torch.max(..., 1) holds the index of the best score.
        predicted = torch.max(outputs.data, 1)[1]
        matches = predicted == labels
        correct = correct + matches.sum().item()
        total = total + labels.size(0)

accuracy_percent = 100 * correct / total
print('Accuracy of the network on the 10000 test images: %d %%' % (
    accuracy_percent))



## 测试每个类分别的正确率********************************************************************************************
# Per-class accuracy: tally, for every class, how many test samples carry
# that label and how many of those were predicted correctly.
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)
with torch.no_grad():
    for data in testLoader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = predicted == labels
        # Walk the samples actually present in the batch instead of assuming
        # exactly four; this also handles a smaller final batch and a
        # single-sample batch, which the former squeeze() + range(4) did not.
        for label, hit in zip(labels.tolist(), c.tolist()):
            class_correct[label] += hit
            class_total[label] += 1

for i, name in enumerate(classes):
    if class_total[i] == 0:
        # No sample of this class was seen, so there is no ratio to report.
        print('Accuracy of %5s : n/a' % name)
        continue
    print('Accuracy of %5s : %2d %%' % (
        name, 100 * class_correct[i] / class_total[i]))

4 如何放在GPU上进行计算

# Use the first CUDA device when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Move the network onto the selected device.
net.to(device)
# Move the batch onto the same device; data[0] holds the inputs and
# data[1] the labels.
inputs = data[0].to(device)
labels = data[1].to(device)

5 完整代码

#coding=utf-8
import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np


#导入数据***************************************************************************************************
# Preprocessing pipeline: ToTensor turns each image into a tensor scaled from
# (0, 255) to (0, 1); Normalize then computes (value - mean) / std per
# channel, which with mean = std = 0.5 maps the values into (-1, 1).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Training split of CIFAR-10, downloaded into ./data when it is missing.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# The loader serves trainset in mini-batches of 4 samples; shuffle=True asks
# it to reshuffle the samples at every epoch.
trainLoader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=0,
)

# Test split, preprocessed exactly like the training split.
testSet = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    download=True,
    transform=transform,
)

# Evaluation does not need shuffling, so the order is kept fixed.
testLoader = torch.utils.data.DataLoader(
    testSet,
    batch_size=4,
    shuffle=False,
    num_workers=0,
)

# Human-readable class names, indexed by the integer label.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)


#定义网络 ******************************************************************************************************
# nn.Conv2d positional arguments used here: (in_channels, out_channels, kernel_size).
# nn.Linear positional arguments used here: (in_features, out_features).
class Net(nn.Module):
    """Small convolutional network producing 10 class scores per image.

    Two conv -> ReLU -> 2x2 max-pool stages are followed by three fully
    connected layers. The first linear layer expects 16 * 5 * 5 features,
    which is what a 3-channel 32x32 input yields after both stages.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # One pooling module is shared by both convolution stages.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for the batch ``x``."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Collapse channels and spatial dims into one feature vector per sample.
        flat = features.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Instantiate the model, the loss function, and the optimizer.
net = Net()
# Cross-entropy loss computed from the network's raw class scores.
criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent with momentum over every network parameter.
optimizer = optim.SGD(
    params=net.parameters(),
    lr=0.001,
    momentum=0.9,
)


#训练网络 ******************************************************************************************
# Two passes over the training data; the mean loss of every 2000
# mini-batches is printed and the accumulator is then reset.
for epoch in range(2):
    runningLoss = 0.0
    for i, data in enumerate(trainLoader):
        inputs, labels = data

        # Reset accumulated gradients, run forward and backward passes,
        # then let the optimizer update the weights.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        runningLoss = runningLoss + loss.item()
        if (i + 1) % 2000 != 0:
            continue
        print('[%d , %d] loss: %0.3f' % (epoch + 1, i + 1, runningLoss / 2000))
        runningLoss = 0.0

print("train finished")

# 存输模型***********************************************************************************************************************
# Store the trained parameters (the network's state dict) at PATH so they
# can be loaded back into a fresh Net instance later.
PATH = './cifar_net.pth'
torch.save(obj=net.state_dict(), f=PATH)

# 对一个batch进行测试****************************************************************************************************************
# 这段代码用于展示图片
def imshow(img):
    """Show an image tensor in a matplotlib window.

    Args:
        img: Tensor whose axis 0 is moved to the last position before
            plotting. It is assumed to have been normalized with mean 0.5
            and std 0.5, which the first step undoes.
    """
    # Undo the normalization: x / 2 + 0.5 is the inverse of (x - 0.5) / 0.5.
    restored = img / 2 + 0.5
    # Reorder the axes from (0, 1, 2) to (1, 2, 0) for plt.imshow.
    channels_last = restored.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()

# Fetch one batch of held-out images. The batch comes from testLoader so the
# predictions below are shown on data the network was not trained on, and the
# built-in next() is used because DataLoader iterators in current PyTorch
# releases do not offer the Python-2-style .next() method.
dataiter = iter(testLoader)
images, labels = next(dataiter)

# The batch is a single stacked tensor, so tile it into one image for display.
imshow(torchvision.utils.make_grid(images))

# '%.5s' formats str(value) truncated to at most five characters.
# Iterating over the labels themselves keeps this independent of batch size.
print(' '.join('%.5s' % classes[label] for label in labels.tolist()))

# Rebuild the network and restore the weights that were saved to PATH
# after training.
net = Net()
net.load_state_dict(torch.load(PATH))
outputs = net(images)
# torch.max over dim 1 returns (values, indices); the indices are the classes.
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[index]
                              for index in predicted.tolist()))
# 对于所有的测试集进行测试******************************************************************************************************
# Count the correctly classified samples over the whole test loader.
total = 0
correct = 0
with torch.no_grad():  # evaluation only, so gradient tracking is disabled
    for data in testLoader:
        images, labels = data
        outputs = net(images)
        # Element [1] of torch.max(..., 1) holds the index of the best score.
        predicted = torch.max(outputs.data, 1)[1]
        matches = predicted == labels
        correct = correct + matches.sum().item()
        total = total + labels.size(0)

accuracy_percent = 100 * correct / total
print('Accuracy of the network on the 10000 test images: %d %%' % (
    accuracy_percent))



## 测试每个类分别的正确率********************************************************************************************
# Per-class accuracy: tally, for every class, how many test samples carry
# that label and how many of those were predicted correctly.
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)
with torch.no_grad():
    for data in testLoader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = predicted == labels
        # Walk the samples actually present in the batch instead of assuming
        # exactly four; this also handles a smaller final batch and a
        # single-sample batch, which the former squeeze() + range(4) did not.
        for label, hit in zip(labels.tolist(), c.tolist()):
            class_correct[label] += hit
            class_total[label] += 1

for i, name in enumerate(classes):
    if class_total[i] == 0:
        # No sample of this class was seen, so there is no ratio to report.
        print('Accuracy of %5s : n/a' % name)
        continue
    print('Accuracy of %5s : %2d %%' % (
        name, 100 * class_correct[i] / class_total[i]))