本文为365天深度学习训练营中的学习记录博客。● 原作者：K同学啊。学习目标：1.保存最佳模型权重；2.调用官方VGG-16网络框架。一、前期准备1.设置GPUimport torch import torch.nn as nn import torchvision.transforms as transforms import torchvision from torchvision import transforms, datasets import os,PIL,pathlib,warnings warnings.filterwarnings(ignore) #忽略警告信息 device torch.device(cuda if torch.cuda.is_available() else cpu) device2.导入数据import os,PIL,random,pathlib data_dir ./48-data/ data_dir pathlib.Path(data_dir) data_paths list(data_dir.glob(*)) classeNames [str(path).split(\\)[1] for path in data_paths] classeNamestrain_transforms transforms.Compose([ transforms.Resize([224, 224]), # 将输入图片resize成统一尺寸 transforms.RandomHorizontalFlip(), # 随机水平翻转 transforms.ToTensor(), # 将PIL Image或numpy.ndarray转换为tensor并归一化到[0,1]之间 transforms.Normalize( # 标准化处理--转换为标准正态分布（高斯分布），使模型更容易收敛 mean[0.485, 0.456, 0.406], std[0.229, 0.224, 0.225]) # 其中 mean[0.485,0.456,0.406]与std[0.229,0.224,0.225] 从数据集中随机抽样计算得到的。 ]) total_data datasets.ImageFolder(./48-data/,transformtrain_transforms) total_datatotal_data.class_to_idx3.划分数据集train_size int(0.8 * len(total_data)) test_size len(total_data) - train_size train_dataset, test_dataset torch.utils.data.random_split(total_data, [train_size, test_size]) train_dataset, test_datasetbatch_size 32 train_dl torch.utils.data.DataLoader(train_dataset, batch_sizebatch_size, shuffleTrue, num_workers1) test_dl torch.utils.data.DataLoader(test_dataset, batch_sizebatch_size, shuffleTrue, num_workers1)for X, y in test_dl: print(Shape of X [N, C, H, W]: , X.shape) print(Shape of y: , y.shape, y.dtype) 
break二、调用官方的VGG-16模型VGG-16是由牛津大学视觉几何组（Visual Geometry Group，VGG）提出的深度卷积神经网络架构，用于图像分类和对象识别任务。VGG-16在ImageNet图像识别竞赛中取得了很好的成绩，展示了其在大规模识别任务中的有效性。VGG-16的主要特点：1.深度：VGG-16由13个卷积层和3个全连接层组成，因此具有相对较深的网络结构，这种深度有助于网络学习到更加抽象和复杂的特征。2.卷积层的设计：VGG-16的卷积层全部采用3×3卷积核和步长为1的卷积操作，同时在每个卷积层之后都接有ReLU激活函数 $\mathrm{ReLU}(x)=\max(0,x)$。这种设计的好处在于：堆叠多个较小的卷积核可以提高网络的非线性建模能力，同时减少了参数数量，从而降低了过拟合的风险。3.池化层：在每组卷积层之后，VGG-16使用最大池化层来减小特征图的空间尺寸，帮助提取更加显著的特征并减少计算量。4.全连接层：VGG-16在卷积层之后接有3个全连接层，最后一个全连接层输出与类别相对应的向量，用于进行分类。VGG-16的网络结构：13个卷积层、3个全连接层、5个池化层。from torchvision.models import vgg16 device cuda if torch.cuda.is_available() else cpu print(Using {} device.format(device)) # 加载预训练模型并且对模型进行微调 model vgg16(pretrained True).to(device) # 加载预训练的vgg16模型 for param in model.parameters(): param.requires_grad False # 冻结模型的参数这样子在训练的时候只训练最后一层的参数 # 修改classifier模块的第6层即(6): Linear(in_features4096, out_features2, biasTrue) # 注意查看我们下方打印出来的模型 model.classifier._modules[6] nn.Linear(4096,len(classeNames)) # 修改vgg16模型中最后一层全连接层输出目标类别个数 model.to(device) model三、训练模型1.编写训练函数# 训练循环 def train(dataloader, model, loss_fn, optimizer): size len(dataloader.dataset) # 训练集的大小 num_batches len(dataloader) # 批次数目, (size/batch_size向上取整) train_loss, train_acc 0, 0 # 初始化训练损失和正确率 for X, y in dataloader: # 获取图片及其标签 X, y X.to(device), y.to(device) # 计算预测误差 pred model(X) # 网络输出 loss loss_fn(pred, y) # 计算网络输出和真实值之间的差距targets为真实值计算二者差值即为损失 # 反向传播 optimizer.zero_grad() # grad属性归零 loss.backward() # 反向传播 optimizer.step() # 每一步自动更新 # 记录acc与loss train_acc (pred.argmax(1) y).type(torch.float).sum().item() train_loss loss.item() train_acc / size train_loss / num_batches return train_acc, train_loss2.编写测试函数def test (dataloader, model, loss_fn): size len(dataloader.dataset) # 测试集的大小 num_batches len(dataloader) # 批次数目, (size/batch_size向上取整) test_loss, test_acc 0, 0 # 当不进行训练时停止梯度更新节省计算内存消耗 with torch.no_grad(): for imgs, target in dataloader: imgs, target imgs.to(device), target.to(device) # 计算loss target_pred model(imgs) loss loss_fn(target_pred, target) test_loss loss.item() test_acc (target_pred.argmax(1) 
target).type(torch.float).sum().item() test_acc / size test_loss / num_batches return test_acc, test_loss3.设置动态学习率def adjust_learning_rate(optimizer, epoch, start_lr): # 每 2 个epoch衰减到原来的 0.98 lr start_lr * (0.92 ** (epoch // 2)) for param_group in optimizer.param_groups: param_group[lr] lr learn_rate 1e-4 # 初始学习率 optimizer torch.optim.SGD(model.parameters(), lrlearn_rate)4.正式训练并保存模型import copy loss_fn nn.CrossEntropyLoss() # 创建损失函数 epochs 40 train_loss [] train_acc [] test_loss [] test_acc [] best_acc 0 # 设置一个最佳准确率作为最佳模型的判别指标 for epoch in range(epochs): # 更新学习率使用自定义学习率时使用 adjust_learning_rate(optimizer, epoch, learn_rate) model.train() epoch_train_acc, epoch_train_loss train(train_dl, model, loss_fn, optimizer) #scheduler.step() # 更新学习率调用官方动态学习率接口时使用 model.eval() epoch_test_acc, epoch_test_loss test(test_dl, model, loss_fn) # 保存最佳模型到 best_model if epoch_test_acc best_acc: best_acc epoch_test_acc best_model copy.deepcopy(model) train_acc.append(epoch_train_acc) train_loss.append(epoch_train_loss) test_acc.append(epoch_test_acc) test_loss.append(epoch_test_loss) # 获取当前的学习率 lr optimizer.state_dict()[param_groups][0][lr] template (Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, Lr:{:.2E}) print(template.format(epoch1, epoch_train_acc*100, epoch_train_loss, epoch_test_acc*100, epoch_test_loss, lr)) # 保存最佳模型到文件中 PATH ./best_model.pth # 保存的参数文件名 torch.save(best_model.state_dict(), PATH) print(Done)四、结果可视化import matplotlib.pyplot as plt #隐藏警告 import warnings warnings.filterwarnings(ignore) #忽略警告信息 plt.rcParams[font.sans-serif] [SimHei] # 用来正常显示中文标签 plt.rcParams[axes.unicode_minus] False # 用来正常显示负号 plt.rcParams[figure.dpi] 100 #分辨率 from datetime import datetime current_time datetime.now() # 获取当前时间 epochs_range range(epochs) plt.figure(figsize(12, 3)) plt.subplot(1, 2, 1) plt.plot(epochs_range, train_acc, labelTraining Accuracy) plt.plot(epochs_range, test_acc, labelTest Accuracy) plt.legend(loclower right) plt.title(Training and 
Validation Accuracy) plt.xlabel(current_time) plt.subplot(1, 2, 2) plt.plot(epochs_range, train_loss, labelTraining Loss) plt.plot(epochs_range, test_loss, labelTest Loss) plt.legend(locupper right) plt.title(Training and Validation Loss) plt.show()五、手动搭建VGG-16网络框架import torch.nn.functional as F class Model(nn.Module): def __init__(self): super(Model, self).__init__() self.conv1nn.Sequential( nn.Conv2d(3, 64, kernel_size3, padding1), # 224*224*64 nn.ReLU(inplaceTrue)) self.conv2nn.Sequential( nn.Conv2d(64, 64, kernel_size3, padding1), # 224*224*64 nn.ReLU(inplaceTrue)) self.pool1nn.Sequential( nn.MaxPool2d(2)) # 112*112*64 self.conv3nn.Sequential( nn.Conv2d(64, 128, kernel_size3, padding1), # 112*112*128 nn.ReLU(inplaceTrue)) self.conv4nn.Sequential( nn.Conv2d(128, 128, kernel_size3, padding1), # 112*112*128 nn.ReLU(inplaceTrue)) self.pool2nn.Sequential( nn.MaxPool2d(2)) # 56*56*128 self.conv5nn.Sequential( nn.Conv2d(128, 256, kernel_size3, padding1), # 56*56*256 nn.ReLU(inplaceTrue)) self.conv6nn.Sequential( nn.Conv2d(256, 256, kernel_size3, padding1), # 56*56*256 nn.ReLU(inplaceTrue)) self.conv7nn.Sequential( nn.Conv2d(256, 256, kernel_size3, padding1), # 56*56*256 nn.ReLU(inplaceTrue)) self.pool3nn.Sequential( nn.MaxPool2d(2)) #28*28*256 self.conv8nn.Sequential( nn.Conv2d(256, 512, kernel_size3, padding1), #28*28*512 nn.ReLU(inplaceTrue)) self.conv9nn.Sequential( nn.Conv2d(512, 512, kernel_size3, padding1), # 28*28*512 nn.ReLU(inplaceTrue)) self.conv10nn.Sequential( nn.Conv2d(512, 512, kernel_size3, padding1), # 28*28*512 nn.ReLU(inplaceTrue)) self.pool4nn.Sequential( nn.MaxPool2d(2)) #14*14*512 self.conv11nn.Sequential( nn.Conv2d(512, 512, kernel_size3, padding1), # 14*14*512 nn.ReLU(inplaceTrue)) self.conv12nn.Sequential( nn.Conv2d(512, 512, kernel_size3, padding1), # 14*14*512 nn.ReLU(inplaceTrue)) self.conv13nn.Sequential( nn.Conv2d(512, 512, kernel_size3, padding1), # 14*14*512 nn.ReLU(inplaceTrue)) self.pool5nn.Sequential( nn.MaxPool2d(2)) # 7*7*512 
self.fc1nn.Sequential( nn.Linear(7*7*512, 4096), nn.ReLU(inplaceTrue), nn.Dropout(0.5)) self.fc2nn.Sequential( nn.Linear(4096,4096), nn.ReLU(inplaceTrue), nn.Dropout(0.5)) self.fc3nn.Sequential( nn.Linear(4096, len(classeNames))) def forward(self, x): batch_size x.size(0) x self.conv1(x) # 卷积-激活 x self.conv2(x) # 卷积-激活 x self.pool1(x) # 池化 x self.conv3(x) # 卷积-激活 x self.conv4(x) # 卷积-激活 x self.pool2(x) # 池化 x self.conv5(x) # 卷积-激活 x self.conv6(x) # 卷积-激活 x self.conv7(x) # 卷积-激活 x self.pool3(x) # 池化 x self.conv8(x) # 卷积-激活 x self.conv9(x) # 卷积-激活 x self.conv10(x) # 卷积-激活 x self.pool4(x) # 池化 x self.conv11(x) # 卷积-激活 x self.conv12(x) # 卷积-激活 x self.conv13(x) # 卷积-激活 x self.pool5(x) # 池化 x x.view(batch_size, -1) x self.fc1(x) x self.fc2(x) x self.fc3(x) return x device cuda if torch.cuda.is_available() else cpu print(Using {} device.format(device)) model Model().to(device) model六、模型调优尝试提高测试集准确率在AI的建议下from torchvision.models import vgg16 device cuda if torch.cuda.is_available() else cpu print(Using {} device.format(device)) model vgg16(pretrainedTrue).to(device) # ----------------------核心冻结逻辑冻结前3个卷积块解冻后2个卷积块---------------------- # VGG16 features层结构划分 # Block1: 0~1层 | Block2: 2~4层 | Block3:5~8层 | Block4:9~12层 | Block5:13~16层 # 冻结前3个Block(0~8层)解冻Block4、Block5(9~16层) for idx, param in enumerate(model.features.parameters()): if idx 17: # 前17个参数对应前3个卷积块完全冻结 param.requires_grad False else: # 后两个卷积块Block4、Block5允许梯度更新微调人脸高层特征 param.requires_grad True # ---------------------------------------------------------------------------------------- # 2. 
替换最后一层全连接适配你的人脸分类数 num_classes len(classeNames) in_features model.classifier[6].in_features model.classifier[6] nn.Linear(in_features, num_classes) model.to(device)冻结前3个卷积块浅层前 3 个卷积块负责提取通用基础特征横竖边、明暗梯度、基础纹理人脸、猫狗、风景都共用这套基础视觉逻辑完全不需要重新训练冻结操作这部分网络权重锁死不更新反向传播时不会修改参数解冻范围VGG16 后 2 个卷积块block4、block5 最后的全连接分类头个人总结本周学习了VGG-16算法通过代码复现了VGG-16的框架这是目前学到的层数最多的模型有13个卷积层和3个全连接层每个卷积层后面都有激活函数。提高训练集准确率时我使用了AI给定的代码使VGG-16网络的前3个卷积块冻结后面解冻使训练集的准确率提高到60%以上不过模型过拟合很严重。