尧图网站建设 尧图网络
  • 首页
  • 关于我们
  • 服务项目
  • 案例展示
  • 建站流程
  • 资讯中心
  • 联系我们
首页/资讯中心/详情

Alex-VGG3

Alex-VGG3
📅 发布时间:2026/6/18 12:53:37

鉴于之前的两次对照实验都无法提现出AlexNet和VGG的区别,我调节了训练的样本数据,也对代码进行了调整,数据集从原先的CIFAR-10: 10类 换成了CIFAR-100: 100类
训练代码和结果如下

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import time
import os# ============ 设备 ============
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
print("使用设备:", device)# ============ 数据集选择函数 ============
def get_dataset(dataset_name='cifar100', data_root='./data'):"""支持的数据集:- cifar10: 10类,32x32,6万张- cifar100: 100类,32x32,6万张  - tiny-imagenet: 200类,64x64,10万张- imagenet-subset: 1000类的子集,224x224"""if dataset_name == 'cifar10':print("加载 CIFAR-10 数据集 (10类, 32x32)")mean, std = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)img_size = 32num_classes = 10transform_train = transforms.Compose([transforms.RandomCrop(32, padding=4),transforms.RandomHorizontalFlip(),transforms.ToTensor(),transforms.Normalize(mean, std)])transform_test = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean, std)])trainset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform_train)testset = torchvision.datasets.CIFAR10(root=data_root, train=False, download=True, transform=transform_test)elif dataset_name == 'cifar100':print("加载 CIFAR-100 数据集 (100类, 32x32)")mean, std = (0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)img_size = 32num_classes = 100transform_train = transforms.Compose([transforms.RandomCrop(32, padding=4),transforms.RandomHorizontalFlip(),transforms.RandomRotation(15),  # 增加旋转增强transforms.ToTensor(),transforms.Normalize(mean, std)])transform_test = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean, std)])trainset = torchvision.datasets.CIFAR100(root=data_root, train=True, download=True, transform=transform_train)testset = torchvision.datasets.CIFAR100(root=data_root, train=False, download=True, transform=transform_test)elif dataset_name == 'tiny-imagenet':print("加载 Tiny-ImageNet 数据集 (200类, 64x64)")# Tiny-ImageNet 需要特殊处理mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)img_size = 64num_classes = 200transform_train = transforms.Compose([transforms.RandomCrop(64, padding=8),transforms.RandomHorizontalFlip(),transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),transforms.ToTensor(),transforms.Normalize(mean, std)])transform_test = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean, std)])# 使用ImageFolder加载Tiny-ImageNet# 需要先下载并解压: http://cs231n.stanford.edu/tiny-imagenet-200.ziptiny_path = os.path.join(data_root, 'tiny-imagenet-200')if not os.path.exists(tiny_path):print("请先下载Tiny-ImageNet: wget http://cs231n.stanford.edu/tiny-imagenet-200.zip")print("然后解压到:", tiny_path)raise FileNotFoundError(f"找不到 {tiny_path}")trainset = torchvision.datasets.ImageFolder(os.path.join(tiny_path, 'train'), transform=transform_train)testset = torchvision.datasets.ImageFolder(os.path.join(tiny_path, 'val'), transform=transform_test)elif dataset_name == 'stl10':print("加载 STL-10 数据集 (10类, 96x96)")mean, std = (0.4467, 0.4398, 0.4066), (0.2603, 0.2566, 0.2713)img_size = 96num_classes = 10transform_train = transforms.Compose([transforms.RandomCrop(96, padding=12),transforms.RandomHorizontalFlip(),transforms.ToTensor(),transforms.Normalize(mean, std)])transform_test = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean, std)])trainset = torchvision.datasets.STL10(root=data_root, split='train', download=True, transform=transform_train)testset = torchvision.datasets.STL10(root=data_root, split='test', download=True, transform=transform_test)else:raise ValueError(f"不支持的数据集: {dataset_name}")print(f"训练集大小: {len(trainset)}, 测试集大小: {len(testset)}")print(f"图片尺寸: {img_size}x{img_size}, 类别数: {num_classes}")return trainset, testset, num_classes, img_size# ============ 自适应模型定义 ============
class AdaptiveAlexNet(nn.Module):"""可适应不同输入尺寸的AlexNet"""def __init__(self, num_classes=10, img_size=32):super().__init__()# 根据输入尺寸调整架构if img_size <= 32:# 小图片:减少池化层self.features = nn.Sequential(nn.Conv2d(3, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),nn.Conv2d(64, 192, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),nn.Conv2d(192, 384, 3, padding=1), nn.ReLU(),nn.Conv2d(384, 256, 3, padding=1), nn.ReLU(),nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2))feat_size = (img_size // 8) ** 2 * 256else:# 大图片:标准架构self.features = nn.Sequential(nn.Conv2d(3, 96, 11, stride=4, padding=2), nn.ReLU(), nn.MaxPool2d(3, stride=2),nn.Conv2d(96, 256, 5, padding=2), nn.ReLU(), nn.MaxPool2d(3, stride=2),nn.Conv2d(256, 384, 3, padding=1), nn.ReLU(),nn.Conv2d(384, 384, 3, padding=1), nn.ReLU(),nn.Conv2d(384, 256, 3, padding=1), nn.ReLU(), nn.MaxPool2d(3, stride=2))# 计算特征大小with torch.no_grad():dummy = torch.zeros(1, 3, img_size, img_size)feat_size = self.features(dummy).view(1, -1).size(1)self.classifier = nn.Sequential(nn.Dropout(0.5),nn.Linear(feat_size, 4096), nn.ReLU(),nn.Dropout(0.5),nn.Linear(4096, 4096), nn.ReLU(),nn.Linear(4096, num_classes))def forward(self, x):x = self.features(x)x = x.view(x.size(0), -1)return self.classifier(x)class AdaptiveVGG(nn.Module):"""可适应不同输入尺寸的VGG"""def __init__(self, num_classes=10, img_size=32, use_bn=True):super().__init__()def make_layers(cfg, use_bn=True):layers = []in_channels = 3for v in cfg:if v == 'M':layers.append(nn.MaxPool2d(2, 2))else:conv = nn.Conv2d(in_channels, v, 3, padding=1)if use_bn:layers += [conv, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]else:layers += [conv, nn.ReLU(inplace=True)]in_channels = vreturn nn.Sequential(*layers)# 根据图片大小选择配置if img_size <= 32:# 小图片:VGG11配置,减少池化cfg = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 512, 512]elif img_size <= 64:# 中等图片:VGG11标准配置cfg = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512]else:# 大图片:VGG16配置cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']self.features = make_layers(cfg, use_bn)# 自适应池化确保输出固定大小self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) if img_size > 32 else nn.AdaptiveAvgPool2d((1, 1))# 计算分类器输入大小with torch.no_grad():dummy = torch.zeros(1, 3, img_size, img_size)feat_size = self.avgpool(self.features(dummy)).view(1, -1).size(1)self.classifier = nn.Sequential(nn.Linear(feat_size, 4096 if img_size > 32 else 512),nn.ReLU(True),nn.Dropout(0.5),nn.Linear(4096 if img_size > 32 else 512, 4096 if img_size > 32 else 512),nn.ReLU(True),nn.Dropout(0.5),nn.Linear(4096 if img_size > 32 else 512, num_classes))def forward(self, x):x = self.features(x)x = self.avgpool(x)x = x.view(x.size(0), -1)return self.classifier(x)# ============ 训练函数(不变)============
def train_epoch(model, loader, criterion, optimizer):model.train()loss_total, correct, total = 0, 0, 0pbar = tqdm(loader, desc='Training')for imgs, labels in pbar:imgs, labels = imgs.to(device), labels.to(device)optimizer.zero_grad()outputs = model(imgs)loss = criterion(outputs, labels)loss.backward()optimizer.step()loss_total += loss.item()_, pred = outputs.max(1)total += labels.size(0)correct += pred.eq(labels).sum().item()pbar.set_postfix({'Loss': f'{loss.item():.4f}', 'Acc': f'{100.*correct/total:.2f}%'})return loss_total/len(loader), 100.*correct/totaldef test_epoch(model, loader, criterion):model.eval()loss_total, correct, total = 0, 0, 0with torch.no_grad():for imgs, labels in tqdm(loader, desc='Testing'):imgs, labels = imgs.to(device), labels.to(device)outputs = model(imgs)loss = criterion(outputs, labels)loss_total += loss.item()_, pred = outputs.max(1)total += labels.size(0)correct += pred.eq(labels).sum().item()return loss_total/len(loader), 100.*correct/total# ============ 实验运行函数 ============
def run_experiment(dataset_name='cifar100', epochs=10, batch_size=128):"""运行对比实验"""print(f"\n{'='*50}")print(f"开始实验: {dataset_name.upper()}")print(f"{'='*50}\n")# 获取数据集trainset, testset, num_classes, img_size = get_dataset(dataset_name)# 创建数据加载器trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)# 创建模型alex_model = AdaptiveAlexNet(num_classes=num_classes, img_size=img_size).to(device)vgg_model = AdaptiveVGG(num_classes=num_classes, img_size=img_size, use_bn=True).to(device)# 打印模型参数量alex_params = sum(p.numel() for p in alex_model.parameters()) / 1e6vgg_params = sum(p.numel() for p in vgg_model.parameters()) / 1e6print(f"AlexNet 参数量: {alex_params:.2f}M")print(f"VGG 参数量: {vgg_params:.2f}M")# 训练配置criterion = nn.CrossEntropyLoss()alex_optimizer = optim.Adam(alex_model.parameters(), lr=0.001, weight_decay=5e-4)vgg_optimizer = optim.Adam(vgg_model.parameters(), lr=0.001, weight_decay=5e-4)# 学习率调度器alex_scheduler = optim.lr_scheduler.CosineAnnealingLR(alex_optimizer, T_max=epochs)vgg_scheduler = optim.lr_scheduler.CosineAnnealingLR(vgg_optimizer, T_max=epochs)# 记录历史alex_hist = {"train_loss":[], "train_acc":[], "test_loss":[], "test_acc":[]}vgg_hist = {"train_loss":[], "train_acc":[], "test_loss":[], "test_acc":[]}# 训练循环for epoch in range(1, epochs+1):print(f"\n--- Epoch {epoch}/{epochs} ---")# AlexNetprint("训练 AlexNet...")tl, ta = train_epoch(alex_model, trainloader, criterion, alex_optimizer)vl, va = test_epoch(alex_model, testloader, criterion)alex_scheduler.step()alex_hist["train_loss"].append(tl)alex_hist["train_acc"].append(ta)alex_hist["test_loss"].append(vl)alex_hist["test_acc"].append(va)print(f"AlexNet | Train Loss: {tl:.4f}, Train Acc: {ta:.2f}% | Test Loss: {vl:.4f}, Test Acc: {va:.2f}%")# VGGprint("训练 VGG...")tl, ta = train_epoch(vgg_model, trainloader, criterion, vgg_optimizer)vl, va = test_epoch(vgg_model, testloader, criterion)vgg_scheduler.step()vgg_hist["train_loss"].append(tl)vgg_hist["train_acc"].append(ta)vgg_hist["test_loss"].append(vl)vgg_hist["test_acc"].append(va)print(f"VGG     | Train Loss: {tl:.4f}, Train Acc: {ta:.2f}% | Test Loss: {vl:.4f}, Test Acc: {va:.2f}%")return alex_hist, vgg_hist# ============ 可视化函数 ============
def plot_comparison(alex_hist, vgg_hist, dataset_name):"""绘制对比曲线"""epochs = range(1, len(alex_hist["train_loss"])+1)fig, axes = plt.subplots(2, 2, figsize=(15, 12))# 训练损失axes[0, 0].plot(epochs, alex_hist["train_loss"], "r-", label="AlexNet", linewidth=2)axes[0, 0].plot(epochs, vgg_hist["train_loss"], "b-", label="VGG", linewidth=2)axes[0, 0].set_title(f"Training Loss - {dataset_name.upper()}", fontsize=14)axes[0, 0].set_xlabel("Epoch")axes[0, 0].set_ylabel("Loss")axes[0, 0].legend()axes[0, 0].grid(True, alpha=0.3)# 测试损失axes[0, 1].plot(epochs, alex_hist["test_loss"], "r--", label="AlexNet", linewidth=2)axes[0, 1].plot(epochs, vgg_hist["test_loss"], "b--", label="VGG", linewidth=2)axes[0, 1].set_title(f"Test Loss - {dataset_name.upper()}", fontsize=14)axes[0, 1].set_xlabel("Epoch")axes[0, 1].set_ylabel("Loss")axes[0, 1].legend()axes[0, 1].grid(True, alpha=0.3)# 训练准确率axes[1, 0].plot(epochs, alex_hist["train_acc"], "r-", label="AlexNet", linewidth=2)axes[1, 0].plot(epochs, vgg_hist["train_acc"], "b-", label="VGG", linewidth=2)axes[1, 0].set_title(f"Training Accuracy - {dataset_name.upper()}", fontsize=14)axes[1, 0].set_xlabel("Epoch")axes[1, 0].set_ylabel("Accuracy (%)")axes[1, 0].legend()axes[1, 0].grid(True, alpha=0.3)# 测试准确率axes[1, 1].plot(epochs, alex_hist["test_acc"], "r--", label="AlexNet", linewidth=2)axes[1, 1].plot(epochs, vgg_hist["test_acc"], "b--", label="VGG", linewidth=2)axes[1, 1].set_title(f"Test Accuracy - {dataset_name.upper()}", fontsize=14)axes[1, 1].set_xlabel("Epoch")axes[1, 1].set_ylabel("Accuracy (%)")axes[1, 1].legend()axes[1, 1].grid(True, alpha=0.3)plt.tight_layout()plt.savefig(f'comparison_{dataset_name}.png', dpi=100)plt.show()# 打印最终结果print(f"\n{'='*50}")print(f"最终结果 - {dataset_name.upper()}")print(f"{'='*50}")print(f"AlexNet - 最佳测试准确率: {max(alex_hist['test_acc']):.2f}%")print(f"VGG     - 最佳测试准确率: {max(vgg_hist['test_acc']):.2f}%")print(f"VGG 相对提升: {max(vgg_hist['test_acc']) - max(alex_hist['test_acc']):.2f}%")# ============ 主函数 ============
if __name__ == "__main__":# 选择数据集:'cifar10', 'cifar100', 'stl10', 'tiny-imagenet'DATASET = 'cifar100'  # 改成 'cifar100' 或 'stl10' 试试EPOCHS = 20  # 增加训练轮数BATCH_SIZE = 128# 运行实验alex_hist, vgg_hist = run_experiment(dataset_name=DATASET,epochs=EPOCHS,batch_size=BATCH_SIZE)# 绘制对比图plot_comparison(alex_hist, vgg_hist, DATASET)# 可选:运行多个数据集对比# for dataset in ['cifar10', 'cifar100', 'stl10']:#     alex_hist, vgg_hist = run_experiment(dataset, epochs=20)#     plot_comparison(alex_hist, vgg_hist, dataset)

image

image

image

通过上面的两幅图可以看到更换到 CIFAR-100 之后,同样经过20轮的训练,虽然在初期AlexNet的loss下降较快,准确率上升快些,可以随着训练轮数增加,由于分类时100个,导致相对简单的模型由于参数太少,未能有效的对物体进行识别,分类难度提升10倍,随机猜测准确率,所以表现不佳,但是VGG由于模型参数相对多些,更能够有效的和数据进行拟合,后期占优。后面我接着调节超参观察数据源对训练结果的影响

相关新闻

  • 操作系统应用构建(十二)RustDesk 用户服务器搭建——东方仙盟筑基期
  • 10/17
  • NOIP2021 T2

最新新闻

  • 2026年6月无人机维修培训机构推荐:TOP5评测专业价格实战案例 - 品牌推荐
  • 2025-2026年银谷大厦电话查询:租用前请核实楼宇资质与租赁合同条款 - 品牌推荐
  • 免费转换秘籍:2026年将PDF幻灯片转为可编辑PPT的3种路径 - 时时资讯
  • 2026年免费教程:PDF每页导出高清JPG,这招比截图强百倍 - 时时资讯
  • TC1043低功耗模拟前端芯片:集成运放、比较器与基准源的电路设计实战
  • 2026年开源大模型架构解析:Transformer演进与实操选型指南

日新闻

  • 5分钟掌握Python进化算法:Geatpy高性能优化工具完全指南
  • Microchip 24AA044 EEPROM选型与应用全指南:从参数解析到实战编程
  • 华为的鸿蒙到底有多牛?为什么称作遥遥领先?

周新闻

  • 3步解锁iOS设备:applera1n激活锁绕过完全指南
  • 39 2026 人工智能证书终极盘点,普通人选 AI 证书可以从这些方向入手
  • Redis 暴露公网有多危险?从端口检查到补救步骤

月新闻

  • 【总结】入门篇:50句话让你记住架构核心概念
  • WeChatMsg技术方案解析:实现Mac微信数据自主管理的完整解决方案
  • WeChatMsg:革新性微信数据备份方案,打造你的专属数字记忆库

关于尧图

  • 公司简介
  • 团队介绍
  • 企业文化
  • 荣誉资质

服务项目

  • 定制开发
  • 电商建站
  • UI 设计
  • 运维服务

快速链接

  • 案例展示
  • 建站流程
  • 常见问题
  • 资讯中心

联系方式

  • 📍北京市朝阳区互联网产业园 A 座 10 层
  • 📞400-888-8888
  • ✉️contact@rkmt.cn
  • 🕐周一至周日 9:00-21:00

© 2024 北京尧图网络科技有限公司 版权所有 | 京 ICP 备 XXXXXXXX 号