本场景构建了一个简单的神经网络作为样例代码,执行一个普通的float32训练,用于对比开启AMP训练后的加速效果。
我们先构建一个简单的神经网络。
import time

import torch
import torch.nn as nn
import torch_npu
import torchvision
from torch.utils.data import Dataset, DataLoader

# Training device; change the NPU index to match your own hardware.
device = torch.device('npu:0')


class CNN(nn.Module):
    """A small convolutional classifier for MNIST-sized (1x28x28) inputs."""

    def __init__(self):
        super(CNN, self).__init__()
        # Two conv + max-pool stages halve the 28x28 spatial size twice
        # (down to 7x7), then a small MLP head maps to 10 digit classes.
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16,
                      kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(16, 32, 3, 1, 1),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(32 * 7 * 7, 16),
            nn.ReLU(),
            nn.Linear(16, 10),
        )

    def forward(self, x):
        # Delegate the whole forward pass to the sequential stack.
        return self.net(x)
这一部分我们从torchvision中获取训练数据集,设置训练相关的参数batch_size、epochs。
# Fetch the MNIST training split (downloaded on first run) as tensors.
train_data = torchvision.datasets.MNIST(
    root='mnist',
    download=True,
    train=True,
    transform=torchvision.transforms.ToTensor(),
)

batch_size = 64
epochs = 10

# Model, data pipeline, loss and optimizer for the float32 baseline run.
model = CNN().to(device)
train_dataloader = DataLoader(train_data, batch_size=batch_size)  # batches the dataset
loss_func = nn.CrossEntropyLoss().to(device)                      # 10-way classification loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)           # plain SGD optimizer
# Baseline float32 training loop. Per-step wall-clock time is measured and
# reported so it can be compared against the AMP-enabled version of this demo.
for epo in range(epochs):
    for step, (imgs, labels) in enumerate(train_dataloader):
        start_time = time.time()
        imgs = imgs.to(device)
        labels = labels.to(device)
        outputs = model(imgs)              # forward pass
        loss = loss_func(outputs, labels)  # loss computation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # BUG FIX: start_time was recorded but never used, so the script never
        # produced the timing numbers it exists to compare. Report elapsed
        # time (and the current loss) every 100 steps.
        step_time = time.time() - start_time
        if step % 100 == 0:
            print(f'epoch {epo} step {step}: loss={loss.item():.4f}, step_time={step_time:.4f}s')