单卡场景样例
模块和参数设置
引入所需的模块,设置供用户自定义的参数。
import argparse import os import time import torch import torch_npu import torch.nn.parallel import torch.optim import torch.utils.data import torch.utils.data.distributed import torchvision.transforms as transforms import torchvision.datasets as datasets import torchvision.models as models from apex import amp # 导入amp模块 model_names = sorted(name for name in models.__dict__ if name.islower() and not name.startswith("__") and callable(models.__dict__[name])) def parse_args(): """ 用户自定义数据集路径、模型路径 """ parser = argparse.ArgumentParser(description='PyTorch ImageNet Inferring') parser.add_argument('--data', metavar='DIR', default="/data/imagenet", help='path to dataset') parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet50', choices=model_names, help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet18)') parser.add_argument('-b', '--batch_size', default=512, type=int, metavar='N', help='mini-batch size (default: 256), this is the total ' 'batch size of all GPUs on the current node when ' 'using Data Parallel or Distributed Data Parallel') parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('--pretrained', dest='pretrained', action='store_true', help='use pre-trained model') parser.add_argument('--npu', default=None, type=int, help='NPU id to use.') parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', help='number of data loading workers (default: 32)') parser.add_argument('--lr', '--learning_rate', default=0.1, type=float, metavar='LR', help='initial learning rate', dest='lr') parser.add_argument('--wd', '--weight_decay', default=1e-4, type=float, metavar='W', help='weight decay (default: 1e-4)', dest='weight_decay') args, unknown_args = parser.parse_known_args() if len(unknown_args) > 0: for bad_arg in unknown_args: print("ERROR: Unknown command line arg: %s" % bad_arg) raise ValueError("Invalid command line arg(s)") return args ......
主函数
设置主函数入口。
...... def main(): args = parse_args() if args.npu is None: args.npu = 0 global CALCULATE_DEVICE CALCULATE_DEVICE = "npu:{}".format(args.npu) torch_npu.npu.set_device(CALCULATE_DEVICE) print("use ", CALCULATE_DEVICE) main_worker(args.npu, args) ......
创建模型
在main_worker中创建模型,设置device和优化器。
...... def main_worker(npu, args): global best_acc1 args.npu = npu print("=> creating model '{}'".format(args.arch)) model = models.__dict__[args.arch](zero_init_residual=True) # 将模型数据复制到昇腾AI处理器中 model = model.to(CALCULATE_DEVICE) optimizer = torch.optim.SGD([ {'params': [param for name, param in model.named_parameters() if name[-4:] == 'bias'], 'weight_decay': 0.0}, {'params': [param for name, param in model.named_parameters() if name[-4:] != 'bias'], 'weight_decay': args.weight_decay}], args.lr) ......
使能混合精度
在main_worker中初始化混合精度模型,使用后可加速运算,但结果的准确率可能会轻微降低。可根据实际场景选择使用。
...... model, optimizer = amp.initialize(model, optimizer, opt_level="O2", loss_scale=1024, verbosity=1)
加载模型参数
在main_worker中从模型文件中恢复训练好的模型参数并加载。
...... if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) best_acc1 = checkpoint['best_acc1'] best_acc1 = best_acc1.to("npu:{}".format(args.npu)) model.load_state_dict(checkpoint['state_dict']) print("=> loaded checkpoint '{}' ".format(args.resume)) else: print("=> no checkpoint found at '{}'".format(args.resume)) ......
初始化数据集
在main_worker中对图像数据进行加载与预处理。
...... valdir = os.path.join(args.data, 'val') normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) val_loader = torch.utils.data.DataLoader( datasets.ImageFolder(valdir, transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize, ])), batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True) ......
运行推理
在main_worker中运行推理。
...... validate(val_loader, model, args)
在线推理
在线推理的实现代码如下。
...... def validate(val_loader, model, args): batch_time = AverageMeter('Time', ':6.3f') top1 = AverageMeter('Acc@1', ':6.2f') top5 = AverageMeter('Acc@5', ':6.2f') progress = ProgressMeter( len(val_loader), [batch_time, top1, top5], prefix='Test: ') # ========================================================================= # 切换到推理模式 # ========================================================================= model.eval() # ========================================================================= # 在 torch.no_grad():分支下执行模型正向计算 # ========================================================================= with torch.no_grad(): end = time.time() for i, (images, target) in enumerate(val_loader): # 将图像数据置于NPU中 images = images.to(CALCULATE_DEVICE, non_blocking=True) target = target.to(torch.int32).to(CALCULATE_DEVICE, non_blocking=True) # 计算输出 output = model(images) # 统计结果精度 acc1, acc5 = accuracy(output, target, topk=(1, 5)) top1.update(acc1[0], images.size(0)) top5.update(acc5[0], images.size(0)) # 测量运行时间 batch_time.update(time.time() - end) end = time.time() # 打印推理运算过程日志 progress.display(i) print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5)) return top1.avg class AverageMeter(object): """计算并存储平均值和当前值""" def __init__(self, name, fmt=':f'): self.name = name self.fmt = fmt self.reset() self.start_count_index = 10 def reset(self): self.val = 0 self.avg = 0 self.sum = 0 self.count = 0 def update(self, val, n=1): if self.count == 0: self.batchsize = n self.val = val self.count += n if self.count > (self.start_count_index * self.batchsize): self.sum += val * n self.avg = self.sum / (self.count - self.start_count_index * self.batchsize) def __str__(self): fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' return fmtstr.format(**self.__dict__) class ProgressMeter(object): """记录模型运算过程信息""" def __init__(self, num_batches, meters, prefix=""): self.batch_fmtstr = self._get_batch_fmtstr(num_batches) self.meters = meters self.prefix = prefix def display(self, batch): entries = [self.prefix + self.batch_fmtstr.format(batch)] entries += [str(meter) for meter in self.meters] print('\t'.join(entries)) def _get_batch_fmtstr(self, num_batches): num_digits = len(str(num_batches // 1)) fmt = '{:' + str(num_digits) + 'd}' return '[' + fmt + '/' + fmt.format(num_batches) + ']' def accuracy(output, target, topk=(1,)): """根据指定值k,计算k个顶部预测的精度""" with torch.no_grad(): maxk = max(topk) batch_size = target.size(0) _, pred = output.topk(maxk, 1, True, True) pred = pred.t() correct = pred.eq(target.view(1, -1).expand_as(pred)) res = [] for k in topk: correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) res.append(correct_k.mul_(100.0 / batch_size)) return res if __name__ == '__main__': main()
推理完成
当出现推理结果精度的回显时,说明推理完成。样例回显截图如下。
父主题: 参考样例