在模型中遇到问题时,使用整网复现问题成本较大,可以构建测试用例,参考单算子dump方法dump数据来复现精度或性能问题,便于定位解决。构建测试用例一般有如下两种方式。
import copy

import torch
import torch_npu
from torch.testing._internal.common_utils import TestCase, run_tests


class TestMax(TestCase):
    """Single-operator test case: run ``torch.max`` on CPU and on NPU and compare."""

    def cpu_op_exec(self, input1):
        """Run ``torch.max`` on a CPU tensor and return the result as a NumPy array."""
        # Invoke the operator under test.
        output = torch.max(input1)
        output = output.to('cpu')
        output = output.numpy()
        return output

    def npu_op_exec(self, input1):
        """Run ``torch.max`` on an NPU tensor and return the result as a NumPy array."""
        # Invoke the same operator; it is dispatched to the NPU because the
        # input tensor was moved there by the caller.
        output = torch.max(input1)
        # Copy the result back to the host and convert it, so that both sides
        # of the comparison have the same type and live on the same device.
        output = output.to('cpu')
        output = output.numpy()
        return output

    def test_max(self):
        """Compare the CPU and NPU results of ``torch.max`` on identical input."""
        input_data = torch.randn(10, 20)
        input_data = input_data.to(torch.int64)  # dtype conversion of the test data
        input_cpu = copy.deepcopy(input_data)
        input_npu = copy.deepcopy(input_data).npu()

        output_cpu = self.cpu_op_exec(input_cpu)
        output_npu = self.npu_op_exec(input_npu)

        # Compare the CPU and NPU results; atol is the allowed absolute error.
        # (The legacy ``prec`` keyword is rejected by current PyTorch releases;
        # ``atol`` with ``rtol=0`` expresses the same absolute tolerance.)
        self.assertEqual(output_cpu, output_npu, atol=1e-4, rtol=0)


if __name__ == '__main__':
    run_tests()
使用其他算子与目标算子一起构建复杂一些的计算过程场景。此处构建一个包含两个OP的Module,构建样例如下:
import copy

import torch
import torch.nn as nn
import torch_npu
from torch.testing._internal.common_utils import TestCase, run_tests


class Model(nn.Module):
    """Minimal module wrapping a single ``Conv2d`` layer.

    Args:
        in_channels: Number of input channels; the layer produces twice as
            many output channels.
        hooks: When true, register a hook on the convolution weight that
            prints its gradient during the backward pass.
    """

    def __init__(self, in_channels=1, hooks=False):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, in_channels * 2, kernel_size=64)
        if hooks:
            self.conv.weight.register_hook(lambda grad: print(grad))

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        return self.conv(x)


class TestConv2d(TestCase):
    """Run the same model and input on CPU and on NPU and compare the outputs."""

    def test_conv2d(self):
        """Forward/backward on CPU, then on NPU with identical weights, then compare."""
        model = Model(in_channels=16)
        # To inspect the backward results, build the model with hooks instead:
        #   model = Model(in_channels=16, hooks=True)

        # Create the input tensor.
        input_tensor = torch.randn(4, 16, 64, 64)

        # Run on CPU.
        input_tensor_cpu = copy.deepcopy(input_tensor)
        out = model(input_tensor_cpu)
        loss = out.sum()
        loss.backward()
        cpuout = out.detach()

        # Run on NPU: move the model and the input tensor to the device.
        torch_npu.npu.set_device("npu:0")  # usually select the target card first
        model_npu = Model(in_channels=16)
        # A freshly constructed model has its own random weights; copy the CPU
        # model's parameters so that both runs compute with the same values and
        # any difference is attributable to the device, not to initialization.
        model_npu.load_state_dict(model.state_dict())
        model_npu = model_npu.npu()
        input_tensor_npu = copy.deepcopy(input_tensor).npu()
        out = model_npu(input_tensor_npu)
        loss = out.sum()
        loss.backward()
        # Bring the result back to the host so both operands are CPU tensors.
        npuout = out.detach().cpu()

        # Based on the comparison, decide whether the problem is reproduced.
        # (The legacy ``prec`` keyword is rejected by current PyTorch releases;
        # ``atol`` with ``rtol=0`` expresses the same absolute tolerance.)
        self.assertEqual(cpuout, npuout, atol=1e-4, rtol=0)


if __name__ == '__main__':
    run_tests()