完成量化计算参数scale数据类型的转换。
npu_trans_quant_param(Tensor scale, Tensor? offset=None) -> Tensor
一个 Tensor 类型的输出,代表 npu_trans_quant_param 的计算结果。
# Basic call example for torch_npu.npu_trans_quant_param, which converts the
# data type of the quantization parameter `scale` (an optional `offset` may
# be supplied as the second argument).
import logging
import os

import torch
import torch_npu

# Float32 inputs with 16 elements each, created on the host.
scale = torch.randn(16, dtype=torch.float32)
offset = torch.randn(16, dtype=torch.float32)

# `.npu()` is called on both tensors before they are passed to the operator.
npu_out = torch_npu.npu_trans_quant_param(scale.npu(), offset.npu())
图模式下,npu_trans_quant_param 计算出的结果 Tensor 为 uint64 数据类型。由于 torch 不支持该数据类型,需要搭配其他接口使用,如示例代码中的 npu_quant_matmul。
# Graph-mode example for torch_npu.npu_trans_quant_param.
#
# In graph mode the operator's result tensor is uint64, a dtype torch does
# not support, so the result has to be consumed by another interface -- here
# torch_npu.npu_quant_matmul -- inside the same compiled model.
import logging
import os

import numpy as np
import torch
import torch_npu
import torchair as tng
from torchair.configs.compiler_config import CompilerConfig
from torchair.core.utils import logger
from torchair.ge_concrete_graph import ge_apis as ge

# Verbose torchair logging for the example run.
logger.setLevel(logging.DEBUG)
os.environ["ENABLE_ACLNN"] = "true"

# Build the NPU backend handed to torch.compile below.
config = CompilerConfig()
npu_backend = tng.get_npu_backend(compiler_config=config)


class MyModel(torch.nn.Module):
    """Feed the output of npu_trans_quant_param into npu_quant_matmul."""

    def __init__(self):
        super().__init__()

    def forward(self, x1, x2, scale, offset, bias):
        """Convert `scale` (with `offset`) and run the quantized matmul.

        Args:
            x1: First matmul operand.
            x2: Second matmul operand.
            scale: Quantization scale passed to npu_trans_quant_param.
            offset: Quantization offset; passed both to
                npu_trans_quant_param and to npu_quant_matmul.
            bias: Bias forwarded to npu_quant_matmul.

        Returns:
            The result of torch_npu.npu_quant_matmul.
        """
        scale_1 = torch_npu.npu_trans_quant_param(scale, offset)
        return torch_npu.npu_quant_matmul(
            x1, x2, scale_1, offset=offset, bias=bias
        )


cpu_model = MyModel()
model = cpu_model.npu()

# Host-side inputs: int8 operands, float32 scale/offset, int32 bias.
cpu_x1 = torch.randint(-1, 1, (15, 1, 512), dtype=torch.int8)
cpu_x2 = torch.randint(-1, 1, (15, 512, 128), dtype=torch.int8)
scale = torch.randn(1, dtype=torch.float32)
offset = torch.randn(1, dtype=torch.float32)
bias = torch.randint(-1, 1, (15, 1, 128), dtype=torch.int32)

# `model` is rebound here to the compiled module built from `cpu_model`.
model = torch.compile(cpu_model, backend=npu_backend, dynamic=True)
npu_out = model(
    cpu_x1.npu(), cpu_x2.npu(), scale.npu(), offset.npu(), bias.npu()
)