将INT8数据反量化为FP16。
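计算公式为:$\mathrm{anti\_quant}(x) = \mathrm{float16}\big((x + \mathrm{offset}) \times \mathrm{scale}\big)$
接口原型:
npu_anti_quant(Tensor x, Tensor scale, *, Tensor? offset=None, ScalarType? dst_dtype=None, ScalarType? src_dtype=None) -> Tensor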
输出说明:返回一个Tensor,表示反量化(anti-quant)的计算结果。
# Single-operator call mode: invoke the operator directly on NPU tensors.
import torch
import torch_npu

x_tensor = torch.tensor([1, 2, 3, 4], dtype=torch.int8).npu()
scale = torch.tensor([2.0], dtype=torch.float).npu()
offset = torch.tensor([2.0], dtype=torch.float).npu()
out = torch_npu.npu_anti_quant(x_tensor, scale, offset=offset, dst_dtype=torch.float16)

# torch API graph mode: wrap the operator in a module and run it through
# torch.compile with the torchair NPU backend.
import torch
import torch_npu
import torchair as tng
from torchair.ge_concrete_graph import ge_apis as ge
from torchair.configs.compiler_config import CompilerConfig

config = CompilerConfig()
# Select 'pbtxt' as the graph dump type in the compiler's debug options.
config.debug.graph_dump.type = 'pbtxt'
npu_backend = tng.get_npu_backend(compiler_config=config)

x_tensor = torch.tensor([1, 2, 3, 4], dtype=torch.int8).npu()
scale = torch.tensor([2.0], dtype=torch.float).npu()
offset = torch.tensor([2.0], dtype=torch.float).npu()


class Model(torch.nn.Module):
    """Minimal module whose forward pass is a single npu_anti_quant call."""

    def __init__(self):
        super().__init__()

    def forward(self, x, scale, offset):
        """Return npu_anti_quant of x with the given scale and offset, as float16."""
        return torch_npu.npu_anti_quant(x, scale, offset=offset, dst_dtype=torch.float16)


cpu_model = Model()
# fullgraph=True and dynamic=False are passed straight through to torch.compile.
model = torch.compile(cpu_model, backend=npu_backend, dynamic=False, fullgraph=True)
output = model(x_tensor, scale, offset)