def fuse_add_softmax_dropout(training, dropout, attn_mask, attn_scores, attn_head_size, p=0.5, dim=-1):
Uses an NPU custom operator to replace the native implementation, improving performance.
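For context, the native pattern this op fuses is roughly the sketch below. This is an illustrative reconstruction from the parameter names, not the kernel's documented semantics; in particular, how attn_head_size enters the scaling is an assumption:

    import math
    import torch
    import torch.nn.functional as F

    def native_add_softmax_dropout(training, attn_mask, attn_scores,
                                   attn_head_size, p=0.5, dim=-1):
        # Unfused reference: scale, mask-add, softmax, and dropout as
        # separate ops; the NPU kernel fuses them into a single launch.
        # Whether the scale is 1/sqrt(attn_head_size) or a precomputed
        # factor (as `alpha` in the example below) is an assumption.
        scores = attn_scores / math.sqrt(attn_head_size) + attn_mask
        probs = F.softmax(scores, dim=dim)
        return F.dropout(probs, p=p, training=training)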
from torch_npu.contrib.function import fuse_add_softmax_dropout
fuse_add_softmax_dropout(training, dropout, npu_input1, npu_input2, alpha, dim=axis)
>>> training = True
>>> dropout = nn.DropoutWithByteMask(0.1)
>>> npu_input1 = torch.rand(96, 12, 384, 384).half().npu()
>>> npu_input2 = torch.rand(96, 12, 384, 384).half().npu()
>>> alpha = 0.125
>>> axis = -1
>>> output = fuse_add_softmax_dropout(training, dropout, npu_input1, npu_input2, alpha, dim=axis)
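Positionally, npu_input1 corresponds to attn_mask, npu_input2 to attn_scores, and alpha to attn_head_size. As a sketch of where the call sits in practice, the hypothetical module below drops it into a self-attention forward pass; the wrapper, head size, and shapes are assumptions for illustration, not part of the documented API:

    import torch
    import torch.nn as nn
    from torch_npu.contrib.function import fuse_add_softmax_dropout

    class FusedAttentionScores(nn.Module):
        # Hypothetical wrapper: shows where the fused call fits in a
        # self-attention forward pass. Names and shapes are illustrative.
        def __init__(self, head_size=64, dropout_prob=0.1):
            super().__init__()
            self.alpha = 1.0 / head_size ** 0.5  # 0.125 for head_size=64, as in the example
            self.dropout = nn.DropoutWithByteMask(dropout_prob)  # as in the example above

        def forward(self, q, k, attn_mask):
            # q, k: (batch, heads, seq, head_size); attn_mask must be
            # broadcastable to the (batch, heads, seq, seq) score tensor.
            attn_scores = torch.matmul(q, k.transpose(-1, -2))
            # Fused mask-add + softmax + dropout in a single NPU kernel.
            return fuse_add_softmax_dropout(self.training, self.dropout,
                                            attn_mask, attn_scores,
                                            self.alpha, dim=-1)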