本章节列出的接口均为预留接口,后续有可能变更或废弃,不建议开发者使用,开发者无需关注。
Matmul
- __aicore__ inline void SetSubBlockIdx(uint8_t subBlockIdx);
- __aicore__ inline const handle SyncGroupJoin(uint32_t groupID)
- __aicore__ inline const handle SyncGroupJoin(uint32_t groupID)
- __aicore__ inline void SetAntiQuantScalar(const SrcT offsetScalar, const SrcT scaleScalar);
- __aicore__ inline void SetAntiQuantVector(const LocalTensor<SrcT> &offsetTensor, const LocalTensor<SrcT> &scaleTensor);
- __aicore__ inline void SetTensorAWithCopy(const GlobalTensor<SrcAT>& gm, const LocalTensor<SrcAT> &leftMatrix, bool isTransposeA = false);
- __aicore__ inline void SetTensorBWithCopy(const GlobalTensor<SrcBT>& gm, const LocalTensor<SrcBT> &rightMatrix, bool isTransposeB = false);
SoftMax
- template <typename T1, typename T2> __aicore__ inline bool AdjustSoftMaxRes(const LocalTensor<T1>& softMaxRes, const LocalTensor<T2>& maxTensor, const uint32_t from, const T1 to, const SoftMaxShapeInfo& softmaxShapeInfo)
LayerNorm
- template <typename T, bool isReuseSource = false> __aicore__ inline void LayerNorm(const LocalTensor<T>& output, const LocalTensor<T>& outputMean, const LocalTensor<T>& outputVariance, const LocalTensor<T>& inputX, const LocalTensor<T>& gamma, const LocalTensor<T>& beta, const T epsilon, LayerNormTiling& tiling)
- template <typename T, bool isReuseSource = false> __aicore__ inline void LayerNorm(const LocalTensor<T>& output, const LocalTensor<T>& outputMean, const LocalTensor<T>& outputVariance, const LocalTensor<T>& inputX, const LocalTensor<T>& gamma, const LocalTensor<T>& beta, const LocalTensor<uint8_t>& sharedTmpBuffer, const T epsilon, LayerNormTiling& tiling)
- template <typename T, bool isReuseSource = false>
__aicore__ inline void LayerNormGrad(const LocalTensor<T> &outputPdX, const LocalTensor<T> &resForGamma, const LocalTensor<T> &inputDy, const LocalTensor<T> &inputX, const LocalTensor<T> &inputVariance, const LocalTensor<T> &inputMean, const LocalTensor<T> &inputGamma, T epsilon, LayerNormGradTiling &tiling)
- template <typename T, bool isReuseSource = false>
__aicore__ inline void LayerNormGrad(const LocalTensor<T> &outputPdX, const LocalTensor<T> &resForGamma, const LocalTensor<T> &inputDy, const LocalTensor<T> &inputX, const LocalTensor<T> &inputVariance, const LocalTensor<T> &inputMean, const LocalTensor<T> &inputGamma, LocalTensor<uint8_t> &sharedTmpBuffer, T epsilon, LayerNormGradTiling &tiling)
- template <typename T, bool isReuseSource = false>
__aicore__ inline void LayerNormGradBeta(const LocalTensor<T>& outputPdGamma, const LocalTensor<T>& outputPdBeta, const LocalTensor<T>& resForGamma, const LocalTensor<T>& inputDy, LayerNormGradBetaTiling& tiling)
- template <typename T, bool isReuseSource = false>
__aicore__ inline void LayerNormGradBeta(const LocalTensor<T>& outputPdGamma, const LocalTensor<T>& outputPdBeta, const LocalTensor<T>& resForGamma, const LocalTensor<T>& inputDy, const LocalTensor<uint8_t>& sharedTmpBuffer, const LayerNormGradBetaTiling& tiling)
- void GetLayerNormMaxMinTmpSize(const ge::Shape& srcShape, const uint32_t typeSize, const bool isReuseSource, uint32_t& maxValue, uint32_t& minValue)
- void GetLayerNormRedNDTillingInfo(const uint32_t stackBufferSize, const uint32_t typeSize, optiling::LayerNormRedTiling& tilling, optiling::LayerNormRedParams& redParams)
- inline void GetLayerNormGradMaxMinTmpSize(const ge::Shape &srcShape, const uint32_t typeSize, const bool isReuseSource, uint32_t &maxValue, uint32_t &minValue)
- inline void GetLayerNormGradNDTilingInfo(const ge::Shape srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, const bool isReuseSource, optiling::LayerNormGradTiling &tiling)
- inline void GetLayerNormGradReduceTilingInfo(const uint32_t stackBufferSize, const uint32_t typeSize, optiling::LayerNormGradReduceTiling &tiling, optiling::LayerNormGradReduceParams &reduceParams, const bool isReuseSource = false)
- void GetLayerNormGradBetaMaxMinTmpSize(const ge::Shape& srcShape, const uint32_t typeSize, const bool isReuseSource, uint32_t& maxValue, uint32_t& minValue)
- void GetLayerNormGradBetaNDTilingInfo(const ge::Shape srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, const bool isReuseSource, optiling::LayerNormGradBetaTiling& tiling)
- void GetLayerNormGradBetaReduceTilingInfo(const uint32_t stackBufferSize, const uint32_t typeSize, optiling::LayerNormGradBetaReduceTiling& tiling, optiling::LayerNormGradBetaReduceParams& params)
ConfusionTranspose
- void GetConfusionTransposeOnlyTilingInfo(const ge::Shape &srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, optiling::ConfusionTransposeTiling &tiling);
AscendAntiQuant
- inline uint32_t GetAscendAntiQuantMaxTmpSize(const ge::Shape &srcShape, const ge::Shape &scaleShape, bool isTranspose, ge::DataType inputDataType, ge::DataType outputDataType)
- inline uint32_t GetAscendAntiQuantMinTmpSize(const ge::Shape &srcShape, const ge::Shape &scaleShape, bool isTranspose, ge::DataType inputDataType, ge::DataType outputDataType)
SwiGLU
- void GetSwiGLUTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCount, uint32_t &extraBuffer)
GeGLU
- void GetGeGLUTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
数学库
- void GetTanhTmpBufferFactorSize(const uint32_t typeSize, uint32_t& maxLiveNodeCnt, uint32_t& extraBuf)
- void GetAsinTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCount, uint32_t &extraBuffer)
- void GetAsinhTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetAcosTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCount, uint32_t &extraBuffer)
- void GetAcoshTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetCosTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetCoshTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCount, uint32_t &extraBuffer)
- void GetAtanTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetAtanhTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetSinTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetSinhTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetTanTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetLogTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetLog2TmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetLog10TmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetPowerTmpBufferFactorSize(const bool baseIsTensor, const bool expIsTensor, const bool typeIsInt, const uint32_t typeSize, uint32_t& maxLiveNodeCount, uint32_t& extraBuffer)
- void GetErfTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCount, uint32_t &extraBuffer)
- void GetErfcTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetFracTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetTruncTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- inline uint32_t GetSignMaxTmpSize(const ge::Shape srcShape, const uint32_t typeSize, const bool isReuseSource)
- inline uint32_t GetSignMinTmpSize(const ge::Shape srcShape, const uint32_t typeSize, const bool isReuseSource)
- inline uint32_t GetXorMaxTmpSize(const ge::Shape srcShape, const uint32_t typeSize, const bool isReuseSource)
- inline uint32_t GetXorMinTmpSize(const ge::Shape srcShape, const uint32_t typeSize, const bool isReuseSource)
- void GetCeilTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetFloorTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetClampTmpBufferFactorSize(const uint32_t typeSize, uint32_t &maxLiveNodeCnt, uint32_t &extraBuf)
- void GetExpTmpBufferFactorSize(const uint32_t typeSize, uint32_t& maxLiveNodeCount, uint32_t& extraBuffer)
数据类型定义
- LocalTensor
- SetAddrWithOffset
- Print [CPU ONLY]
- ToFile [CPU ONLY]
- GetBufferHandle
- GetPosition
- GetLength
- SetBufferLen
- SetShapeInfo
- GetShapeInfo
- SetAddr
- GetPhyAddr
- operator()
- GetShapeSize
- GlobalTensor
- SetAddr
- GetValue
- SetValue
- SetShapeInfo
- GetShapeInfo
- operator()
- TensorDesc类
- ListTensorDesc类
内存管理和同步控制
- TPipe
- Init
- GetAbsAddr
- InitShareBufStart
- InitShareBufEnd
- GetQueueEndAddress
- Reset
- Destroy
- GetBaseAddr
- ReleaseEvent
- IsAivTscm
- GetBaseAddr [CPU ONLY]
- TQueBind
- TBuf
- GetWithOffset
- SetTpipeBuf
- __aicore__ constexpr Hardware GetPhyType(TPosition pos);
- template <typename T, TPosition pos> __aicore__ inline bool PopStackBuffer(LocalTensor<T>& popLocal);
- template <TPosition pos> __aicore__ inline bool PopStackBuffer(TBuf<pos>& popBuffer, TBufType& bufStart);
矢量计算
- template <typename T, bool isSetMask = true> __aicore__ inline void RepeatReduceSum(const LocalTensor<T>& dstLocal, const LocalTensor<T>& srcLocal, const int32_t repeat, const int32_t elemsInOneRepeate, const int32_t dstBlkStride, const int32_t srcBlkStride, const int32_t dstRepStride, const int32_t srcRepStride);
- template <typename T, typename U> __aicore__ inline void Select(const LocalTensor<T>& dstLocal, const LocalTensor<U>& selMask, const LocalTensor<T>& src0Local, uint8_t repeatTimes, const BinaryRepeatParams& repeatParams);
- template <typename T, SELMODE selMode> __aicore__ inline void Select(const LocalTensor<T>& dstLocal, const LocalTensor<T>& src0Local, const LocalTensor<T>& src1Local, uint8_t repeatTimes, const BinaryRepeatParams& repeatParams);
- template <typename T> __aicore__ inline void GatherMask(const LocalTensor<T>& dstLocal, const LocalTensor<T>& src0Local, const LocalTensor<T>& src1Local, const uint8_t patternMode, const GatherMaskParams& gatherMaskParams);
- template <typename T> __aicore__ inline void GatherMask(const LocalTensor<T>& dstLocal, const LocalTensor<T>& src0Local, const uint8_t patternMode, const GatherMaskParams& gatherMaskParams);
矩阵计算
- template <typename T> __aicore__ inline void InitConstValue(const LocalTensor<T> &dstLocal, const InitConstValueParams<T> &initConstValueParams);
- template <typename T> __aicore__ inline void LoadDataWithTranspose(const LocalTensor<T>& dstLocal, const LocalTensor<T>& srcLocal, const LoadData2dTransposeParams& loadDataParams);
- __aicore__ inline void SetFmatrix(uint16_t l1H, uint16_t l1W, const uint8_t padList[4], const FmatrixMode &fmatrixMode);
- __aicore__ inline void SetLoadDataRepeat(const LoadDataRepeatParam& repeatParams);
- template <typename T> __aicore__ inline void SetLoadDataPaddingValue(const T padValue);
- __aicore__ inline void SetLoadDataBoundary(uint32_t boundaryValue);
- template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const LocalTensor<dst_T>& dstLocal, const LocalTensor<src_T>& srcLocal, const FixpipeParams<src_T>& intriParams);
- template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const LocalTensor<dst_T>& dstLocal, const LocalTensor<src_T>& srcLocal, const LocalTensor<uint64_t>& cbufWorkspace, const FixpipeParams<src_T>& intriParams);
- template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const GlobalTensor<dst_T>& dstGlobal, const LocalTensor<src_T>& srcLocal, const FixpipeParams<src_T>& intriParams);
- template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const GlobalTensor<dst_T>& dstGlobal, const LocalTensor<src_T>& srcLocal, const LocalTensor<uint64_t>& cbufWorkspace, const FixpipeParams<src_T>& intriParams);
- template <typename T> __aicore__ inline void SetFixPipeConfig(const LocalTensor<T> &reluPre, const LocalTensor<T> &quantPre, bool isUnitFlag = false);
- template <typename T, bool setRelu = false> __aicore__ inline void SetFixPipeConfig(const LocalTensor<T> &preTensor, bool isUnitFlag = false);
- __aicore__ inline void SetFixpipeNz2ndFlag(uint16_t ndNum, uint16_t srcNdStride, uint16_t dstNdStride);
- __aicore__ inline void SetFixpipePreQuantFlag(uint64_t config);
- template<typename T> __aicore__ inline void SetLeakyReluAlpha(T scaleValue);
- template <typename T, typename U> __aicore__ inline __inout_pipe__(V) void BroadCastVecToMM(const LocalTensor<T> &dstLocal, const LocalTensor<U> &srcLocal, const int32_t blockCount, const uint8_t blockLen, const uint8_t srcGap, const uint8_t dstGap);
其他
- __aicore__ inline __gm__ uint8_t* __gm__ SetDumpWorkSpacePtr(__gm__ uint8_t* workspace)
- __aicore__ inline __gm__ uint8_t* __gm__ GetDumpWorkSpacePtr()
- __aicore__ void SetSysWorkSpacePtr(__gm__ uint8_t* workspace)
- __aicore__ inline AscendC::RpcCommClient* GetRpcClient()
- __aicore__ inline void ResetMask()
- __aicore__ inline void SetLoadDataBoundary(uint32_t boundaryValue)
- template <MemDsbT arg0> __aicore__ inline void DataSyncBarrier()
- template <HardEvent event, MemoryT memT, bool isVirtual> __aicore__ inline void HSetFlag(int32_t eventID)
- template <HardEvent event, MemoryT memT, bool isVirtual> __aicore__ inline void HWaitFlag(int32_t eventID)
- __aicore__ inline void PreLoad(const int64_t prefetchLen)
- template <typename T> __aicore__ inline __inout_pipe__(S) void InitSyncID(GlobalTensor<T> gmWorkspace);
- template <typename T> __aicore__ inline __in_pipe__(V) __out_pipe__(MTE3) void InitOutput(GlobalTensor<T> gmWorkspaceAddr, uint32_t size, T value = 0);
- __aicore__ inline void InitDetermineComputeWorkspace(GlobalTensor<int32_t> &gmWorkspace, LocalTensor<int32_t> &ubWorkspace);
- __aicore__ inline void NotifyNextBlock(GlobalTensor<int32_t> &gmWorkspace, LocalTensor<int32_t> &ubWorkspace);
- __aicore__ inline void WaitPreBlock(GlobalTensor<int32_t> &gmWorkspace, LocalTensor<int32_t> &ubWorkspace);
- template <typename T> __aicore__ inline void Concat(LocalTensor<T> &concatLocal, const LocalTensor<T> &srcLocal, const LocalTensor<T> &tmpLocal, const int32_t repeatTimes);
- template <typename T> __aicore__ inline void Extract(const LocalTensor<T> &dstValueLocal, const LocalTensor<uint32_t> &dstIndexLocal, const LocalTensor<T> &sortedLocal, const int32_t repeatTimes);
- template <typename T, bool isFullSort> __aicore__ inline void Sort(const LocalTensor<T> &dstLocal, const LocalTensor<T> &concatLocal, const LocalTensor<uint32_t> &indexLocal, LocalTensor<T> &tmpLocal, const int32_t repeatTimes);
- template <typename T> __aicore__ inline void MrgSort(const LocalTensor<T>& dstLocal, const MrgSortSrcList<T>& srcLocal, const MrgSort4Info& params);
- template <typename T, bool isExhaustedSuspension = false> __aicore__ inline void MrgSort(const LocalTensor<T> &dstLocal, const MrgSortSrcList<T> &sortList, const uint16_t elementCountList[4], uint32_t sortedNum[4], uint16_t validBit, const int32_t repeatTimes);
- template <typename T> __aicore__ inline uint32_t GetSortLen(const uint32_t elemCount);
- template <typename T> __aicore__ inline uint32_t GetSortOffset(const uint32_t elemOffset);
- inline uint32_t GetSortTmpSize(const platform_ascendc::PlatformAscendC &ascendcPlatform, const uint32_t elemCount, const uint32_t dataTypeSize);
- inline uint32_t GetConcatTmpSize(const platform_ascendc::PlatformAscendC &ascendcPlatform, const uint32_t elemCount, const uint32_t dataTypeSize);
- __aicore__ inline void WaitEvent(uint16_t flagId);
- template<pipe_t pipe> __aicore__ inline void NotifyEvent(uint16_t flagId);
- __aicore__ inline int64_t GetStoreAtomicConfig();
- template <AtomicDtype type, AtomicOp op> __aicore__ inline void SetStoreAtomicConfig();
- __aicore__ inline int64_t GetAccVal();
- __aicore__ inline int64_t GetReduceMaxMinCount();
- __aicore__ inline void InitDump(uint32_t gmLen);
- __aicore__ inline void InitDump(GM_ADDR dumpStartAddr, uint32_t gmLen);
- template <typename T> __aicore__ inline void DumpAccChkPoint(LocalTensor<T> &tensor, uint32_t index, uint32_t countOff, uint32_t dumpSize);
- template <typename T> __aicore__ inline void DumpAccChkPoint(GlobalTensor<T> &tensor, uint32_t index, uint32_t countOff, uint32_t dumpSize);
- #define GET_TILING_DATA_WITH_STRUCT(tiling_struct, tiling_data, tiling_arg) tiling_struct tiling_data
- #define GET_TILING_DATA_MEMBER(tiling_type, member, var, tiling) tiling_type point##var
- #define KERNEL_TASK_TYPE(key, value) ENABLE_FEATURE_FOR_COMPILE(key, value)
- template<typename T> void TransDataTo5HD(uint64_t dstLocalList[16], uint64_t srcLocalList[16], const TransDataTo5HDParams& transDataParams);
- template <typename T> __aicore__ inline void SetCmpMask(const LocalTensor<T>& src);