下载
中文
注册

预留接口

本章节列出的接口均为预留接口，后续有可能变更或废弃，不建议开发者使用，开发者无需关注。

Matmul

  • __aicore__ inline void SetSubBlockIdx(uint8_t subBlockIdx);
  • __aicore__ inline void SetAntiQuantScalar(const SrcT offsetScalar, const SrcT scaleScalar);
  • __aicore__ inline void SetAntiQuantVector(const LocalTensor<SrcT> &offsetTensor, const LocalTensor<SrcT> &scaleTensor);
  • __aicore__ inline void SetTensorAWithCopy(const GlobalTensor<SrcAT>& gm, const LocalTensor<SrcAT> &leftMatrix, bool isTransposeA = false);
  • __aicore__ inline void SetTensorBWithCopy(const GlobalTensor<SrcBT>& gm, const LocalTensor<SrcBT> &rightMatrix, bool isTransposeB = false);

ConfusionTranspose

void GetConfusionTransposeOnlyTilingInfo(const ge::Shape &srcShape, const uint32_t stackBufferSize, const uint32_t typeSize, optiling::ConfusionTransposeTiling &tiling);

内存管理和同步控制

  • TPipe
    • Init
    • GetAbsAddr
    • InitShareBufStart
    • InitShareBufEnd
    • Reset
    • Destroy
  • __aicore__ constexpr Hardware GetPhyType(TPosition pos);
  • template <typename T, TPosition pos> __aicore__ inline bool PopStackBuffer(LocalTensor<T>& popLocal);
  • template <TPosition pos> __aicore__ inline bool PopStackBuffer(TBuf<pos>& popBuffer, TBufType& bufStart);

矢量计算

  • template <typename T, bool isSetMask = true> __aicore__ inline void RepeatReduceSum(const LocalTensor<T>& dstLocal, const LocalTensor<T>& srcLocal, const int32_t repeat, const int32_t elemsInOneRepeate, const int32_t dstBlkStride, const int32_t srcBlkStride, const int32_t dstRepStride, const int32_t srcRepStride);
  • template <typename T, typename U> __aicore__ inline void Select(const LocalTensor<T>& dstLocal, const LocalTensor<U>& selMask, const LocalTensor<T>& src0Local, uint8_t repeatTimes, const BinaryRepeatParams& repeatParams);
  • template <typename T, SELMODE selMode> __aicore__ inline void Select(const LocalTensor<T>& dstLocal, const LocalTensor<T>& src0Local, const LocalTensor<T>& src1Local, uint8_t repeatTimes, const BinaryRepeatParams& repeatParams);

矩阵计算

  • template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const LocalTensor<dst_T>& dstLocal, const LocalTensor<src_T>& srcLocal, const FixpipeParams<src_T>& intriParams);
  • template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const LocalTensor<dst_T>& dstLocal, const LocalTensor<src_T>& srcLocal, const LocalTensor<uint64_t>& cbufWorkspace, const FixpipeParams<src_T>& intriParams);
  • template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const GlobalTensor<dst_T>& dstGlobal, const LocalTensor<src_T>& srcLocal, const FixpipeParams<src_T>& intriParams);
  • template <typename dst_T, typename src_T> __aicore__ inline void Fixpipe(const GlobalTensor<dst_T>& dstGlobal, const LocalTensor<src_T>& srcLocal, const LocalTensor<uint64_t>& cbufWorkspace, const FixpipeParams<src_T>& intriParams);
  • template <typename DstT, typename SrcT, const FixpipeConfig& config = CFG_ROW_MAJOR> void Fixpipe(const LocalTensor<DstT>& dstLocal, const LocalTensor<SrcT>& srcLocal, const FixpipeParamsV220& intriParams);
  • template <typename DstT, typename SrcT, const FixpipeConfig& config = CFG_ROW_MAJOR> void Fixpipe(const LocalTensor<DstT>& dstLocal, const LocalTensor<SrcT>& srcLocal, const LocalTensor<uint64_t>& cbufWorkspace, const FixpipeParamsV220& intriParams);
  • template <typename T, typename U> __aicore__ inline __inout_pipe__(V) void BroadCastVecToMM(const LocalTensor<T> &dstLocal, const LocalTensor<U> &srcLocal, const int32_t blockCount, const uint8_t blockLen, const uint8_t srcGap, const uint8_t dstGap);

其他

  • __aicore__ inline __gm__ uint8_t* __gm__ SetDumpWorkSpacePtr(__gm__ uint8_t* workspace);
  • __aicore__ inline __gm__ uint8_t* __gm__ GetDumpWorkSpacePtr();
  • __aicore__ void SetSysWorkSpacePtr(__gm__ uint8_t* workspace);
  • template <MemDsbT arg0> __aicore__ inline void DataSyncBarrier();
  • template <HardEvent event, MemoryT memT, bool isVirtual> __aicore__ inline void HSetFlag(int32_t eventID);
  • template <HardEvent event, MemoryT memT, bool isVirtual> __aicore__ inline void HWaitFlag(int32_t eventID);
  • template <typename T> __aicore__ inline __in_pipe__(V) __out_pipe__(MTE3) void InitOutput(GlobalTensor<T> gmWorkspaceAddr, uint32_t size, T value = 0);
  • __aicore__ inline void InitDetermineComputeWorkspace(GlobalTensor<int32_t> &gmWorkspace, LocalTensor<int32_t> &ubWorkspace);
  • __aicore__ inline void NotifyNextBlock(GlobalTensor<int32_t> &gmWorkspace, LocalTensor<int32_t> &ubWorkspace);
  • __aicore__ inline void WaitPreBlock(GlobalTensor<int32_t> &gmWorkspace, LocalTensor<int32_t> &ubWorkspace);
  • template <typename T> __aicore__ inline void MrgSort(const LocalTensor<T>& dstLocal, const MrgSortSrcList<T>& srcLocal, const MrgSort4Info& params);
  • #define GET_TILING_DATA_WITH_STRUCT(tiling_struct, tiling_data, tiling_arg) tiling_struct tiling_data
  • #define GET_TILING_DATA_MEMBER(tiling_type, member, var, tiling) tiling_type point##var
  • template <typename T> __aicore__ inline void SetCmpMask(const LocalTensor<T>& src);