Operators that implement vector computation with Ascend C can run on the Vector Core. This section describes how to program operators for the Vector Core architecture; the feature is supported only on Atlas inference series products.
Before reading this section, learn the end-to-end AI Core based operator development flow by referring to the Operator Implementation and Operator Invocation sections. The content below focuses only on what changes when programming for the Vector Core: on the kernel side, the kernel task type is switched to KERNEL_TYPE_MIX_VECTOR_CORE so that the kernel can also be dispatched to the Vector Core; on the host side, the block dim used for the launch and set in the tiling function must count the Vector Cores in addition to the AI Cores. The specific differences are as follows:
extern "C" __global__ __aicore__ void add_custom(__gm__ uint8_t *x, __gm__ uint8_t *y, __gm__ uint8_t *z, __gm__ uint8_t *workspace, __gm__ uint8_t *tiling) { GET_TILING_DATA(tilingData, tiling); if (workspace == nullptr) { return; } SetSysWorkspace(workspace); GM_ADDR usr = GetUserWorkspace(workspace); KernelAdd op; op.Init(x, y, z, tilingData.blockDim, tilingData.totalLength, tilingData.tileNum); KERNEL_TASK_TYPE_DEFAULT(KERNEL_TYPE_MIX_VECTOR_CORE); // 使能VectorCore if (TILING_KEY_IS(1)) { op.Process1(); } else if (TILING_KEY_IS(2)) { op.Process2(); } // ... }
auto ascendcPlatform = platform_ascendc::PlatformAscendCManager::GetInstance();
auto totalCoreNum = ascendcPlatform.GetCoreNumAic();
// Replace ASCENDXXX with the actual SoC version
if (ascendcPlatform.GetSocVersion() == platform_ascendc::SocVersion::ASCENDXXX) {
    totalCoreNum = totalCoreNum + ascendcPlatform.GetCoreNumVector();
}
...
kernel_name<<<totalCoreNum, l2ctrl, stream>>>(argument list);
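For reference, the <<<>>> launch with the concrete argument list of the add_custom kernel above could be wrapped as follows; the wrapper name add_custom_do and the argument names are assumptions, and error handling is omitted.

// Hedged sketch of a host-callable launch wrapper in the kernel-side source file.
void add_custom_do(uint32_t blockDim, void *l2ctrl, void *stream,
                   uint8_t *x, uint8_t *y, uint8_t *z, uint8_t *workspace, uint8_t *tiling)
{
    // blockDim is the totalCoreNum computed above (AI Core count + Vector Core count on the
    // supported SoC); the argument list matches the add_custom kernel signature.
    add_custom<<<blockDim, l2ctrl, stream>>>(x, y, z, workspace, tiling);
}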
// Matching host-side tiling function example:
ge::graphStatus TilingFunc(gert::TilingContext *context)
{
    // Enable the Vector Core: set block_dim to the number of vector cores in the AI Core
    // plus the number of Vector Cores
    auto platformInfo = context->GetPlatformInfo();
    auto ascendcPlatform = platform_ascendc::PlatformAscendC(platformInfo);
    auto totalCoreNum = ascendcPlatform.GetCoreNumAic();
    // Replace ASCENDXXX with the actual SoC version
    if (ascendcPlatform.GetSocVersion() == platform_ascendc::SocVersion::ASCENDXXX) {
        totalCoreNum = totalCoreNum + ascendcPlatform.GetCoreNumVector();
    }
    context->SetBlockDim(totalCoreNum);
    return ge::GRAPH_SUCCESS;
}
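The kernel above reads blockDim, totalLength and tileNum from the tiling data, so the host side also needs a matching TilingData definition and must serialize it into the tiling context. The sketch below shows one possible definition; the struct name AddCustomTilingData, the op type AddCustom, the fixed tile count of 8 and the shape query are assumptions.

// add_custom_tiling.h -- hedged sketch of a TilingData definition matching the fields read by the kernel
#include "register/tilingdata_base.h"

namespace optiling {
BEGIN_TILING_DATA_DEF(AddCustomTilingData)
    TILING_DATA_FIELD_DEF(uint32_t, blockDim);
    TILING_DATA_FIELD_DEF(uint32_t, totalLength);
    TILING_DATA_FIELD_DEF(uint32_t, tileNum);
END_TILING_DATA_DEF;
REGISTER_TILING_DATA_CLASS(AddCustom, AddCustomTilingData)
}

Inside TilingFunc, before returning, the fields would then be filled and written back to the context, roughly as follows:

AddCustomTilingData tiling;
uint32_t totalLength = context->GetInputShape(0)->GetStorageShape().GetShapeSize();
tiling.set_blockDim(totalCoreNum);
tiling.set_totalLength(totalLength);
tiling.set_tileNum(8);                       // placeholder tile count
tiling.SaveToBuffer(context->GetRawTilingData()->GetData(),
                    context->GetRawTilingData()->GetCapacity());
context->GetRawTilingData()->SetDataSize(tiling.GetDataSize());
context->SetTilingKey(1);                    // selects op.Process1() in the kernel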