算子原型定义
AI CPU算子开发流程将以算子开发样例工程中“reshape_cust”算子为例,供开发者参考。
进入算子工程的“op_proto”目录,编写IR实现文件“reshape_cust.h”和“reshape_cust.cc”,将算子注册到算子原型库中。网络运行时,GE会调用算子原型库的校验接口进行基本参数的校验,校验通过后,会根据原型库中的推导函数推导每个节点的输出shape与dtype,进行输出tensor的静态内存的分配。
reshape_cust.h实现
MindStudio已在reshape_cust.h文件中生成了算子注册的代码模板文件,开发者可根据需要进行修改,ReshapeCust算子的原型定义如下所示:
#ifndef GE_OP_INTERP_RESHAPE_CUST_H #define GE_OP_INTERP_RESHAPE_CUST_H #include "graph/operator_reg.h" namespace ge { REG_OP(ReshapeCust) .INPUT(tensor, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) .INPUT(shape, TensorType({DT_INT32, DT_INT64})) .OUTPUT(output, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32})) .OP_END_FACTORY_REG(ReshapeCust) } #endif // GE_OP_INTERP_RESHAPE_CUST_H
- REG_OP(ReshapeCust)中的ReshapeCust为算子注册到昇腾AI处理器中的Type,需要与算子适配插件实现(TensorFlow)中REGISTER_CUSTOM_OP("ReshapeCust")中的算子类型保持一致。
- .INPUT与.OUTPUT分别为算子的输入、输出Tensor的名称与数据类型,输入输出的顺序需要与算子代码实现函数形参顺序以及算子信息定义中参数的顺序保持一致。
reshape_cust.cc实现
原型定义的关键点是推理输出Tensor的shape。ReshapeCust算子推理输出shape的原理为:首先获取两个输入,一个是输入tensor,一个是目标的shape,校验输入tensor元素个数是否与目标shape相同,校验成功则将输出shape设置为目标shape值。
MindStudio已在reshape_cust.cc文件中生成了代码模板文件,开发者可根据需要进行修改。reshape_cust.cc的实现代码如下所示:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
#include "reshape_cust.h"  // IR registration header (REG_OP / infer-func macros)
#include <vector>          // std::vector for shape value lists
#include <string>          // C++ standard string support
#include <iostream>        // C++ standard I/O streams

namespace {
// Widen a raw buffer of `dataSize` shape values (int32_t or int64_t
// elements) into a std::vector<int64_t>.
template <typename T>
std::vector<int64_t> AsInt64(const T *data, int64_t dataSize) {
  // Iterator-range construction replaces the hand-rolled element copy loop.
  return std::vector<int64_t>(data, data + dataSize);
}

// Number of elements described by `shape`: the product of all dimensions.
// NOTE(review): assumes every dim is non-negative (fully static shape); a
// -1 "unknown" dim would corrupt the count — confirm callers only pass
// known shapes here.
int64_t GetElementNum(const std::vector<int64_t> &shape) {
  int64_t num = 1;
  for (int64_t dim : shape) {
    num *= dim;
  }
  return num;
}
}  // namespace

namespace ge {
// Infer-shape function for ReshapeCust.
//
// Inputs : "tensor" (the data to reshape) and "shape" (the target shape).
// Output : "output" — its shape is set to the target shape when the target
//          is available as a graph-time constant and its element count
//          matches the input tensor's; its dtype always mirrors the input
//          tensor's dtype. The input's shape range is propagated as well.
// Returns: GRAPH_SUCCESS on success; GRAPH_FAILED when the target shape's
//          element count mismatches the input, the const data pointer is
//          null, or the input's shape range cannot be obtained.
IMPLEMT_COMMON_INFERFUNC(ReshapeCustInferShape) {
  TensorDesc tensordesc_tensor = op.GetInputDescByName("tensor");
  TensorDesc tensordesc_shape = op.GetInputDescByName("shape");
  TensorDesc tensordesc_output = op.GetOutputDescByName("output");
  Tensor shape_tensor;
  // The target shape is only usable here when it is a graph-time constant;
  // otherwise the output shape is left for dynamic-shape handling below.
  if (op.GetInputConstData("shape", shape_tensor) == GRAPH_SUCCESS) {
    // FIX(review): guard against a null data pointer before reinterpreting
    // it — the original dereferenced the buffer unconditionally.
    if (shape_tensor.GetData() == nullptr) {
      return GRAPH_FAILED;
    }
    DataType shape_type = tensordesc_shape.GetDataType();
    std::vector<int64_t> shape_values;
    // The IR restricts "shape" to DT_INT32 / DT_INT64, so the else branch
    // covers int64. GetSize() is in bytes, hence the sizeof division.
    if (shape_type == DT_INT32) {
      auto shape_data = reinterpret_cast<const int32_t *>(shape_tensor.GetData());
      shape_values = AsInt64<int32_t>(shape_data, shape_tensor.GetSize() / sizeof(int32_t));
    } else {
      auto shape_data = reinterpret_cast<const int64_t *>(shape_tensor.GetData());
      shape_values = AsInt64<int64_t>(shape_data, shape_tensor.GetSize() / sizeof(int64_t));
    }
    // A reshape is only legal when input and target element counts agree.
    std::vector<int64_t> input_shape = tensordesc_tensor.GetShape().GetDims();
    int64_t input_element_num = GetElementNum(input_shape);
    int64_t shape_element_num = GetElementNum(shape_values);
    if (input_element_num != shape_element_num) {
      return GRAPH_FAILED;
    }
    // Propagate the validated target shape to the output tensor.
    tensordesc_output.SetShape(Shape(shape_values));
    tensordesc_output.SetOriginShape(Shape(shape_values));
  }
  // Output dtype always follows the input tensor's dtype.
  tensordesc_output.SetDataType(tensordesc_tensor.GetDataType());
  // Carry the input's shape range through for dynamic-shape graphs.
  std::vector<std::pair<int64_t, int64_t>> range;
  auto status = op.GetInputDesc("tensor").GetShapeRange(range);
  if (status != GRAPH_SUCCESS) {
    return GRAPH_FAILED;
  }
  tensordesc_output.SetShapeRange(range);
  (void)op.UpdateOutputDesc("output", tensordesc_output);
  return GRAPH_SUCCESS;
}
COMMON_INFER_FUNC_REG(ReshapeCust, ReshapeCustInferShape);
}  // namespace ge
父主题: AI CPU算子开发