样例参考
本节给出实现RPing功能的完整代码样例。
该样例实现了一个单机8卡组网场景的RPing功能。
代码样例
样例代码文件包含“rping_test.cc”与“rping_test.h”文件。
- 头文件“rping_test.h”定义了device的IP地址,以单机八卡为例,具体如下:
#include <stdio.h> #include <stdint.h> #include <string.h> #include <stdlib.h> #include <unistd.h> #include "acl/acl.h" #include "acl/acl_prof.h" #include "hccl/hccn_rping.h" // 以单机8卡为例,以下配置中IP地址仅为示例 #define ipLen 16 char deviceIp[8][ipLen] = { "192.168.99.127", "192.168.99.128", "192.168.99.129", "192.168.99.130", "192.168.99.131", "192.168.99.132", "192.168.99.133", "192.168.99.134" };
- “rping_test.cc”文件具体实现了RPing功能,代码示例如下:
#include <chrono> #include <vector> #include <thread> #include <atomic> #include "rping_test.h" int singleDevAllProc(int devId, std::vector<int> devices, int devClientId, std::atomic<bool> *isStop) { // 初始化device HccnRpingInitAttr *initAttr = new HccnRpingInitAttr(); initAttr->mode = HCCN_RPING_MODE_ROCE; initAttr->port = 13886; initAttr->npuNum = 128; initAttr->bufferSize = 4096 * 50; // 必须大于这个值:pktnum * 2048 * targetNum initAttr->ipAddr = new char[ipLen]; strcpy(initAttr->ipAddr, deviceIp[devId]); HccnRpingCtx rpingCtx = nullptr; aclrtSetDevice(devId); HccnResult ret = HccnRpingInit(devId, initAttr, &rpingCtx); if (ret != HCCN_SUCCESS) { printf("device init failed.\n"); return -1; } printf("rpingCtx [%p]", rpingCtx); printf("device[%d] init success!\n", devId); if (devId != devClientId) { sleep(20); // target启动后需要一直保持等待探测请求的状态 while (isStop->load() == false) { sleep(1); continue; } HccnRpingDeinit(rpingCtx); delete[] initAttr->ipAddr; delete initAttr; return 0; } // 添加target int targetNum = devices.size() - 1; HccnRpingTargetInfo *target = new HccnRpingTargetInfo[targetNum]; for (int i = 0; i < devices.size() - 1; i++) { int devTargetId = devices[i]; target[i].srcPort = 0; target[i].sl = 4; target[i].tc = (33 & 0x3f) << 2; target[i].port = 13886; target[i].payloadLen = 12; target[i].srcIp = new char[ipLen]; target[i].dstIp = new char[ipLen]; char payload[12] = "hellotarget"; strcpy(target[i].payload, payload); strcpy(target[i].srcIp, deviceIp[devClientId]); strcpy(target[i].dstIp, deviceIp[devTargetId]); } ret = HccnRpingAddTarget(rpingCtx, targetNum, target); if (ret != HCCN_SUCCESS) { delete[] target; printf("device add target failed.\n"); return -1; } printf("device[%d] add target success!\n", devId); // 发起请求 uint32_t pktNum = 10; // 发给每个target的报文数量 uint32_t interval = 1; // ms uint32_t timeout = 100; // ms ret = HccnRpingBatchPingStart(rpingCtx, pktNum, interval, timeout); if (ret != HCCN_SUCCESS) { delete[] target; printf("device start ping failed.\n"); 
return -1; } printf("device[%d] start ping!\n", devId); // 获取结果 HccnRpingResultInfo *result = new HccnRpingResultInfo[targetNum]; HccnResult hccnRet = HCCN_E_AGAIN; while(hccnRet == HCCN_E_AGAIN) { sleep(1); hccnRet = HccnRpingGetResult(rpingCtx, targetNum, target, result); } if (ret != HCCN_SUCCESS) { delete[] target; delete[] result; printf("device get result failed.\n"); return -1; } for (int i = 0; i < targetNum; i++) { printf("txPkt[%u] rxPkt[%u] minRTT[%u] maxRTT[%u] avgRTT[%u] state[%u] payloadLen[%u]\n", result[i].txPkt, result[i].rxPkt, result[i].minRTT, result[i].maxRTT, result[i].avgRTT, result[i].state, result[i].payloadLen); } ret = HccnRpingBatchPingStop(rpingCtx); if (ret != HCCN_SUCCESS) { delete[] target; delete[] result; printf("device stop ping failed.\n"); return -1; } printf("device[%d] stop ping!\n", devId); HccnRpingRemoveTarget(rpingCtx, targetNum, target); if (ret != HCCN_SUCCESS) { delete[] target; delete[] result; printf("device remove target failed.\n"); return -1; } printf("device[%d] remove target sucess!\n", devId); // 释放内存 ret = HccnRpingDeinit(rpingCtx); if (ret != HCCN_SUCCESS) { printf("device deinit failed.\n"); return -1; } printf("device[%d] deinit success!\n", devId); for (int i = 0; i < targetNum; i++) { delete[] target[i].srcIp; delete[] target[i].dstIp; } delete[] result; delete[] target; printf("rpingCtx test success!!!\n"); return 0; } int main(int argc, char *argv[]) { // 获取设备个数 int deviceNum = argc - 2; // 获取循环次数 int loop = atoi(argv[1]); // 记录client Id int devClientId = atoi(argv[2]); // 记录全部的device Id std::vector<int> devices; for (int i = 0; i < deviceNum; i++) { int dev = atoi(argv[i + 2]); bool isRepeat = false; for (int j = 0; j < devices.size(); j++) { if (dev == devices[j]) { isRepeat = true; printf("%d is repeat!\n", dev); break; } } if (!isRepeat) { printf("dev: %d, isrepeat: %d\n", dev, isRepeat); devices.push_back(dev); } } std::vector<std::thread> test_threads; std::atomic<bool> isStop{false}; for (int i = 
0; i < loop; i++) { printf("\n*************分割线**************\n\n"); printf("%dth process start!!\n", i+1); for (int j = 0; j < devices.size(); j++) { test_threads.push_back(std::thread(singleDevAllProc, devices[j], devices, devClientId, &isStop)); printf("device[%d] start running!!\n", devices[j]); } for (int j = 0; j < deviceNum; j++) { isStop.store(true); if (test_threads[j].joinable()) { test_threads[j].join(); printf("device[%d] stop running!!\n", devices[j]); } } test_threads.clear(); } return 0; }
Makefile文件参考
编译需要的Makefile文件如下:
#
# Compiler flags and paths
#--------------------------------------------------------------------------------------------------------------------------------------------------
# -pthread is required because rping_test.cc uses std::thread.
CXXFLAGS := -std=c++11\
            -Werror\
            -fstack-protector-strong\
            -fPIE -pie\
            -O2\
            -g\
            -s\
            -pthread\
            -Wl,-z,relro\
            -Wl,-z,now\
            -Wl,-z,noexecstack\
            -Wl,--copy-dt-needed-entries

# ASCEND_DIR must be exported in the environment before running make.
HCCL_INC_DIR = ${ASCEND_DIR}/include
HCCL_LIB_DIR = ${ASCEND_DIR}/lib64
ACL_INC_DIR = ${ASCEND_DIR}/include
ACL_LIB_DIR = ${ASCEND_DIR}/lib64

# Name of the executable to build.
LIST = rping_test

#
# Library flags
#--------------------------------------------------------------------------------------------------------------------------------------------------
LIBS = -L$(HCCL_LIB_DIR) -lhccl\
       -L$(ACL_LIB_DIR) -lascendcl
INCLUDEDIRS = -I$(HCCL_INC_DIR)\
              -I$(ACL_INC_DIR)

#
# Targets
#--------------------------------------------------------------------------------------------------------------------------------------------------
# Neither target produces a file of its own name, so both are phony.
.PHONY: all clean

# Build rping_test and move it into ./bin.
all:
	@mkdir -p bin
	g++ $(CXXFLAGS) rping_test.cc $(INCLUDEDIRS) -o rping_test $(LIBS)
	@printf "\033[0;32;32mrping_test compile completed\n\033[m"
	mv $(LIST) ./bin

# Remove the built test executables.
clean:
	rm -rf ./bin/*_test
依赖环境变量
编译本节所示样例代码前需配置如下环境变量。
source /usr/local/Ascend/ascend-toolkit/set_env.sh
export ASCEND_DIR=/usr/local/Ascend/ascend-toolkit/latest
其中“/usr/local/Ascend”为CANN软件使用root用户安装的默认安装路径,如CANN软件使用普通用户安装或指定路径安装,请自行替换。
编译执行样例
- 执行“make”命令,会在bin目录下面编译生成“rping_test”可执行文件。
make
- 执行如下命令,运行“rping_test”可执行文件。
./bin/rping_test <Number of cycles> <client NPU devlogicId> <target NPU devlogicId>
- <Number of cycles>:从RPing功能初始化到释放RPing资源的全流程执行次数。
- <client NPU devlogicId>:client NPU的Device逻辑ID。
- <target NPU devlogicId>:target NPU的Device逻辑ID。若有多个target NPU,各逻辑ID之间使用空格分隔即可。
命令示例:
./bin/rping_test 1 0 1 2 3 4 5 6 7
代表RPing功能执行一次,client NPU为Device 0,target NPU为Device 1到Device 7共7个NPU。