This ARM-architecture core-binding (CPU affinity) optimization sample starts from code that has already been manually migrated for single-node multi-card training, and applies the optimization through the code changes described below.
The number of devices per node is determined first; the original code that launches worker processes with mp.spawn is then commented out, because each training process will instead be started externally by the launch script at the end of this sample:

```python
if args.device == 'npu':
    ngpus_per_node = len(args.process_device_map)
else:
    ngpus_per_node = torch.cuda.device_count()
'''
if args.multiprocessing_distributed:
    # Since we have ngpus_per_node processes per node, the total world_size
    # needs to be adjusted accordingly
    args.world_size = ngpus_per_node * args.world_size
    # Use torch.multiprocessing.spawn to launch distributed processes: the
    # main_worker process function
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
else:
    # Simply call main_worker function
    main_worker(args.gpu, ngpus_per_node, args)
'''
```
After the commented-out original code, add the following code:
```python
args.gpu = 'npu:%d' % args.rank
main_worker(args.gpu, ngpus_per_node, args)
```
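Because mp.spawn is no longer used, args.rank must now come from the command line of each individually launched process. The following is only a sketch of the argument definitions this relies on; the names match what the launch script at the end of this sample passes in, and --local_rank in particular is typically not present in the stock ImageNet example, so adapt them to your own training script.

```python
import argparse

parser = argparse.ArgumentParser(description='per-process distributed training')
# These arguments are assumed to exist in the training script; the launch
# script at the end of this sample passes values for all of them.
parser.add_argument('--rank', default=-1, type=int,
                    help='global rank of this process')
parser.add_argument('--local_rank', default=-1, type=int,
                    help='rank of this process on the local node')
parser.add_argument('--world-size', default=-1, type=int,
                    help='total number of training processes')
parser.add_argument('--dist-url', default=None, type=str,
                    help='URL used to set up distributed training')
parser.add_argument('--dist-backend', default='hccl', type=str,
                    help='distributed backend; hccl for Ascend NPUs')
parser.add_argument('-j', '--workers', default=8, type=int,
                    help='number of data loading workers per process')
```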
Modify main_worker as follows: the device string passed in as gpu is used directly, and batch_size and workers are scaled down to per-process values.

```python
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    #args.gpu = args.process_device_map[gpu]

    # Specify the Ascend AI Processor as the training device
    #loc = 'npu:{}'.format(args.gpu)
    torch_npu.npu.set_device(gpu)
    # Compute the per-process batch_size and workers
    args.batch_size = int(args.batch_size / ngpus_per_node)
    args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
```
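Since mp.spawn no longer handles the per-rank bookkeeping, each externally launched process also has to join the process group itself. The snippet below is a minimal sketch of that step, assuming the script keeps the standard PyTorch distributed initialization and uses the backend and URL passed on the command line:

```python
import torch.distributed as dist

# Each process started by the launch script initializes the process group
# on its own, using the rank and world size passed in on the command line.
dist.init_process_group(backend=args.dist_backend,   # 'hccl' for Ascend NPUs
                        init_method=args.dist_url,
                        world_size=args.world_size,
                        rank=args.rank)
```

With the launch script at the end of this sample passing --batch-size 1024 and 8 NPUs per node, the division above leaves each process with a batch size of 1024 / 8 = 128; the workers count is split in the same way, rounded up.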
The original device-selection code is as follows:
```python
if torch_npu.npu.is_available():
    if args.gpu:
        device = torch.device('npu:{}'.format(args.gpu))
    else:
        device = torch.device("npu")
#elif torch.backends.mps.is_available():
#    device = torch.device("mps")
else:
    device = torch.device("cpu")
```
The modified code is as follows:
```python
if torch_npu.npu.is_available():
    if args.gpu:
        device = torch.device('npu:{}'.format(args.rank))
    else:
        device = torch.device("npu")
#elif torch.backends.mps.is_available():
#    device = torch.device("mps")
else:
    device = torch.device("cpu")
```
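Once device has been resolved from args.rank, the model is moved to that device and wrapped for gradient synchronization across the ranks. The following is only a sketch of the usual pattern, assuming a single node where the global rank equals the local device index; model comes from the surrounding training script, and torch/torch_npu are already imported there.

```python
# Move the model to the NPU chosen for this rank, then wrap it so gradients
# are averaged across all processes started by the launch script.
model = model.to(device)
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.rank])
```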
Finally, launch the training with a shell script that starts one process per NPU and uses taskset to bind each process to its own range of CPU cores; this binding is the ARM core-binding optimization itself:

```shell
RANK_ID_START=0
RANK_SIZE=8

for((RANK_ID=$RANK_ID_START;RANK_ID<$((RANK_SIZE+RANK_ID_START));RANK_ID++));
do
    KERNEL_NUM=$(($(nproc)/8))
    PID_START=$((KERNEL_NUM * RANK_ID))
    PID_END=$((PID_START + KERNEL_NUM - 1))
    # The launch command below must be adapted to your environment.
    # ** is the port number; replace it with an idle port of your choice.
    nohup taskset -c $PID_START-$PID_END python3 xxx.py ${Dataset_Path} \
        --lr 0.8 \
        --arch resnet50 \
        --dist-url 'tcp://127.0.0.1:**' \
        --dist-backend 'hccl' \
        --world-size 8 \
        --batch-size 1024 \
        --epochs 20 \
        --rank $RANK_ID \
        -j ${KERNEL_NUM} \
        --local_rank $RANK_ID &
done
```
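As a concrete example of the arithmetic above: on a 192-core ARM host, KERNEL_NUM is 192 / 8 = 24, so rank 0 is bound to cores 0-23, rank 1 to cores 24-47, and so on. To confirm inside a training process that the taskset binding took effect, a small check like the following can be added (Linux only; os.sched_getaffinity is part of the standard library):

```python
import os

# Print the CPU cores this process is allowed to run on. With the launch
# script above, each rank should see exactly KERNEL_NUM consecutive core IDs
# (e.g. rank 0 -> cores 0-23 on a 192-core host).
allowed_cores = sorted(os.sched_getaffinity(0))
print('process %d is bound to %d cores: %s'
      % (os.getpid(), len(allowed_cores), allowed_cores))
```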