# Change into the Megatron-LM checkout, then run the pretrain_llama.sh
# system-test script. {PATH_TO_MEGATRON_LM} is a placeholder — substitute the
# path to your checkout. `&&` ensures the script is launched only if the
# directory change succeeded (the script path is relative to that directory).
cd {PATH_TO_MEGATRON_LM} &&
  bash tests_extend/system_tests/pretrain_llama.sh
# Parallelism-related options (presumably the ones used by the launch script —
# TODO confirm where they are passed).
# NOTE(review): the two *-parallel-size flags below are shown without a value;
# confirm the intended sizes before using them on a command line.
--tensor-model-parallel-size # tensor parallelism
--pipeline-model-parallel-size # pipeline parallelism
--num-layers-per-virtual-pipeline-stage 2 # 2 layers per virtual pipeline stage
--sequence-parallel # sequence parallelism