运行仓上模型

  1. 下载原始代码仓:

    git clone https://gitee.com/ascend/AscendSpeed
    cd AscendSpeed 
    mkdir logs
    mkdir ckpt

  2. 准备环境:

    conda create -n (conda环境名称) python=3.7
    conda activate (conda环境名称)
    下载并安装torch及torch_npu:
    # 安装torch:
    pip install torch==1.11 -i https://pypi.tuna.tsinghua.edu.cn/simple
    # 安装torch_npu:
    wget https://gitee.com/ascend/pytorch/releases/download/v5.0.rc2-pytorch1.11.0/torch_npu-1.11.0.post1-cp37-cp37m-linux_aarch64.whl  # ARM架构
    wget https://gitee.com/ascend/pytorch/releases/download/v5.0.rc2-pytorch1.11.0/torch_npu-1.11.0.post1-cp37-cp37m-linux_x86_64.whl  # X86架构
    # 根据实际架构将 XXXXXX 替换为 aarch64 或 x86_64
    pip install torch_npu-1.11.0.post1-cp37-cp37m-linux_XXXXXX.whl
    安装deepspeed与deepspeed_npu:
    pip install deepspeed==0.9.2
    git clone https://gitee.com/ascend/DeepSpeed.git -b v0.9.2 deepspeed_npu
    cd deepspeed_npu
    pip install -e .
    安装其他所需要的依赖:
    pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

  3. 获取数据集:

    # 注意使用 raw 地址下载原始 JSON 文件（blob 页面下载的是 HTML 页面而非数据）
    wget https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json
    mkdir dataset
    # 执行preprocess_data.py
    python tools/preprocess_data.py \
                --input alpaca_data.json \
                --output-prefix dataset/alpaca \
                --tokenizer-type PretrainedFromHF \
                --tokenizer-name-or-path llama-7b-hf \
                --tokenizer-not-use-fast \
                --handler-name GeneralInstructionHandler                

  4. 启动任务:

    sh examples/llama/pretrain_llama_ptd_16B.sh