运行仓上模型

  1. 下载原始代码仓:

    git clone https://gitee.com/ascend/AscendSpeed
    cd AscendSpeed 
    mkdir logs
    mkdir ckpt

  2. 准备环境:

    conda create -n (conda环境名称) python=3.7
    conda activate (conda环境名称)
    下载并安装torch及torch_npu:
    # 安装torch:
    pip install torch==1.11 -i https://pypi.tuna.tsinghua.edu.cn/simple
    # 安装torch_npu:
    wget https://gitee.com/ascend/pytorch/releases/download/v5.0.rc2-pytorch1.11.0/torch_npu-1.11.0.post1-cp37-cp37m-linux_aarch64.whl  # ARM架构
    wget https://gitee.com/ascend/pytorch/releases/download/v5.0.rc2-pytorch1.11.0/torch_npu-1.11.0.post1-cp37-cp37m-linux_x86_64.whl  # X86架构
    # 根据实际架构将 XXXXXX 替换为 aarch64 或 x86_64
    pip install torch_npu-1.11.0.post1-cp37-cp37m-linux_XXXXXX.whl
    安装deepspeed与deepspeed_npu:
    pip install deepspeed==0.9.2
    git clone https://gitee.com/ascend/DeepSpeed.git -b v0.9.2 deepspeed_npu
    cd deepspeed_npu
    pip install -e .
    安装其他所需要的依赖:
    pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

  3. 获取数据集:

    # 注意使用 raw 地址下载原始 JSON 文件（blob 页面下载的是 HTML 页面而非数据）
    wget https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json
    mkdir dataset
    # 执行preprocess_data.py
    python tools/preprocess_data.py \
                --input alpaca_data.json \
                --output-prefix dataset/alpaca \
                --tokenizer-type PretrainedFromHF \
                --tokenizer-name-or-path llama-7b-hf \
                --tokenizer-not-use-fast \
                --handler-name GeneralInstructionHandler                

  4. 启动任务:

    sh examples/llama/pretrain_llama_ptd_16B.sh