# 执行样例前请使用以下命令配置环境变量。
# CANN
. "/usr/local/Ascend/ascend-toolkit/set_env.sh"
# ATB
. "/usr/local/Ascend/nnal/atb/set_env.sh"
# ATB Models
. "/usr/local/Ascend/llm_model/set_env.sh"
# MindIE
. "/usr/local/Ascend/mindie/set_env.sh"
# Sampling parameters, passed to --SamplingParams as a single JSON string.
# The assignment must be its own statement: written as a `VAR=value command`
# prefix, the variable is only placed in the command's environment and
# "$SMPL_PARAM" in the argument list expands to an empty string.
SMPL_PARAM='{"temperature":0.5,"top_k":10,"top_p":0.9,"seed":1234,"repetition_penalty":1}'

# Client-type benchmark run (--TestType client) with sampling turned on
# (--DoSampling True). Replace every {...} placeholder with a real value
# before running. Each trailing backslash must be the last character on its
# line so the shell treats the following line as a continuation.
benchmark \
  --DatasetPath "/{数据集路径}/token_gsm8k_model.csv" \
  --DatasetType gsm8k \
  --ModelName llama_7b \
  --ModelPath "/{模型权重路径}/llama_7b" \
  --TestType client \
  --Http "https://{ipAddress}:{port}" \
  --ManagementHttp "https://{managementIpAddress}:{managementPort}" \
  --Concurrency 128 \
  --TaskKind stream_token \
  --Tokenizer False \
  --MaxOutputLen 512 \
  --DoSampling True \
  --SamplingParams "$SMPL_PARAM"
# Client-type benchmark run (--TestType client) without the sampling options.
# Replace every {...} placeholder with a real value before running.
# Each trailing backslash must be the last character on its line; a backslash
# followed by a space escapes the space instead of continuing the line, which
# hands the option names to the command with a leading blank.
benchmark \
  --DatasetPath "/{数据集路径}/token_gsm8k_model.csv" \
  --DatasetType "gsm8k" \
  --ModelName llama_7b \
  --ModelPath "/{模型权重路径}/llama_7b" \
  --TestType client \
  --Http "https://{ipAddress}:{port}" \
  --ManagementHttp "https://{managementIpAddress}:{managementPort}" \
  --Concurrency 128 \
  --TaskKind stream_token \
  --Tokenizer False \
  --MaxOutputLen 512