curl -H "Accept: application/json" -H "Content-type: application/json" --cacert ca.pem --cert client.pem --key client.key.pem -X POST -d '{ "id": "42", "inputs": [{ "name": "input0", "shape": [ 1, 10 ], "datatype": "UINT32", "data": [ 396, 319, 13996, 29877, 29901, 29907, 3333, 20718, 316, 23924 ], "parameters": { "temperature": 0.5, "top_k": 10, "top_p": 0.95, "do_sample": true, "seed": null, "repetition_penalty": 1.03, "max_new_tokens": 512 } }], "outputs": [{ "name": "output0" }] }' https://127.0.0.1:1025/v2/models/llama_65b/infer
curl -H "Accept: application/json" -H "Content-type: application/json" --cacert ca.pem --cert client.pem --key client.key.pem -X POST -d '{ "id":"a123", "text_input": "My name is Olivier and I", "parameters": { "details": true, "do_sample": true, "max_new_tokens":200, "repetition_penalty": 1.1, "seed": 123, "temperature": 1, "top_k": 2147483647, "top_p": 0.99, "batch_size":100 } }' https://127.0.0.1:1025/v2/models/llama_65b/generate
curl -H "Accept: application/json" -H "Content-type: application/json" --cacert ca.pem --cert client.pem --key client.key.pem -X POST -d '{ "id":"a123", "text_input": "My name is Olivier and I", "parameters": { "details": true, "do_sample": true, "max_new_tokens":200, "repetition_penalty": 1.1, "seed": 123, "temperature": 1, "top_k": 2147483647, "top_p": 0.99, "batch_size":100 } }' https://127.0.0.1:1025/v2/models/llama_65b/generate_stream
For other interfaces, see the OpenAI-compatible interfaces section.
You can also use the MindIE Client Python interface to perform inference, for example text inference.
First, create a MindIE Client. Save the file below as utils.py; the create_client method it defines can be reused in the later examples.
import argparse

from mindieclient.python.httpclient import MindIEHTTPClient


def create_client(request_count=1):
    # Parse command-line arguments for the server URL and TLS options.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-u", "--url",
        required=False,
        default="https://127.0.0.1:1025",
        help="MindIE-Server URL.",
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        required=False,
        default=True,
        help="Enable detailed information output.",
    )
    parser.add_argument(
        "-s", "--ssl",
        action="store_true",
        required=False,
        default=False,
        help="Enable encrypted link with https.",
    )
    parser.add_argument(
        "-ca", "--ca_certs",
        required=False,
        default="ca.pem",
        help="Provide https CA certificate.",
    )
    parser.add_argument(
        "-key", "--key_file",
        required=False,
        default="client.key.pem",
        help="Provide https client key file.",
    )
    parser.add_argument(
        "-cert", "--cert_file",
        required=False,
        default="client.pem",
        help="Provide https client certificate.",
    )
    args = parser.parse_args()

    # Create the client, with or without TLS depending on the --ssl flag.
    try:
        if args.ssl:
            ssl_options = {}
            if args.ca_certs is not None:
                ssl_options["ca_certs"] = args.ca_certs
            if args.key_file is not None:
                ssl_options["keyfile"] = args.key_file
            if args.cert_file is not None:
                ssl_options["certfile"] = args.cert_file
            mindie_client = MindIEHTTPClient(
                url=args.url,
                verbose=args.verbose,
                enable_ssl=True,
                ssl_options=ssl_options,
                concurrency=request_count,
            )
        else:
            mindie_client = MindIEHTTPClient(
                url=args.url, verbose=args.verbose, concurrency=request_count
            )
    except Exception as e:
        raise e
    return mindie_client
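If command-line parsing is not required, the client can also be constructed directly. The following minimal sketch passes the same constructor arguments that create_client uses above, assuming the same certificate file names; it adds nothing beyond what that function already does.

# Minimal sketch: constructing the client directly, with the same
# constructor arguments that create_client passes above.
from mindieclient.python.httpclient import MindIEHTTPClient

mindie_client = MindIEHTTPClient(
    url="https://127.0.0.1:1025",
    verbose=True,
    enable_ssl=True,
    ssl_options={
        "ca_certs": "ca.pem",         # server CA certificate
        "keyfile": "client.key.pem",  # client private key
        "certfile": "client.pem",     # client certificate
    },
    concurrency=1,
)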
Then create another file that calls the create_client method above; with the client it returns, you can call the text inference interface.
import sys

from utils import create_client

if __name__ == "__main__":
    # Parse command-line arguments and create the client.
    try:
        mindie_client = create_client()
    except Exception as e:
        print("Client creation failed!")
        sys.exit(1)

    # Build the input.
    prompt = "My name is Olivier and I"
    model_name = "llama_65b"
    parameters = {
        "do_sample": True,
        "temperature": 0.5,
        "top_k": 10,
        "top_p": 0.9,
        "truncate": 5,
        "typical_p": 0.9,
        "seed": 1,
        "repetition_penalty": 1,
        "watermark": True,
        "details": True,
    }

    # Run model inference.
    result = mindie_client.generate(
        model_name,
        prompt,
        request_id="1",
        parameters=parameters,
    )
    print(result.get_response())
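Assuming the file above is saved as text_infer.py (the file name is only an example, not prescribed by MindIE), the script can be run against a TLS-enabled MindIE-Server with the options defined in create_client, for example: python text_infer.py --ssl -ca ca.pem -key client.key.pem -cert client.pem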
For other MindIE Client interfaces, see the "class MindIEHTTPClient" section.