使用兼容Triton接口

其他接口请参见兼容OpenAI接口章节。

也可以使用MindIE Client的Python接口来进行推理,例如文本推理。

首先需要用户编写创建MindIE Client的方法,并将以下代码保存为utils.py文件,后续可重复使用该方法。

import argparse
from mindieclient.python.httpclient import MindIEHTTPClient
def create_client(request_count=1):
    """Create a MindIEHTTPClient configured from command-line arguments.

    The options are read from ``sys.argv`` via ``argparse``, so any script
    that calls this function accepts the flags defined below.

    Args:
        request_count: Value passed to the client as ``concurrency``.

    Returns:
        The constructed ``MindIEHTTPClient`` instance.

    Raises:
        Exception: Whatever ``MindIEHTTPClient`` raises during construction
            is propagated unchanged to the caller.
    """
    # get argument
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-u",
        "--url",
        required=False,
        default="https://127.0.0.1:1025",
        help="MindIE-Server URL.",
    )
    # NOTE(review): with action="store_true" and default=True this flag is
    # always True and cannot be switched off from the command line. Kept
    # as-is for backward compatibility -- confirm whether that is intended.
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        required=False,
        default=True,
        help="Enable detailed information output.",
    )
    parser.add_argument(
        "-s",
        "--ssl",
        action="store_true",
        required=False,
        default=False,
        help="Enable encrypted link with https",
    )
    parser.add_argument(
        "-ca",
        "--ca_certs",
        required=False,
        default="ca.pem",
        help="Provide https ca certificate.",
    )
    parser.add_argument(
        "-key",
        "--key_file",
        required=False,
        default="client.key.pem",
        help="Provide https client keyfile.",
    )
    parser.add_argument(
        "-cert",
        "--cert_file",
        required=False,
        default="client.pem",
        help="Provide https client certificate.",
    )
    args = parser.parse_args()

    # create client
    if not args.ssl:
        return MindIEHTTPClient(
            url=args.url, verbose=args.verbose, concurrency=request_count
        )

    # Only forward the SSL settings that actually carry a value.
    candidates = {
        "ca_certs": args.ca_certs,
        "keyfile": args.key_file,
        "certfile": args.cert_file,
    }
    ssl_options = {
        name: value for name, value in candidates.items() if value is not None
    }
    return MindIEHTTPClient(
        url=args.url,
        verbose=args.verbose,
        enable_ssl=True,
        ssl_options=ssl_options,
        concurrency=request_count,
    )

之后新建一个Python脚本文件,导入并调用上述create_client方法创建客户端,即可调用文本推理接口。

import sys

from utils import create_client

# Example script: create a MindIE client and run a single text-generation
# request against the model named below.
if __name__ == "__main__":
    # get argument and create client
    try:
        mindie_client = create_client()
    except Exception as e:
        # Top-level boundary: report the cause and exit with a non-zero code.
        print(f"Client Creation failed! {e}", file=sys.stderr)
        sys.exit(1)
    # create input
    prompt = "My name is Olivier and I"
    model_name = "llama_65b"
    parameters = {
        "do_sample": True,
        "temperature": 0.5,
        "top_k": 10,
        "top_p": 0.9,
        "truncate": 5,
        "typical_p": 0.9,
        "seed": 1,
        "repetition_penalty": 1,
        "watermark": True,
        "details": True,
    }
    # apply model inference
    result = mindie_client.generate(
        model_name,
        prompt,
        request_id="1",
        parameters=parameters,
    )
    print(result.get_response())

其他MindIE Client接口请参见class MindIEHTTPClient章节。