同步推理

import sys
import numpy as np
from mindieclient.python.httpclient import Input, RequestedOutput
from utils import create_client
from mindieclient.python.common import Log

logger = Log(__name__).getlog()


def main():
    """Run one synchronous inference request and log its input and output.

    Builds a single uint32 array of shape (1, 16) holding the values 0..15,
    sends it as "INPUT0" to the model named "llama_65b" through the client
    returned by ``create_client()``, then logs the response and the
    "OUTPUT0" array. Exits the process with status 1 when the client cannot
    be created.
    """
    # create_client() comes from utils and is not visible here; per the
    # original note it also handles the command-line arguments.
    try:
        mindie_client = create_client()
    except Exception:
        # Top-level boundary of the script: log and stop. The exception
        # object itself is not used, so it is not bound to a name.
        logger.exception("Client Creation failed!")
        sys.exit(1)

    # One row containing 0..15; expand_dims adds the leading axis of size 1.
    input_data = np.arange(start=0, stop=16, dtype=np.uint32)
    input_data = np.expand_dims(input_data, axis=0)

    # Take the declared shape from the array itself so the two cannot drift
    # apart (this evaluates to [1, 16], the value previously hard-coded).
    infer_input = Input("INPUT0", list(input_data.shape), "UINT32")
    infer_input.initialize_data(input_data)
    inputs = [infer_input]
    outputs = [RequestedOutput("OUTPUT0")]

    # Apply model inference.
    model_name = "llama_65b"
    results = mindie_client.infer(
        model_name,
        inputs,
        outputs=outputs,
    )
    logger.info(results.get_response())

    output_data = results.retrieve_output_name_to_numpy("OUTPUT0")
    logger.info("input_data: %s", np.array2string(input_data))
    logger.info("output_data: %s", np.array2string(output_data))


if __name__ == "__main__":
    main()