"""Example script: submit asynchronous inference requests via a MindIE client.

Builds one ``UINT32`` input tensor named ``INPUT0``, requests ``OUTPUT0``,
fires ``REQUEST_COUNT`` asynchronous inference calls against ``MODEL_NAME``
and logs each response together with the retrieved output array.
"""
import sys

import numpy as np
from mindieclient.python.httpclient import Input, RequestedOutput
from utils import create_client
from mindieclient.python.common import Log

logger = Log(__name__).getlog()

# Number of async requests to submit; the same value is handed to
# ``create_client`` (NOTE(review): presumably sizes the client's
# concurrency -- confirm against ``utils.create_client``).
REQUEST_COUNT = 2
# Name of the served model the requests are sent to.
MODEL_NAME = "llama_65b"


def _build_io():
    """Return the ``(inputs, outputs)`` lists shared by every request.

    The single input is named ``INPUT0``, declared with shape ``[1, 16]``
    and datatype ``UINT32``, and filled with the integers 0..15. The single
    requested output is ``OUTPUT0``.
    """
    # arange yields shape (16,); add a leading axis to match the declared
    # [1, 16] input shape.
    input_data = np.expand_dims(
        np.arange(start=0, stop=16, dtype=np.uint32), axis=0
    )
    infer_input = Input("INPUT0", [1, 16], "UINT32")
    infer_input.initialize_data(input_data)
    return [infer_input], [RequestedOutput("OUTPUT0")]


def main():
    """Create the client, submit the async requests and log their results.

    Exits the process with status 1 if the client cannot be created.
    """
    # Top-level boundary: any failure while creating the client is logged
    # with its traceback and turned into a non-zero exit status.
    try:
        mindie_client = create_client(REQUEST_COUNT)
    except Exception:
        logger.exception("Client Creation failed!")
        sys.exit(1)

    inputs, outputs = _build_io()

    # Submit every request before collecting any result, so the requests
    # are all issued up front rather than one at a time.
    async_requests = [
        mindie_client.async_infer(MODEL_NAME, inputs, outputs=outputs)
        for _ in range(REQUEST_COUNT)
    ]

    # Collect results in submission order.
    for async_request in async_requests:
        result = async_request.get_result()
        logger.info(result.get_response())
        output_data = result.retrieve_output_name_to_numpy("OUTPUT0")
        logger.info("output_data: %s", output_data)


if __name__ == "__main__":
    main()