Model Inference

A complete sample is available in the example code provided with LynSDK; in that sample, error checking and handling follows every API call. The code below shows only the key steps and is for reference only; it is not meant to be copied and compiled as-is.
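The snippet relies on an ERROR_RET helper that is not shown here. Below is a minimal sketch, assuming the macro stores the status in the ret variable declared at the top of the snippet, treats 0 as success, and returns from the enclosing function on failure; the actual macro in the LynSDK samples may differ, and printf stands in for whatever logging the sample uses.

// Hypothetical error-check helper (assumption; the real sample code may differ).
// Requires <cstdio> for printf and a ret variable of type lynError_t in scope.
#define ERROR_RET(call)                                                        \
    do {                                                                       \
        ret = (call);                                                          \
        if (ret != 0) {                                                        \
            printf("LynSDK call failed: %s, error = %d\n", #call, (int)ret);   \
            return -1;                                                         \
        }                                                                      \
    } while (0)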

lynError_t ret = 0;
lynModel_t model = LYN_FAKE_MODEL_HANDLE;
int32_t devID = 0;
lynContext_t context = nullptr;
lynStream_t stream = nullptr;

const lynModelDesc_t *modelDesc = nullptr;
uint8_t *hostIn = nullptr;
void *devinptr = nullptr;
uint8_t *hostOut = nullptr;
void *devoutptr = nullptr;

// Batch size used for model inference
uint32_t batchSize = 1;
constexpr uint32_t RUN_INFER_TIMES = 5;

// Allocate runtime resources
ERROR_RET(lynCreateContext(&context, devID));
ERROR_RET(lynCreateStream(&stream));

// Load the model (modelPath points to the model file and is supplied by the caller)
ERROR_RET(lynLoadModel(modelPath, &model));

// Get the model description
ERROR_RET(lynModelGetDesc(model, &modelDesc));
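// Illustrative check (not part of the original sample): print the description
// fields this snippet relies on to confirm the model was parsed as expected.
printf("input bytes per batch : %lu\n", (unsigned long)modelDesc->inputDataLen);
printf("output bytes per batch: %lu\n", (unsigned long)modelDesc->outputDataLen);
printf("model batch size      : %lu\n", (unsigned long)modelDesc->inputTensorAttrArray[0].batchSize);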

// Initialize the input data
hostIn = (uint8_t *)malloc(modelDesc->inputDataLen * batchSize); // client-side (host) buffer
for (size_t inputLoop = 0; inputLoop < modelDesc->inputDataLen * batchSize; inputLoop++) {
    hostIn[inputLoop] = (uint8_t)inputLoop; // simple ramp pattern
}
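// In a real application, hostIn would be filled with preprocessed input
// (for example, a normalized image) rather than this ramp pattern.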

// Allocate server-side memory and copy the input data from the client side to the server side
ERROR_RET(lynMalloc(&devinptr, modelDesc->inputDataLen * batchSize)); // server-side (device) buffer
ERROR_RET(lynMemcpy(devinptr, hostIn, modelDesc->inputDataLen * batchSize, ClientToServer));

hostOut = (uint8_t *)malloc(modelDesc->outputDataLen * batchSize); // client-side (host) buffer for results

// Allocate the model inference output buffer
ERROR_RET(lynMalloc(&devoutptr, modelDesc->outputDataLen * batchSize)); // server-side (device) buffer

// Run asynchronous inference, wait for it to complete, and copy the result back to the client side
for (uint32_t run = 0; run < RUN_INFER_TIMES; run++) {
    ERROR_RET(lynExecuteModelAsync(stream, model, devinptr, devoutptr, modelDesc->inputTensorAttrArray[0].batchSize));
    ERROR_RET(lynSynchronizeStream(stream));
    ERROR_RET(lynMemcpy(hostOut, devoutptr, modelDesc->outputDataLen * batchSize, ServerToClient));
}
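// Illustrative only (not part of the original sample): peek at the first few
// result bytes after the copy back; real post-processing depends on the
// model's output layout.
for (size_t i = 0; i < 8 && i < modelDesc->outputDataLen; i++) {
    printf("hostOut[%zu] = %u\n", i, (unsigned)hostOut[i]);
}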

// Free memory and destroy runtime resources
if (devinptr) { lynFree(devinptr); devinptr = nullptr; }
if (devoutptr) { lynFree(devoutptr); devoutptr = nullptr; }
if (hostIn) { free(hostIn); hostIn = nullptr; }
if (hostOut) { free(hostOut); hostOut = nullptr; }
if (model != LYN_FAKE_MODEL_HANDLE) { lynUnloadModel(model); model = LYN_FAKE_MODEL_HANDLE; }
if (stream) { lynDestroyStream(stream); stream = nullptr; }
if (context) { lynDestroyContext(context); context = nullptr; }
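Note that the cleanup mirrors the acquisition order in reverse: the device buffers are freed and the model is unloaded before the stream and context are destroyed, so no resource is released while something that depends on it is still in use.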