#include "VPTProcess.h"
#include "helpers/cuda_helper.h"
#include "helpers/logger.hpp"
#include "../ai_platform/task_param_manager.h"

#ifndef _MSC_VER
// NOTE(review): one system header was included here for non-MSVC builds only;
// its name was lost when this file was extracted. Restore it if the build needs it.
#endif

// NOTE(review): the names of the remaining angle-bracket headers were lost as well.
// The list below is what the code visible in this file actually requires.
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <map>
#include <string>
#include <vector>

#include <cuda_runtime.h>

#include "ErrorInfo.h"
#include "vpt.h"
#include "opencv2/opencv.hpp"

/* Per-handle state: the detector handle plus one tracker entry per task id. */
typedef struct objDetector
{
    void* det_handle;
    float threshold;
    int max_batchsize;
    std::map<std::string, TaskTracker> taskTrackers;

    objDetector() : det_handle(NULL), threshold(0.6f), max_batchsize(1) {}

} objDetector;

//cv::VideoWriter face_det_writer;

/* Returned when a required pointer argument is NULL.
 * NOTE(review): file-local value; map it to the matching code in ErrorInfo.h if one exists. */
static const int kVptProcessBadArgument = -1;

/* Detections of one image; every row is {x1, y1, x2, y2, score, class_id}. */
typedef std::vector<std::vector<float> > DetectionList;

/*
 * Runs the detector over `batchsize` images (split into chunks of at most
 * tools->max_batchsize) and stores, per image, the boxes whose score is >= THRESHOLD.
 *
 * Returns SUCCESS, or the first non-SUCCESS code reported by vpt_batch.
 * NOTE(review): assumes vpt_batch returns an int status like vpt_init does.
 */
static int detectBatch(objDetector *tools, sy_img *batch_img, int batchsize,
                       std::vector<DetectionList> &detections)
{
    detections.assign(batchsize, DetectionList());

    /* Result buffers: one contiguous slab, MAX_DET_COUNT slots per image.
     * Vectors release the memory on every return path. */
    std::vector<vpt_obj_result> slots(static_cast<std::size_t>(batchsize) * MAX_DET_COUNT);
    std::vector<vpt_result> raw(batchsize);
    for (int b = 0; b < batchsize; b++)
    {
        raw[b].obj_count_ = 0;
        raw[b].obj_results_ = slots.data() + static_cast<std::size_t>(b) * MAX_DET_COUNT;
    }

    /* With many streams, run the detector in chunks of the maximum batch size.
     * A non-positive max batch size would divide by zero, so fall back to one chunk. */
    const int step = tools->max_batchsize > 0 ? tools->max_batchsize : batchsize;
    for (int start = 0; start < batchsize; start += step)
    {
        const int remaining = batchsize - start;
        const int chunk_size = remaining < step ? remaining : step;
        vpt_result *chunk = raw.data() + start;
        const int status = vpt_batch(tools->det_handle, batch_img + start, chunk_size, &chunk);
        if (SUCCESS != status)
        {
            return status;
        }
    }

#if 0
    /* Debug aid (disabled): copy every frame to the host, draw the raw boxes and
     * append the frame to face_det_writer (declared in the comment above). */
    for (int b = 0; b < batchsize; b++)
    {
        printf("batch: %d, %d %d %d \n", b, batch_img[b].c_, batch_img[b].h_, batch_img[b].w_);
        int data_size = batch_img[b].c_ * batch_img[b].h_ * batch_img[b].w_;
        unsigned char *imgdata = new unsigned char[data_size];
        cudaMemcpy(imgdata, batch_img[b].data_, sizeof(unsigned char) * data_size, cudaMemcpyDeviceToHost);
        cv::Mat big_img = cv::Mat(batch_img[b].h_, batch_img[b].w_, CV_8UC3, imgdata);
        for (int c = 0; c < raw[b].obj_count_ && c < MAX_DET_COUNT; c++)
        {
            printf("%d %d %d %d\n", raw[b].obj_results_[c].obj_rect.left_, raw[b].obj_results_[c].obj_rect.top_,
                   raw[b].obj_results_[c].obj_rect.width_, raw[b].obj_results_[c].obj_rect.height_);
            cv::rectangle(big_img,
                          cv::Rect(raw[b].obj_results_[c].obj_rect.left_, raw[b].obj_results_[c].obj_rect.top_,
                                   raw[b].obj_results_[c].obj_rect.width_, raw[b].obj_results_[c].obj_rect.height_),
                          cv::Scalar(158, 52, 254), 3, 1, 0);
        }
        face_det_writer << big_img;
        delete[] imgdata;
    }
#endif

    /* Convert the detector output into the row format the tracker takes as input
     * (original note: a face detector may need extra fields here, e.g. landmark points).
     * Filter by threshold. The index is also capped by MAX_DET_COUNT because that is
     * how many slots each image actually owns. */
    for (int b = 0; b < batchsize; b++)
    {
        const vpt_result &cur = raw[b];
        for (int c = 0; c < cur.obj_count_ && c < MAX_OBJ_COUNT && c < MAX_DET_COUNT; c++)
        {
            const float score = cur.obj_results_[c].obj_score;
            if (!(score >= THRESHOLD))
            {
                continue;
            }

            const float x1 = cur.obj_results_[c].obj_rect.left_;
            const float y1 = cur.obj_results_[c].obj_rect.top_;
            const float x2 = cur.obj_results_[c].obj_rect.left_ + cur.obj_results_[c].obj_rect.width_;
            const float y2 = cur.obj_results_[c].obj_rect.top_ + cur.obj_results_[c].obj_rect.height_;
            const float class_id = cur.obj_results_[c].obj_index;

            std::vector<float> row;
            row.reserve(6);
            row.push_back(x1);
            row.push_back(y1);
            row.push_back(x2);
            row.push_back(y2);
            row.push_back(score);
            row.push_back(class_id);
            detections[b].push_back(row);
        }
    }

    return SUCCESS;
}

/*
 * Advances one tracker by FusionInterval steps.
 *
 * Original author's note: FusionInterval is the frame-skip parameter; for the
 * ten-class person/vehicle/object model it is usually 5, so the first step uses the
 * detections and the following four are tracking only.
 *
 * Step 0 feeds `dets` to the tracker and writes the tracked objects into `out`.
 * Every later step calls update with the (now emptied) detection list and discards
 * its output. `deleted` is handed to every update call.
 */
template <typename DeletedIds>
static void advanceTracker(Sort &tracker, DetectionList &dets, onelevel_det_result &out, DeletedIds &deleted)
{
    if (FusionInterval < 1)
    {
        return;
    }

    bool useDetections = true;
    out.obj_count = tracker.update(useDetections, false, dets, out.obj, deleted);
    DetectionList().swap(dets); // release the detection rows, leaving an empty list
    useDetections = false;

    for (int step = 1; step < FusionInterval; step++)
    {
        onelevel_det_result discarded;
        discarded.obj_count = tracker.update(useDetections, false, dets, discarded.obj, deleted);
    }
}

/* Algorithm initialisation.
 *
 * Builds a vpt_param from `vparam`, creates the detector with vpt_init and, on
 * success, stores the new objDetector in `handle`. On failure the objDetector is
 * freed and `handle` is left untouched. Returns the vpt_init status code. */
int VPT_Init(void *&handle, VPTProcess_PARAM vparam)
{
    objDetector *tools = new objDetector;

    vpt_param param;
    param.mode = DEVICE_GPU;
    param.gpuid = vparam.gpuid;
    param.threshold = vparam.threshold;
    param.engine = ENGINE_TENSORRT;
    param.auth_license = vparam.auth_license;
    param.preprocess_param = "CopyData_CPU2GPU_U8;"
                             "TypeConvert_U8_F32;"
                             "ResizeMaxMidPad_F32_F32,test_size,640,test_max_size,640,max_height,640,max_width,640,"
                             "submean_b,0,submean_g,0,submean_r,0,"
                             "variance_rev_b,0.00392,variance_rev_g,0.00392,variance_rev_r,0.00392;"
                             "BGR2RGB_F32_F32;"
                             "NHWC2NCHW_F32";
    param.serialize_file = vparam.serialize_file;
    param.max_batch = vparam.max_batch;

    tools->max_batchsize = vparam.max_batch;

    const int flag = vpt_init(&(tools->det_handle), param);
    if (SUCCESS != flag)
    {
        delete tools;
        tools = NULL;
    }
    else
    {
        handle = tools;
    }

    return flag;
}

/* Algorithm computation: detect on `batchsize` images, then update every active tracker.
 *
 * Active trackers are visited in map order and the i-th active tracker consumes the
 * detections of image i. `result` and `deleteObjectID` are grown to `batchsize`
 * entries when they are shorter. `unUsedResult` is not used.
 *
 * NOTE(review): the element types of `deleteObjectID` and `unUsedResult` were lost in
 * extraction and are reconstructed here; confirm against VPTProcess.h.
 *
 * Returns SUCCESS, kVptProcessBadArgument for a NULL handle/image array, or the
 * failing vpt_batch status. */
int VPT_Process_GPU(void * handle, sy_img * batch_img, int batchsize, std::vector<onelevel_det_result>& result, std::vector<std::vector<int>>& deleteObjectID, std::vector<std::vector<onelevel_det_result>>& unUsedResult)
{
    (void)unUsedResult;

    objDetector *tools = (objDetector*)handle;
    if (tools == NULL)
    {
        return kVptProcessBadArgument;
    }
    if (batchsize <= 0)
    {
        return SUCCESS;
    }
    if (batch_img == NULL)
    {
        return kVptProcessBadArgument;
    }

    const std::size_t wanted = static_cast<std::size_t>(batchsize);
    if (result.size() < wanted)
    {
        result.resize(wanted);
    }
    if (deleteObjectID.size() < wanted)
    {
        deleteObjectID.resize(wanted);
    }

    std::vector<DetectionList> detections;
    const int status = detectBatch(tools, batch_img, batchsize, detections);
    if (SUCCESS != status)
    {
        return status;
    }

    /* Tracking: update tracker results. Entries are taken by reference so that the
     * tracker stored in the map is the one being checked and updated. */
    int detectIndex = 0;
    for (auto &entry : tools->taskTrackers)
    {
        Sort &tracker = entry.second.tracker;
        if (!tracker.GetState())
        {
            continue;
        }
        if (detectIndex >= batchsize)
        {
            break; // more active trackers than images: nothing left to feed them
        }

        advanceTracker(tracker, detections[detectIndex], result[detectIndex], deleteObjectID[detectIndex]);
        ++detectIndex;
    }

    return SUCCESS;
}

/* Algorithm computation, addressed by task id: image i belongs to tasklist[i].
 *
 * The batch size is tasklist.size(). `result` and `deleteObjectID` are grown to that
 * size when they are shorter. `unUsedResult` is not used.
 *
 * A task id that has no entry yet gets a default-constructed TaskTracker inserted by
 * the map lookup, exactly as before.
 * NOTE(review): confirm whether lazily creating trackers here is intended.
 *
 * Returns SUCCESS, kVptProcessBadArgument for a NULL handle/image array, or the
 * failing vpt_batch status. */
int VPT_Process_GPU2(void * handle, sy_img * batch_img, std::vector<std::string>& tasklist, std::vector<onelevel_det_result>& result, std::vector<std::vector<int>>& deleteObjectID, std::vector<std::vector<onelevel_det_result>>& unUsedResult)
{
    (void)unUsedResult;

    objDetector *tools = (objDetector*)handle;
    if (tools == NULL)
    {
        return kVptProcessBadArgument;
    }

    const int batchsize = static_cast<int>(tasklist.size());
    if (batchsize <= 0)
    {
        return SUCCESS;
    }
    if (batch_img == NULL)
    {
        return kVptProcessBadArgument;
    }

    const std::size_t wanted = tasklist.size();
    if (result.size() < wanted)
    {
        result.resize(wanted);
    }
    if (deleteObjectID.size() < wanted)
    {
        deleteObjectID.resize(wanted);
    }

    std::vector<DetectionList> detections;
    const int status = detectBatch(tools, batch_img, batchsize, detections);
    if (SUCCESS != status)
    {
        return status;
    }

    for (int i = 0; i < batchsize; i++)
    {
        /* Single lookup; operator[] keeps the original insert-if-missing behaviour. */
        TaskTracker &task = tools->taskTrackers[tasklist[i]];
        if (!task.tracker.GetState())
        {
            continue;
        }

        advanceTracker(task.tracker, detections[i], result[i], deleteObjectID[i]);
    }

    return SUCCESS;
}

/* Releases the detector handle and the objDetector that owns it. NULL is accepted. */
void VPT_Release(void * handle)
{
    objDetector *tools = (objDetector*)handle;
    if (tools == NULL)
    {
        return;
    }

    if (tools->det_handle)
    {
        // ctools_release(&tools->det_handle);
        vpt_release(&tools->det_handle);
        tools->det_handle = NULL;
    }

    //vector<TaskTracker>().swap(tools->taskTrackers);
    delete tools;
}

/* Adds (or replaces) the tracker entry of a task. Does nothing for a NULL handle. */
void AddTaskTracker(void * handle, const std::string taskID, double rWidth, double rHeight)
{
    objDetector *tools = (objDetector*)handle;
    if (tools == NULL)
    {
        return;
    }

    TaskTracker t;
    t.TaskID = taskID;
    t.ratioWidth = rWidth;
    t.ratioHeight = rHeight;
    tools->taskTrackers[taskID] = t;
}

/* Removes the tracker entry of a finished task.
 * Returns false for a NULL handle, true otherwise (also when the id was unknown). */
bool FinishTaskTracker(void * handle, const std::string taskID)
{
    objDetector *tools = (objDetector*)handle;
    if (tools == NULL)
    {
        return false;
    }

    tools->taskTrackers.erase(taskID);
    return true;
}

/* Pauses the tracker of a task. Does nothing for a NULL handle.
 * NOTE(review): an unknown id gets a default entry inserted by operator[], as before. */
void PauseTaskTracker(void * handle, const std::string taskID)
{
    objDetector *tools = (objDetector*)handle;
    if (tools == NULL)
    {
        return;
    }

    tools->taskTrackers[taskID].tracker.Pause();
}

/* Restarts the tracker of a task. Does nothing for a NULL handle.
 * NOTE(review): an unknown id gets a default entry inserted by operator[], as before. */
void RestartTaskTracker(void * handle, const std::string taskID)
{
    objDetector *tools = (objDetector*)handle;
    if (tools == NULL)
    {
        return;
    }

    tools->taskTrackers[taskID].tracker.ReSet();
}

/*
void DrawTracker(void * handle, const string taskID, cv::Mat *img)
{
    objDetector *tools = (objDetector*)handle;

    for (int i = 0; i < tools->taskTrackers.size(); i++)
    {
        if (strcmp((tools->taskTrackers[i].TaskID).c_str(), taskID.c_str()) == 0)
        {
            tools->taskTrackers[i].tracker.addTracker(img);
            break;
        }
    }
}
*/

/* Rewrites a float image in place from three consecutive planes to interleaved pixels.
 *
 * `image` is passed to cudaMemcpy as the device-side pointer; it must hold
 * testWidth * testHeight * 3 floats. The data is copied to the host, every pixel
 * (j, i) is gathered from plane 0 / 1 / 2 into positions 3*i, 3*i+1, 3*i+2 of row j,
 * and the result is copied back. Does nothing for a NULL pointer or non-positive size. */
void permute(float * image, int testWidth, int testHeight)
{
    if (image == NULL || testWidth <= 0 || testHeight <= 0)
    {
        return;
    }

    const std::size_t width = static_cast<std::size_t>(testWidth);
    const std::size_t height = static_cast<std::size_t>(testHeight);
    const std::size_t plane = width * height;
    const std::size_t bytes = plane * 3 * sizeof(float);

    /* Host staging buffers; vectors free themselves (the old new[] buffer was leaked). */
    std::vector<float> planar(plane * 3);
    std::vector<float> interleaved(plane * 3);

    CHECK(cudaMemcpy(planar.data(), image, bytes, cudaMemcpyDeviceToHost));

    for (std::size_t j = 0; j < height; j++)
    {
        float *row = interleaved.data() + j * width * 3;
        for (std::size_t i = 0; i < width; i++)
        {
            row[3 * i]     = planar[j * width + i];             //b
            row[3 * i + 1] = planar[plane + j * width + i];     //g
            row[3 * i + 2] = planar[2 * plane + j * width + i]; //r
        }
    }

    CHECK(cudaMemcpy(image, interleaved.data(), bytes, cudaMemcpyHostToDevice));

    //cv::Mat showImg;
    //cv::resize(host_image, showImg, cv::Size(640, 480));
    //cv::imshow("image", showImg);
    //cv::waitKey(0);
}

/* Copies a width x height, 3-byte-per-pixel image from `image` (passed to cudaMemcpy
 * as the device-side pointer) into a new CV_8UC3 cv::Mat.
 *
 * Side effects kept from the original: the raw bytes are written to "decode.bin" and
 * the image is saved as "input3.jpg". Both now use the real image size; the old code
 * used a hard-coded 1920x1080, which over-read and overflowed for other sizes.
 *
 * A failed copy prints the error marker and continues with a zero-filled image.
 * Returns an empty Mat (and writes nothing) for a NULL pointer or non-positive size. */
cv::Mat GpuMat2OpencvMat(unsigned char* image, int width, int height)
{
    if (image == NULL || width <= 0 || height <= 0)
    {
        return cv::Mat();
    }

    const std::size_t bytes = static_cast<std::size_t>(width) * static_cast<std::size_t>(height) * 3;

    /* Zero-initialised host staging buffer. */
    std::vector<unsigned char> staging(bytes);

    const cudaError_t code = cudaMemcpy(staging.data(), image, bytes, cudaMemcpyDeviceToHost);
    if (code != cudaSuccess)
    {
        printf("==========================================================error");
    }

    std::ofstream outfile("decode.bin", std::ios::out | std::ios::binary);
    outfile.write(reinterpret_cast<const char*>(staging.data()), static_cast<std::streamsize>(bytes));
    outfile.close();

    /* Wrap the staging bytes and deep-copy them so the Mat owns its pixels. */
    cv::Mat host_image = cv::Mat(height, width, CV_8UC3, staging.data()).clone();

    cv::imwrite("input3.jpg", host_image);
    return host_image;
}