// VPTProcess.cpp (16.3 KB)
//
// (web-viewer residue: Edit / Raw / Blame / History)

#include "VPTProcess.h"
#include "helpers/cuda_helper.h"
#include "helpers/logger.hpp"
#include "../ai_platform/task_param_manager.h"
#ifndef _MSC_VER
#include <sys/time.h>
#endif
#include <stdlib.h>
#include <cuda_runtime.h>
#include <time.h>
#include "ErrorInfo.h"
#include "fstream"
#include <boost/thread/thread.hpp>
#include "vpt.h"
#include "opencv2/opencv.hpp"
/*
 * Per-handle detector context shared by the VPT_* entry points in this file.
 *
 * det_handle     opaque engine handle filled in by vpt_init(), released by vpt_release()
 * threshold      score threshold default (0.6)
 * max_batchsize  largest batch handed to the engine in one vpt_batch() call;
 *                VPT_Init overwrites it with the caller's max_batch
 * taskTrackers   one TaskTracker per task ID, keyed by that ID
 */
typedef struct objDetector {

	void* det_handle;
	float threshold;
	int max_batchsize;
	map<string, TaskTracker> taskTrackers;

	/* Every scalar member gets a defined value: max_batchsize is used as a
	 * divisor when a batch is split, so it must never be left indeterminate. */
	objDetector()
		: det_handle(NULL)
		, threshold(0.6f)
		, max_batchsize(1)
	{
	}
} objDetector;

//cv::VideoWriter face_det_writer;
/*
 * Algorithm initialisation.
 *
 * Creates a detector context and initialises the VPT engine with it.
 *
 * handle  [out] receives the new context on success; set to NULL on failure so
 *               that a later VPT_Release(handle) is always safe.
 * vparam  [in]  gpuid, threshold, auth_license, serialize_file and max_batch are
 *               forwarded to the engine; max_batch is also stored in the context
 *               as the batch-splitting limit.
 *
 * Returns the status reported by vpt_init() (SUCCESS when the engine is ready).
 */
int VPT_Init(void *&handle, VPTProcess_PARAM vparam)
{
	/* Do not leave a stale or uninitialised pointer in the caller's variable
	 * if initialisation fails below. */
	handle = NULL;

	objDetector *tools = new objDetector;

	vpt_param param;
	param.mode = DEVICE_GPU;
	param.gpuid = vparam.gpuid;
	param.threshold = vparam.threshold;
	param.engine = ENGINE_TENSORRT;
	param.auth_license = vparam.auth_license;
	/* Preprocessing pipeline description consumed by the engine. */
	param.preprocess_param =
		"CopyData_CPU2GPU_U8;"
		"TypeConvert_U8_F32;"
		"ResizeMaxMidPad_F32_F32,test_size,640,test_max_size,640,max_height,640,max_width,640,"
		"submean_b,0,submean_g,0,submean_r,0,"
		"variance_rev_b,0.00392,variance_rev_g,0.00392,variance_rev_r,0.00392;"
		"BGR2RGB_F32_F32;"
		"NHWC2NCHW_F32"
		;

	param.serialize_file = vparam.serialize_file;
	param.max_batch = vparam.max_batch;
	tools->max_batchsize = vparam.max_batch;

	int flag = vpt_init(&(tools->det_handle), param);

	if (SUCCESS != flag)
	{
		/* Engine creation failed: drop the context, handle stays NULL. */
		delete tools;
		return flag;
	}

	handle = tools;
	return flag;
}

/* 算法计算 */
int VPT_Process_GPU(void * handle, sy_img * batch_img, int batchsize,
					vector<onelevel_det_result>& result, vector<vector<int>>& deleteObjectID, vector<vector<onelevel_det_result>>& unUsedResult)
{
	if (result.empty())
		result.resize(batchsize);

	objDetector *tools = (objDetector*)handle;

	bool isUseDet = true;
	int channels = 3;

	/* 结果结构体初始化 */
	vpt_result *vpt_det_result = new vpt_result[batchsize];
	for (int b = 0; b < batchsize; b++)
	{
		vpt_det_result[b].obj_count_ = 0;
		vpt_det_result[b].obj_results_ = new vpt_obj_result[MAX_DET_COUNT];
	}

	/* 路数太多时 按照最大batchsize数 拆批次运行 */
	int cur_batch_size = tools->max_batchsize;
	int cycleTimes = batchsize / cur_batch_size + (batchsize % cur_batch_size == 0 ? 0 : 1);

	for (int c = 0; c < cycleTimes; c++)
	{
		int real_batchsize = c == cycleTimes - 1 ? (batchsize - cur_batch_size*c) : cur_batch_size;
		int startbatch = c*cur_batch_size;

		vpt_result *real_res = vpt_det_result + startbatch;
		vpt_batch(tools->det_handle, batch_img + startbatch, real_batchsize, &real_res);

#if 0
		vector<vector<int>> tempDeleteObjectID;
		tempDeleteObjectID.resize(batchsize);
#endif
	}

	vector <vector< vector <float>>> detectResult(batchsize);  // sort
#if 0
	for (int b = 0; b < batchsize; b++)
	{
		printf("batch: %d, %d %d %d \n", b, batch_img[b].c_ , batch_img[b].h_ , batch_img[b].w_);
		int data_size = batch_img[b].c_ * batch_img[b].h_ * batch_img[b].w_;
		unsigned char *imgdata = new unsigned char[data_size];
		cudaMemcpy(imgdata, batch_img[b].data_, sizeof(unsigned char) * data_size, cudaMemcpyDeviceToHost);
		cv::Mat big_img = cv::Mat(batch_img[b].h_, batch_img[b].w_, CV_8UC3, imgdata);

		for (int c = 0; c < result[b].obj_count && c < MAX_OBJ_COUNT; c++)
		{
			printf("%d %d %d %d\n", vpt_det_result[b].obj_results_[c].obj_rect.left_, vpt_det_result[b].obj_results_[c].obj_rect.top_,
				vpt_det_result[b].obj_results_[c].obj_rect.width_,
				vpt_det_result[b].obj_results_[c].obj_rect.height_);
			cv::rectangle(big_img, cv::Rect(vpt_det_result[b].obj_results_[c].obj_rect.left_, vpt_det_result[b].obj_results_[c].obj_rect.top_,
				vpt_det_result[b].obj_results_[c].obj_rect.width_,
				vpt_det_result[b].obj_results_[c].obj_rect.height_), cv::Scalar(158, 52, 254), 3, 1, 0);
		}

		face_det_writer << big_img;
		delete[] imgdata;
	}
#endif
	/* 将检测的结果放进数组 转换为跟踪的输入需要（若为人脸 则检测结果可能跟多，比如需要带上ldmk点） */
	// filter by threshold.
	for (int b = 0; b < batchsize; b++)
	{
		vpt_result &cur_result = vpt_det_result[b];

		for (int c = 0; c < cur_result.obj_count_ && c < MAX_OBJ_COUNT; c++)
		{
			float x1 = vpt_det_result[b].obj_results_[c].obj_rect.left_;
			float y1 = vpt_det_result[b].obj_results_[c].obj_rect.top_;
			float x2 = vpt_det_result[b].obj_results_[c].obj_rect.left_ + vpt_det_result[b].obj_results_[c].obj_rect.width_;
			float y2 = vpt_det_result[b].obj_results_[c].obj_rect.top_ + vpt_det_result[b].obj_results_[c].obj_rect.height_;

			float class_id = vpt_det_result[b].obj_results_[c].obj_index;
			float score = vpt_det_result[b].obj_results_[c].obj_score;

			if (score >= THRESHOLD)
			{
				vector <float> obj;
				obj.push_back(x1);
				obj.push_back(y1);
				obj.push_back(x2);
				obj.push_back(y2);
				obj.push_back(score);
				obj.push_back(class_id);
				detectResult[b].push_back(obj);
			}
		}
	}

	/* 跟踪 */
	// Update Tracker Result.
	int detectIndex = 0;
	for(auto iter_tracker : tools->taskTrackers)
	{
		if (!iter_tracker.second.tracker.GetState())
			continue;

	    Sort &cur_sort = tools->taskTrackers[iter_tracker.first].tracker;
		isUseDet = true;

		/* FusionInterval是跳帧参数，以十类人车物为例，一般跳5帧，所以第一帧检测，后续四帧纯跟踪 */
		for (int j = 0; j < FusionInterval; j++)
		{
			/* 跟踪：第一帧 带检测框信息的跟踪，取结果返回 */
			if (j == 0)
			{
				int objCount = cur_sort.update(isUseDet, false, detectResult[detectIndex], result[detectIndex].obj, deleteObjectID[detectIndex]);
				result[detectIndex].obj_count = objCount;

				vector<vector<float>>().swap(detectResult[detectIndex]);
				detectResult[detectIndex].clear();
				isUseDet = false;
			}
			else  /* 跟踪：后四帧 纯粹跟踪 纯跟踪结果不返回 */
			{
				onelevel_det_result un_result;
				un_result.obj_count = cur_sort.update(isUseDet, false, detectResult[detectIndex], un_result.obj, deleteObjectID[detectIndex]);
			}
		}
		++detectIndex;
	}


	if(vpt_det_result)
	{
		for (int b = 0; b < batchsize; b++)
		{
			delete[] vpt_det_result[b].obj_results_;
		}
		delete[] vpt_det_result;
	}

	// printf("detectIndex:%d det count: %d\n", detectIndex, result[detectIndex-1].obj_count);
	vector <vector< vector <float>>>().swap(detectResult);  // free memory.
	return SUCCESS;
}

/* 算法计算 */
int VPT_Process_GPU2(void * handle, sy_img * batch_img, vector<string>& tasklist,
					vector<onelevel_det_result>& result, vector<vector<int>>& deleteObjectID, vector<vector<onelevel_det_result>>& unUsedResult)
{
	int batchsize = tasklist.size();

	if (result.empty())
		result.resize(batchsize);

	objDetector *tools = (objDetector*)handle;

	bool isUseDet = true;
	int channels = 3;

	/* 结果结构体初始化 */
	vpt_result *vpt_det_result = new vpt_result[batchsize];
	for (int b = 0; b < batchsize; b++)
	{
		vpt_det_result[b].obj_count_ = 0;
		vpt_det_result[b].obj_results_ = new vpt_obj_result[MAX_DET_COUNT];
	}

	/* 路数太多时 按照最大batchsize数 拆批次运行 */
	int cur_batch_size = tools->max_batchsize;
	int cycleTimes = batchsize / cur_batch_size + (batchsize % cur_batch_size == 0 ? 0 : 1);

	for (int c = 0; c < cycleTimes; c++)
	{
		int real_batchsize = c == cycleTimes - 1 ? (batchsize - cur_batch_size*c) : cur_batch_size;
		int startbatch = c*cur_batch_size;

		vpt_result *real_res = vpt_det_result + startbatch;
		vpt_batch(tools->det_handle, batch_img + startbatch, real_batchsize, &real_res);

#if 0
		vector<vector<int>> tempDeleteObjectID;
		tempDeleteObjectID.resize(batchsize);
#endif
	}

	vector <vector< vector <float>>> detectResult(batchsize);  // sort
#if 0
	for (int b = 0; b < batchsize; b++)
	{
		printf("batch: %d, %d %d %d \n", b, batch_img[b].c_ , batch_img[b].h_ , batch_img[b].w_);
		int data_size = batch_img[b].c_ * batch_img[b].h_ * batch_img[b].w_;
		unsigned char *imgdata = new unsigned char[data_size];
		cudaMemcpy(imgdata, batch_img[b].data_, sizeof(unsigned char) * data_size, cudaMemcpyDeviceToHost);
		cv::Mat big_img = cv::Mat(batch_img[b].h_, batch_img[b].w_, CV_8UC3, imgdata);

		for (int c = 0; c < result[b].obj_count && c < MAX_OBJ_COUNT; c++)
		{
			printf("%d %d %d %d\n", vpt_det_result[b].obj_results_[c].obj_rect.left_, vpt_det_result[b].obj_results_[c].obj_rect.top_,
				vpt_det_result[b].obj_results_[c].obj_rect.width_,
				vpt_det_result[b].obj_results_[c].obj_rect.height_);
			cv::rectangle(big_img, cv::Rect(vpt_det_result[b].obj_results_[c].obj_rect.left_, vpt_det_result[b].obj_results_[c].obj_rect.top_,
				vpt_det_result[b].obj_results_[c].obj_rect.width_,
				vpt_det_result[b].obj_results_[c].obj_rect.height_), cv::Scalar(158, 52, 254), 3, 1, 0);
		}

		face_det_writer << big_img;
		delete[] imgdata;
	}
#endif
	/* 将检测的结果放进数组 转换为跟踪的输入需要（若为人脸 则检测结果可能跟多，比如需要带上ldmk点） */
	// filter by threshold.
	for (int b = 0; b < batchsize; b++)
	{
		vpt_result &cur_result = vpt_det_result[b];

		for (int c = 0; c < cur_result.obj_count_ && c < MAX_OBJ_COUNT; c++)
		{
			float x1 = vpt_det_result[b].obj_results_[c].obj_rect.left_;
			float y1 = vpt_det_result[b].obj_results_[c].obj_rect.top_;
			float x2 = vpt_det_result[b].obj_results_[c].obj_rect.left_ + vpt_det_result[b].obj_results_[c].obj_rect.width_;
			float y2 = vpt_det_result[b].obj_results_[c].obj_rect.top_ + vpt_det_result[b].obj_results_[c].obj_rect.height_;

			float class_id = vpt_det_result[b].obj_results_[c].obj_index;
			float score = vpt_det_result[b].obj_results_[c].obj_score;

			if (score >= THRESHOLD)
			{
				vector <float> obj;
				obj.push_back(x1);
				obj.push_back(y1);
				obj.push_back(x2);
				obj.push_back(y2);
				obj.push_back(score);
				obj.push_back(class_id);
				detectResult[b].push_back(obj);
			}
		}
	}

	for (size_t detectIndex = 0; detectIndex < batchsize; detectIndex++) {
		string task_id = tasklist[detectIndex];

		if (! tools->taskTrackers[task_id].tracker.GetState())
			continue;

		Sort &cur_sort = tools->taskTrackers[task_id].tracker;
		isUseDet = true;

		/* FusionInterval是跳帧参数，以十类人车物为例，一般跳5帧，所以第一帧检测，后续四帧纯跟踪 */
		for (int j = 0; j < FusionInterval; j++)
		{
			/* 跟踪：第一帧 带检测框信息的跟踪，取结果返回 */
			if (j == 0)
			{
				int objCount = cur_sort.update(isUseDet, false, detectResult[detectIndex], result[detectIndex].obj, deleteObjectID[detectIndex]);
				result[detectIndex].obj_count = objCount;

				vector<vector<float>>().swap(detectResult[detectIndex]);
				detectResult[detectIndex].clear();
				isUseDet = false;
			}
			else  /* 跟踪：后四帧 纯粹跟踪 纯跟踪结果不返回 */
			{
				onelevel_det_result un_result;
				un_result.obj_count = cur_sort.update(isUseDet, false, detectResult[detectIndex], un_result.obj, deleteObjectID[detectIndex]);
			}
		}
	}

	if(vpt_det_result)
	{
		for (int b = 0; b < batchsize; b++)
		{
			delete[] vpt_det_result[b].obj_results_;
		}
		delete[] vpt_det_result;
	}

	// printf("detectIndex:%d det count: %d\n", detectIndex, result[detectIndex-1].obj_count);
	vector <vector< vector <float>>>().swap(detectResult);  // free memory.
	return SUCCESS;
}


/* 算法句柄 资源释放 */
void VPT_Release(void * handle)
{
	objDetector *tools = (objDetector*)handle;

	if (tools)
	{
		if (tools->det_handle)
		{
			// ctools_release(&tools->det_handle);
			vpt_release(&tools->det_handle);
			tools->det_handle = NULL;
		}

		//vector<TaskTracker>().swap(tools->taskTrackers);
		delete tools;
		tools = NULL;
	}
}

/* 任务添加跟踪器 */
void AddTaskTracker(void * handle, const string taskID, double rWidth, double rHeight)
{
	objDetector *tools = (objDetector*)handle;
	TaskTracker t;
	t.TaskID = taskID;
	t.ratioWidth = rWidth;
	t.ratioHeight = rHeight;
	tools->taskTrackers[taskID] = t;
}

/* 任务结束跟踪器 */
bool FinishTaskTracker(void * handle, const string taskID)
{
	objDetector *tools = (objDetector*)handle;
	tools->taskTrackers.erase(taskID);
	return true;
}

/*
 * Pause the tracker of a task.
 *
 * Calls Pause() on the tracker registered under taskID. A NULL handle or an
 * unknown task ID is a no-op: the lookup uses find() so that pausing a task
 * that was never added does not insert a default-constructed tracker.
 */
void PauseTaskTracker(void * handle, const string taskID)
{
	objDetector *tools = (objDetector*)handle;
	if (tools == NULL)
		return;

	auto found = tools->taskTrackers.find(taskID);
	if (found == tools->taskTrackers.end())
		return;

	found->second.tracker.Pause();
}

/*
 * Restart the tracker of a task.
 *
 * Calls ReSet() on the tracker registered under taskID. A NULL handle or an
 * unknown task ID is a no-op: the lookup uses find() so that restarting a task
 * that was never added does not insert a phantom tracker, which VPT_Process_GPU
 * would then pair with a batch slot.
 */
void RestartTaskTracker(void * handle, const string taskID)
{
	objDetector *tools = (objDetector*)handle;
	if (tools == NULL)
		return;

	auto found = tools->taskTrackers.find(taskID);
	if (found == tools->taskTrackers.end())
		return;

	found->second.tracker.ReSet();
}

/*
void DrawTracker(void * handle, const string taskID, cv::Mat *img)
{
	objDetector *tools = (objDetector*)handle;
	for (int i = 0; i < tools->taskTrackers.size(); i++)
	{
		if (strcmp((tools->taskTrackers[i].TaskID).c_str(), taskID.c_str()) == 0)
		{
			tools->taskTrackers[i].tracker.addTracker(img);
			break;
		}
	}
}
*/

/*
 * Re-order a 3-channel float image in place from planar layout (three
 * consecutive width*height planes) to interleaved layout (three values per pixel).
 *
 * image       buffer of 3 * testWidth * testHeight floats; it is the source of a
 *             device-to-host copy and the destination of a host-to-device copy,
 *             so it is expected to be device memory
 * testWidth   image width in pixels
 * testHeight  image height in pixels
 *
 * The image is copied to the host, permuted there and copied back. Both copies
 * go through CHECK. Non-positive dimensions are ignored.
 */
void permute(float * image, int testWidth, int testHeight)
{
	if (testWidth <= 0 || testHeight <= 0)
		return;

	/* Sizes are computed in size_t so large images cannot overflow int. */
	const size_t planeSize = (size_t)testWidth * (size_t)testHeight;
	const size_t elemCount = planeSize * 3;
	const size_t byteCount = elemCount * sizeof(float);

	/* Host staging buffers; vectors release the memory on every exit path
	 * (the previous new[] buffer was never freed). */
	std::vector<float> planar(elemCount);
	std::vector<float> interleaved(elemCount);

	CHECK(cudaMemcpy(planar.data(), image, byteCount, cudaMemcpyDeviceToHost));

	for (size_t px = 0; px < planeSize; px++)
	{
		interleaved[3 * px]     = planar[px];                  // plane 0 (b in the original notes)
		interleaved[3 * px + 1] = planar[planeSize + px];      // plane 1 (g)
		interleaved[3 * px + 2] = planar[2 * planeSize + px];  // plane 2 (r)
	}

	CHECK(cudaMemcpy(image, interleaved.data(), byteCount, cudaMemcpyHostToDevice));
}

/*
 * Copy an 8-bit, 3-channel interleaved image into a cv::Mat and dump it for debugging.
 *
 * image   source buffer of width * height * 3 bytes; it is read with a
 *         device-to-host cudaMemcpy, so it is expected to be device memory
 * width   image width in pixels
 * height  image height in pixels
 *
 * Side effects: writes the raw bytes to "decode.bin" and the image to
 * "input3.jpg" in the current working directory.
 *
 * Returns the host copy as a height x width CV_8UC3 matrix. If the copy fails,
 * an error is printed and the matrix content is unspecified.
 */
cv::Mat GpuMat2OpencvMat(unsigned char* image, int width, int height)
{
	cv::Mat host_image;
	host_image.create(height, width, CV_8UC3);

	/* Use the real image size everywhere. The previous hard-coded 1920/1080
	 * sizes read and wrote out of bounds for any other resolution.
	 * A matrix allocated by create() is stored contiguously, so a single
	 * copy straight into host_image.data is sufficient. */
	const size_t byteCount = (size_t)width * (size_t)height * 3 * sizeof(unsigned char);

	cudaError_t code = cudaMemcpy(host_image.data, image, byteCount, cudaMemcpyDeviceToHost);
	if (code != cudaSuccess)
	{
		printf("==========================================================error");
	}

	/* Raw dump of exactly the bytes that were copied. */
	std::ofstream outfile("decode.bin", std::ios::out | std::ios::binary);
	outfile.write((const char*)host_image.data, (std::streamsize)byteCount);
	outfile.close();

	cv::imwrite("input3.jpg", host_image);
	return host_image;
}