// DogPoseDetector.cpp
#include "DogPoseDetector.h"
#include "cuda_kernels.h"
#include <algorithm>

#include "opencv2/opencv.hpp"

// @brief Logger used when creating IBuilder, IRuntime or IRefitter instances; it is shared by all
// objects created through that interface. The logger must remain valid until every created object
// has been released. Its main job is to implement the log() method of the ILogger interface.
class Logger : public nvinfer1::ILogger
{
	void log(Severity severity, const char* message) noexcept override
	{
		// suppress info-level messages
		if (severity != Severity::kINFO)
			std::cout << message << std::endl;
	}
} gLogger;

bool DogPoseDetector::init() {
	const char* model_path_onnx = "../weights/best.onnx";
	const char* model_path_engine = "../weights/best.engine";
	const char* input_node_name = "images";
	const char* output_node_name = "output";

	// Read the serialized engine file from disk
	std::ifstream file_ptr(model_path_engine, std::ios::binary);
	if (!file_ptr.good()) {
		std::cerr << "Failed to open the engine file; please check that it exists and is readable!" << std::endl;
		return false;
	}
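	// Note: the .engine file is normally produced offline from best.onnx, e.g. with the
	// trtexec tool that ships with TensorRT (exact flags depend on the TensorRT version):
	//   trtexec --onnx=best.onnx --saveEngine=best.engine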

	size_t size = 0;
	file_ptr.seekg(0, file_ptr.end);	// move the read pointer to the end of the file
	size = file_ptr.tellg();	// tellg() now returns the file size in bytes
	file_ptr.seekg(0, file_ptr.beg);	// move the read pointer back to the beginning
	char* model_stream = new char[size];
	file_ptr.read(model_stream, size);
	file_ptr.close();

	// Logging interface
	//Logger logger;
	// Deserialize the engine
	nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
	// Inference engine: holds the network structure, the weights and the optimal kernel configuration;
	// it is not portable across platforms or TensorRT versions.
	nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream, size);
	delete[] model_stream;	// the serialized blob is no longer needed once the engine is deserialized
	if (engine == nullptr) {
		std::cerr << "Failed to deserialize the CUDA engine!" << std::endl;
		return false;
	}
	// Execution context
	// Stores intermediate values; this is the object that actually runs inference.
	// Created from the engine; several contexts can be created for concurrent inference tasks.
	context = engine->createExecutionContext();
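	// Note: runtime and engine are not destroyed here; the execution context needs the engine to
	// stay alive, so both effectively live for the lifetime of the process. Keeping them as class
	// members and releasing them in a destructor would allow a clean shutdown.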

	// Allocate the array of device buffer pointers (one slot per binding)
	m_data_buffer = new void*[2];
	// Device buffer for the input binding
	m_input_node_index = engine->getBindingIndex(input_node_name);
	m_input_node_dim = engine->getBindingDimensions(m_input_node_index);
	size_t input_data_length = m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3];
	cudaMalloc(&(m_data_buffer[m_input_node_index]), input_data_length * sizeof(float));
	// Device buffer for the output binding
	m_output_node_index = engine->getBindingIndex(output_node_name);
	m_output_node_dim = engine->getBindingDimensions(m_output_node_index);
	size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2];
	cudaMalloc(&(m_data_buffer[m_output_node_index]), output_data_length * sizeof(float));
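	// The output binding is assumed to have shape [batch, num_predictions, attributes],
	// which is why only d[1] and d[2] are used when sizing the buffers above.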

	return true;
}

static void saveCUDAImg(unsigned char *pGpuBgb, int src_width, int src_height, std::string filename) {
	int rgb_size = 3 * src_width * src_height;
	unsigned char *cpu_data = new unsigned char[rgb_size];
	cudaError_t cudaStatus = cudaMemcpy(cpu_data, pGpuBgb, rgb_size * sizeof(unsigned char), cudaMemcpyDeviceToHost);
	cv::Mat img(src_height, src_width, CV_8UC3, cpu_data);
	cv::imwrite(filename.c_str(), img);
	delete[] cpu_data;
	cpu_data = nullptr;
}

static void showFloatData(float* pGpuData, int dst_width, int dst_height) {
	int buf_size = 3 * dst_width * dst_height;
	float* pBuf = new float[buf_size];
	cudaMemcpy(pBuf, pGpuData, buf_size * sizeof(float), cudaMemcpyDeviceToHost);
	cv::Mat image(dst_height, dst_width, CV_32FC3, pBuf);
	cv::imshow("show", image);
	cv::waitKey(1);
	delete[] pBuf;
	pBuf = nullptr;
}

std::vector<DogPoseResult> DogPoseDetector::detect(unsigned char *pGpuBgr, int src_width, int src_height) {
	// The input binding is NCHW, so d[2] is the height and d[3] the width;
	// for this model the input is square, so the two values coincide.
	int dst_width = m_input_node_dim.d[3];
	int dst_height = m_input_node_dim.d[2];

	int max_side_length = std::max(src_width, src_height);

	//saveCUDAImg(pGpuBgr, src_width, src_height, "src.jpg");

	cudaStream_t stream;
	cudaStreamCreate(&stream);

	{
		// device memory --> host memory --> device memory (old CPU preprocessing path, kept for reference)
		//int rgb_size = 3 * src_width * src_height;
		//uint8 *cpu_data = new uint8[rgb_size];
		//cudaError_t cudaStatus = cudaMemcpy(cpu_data, pGpuBgr, rgb_size * sizeof(uint8), cudaMemcpyDeviceToHost);
		//cv::Mat image(src_height, src_width, CV_8UC3, cpu_data);


		//cv::Mat max_image = cv::Mat::zeros(cv::Size(max_side_length, max_side_length), CV_8UC3);
		//cv::Rect roi(0, 0, image.cols, image.rows);
		//image.copyTo(max_image(roi));
		//// Normalize the image and scale it to the network input size
		//cv::Size input_node_shape(m_input_node_dim.d[2], m_input_node_dim.d[3]);
		//cv::Mat BN_image = cv::dnn::blobFromImage(max_image, 1 / 255.0, input_node_shape, cv::Scalar(0, 0, 0), true, false);

		//size_t input_data_length = m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3];
		//std::vector<float> input_data(input_data_length);
		//memcpy(input_data.data(), BN_image.ptr<float>(), input_data_length * sizeof(float));

		//cudaMemcpyAsync(m_data_buffer[m_input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream);
	}
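	// GPU-only preprocessing path: resizeAndNorm (from cuda_kernels.h) is expected to resize the
	// BGR frame to dst_width x dst_height and write the normalized float tensor directly into the
	// input binding buffer, replacing the commented-out CPU round trip above.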

	cuda_common::resizeAndNorm(pGpuBgr, src_width, src_height, (float*)m_data_buffer[m_input_node_index], dst_width, dst_height);

	// Run inference
	context->enqueueV2(m_data_buffer, stream, nullptr);
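	// enqueueV2() only enqueues the inference work on the stream; the copy and the
	// synchronization below are what make the results visible on the host.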

	size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2];
	float* result_array = new float[output_data_length];
	cudaMemcpyAsync(result_array, m_data_buffer[m_output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream);

	cudaStreamSynchronize(stream);	// wait for the asynchronous copy to finish before touching result_array
	cudaStreamDestroy(stream);

	ResultYolov5 result;
	result.factor = max_side_length / (float)m_input_node_dim.d[2];
	result.read_class_names("F:/dog_trainer_sys/train2/classes.txt");

	std::vector<DogPoseResult> poses = result.yolov5_result(result_array, 0.6);
	delete[] result_array;	// free the host-side copy of the raw network output
	return poses;
}
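
/*
 * Minimal usage sketch (illustrative only; the frame pointer, its source and the
 * drawing code below are assumptions, not part of this file):
 *
 *   DogPoseDetector detector;
 *   if (!detector.init()) return -1;
 *   // d_bgr: device pointer to a packed BGR frame already resident in GPU memory
 *   std::vector<DogPoseResult> poses = detector.detect(d_bgr, frame_width, frame_height);
 *   for (const auto& pose : poses) {
 *       // draw or log the detection
 *   }
 */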