#include "DogPoseDetector.h" #include "cuda_kernels.h" #include #include "opencv2/opencv.hpp" // @brief 用于创建IBuilder、IRuntime或IRefitter实例的记录器用于通过该接口创建的所有对象。 // 在释放所有创建的对象之前,记录器应一直有效。 // 主要是实例化ILogger类下的log()方法。 class Logger : public nvinfer1::ILogger { void log(Severity severity, const char* message) noexcept { // suppress info-level messages if (severity != Severity::kINFO) std::cout << message << std::endl; } } gLogger; bool DogPoseDetector::init() { const char* model_path_onnx = "../weights/best.onnx"; const char* model_path_engine = "../weights/best.engine"; const char* input_node_name = "images"; const char* output_node_name = "output"; // 读取本地模型文件 std::ifstream file_ptr(model_path_engine, std::ios::binary); if (!file_ptr.good()) { std::cerr << "文件无法打开,请确定文件是否可用!" << std::endl; return false; } size_t size = 0; file_ptr.seekg(0, file_ptr.end); // 将读指针从文件末尾开始移动0个字节 size = file_ptr.tellg(); // 返回读指针的位置,此时读指针的位置就是文件的字节数 file_ptr.seekg(0, file_ptr.beg); // 将读指针从文件开头开始移动0个字节 char* model_stream = new char[size]; file_ptr.read(model_stream, size); file_ptr.close(); // 日志记录接口 //Logger logger; // 反序列化引擎 nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger); // 推理引擎 // 保存模型的模型结构、模型参数以及最优计算kernel配置; // 不能跨平台和跨TensorRT版本移植 nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream, size); // 上下文 // 储存中间值,实际进行推理的对象 // 由engine创建,可创建多个对象,进行多推理任务 context = engine->createExecutionContext(); // 创建GPU显存缓冲区 m_data_buffer = new void*[2]; // 创建GPU显存输入缓冲区 m_input_node_index = engine->getBindingIndex(input_node_name); m_input_node_dim = engine->getBindingDimensions(m_input_node_index); size_t input_data_length = m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3]; cudaMalloc(&(m_data_buffer[m_input_node_index]), input_data_length * sizeof(float)); // 创建GPU显存输出缓冲区 m_output_node_index = engine->getBindingIndex(output_node_name); m_output_node_dim = engine->getBindingDimensions(m_output_node_index); size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2]; cudaMalloc(&(m_data_buffer[m_output_node_index]), output_data_length * sizeof(float)); return true; } static void saveCUDAImg(unsigned char *pGpuBgb, int src_width, int src_height, std::string filename) { int rgb_size = 3 * src_width * src_height; unsigned char *cpu_data = new unsigned char[rgb_size]; cudaError_t cudaStatus = cudaMemcpy(cpu_data, pGpuBgb, rgb_size * sizeof(unsigned char), cudaMemcpyDeviceToHost); cv::Mat img(src_height, src_width, CV_8UC3, cpu_data); cv::imwrite(filename.c_str(), img); delete[] cpu_data; cpu_data = nullptr; } static void showFloatData(float* pGpuData, int dst_width, int dst_height) { int buf_size = 3 * dst_width * dst_height; float* pBuf = new float[buf_size]; cudaMemcpy(pBuf, pGpuData, buf_size * sizeof(float), cudaMemcpyDeviceToHost); cv::Mat image(dst_height, dst_width, CV_32FC3, pBuf); cv::imshow("show", image); cv::waitKey(1); delete[] pBuf; pBuf = nullptr; } std::vector DogPoseDetector::detect(unsigned char *pGpuBgr, int src_width, int src_height) { int dst_width = m_input_node_dim.d[2]; int dst_height = m_input_node_dim.d[3]; int max_side_length = std::max(src_width, src_height); //saveCUDAImg(pGpuBgr, src_width, src_height, "src.jpg"); cudaStream_t stream; cudaStreamCreate(&stream); { // 显存-->内存-->显存 //int rgb_size = 3 * src_width * src_height; //uint8 *cpu_data = new uint8[rgb_size]; //cudaError_t cudaStatus = cudaMemcpy(cpu_data, pGpuBgr, rgb_size * sizeof(uint8), cudaMemcpyDeviceToHost); //cv::Mat image(src_height, src_width, CV_8UC3, cpu_data); //cv::Mat max_image = cv::Mat::zeros(cv::Size(max_side_length, max_side_length), CV_8UC3); //cv::Rect roi(0, 0, image.cols, image.rows); //image.copyTo(max_image(roi)); //// 将图像归一化,并放缩到指定大小 //cv::Size input_node_shape(m_input_node_dim.d[2], m_input_node_dim.d[3]); //cv::Mat BN_image = cv::dnn::blobFromImage(max_image, 1 / 255.0, input_node_shape, cv::Scalar(0, 0, 0), true, false); //size_t input_data_length = m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3]; //std::vector input_data(input_data_length); //memcpy(input_data.data(), BN_image.ptr(), input_data_length * sizeof(float)); //cudaMemcpyAsync(m_data_buffer[m_input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream); } cuda_common::resizeAndNorm(pGpuBgr, src_width, src_height, (float*)m_data_buffer[m_input_node_index], dst_width, dst_height); // 模型推理 context->enqueueV2(m_data_buffer, stream, nullptr); size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2]; float* result_array = new float[output_data_length]; cudaMemcpyAsync(result_array, m_data_buffer[m_output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream); cudaDeviceSynchronize(); ResultYolov5 result; result.factor = max_side_length / (float)m_input_node_dim.d[2]; result.read_class_names("F:/dog_trainer_sys/train2/classes.txt"); return result.yolov5_result(result_array, 0.6); }