#include "DogPoseDetectorOnnx.h"
#include "logger.h"
#include "ErrorRecorder.h"
#include "logging.h"
#include "common.h"
#include "cuda_kernels.h"
// NOTE(review): two '#include' directives lost their <...> targets during text
// extraction; restored as the headers this file demonstrably uses
// (fopen/fread, malloc/free, std::max, std::vector) — confirm against the
// original source.
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <vector>
#include "opencv2/opencv.hpp"

using namespace nvinfer1;

SampleErrorRecorder gRecorder;

namespace sample
{
Logger gLogger{Logger::Severity::kINFO};
LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};

// Propagate one severity threshold to the shared logger and every stream consumer.
void setReportableSeverity(Logger::Severity severity)
{
    gLogger.setReportableSeverity(severity);
    gLogVerbose.setReportableSeverity(severity);
    gLogInfo.setReportableSeverity(severity);
    gLogWarning.setReportableSeverity(severity);
    gLogError.setReportableSeverity(severity);
    gLogFatal.setReportableSeverity(severity);
}
} // namespace sample

DogPoseDetectorOnnx::DogPoseDetectorOnnx()
{
}

DogPoseDetectorOnnx::~DogPoseDetectorOnnx()
{
}

// Returns the size in bytes of an already-open file, leaving the read position
// rewound to the start. Returns 0 on failure.
// Fix: the original returned (size_t)-1 on failure, which the caller passed
// straight to malloc().
static size_t getFileSize(FILE* file)
{
    size_t fileSize = 0;
    if (file != NULL)
    {
        if (fseek(file, 0L, SEEK_END) == 0)
        {
            long pos = ftell(file);
            if (pos > 0)
            {
                fileSize = static_cast<size_t>(pos);
            }
        }
        rewind(file);
    }
    return fileSize;
}

// Builds (or loads a cached) TensorRT engine for the ONNX pose model, creates
// the execution context, and allocates the two GPU bindings (input + output).
// Returns false on any failure.
//
// NOTE(review): every template argument in this function was stripped by text
// extraction; they are restored here following the standard TensorRT sample
// idiom (SampleUniquePtr<nvinfer1::IBuilder>, etc.) — confirm against the
// project's SampleUniquePtr definition in common.h.
bool DogPoseDetectorOnnx::init()
{
    const char* model_path_onnx = "../weights/best.onnx";
    const char* input_node_name = "images";
    const char* output_node_name = "output";
    const char* plan_file_name = "dog_pose_detect_plan.bin";

    FILE* f = fopen(plan_file_name, "rb");
    if (f == nullptr)
    {
        // No cached plan on disk: build the engine from the ONNX model.
        auto builder = SampleUniquePtr<nvinfer1::IBuilder>(
            nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
        if (!builder)
        {
            return false;
        }

        const auto explicitBatch =
            1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
        auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(
            builder->createNetworkV2(explicitBatch));
        if (!network)
        {
            return false;
        }

        auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
        if (!config)
        {
            return false;
        }

        auto parser = SampleUniquePtr<nvonnxparser::IParser>(
            nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
        if (!parser)
        {
            return false;
        }

        auto parsed = parser->parseFromFile(
            model_path_onnx, static_cast<int>(sample::gLogger.getReportableSeverity()));
        if (!parsed)
        {
            return false;
        }

        if (m_bUseFP16)
        {
            config->setFlag(BuilderFlag::kFP16);
        }
        else
        {
            // INT8 without a calibrator: fake per-tensor dynamic ranges are required.
            config->setFlag(BuilderFlag::kINT8);
            samplesCommon::setAllDynamicRanges(network.get(), 127.0F, 127.0F);
        }
        samplesCommon::enableDLA(builder.get(), config.get(), m_dlaCore);

        // CUDA stream used for profiling by the builder.
        auto profileStream = samplesCommon::makeCudaStream();
        if (!profileStream)
        {
            return false;
        }
        config->setProfileStream(*profileStream);

        SampleUniquePtr<nvinfer1::IHostMemory> plan{
            builder->buildSerializedNetwork(*network, *config)};
        if (!plan)
        {
            return false;
        }

        // Cache the serialized engine so later runs skip the (slow) build.
        // Fix: the original called fwrite/fclose on an unchecked fopen result;
        // a failed cache write is non-fatal, so just skip caching.
        FILE* fp = fopen(plan_file_name, "wb");
        if (fp != nullptr)
        {
            fwrite(plan->data(), 1, plan->size(), fp);
            fclose(fp);
        }

        mRuntime = std::shared_ptr<nvinfer1::IRuntime>(
            createInferRuntime(sample::gLogger.getTRTLogger()));
        if (!mRuntime)
        {
            return false;
        }

        mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
            mRuntime->deserializeCudaEngine(plan->data(), plan->size()),
            samplesCommon::InferDeleter());
        if (!mEngine)
        {
            return false;
        }
    }
    else
    {
        // A cached plan exists: read and deserialize it directly.
        size_t file_size = getFileSize(f);
        if (file_size == 0)
        {
            fclose(f);
            return false;
        }
        void* pPlanData = malloc(file_size);
        if (pPlanData == nullptr)
        {
            fclose(f);
            return false;
        }
        size_t bytesRead = fread(pPlanData, 1, file_size, f);
        fclose(f); // fix: the original never closed this FILE handle

        bool bSucceed = false;
        do
        {
            if (bytesRead != file_size)
            {
                break; // fix: the original ignored the fread result
            }
            mRuntime = std::shared_ptr<nvinfer1::IRuntime>(
                createInferRuntime(sample::gLogger.getTRTLogger()));
            if (!mRuntime)
            {
                break;
            }
            mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
                mRuntime->deserializeCudaEngine(pPlanData, file_size),
                samplesCommon::InferDeleter());
            if (!mEngine)
            {
                break;
            }
            bSucceed = true;
        } while (0);
        free(pPlanData);
        if (!bSucceed)
        {
            return false;
        }
    }

    context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
    if (!context)
    {
        return false;
    }

    // Allocate the device buffers for the two bindings, indexed by binding id.
    m_data_buffer = new void*[2];

    // Input binding: dims d[1..3] are used as C*H*W; allocate that many floats.
    m_input_node_index = mEngine->getBindingIndex(input_node_name);
    m_input_node_dim = mEngine->getBindingDimensions(m_input_node_index);
    size_t input_data_length =
        m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3];
    if (cudaMalloc(&(m_data_buffer[m_input_node_index]),
                   input_data_length * sizeof(float)) != cudaSuccess)
    {
        return false; // fix: the original ignored the cudaMalloc result
    }

    // Output binding: dims d[1]*d[2] floats (e.g. rows x per-row values).
    m_output_node_index = mEngine->getBindingIndex(output_node_name);
    m_output_node_dim = mEngine->getBindingDimensions(m_output_node_index);
    size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2];
    if (cudaMalloc(&(m_data_buffer[m_output_node_index]),
                   output_data_length * sizeof(float)) != cudaSuccess)
    {
        return false;
    }
    return true;
}

// Runs pose inference on a BGR image that is already resident in GPU memory.
//   pGpuBgr    - device pointer to packed 8-bit BGR pixels, src_width x src_height
//   src_width  - source image width in pixels
//   src_height - source image height in pixels
// Returns the post-processed YOLOv5 detections.
//
// NOTE(review): the element type of the returned std::vector was lost during
// text extraction — it must match the declaration in DogPoseDetectorOnnx.h.
std::vector DogPoseDetectorOnnx::detect(unsigned char *pGpuBgr, int src_width, int src_height)
{
    int dst_width = m_input_node_dim.d[2];
    int dst_height = m_input_node_dim.d[3];
    int max_side_length = std::max(src_width, src_height);

    cudaStream_t stream;
    cudaStreamCreate(&stream);

    // Resize + normalize entirely on the GPU. (The original kept a large
    // commented-out device->host->device OpenCV path here; removed as dead code.)
    // NOTE(review): resizeAndNorm is not passed `stream`, so it presumably runs
    // on the default stream — confirm, or plumb the stream through.
    cuda_common::resizeAndNorm(pGpuBgr, src_width, src_height,
                               (float*)m_data_buffer[m_input_node_index],
                               dst_width, dst_height);

    // Inference on the created stream.
    context->enqueueV2(m_data_buffer, stream, nullptr);

    // Copy the raw network output back to the host.
    size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2];
    float* result_array = new float[output_data_length];
    cudaMemcpyAsync(result_array, m_data_buffer[m_output_node_index],
                    output_data_length * sizeof(float),
                    cudaMemcpyDeviceToHost, stream);
    // Fix: synchronize only this stream instead of the whole device.
    cudaStreamSynchronize(stream);
    cudaStreamDestroy(stream); // fix: the original leaked the stream every call

    ResultYolov5 result;
    // Scale factor mapping network-space coordinates back to the source image
    // (the input is letterboxed into a max_side_length square).
    result.factor = max_side_length / (float)m_input_node_dim.d[2];
    result.read_class_names("../weights/classes.txt");
    auto detections = result.yolov5_result(result_array, 0.6);
    delete[] result_array; // fix: the original leaked this buffer every call
    return detections;
}