Commit 07639e75fe40de0f5e41eb26e9fc72252421e275
1 parent
7128e494
实现狗狗姿态检测
Showing
14 changed files
with
880 additions
and
51 deletions
.vs/FFNvDecoder/v14/.suo
No preview for this file type
FFNvDecoder/AlgorithmResult.cpp
0 → 100644
1 | +#include "AlgorithmResult.h" | |
2 | + | |
3 | +#include "opencv2/opencv.hpp" | |
4 | + | |
5 | +void ResultYolov5::read_class_names(std::string path_name) | |
6 | +{ | |
7 | + std::ifstream infile; | |
8 | + infile.open(path_name.data()); //将文件流对象与文件连接起来 | |
9 | + assert(infile.is_open()); //若失败,则输出错误消息,并终止程序运行 | |
10 | + | |
11 | + std::string str; | |
12 | + while (getline(infile, str)) { | |
13 | + class_names.push_back(str); | |
14 | + str.clear(); | |
15 | + | |
16 | + } | |
17 | + infile.close(); //关闭文件输入流 | |
18 | + | |
19 | +} | |
20 | + | |
21 | +std::vector<DogPoseResult> ResultYolov5::yolov5_result(float* result, float threshold) { | |
22 | + cv::Mat det_output = cv::Mat(25200, 13, CV_32F, result); | |
23 | + //// post-process | |
24 | + std::vector<cv::Rect> position_boxes; | |
25 | + std::vector<int> classIds; | |
26 | + std::vector<float> confidences; | |
27 | + | |
28 | + //std::cout << det_output.rows << std::endl; | |
29 | + for (int i = 0; i < det_output.rows; i++) { | |
30 | + float confidence = det_output.at<float>(i, 4); | |
31 | + if (confidence < 0.2) { | |
32 | + continue; | |
33 | + } | |
34 | + //std::cout << "confidence " << confidence << std::endl; | |
35 | + cv::Mat classes_scores = det_output.row(i).colRange(5, 13); | |
36 | + cv::Point classIdPoint; | |
37 | + double score; | |
38 | + // 获取一组数据中最大值及其位置 | |
39 | + minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint); | |
40 | + // 置信度 0~1之间 | |
41 | + if (score > 0.25) | |
42 | + { | |
43 | + float cx = det_output.at<float>(i, 0); | |
44 | + float cy = det_output.at<float>(i, 1); | |
45 | + float ow = det_output.at<float>(i, 2); | |
46 | + float oh = det_output.at<float>(i, 3); | |
47 | + int x = static_cast<int>((cx - 0.5 * ow) * factor); | |
48 | + int y = static_cast<int>((cy - 0.5 * oh) * factor); | |
49 | + int width = static_cast<int>(ow * factor); | |
50 | + int height = static_cast<int>(oh * factor); | |
51 | + cv::Rect box; | |
52 | + box.x = x; | |
53 | + box.y = y; | |
54 | + box.width = width; | |
55 | + box.height = height; | |
56 | + | |
57 | + position_boxes.push_back(box); | |
58 | + classIds.push_back(classIdPoint.x); | |
59 | + confidences.push_back(score); | |
60 | + } | |
61 | + } | |
62 | + // NMS | |
63 | + std::vector<int> indexes; | |
64 | + cv::dnn::NMSBoxes(position_boxes, confidences, 0.25, 0.45, indexes); | |
65 | + | |
66 | + //for (size_t i = 0; i < indexes.size(); i++) { | |
67 | + // int index = indexes[i]; | |
68 | + // int idx = classIds[index]; | |
69 | + // cv::rectangle(image, position_boxes[index], cv::Scalar(0, 0, 255), 2, 8); | |
70 | + // cv::rectangle(image, cv::Point(position_boxes[index].tl().x, position_boxes[index].tl().y - 20), | |
71 | + // cv::Point(position_boxes[index].br().x, position_boxes[index].tl().y), cv::Scalar(0, 255, 255), -1); | |
72 | + // cv::putText(image, class_names[idx], cv::Point(position_boxes[index].tl().x, position_boxes[index].tl().y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, cv::Scalar(0, 0, 0)); | |
73 | + //} | |
74 | + | |
75 | + std::vector<DogPoseResult> vecPoseResult; | |
76 | + for (size_t i = 0; i < indexes.size(); i++) { | |
77 | + int index = indexes[i]; | |
78 | + int idx = classIds[index]; | |
79 | + | |
80 | + DogPoseResult poseResult; | |
81 | + poseResult.x = position_boxes[index].x; | |
82 | + poseResult.y = position_boxes[index].y; | |
83 | + poseResult.width = position_boxes[index].width; | |
84 | + poseResult.height = position_boxes[index].height; | |
85 | + poseResult.confidence = confidences[index]; | |
86 | + poseResult.classId = classIds[index]; | |
87 | + poseResult.className = class_names[idx]; | |
88 | + | |
89 | + vecPoseResult.push_back(poseResult); | |
90 | + } | |
91 | + | |
92 | + return vecPoseResult; | |
93 | +} | |
0 | 94 | \ No newline at end of file | ... | ... |
FFNvDecoder/AlgorithmResult.h
0 → 100644
1 | +#pragma once | |
2 | + | |
3 | +#ifndef RESULT_H | |
4 | +#define RESULT_H | |
5 | + | |
6 | +#include <fstream> | |
7 | +#include <iterator> | |
8 | +#include <memory> | |
9 | +#include <sstream> | |
10 | +#include <string> | |
11 | +#include <vector> | |
12 | + | |
13 | + | |
/// One detected box together with its class.
/// Every member has a defined default so a freshly constructed result never
/// exposes indeterminate values.
struct DogPoseResult {
	int x{ 0 };                // left edge of the box
	int y{ 0 };                // top edge of the box
	int height{ 0 };           // box height
	int width{ 0 };            // box width
	float confidence{ 0.0f };  // score of the winning class
	int classId{ -1 };         // index of the winning class; -1 until assigned
	std::string className;     // label for classId; empty when no label is known
};
23 | + | |
24 | +// @brief 处理yolov5的结果 | |
25 | + class ResultYolov5 { | |
26 | +public: | |
27 | + std::vector<std::string> class_names; | |
28 | + float factor; | |
29 | + | |
30 | + //ResultYolov5(); | |
31 | + void read_class_names(std::string path_name); | |
32 | + std::vector<DogPoseResult> yolov5_result(float* result, float threshold); | |
33 | +}; | |
34 | + | |
35 | + | |
36 | +#endif // !RESULT_H | |
0 | 37 | \ No newline at end of file | ... | ... |
FFNvDecoder/DogPoseDetector.cpp
0 → 100644
1 | +#include "DogPoseDetector.h" | |
2 | +#include "cuda_kernels.h" | |
3 | +#include <algorithm> | |
4 | + | |
5 | +#include "opencv2/opencv.hpp" | |
6 | + | |
7 | +// @brief 用于创建IBuilder、IRuntime或IRefitter实例的记录器用于通过该接口创建的所有对象。 | |
8 | +// 在释放所有创建的对象之前,记录器应一直有效。 | |
9 | +// 主要是实例化ILogger类下的log()方法。 | |
10 | +class Logger : public nvinfer1::ILogger | |
11 | +{ | |
12 | + void log(Severity severity, const char* message) noexcept | |
13 | + { | |
14 | + // suppress info-level messages | |
15 | + if (severity != Severity::kINFO) | |
16 | + std::cout << message << std::endl; | |
17 | + } | |
18 | +} gLogger; | |
19 | + | |
20 | +bool DogPoseDetector::init() { | |
21 | + const char* model_path_onnx = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.onnx"; | |
22 | + const char* model_path_engine = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.engine"; | |
23 | + //const char* image_path = "F:/dog_trainer_sys/test1/IMG_6837.JPG"; | |
24 | + std::string lable_path = "F:/dog_trainer_sys/train2/classes.txt"; | |
25 | + const char* input_node_name = "images"; | |
26 | + const char* output_node_name = "output"; | |
27 | + | |
28 | + // 读取本地模型文件 | |
29 | + std::ifstream file_ptr(model_path_engine, std::ios::binary); | |
30 | + if (!file_ptr.good()) { | |
31 | + std::cerr << "文件无法打开,请确定文件是否可用!" << std::endl; | |
32 | + return false; | |
33 | + } | |
34 | + | |
35 | + size_t size = 0; | |
36 | + file_ptr.seekg(0, file_ptr.end); // 将读指针从文件末尾开始移动0个字节 | |
37 | + size = file_ptr.tellg(); // 返回读指针的位置,此时读指针的位置就是文件的字节数 | |
38 | + file_ptr.seekg(0, file_ptr.beg); // 将读指针从文件开头开始移动0个字节 | |
39 | + char* model_stream = new char[size]; | |
40 | + file_ptr.read(model_stream, size); | |
41 | + file_ptr.close(); | |
42 | + | |
43 | + // 日志记录接口 | |
44 | + //Logger logger; | |
45 | + // 反序列化引擎 | |
46 | + nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger); | |
47 | + // 推理引擎 | |
48 | + // 保存模型的模型结构、模型参数以及最优计算kernel配置; | |
49 | + // 不能跨平台和跨TensorRT版本移植 | |
50 | + nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream, size); | |
51 | + // 上下文 | |
52 | + // 储存中间值,实际进行推理的对象 | |
53 | + // 由engine创建,可创建多个对象,进行多推理任务 | |
54 | + context = engine->createExecutionContext(); | |
55 | + | |
56 | + // 创建GPU显存缓冲区 | |
57 | + m_data_buffer = new void*[2]; | |
58 | + // 创建GPU显存输入缓冲区 | |
59 | + m_input_node_index = engine->getBindingIndex(input_node_name); | |
60 | + m_input_node_dim = engine->getBindingDimensions(m_input_node_index); | |
61 | + size_t input_data_length = m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3]; | |
62 | + cudaMalloc(&(m_data_buffer[m_input_node_index]), input_data_length * sizeof(float)); | |
63 | + // 创建GPU显存输出缓冲区 | |
64 | + m_output_node_index = engine->getBindingIndex(output_node_name); | |
65 | + m_output_node_dim = engine->getBindingDimensions(m_output_node_index); | |
66 | + size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2]; | |
67 | + cudaMalloc(&(m_data_buffer[m_output_node_index]), output_data_length * sizeof(float)); | |
68 | + | |
69 | + //cv::namedWindow("show", cv::WINDOW_NORMAL); | |
70 | + | |
71 | + return true; | |
72 | +} | |
73 | + | |
74 | +static void saveCUDAImg(unsigned char *pGpuBgb, int src_width, int src_height, std::string filename) { | |
75 | + int rgb_size = 3 * src_width * src_height; | |
76 | + unsigned char *cpu_data = new unsigned char[rgb_size]; | |
77 | + cudaError_t cudaStatus = cudaMemcpy(cpu_data, pGpuBgb, rgb_size * sizeof(unsigned char), cudaMemcpyDeviceToHost); | |
78 | + cv::Mat img(src_height, src_width, CV_8UC3, cpu_data); | |
79 | + cv::imwrite(filename.c_str(), img); | |
80 | + delete[] cpu_data; | |
81 | + cpu_data = nullptr; | |
82 | +} | |
83 | + | |
84 | +bool DogPoseDetector::detect(unsigned char *pGpuBgr, int src_width, int src_height) { | |
85 | + int dst_width = m_input_node_dim.d[2]; | |
86 | + int dst_height = m_input_node_dim.d[3]; | |
87 | + | |
88 | + int max_side_length = std::max(src_width, src_height); | |
89 | + | |
90 | + | |
91 | + //int buf_size = 3 * src_width * src_height; | |
92 | + //float* pBuf = new float[buf_size]; | |
93 | + //cudaMemcpy(pBuf, pGpuBgr, buf_size * sizeof(unsigned char), cudaMemcpyDeviceToHost); | |
94 | + //cv::Mat image(src_height, src_width, CV_8UC3, pBuf); | |
95 | + | |
96 | + //saveCUDAImg(pGpuBgr, src_width, src_height, "src.jpg"); | |
97 | + | |
98 | + cudaStream_t stream; | |
99 | + cudaStreamCreate(&stream); | |
100 | + | |
101 | + { | |
102 | + //int rgb_size = 3 * src_width * src_height; | |
103 | + //uint8 *cpu_data = new uint8[rgb_size]; | |
104 | + //cudaError_t cudaStatus = cudaMemcpy(cpu_data, pGpuBgr, rgb_size * sizeof(uint8), cudaMemcpyDeviceToHost); | |
105 | + //cv::Mat image(src_height, src_width, CV_8UC3, cpu_data); | |
106 | + | |
107 | + | |
108 | + //cv::Mat max_image = cv::Mat::zeros(cv::Size(max_side_length, max_side_length), CV_8UC3); | |
109 | + //cv::Rect roi(0, 0, image.cols, image.rows); | |
110 | + //image.copyTo(max_image(roi)); | |
111 | + //// 将图像归一化,并放缩到指定大小 | |
112 | + //cv::Size input_node_shape(m_input_node_dim.d[2], m_input_node_dim.d[3]); | |
113 | + //cv::Mat BN_image = cv::dnn::blobFromImage(max_image, 1 / 255.0, input_node_shape, cv::Scalar(0, 0, 0), true, false); | |
114 | + | |
115 | + //size_t input_data_length = m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3]; | |
116 | + //std::vector<float> input_data(input_data_length); | |
117 | + //memcpy(input_data.data(), BN_image.ptr<float>(), input_data_length * sizeof(float)); | |
118 | + | |
119 | + //cudaMemcpyAsync(m_data_buffer[m_input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream); | |
120 | + } | |
121 | + | |
122 | + cuda_common::resizeAndNorm(pGpuBgr, src_width, src_height, (float*)m_data_buffer[m_input_node_index], dst_width, dst_height); | |
123 | + | |
124 | + //int buf_size = 3 * dst_width * dst_height; | |
125 | + //float* pBuf = new float[buf_size]; | |
126 | + //cudaMemcpy(pBuf, m_data_buffer[m_input_node_index], buf_size * sizeof(float), cudaMemcpyDeviceToHost); | |
127 | + //cv::Mat image(dst_height, dst_width, CV_32FC3, pBuf); | |
128 | + //cv::imshow("show", image); | |
129 | + //cv::waitKey(1); | |
130 | + //delete[] pBuf; | |
131 | + //pBuf = nullptr; | |
132 | + | |
133 | + | |
134 | + | |
135 | + | |
136 | + | |
137 | + // 模型推理 | |
138 | + context->enqueueV2(m_data_buffer, stream, nullptr); | |
139 | + | |
140 | + size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2]; | |
141 | + float* result_array = new float[output_data_length]; | |
142 | + cudaMemcpyAsync(result_array, m_data_buffer[m_output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream); | |
143 | + | |
144 | + cudaDeviceSynchronize(); | |
145 | + | |
146 | + | |
147 | + ResultYolov5 result; | |
148 | + result.factor = max_side_length / (float)m_input_node_dim.d[2]; | |
149 | + result.read_class_names("F:/dog_trainer_sys/train2/classes.txt"); | |
150 | + | |
151 | + std::vector<DogPoseResult> vec_result = result.yolov5_result(result_array, 0.6); | |
152 | + if (vec_result.size() > 0) { | |
153 | + DogPoseResult poseResult = vec_result[0]; | |
154 | + std::cout << poseResult.x << std::endl; | |
155 | + std::cout << poseResult.y << std::endl; | |
156 | + std::cout << poseResult.width << std::endl; | |
157 | + std::cout << poseResult.height << std::endl; | |
158 | + std::cout << poseResult.confidence << std::endl; | |
159 | + std::cout << poseResult.classId << std::endl; | |
160 | + std::cout << poseResult.className << std::endl; | |
161 | + | |
162 | + | |
163 | + //cv::Rect position_boxe; | |
164 | + //position_boxe.x = poseResult.x; | |
165 | + //position_boxe.y = poseResult.y; | |
166 | + //position_boxe.width = poseResult.width; | |
167 | + //position_boxe.height = poseResult.height; | |
168 | + //cv::rectangle(image, position_boxe, cv::Scalar(0, 0, 255), 2, 8); | |
169 | + //cv::rectangle(image, cv::Point(position_boxe.x, position_boxe.y - 20), cv::Point(position_boxe.x, position_boxe.y), cv::Scalar(0, 255, 255), -1); | |
170 | + //cv::putText(image, poseResult.className, cv::Point(position_boxe.x, position_boxe.y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, cv::Scalar(0, 0, 0)); | |
171 | + | |
172 | + //cv::imwrite("result.jpg", image); | |
173 | + //cv::imshow("show", image); | |
174 | + //cv::waitKey(1); | |
175 | + } | |
176 | + | |
177 | + //delete pBuf; | |
178 | + | |
179 | + return true; | |
180 | +} | |
0 | 181 | \ No newline at end of file | ... | ... |
FFNvDecoder/DogPoseDetector.h
0 → 100644
1 | +#pragma once | |
2 | + | |
3 | +#include <fstream> | |
4 | +#include <iostream> | |
5 | +#include <sstream> | |
6 | +#include <vector> | |
7 | + | |
8 | +#include "NvInfer.h" | |
9 | +#include "NvOnnxParser.h" | |
10 | + | |
11 | +#include "AlgorithmResult.h" | |
12 | + | |
13 | +class DogPoseDetector { | |
14 | +public: | |
15 | + bool init(); | |
16 | + | |
17 | + bool detect(unsigned char *pGpuBgb, int src_width, int src_height); | |
18 | + | |
19 | +private: | |
20 | + nvinfer1::IExecutionContext* context; | |
21 | + | |
22 | + void** m_data_buffer; | |
23 | + int m_input_node_index; | |
24 | + nvinfer1::Dims m_input_node_dim; | |
25 | + int m_output_node_index; | |
26 | + nvinfer1::Dims m_output_node_dim; | |
27 | + | |
28 | + unsigned char* pSquareData{ nullptr }; | |
29 | +}; | |
0 | 30 | \ No newline at end of file | ... | ... |
FFNvDecoder/FFNvDecoder.cpp
... | ... | @@ -79,6 +79,11 @@ bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp) |
79 | 79 | av_dict_set( &options, "rtsp_transport", force_tcp ? "tcp" : "udp", 0 ); |
80 | 80 | // av_dict_set( &options, "listen_timeout", "30", 0 ); // 单位为s |
81 | 81 | av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒 |
82 | + av_dict_set(&options, " max_delay", " 30000000", 0); | |
83 | + // av_dict_set( &options, "buffer_size", "655360", 0 ); | |
84 | + // av_dict_set( &options, "pkt_size", "655360", 0 ); | |
85 | + av_dict_set(&options, "fifo_size", "6553600", 0); | |
86 | + //av_dict_set(&options, "fflags", "discardcorrupt", 0); | |
82 | 87 | |
83 | 88 | fmt_ctx = avformat_alloc_context(); |
84 | 89 | const char* input_file = uri; | ... | ... |
FFNvDecoder/FFNvDecoder.vcxproj
... | ... | @@ -48,13 +48,13 @@ |
48 | 48 | <Optimization>Disabled</Optimization> |
49 | 49 | <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
50 | 50 | <AdditionalOptions>/utf-8</AdditionalOptions> |
51 | - <AdditionalIncludeDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\include;./;./common/inc;./common/UtilNPP;D:\win_dev\opencv\build\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | |
51 | + <AdditionalIncludeDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\include;./;./common/inc;./common/UtilNPP;D:\win_dev\opencv\build\include;..\3rdparty\TensorRT-8.6.1.6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | |
52 | 52 | </ClCompile> |
53 | 53 | <Link> |
54 | 54 | <GenerateDebugInformation>true</GenerateDebugInformation> |
55 | 55 | <SubSystem>Console</SubSystem> |
56 | - <AdditionalDependencies>avcodec.lib;avdevice.lib;avfilter.lib;avformat.lib;avutil.lib;postproc.lib;swresample.lib;swscale.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;nvjpeg.lib;opencv_world455d.lib;freeglut.lib;glew64.lib;%(AdditionalDependencies)</AdditionalDependencies> | |
57 | - <AdditionalLibraryDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\lib;D:\win_dev\opencv\build\x64\vc14\lib;../3rdparty/gl;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> | |
56 | + <AdditionalDependencies>avcodec.lib;avdevice.lib;avfilter.lib;avformat.lib;avutil.lib;postproc.lib;swresample.lib;swscale.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;nvjpeg.lib;opencv_world455d.lib;freeglut.lib;glew64.lib;nvinfer.lib;nvinfer_plugin.lib;nvonnxparser.lib;nvparsers.lib;cudnn.lib;cublas.lib;cudart.lib;%(AdditionalDependencies)</AdditionalDependencies> | |
57 | + <AdditionalLibraryDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\lib;D:\win_dev\opencv\build\x64\vc14\lib;../3rdparty/gl;..\3rdparty\TensorRT-8.6.1.6\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> | |
58 | 58 | </Link> |
59 | 59 | <CudaCompile> |
60 | 60 | <TargetMachinePlatform>64</TargetMachinePlatform> |
... | ... | @@ -81,9 +81,13 @@ |
81 | 81 | </ItemDefinitionGroup> |
82 | 82 | <ItemGroup> |
83 | 83 | <CudaCompile Include="NV12ToRGB.cu" /> |
84 | + <CudaCompile Include="ResizeAndNorm.cu" /> | |
84 | 85 | </ItemGroup> |
85 | 86 | <ItemGroup> |
87 | + <ClCompile Include="AlgorithmResult.cpp" /> | |
86 | 88 | <ClCompile Include="check_tool.cpp" /> |
89 | + <ClCompile Include="DogPoseDetector.cpp" /> | |
90 | + <ClCompile Include="dog_train_sys.cpp" /> | |
87 | 91 | <ClCompile Include="FFCuContextManager.cpp" /> |
88 | 92 | <ClCompile Include="FFNvDecoder.cpp" /> |
89 | 93 | <ClCompile Include="FFNvDecoderManager.cpp" /> |
... | ... | @@ -92,8 +96,10 @@ |
92 | 96 | <ClCompile Include="NvJpegEncoder.cpp" /> |
93 | 97 | </ItemGroup> |
94 | 98 | <ItemGroup> |
99 | + <ClInclude Include="AlgorithmResult.h" /> | |
95 | 100 | <ClInclude Include="check_tool.h" /> |
96 | 101 | <ClInclude Include="cuda_kernels.h" /> |
102 | + <ClInclude Include="DogPoseDetector.h" /> | |
97 | 103 | <ClInclude Include="FFCuContextManager.h" /> |
98 | 104 | <ClInclude Include="FFNvDecoder.h" /> |
99 | 105 | <ClInclude Include="FFNvDecoderManager.h" /> | ... | ... |
FFNvDecoder/FFNvDecoder.vcxproj.filters
... | ... | @@ -4,6 +4,9 @@ |
4 | 4 | <CudaCompile Include="NV12ToRGB.cu"> |
5 | 5 | <Filter>cu_src</Filter> |
6 | 6 | </CudaCompile> |
7 | + <CudaCompile Include="ResizeAndNorm.cu"> | |
8 | + <Filter>cu_src</Filter> | |
9 | + </CudaCompile> | |
7 | 10 | </ItemGroup> |
8 | 11 | <ItemGroup> |
9 | 12 | <Filter Include="src"> |
... | ... | @@ -38,6 +41,15 @@ |
38 | 41 | <ClCompile Include="NvJpegEncoder.cpp"> |
39 | 42 | <Filter>cu_src</Filter> |
40 | 43 | </ClCompile> |
44 | + <ClCompile Include="AlgorithmResult.cpp"> | |
45 | + <Filter>src</Filter> | |
46 | + </ClCompile> | |
47 | + <ClCompile Include="dog_train_sys.cpp"> | |
48 | + <Filter>src</Filter> | |
49 | + </ClCompile> | |
50 | + <ClCompile Include="DogPoseDetector.cpp"> | |
51 | + <Filter>src</Filter> | |
52 | + </ClCompile> | |
41 | 53 | </ItemGroup> |
42 | 54 | <ItemGroup> |
43 | 55 | <ClInclude Include="check_tool.h"> |
... | ... | @@ -61,5 +73,11 @@ |
61 | 73 | <ClInclude Include="NvJpegEncoder.h"> |
62 | 74 | <Filter>cu_src</Filter> |
63 | 75 | </ClInclude> |
76 | + <ClInclude Include="AlgorithmResult.h"> | |
77 | + <Filter>include</Filter> | |
78 | + </ClInclude> | |
79 | + <ClInclude Include="DogPoseDetector.h"> | |
80 | + <Filter>include</Filter> | |
81 | + </ClInclude> | |
64 | 82 | </ItemGroup> |
65 | 83 | </Project> |
66 | 84 | \ No newline at end of file | ... | ... |
FFNvDecoder/FFNvDecoder.vcxproj.user
1 | 1 | <?xml version="1.0" encoding="utf-8"?> |
2 | 2 | <Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
3 | 3 | <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
4 | - <LocalDebuggerCommandArguments>rtsp://122.97.218.170:8604/openUrl/V5nXRHa?params=eyJwcm90b2NhbCI6InJ0c3AiLCJjbGllbnRUeXBlIjoib3Blbl9hcGkiLCJleHByaWVUaW1lIjotMSwicHJvdG9jb2wiOiJydHNwIiwiZXhwaXJlVGltZSI6MzAwLCJlbmFibGVNR0MiOnRydWUsImV4cGFuZCI6InN0YW5kYXJkPXJ0c3Amc3RyZWFtZm9ybT1ydHAiLCJhIjoiMTBjZjM4N2JjY2Y5NDg3YzhjNWYzNjE2M2ViMWUyNTJ8MXwwfDEiLCJ0IjoxfQ== 0</LocalDebuggerCommandArguments> | |
4 | + <LocalDebuggerCommandArguments> | |
5 | + </LocalDebuggerCommandArguments> | |
5 | 6 | <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor> |
6 | 7 | </PropertyGroup> |
7 | 8 | </Project> |
8 | 9 | \ No newline at end of file | ... | ... |
FFNvDecoder/NV12ToRGB.cu
FFNvDecoder/ResizeAndNorm.cu
0 → 100644
1 | +#include <cuda_runtime.h> | |
2 | +#include <device_launch_parameters.h> | |
3 | +#include <stdio.h> | |
4 | + | |
5 | +#include "cuda_kernels.h" | |
6 | + | |
7 | +namespace cuda_common | |
8 | +{ | |
9 | + | |
10 | + __forceinline__ __device__ float3 get(uchar3* src, int x, int y, int w, int h) { | |
11 | + if (x < 0 || x >= w || y < 0 || y >= h) return make_float3(0.5, 0.5, 0.5); | |
12 | + uchar3 temp = src[y*w + x]; | |
13 | + return make_float3(float(temp.x) / 255., float(temp.y) / 255., float(temp.z) / 255.); | |
14 | + } | |
15 | + | |
16 | + __global__ void resizeNormKernel(uchar3* src, float *dst, int dstW, int dstH, int srcW, int srcH, | |
17 | + float scaleX, float scaleY, float shiftX, float shiftY) { | |
18 | + int idx = blockIdx.x * blockDim.x + threadIdx.x; | |
19 | + const int x = idx % dstW; | |
20 | + const int y = idx / dstW; | |
21 | + if (x >= dstW || y >= dstH) | |
22 | + return; | |
23 | + float w = (x - shiftX + 0.5) * scaleX - 0.5; // Ëõ·ÅµÄ·´ÏòÓ³É侨Õó | |
24 | + float h = (y - shiftY + 0.5) * scaleY - 0.5; // opencv | |
25 | + int h_low = (int)h; | |
26 | + int w_low = (int)w; | |
27 | + int h_high = h_low + 1; | |
28 | + int w_high = w_low + 1; | |
29 | + float lh = h - h_low; | |
30 | + float lw = w - w_low; | |
31 | + float hh = 1 - lh, hw = 1 - lw; | |
32 | + float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; | |
33 | + float3 v1 = get(src, w_low, h_low, srcW, srcH); | |
34 | + float3 v2 = get(src, w_high, h_low, srcW, srcH); | |
35 | + float3 v3 = get(src, w_low, h_high, srcW, srcH); | |
36 | + float3 v4 = get(src, w_high, h_high, srcW, srcH); | |
37 | + int stride = dstW*dstH; | |
38 | + dst[y*dstW + x] = w1 *v1.x + w2 * v2.x + w3 *v3.x + w4 * v4.x; | |
39 | + dst[stride + y*dstW + x] = w1 *v1.y + w2 * v2.y + w3 *v3.y + w4 * v4.y; | |
40 | + dst[stride * 2 + y*dstW + x] = w1 *v1.z + w2 * v2.z + w3 *v3.z + w4 * v4.z; | |
41 | + } | |
42 | + | |
43 | + __global__ void copy2square(uchar3 *dataIn, uchar3 *dataOut, int imgWidth, int imgHeight, int squareWidth) | |
44 | + { | |
45 | + // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread | |
46 | + int32 x = blockIdx.x * blockDim.x + threadIdx.x; | |
47 | + int32 y = blockIdx.y * blockDim.y + threadIdx.y; | |
48 | + | |
49 | + if (x >= imgWidth) | |
50 | + { | |
51 | + return; | |
52 | + } | |
53 | + | |
54 | + if (y >= imgHeight) | |
55 | + { | |
56 | + return; | |
57 | + } | |
58 | + | |
59 | + dataOut[y*squareWidth + x] = dataIn[y*imgWidth + x]; | |
60 | + } | |
61 | + | |
62 | + __global__ void kernel_bilinear(uint8 *src_img, int src_width, int src_height, float *dst_img, int dst_width, int dst_height) | |
63 | + { | |
64 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | |
65 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | |
66 | + | |
67 | + if (x < dst_width && y < dst_height) | |
68 | + { | |
69 | + float fx = (x + 0.5)*src_width / (float)dst_width - 0.5; | |
70 | + float fy = (y + 0.5)*src_height / (float)dst_height - 0.5; | |
71 | + int ax = floor(fx); | |
72 | + int ay = floor(fy); | |
73 | + if (ax < 0) | |
74 | + { | |
75 | + ax = 0; | |
76 | + } | |
77 | + else if (ax > src_width - 2) | |
78 | + { | |
79 | + ax = src_width - 2; | |
80 | + } | |
81 | + | |
82 | + if (ay < 0) { | |
83 | + ay = 0; | |
84 | + } | |
85 | + else if (ay > src_height - 2) | |
86 | + { | |
87 | + ay = src_height - 2; | |
88 | + } | |
89 | + | |
90 | + int A = ax + ay*src_width; | |
91 | + int B = ax + ay*src_width + 1; | |
92 | + int C = ax + ay*src_width + src_width; | |
93 | + int D = ax + ay*src_width + src_width + 1; | |
94 | + | |
95 | + float w1, w2, w3, w4; | |
96 | + w1 = fx - ax; | |
97 | + w2 = 1 - w1; | |
98 | + w3 = fy - ay; | |
99 | + w4 = 1 - w3; | |
100 | + | |
101 | + float blue = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3; | |
102 | + | |
103 | + float green = src_img[src_width * src_height + A] * w2*w4 + src_img[src_width * src_height + B] * w1*w4 | |
104 | + + src_img[src_width * src_height + C] * w2*w3 + src_img[src_width * src_height + D] * w1*w3; | |
105 | + | |
106 | + float red = src_img[src_width * src_height * 2 + A] * w2*w4 + src_img[src_width * src_height * 2 + B] * w1*w4 | |
107 | + + src_img[src_width * src_height * 2 + C] * w2*w3 + src_img[src_width * src_height * 2 + D] * w1*w3; | |
108 | + | |
109 | + dst_img[y * dst_width + x] = red; | |
110 | + dst_img[dst_width * dst_height + y * dst_width + x] = green; | |
111 | + dst_img[dst_width * dst_height * 2 + y * dst_width + x] = blue; | |
112 | + } | |
113 | + } | |
114 | + | |
115 | + __global__ void resize_norm_kernel(uchar3 *src_img, int src_width, int src_height, float *dataOut, int dst_width, int dst_height) | |
116 | + { | |
117 | + // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread | |
118 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | |
119 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | |
120 | + | |
121 | + if (x >= dst_width || y >= dst_height) { | |
122 | + return; | |
123 | + } | |
124 | + | |
125 | + double ratio = 0; | |
126 | + if (src_width >= src_height) { | |
127 | + ratio = src_width / (float)dst_width; | |
128 | + } | |
129 | + else | |
130 | + { | |
131 | + ratio = src_height / (float)dst_height; | |
132 | + } | |
133 | + | |
134 | + float fx = (x + 0.5)*ratio - 0.5; | |
135 | + float fy = (y + 0.5)*ratio - 0.5; | |
136 | + int ax = floor(fx); | |
137 | + int ay = floor(fy); | |
138 | + if (ax < 0) | |
139 | + { | |
140 | + ax = 0; | |
141 | + } | |
142 | + else if (ax >= (src_width - 2)) | |
143 | + { | |
144 | + return; | |
145 | + } | |
146 | + | |
147 | + if (ay < 0) { | |
148 | + ay = 0; | |
149 | + } | |
150 | + else if (ay >= (src_height - 2)) | |
151 | + { | |
152 | + return; | |
153 | + } | |
154 | + | |
155 | + //int A = ay * src_width + ax; | |
156 | + | |
157 | + //dataOut[y * dst_width + x].x = src_img[A].x / 255.0; | |
158 | + //dataOut[y * dst_width + x].y = src_img[A].x / 255.0; | |
159 | + //dataOut[y * dst_width + x].z = src_img[A].x / 255.0; | |
160 | + | |
161 | + int A = ax + ay*src_width; | |
162 | + int B = ax + ay*src_width + 1; | |
163 | + int C = ax + ay*src_width + src_width; | |
164 | + int D = ax + ay*src_width + src_width + 1; | |
165 | + | |
166 | + float w1, w2, w3, w4; | |
167 | + w1 = fx - ax; | |
168 | + w2 = 1 - w1; | |
169 | + w3 = fy - ay; | |
170 | + w4 = 1 - w3; | |
171 | + | |
172 | + float blue = src_img[A].x * w2*w4 + src_img[B].x * w1*w4 + src_img[C].x * w2*w3 + src_img[D].x * w1*w3; | |
173 | + float green = src_img[A].y * w2*w4 + src_img[B].y * w1*w4 + src_img[C].y * w2*w3 + src_img[D].y * w1*w3; | |
174 | + float red = src_img[A].z * w2*w4 + src_img[B].z * w1*w4 + src_img[C].z * w2*w3 + src_img[D].z * w1*w3; | |
175 | + | |
176 | + /* dataOut[y * dst_width + x].x = red / 255.0; | |
177 | + dataOut[y * dst_width + x].y = green / 255.0; | |
178 | + dataOut[y * dst_width + x].z = blue / 255.0;*/ | |
179 | + | |
180 | + // Clamp the results to RRRRR....GGGGGGG.......BBBBBBB.... | |
181 | + dataOut[y * dst_width + x] = red / 255.0; | |
182 | + dataOut[dst_width * dst_height + y * dst_width + x] = green / 255.0; | |
183 | + dataOut[dst_width * dst_height * 2 + y * dst_width + x] = blue / 255.0; | |
184 | + } | |
185 | + | |
186 | + cudaError_t resizeAndNorm(unsigned char* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height) | |
187 | + { | |
188 | + dim3 block(32, 16, 1); | |
189 | + dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1); | |
190 | + | |
191 | + resize_norm_kernel << < grid, block >> >((uchar3 *)d_srcRGB, src_width, src_height, d_dstRGB, dst_width, dst_height); | |
192 | + | |
193 | + cudaError_t cudaStatus = cudaGetLastError(); | |
194 | + if (cudaStatus != cudaSuccess) { | |
195 | + fprintf(stderr, "kernel_bilinear launch failed: %s\n", cudaGetErrorString(cudaStatus)); | |
196 | + return cudaStatus; | |
197 | + } | |
198 | + | |
199 | + cudaStatus = cudaDeviceSynchronize(); | |
200 | + if (cudaStatus != cudaSuccess) { | |
201 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); | |
202 | + return cudaStatus; | |
203 | + } | |
204 | + | |
205 | + return cudaStatus; | |
206 | + } | |
207 | + | |
208 | + //int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter, cudaStream_t stream) { | |
209 | + // float scaleX = (w*1.0f / in_w); | |
210 | + // float scaleY = (h*1.0f / in_h); | |
211 | + // float shiftX = 0.f, shiftY = 0.f; | |
212 | + // if (keepration)scaleX = scaleY = scaleX > scaleY ? scaleX : scaleY; | |
213 | + // if (keepration && keepcenter) { shiftX = (in_w - w / scaleX) / 2.f; shiftY = (in_h - h / scaleY) / 2.f; } | |
214 | + // const int n = in_w*in_h; | |
215 | + // int blockSize = 1024; | |
216 | + // const int gridSize = (n + blockSize - 1) / blockSize; | |
217 | + // resizeNormKernel << <gridSize, blockSize, 0, stream >> > ((uchar3*)(p), d, in_w, in_h, w, h, scaleX, scaleY, shiftX, shiftY); | |
218 | + // return 0; | |
219 | + //} | |
220 | + | |
221 | + //int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter) { | |
222 | + // float scaleX = (w*1.0f / in_w); | |
223 | + // float scaleY = (h*1.0f / in_h); | |
224 | + // float shiftX = 0.f, shiftY = 0.f; | |
225 | + // if (keepration)scaleX = scaleY = scaleX > scaleY ? scaleX : scaleY; | |
226 | + // if (keepration && keepcenter) { shiftX = (in_w - w / scaleX) / 2.f; shiftY = (in_h - h / scaleY) / 2.f; } | |
227 | + // const int n = in_w*in_h; | |
228 | + // int blockSize = 1024; | |
229 | + // const int gridSize = (n + blockSize - 1) / blockSize; | |
230 | + // resizeNormKernel << <gridSize, blockSize, 0 >> > ((uchar3*)(p), d, in_w, in_h, w, h, scaleX, scaleY, shiftX, shiftY); | |
231 | + // return 0; | |
232 | + //} | |
233 | + | |
234 | + int copy2square(void * p, void *d, int w, int h, int squareWidth, cudaStream_t stream) { | |
235 | + dim3 block(32, 16, 1); | |
236 | + dim3 grid((w + (block.x - 1)) / (block.x), (h + (block.y - 1)) / block.y, 1); | |
237 | + copy2square << <grid, block, 0, stream>> > ((uchar3 *)(p), (uchar3 *)d, w, h, squareWidth); | |
238 | + return 0; | |
239 | + } | |
240 | + | |
241 | +} | |
0 | 242 | \ No newline at end of file | ... | ... |
FFNvDecoder/cuda_kernels.h
... | ... | @@ -10,6 +10,10 @@ |
10 | 10 | |
11 | 11 | #include <cuda.h> |
12 | 12 | |
13 | +typedef unsigned char uint8; | |
14 | +typedef unsigned int uint32; | |
15 | +typedef int int32; | |
16 | + | |
13 | 17 | typedef enum |
14 | 18 | { |
15 | 19 | ITU601 = 1, |
... | ... | @@ -22,5 +26,12 @@ namespace cuda_common |
22 | 26 | |
23 | 27 | cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height); |
24 | 28 | cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height); |
29 | + | |
30 | + //int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter, cudaStream_t stream); | |
31 | + //int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter); | |
32 | + cudaError_t resizeAndNorm(unsigned char* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height); | |
33 | + | |
34 | + int copy2square(void * p, void *d, int w, int h, int max_side_length, cudaStream_t stream); | |
35 | + | |
25 | 36 | } |
26 | 37 | ... | ... |
FFNvDecoder/dog_train_sys.cpp
0 → 100644
1 | +// | |
2 | +//#include <fstream> | |
3 | +//#include <iostream> | |
4 | +//#include <sstream> | |
5 | +//#include <vector> | |
6 | +// | |
7 | +// | |
8 | +//#include "NvInfer.h" | |
9 | +//#include "NvOnnxParser.h" | |
10 | +//#include <opencv2/opencv.hpp> | |
11 | +// | |
12 | +//#include "AlgorithmResult.h" | |
13 | +//#include "cuda_kernels.h" | |
14 | +// | |
15 | +// | |
16 | +//// @brief 用于创建IBuilder、IRuntime或IRefitter实例的记录器用于通过该接口创建的所有对象。 | |
17 | +//// 在释放所有创建的对象之前,记录器应一直有效。 | |
18 | +//// 主要是实例化ILogger类下的log()方法。 | |
19 | +//class Logger : public nvinfer1::ILogger | |
20 | +//{ | |
21 | +// void log(Severity severity, const char* message) noexcept | |
22 | +// { | |
23 | +// // suppress info-level messages | |
24 | +// if (severity != Severity::kINFO) | |
25 | +// std::cout << message << std::endl; | |
26 | +// } | |
27 | +//} gLogger; | |
28 | +// | |
29 | +// | |
30 | +// | |
31 | +//void onnx_to_engine(std::string onnx_file_path, std::string engine_file_path, int type) { | |
32 | +// | |
33 | +// // 构建器,获取cuda内核目录以获取最快的实现 | |
34 | +// // 用于创建config、network、engine的其他对象的核心类 | |
35 | +// nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger); | |
36 | +// const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); | |
37 | +// // 解析onnx网络文件 | |
38 | +// // tensorRT模型类 | |
39 | +// nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch); | |
40 | +// // onnx文件解析类 | |
41 | +// // 将onnx文件解析,并填充TensorRT网络结构 | |
42 | +// nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger); | |
43 | +// // 解析onnx文件 | |
44 | +// parser->parseFromFile(onnx_file_path.c_str(), 2); | |
45 | +// for (int i = 0; i < parser->getNbErrors(); ++i) { | |
46 | +// std::cout << "load error: " << parser->getError(i)->desc() << std::endl; | |
47 | +// } | |
48 | +// printf("tensorRT load mask onnx model successfully!!!...\n"); | |
49 | +// | |
50 | +// // 创建推理引擎 | |
51 | +// // 创建生成器配置对象。 | |
52 | +// nvinfer1::IBuilderConfig* config = builder->createBuilderConfig(); | |
53 | +// // 设置最大工作空间大小。 | |
54 | +// config->setMaxWorkspaceSize(16 * (1 << 20)); | |
55 | +// // 设置模型输出精度 | |
56 | +// if (type == 1) { | |
57 | +// config->setFlag(nvinfer1::BuilderFlag::kFP16); | |
58 | +// } | |
59 | +// if (type == 2) { | |
60 | +// config->setFlag(nvinfer1::BuilderFlag::kINT8); | |
61 | +// } | |
62 | +// // 创建推理引擎 | |
63 | +// nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config); | |
64 | +// // 将推理引擎保存到本地 | |
65 | +// std::cout << "try to save engine file now~~~" << std::endl; | |
66 | +// std::ofstream file_ptr(engine_file_path, std::ios::binary); | |
67 | +// if (!file_ptr) { | |
68 | +// std::cerr << "could not open plan output file" << std::endl; | |
69 | +// return; | |
70 | +// } | |
71 | +// // 将模型转化为文件流数据 | |
72 | +// nvinfer1::IHostMemory* model_stream = engine->serialize(); | |
73 | +// // 将文件保存到本地 | |
74 | +// file_ptr.write(reinterpret_cast<const char*>(model_stream->data()), model_stream->size()); | |
75 | +// // 销毁创建的对象 | |
76 | +// model_stream->destroy(); | |
77 | +// engine->destroy(); | |
78 | +// network->destroy(); | |
79 | +// parser->destroy(); | |
80 | +// std::cout << "convert onnx model to TensorRT engine model successfully!" << std::endl; | |
81 | +//} | |
82 | +// | |
83 | +//int main() { | |
84 | +// | |
85 | +// | |
86 | +// const char* model_path_onnx = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.onnx"; | |
87 | +// const char* model_path_engine = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.engine"; | |
88 | +// const char* image_path = "F:/dog_trainer_sys/test1/IMG_6837.JPG"; | |
89 | +// std::string lable_path = "F:/dog_trainer_sys/train2/classes.txt"; | |
90 | +// const char* input_node_name = "images"; | |
91 | +// const char* output_node_name = "output"; | |
92 | +// int num_ionode = 2; | |
93 | +// | |
94 | +// // 读取本地模型文件 | |
95 | +// std::ifstream file_ptr(model_path_engine, std::ios::binary); | |
96 | +// if (!file_ptr.good()) { | |
97 | +// std::cerr << "文件无法打开,请确定文件是否可用!" << std::endl; | |
98 | +// } | |
99 | +// | |
100 | +// size_t size = 0; | |
101 | +// file_ptr.seekg(0, file_ptr.end); // 将读指针从文件末尾开始移动0个字节 | |
102 | +// size = file_ptr.tellg(); // 返回读指针的位置,此时读指针的位置就是文件的字节数 | |
103 | +// file_ptr.seekg(0, file_ptr.beg); // 将读指针从文件开头开始移动0个字节 | |
104 | +// char* model_stream = new char[size]; | |
105 | +// file_ptr.read(model_stream, size); | |
106 | +// file_ptr.close(); | |
107 | +// | |
108 | +// // 日志记录接口 | |
109 | +// //Logger logger; | |
110 | +// // 反序列化引擎 | |
111 | +// nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger); | |
112 | +// // 推理引擎 | |
113 | +// // 保存模型的模型结构、模型参数以及最优计算kernel配置; | |
114 | +// // 不能跨平台和跨TensorRT版本移植 | |
115 | +// nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream, size); | |
116 | +// // 上下文 | |
117 | +// // 储存中间值,实际进行推理的对象 | |
118 | +// // 由engine创建,可创建多个对象,进行多推理任务 | |
119 | +// nvinfer1::IExecutionContext* context = engine->createExecutionContext(); | |
120 | +// | |
121 | +// | |
122 | +// delete[] model_stream; | |
123 | +// | |
124 | +// // 创建GPU显存缓冲区 | |
125 | +// void** data_buffer = new void*[num_ionode]; | |
126 | +// // 创建GPU显存输入缓冲区 | |
127 | +// int input_node_index = engine->getBindingIndex(input_node_name); | |
128 | +// nvinfer1::Dims input_node_dim = engine->getBindingDimensions(input_node_index); | |
129 | +// size_t input_data_length = input_node_dim.d[1] * input_node_dim.d[2] * input_node_dim.d[3]; | |
130 | +// cudaMalloc(&(data_buffer[input_node_index]), input_data_length * sizeof(float)); | |
131 | +// // 创建GPU显存输出缓冲区 | |
132 | +// int output_node_index = engine->getBindingIndex(output_node_name); | |
133 | +// nvinfer1::Dims output_node_dim = engine->getBindingDimensions(output_node_index); | |
134 | +// size_t output_data_length = output_node_dim.d[1] * output_node_dim.d[2]; | |
135 | +// cudaMalloc(&(data_buffer[output_node_index]), output_data_length * sizeof(float)); | |
136 | +// | |
137 | +// | |
138 | +// // 图象预处理 - 格式化操作 | |
139 | +// cv::Mat image = cv::imread(image_path); | |
140 | +// int max_side_length = std::max(image.cols, image.rows); | |
141 | +// cv::Mat max_image = cv::Mat::zeros(cv::Size(max_side_length, max_side_length), CV_8UC3); | |
142 | +// cv::Rect roi(0, 0, image.cols, image.rows); | |
143 | +// image.copyTo(max_image(roi)); | |
144 | +// // 将图像归一化,并放缩到指定大小 | |
145 | +// cv::Size input_node_shape(input_node_dim.d[2], input_node_dim.d[3]); | |
146 | +// cv::Mat BN_image = cv::dnn::blobFromImage(max_image, 1 / 255.0, input_node_shape, cv::Scalar(0, 0, 0), true, false); | |
147 | +// | |
148 | +// std::vector<float> input_data(input_data_length); | |
149 | +// memcpy(input_data.data(), BN_image.ptr<float>(), input_data_length * sizeof(float)); | |
150 | +// | |
151 | +// //void* pGPUData; | |
152 | +// //cudaMalloc(&pGPUData, 3 * image.cols * image.rows * sizeof(unsigned char)); | |
153 | +// //cudaMemcpy(pGPUData, (void*)(image.data), 3 * image.cols * image.rows * sizeof(unsigned char), cudaMemcpyHostToDevice); | |
154 | +// //cuda_common::resizeAndNorm((unsigned char*)pGPUData, max_side_length, max_side_length, (float*)data_buffer[input_node_index], input_node_dim.d[2], input_node_dim.d[3]); | |
155 | +// | |
156 | +// // 创建输入cuda流 | |
157 | +// cudaStream_t stream; | |
158 | +// cudaStreamCreate(&stream); | |
159 | +// | |
160 | +// // 输入数据由内存到GPU显存 | |
161 | +// cudaMemcpyAsync(data_buffer[input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream); | |
162 | +// | |
163 | +// // 模型推理 | |
164 | +// context->enqueueV2(data_buffer, stream, nullptr); | |
165 | +// | |
166 | +// float* result_array = new float[output_data_length]; | |
167 | +// cudaMemcpyAsync(result_array, data_buffer[output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream); | |
168 | +// | |
169 | +// ResultYolov5 result; | |
170 | +// result.factor = max_side_length / (float)input_node_dim.d[2]; | |
171 | +// result.read_class_names(lable_path); | |
172 | +// | |
173 | +// //cv::Mat result_image = result.yolov5_result(image, result_array); | |
174 | +// | |
175 | +// //// 查看输出结果 | |
176 | +// //cv::imshow("C++ + OpenVINO + Yolov5 推理结果", result_image); | |
177 | +// //cv::waitKey(); | |
178 | +// | |
179 | +// std::vector<DogPoseResult> vec_result = result.yolov5_result(result_array, 0.6); | |
180 | +// if (vec_result.size() > 0) { | |
181 | +// DogPoseResult poseResult = vec_result[0]; | |
182 | +// std::cout << poseResult.x << std::endl; | |
183 | +// std::cout << poseResult.y << std::endl; | |
184 | +// std::cout << poseResult.width << std::endl; | |
185 | +// std::cout << poseResult.height << std::endl; | |
186 | +// std::cout << poseResult.confidence << std::endl; | |
187 | +// std::cout << poseResult.classId << std::endl; | |
188 | +// std::cout << poseResult.className << std::endl; | |
189 | +// | |
190 | +// | |
191 | +// cv::Rect position_boxe; | |
192 | +// position_boxe.x = poseResult.x; | |
193 | +// position_boxe.y = poseResult.y; | |
194 | +// position_boxe.width = poseResult.width; | |
195 | +// position_boxe.height = poseResult.height; | |
196 | +// cv::rectangle(image, position_boxe, cv::Scalar(0, 0, 255), 2, 8); | |
197 | +// cv::rectangle(image, cv::Point(position_boxe.x, position_boxe.y - 20), cv::Point(position_boxe.x, position_boxe.y), cv::Scalar(0, 255, 255), -1); | |
198 | +// cv::putText(image, poseResult.className, cv::Point(position_boxe.x, position_boxe.y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, cv::Scalar(0, 0, 0)); | |
199 | +// | |
200 | +// cv::imwrite("result.jpg", image); | |
201 | +// cv::imshow("show", image); | |
202 | +// cv::waitKey(); | |
203 | +// } | |
204 | +//} | |
0 | 205 | \ No newline at end of file | ... | ... |
FFNvDecoder/main.cpp
... | ... | @@ -12,6 +12,9 @@ |
12 | 12 | |
13 | 13 | #include "opencv2\opencv.hpp" |
14 | 14 | |
15 | +#include "DogPoseDetector.h" | |
16 | + | |
17 | + | |
15 | 18 | using namespace std; |
16 | 19 | using namespace cv; |
17 | 20 | |
... | ... | @@ -20,6 +23,8 @@ unsigned char *pHwRgb[2] = {nullptr, nullptr}; |
20 | 23 | int sum1 = 0; |
21 | 24 | int sum2 = 0; |
22 | 25 | |
26 | +DogPoseDetector poseDetector; | |
27 | + | |
23 | 28 | |
24 | 29 | mutex m_mutex; |
25 | 30 | void saveFrame(AVFrame * gpuFrame, string file_name) { |
... | ... | @@ -46,47 +51,45 @@ void saveFrame(AVFrame * gpuFrame, string file_name) { |
46 | 51 | } |
47 | 52 | |
48 | 53 | mutex m_mutex_show; |
54 | +unsigned char *pShowData = nullptr; | |
55 | + | |
49 | 56 | void showFrame(AVFrame * gpuFrame) { |
50 | 57 | std::lock_guard<std::mutex> l(m_mutex_show); |
51 | 58 | |
52 | - unsigned char *pHwData = nullptr; | |
53 | - cudaError_t cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); | |
54 | - | |
59 | + cudaError_t cudaStatus = cudaSuccess; | |
60 | + if (pShowData == nullptr) | |
61 | + { | |
62 | + cudaError_t cudaStatus = cudaMalloc((void **)&pShowData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); | |
63 | + } | |
64 | + | |
55 | 65 | cuda_common::setColorSpace(ITU709, 0); |
56 | - cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0], (CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height); | |
57 | - cudaDeviceSynchronize(); | |
66 | + cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0], (CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pShowData, gpuFrame->width, gpuFrame->height); | |
58 | 67 | if (cudaStatus != cudaSuccess) { |
59 | 68 | cout << "CUDAToBGR failed !!!" << endl; |
60 | 69 | return; |
61 | 70 | } |
62 | 71 | |
63 | - | |
64 | - | |
65 | - unsigned char * pHwRgb = pHwData; | |
66 | 72 | int channel = 3; |
67 | 73 | int width = gpuFrame->width; |
68 | 74 | int height = gpuFrame->height; |
69 | 75 | |
70 | - if (pHwRgb != nullptr && channel > 0 && width > 0 && height > 0) { | |
71 | - int nSize = channel * height * width; | |
72 | - unsigned char* cpu_data = new unsigned char[nSize]; | |
76 | + if (pShowData != nullptr && channel > 0 && width > 0 && height > 0) { | |
77 | + poseDetector.detect(pShowData, width, height); | |
73 | 78 | |
74 | - cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost); | |
75 | - cudaDeviceSynchronize(); | |
79 | + //int nSize = channel * height * width; | |
80 | + //unsigned char* cpu_data = new unsigned char[nSize]; | |
76 | 81 | |
77 | - cv::Mat img_(height, width, CV_8UC3, cpu_data); | |
78 | - bool bWrite = cv::imwrite("dec0.jpg", img_); | |
82 | + //cudaMemcpy(cpu_data, pShowData, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost); | |
83 | + //cudaDeviceSynchronize(); | |
79 | 84 | |
80 | - imshow("show", img_); | |
81 | - waitKey(0); | |
85 | + //cv::Mat img_(height, width, CV_8UC3, cpu_data); | |
86 | + //imshow("show", img_); | |
87 | + //waitKey(1); | |
82 | 88 | |
83 | - delete[] cpu_data; | |
84 | - cpu_data = nullptr; | |
89 | + //delete[] cpu_data; | |
90 | + //cpu_data = nullptr; | |
85 | 91 | |
86 | - } | |
87 | - | |
88 | - cudaFree(pHwData); | |
89 | - pHwData = nullptr; | |
92 | + } | |
90 | 93 | } |
91 | 94 | |
92 | 95 | /** |
... | ... | @@ -108,7 +111,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ |
108 | 111 | // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl; |
109 | 112 | cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); |
110 | 113 | |
111 | - saveFrame(gpuFrame, decoder->getName()); | |
114 | + //saveFrame(gpuFrame, decoder->getName()); | |
112 | 115 | showFrame(gpuFrame); |
113 | 116 | } |
114 | 117 | } |
... | ... | @@ -172,7 +175,8 @@ void decode_finished_cbk(const void* userPtr){ |
172 | 175 | // string test_uri = "/home/cmhu/data/output_1920x1080.mp4"; |
173 | 176 | // string test_uri = "rtsp://176.10.0.2:8554/stream"; |
174 | 177 | // string test_uri = "/mnt/f/fiss/test_data/h265.mp4"; |
175 | -string test_uri = "rtsp://176.10.0.4:8554/stream"; | |
178 | +//string test_uri = "rtsp://176.10.0.4:8554/stream"; | |
179 | +string test_uri = "f://data/caishenkezhan.mp4"; | |
176 | 180 | |
177 | 181 | void createDecode(int index){ |
178 | 182 | FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); |
... | ... | @@ -210,16 +214,20 @@ void logFF(void *, int level, const char *fmt, va_list ap) |
210 | 214 | int main(int argc, char* argv[]) { |
211 | 215 | |
212 | 216 | printf("start \n"); |
213 | - if (argc != 3) { | |
214 | - fprintf(stderr, "./xxx uri gpu_id\n"); | |
215 | - return -1; | |
216 | - } | |
217 | + //if (argc != 3) { | |
218 | + // fprintf(stderr, "./xxx uri gpu_id\n"); | |
219 | + // return -1; | |
220 | + //} | |
217 | 221 | |
218 | - char* uri = argv[1]; | |
219 | - char* gpuid = argv[2]; | |
222 | + char* uri = "F:/dog_trainer_sys/test1/5min.mp4";//argv[1]; | |
223 | + char* gpuid = "0";//argv[2]; | |
220 | 224 | |
221 | 225 | cout << av_version_info() << endl; |
222 | 226 | |
227 | + poseDetector.init(); | |
228 | + | |
229 | + //namedWindow("show", WINDOW_NORMAL); | |
230 | + | |
223 | 231 | //evalQuality(uri, gpuid); |
224 | 232 | |
225 | 233 | |
... | ... | @@ -253,19 +261,19 @@ int main(int argc, char* argv[]) { |
253 | 261 | pDecManager->getResolution(config.name, w,h); |
254 | 262 | printf( "%s : %dx%d\n", config.name.c_str() , w,h ); |
255 | 263 | |
256 | - thread* m_thread = new thread([](void* arg) | |
257 | - { | |
258 | - while (true) | |
259 | - { | |
260 | - std::this_thread::sleep_for(std::chrono::milliseconds(5000)); | |
261 | - FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); | |
262 | - int count = pDecManager->count(); | |
263 | - cout << "当前运行路数: " << pDecManager->count() << endl; | |
264 | - } | |
265 | - | |
266 | - return (void*)0; | |
267 | - } | |
268 | - , nullptr); | |
264 | + //thread* m_thread = new thread([](void* arg) | |
265 | + // { | |
266 | + // while (true) | |
267 | + // { | |
268 | + // std::this_thread::sleep_for(std::chrono::milliseconds(5000)); | |
269 | + // FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); | |
270 | + // int count = pDecManager->count(); | |
271 | + // cout << "当前运行路数: " << pDecManager->count() << endl; | |
272 | + // } | |
273 | + | |
274 | + // return (void*)0; | |
275 | + // } | |
276 | + //, nullptr); | |
269 | 277 | |
270 | 278 | |
271 | 279 | while (getchar() != 'q'); | ... | ... |