Commit 07639e75fe40de0f5e41eb26e9fc72252421e275
1 parent
7128e494
实现狗狗姿态检测
Showing
14 changed files
with
880 additions
and
51 deletions
.vs/FFNvDecoder/v14/.suo
No preview for this file type
FFNvDecoder/AlgorithmResult.cpp
0 → 100644
1 | +#include "AlgorithmResult.h" | ||
2 | + | ||
3 | +#include "opencv2/opencv.hpp" | ||
4 | + | ||
5 | +void ResultYolov5::read_class_names(std::string path_name) | ||
6 | +{ | ||
7 | + std::ifstream infile; | ||
8 | + infile.open(path_name.data()); //将文件流对象与文件连接起来 | ||
9 | + assert(infile.is_open()); //若失败,则输出错误消息,并终止程序运行 | ||
10 | + | ||
11 | + std::string str; | ||
12 | + while (getline(infile, str)) { | ||
13 | + class_names.push_back(str); | ||
14 | + str.clear(); | ||
15 | + | ||
16 | + } | ||
17 | + infile.close(); //关闭文件输入流 | ||
18 | + | ||
19 | +} | ||
20 | + | ||
21 | +std::vector<DogPoseResult> ResultYolov5::yolov5_result(float* result, float threshold) { | ||
22 | + cv::Mat det_output = cv::Mat(25200, 13, CV_32F, result); | ||
23 | + //// post-process | ||
24 | + std::vector<cv::Rect> position_boxes; | ||
25 | + std::vector<int> classIds; | ||
26 | + std::vector<float> confidences; | ||
27 | + | ||
28 | + //std::cout << det_output.rows << std::endl; | ||
29 | + for (int i = 0; i < det_output.rows; i++) { | ||
30 | + float confidence = det_output.at<float>(i, 4); | ||
31 | + if (confidence < 0.2) { | ||
32 | + continue; | ||
33 | + } | ||
34 | + //std::cout << "confidence " << confidence << std::endl; | ||
35 | + cv::Mat classes_scores = det_output.row(i).colRange(5, 13); | ||
36 | + cv::Point classIdPoint; | ||
37 | + double score; | ||
38 | + // 获取一组数据中最大值及其位置 | ||
39 | + minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint); | ||
40 | + // 置信度 0~1之间 | ||
41 | + if (score > 0.25) | ||
42 | + { | ||
43 | + float cx = det_output.at<float>(i, 0); | ||
44 | + float cy = det_output.at<float>(i, 1); | ||
45 | + float ow = det_output.at<float>(i, 2); | ||
46 | + float oh = det_output.at<float>(i, 3); | ||
47 | + int x = static_cast<int>((cx - 0.5 * ow) * factor); | ||
48 | + int y = static_cast<int>((cy - 0.5 * oh) * factor); | ||
49 | + int width = static_cast<int>(ow * factor); | ||
50 | + int height = static_cast<int>(oh * factor); | ||
51 | + cv::Rect box; | ||
52 | + box.x = x; | ||
53 | + box.y = y; | ||
54 | + box.width = width; | ||
55 | + box.height = height; | ||
56 | + | ||
57 | + position_boxes.push_back(box); | ||
58 | + classIds.push_back(classIdPoint.x); | ||
59 | + confidences.push_back(score); | ||
60 | + } | ||
61 | + } | ||
62 | + // NMS | ||
63 | + std::vector<int> indexes; | ||
64 | + cv::dnn::NMSBoxes(position_boxes, confidences, 0.25, 0.45, indexes); | ||
65 | + | ||
66 | + //for (size_t i = 0; i < indexes.size(); i++) { | ||
67 | + // int index = indexes[i]; | ||
68 | + // int idx = classIds[index]; | ||
69 | + // cv::rectangle(image, position_boxes[index], cv::Scalar(0, 0, 255), 2, 8); | ||
70 | + // cv::rectangle(image, cv::Point(position_boxes[index].tl().x, position_boxes[index].tl().y - 20), | ||
71 | + // cv::Point(position_boxes[index].br().x, position_boxes[index].tl().y), cv::Scalar(0, 255, 255), -1); | ||
72 | + // cv::putText(image, class_names[idx], cv::Point(position_boxes[index].tl().x, position_boxes[index].tl().y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, cv::Scalar(0, 0, 0)); | ||
73 | + //} | ||
74 | + | ||
75 | + std::vector<DogPoseResult> vecPoseResult; | ||
76 | + for (size_t i = 0; i < indexes.size(); i++) { | ||
77 | + int index = indexes[i]; | ||
78 | + int idx = classIds[index]; | ||
79 | + | ||
80 | + DogPoseResult poseResult; | ||
81 | + poseResult.x = position_boxes[index].x; | ||
82 | + poseResult.y = position_boxes[index].y; | ||
83 | + poseResult.width = position_boxes[index].width; | ||
84 | + poseResult.height = position_boxes[index].height; | ||
85 | + poseResult.confidence = confidences[index]; | ||
86 | + poseResult.classId = classIds[index]; | ||
87 | + poseResult.className = class_names[idx]; | ||
88 | + | ||
89 | + vecPoseResult.push_back(poseResult); | ||
90 | + } | ||
91 | + | ||
92 | + return vecPoseResult; | ||
93 | +} | ||
0 | \ No newline at end of file | 94 | \ No newline at end of file |
FFNvDecoder/AlgorithmResult.h
0 → 100644
1 | +#pragma once | ||
2 | + | ||
3 | +#ifndef RESULT_H | ||
4 | +#define RESULT_H | ||
5 | + | ||
6 | +#include <fstream> | ||
7 | +#include <iterator> | ||
8 | +#include <memory> | ||
9 | +#include <sstream> | ||
10 | +#include <string> | ||
11 | +#include <vector> | ||
12 | + | ||
13 | + | ||
/// One detection as filled in by ResultYolov5::yolov5_result.
///
/// Every member has a default so that a default-constructed object never carries
/// indeterminate values; the defaults equal what value-initialization produced
/// before they were added.
struct DogPoseResult {
    int x = 0;                 // left edge of the box, after scaling by ResultYolov5::factor
    int y = 0;                 // top edge of the box, after scaling
    int height = 0;            // box height, after scaling
    int width = 0;             // box width, after scaling
    float confidence = 0.0f;   // best per-class score of the detection
    int classId = 0;           // index of the best-scoring class
    std::string className;     // entry of ResultYolov5::class_names at classId
};
23 | + | ||
24 | +// @brief 处理yolov5的结果 | ||
25 | + class ResultYolov5 { | ||
26 | +public: | ||
27 | + std::vector<std::string> class_names; | ||
28 | + float factor; | ||
29 | + | ||
30 | + //ResultYolov5(); | ||
31 | + void read_class_names(std::string path_name); | ||
32 | + std::vector<DogPoseResult> yolov5_result(float* result, float threshold); | ||
33 | +}; | ||
34 | + | ||
35 | + | ||
36 | +#endif // !RESULT_H | ||
0 | \ No newline at end of file | 37 | \ No newline at end of file |
FFNvDecoder/DogPoseDetector.cpp
0 → 100644
1 | +#include "DogPoseDetector.h" | ||
2 | +#include "cuda_kernels.h" | ||
3 | +#include <algorithm> | ||
4 | + | ||
5 | +#include "opencv2/opencv.hpp" | ||
6 | + | ||
7 | +// @brief 用于创建IBuilder、IRuntime或IRefitter实例的记录器用于通过该接口创建的所有对象。 | ||
8 | +// 在释放所有创建的对象之前,记录器应一直有效。 | ||
9 | +// 主要是实例化ILogger类下的log()方法。 | ||
10 | +class Logger : public nvinfer1::ILogger | ||
11 | +{ | ||
12 | + void log(Severity severity, const char* message) noexcept | ||
13 | + { | ||
14 | + // suppress info-level messages | ||
15 | + if (severity != Severity::kINFO) | ||
16 | + std::cout << message << std::endl; | ||
17 | + } | ||
18 | +} gLogger; | ||
19 | + | ||
20 | +bool DogPoseDetector::init() { | ||
21 | + const char* model_path_onnx = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.onnx"; | ||
22 | + const char* model_path_engine = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.engine"; | ||
23 | + //const char* image_path = "F:/dog_trainer_sys/test1/IMG_6837.JPG"; | ||
24 | + std::string lable_path = "F:/dog_trainer_sys/train2/classes.txt"; | ||
25 | + const char* input_node_name = "images"; | ||
26 | + const char* output_node_name = "output"; | ||
27 | + | ||
28 | + // 读取本地模型文件 | ||
29 | + std::ifstream file_ptr(model_path_engine, std::ios::binary); | ||
30 | + if (!file_ptr.good()) { | ||
31 | + std::cerr << "文件无法打开,请确定文件是否可用!" << std::endl; | ||
32 | + return false; | ||
33 | + } | ||
34 | + | ||
35 | + size_t size = 0; | ||
36 | + file_ptr.seekg(0, file_ptr.end); // 将读指针从文件末尾开始移动0个字节 | ||
37 | + size = file_ptr.tellg(); // 返回读指针的位置,此时读指针的位置就是文件的字节数 | ||
38 | + file_ptr.seekg(0, file_ptr.beg); // 将读指针从文件开头开始移动0个字节 | ||
39 | + char* model_stream = new char[size]; | ||
40 | + file_ptr.read(model_stream, size); | ||
41 | + file_ptr.close(); | ||
42 | + | ||
43 | + // 日志记录接口 | ||
44 | + //Logger logger; | ||
45 | + // 反序列化引擎 | ||
46 | + nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger); | ||
47 | + // 推理引擎 | ||
48 | + // 保存模型的模型结构、模型参数以及最优计算kernel配置; | ||
49 | + // 不能跨平台和跨TensorRT版本移植 | ||
50 | + nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream, size); | ||
51 | + // 上下文 | ||
52 | + // 储存中间值,实际进行推理的对象 | ||
53 | + // 由engine创建,可创建多个对象,进行多推理任务 | ||
54 | + context = engine->createExecutionContext(); | ||
55 | + | ||
56 | + // 创建GPU显存缓冲区 | ||
57 | + m_data_buffer = new void*[2]; | ||
58 | + // 创建GPU显存输入缓冲区 | ||
59 | + m_input_node_index = engine->getBindingIndex(input_node_name); | ||
60 | + m_input_node_dim = engine->getBindingDimensions(m_input_node_index); | ||
61 | + size_t input_data_length = m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3]; | ||
62 | + cudaMalloc(&(m_data_buffer[m_input_node_index]), input_data_length * sizeof(float)); | ||
63 | + // 创建GPU显存输出缓冲区 | ||
64 | + m_output_node_index = engine->getBindingIndex(output_node_name); | ||
65 | + m_output_node_dim = engine->getBindingDimensions(m_output_node_index); | ||
66 | + size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2]; | ||
67 | + cudaMalloc(&(m_data_buffer[m_output_node_index]), output_data_length * sizeof(float)); | ||
68 | + | ||
69 | + //cv::namedWindow("show", cv::WINDOW_NORMAL); | ||
70 | + | ||
71 | + return true; | ||
72 | +} | ||
73 | + | ||
74 | +static void saveCUDAImg(unsigned char *pGpuBgb, int src_width, int src_height, std::string filename) { | ||
75 | + int rgb_size = 3 * src_width * src_height; | ||
76 | + unsigned char *cpu_data = new unsigned char[rgb_size]; | ||
77 | + cudaError_t cudaStatus = cudaMemcpy(cpu_data, pGpuBgb, rgb_size * sizeof(unsigned char), cudaMemcpyDeviceToHost); | ||
78 | + cv::Mat img(src_height, src_width, CV_8UC3, cpu_data); | ||
79 | + cv::imwrite(filename.c_str(), img); | ||
80 | + delete[] cpu_data; | ||
81 | + cpu_data = nullptr; | ||
82 | +} | ||
83 | + | ||
84 | +bool DogPoseDetector::detect(unsigned char *pGpuBgr, int src_width, int src_height) { | ||
85 | + int dst_width = m_input_node_dim.d[2]; | ||
86 | + int dst_height = m_input_node_dim.d[3]; | ||
87 | + | ||
88 | + int max_side_length = std::max(src_width, src_height); | ||
89 | + | ||
90 | + | ||
91 | + //int buf_size = 3 * src_width * src_height; | ||
92 | + //float* pBuf = new float[buf_size]; | ||
93 | + //cudaMemcpy(pBuf, pGpuBgr, buf_size * sizeof(unsigned char), cudaMemcpyDeviceToHost); | ||
94 | + //cv::Mat image(src_height, src_width, CV_8UC3, pBuf); | ||
95 | + | ||
96 | + //saveCUDAImg(pGpuBgr, src_width, src_height, "src.jpg"); | ||
97 | + | ||
98 | + cudaStream_t stream; | ||
99 | + cudaStreamCreate(&stream); | ||
100 | + | ||
101 | + { | ||
102 | + //int rgb_size = 3 * src_width * src_height; | ||
103 | + //uint8 *cpu_data = new uint8[rgb_size]; | ||
104 | + //cudaError_t cudaStatus = cudaMemcpy(cpu_data, pGpuBgr, rgb_size * sizeof(uint8), cudaMemcpyDeviceToHost); | ||
105 | + //cv::Mat image(src_height, src_width, CV_8UC3, cpu_data); | ||
106 | + | ||
107 | + | ||
108 | + //cv::Mat max_image = cv::Mat::zeros(cv::Size(max_side_length, max_side_length), CV_8UC3); | ||
109 | + //cv::Rect roi(0, 0, image.cols, image.rows); | ||
110 | + //image.copyTo(max_image(roi)); | ||
111 | + //// 将图像归一化,并放缩到指定大小 | ||
112 | + //cv::Size input_node_shape(m_input_node_dim.d[2], m_input_node_dim.d[3]); | ||
113 | + //cv::Mat BN_image = cv::dnn::blobFromImage(max_image, 1 / 255.0, input_node_shape, cv::Scalar(0, 0, 0), true, false); | ||
114 | + | ||
115 | + //size_t input_data_length = m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3]; | ||
116 | + //std::vector<float> input_data(input_data_length); | ||
117 | + //memcpy(input_data.data(), BN_image.ptr<float>(), input_data_length * sizeof(float)); | ||
118 | + | ||
119 | + //cudaMemcpyAsync(m_data_buffer[m_input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream); | ||
120 | + } | ||
121 | + | ||
122 | + cuda_common::resizeAndNorm(pGpuBgr, src_width, src_height, (float*)m_data_buffer[m_input_node_index], dst_width, dst_height); | ||
123 | + | ||
124 | + //int buf_size = 3 * dst_width * dst_height; | ||
125 | + //float* pBuf = new float[buf_size]; | ||
126 | + //cudaMemcpy(pBuf, m_data_buffer[m_input_node_index], buf_size * sizeof(float), cudaMemcpyDeviceToHost); | ||
127 | + //cv::Mat image(dst_height, dst_width, CV_32FC3, pBuf); | ||
128 | + //cv::imshow("show", image); | ||
129 | + //cv::waitKey(1); | ||
130 | + //delete[] pBuf; | ||
131 | + //pBuf = nullptr; | ||
132 | + | ||
133 | + | ||
134 | + | ||
135 | + | ||
136 | + | ||
137 | + // 模型推理 | ||
138 | + context->enqueueV2(m_data_buffer, stream, nullptr); | ||
139 | + | ||
140 | + size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2]; | ||
141 | + float* result_array = new float[output_data_length]; | ||
142 | + cudaMemcpyAsync(result_array, m_data_buffer[m_output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream); | ||
143 | + | ||
144 | + cudaDeviceSynchronize(); | ||
145 | + | ||
146 | + | ||
147 | + ResultYolov5 result; | ||
148 | + result.factor = max_side_length / (float)m_input_node_dim.d[2]; | ||
149 | + result.read_class_names("F:/dog_trainer_sys/train2/classes.txt"); | ||
150 | + | ||
151 | + std::vector<DogPoseResult> vec_result = result.yolov5_result(result_array, 0.6); | ||
152 | + if (vec_result.size() > 0) { | ||
153 | + DogPoseResult poseResult = vec_result[0]; | ||
154 | + std::cout << poseResult.x << std::endl; | ||
155 | + std::cout << poseResult.y << std::endl; | ||
156 | + std::cout << poseResult.width << std::endl; | ||
157 | + std::cout << poseResult.height << std::endl; | ||
158 | + std::cout << poseResult.confidence << std::endl; | ||
159 | + std::cout << poseResult.classId << std::endl; | ||
160 | + std::cout << poseResult.className << std::endl; | ||
161 | + | ||
162 | + | ||
163 | + //cv::Rect position_boxe; | ||
164 | + //position_boxe.x = poseResult.x; | ||
165 | + //position_boxe.y = poseResult.y; | ||
166 | + //position_boxe.width = poseResult.width; | ||
167 | + //position_boxe.height = poseResult.height; | ||
168 | + //cv::rectangle(image, position_boxe, cv::Scalar(0, 0, 255), 2, 8); | ||
169 | + //cv::rectangle(image, cv::Point(position_boxe.x, position_boxe.y - 20), cv::Point(position_boxe.x, position_boxe.y), cv::Scalar(0, 255, 255), -1); | ||
170 | + //cv::putText(image, poseResult.className, cv::Point(position_boxe.x, position_boxe.y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, cv::Scalar(0, 0, 0)); | ||
171 | + | ||
172 | + //cv::imwrite("result.jpg", image); | ||
173 | + //cv::imshow("show", image); | ||
174 | + //cv::waitKey(1); | ||
175 | + } | ||
176 | + | ||
177 | + //delete pBuf; | ||
178 | + | ||
179 | + return true; | ||
180 | +} | ||
0 | \ No newline at end of file | 181 | \ No newline at end of file |
FFNvDecoder/DogPoseDetector.h
0 → 100644
1 | +#pragma once | ||
2 | + | ||
3 | +#include <fstream> | ||
4 | +#include <iostream> | ||
5 | +#include <sstream> | ||
6 | +#include <vector> | ||
7 | + | ||
8 | +#include "NvInfer.h" | ||
9 | +#include "NvOnnxParser.h" | ||
10 | + | ||
11 | +#include "AlgorithmResult.h" | ||
12 | + | ||
13 | +class DogPoseDetector { | ||
14 | +public: | ||
15 | + bool init(); | ||
16 | + | ||
17 | + bool detect(unsigned char *pGpuBgb, int src_width, int src_height); | ||
18 | + | ||
19 | +private: | ||
20 | + nvinfer1::IExecutionContext* context; | ||
21 | + | ||
22 | + void** m_data_buffer; | ||
23 | + int m_input_node_index; | ||
24 | + nvinfer1::Dims m_input_node_dim; | ||
25 | + int m_output_node_index; | ||
26 | + nvinfer1::Dims m_output_node_dim; | ||
27 | + | ||
28 | + unsigned char* pSquareData{ nullptr }; | ||
29 | +}; | ||
0 | \ No newline at end of file | 30 | \ No newline at end of file |
FFNvDecoder/FFNvDecoder.cpp
@@ -79,6 +79,11 @@ bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp) | @@ -79,6 +79,11 @@ bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp) | ||
79 | av_dict_set( &options, "rtsp_transport", force_tcp ? "tcp" : "udp", 0 ); | 79 | av_dict_set( &options, "rtsp_transport", force_tcp ? "tcp" : "udp", 0 ); |
80 | // av_dict_set( &options, "listen_timeout", "30", 0 ); // 单位为s | 80 | // av_dict_set( &options, "listen_timeout", "30", 0 ); // 单位为s |
81 | av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒 | 81 | av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒 |
82 | + av_dict_set(&options, " max_delay", " 30000000", 0); | ||
83 | + // av_dict_set( &options, "buffer_size", "655360", 0 ); | ||
84 | + // av_dict_set( &options, "pkt_size", "655360", 0 ); | ||
85 | + av_dict_set(&options, "fifo_size", "6553600", 0); | ||
86 | + //av_dict_set(&options, "fflags", "discardcorrupt", 0); | ||
82 | 87 | ||
83 | fmt_ctx = avformat_alloc_context(); | 88 | fmt_ctx = avformat_alloc_context(); |
84 | const char* input_file = uri; | 89 | const char* input_file = uri; |
FFNvDecoder/FFNvDecoder.vcxproj
@@ -48,13 +48,13 @@ | @@ -48,13 +48,13 @@ | ||
48 | <Optimization>Disabled</Optimization> | 48 | <Optimization>Disabled</Optimization> |
49 | <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> | 49 | <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> |
50 | <AdditionalOptions>/utf-8</AdditionalOptions> | 50 | <AdditionalOptions>/utf-8</AdditionalOptions> |
51 | - <AdditionalIncludeDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\include;./;./common/inc;./common/UtilNPP;D:\win_dev\opencv\build\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | 51 | + <AdditionalIncludeDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\include;./;./common/inc;./common/UtilNPP;D:\win_dev\opencv\build\include;..\3rdparty\TensorRT-8.6.1.6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> |
52 | </ClCompile> | 52 | </ClCompile> |
53 | <Link> | 53 | <Link> |
54 | <GenerateDebugInformation>true</GenerateDebugInformation> | 54 | <GenerateDebugInformation>true</GenerateDebugInformation> |
55 | <SubSystem>Console</SubSystem> | 55 | <SubSystem>Console</SubSystem> |
56 | - <AdditionalDependencies>avcodec.lib;avdevice.lib;avfilter.lib;avformat.lib;avutil.lib;postproc.lib;swresample.lib;swscale.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;nvjpeg.lib;opencv_world455d.lib;freeglut.lib;glew64.lib;%(AdditionalDependencies)</AdditionalDependencies> | ||
57 | - <AdditionalLibraryDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\lib;D:\win_dev\opencv\build\x64\vc14\lib;../3rdparty/gl;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> | 56 | + <AdditionalDependencies>avcodec.lib;avdevice.lib;avfilter.lib;avformat.lib;avutil.lib;postproc.lib;swresample.lib;swscale.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;nvjpeg.lib;opencv_world455d.lib;freeglut.lib;glew64.lib;nvinfer.lib;nvinfer_plugin.lib;nvonnxparser.lib;nvparsers.lib;cudnn.lib;cublas.lib;cudart.lib;%(AdditionalDependencies)</AdditionalDependencies> |
57 | + <AdditionalLibraryDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\lib;D:\win_dev\opencv\build\x64\vc14\lib;../3rdparty/gl;..\3rdparty\TensorRT-8.6.1.6\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> | ||
58 | </Link> | 58 | </Link> |
59 | <CudaCompile> | 59 | <CudaCompile> |
60 | <TargetMachinePlatform>64</TargetMachinePlatform> | 60 | <TargetMachinePlatform>64</TargetMachinePlatform> |
@@ -81,9 +81,13 @@ | @@ -81,9 +81,13 @@ | ||
81 | </ItemDefinitionGroup> | 81 | </ItemDefinitionGroup> |
82 | <ItemGroup> | 82 | <ItemGroup> |
83 | <CudaCompile Include="NV12ToRGB.cu" /> | 83 | <CudaCompile Include="NV12ToRGB.cu" /> |
84 | + <CudaCompile Include="ResizeAndNorm.cu" /> | ||
84 | </ItemGroup> | 85 | </ItemGroup> |
85 | <ItemGroup> | 86 | <ItemGroup> |
87 | + <ClCompile Include="AlgorithmResult.cpp" /> | ||
86 | <ClCompile Include="check_tool.cpp" /> | 88 | <ClCompile Include="check_tool.cpp" /> |
89 | + <ClCompile Include="DogPoseDetector.cpp" /> | ||
90 | + <ClCompile Include="dog_train_sys.cpp" /> | ||
87 | <ClCompile Include="FFCuContextManager.cpp" /> | 91 | <ClCompile Include="FFCuContextManager.cpp" /> |
88 | <ClCompile Include="FFNvDecoder.cpp" /> | 92 | <ClCompile Include="FFNvDecoder.cpp" /> |
89 | <ClCompile Include="FFNvDecoderManager.cpp" /> | 93 | <ClCompile Include="FFNvDecoderManager.cpp" /> |
@@ -92,8 +96,10 @@ | @@ -92,8 +96,10 @@ | ||
92 | <ClCompile Include="NvJpegEncoder.cpp" /> | 96 | <ClCompile Include="NvJpegEncoder.cpp" /> |
93 | </ItemGroup> | 97 | </ItemGroup> |
94 | <ItemGroup> | 98 | <ItemGroup> |
99 | + <ClInclude Include="AlgorithmResult.h" /> | ||
95 | <ClInclude Include="check_tool.h" /> | 100 | <ClInclude Include="check_tool.h" /> |
96 | <ClInclude Include="cuda_kernels.h" /> | 101 | <ClInclude Include="cuda_kernels.h" /> |
102 | + <ClInclude Include="DogPoseDetector.h" /> | ||
97 | <ClInclude Include="FFCuContextManager.h" /> | 103 | <ClInclude Include="FFCuContextManager.h" /> |
98 | <ClInclude Include="FFNvDecoder.h" /> | 104 | <ClInclude Include="FFNvDecoder.h" /> |
99 | <ClInclude Include="FFNvDecoderManager.h" /> | 105 | <ClInclude Include="FFNvDecoderManager.h" /> |
FFNvDecoder/FFNvDecoder.vcxproj.filters
@@ -4,6 +4,9 @@ | @@ -4,6 +4,9 @@ | ||
4 | <CudaCompile Include="NV12ToRGB.cu"> | 4 | <CudaCompile Include="NV12ToRGB.cu"> |
5 | <Filter>cu_src</Filter> | 5 | <Filter>cu_src</Filter> |
6 | </CudaCompile> | 6 | </CudaCompile> |
7 | + <CudaCompile Include="ResizeAndNorm.cu"> | ||
8 | + <Filter>cu_src</Filter> | ||
9 | + </CudaCompile> | ||
7 | </ItemGroup> | 10 | </ItemGroup> |
8 | <ItemGroup> | 11 | <ItemGroup> |
9 | <Filter Include="src"> | 12 | <Filter Include="src"> |
@@ -38,6 +41,15 @@ | @@ -38,6 +41,15 @@ | ||
38 | <ClCompile Include="NvJpegEncoder.cpp"> | 41 | <ClCompile Include="NvJpegEncoder.cpp"> |
39 | <Filter>cu_src</Filter> | 42 | <Filter>cu_src</Filter> |
40 | </ClCompile> | 43 | </ClCompile> |
44 | + <ClCompile Include="AlgorithmResult.cpp"> | ||
45 | + <Filter>src</Filter> | ||
46 | + </ClCompile> | ||
47 | + <ClCompile Include="dog_train_sys.cpp"> | ||
48 | + <Filter>src</Filter> | ||
49 | + </ClCompile> | ||
50 | + <ClCompile Include="DogPoseDetector.cpp"> | ||
51 | + <Filter>src</Filter> | ||
52 | + </ClCompile> | ||
41 | </ItemGroup> | 53 | </ItemGroup> |
42 | <ItemGroup> | 54 | <ItemGroup> |
43 | <ClInclude Include="check_tool.h"> | 55 | <ClInclude Include="check_tool.h"> |
@@ -61,5 +73,11 @@ | @@ -61,5 +73,11 @@ | ||
61 | <ClInclude Include="NvJpegEncoder.h"> | 73 | <ClInclude Include="NvJpegEncoder.h"> |
62 | <Filter>cu_src</Filter> | 74 | <Filter>cu_src</Filter> |
63 | </ClInclude> | 75 | </ClInclude> |
76 | + <ClInclude Include="AlgorithmResult.h"> | ||
77 | + <Filter>include</Filter> | ||
78 | + </ClInclude> | ||
79 | + <ClInclude Include="DogPoseDetector.h"> | ||
80 | + <Filter>include</Filter> | ||
81 | + </ClInclude> | ||
64 | </ItemGroup> | 82 | </ItemGroup> |
65 | </Project> | 83 | </Project> |
66 | \ No newline at end of file | 84 | \ No newline at end of file |
FFNvDecoder/FFNvDecoder.vcxproj.user
1 | <?xml version="1.0" encoding="utf-8"?> | 1 | <?xml version="1.0" encoding="utf-8"?> |
2 | <Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | 2 | <Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
3 | <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> | 3 | <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> |
4 | - <LocalDebuggerCommandArguments>rtsp://122.97.218.170:8604/openUrl/V5nXRHa?params=eyJwcm90b2NhbCI6InJ0c3AiLCJjbGllbnRUeXBlIjoib3Blbl9hcGkiLCJleHByaWVUaW1lIjotMSwicHJvdG9jb2wiOiJydHNwIiwiZXhwaXJlVGltZSI6MzAwLCJlbmFibGVNR0MiOnRydWUsImV4cGFuZCI6InN0YW5kYXJkPXJ0c3Amc3RyZWFtZm9ybT1ydHAiLCJhIjoiMTBjZjM4N2JjY2Y5NDg3YzhjNWYzNjE2M2ViMWUyNTJ8MXwwfDEiLCJ0IjoxfQ== 0</LocalDebuggerCommandArguments> | 4 | + <LocalDebuggerCommandArguments> |
5 | + </LocalDebuggerCommandArguments> | ||
5 | <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor> | 6 | <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor> |
6 | </PropertyGroup> | 7 | </PropertyGroup> |
7 | </Project> | 8 | </Project> |
8 | \ No newline at end of file | 9 | \ No newline at end of file |
FFNvDecoder/NV12ToRGB.cu
@@ -4,9 +4,6 @@ | @@ -4,9 +4,6 @@ | ||
4 | #include <builtin_types.h> | 4 | #include <builtin_types.h> |
5 | #include "common/inc/helper_cuda_drvapi.h" | 5 | #include "common/inc/helper_cuda_drvapi.h" |
6 | 6 | ||
7 | -typedef unsigned char uint8; | ||
8 | -typedef unsigned int uint32; | ||
9 | -typedef int int32; | ||
10 | 7 | ||
11 | #define COLOR_COMPONENT_MASK 0x3FF | 8 | #define COLOR_COMPONENT_MASK 0x3FF |
12 | #define COLOR_COMPONENT_BIT_SIZE 10 | 9 | #define COLOR_COMPONENT_BIT_SIZE 10 |
FFNvDecoder/ResizeAndNorm.cu
0 → 100644
1 | +#include <cuda_runtime.h> | ||
2 | +#include <device_launch_parameters.h> | ||
3 | +#include <stdio.h> | ||
4 | + | ||
5 | +#include "cuda_kernels.h" | ||
6 | + | ||
7 | +namespace cuda_common | ||
8 | +{ | ||
9 | + | ||
10 | + __forceinline__ __device__ float3 get(uchar3* src, int x, int y, int w, int h) { | ||
11 | + if (x < 0 || x >= w || y < 0 || y >= h) return make_float3(0.5, 0.5, 0.5); | ||
12 | + uchar3 temp = src[y*w + x]; | ||
13 | + return make_float3(float(temp.x) / 255., float(temp.y) / 255., float(temp.z) / 255.); | ||
14 | + } | ||
15 | + | ||
16 | + __global__ void resizeNormKernel(uchar3* src, float *dst, int dstW, int dstH, int srcW, int srcH, | ||
17 | + float scaleX, float scaleY, float shiftX, float shiftY) { | ||
18 | + int idx = blockIdx.x * blockDim.x + threadIdx.x; | ||
19 | + const int x = idx % dstW; | ||
20 | + const int y = idx / dstW; | ||
21 | + if (x >= dstW || y >= dstH) | ||
22 | + return; | ||
23 | + float w = (x - shiftX + 0.5) * scaleX - 0.5; // Ëõ·ÅµÄ·´ÏòÓ³É侨Õó | ||
24 | + float h = (y - shiftY + 0.5) * scaleY - 0.5; // opencv | ||
25 | + int h_low = (int)h; | ||
26 | + int w_low = (int)w; | ||
27 | + int h_high = h_low + 1; | ||
28 | + int w_high = w_low + 1; | ||
29 | + float lh = h - h_low; | ||
30 | + float lw = w - w_low; | ||
31 | + float hh = 1 - lh, hw = 1 - lw; | ||
32 | + float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; | ||
33 | + float3 v1 = get(src, w_low, h_low, srcW, srcH); | ||
34 | + float3 v2 = get(src, w_high, h_low, srcW, srcH); | ||
35 | + float3 v3 = get(src, w_low, h_high, srcW, srcH); | ||
36 | + float3 v4 = get(src, w_high, h_high, srcW, srcH); | ||
37 | + int stride = dstW*dstH; | ||
38 | + dst[y*dstW + x] = w1 *v1.x + w2 * v2.x + w3 *v3.x + w4 * v4.x; | ||
39 | + dst[stride + y*dstW + x] = w1 *v1.y + w2 * v2.y + w3 *v3.y + w4 * v4.y; | ||
40 | + dst[stride * 2 + y*dstW + x] = w1 *v1.z + w2 * v2.z + w3 *v3.z + w4 * v4.z; | ||
41 | + } | ||
42 | + | ||
43 | + __global__ void copy2square(uchar3 *dataIn, uchar3 *dataOut, int imgWidth, int imgHeight, int squareWidth) | ||
44 | + { | ||
45 | + // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread | ||
46 | + int32 x = blockIdx.x * blockDim.x + threadIdx.x; | ||
47 | + int32 y = blockIdx.y * blockDim.y + threadIdx.y; | ||
48 | + | ||
49 | + if (x >= imgWidth) | ||
50 | + { | ||
51 | + return; | ||
52 | + } | ||
53 | + | ||
54 | + if (y >= imgHeight) | ||
55 | + { | ||
56 | + return; | ||
57 | + } | ||
58 | + | ||
59 | + dataOut[y*squareWidth + x] = dataIn[y*imgWidth + x]; | ||
60 | + } | ||
61 | + | ||
62 | + __global__ void kernel_bilinear(uint8 *src_img, int src_width, int src_height, float *dst_img, int dst_width, int dst_height) | ||
63 | + { | ||
64 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | ||
65 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | ||
66 | + | ||
67 | + if (x < dst_width && y < dst_height) | ||
68 | + { | ||
69 | + float fx = (x + 0.5)*src_width / (float)dst_width - 0.5; | ||
70 | + float fy = (y + 0.5)*src_height / (float)dst_height - 0.5; | ||
71 | + int ax = floor(fx); | ||
72 | + int ay = floor(fy); | ||
73 | + if (ax < 0) | ||
74 | + { | ||
75 | + ax = 0; | ||
76 | + } | ||
77 | + else if (ax > src_width - 2) | ||
78 | + { | ||
79 | + ax = src_width - 2; | ||
80 | + } | ||
81 | + | ||
82 | + if (ay < 0) { | ||
83 | + ay = 0; | ||
84 | + } | ||
85 | + else if (ay > src_height - 2) | ||
86 | + { | ||
87 | + ay = src_height - 2; | ||
88 | + } | ||
89 | + | ||
90 | + int A = ax + ay*src_width; | ||
91 | + int B = ax + ay*src_width + 1; | ||
92 | + int C = ax + ay*src_width + src_width; | ||
93 | + int D = ax + ay*src_width + src_width + 1; | ||
94 | + | ||
95 | + float w1, w2, w3, w4; | ||
96 | + w1 = fx - ax; | ||
97 | + w2 = 1 - w1; | ||
98 | + w3 = fy - ay; | ||
99 | + w4 = 1 - w3; | ||
100 | + | ||
101 | + float blue = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3; | ||
102 | + | ||
103 | + float green = src_img[src_width * src_height + A] * w2*w4 + src_img[src_width * src_height + B] * w1*w4 | ||
104 | + + src_img[src_width * src_height + C] * w2*w3 + src_img[src_width * src_height + D] * w1*w3; | ||
105 | + | ||
106 | + float red = src_img[src_width * src_height * 2 + A] * w2*w4 + src_img[src_width * src_height * 2 + B] * w1*w4 | ||
107 | + + src_img[src_width * src_height * 2 + C] * w2*w3 + src_img[src_width * src_height * 2 + D] * w1*w3; | ||
108 | + | ||
109 | + dst_img[y * dst_width + x] = red; | ||
110 | + dst_img[dst_width * dst_height + y * dst_width + x] = green; | ||
111 | + dst_img[dst_width * dst_height * 2 + y * dst_width + x] = blue; | ||
112 | + } | ||
113 | + } | ||
114 | + | ||
115 | + __global__ void resize_norm_kernel(uchar3 *src_img, int src_width, int src_height, float *dataOut, int dst_width, int dst_height) | ||
116 | + { | ||
117 | + // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread | ||
118 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | ||
119 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | ||
120 | + | ||
121 | + if (x >= dst_width || y >= dst_height) { | ||
122 | + return; | ||
123 | + } | ||
124 | + | ||
125 | + double ratio = 0; | ||
126 | + if (src_width >= src_height) { | ||
127 | + ratio = src_width / (float)dst_width; | ||
128 | + } | ||
129 | + else | ||
130 | + { | ||
131 | + ratio = src_height / (float)dst_height; | ||
132 | + } | ||
133 | + | ||
134 | + float fx = (x + 0.5)*ratio - 0.5; | ||
135 | + float fy = (y + 0.5)*ratio - 0.5; | ||
136 | + int ax = floor(fx); | ||
137 | + int ay = floor(fy); | ||
138 | + if (ax < 0) | ||
139 | + { | ||
140 | + ax = 0; | ||
141 | + } | ||
142 | + else if (ax >= (src_width - 2)) | ||
143 | + { | ||
144 | + return; | ||
145 | + } | ||
146 | + | ||
147 | + if (ay < 0) { | ||
148 | + ay = 0; | ||
149 | + } | ||
150 | + else if (ay >= (src_height - 2)) | ||
151 | + { | ||
152 | + return; | ||
153 | + } | ||
154 | + | ||
155 | + //int A = ay * src_width + ax; | ||
156 | + | ||
157 | + //dataOut[y * dst_width + x].x = src_img[A].x / 255.0; | ||
158 | + //dataOut[y * dst_width + x].y = src_img[A].x / 255.0; | ||
159 | + //dataOut[y * dst_width + x].z = src_img[A].x / 255.0; | ||
160 | + | ||
161 | + int A = ax + ay*src_width; | ||
162 | + int B = ax + ay*src_width + 1; | ||
163 | + int C = ax + ay*src_width + src_width; | ||
164 | + int D = ax + ay*src_width + src_width + 1; | ||
165 | + | ||
166 | + float w1, w2, w3, w4; | ||
167 | + w1 = fx - ax; | ||
168 | + w2 = 1 - w1; | ||
169 | + w3 = fy - ay; | ||
170 | + w4 = 1 - w3; | ||
171 | + | ||
172 | + float blue = src_img[A].x * w2*w4 + src_img[B].x * w1*w4 + src_img[C].x * w2*w3 + src_img[D].x * w1*w3; | ||
173 | + float green = src_img[A].y * w2*w4 + src_img[B].y * w1*w4 + src_img[C].y * w2*w3 + src_img[D].y * w1*w3; | ||
174 | + float red = src_img[A].z * w2*w4 + src_img[B].z * w1*w4 + src_img[C].z * w2*w3 + src_img[D].z * w1*w3; | ||
175 | + | ||
176 | + /* dataOut[y * dst_width + x].x = red / 255.0; | ||
177 | + dataOut[y * dst_width + x].y = green / 255.0; | ||
178 | + dataOut[y * dst_width + x].z = blue / 255.0;*/ | ||
179 | + | ||
180 | + // Clamp the results to RRRRR....GGGGGGG.......BBBBBBB.... | ||
181 | + dataOut[y * dst_width + x] = red / 255.0; | ||
182 | + dataOut[dst_width * dst_height + y * dst_width + x] = green / 255.0; | ||
183 | + dataOut[dst_width * dst_height * 2 + y * dst_width + x] = blue / 255.0; | ||
184 | + } | ||
185 | + | ||
186 | + cudaError_t resizeAndNorm(unsigned char* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height) | ||
187 | + { | ||
188 | + dim3 block(32, 16, 1); | ||
189 | + dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1); | ||
190 | + | ||
191 | + resize_norm_kernel << < grid, block >> >((uchar3 *)d_srcRGB, src_width, src_height, d_dstRGB, dst_width, dst_height); | ||
192 | + | ||
193 | + cudaError_t cudaStatus = cudaGetLastError(); | ||
194 | + if (cudaStatus != cudaSuccess) { | ||
195 | + fprintf(stderr, "kernel_bilinear launch failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
196 | + return cudaStatus; | ||
197 | + } | ||
198 | + | ||
199 | + cudaStatus = cudaDeviceSynchronize(); | ||
200 | + if (cudaStatus != cudaSuccess) { | ||
201 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); | ||
202 | + return cudaStatus; | ||
203 | + } | ||
204 | + | ||
205 | + return cudaStatus; | ||
206 | + } | ||
207 | + | ||
208 | + //int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter, cudaStream_t stream) { | ||
209 | + // float scaleX = (w*1.0f / in_w); | ||
210 | + // float scaleY = (h*1.0f / in_h); | ||
211 | + // float shiftX = 0.f, shiftY = 0.f; | ||
212 | + // if (keepration)scaleX = scaleY = scaleX > scaleY ? scaleX : scaleY; | ||
213 | + // if (keepration && keepcenter) { shiftX = (in_w - w / scaleX) / 2.f; shiftY = (in_h - h / scaleY) / 2.f; } | ||
214 | + // const int n = in_w*in_h; | ||
215 | + // int blockSize = 1024; | ||
216 | + // const int gridSize = (n + blockSize - 1) / blockSize; | ||
217 | + // resizeNormKernel << <gridSize, blockSize, 0, stream >> > ((uchar3*)(p), d, in_w, in_h, w, h, scaleX, scaleY, shiftX, shiftY); | ||
218 | + // return 0; | ||
219 | + //} | ||
220 | + | ||
221 | + //int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter) { | ||
222 | + // float scaleX = (w*1.0f / in_w); | ||
223 | + // float scaleY = (h*1.0f / in_h); | ||
224 | + // float shiftX = 0.f, shiftY = 0.f; | ||
225 | + // if (keepration)scaleX = scaleY = scaleX > scaleY ? scaleX : scaleY; | ||
226 | + // if (keepration && keepcenter) { shiftX = (in_w - w / scaleX) / 2.f; shiftY = (in_h - h / scaleY) / 2.f; } | ||
227 | + // const int n = in_w*in_h; | ||
228 | + // int blockSize = 1024; | ||
229 | + // const int gridSize = (n + blockSize - 1) / blockSize; | ||
230 | + // resizeNormKernel << <gridSize, blockSize, 0 >> > ((uchar3*)(p), d, in_w, in_h, w, h, scaleX, scaleY, shiftX, shiftY); | ||
231 | + // return 0; | ||
232 | + //} | ||
233 | + | ||
234 | + int copy2square(void * p, void *d, int w, int h, int squareWidth, cudaStream_t stream) { | ||
235 | + dim3 block(32, 16, 1); | ||
236 | + dim3 grid((w + (block.x - 1)) / (block.x), (h + (block.y - 1)) / block.y, 1); | ||
237 | + copy2square << <grid, block, 0, stream>> > ((uchar3 *)(p), (uchar3 *)d, w, h, squareWidth); | ||
238 | + return 0; | ||
239 | + } | ||
240 | + | ||
241 | +} | ||
0 | \ No newline at end of file | 242 | \ No newline at end of file |
FFNvDecoder/cuda_kernels.h
@@ -10,6 +10,10 @@ | @@ -10,6 +10,10 @@ | ||
10 | 10 | ||
11 | #include <cuda.h> | 11 | #include <cuda.h> |
12 | 12 | ||
13 | +typedef unsigned char uint8; | ||
14 | +typedef unsigned int uint32; | ||
15 | +typedef int int32; | ||
16 | + | ||
13 | typedef enum | 17 | typedef enum |
14 | { | 18 | { |
15 | ITU601 = 1, | 19 | ITU601 = 1, |
@@ -22,5 +26,12 @@ namespace cuda_common | @@ -22,5 +26,12 @@ namespace cuda_common | ||
22 | 26 | ||
23 | cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height); | 27 | cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height); |
24 | cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height); | 28 | cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height); |
29 | + | ||
30 | + //int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter, cudaStream_t stream); | ||
31 | + //int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter); | ||
32 | + cudaError_t resizeAndNorm(unsigned char* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height); | ||
33 | + | ||
34 | + int copy2square(void * p, void *d, int w, int h, int max_side_length, cudaStream_t stream); | ||
35 | + | ||
25 | } | 36 | } |
26 | 37 |
FFNvDecoder/dog_train_sys.cpp
0 → 100644
1 | +// | ||
2 | +//#include <fstream> | ||
3 | +//#include <iostream> | ||
4 | +//#include <sstream> | ||
5 | +//#include <vector> | ||
6 | +// | ||
7 | +// | ||
8 | +//#include "NvInfer.h" | ||
9 | +//#include "NvOnnxParser.h" | ||
10 | +//#include <opencv2/opencv.hpp> | ||
11 | +// | ||
12 | +//#include "AlgorithmResult.h" | ||
13 | +//#include "cuda_kernels.h" | ||
14 | +// | ||
15 | +// | ||
16 | +//// @brief 用于创建IBuilder、IRuntime或IRefitter实例的记录器,将用于通过该接口创建的所有对象。 | |
17 | +//// 在释放所有创建的对象之前,记录器应一直有效。 | ||
18 | +//// 主要是实现ILogger类下的log()方法。 | |
19 | +//class Logger : public nvinfer1::ILogger | ||
20 | +//{ | ||
21 | +// void log(Severity severity, const char* message) noexcept | ||
22 | +// { | ||
23 | +// // suppress info-level messages | ||
24 | +// if (severity != Severity::kINFO) | ||
25 | +// std::cout << message << std::endl; | ||
26 | +// } | ||
27 | +//} gLogger; | ||
28 | +// | ||
29 | +// | ||
30 | +// | ||
31 | +//void onnx_to_engine(std::string onnx_file_path, std::string engine_file_path, int type) { | ||
32 | +// | ||
33 | +// // 构建器,获取cuda内核目录以获取最快的实现 | ||
34 | +// // 用于创建config、network、engine的其他对象的核心类 | ||
35 | +// nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger); | ||
36 | +// const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); | ||
37 | +// // 解析onnx网络文件 | ||
38 | +// // tensorRT模型类 | ||
39 | +// nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch); | ||
40 | +// // onnx文件解析类 | ||
41 | +// 将onnx文件解析,并填充tensorRT网络结构 | |
42 | +// nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger); | ||
43 | +// // 解析onnx文件 | ||
44 | +// parser->parseFromFile(onnx_file_path.c_str(), 2); | ||
45 | +// for (int i = 0; i < parser->getNbErrors(); ++i) { | ||
46 | +// std::cout << "load error: " << parser->getError(i)->desc() << std::endl; | ||
47 | +// } | ||
48 | +// printf("tensorRT load mask onnx model successfully!!!...\n"); | ||
49 | +// | ||
50 | +// // 创建推理引擎 | ||
51 | +// // 创建生成器配置对象。 | ||
52 | +// nvinfer1::IBuilderConfig* config = builder->createBuilderConfig(); | ||
53 | +// // 设置最大工作空间大小。 | ||
54 | +// config->setMaxWorkspaceSize(16 * (1 << 20)); | ||
55 | +// // 设置模型输出精度 | ||
56 | +// if (type == 1) { | ||
57 | +// config->setFlag(nvinfer1::BuilderFlag::kFP16); | ||
58 | +// } | ||
59 | +// if (type == 2) { | ||
60 | +// config->setFlag(nvinfer1::BuilderFlag::kINT8); | ||
61 | +// } | ||
62 | +// // 创建推理引擎 | ||
63 | +// nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config); | ||
64 | +// 将推理引擎保存到本地 | |
65 | +// std::cout << "try to save engine file now~~~" << std::endl; | ||
66 | +// std::ofstream file_ptr(engine_file_path, std::ios::binary); | ||
67 | +// if (!file_ptr) { | ||
68 | +// std::cerr << "could not open plan output file" << std::endl; | ||
69 | +// return; | ||
70 | +// } | ||
71 | +// // 将模型转化为文件流数据 | ||
72 | +// nvinfer1::IHostMemory* model_stream = engine->serialize(); | ||
73 | +// // 将文件保存到本地 | ||
74 | +// file_ptr.write(reinterpret_cast<const char*>(model_stream->data()), model_stream->size()); | ||
75 | +// // 销毁创建的对象 | ||
76 | +// model_stream->destroy(); | ||
77 | +// engine->destroy(); | ||
78 | +// network->destroy(); | ||
79 | +// parser->destroy(); | ||
80 | +// std::cout << "convert onnx model to TensorRT engine model successfully!" << std::endl; | ||
81 | +//} | ||
82 | +// | ||
83 | +//int main() { | ||
84 | +// | ||
85 | +// | ||
86 | +// const char* model_path_onnx = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.onnx"; | ||
87 | +// const char* model_path_engine = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.engine"; | ||
88 | +// const char* image_path = "F:/dog_trainer_sys/test1/IMG_6837.JPG"; | ||
89 | +// std::string lable_path = "F:/dog_trainer_sys/train2/classes.txt"; | ||
90 | +// const char* input_node_name = "images"; | ||
91 | +// const char* output_node_name = "output"; | ||
92 | +// int num_ionode = 2; | ||
93 | +// | ||
94 | +// // 读取本地模型文件 | ||
95 | +// std::ifstream file_ptr(model_path_engine, std::ios::binary); | ||
96 | +// if (!file_ptr.good()) { | ||
97 | +// std::cerr << "文件无法打开,请确定文件是否可用!" << std::endl; | ||
98 | +// } | ||
99 | +// | ||
100 | +// size_t size = 0; | ||
101 | +// file_ptr.seekg(0, file_ptr.end); // 将读指针从文件末尾开始移动0个字节 | ||
102 | +// size = file_ptr.tellg(); // 返回读指针的位置,此时读指针的位置就是文件的字节数 | ||
103 | +// file_ptr.seekg(0, file_ptr.beg); // 将读指针从文件开头开始移动0个字节 | ||
104 | +// char* model_stream = new char[size]; | ||
105 | +// file_ptr.read(model_stream, size); | ||
106 | +// file_ptr.close(); | ||
107 | +// | ||
108 | +// // 日志记录接口 | ||
109 | +// //Logger logger; | ||
110 | +// // 反序列化引擎 | ||
111 | +// nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger); | ||
112 | +// // 推理引擎 | ||
113 | +// // 保存模型的模型结构、模型参数以及最优计算kernel配置; | ||
114 | +// // 不能跨平台和跨TensorRT版本移植 | ||
115 | +// nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream, size); | ||
116 | +// // 上下文 | ||
117 | +// // 储存中间值,实际进行推理的对象 | ||
118 | +// // 由engine创建,可创建多个对象,进行多推理任务 | ||
119 | +// nvinfer1::IExecutionContext* context = engine->createExecutionContext(); | ||
120 | +// | ||
121 | +// | ||
122 | +// delete[] model_stream; | ||
123 | +// | ||
124 | +// // 创建GPU显存缓冲区 | ||
125 | +// void** data_buffer = new void*[num_ionode]; | ||
126 | +// // 创建GPU显存输入缓冲区 | ||
127 | +// int input_node_index = engine->getBindingIndex(input_node_name); | ||
128 | +// nvinfer1::Dims input_node_dim = engine->getBindingDimensions(input_node_index); | ||
129 | +// size_t input_data_length = input_node_dim.d[1] * input_node_dim.d[2] * input_node_dim.d[3]; | ||
130 | +// cudaMalloc(&(data_buffer[input_node_index]), input_data_length * sizeof(float)); | ||
131 | +// // 创建GPU显存输出缓冲区 | ||
132 | +// int output_node_index = engine->getBindingIndex(output_node_name); | ||
133 | +// nvinfer1::Dims output_node_dim = engine->getBindingDimensions(output_node_index); | ||
134 | +// size_t output_data_length = output_node_dim.d[1] * output_node_dim.d[2]; | ||
135 | +// cudaMalloc(&(data_buffer[output_node_index]), output_data_length * sizeof(float)); | ||
136 | +// | ||
137 | +// | ||
138 | +// // 图象预处理 - 格式化操作 | ||
139 | +// cv::Mat image = cv::imread(image_path); | ||
140 | +// int max_side_length = std::max(image.cols, image.rows); | ||
141 | +// cv::Mat max_image = cv::Mat::zeros(cv::Size(max_side_length, max_side_length), CV_8UC3); | ||
142 | +// cv::Rect roi(0, 0, image.cols, image.rows); | ||
143 | +// image.copyTo(max_image(roi)); | ||
144 | +// // 将图像归一化,并放缩到指定大小 | ||
145 | +// cv::Size input_node_shape(input_node_dim.d[2], input_node_dim.d[3]); | ||
146 | +// cv::Mat BN_image = cv::dnn::blobFromImage(max_image, 1 / 255.0, input_node_shape, cv::Scalar(0, 0, 0), true, false); | ||
147 | +// | ||
148 | +// std::vector<float> input_data(input_data_length); | ||
149 | +// memcpy(input_data.data(), BN_image.ptr<float>(), input_data_length * sizeof(float)); | ||
150 | +// | ||
151 | +// //void* pGPUData; | ||
152 | +// //cudaMalloc(&pGPUData, 3 * image.cols * image.rows * sizeof(unsigned char)); | ||
153 | +// //cudaMemcpy(pGPUData, (void*)(image.data), 3 * image.cols * image.rows * sizeof(unsigned char), cudaMemcpyHostToDevice); | ||
154 | +// //cuda_common::resizeAndNorm((unsigned char*)pGPUData, max_side_length, max_side_length, (float*)data_buffer[input_node_index], input_node_dim.d[2], input_node_dim.d[3]); | ||
155 | +// | ||
156 | +// // 创建输入cuda流 | ||
157 | +// cudaStream_t stream; | ||
158 | +// cudaStreamCreate(&stream); | ||
159 | +// | ||
160 | +// // 输入数据由内存到GPU显存 | ||
161 | +// cudaMemcpyAsync(data_buffer[input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream); | ||
162 | +// | ||
163 | +// // 模型推理 | ||
164 | +// context->enqueueV2(data_buffer, stream, nullptr); | ||
165 | +// | ||
166 | +// float* result_array = new float[output_data_length]; | ||
167 | +// cudaMemcpyAsync(result_array, data_buffer[output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream); | ||
168 | +// | ||
169 | +// ResultYolov5 result; | ||
170 | +// result.factor = max_side_length / (float)input_node_dim.d[2]; | ||
171 | +// result.read_class_names(lable_path); | ||
172 | +// | ||
173 | +// //cv::Mat result_image = result.yolov5_result(image, result_array); | ||
174 | +// | ||
175 | +// //// 查看输出结果 | ||
176 | +// //cv::imshow("C++ + OpenVINO + Yolov5 推理结果", result_image); | ||
177 | +// //cv::waitKey(); | ||
178 | +// | ||
179 | +// std::vector<DogPoseResult> vec_result = result.yolov5_result(result_array, 0.6); | ||
180 | +// if (vec_result.size() > 0) { | ||
181 | +// DogPoseResult poseResult = vec_result[0]; | ||
182 | +// std::cout << poseResult.x << std::endl; | ||
183 | +// std::cout << poseResult.y << std::endl; | ||
184 | +// std::cout << poseResult.width << std::endl; | ||
185 | +// std::cout << poseResult.height << std::endl; | ||
186 | +// std::cout << poseResult.confidence << std::endl; | ||
187 | +// std::cout << poseResult.classId << std::endl; | ||
188 | +// std::cout << poseResult.className << std::endl; | ||
189 | +// | ||
190 | +// | ||
191 | +// cv::Rect position_boxe; | ||
192 | +// position_boxe.x = poseResult.x; | ||
193 | +// position_boxe.y = poseResult.y; | ||
194 | +// position_boxe.width = poseResult.width; | ||
195 | +// position_boxe.height = poseResult.height; | ||
196 | +// cv::rectangle(image, position_boxe, cv::Scalar(0, 0, 255), 2, 8); | ||
197 | +// cv::rectangle(image, cv::Point(position_boxe.x, position_boxe.y - 20), cv::Point(position_boxe.x, position_boxe.y), cv::Scalar(0, 255, 255), -1); | ||
198 | +// cv::putText(image, poseResult.className, cv::Point(position_boxe.x, position_boxe.y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, cv::Scalar(0, 0, 0)); | ||
199 | +// | ||
200 | +// cv::imwrite("result.jpg", image); | ||
201 | +// cv::imshow("show", image); | ||
202 | +// cv::waitKey(); | ||
203 | +// } | ||
204 | +//} | ||
0 | \ No newline at end of file | 205 | \ No newline at end of file |
FFNvDecoder/main.cpp
@@ -12,6 +12,9 @@ | @@ -12,6 +12,9 @@ | ||
12 | 12 | ||
13 | #include "opencv2\opencv.hpp" | 13 | #include "opencv2\opencv.hpp" |
14 | 14 | ||
15 | +#include "DogPoseDetector.h" | ||
16 | + | ||
17 | + | ||
15 | using namespace std; | 18 | using namespace std; |
16 | using namespace cv; | 19 | using namespace cv; |
17 | 20 | ||
@@ -20,6 +23,8 @@ unsigned char *pHwRgb[2] = {nullptr, nullptr}; | @@ -20,6 +23,8 @@ unsigned char *pHwRgb[2] = {nullptr, nullptr}; | ||
20 | int sum1 = 0; | 23 | int sum1 = 0; |
21 | int sum2 = 0; | 24 | int sum2 = 0; |
22 | 25 | ||
26 | +DogPoseDetector poseDetector; | ||
27 | + | ||
23 | 28 | ||
24 | mutex m_mutex; | 29 | mutex m_mutex; |
25 | void saveFrame(AVFrame * gpuFrame, string file_name) { | 30 | void saveFrame(AVFrame * gpuFrame, string file_name) { |
@@ -46,47 +51,45 @@ void saveFrame(AVFrame * gpuFrame, string file_name) { | @@ -46,47 +51,45 @@ void saveFrame(AVFrame * gpuFrame, string file_name) { | ||
46 | } | 51 | } |
47 | 52 | ||
48 | mutex m_mutex_show; | 53 | mutex m_mutex_show; |
54 | +unsigned char *pShowData = nullptr; | ||
55 | + | ||
49 | void showFrame(AVFrame * gpuFrame) { | 56 | void showFrame(AVFrame * gpuFrame) { |
50 | std::lock_guard<std::mutex> l(m_mutex_show); | 57 | std::lock_guard<std::mutex> l(m_mutex_show); |
51 | 58 | ||
52 | - unsigned char *pHwData = nullptr; | ||
53 | - cudaError_t cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); | ||
54 | - | 59 | + cudaError_t cudaStatus = cudaSuccess; |
60 | + if (pShowData == nullptr) | ||
61 | + { | ||
62 | + cudaError_t cudaStatus = cudaMalloc((void **)&pShowData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); | ||
63 | + } | ||
64 | + | ||
55 | cuda_common::setColorSpace(ITU709, 0); | 65 | cuda_common::setColorSpace(ITU709, 0); |
56 | - cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0], (CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height); | ||
57 | - cudaDeviceSynchronize(); | 66 | + cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0], (CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pShowData, gpuFrame->width, gpuFrame->height); |
58 | if (cudaStatus != cudaSuccess) { | 67 | if (cudaStatus != cudaSuccess) { |
59 | cout << "CUDAToBGR failed !!!" << endl; | 68 | cout << "CUDAToBGR failed !!!" << endl; |
60 | return; | 69 | return; |
61 | } | 70 | } |
62 | 71 | ||
63 | - | ||
64 | - | ||
65 | - unsigned char * pHwRgb = pHwData; | ||
66 | int channel = 3; | 72 | int channel = 3; |
67 | int width = gpuFrame->width; | 73 | int width = gpuFrame->width; |
68 | int height = gpuFrame->height; | 74 | int height = gpuFrame->height; |
69 | 75 | ||
70 | - if (pHwRgb != nullptr && channel > 0 && width > 0 && height > 0) { | ||
71 | - int nSize = channel * height * width; | ||
72 | - unsigned char* cpu_data = new unsigned char[nSize]; | 76 | + if (pShowData != nullptr && channel > 0 && width > 0 && height > 0) { |
77 | + poseDetector.detect(pShowData, width, height); | ||
73 | 78 | ||
74 | - cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost); | ||
75 | - cudaDeviceSynchronize(); | 79 | + //int nSize = channel * height * width; |
80 | + //unsigned char* cpu_data = new unsigned char[nSize]; | ||
76 | 81 | ||
77 | - cv::Mat img_(height, width, CV_8UC3, cpu_data); | ||
78 | - bool bWrite = cv::imwrite("dec0.jpg", img_); | 82 | + //cudaMemcpy(cpu_data, pShowData, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost); |
83 | + //cudaDeviceSynchronize(); | ||
79 | 84 | ||
80 | - imshow("show", img_); | ||
81 | - waitKey(0); | 85 | + //cv::Mat img_(height, width, CV_8UC3, cpu_data); |
86 | + //imshow("show", img_); | ||
87 | + //waitKey(1); | ||
82 | 88 | ||
83 | - delete[] cpu_data; | ||
84 | - cpu_data = nullptr; | 89 | + //delete[] cpu_data; |
90 | + //cpu_data = nullptr; | ||
85 | 91 | ||
86 | - } | ||
87 | - | ||
88 | - cudaFree(pHwData); | ||
89 | - pHwData = nullptr; | 92 | + } |
90 | } | 93 | } |
91 | 94 | ||
92 | /** | 95 | /** |
@@ -108,7 +111,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ | @@ -108,7 +111,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ | ||
108 | // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl; | 111 | // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl; |
109 | cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); | 112 | cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); |
110 | 113 | ||
111 | - saveFrame(gpuFrame, decoder->getName()); | 114 | + //saveFrame(gpuFrame, decoder->getName()); |
112 | showFrame(gpuFrame); | 115 | showFrame(gpuFrame); |
113 | } | 116 | } |
114 | } | 117 | } |
@@ -172,7 +175,8 @@ void decode_finished_cbk(const void* userPtr){ | @@ -172,7 +175,8 @@ void decode_finished_cbk(const void* userPtr){ | ||
172 | // string test_uri = "/home/cmhu/data/output_1920x1080.mp4"; | 175 | // string test_uri = "/home/cmhu/data/output_1920x1080.mp4"; |
173 | // string test_uri = "rtsp://176.10.0.2:8554/stream"; | 176 | // string test_uri = "rtsp://176.10.0.2:8554/stream"; |
174 | // string test_uri = "/mnt/f/fiss/test_data/h265.mp4"; | 177 | // string test_uri = "/mnt/f/fiss/test_data/h265.mp4"; |
175 | -string test_uri = "rtsp://176.10.0.4:8554/stream"; | 178 | +//string test_uri = "rtsp://176.10.0.4:8554/stream"; |
179 | +string test_uri = "f://data/caishenkezhan.mp4"; | ||
176 | 180 | ||
177 | void createDecode(int index){ | 181 | void createDecode(int index){ |
178 | FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); | 182 | FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); |
@@ -210,16 +214,20 @@ void logFF(void *, int level, const char *fmt, va_list ap) | @@ -210,16 +214,20 @@ void logFF(void *, int level, const char *fmt, va_list ap) | ||
210 | int main(int argc, char* argv[]) { | 214 | int main(int argc, char* argv[]) { |
211 | 215 | ||
212 | printf("start \n"); | 216 | printf("start \n"); |
213 | - if (argc != 3) { | ||
214 | - fprintf(stderr, "./xxx uri gpu_id\n"); | ||
215 | - return -1; | ||
216 | - } | 217 | + //if (argc != 3) { |
218 | + // fprintf(stderr, "./xxx uri gpu_id\n"); | ||
219 | + // return -1; | ||
220 | + //} | ||
217 | 221 | ||
218 | - char* uri = argv[1]; | ||
219 | - char* gpuid = argv[2]; | 222 | + char* uri = "F:/dog_trainer_sys/test1/5min.mp4";//argv[1]; |
223 | + char* gpuid = "0";//argv[2]; | ||
220 | 224 | ||
221 | cout << av_version_info() << endl; | 225 | cout << av_version_info() << endl; |
222 | 226 | ||
227 | + poseDetector.init(); | ||
228 | + | ||
229 | + //namedWindow("show", WINDOW_NORMAL); | ||
230 | + | ||
223 | //evalQuality(uri, gpuid); | 231 | //evalQuality(uri, gpuid); |
224 | 232 | ||
225 | 233 | ||
@@ -253,19 +261,19 @@ int main(int argc, char* argv[]) { | @@ -253,19 +261,19 @@ int main(int argc, char* argv[]) { | ||
253 | pDecManager->getResolution(config.name, w,h); | 261 | pDecManager->getResolution(config.name, w,h); |
254 | printf( "%s : %dx%d\n", config.name.c_str() , w,h ); | 262 | printf( "%s : %dx%d\n", config.name.c_str() , w,h ); |
255 | 263 | ||
256 | - thread* m_thread = new thread([](void* arg) | ||
257 | - { | ||
258 | - while (true) | ||
259 | - { | ||
260 | - std::this_thread::sleep_for(std::chrono::milliseconds(5000)); | ||
261 | - FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); | ||
262 | - int count = pDecManager->count(); | ||
263 | - cout << "当前运行路数: " << pDecManager->count() << endl; | ||
264 | - } | ||
265 | - | ||
266 | - return (void*)0; | ||
267 | - } | ||
268 | - , nullptr); | 264 | + //thread* m_thread = new thread([](void* arg) |
265 | + // { | ||
266 | + // while (true) | ||
267 | + // { | ||
268 | + // std::this_thread::sleep_for(std::chrono::milliseconds(5000)); | ||
269 | + // FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); | ||
270 | + // int count = pDecManager->count(); | ||
271 | + // cout << "当前运行路数: " << pDecManager->count() << endl; | ||
272 | + // } | ||
273 | + | ||
274 | + // return (void*)0; | ||
275 | + // } | ||
276 | + //, nullptr); | ||
269 | 277 | ||
270 | 278 | ||
271 | while (getchar() != 'q'); | 279 | while (getchar() != 'q'); |