// DogPoseDetector.cpp
#include "DogPoseDetector.h"
#include "cuda_kernels.h"
#include <algorithm>
#include "opencv2/opencv.hpp"
// @brief Logger used when creating IBuilder, IRuntime, or IRefitter instances; it is
// shared by every object created through that interface.
// The logger must remain valid until all objects created with it have been released.
// Only the ILogger::log() method needs to be implemented.
class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* message) noexcept override
    {
        // suppress info-level messages
        if (severity != Severity::kINFO)
            std::cout << message << std::endl;
    }
} gLogger;
bool DogPoseDetector::init() {
    const char* model_path_onnx = "../weights/best.onnx";
    const char* model_path_engine = "../weights/best.engine";
    const char* input_node_name = "images";
    const char* output_node_name = "output";
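    // Note: model_path_onnx only documents the source model; init() loads the
    // pre-built engine file. One common way to produce it from the ONNX file is
    // TensorRT's trtexec tool, e.g.:
    //   trtexec --onnx=../weights/best.onnx --saveEngine=../weights/best.engine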
    // Read the serialized engine from disk
    std::ifstream file_ptr(model_path_engine, std::ios::binary);
    if (!file_ptr.good()) {
        std::cerr << "Failed to open the engine file, please check that it exists!" << std::endl;
        return false;
    }
    size_t size = 0;
    file_ptr.seekg(0, file_ptr.end);   // move the read pointer to the end of the file
    size = file_ptr.tellg();           // the read position now equals the file size in bytes
    file_ptr.seekg(0, file_ptr.beg);   // move the read pointer back to the beginning
    char* model_stream = new char[size];
    file_ptr.read(model_stream, size);
    file_ptr.close();

    // Deserialize the engine.
    // The engine stores the network structure, the weights, and the optimized kernel
    // configuration; it is not portable across platforms or TensorRT versions.
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream, size);
    delete[] model_stream;             // the serialized blob is no longer needed
    if (engine == nullptr) {
        std::cerr << "Failed to deserialize the TensorRT engine!" << std::endl;
        return false;
    }

    // Execution context: holds the intermediate activations and performs the actual
    // inference. One engine can create several contexts for concurrent inference tasks.
    // Note: runtime and engine must stay alive for as long as the context is used.
    context = engine->createExecutionContext();

    // Allocate the GPU buffers bound to the engine's input and output
    m_data_buffer = new void*[2];
    // Input buffer
    m_input_node_index = engine->getBindingIndex(input_node_name);
    m_input_node_dim = engine->getBindingDimensions(m_input_node_index);
    size_t input_data_length = m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3];
    cudaMalloc(&(m_data_buffer[m_input_node_index]), input_data_length * sizeof(float));
    // Output buffer
    m_output_node_index = engine->getBindingIndex(output_node_name);
    m_output_node_dim = engine->getBindingDimensions(m_output_node_index);
    size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2];
    cudaMalloc(&(m_data_buffer[m_output_node_index]), output_data_length * sizeof(float));
    return true;
}
// Debug helper: copy a GPU BGR image back to the host and save it to disk.
static void saveCUDAImg(unsigned char *pGpuBgb, int src_width, int src_height, std::string filename) {
    int rgb_size = 3 * src_width * src_height;
    unsigned char *cpu_data = new unsigned char[rgb_size];
    cudaError_t cudaStatus = cudaMemcpy(cpu_data, pGpuBgb, rgb_size * sizeof(unsigned char), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        std::cerr << "cudaMemcpy failed: " << cudaGetErrorString(cudaStatus) << std::endl;
    }
    cv::Mat img(src_height, src_width, CV_8UC3, cpu_data);
    cv::imwrite(filename, img);
    delete[] cpu_data;
    cpu_data = nullptr;
}
// Debug helper: copy a GPU float image back to the host and display it.
static void showFloatData(float* pGpuData, int dst_width, int dst_height) {
    int buf_size = 3 * dst_width * dst_height;
    float* pBuf = new float[buf_size];
    cudaMemcpy(pBuf, pGpuData, buf_size * sizeof(float), cudaMemcpyDeviceToHost);
    cv::Mat image(dst_height, dst_width, CV_32FC3, pBuf);
    cv::imshow("show", image);
    cv::waitKey(1);
    delete[] pBuf;
    pBuf = nullptr;
}
std::vector<DogPoseResult> DogPoseDetector::detect(unsigned char *pGpuBgr, int src_width, int src_height) {
    // Binding dimensions are NCHW, so d[2] is the height and d[3] the width of the network input
    int dst_width = m_input_node_dim.d[3];
    int dst_height = m_input_node_dim.d[2];
    int max_side_length = std::max(src_width, src_height);
    //saveCUDAImg(pGpuBgr, src_width, src_height, "src.jpg");
    cudaStream_t stream;
    cudaStreamCreate(&stream);
    {
        // Reference CPU path: device --> host --> device
        //int rgb_size = 3 * src_width * src_height;
        //uint8 *cpu_data = new uint8[rgb_size];
        //cudaError_t cudaStatus = cudaMemcpy(cpu_data, pGpuBgr, rgb_size * sizeof(uint8), cudaMemcpyDeviceToHost);
        //cv::Mat image(src_height, src_width, CV_8UC3, cpu_data);
        //cv::Mat max_image = cv::Mat::zeros(cv::Size(max_side_length, max_side_length), CV_8UC3);
        //cv::Rect roi(0, 0, image.cols, image.rows);
        //image.copyTo(max_image(roi));
        //// Normalize the image and resize it to the network input size
        //cv::Size input_node_shape(m_input_node_dim.d[2], m_input_node_dim.d[3]);
        //cv::Mat BN_image = cv::dnn::blobFromImage(max_image, 1 / 255.0, input_node_shape, cv::Scalar(0, 0, 0), true, false);
        //size_t input_data_length = m_input_node_dim.d[1] * m_input_node_dim.d[2] * m_input_node_dim.d[3];
        //std::vector<float> input_data(input_data_length);
        //memcpy(input_data.data(), BN_image.ptr<float>(), input_data_length * sizeof(float));
        //cudaMemcpyAsync(m_data_buffer[m_input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream);
    }
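    // GPU preprocessing. cuda_common::resizeAndNorm (declared in cuda_kernels.h) is
    // expected to mirror the commented-out CPU path above: pad the BGR frame to a
    // square, resize it to dst_width x dst_height, swap BGR to RGB, scale pixel values
    // to [0, 1], and write planar CHW floats into the input binding. See the kernel
    // implementation for the exact behaviour.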
    cuda_common::resizeAndNorm(pGpuBgr, src_width, src_height, (float*)m_data_buffer[m_input_node_index], dst_width, dst_height);
    // Run inference
    context->enqueueV2(m_data_buffer, stream, nullptr);
    // Copy the raw output back to the host
    size_t output_data_length = m_output_node_dim.d[1] * m_output_node_dim.d[2];
    std::vector<float> result_array(output_data_length);
    cudaMemcpyAsync(result_array.data(), m_data_buffer[m_output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);   // wait for inference and the copy to finish
    cudaStreamDestroy(stream);
    // Decode the YOLOv5 output; factor maps network coordinates back to the source image
    ResultYolov5 result;
    result.factor = max_side_length / (float)m_input_node_dim.d[2];
    result.read_class_names("F:/dog_trainer_sys/train2/classes.txt");
    return result.yolov5_result(result_array.data(), 0.6);
}