实现狗狗姿态检测

Hu Chunming
1 parent 7128e494
Showing 14 changed files with 880 additions and 51 deletions
.vs/FFNvDecoder/v14/.suo
FFNvDecoder/AlgorithmResult.cpp
FFNvDecoder/AlgorithmResult.h
FFNvDecoder/DogPoseDetector.cpp
FFNvDecoder/DogPoseDetector.h
FFNvDecoder/FFNvDecoder.cpp
FFNvDecoder/FFNvDecoder.vcxproj
FFNvDecoder/FFNvDecoder.vcxproj.filters
FFNvDecoder/FFNvDecoder.vcxproj.user
FFNvDecoder/NV12ToRGB.cu
FFNvDecoder/ResizeAndNorm.cu
FFNvDecoder/cuda_kernels.h
FFNvDecoder/dog_train_sys.cpp
FFNvDecoder/main.cpp
+#include "AlgorithmResult.h"
+
+#include "opencv2/opencv.hpp"
+
+// Loads class labels from a text file, one label per line, and appends them
+// to class_names in file order.
+void ResultYolov5::read_class_names(std::string path_name)
+{
+	std::ifstream label_file(path_name.c_str());
+	// Debug builds abort here when the file cannot be opened; with NDEBUG the
+	// check is compiled out and the loop below simply reads nothing.
+	assert(label_file.is_open());
+
+	for (std::string label; std::getline(label_file, label); ) {
+		class_names.push_back(label);
+	}
+
+	label_file.close();
+}
+
+// Decodes a raw YOLOv5 output tensor into the detections that survive NMS.
+//
+// result    : host pointer to 25200 x 13 floats; each row is read as
+//             [cx, cy, w, h, objectness, 8 class scores].
+// threshold : accepted for interface compatibility but not used; the cut-offs
+//             below are fixed constants. NOTE(review): confirm whether callers
+//             expect this value to replace them.
+// Returns one DogPoseResult per kept box, with box coordinates multiplied by
+// `factor`. A null `result` yields an empty vector.
+std::vector<DogPoseResult> ResultYolov5::yolov5_result(float* result, float threshold) {
+	(void)threshold;
+
+	std::vector<DogPoseResult> vecPoseResult;
+	if (result == nullptr) {
+		return vecPoseResult;
+	}
+
+	const int kNumRows = 25200;      // candidate boxes per inference
+	const int kNumCols = 13;         // 4 box values + objectness + 8 class scores
+	const int kFirstClassCol = 5;    // first class-score column
+
+	// Wraps the caller's buffer without copying it.
+	cv::Mat det_output = cv::Mat(kNumRows, kNumCols, CV_32F, result);
+
+	std::vector<cv::Rect> position_boxes;
+	std::vector<int> classIds;
+	std::vector<float> confidences;
+
+	for (int i = 0; i < det_output.rows; i++) {
+		const float confidence = det_output.at<float>(i, 4);
+		if (confidence < 0.2) {
+			continue;
+		}
+
+		// Find the best-scoring class for this candidate.
+		cv::Mat classes_scores = det_output.row(i).colRange(kFirstClassCol, kNumCols);
+		cv::Point classIdPoint;
+		double score = 0.0;
+		cv::minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
+		if (score <= 0.25) {
+			continue;
+		}
+
+		// Convert centre/size to a top-left anchored box and rescale it.
+		const float cx = det_output.at<float>(i, 0);
+		const float cy = det_output.at<float>(i, 1);
+		const float ow = det_output.at<float>(i, 2);
+		const float oh = det_output.at<float>(i, 3);
+
+		cv::Rect box;
+		box.x = static_cast<int>((cx - 0.5 * ow) * factor);
+		box.y = static_cast<int>((cy - 0.5 * oh) * factor);
+		box.width = static_cast<int>(ow * factor);
+		box.height = static_cast<int>(oh * factor);
+
+		position_boxes.push_back(box);
+		classIds.push_back(classIdPoint.x);
+		confidences.push_back(static_cast<float>(score));
+	}
+
+	// Non-maximum suppression: score threshold 0.25, IoU threshold 0.45.
+	std::vector<int> indexes;
+	cv::dnn::NMSBoxes(position_boxes, confidences, 0.25f, 0.45f, indexes);
+
+	vecPoseResult.reserve(indexes.size());
+	for (size_t i = 0; i < indexes.size(); i++) {
+		const int index = indexes[i];
+		const int classId = classIds[index];
+		const cv::Rect& box = position_boxes[index];
+
+		DogPoseResult poseResult;
+		poseResult.x = box.x;
+		poseResult.y = box.y;
+		poseResult.width = box.width;
+		poseResult.height = box.height;
+		poseResult.confidence = confidences[index];
+		poseResult.classId = classId;
+		// The label file may hold fewer names than the model has classes;
+		// leave className empty instead of indexing out of range.
+		if (classId >= 0 && static_cast<size_t>(classId) < class_names.size()) {
+			poseResult.className = class_names[classId];
+		}
+
+		vecPoseResult.push_back(poseResult);
+	}
+
+	return vecPoseResult;
+}
 \ No newline at end of file
+#pragma once
+
+#ifndef RESULT_H
+#define RESULT_H
+
+#include <fstream>
+#include <iterator>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+
+struct DogPoseResult {	// one detection as filled in by ResultYolov5::yolov5_result
+	int x;	// left edge of the box, after scaling by ResultYolov5::factor
+	int y;	// top edge of the box, after scaling by ResultYolov5::factor
+	int height;	// box height, after scaling
+	int width;	// box width, after scaling
+	float confidence;	// best class score of the detection
+	int classId;	// column index of the best class score
+	std::string className;	// label looked up in ResultYolov5::class_names by classId
+};
+
+// @brief Post-processes raw YOLOv5 output into DogPoseResult entries.
+ class ResultYolov5 {
+public:
+	std::vector<std::string> class_names;	// labels appended by read_class_names, indexed by class id
+	float factor;	// multiplier applied to box coordinates in yolov5_result; not initialised here, callers set it
+
+	//ResultYolov5();
+	void read_class_names(std::string path_name);	// appends one label per line of the file at path_name
+	std::vector<DogPoseResult> yolov5_result(float* result, float threshold);	// decodes `result` and returns the boxes kept by NMS
+};
+
+
+#endif // !RESULT_H
 \ No newline at end of file
+#include "DogPoseDetector.h"
+#include "cuda_kernels.h"
+#include <algorithm>
+
+#include "opencv2/opencv.hpp"
+
+// @brief Logger passed to TensorRT when creating IBuilder, IRuntime or
+// IRefitter instances; it is used by every object created through them.
+// The logger must stay valid until all of those objects have been released.
+// It only has to implement ILogger::log().
+class Logger : public nvinfer1::ILogger
+{
+	// Prints every message except info-level ones to stdout.
+	void log(Severity severity, const char* message) noexcept override
+	{
+		// suppress info-level messages
+		if (severity != Severity::kINFO)
+			std::cout << message << std::endl;
+	}
+} gLogger;
+
+// Loads the serialized TensorRT engine from disk, creates the execution
+// context and allocates one device buffer per binding ("images" input and
+// "output" output).
+//
+// Returns true on success. On any failure an error is written to stderr,
+// false is returned and the members of this object are left untouched.
+// NOTE(review): the runtime and engine are intentionally kept alive for the
+// lifetime of the process because the execution context depends on them; they
+// are also not released on the failure paths below.
+bool DogPoseDetector::init() {
+	const char* model_path_engine = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.engine";
+	const char* input_node_name = "images";
+	const char* output_node_name = "output";
+
+	// Read the whole engine file into memory.
+	std::ifstream file_ptr(model_path_engine, std::ios::binary);
+	if (!file_ptr.good()) {
+		std::cerr << "文件无法打开，请确定文件是否可用！" << std::endl;
+		return false;
+	}
+
+	file_ptr.seekg(0, file_ptr.end);
+	const std::streamoff end_pos = file_ptr.tellg();	// -1 on failure
+	if (end_pos <= 0) {
+		std::cerr << "DogPoseDetector::init: engine file is empty or unreadable" << std::endl;
+		return false;
+	}
+	const size_t size = static_cast<size_t>(end_pos);
+	file_ptr.seekg(0, file_ptr.beg);
+
+	// std::vector owns the bytes, so they are released on every return path.
+	std::vector<char> model_stream(size);
+	file_ptr.read(model_stream.data(), static_cast<std::streamsize>(size));
+	if (!file_ptr) {
+		std::cerr << "DogPoseDetector::init: failed to read engine file" << std::endl;
+		return false;
+	}
+	file_ptr.close();
+
+	// Deserialize the engine. Engines are tied to the platform and TensorRT
+	// version they were built with.
+	nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
+	if (runtime == nullptr) {
+		std::cerr << "DogPoseDetector::init: createInferRuntime failed" << std::endl;
+		return false;
+	}
+	nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream.data(), size);
+	if (engine == nullptr) {
+		std::cerr << "DogPoseDetector::init: deserializeCudaEngine failed" << std::endl;
+		return false;
+	}
+
+	// The context holds intermediate values and performs the actual inference.
+	nvinfer1::IExecutionContext* new_context = engine->createExecutionContext();
+	if (new_context == nullptr) {
+		std::cerr << "DogPoseDetector::init: createExecutionContext failed" << std::endl;
+		return false;
+	}
+
+	// Resolve both bindings; they must be the two distinct slots 0 and 1 of
+	// the binding array allocated below.
+	const int input_index = engine->getBindingIndex(input_node_name);
+	const int output_index = engine->getBindingIndex(output_node_name);
+	const bool indices_ok = (input_index == 0 || input_index == 1)
+		&& (output_index == 0 || output_index == 1)
+		&& input_index != output_index;
+	if (!indices_ok) {
+		std::cerr << "DogPoseDetector::init: unexpected binding layout" << std::endl;
+		return false;
+	}
+
+	const nvinfer1::Dims input_dim = engine->getBindingDimensions(input_index);
+	const nvinfer1::Dims output_dim = engine->getBindingDimensions(output_index);
+	// Dynamic (-1) or missing dimensions would make the sizes below invalid.
+	if (input_dim.d[1] <= 0 || input_dim.d[2] <= 0 || input_dim.d[3] <= 0
+		|| output_dim.d[1] <= 0 || output_dim.d[2] <= 0) {
+		std::cerr << "DogPoseDetector::init: unsupported binding dimensions" << std::endl;
+		return false;
+	}
+
+	const size_t input_data_length = static_cast<size_t>(input_dim.d[1]) * input_dim.d[2] * input_dim.d[3];
+	const size_t output_data_length = static_cast<size_t>(output_dim.d[1]) * output_dim.d[2];
+
+	// Allocate the device buffers; cudaFree(nullptr) is a no-op, so the
+	// cleanup is safe whichever allocation failed.
+	void* input_buffer = nullptr;
+	void* output_buffer = nullptr;
+	if (cudaMalloc(&input_buffer, input_data_length * sizeof(float)) != cudaSuccess
+		|| cudaMalloc(&output_buffer, output_data_length * sizeof(float)) != cudaSuccess) {
+		std::cerr << "DogPoseDetector::init: cudaMalloc failed" << std::endl;
+		cudaFree(input_buffer);
+		cudaFree(output_buffer);
+		return false;
+	}
+
+	// Everything succeeded: publish the state used by detect().
+	m_data_buffer = new void*[2];
+	m_data_buffer[input_index] = input_buffer;
+	m_data_buffer[output_index] = output_buffer;
+	m_input_node_index = input_index;
+	m_input_node_dim = input_dim;
+	m_output_node_index = output_index;
+	m_output_node_dim = output_dim;
+	context = new_context;
+
+	return true;
+}
+
+// Debug helper: copies a packed 3-channel 8-bit image from device memory to
+// the host and writes it to `filename` with cv::imwrite.
+// Nothing is written when the arguments are invalid or the copy fails.
+static void saveCUDAImg(unsigned char *pGpuBgb, int src_width, int src_height, std::string filename) {
+	if (pGpuBgb == nullptr || src_width <= 0 || src_height <= 0) {
+		return;
+	}
+
+	const size_t rgb_size = static_cast<size_t>(3) * src_width * src_height;
+	std::vector<unsigned char> cpu_data(rgb_size);
+
+	const cudaError_t cudaStatus = cudaMemcpy(cpu_data.data(), pGpuBgb, rgb_size * sizeof(unsigned char), cudaMemcpyDeviceToHost);
+	if (cudaStatus != cudaSuccess) {
+		// Do not write an uninitialised buffer to disk.
+		std::cerr << "saveCUDAImg: cudaMemcpy failed: " << cudaGetErrorString(cudaStatus) << std::endl;
+		return;
+	}
+
+	// The Mat only wraps cpu_data, which outlives it within this function.
+	cv::Mat img(src_height, src_width, CV_8UC3, cpu_data.data());
+	cv::imwrite(filename, img);
+}
+
+// Runs pose detection on one frame that already resides in GPU memory and
+// prints the first detection (if any) to stdout.
+//
+// pGpuBgr               : device pointer to a packed 3-byte-per-pixel image;
+//                         it is handed to cuda_common::resizeAndNorm as is.
+// src_width, src_height : frame size in pixels.
+//
+// Returns true when inference completed, false when the detector is not
+// initialised, the arguments are invalid or a CUDA/TensorRT call failed.
+bool DogPoseDetector::detect(unsigned char *pGpuBgr, int src_width, int src_height) {
+	if (context == nullptr || m_data_buffer == nullptr || pGpuBgr == nullptr
+		|| src_width <= 0 || src_height <= 0) {
+		return false;
+	}
+
+	// Assumes an NCHW input binding (init() sizes it as d[1]*d[2]*d[3]), so
+	// d[2] is the height and d[3] the width. NOTE(review): confirm against the
+	// exported model; for a square input the two are interchangeable.
+	const int dst_height = m_input_node_dim.d[2];
+	const int dst_width = m_input_node_dim.d[3];
+	if (dst_width <= 0 || dst_height <= 0
+		|| m_output_node_dim.d[1] <= 0 || m_output_node_dim.d[2] <= 0) {
+		return false;
+	}
+
+	// Resize + normalise straight into the input binding (synchronous call).
+	const cudaError_t resize_status = cuda_common::resizeAndNorm(pGpuBgr, src_width, src_height,
+		(float*)m_data_buffer[m_input_node_index], dst_width, dst_height);
+	if (resize_status != cudaSuccess) {
+		std::cerr << "DogPoseDetector::detect: resizeAndNorm failed: " << cudaGetErrorString(resize_status) << std::endl;
+		return false;
+	}
+
+	cudaStream_t stream = nullptr;
+	if (cudaStreamCreate(&stream) != cudaSuccess) {
+		std::cerr << "DogPoseDetector::detect: cudaStreamCreate failed" << std::endl;
+		return false;
+	}
+
+	// Host buffer for the raw network output; released automatically.
+	const size_t output_data_length = static_cast<size_t>(m_output_node_dim.d[1]) * m_output_node_dim.d[2];
+	std::vector<float> result_array(output_data_length);
+
+	// Inference followed by the device-to-host copy on the same stream.
+	bool ok = context->enqueueV2(m_data_buffer, stream, nullptr);
+	if (ok) {
+		ok = cudaMemcpyAsync(result_array.data(), m_data_buffer[m_output_node_index],
+			output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream) == cudaSuccess;
+	}
+	// Always drain the stream before it and the host buffer go away.
+	if (cudaStreamSynchronize(stream) != cudaSuccess) {
+		ok = false;
+	}
+	cudaStreamDestroy(stream);
+	if (!ok) {
+		std::cerr << "DogPoseDetector::detect: inference failed" << std::endl;
+		return false;
+	}
+
+	ResultYolov5 result;
+	// Same scale that resizeAndNorm applies: the longer source side is mapped
+	// onto the matching network dimension.
+	result.factor = (src_width >= src_height)
+		? src_width / (float)dst_width
+		: src_height / (float)dst_height;
+	// NOTE(review): the label file is re-read on every frame; consider caching.
+	result.read_class_names("F:/dog_trainer_sys/train2/classes.txt");
+
+	std::vector<DogPoseResult> vec_result = result.yolov5_result(result_array.data(), 0.6);
+	if (vec_result.size() > 0) {
+		const DogPoseResult& poseResult = vec_result[0];
+		std::cout << poseResult.x << std::endl;
+		std::cout << poseResult.y << std::endl;
+		std::cout << poseResult.width << std::endl;
+		std::cout << poseResult.height << std::endl;
+		std::cout << poseResult.confidence << std::endl;
+		std::cout << poseResult.classId << std::endl;
+		std::cout << poseResult.className << std::endl;
+	}
+
+	return true;
+}
 \ No newline at end of file
+#pragma once
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+#include "NvInfer.h"
+#include "NvOnnxParser.h"
+
+#include "AlgorithmResult.h"
+
+// Dog pose detector built on a TensorRT execution context.
+// Call init() once, then detect() for each frame held in GPU memory.
+class DogPoseDetector {
+public:
+	// Loads the engine and allocates the binding buffers; returns false on failure.
+	bool init();
+
+	// Runs inference on one GPU-resident frame of src_width x src_height pixels.
+	bool detect(unsigned char *pGpuBgr, int src_width, int src_height);
+
+private:
+	// Execution context created by init(); null until init() succeeds.
+	nvinfer1::IExecutionContext* context{ nullptr };
+
+	// Array of device pointers, one per binding, indexed by binding index.
+	void** m_data_buffer{ nullptr };
+	int m_input_node_index{ -1 };
+	nvinfer1::Dims m_input_node_dim;
+	int m_output_node_index{ -1 };
+	nvinfer1::Dims m_output_node_dim;
+	
+	unsigned char* pSquareData{ nullptr };
+};
 \ No newline at end of file
@@ -79,6 +79,11 @@ bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp)
 	av_dict_set( &options, "rtsp_transport", force_tcp ? "tcp" : "udp", 0 );
 	// av_dict_set( &options, "listen_timeout", "30", 0 ); // 单位为s
 	av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒
+	// Option names are matched literally, so the key must not carry a leading space.
+	av_dict_set(&options, "max_delay", "30000000", 0);
+	// av_dict_set( &options, "buffer_size", "655360", 0 ); 
+	// av_dict_set( &options, "pkt_size", "655360", 0 ); 
+	av_dict_set(&options, "fifo_size", "6553600", 0);
+	//av_dict_set(&options, "fflags", "discardcorrupt", 0);
  
 	fmt_ctx = avformat_alloc_context();
 	const char* input_file = uri;
@@ -48,13 +48,13 @@
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <AdditionalOptions>/utf-8</AdditionalOptions>
-      <AdditionalIncludeDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\include;./;./common/inc;./common/UtilNPP;D:\win_dev\opencv\build\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\include;./;./common/inc;./common/UtilNPP;D:\win_dev\opencv\build\include;..\3rdparty\TensorRT-8.6.1.6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>avcodec.lib;avdevice.lib;avfilter.lib;avformat.lib;avutil.lib;postproc.lib;swresample.lib;swscale.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;nvjpeg.lib;opencv_world455d.lib;freeglut.lib;glew64.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <AdditionalLibraryDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\lib;D:\win_dev\opencv\build\x64\vc14\lib;../3rdparty/gl;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+      <AdditionalDependencies>avcodec.lib;avdevice.lib;avfilter.lib;avformat.lib;avutil.lib;postproc.lib;swresample.lib;swscale.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;nvjpeg.lib;opencv_world455d.lib;freeglut.lib;glew64.lib;nvinfer.lib;nvinfer_plugin.lib;nvonnxparser.lib;nvparsers.lib;cudnn.lib;cublas.lib;cudart.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalLibraryDirectories>..\3rdparty\ffmpeg-5.0.1-win64-dev\lib;D:\win_dev\opencv\build\x64\vc14\lib;../3rdparty/gl;..\3rdparty\TensorRT-8.6.1.6\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
     </Link>
     <CudaCompile>
       <TargetMachinePlatform>64</TargetMachinePlatform>
@@ -81,9 +81,13 @@
   </ItemDefinitionGroup>
   <ItemGroup>
     <CudaCompile Include="NV12ToRGB.cu" />
+    <CudaCompile Include="ResizeAndNorm.cu" />
   </ItemGroup>
   <ItemGroup>
+    <ClCompile Include="AlgorithmResult.cpp" />
     <ClCompile Include="check_tool.cpp" />
+    <ClCompile Include="DogPoseDetector.cpp" />
+    <ClCompile Include="dog_train_sys.cpp" />
     <ClCompile Include="FFCuContextManager.cpp" />
     <ClCompile Include="FFNvDecoder.cpp" />
     <ClCompile Include="FFNvDecoderManager.cpp" />
@@ -92,8 +96,10 @@
     <ClCompile Include="NvJpegEncoder.cpp" />
   </ItemGroup>
   <ItemGroup>
+    <ClInclude Include="AlgorithmResult.h" />
     <ClInclude Include="check_tool.h" />
     <ClInclude Include="cuda_kernels.h" />
+    <ClInclude Include="DogPoseDetector.h" />
     <ClInclude Include="FFCuContextManager.h" />
     <ClInclude Include="FFNvDecoder.h" />
     <ClInclude Include="FFNvDecoderManager.h" />
@@ -4,6 +4,9 @@
     <CudaCompile Include="NV12ToRGB.cu">
       <Filter>cu_src</Filter>
     </CudaCompile>
+    <CudaCompile Include="ResizeAndNorm.cu">
+      <Filter>cu_src</Filter>
+    </CudaCompile>
   </ItemGroup>
   <ItemGroup>
     <Filter Include="src">
@@ -38,6 +41,15 @@
     <ClCompile Include="NvJpegEncoder.cpp">
       <Filter>cu_src</Filter>
     </ClCompile>
+    <ClCompile Include="AlgorithmResult.cpp">
+      <Filter>src</Filter>
+    </ClCompile>
+    <ClCompile Include="dog_train_sys.cpp">
+      <Filter>src</Filter>
+    </ClCompile>
+    <ClCompile Include="DogPoseDetector.cpp">
+      <Filter>src</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="check_tool.h">
@@ -61,5 +73,11 @@
     <ClInclude Include="NvJpegEncoder.h">
       <Filter>cu_src</Filter>
     </ClInclude>
+    <ClInclude Include="AlgorithmResult.h">
+      <Filter>include</Filter>
+    </ClInclude>
+    <ClInclude Include="DogPoseDetector.h">
+      <Filter>include</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>
 \ No newline at end of file
 <?xml version="1.0" encoding="utf-8"?>
 <Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LocalDebuggerCommandArguments>rtsp://122.97.218.170:8604/openUrl/V5nXRHa?params=eyJwcm90b2NhbCI6InJ0c3AiLCJjbGllbnRUeXBlIjoib3Blbl9hcGkiLCJleHByaWVUaW1lIjotMSwicHJvdG9jb2wiOiJydHNwIiwiZXhwaXJlVGltZSI6MzAwLCJlbmFibGVNR0MiOnRydWUsImV4cGFuZCI6InN0YW5kYXJkPXJ0c3Amc3RyZWFtZm9ybT1ydHAiLCJhIjoiMTBjZjM4N2JjY2Y5NDg3YzhjNWYzNjE2M2ViMWUyNTJ8MXwwfDEiLCJ0IjoxfQ== 0</LocalDebuggerCommandArguments>
+    <LocalDebuggerCommandArguments>
+    </LocalDebuggerCommandArguments>
     <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
   </PropertyGroup>
 </Project>
 \ No newline at end of file
@@ -4,9 +4,6 @@
 #include <builtin_types.h>
 #include "common/inc/helper_cuda_drvapi.h"
  
-typedef unsigned char   uint8;
-typedef unsigned int    uint32;
-typedef int             int32;
  
 #define COLOR_COMPONENT_MASK            0x3FF
 #define COLOR_COMPONENT_BIT_SIZE        10
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+#include <stdio.h>
+
+#include "cuda_kernels.h"
+
+namespace cuda_common
+{
+
+	// Reads pixel (x, y) of a w x h uchar3 image and returns its channels
+	// divided by 255; coordinates outside the image yield (0.5, 0.5, 0.5).
+	__forceinline__ __device__ float3 get(uchar3* src, int x, int y, int w, int h) {
+		const bool inside = (x >= 0) && (x < w) && (y >= 0) && (y < h);
+		if (!inside) {
+			return make_float3(0.5, 0.5, 0.5);
+		}
+		const uchar3 px = src[y*w + x];
+		return make_float3(float(px.x) / 255., float(px.y) / 255., float(px.z) / 255.);
+	}
+
+	__global__ void resizeNormKernel(uchar3* src, float *dst, int dstW, int dstH, int srcW, int srcH,
+		float scaleX, float scaleY, float shiftX, float shiftY) {	// bilinear resize of a uchar3 image into three float planes scaled by 1/255
+		int idx = blockIdx.x * blockDim.x + threadIdx.x;	// one thread per destination pixel, 1-D launch
+		const int x = idx % dstW;
+		const int y = idx / dstW;
+		if (x >= dstW || y >= dstH)	// x = idx % dstW is always below dstW, so only the y test can trigger
+			return;
+		float w = (x - shiftX + 0.5) * scaleX - 0.5;        // inverse mapping of the scaling: destination x -> source x
+		float h = (y - shiftY + 0.5) * scaleY - 0.5;        // same half-pixel offset convention as OpenCV
+		int h_low = (int)h;	// the cast truncates toward zero
+		int w_low = (int)w;
+		int h_high = h_low + 1;
+		int w_high = w_low + 1;
+		float lh = h - h_low;
+		float lw = w - w_low;
+		float hh = 1 - lh, hw = 1 - lw;
+		float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;	// bilinear weights of the four neighbours
+		float3 v1 = get(src, w_low, h_low, srcW, srcH);	// get() returns 0.5 per channel for out-of-range coordinates
+		float3 v2 = get(src, w_high, h_low, srcW, srcH);
+		float3 v3 = get(src, w_low, h_high, srcW, srcH);
+		float3 v4 = get(src, w_high, h_high, srcW, srcH);
+		int stride = dstW*dstH;	// size of one output plane
+		dst[y*dstW + x] = w1 *v1.x + w2 * v2.x + w3 *v3.x + w4 * v4.x;	// plane 0 <- .x channel
+		dst[stride + y*dstW + x] = w1 *v1.y + w2 * v2.y + w3 *v3.y + w4 * v4.y;	// plane 1 <- .y channel
+		dst[stride * 2 + y*dstW + x] = w1 *v1.z + w2 * v2.z + w3 *v3.z + w4 * v4.z;	// plane 2 <- .z channel
+	}
+
+	// Copies an imgWidth x imgHeight uchar3 image into the top-left corner of
+	// a destination whose row length is squareWidth pixels. One thread handles
+	// one pixel; destination pixels outside the source area are not written.
+	__global__ void copy2square(uchar3 *dataIn, uchar3 *dataOut, int imgWidth, int imgHeight, int squareWidth)
+	{
+		const int col = blockIdx.x * blockDim.x + threadIdx.x;
+		const int row = blockIdx.y * blockDim.y + threadIdx.y;
+
+		// Threads that fall outside the source image have nothing to copy.
+		if (col < imgWidth && row < imgHeight)
+		{
+			dataOut[row*squareWidth + col] = dataIn[row*imgWidth + col];
+		}
+	}
+
+	__global__ void kernel_bilinear(uint8 *src_img, int src_width, int src_height, float *dst_img, int dst_width, int dst_height)	// bilinear resize of a planar 3-channel uint8 image into planar float output
+	{
+		const int x = blockIdx.x * blockDim.x + threadIdx.x;	// destination column handled by this thread
+		const int y = blockIdx.y * blockDim.y + threadIdx.y;	// destination row handled by this thread
+
+		if (x < dst_width && y < dst_height)
+		{
+			float fx = (x + 0.5)*src_width / (float)dst_width - 0.5;	// source x for this destination pixel (half-pixel offset)
+			float fy = (y + 0.5)*src_height / (float)dst_height - 0.5;
+			int ax = floor(fx);
+			int ay = floor(fy);
+			if (ax < 0)	// clamp so that ax and ax + 1 stay inside the row
+			{
+				ax = 0;
+			}
+			else if (ax > src_width - 2)
+			{
+				ax = src_width - 2;
+			}
+
+			if (ay < 0) {	// clamp so that ay and ay + 1 stay inside the image
+				ay = 0;
+			}
+			else if (ay > src_height - 2)
+			{
+				ay = src_height - 2;
+			}
+
+			int A = ax + ay*src_width;	// top-left neighbour
+			int B = ax + ay*src_width + 1;	// top-right neighbour
+			int C = ax + ay*src_width + src_width;	// bottom-left neighbour
+			int D = ax + ay*src_width + src_width + 1;	// bottom-right neighbour
+
+			float w1, w2, w3, w4;
+			w1 = fx - ax;	// horizontal weight of the right neighbours; computed against the clamped ax
+			w2 = 1 - w1;
+			w3 = fy - ay;	// vertical weight of the bottom neighbours; computed against the clamped ay
+			w4 = 1 - w3;
+
+			float blue = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3;	// source plane 0
+
+			float green = src_img[src_width * src_height + A] * w2*w4 + src_img[src_width * src_height + B] * w1*w4
+				+ src_img[src_width * src_height + C] * w2*w3 + src_img[src_width * src_height + D] * w1*w3;	// source plane 1
+
+			float red = src_img[src_width * src_height * 2 + A] * w2*w4 + src_img[src_width * src_height * 2 + B] * w1*w4
+				+ src_img[src_width * src_height * 2 + C] * w2*w3 + src_img[src_width * src_height * 2 + D] * w1*w3;	// source plane 2
+
+			dst_img[y * dst_width + x] = red;	// plane order is reversed on output; values are not divided by 255
+			dst_img[dst_width * dst_height + y * dst_width + x] = green;
+			dst_img[dst_width * dst_height * 2 + y * dst_width + x] = blue;
+		}
+	}
+
+	// Aspect-preserving bilinear resize with normalisation.
+	//
+	// The source (packed uchar3, read as B,G,R in .x,.y,.z) is scaled by a single
+	// ratio chosen from its longer side and anchored at the top-left corner of
+	// the destination. The result is written as three float planes in R, G, B
+	// order with values divided by 255. Destination pixels that map outside the
+	// source are filled with 0 so that every output element is defined on every
+	// call. One thread handles one destination pixel.
+	// NOTE(review): assumes the source rows are tightly packed (pitch ==
+	// src_width * 3 bytes) — confirm against the producer of src_img.
+	__global__ void resize_norm_kernel(uchar3 *src_img, int src_width, int src_height, float *dataOut, int dst_width, int dst_height)
+	{
+		const int x = blockIdx.x * blockDim.x + threadIdx.x;
+		const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+		if (x >= dst_width || y >= dst_height) {
+			return;
+		}
+
+		// One scale factor for both axes keeps the aspect ratio.
+		const float ratio = (src_width >= src_height)
+			? src_width / (float)dst_width
+			: src_height / (float)dst_height;
+
+		const int plane = dst_width * dst_height;
+		const int out = y * dst_width + x;
+
+		// Destination pixel centre mapped back into source coordinates.
+		const float fx = (x + 0.5f) * ratio - 0.5f;
+		const float fy = (y + 0.5f) * ratio - 0.5f;
+
+		// Outside the source footprint: this is the padding area.
+		if (fx >= src_width - 0.5f || fy >= src_height - 0.5f) {
+			dataOut[out] = 0.0f;
+			dataOut[plane + out] = 0.0f;
+			dataOut[plane * 2 + out] = 0.0f;
+			return;
+		}
+
+		// Neighbour coordinates, clamped to the image so that borders (and
+		// images narrower than two pixels) never read out of bounds.
+		const int x0 = min(max((int)floorf(fx), 0), src_width - 1);
+		const int y0 = min(max((int)floorf(fy), 0), src_height - 1);
+		const int x1 = min(x0 + 1, src_width - 1);
+		const int y1 = min(y0 + 1, src_height - 1);
+
+		// Interpolation weights, clamped to [0, 1] to avoid extrapolation at
+		// the borders.
+		const float wx = fminf(fmaxf(fx - x0, 0.0f), 1.0f);
+		const float wy = fminf(fmaxf(fy - y0, 0.0f), 1.0f);
+		const float w00 = (1.0f - wx) * (1.0f - wy);
+		const float w01 = wx * (1.0f - wy);
+		const float w10 = (1.0f - wx) * wy;
+		const float w11 = wx * wy;
+
+		const uchar3 p00 = src_img[y0 * src_width + x0];
+		const uchar3 p01 = src_img[y0 * src_width + x1];
+		const uchar3 p10 = src_img[y1 * src_width + x0];
+		const uchar3 p11 = src_img[y1 * src_width + x1];
+
+		const float blue = p00.x * w00 + p01.x * w01 + p10.x * w10 + p11.x * w11;
+		const float green = p00.y * w00 + p01.y * w01 + p10.y * w10 + p11.y * w11;
+		const float red = p00.z * w00 + p01.z * w01 + p10.z * w10 + p11.z * w11;
+
+		// Planar output: RRRR... GGGG... BBBB...
+		dataOut[out] = red / 255.0f;
+		dataOut[plane + out] = green / 255.0f;
+		dataOut[plane * 2 + out] = blue / 255.0f;
+	}
+
+	// Launches resize_norm_kernel on the default stream and waits for it.
+	//
+	// d_srcRGB : device pointer to the packed 3-byte-per-pixel source image.
+	// d_dstRGB : device pointer to 3 * dst_width * dst_height floats.
+	// Returns cudaSuccess, cudaErrorInvalidValue for null pointers or
+	// non-positive sizes, or the error reported by the launch/synchronisation.
+	cudaError_t resizeAndNorm(unsigned char* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height)
+	{
+		if (d_srcRGB == nullptr || d_dstRGB == nullptr
+			|| src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0) {
+			fprintf(stderr, "resizeAndNorm: invalid argument\n");
+			return cudaErrorInvalidValue;
+		}
+
+		// One thread per destination pixel, grid rounded up to cover the image.
+		dim3 block(32, 16, 1);
+		dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1);
+
+		resize_norm_kernel << < grid, block >> >((uchar3 *)d_srcRGB, src_width, src_height, d_dstRGB, dst_width, dst_height);
+
+		cudaError_t cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "resize_norm_kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			return cudaStatus;
+		}
+
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching resize_norm_kernel!\n", cudaStatus);
+			return cudaStatus;
+		}
+
+		return cudaStatus;
+	}
+
+	//int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter, cudaStream_t stream) {
+	//	float scaleX = (w*1.0f / in_w);
+	//	float scaleY = (h*1.0f / in_h);
+	//	float shiftX = 0.f, shiftY = 0.f;
+	//	if (keepration)scaleX = scaleY = scaleX > scaleY ? scaleX : scaleY;
+	//	if (keepration && keepcenter) { shiftX = (in_w - w / scaleX) / 2.f; shiftY = (in_h - h / scaleY) / 2.f; }
+	//	const int n = in_w*in_h;
+	//	int blockSize = 1024;
+	//	const int gridSize = (n + blockSize - 1) / blockSize;
+	//	resizeNormKernel << <gridSize, blockSize, 0, stream >> > ((uchar3*)(p), d, in_w, in_h, w, h, scaleX, scaleY, shiftX, shiftY);
+	//	return 0;
+	//}
+
+	//int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter) {
+	//	float scaleX = (w*1.0f / in_w);
+	//	float scaleY = (h*1.0f / in_h);
+	//	float shiftX = 0.f, shiftY = 0.f;
+	//	if (keepration)scaleX = scaleY = scaleX > scaleY ? scaleX : scaleY;
+	//	if (keepration && keepcenter) { shiftX = (in_w - w / scaleX) / 2.f; shiftY = (in_h - h / scaleY) / 2.f; }
+	//	const int n = in_w*in_h;
+	//	int blockSize = 1024;
+	//	const int gridSize = (n + blockSize - 1) / blockSize;
+	//	resizeNormKernel << <gridSize, blockSize, 0 >> > ((uchar3*)(p), d, in_w, in_h, w, h, scaleX, scaleY, shiftX, shiftY);
+	//	return 0;
+	//}
+
+	// Host wrapper: enqueues the copy2square kernel on `stream` with one thread
+	// per source pixel. The launch is not checked or waited for; always returns 0.
+	int copy2square(void * p, void *d, int w, int h, int squareWidth, cudaStream_t stream) {
+		const dim3 threads(32, 16, 1);
+		const unsigned int blocks_x = (w + (threads.x - 1)) / (threads.x);
+		const unsigned int blocks_y = (h + (threads.y - 1)) / threads.y;
+		const dim3 blocks(blocks_x, blocks_y, 1);
+		copy2square << <blocks, threads, 0, stream>> > ((uchar3 *)(p), (uchar3 *)d, w, h, squareWidth);
+		return 0;
+	}
+
+}
 \ No newline at end of file
@@ -10,6 +10,10 @@
  
 #include <cuda.h>
  
+typedef unsigned char   uint8;
+typedef unsigned int    uint32;
+typedef int             int32;
+
 typedef enum
 {
 	ITU601 = 1,
@@ -22,5 +26,12 @@ namespace cuda_common
  
 	cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height);
 	cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height);
+
+	//int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter, cudaStream_t stream);
+	//int resizeAndNorm(void * p, int in_w, int in_h, float *d, int w, int h, bool keepration, bool keepcenter);
+	cudaError_t resizeAndNorm(unsigned char* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height);
+
+	int copy2square(void * p, void *d, int w, int h, int max_side_length, cudaStream_t stream);
+
 }
  
+//
+//#include <fstream>
+//#include <iostream>
+//#include <sstream>
+//#include <vector>
+//
+//
+//#include "NvInfer.h"
+//#include "NvOnnxParser.h"
+//#include <opencv2/opencv.hpp>
+//
+//#include "AlgorithmResult.h"
+//#include "cuda_kernels.h"
+//
+//
+//// @brief 用于创建IBuilder、IRuntime或IRefitter实例的记录器用于通过该接口创建的所有对象。
+//// 在释放所有创建的对象之前，记录器应一直有效。
+//// 主要是实例化ILogger类下的log()方法。
+//class Logger : public nvinfer1::ILogger
+//{
+//	void log(Severity severity, const char* message)  noexcept
+//	{
+//		// suppress info-level messages
+//		if (severity != Severity::kINFO)
+//			std::cout << message << std::endl;
+//	}
+//} gLogger;
+//
+//
+//
+//void onnx_to_engine(std::string onnx_file_path, std::string engine_file_path, int type) {
+//
+//	// 构建器，获取cuda内核目录以获取最快的实现
+//	// 用于创建config、network、engine的其他对象的核心类
+//	nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
+//	const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+//	// 解析onnx网络文件
+//	// tensorRT模型类
+//	nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch);
+//	// onnx文件解析类
+//	// 将onnx文件解析，并填充TensorRT网络结构
+//	nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, gLogger);
+//	// 解析onnx文件
+//	parser->parseFromFile(onnx_file_path.c_str(), 2);
+//	for (int i = 0; i < parser->getNbErrors(); ++i) {
+//		std::cout << "load error: " << parser->getError(i)->desc() << std::endl;
+//	}
+//	printf("tensorRT load mask onnx model successfully!!!...\n");
+//
+//	// 创建推理引擎
+//	// 创建生成器配置对象。
+//	nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
+//	// 设置最大工作空间大小。
+//	config->setMaxWorkspaceSize(16 * (1 << 20));
+//	// 设置模型输出精度
+//	if (type == 1) {
+//		config->setFlag(nvinfer1::BuilderFlag::kFP16);
+//	}
+//	if (type == 2) {
+//		config->setFlag(nvinfer1::BuilderFlag::kINT8);
+//	}
+//	// 创建推理引擎
+//	nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
+//	// 将推理引擎保存到本地
+//	std::cout << "try to save engine file now~~~" << std::endl;
+//	std::ofstream file_ptr(engine_file_path, std::ios::binary);
+//	if (!file_ptr) {
+//		std::cerr << "could not open plan output file" << std::endl;
+//		return;
+//	}
+//	// 将模型转化为文件流数据
+//	nvinfer1::IHostMemory* model_stream = engine->serialize();
+//	// 将文件保存到本地
+//	file_ptr.write(reinterpret_cast<const char*>(model_stream->data()), model_stream->size());
+//	// 销毁创建的对象
+//	model_stream->destroy();
+//	engine->destroy();
+//	network->destroy();
+//	parser->destroy();
+//	std::cout << "convert onnx model to TensorRT engine model successfully!" << std::endl;
+//}
+//
+//int main() {
+//
+//
+//	const char* model_path_onnx = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.onnx";
+//	const char* model_path_engine = "E:/Archime/dog_pose_detect/yolov5/runs/train/exp10/weights/best.engine";
+//	const char* image_path = "F:/dog_trainer_sys/test1/IMG_6837.JPG";
+//	std::string lable_path = "F:/dog_trainer_sys/train2/classes.txt";
+//	const char* input_node_name = "images";
+//	const char* output_node_name = "output";
+//	int num_ionode = 2;
+//
+//	// 读取本地模型文件
+//	std::ifstream file_ptr(model_path_engine, std::ios::binary);
+//	if (!file_ptr.good()) {
+//		std::cerr << "文件无法打开，请确定文件是否可用！" << std::endl;
+//	}
+//
+//	size_t size = 0;
+//	file_ptr.seekg(0, file_ptr.end);	// 将读指针从文件末尾开始移动0个字节
+//	size = file_ptr.tellg();	// 返回读指针的位置，此时读指针的位置就是文件的字节数
+//	file_ptr.seekg(0, file_ptr.beg);	// 将读指针从文件开头开始移动0个字节
+//	char* model_stream = new char[size];
+//	file_ptr.read(model_stream, size);
+//	file_ptr.close();
+//
+//	// 日志记录接口
+//	//Logger logger;
+//	// 反序列化引擎
+//	nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
+//	// 推理引擎
+//	// 保存模型的模型结构、模型参数以及最优计算kernel配置；
+//	// 不能跨平台和跨TensorRT版本移植
+//	nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(model_stream, size);
+//	// 上下文
+//	// 储存中间值，实际进行推理的对象
+//	// 由engine创建，可创建多个对象，进行多推理任务
+//	nvinfer1::IExecutionContext* context = engine->createExecutionContext();
+//
+//
+//	delete[] model_stream;
+//
+//	// 创建GPU显存缓冲区
+//	void** data_buffer = new void*[num_ionode];
+//	// 创建GPU显存输入缓冲区
+//	int input_node_index = engine->getBindingIndex(input_node_name);
+//	nvinfer1::Dims input_node_dim = engine->getBindingDimensions(input_node_index);
+//	size_t input_data_length = input_node_dim.d[1] * input_node_dim.d[2] * input_node_dim.d[3];
+//	cudaMalloc(&(data_buffer[input_node_index]), input_data_length * sizeof(float));
+//	// 创建GPU显存输出缓冲区
+//	int output_node_index = engine->getBindingIndex(output_node_name);
+//	nvinfer1::Dims output_node_dim = engine->getBindingDimensions(output_node_index);
+//	size_t output_data_length = output_node_dim.d[1] * output_node_dim.d[2];
+//	cudaMalloc(&(data_buffer[output_node_index]), output_data_length * sizeof(float));
+//
+//
+//	// 图象预处理 - 格式化操作
+//	cv::Mat image = cv::imread(image_path);
+//	int max_side_length = std::max(image.cols, image.rows);
+//	cv::Mat max_image = cv::Mat::zeros(cv::Size(max_side_length, max_side_length), CV_8UC3);
+//	cv::Rect roi(0, 0, image.cols, image.rows);
+//	image.copyTo(max_image(roi));
+//	// 将图像归一化，并放缩到指定大小
+//	cv::Size input_node_shape(input_node_dim.d[2], input_node_dim.d[3]);
+//	cv::Mat BN_image = cv::dnn::blobFromImage(max_image, 1 / 255.0, input_node_shape, cv::Scalar(0, 0, 0), true, false);
+//
+//	std::vector<float> input_data(input_data_length);
+//	memcpy(input_data.data(), BN_image.ptr<float>(), input_data_length * sizeof(float));
+//
+//	//void* pGPUData;
+//	//cudaMalloc(&pGPUData, 3 * image.cols * image.rows * sizeof(unsigned char));
+//	//cudaMemcpy(pGPUData, (void*)(image.data), 3 * image.cols * image.rows * sizeof(unsigned char), cudaMemcpyHostToDevice);
+//	//cuda_common::resizeAndNorm((unsigned char*)pGPUData, max_side_length, max_side_length, (float*)data_buffer[input_node_index], input_node_dim.d[2], input_node_dim.d[3]);
+//
+//	// 创建输入cuda流
+//	cudaStream_t stream;
+//	cudaStreamCreate(&stream);
+//
+//	// 输入数据由内存到GPU显存
+//	cudaMemcpyAsync(data_buffer[input_node_index], input_data.data(), input_data_length * sizeof(float), cudaMemcpyHostToDevice, stream);
+//
+//	// 模型推理
+//	context->enqueueV2(data_buffer, stream, nullptr);
+//
+//	float* result_array = new float[output_data_length];
+//	cudaMemcpyAsync(result_array, data_buffer[output_node_index], output_data_length * sizeof(float), cudaMemcpyDeviceToHost, stream);
+//
+//	ResultYolov5 result;
+//	result.factor = max_side_length / (float)input_node_dim.d[2];
+//	result.read_class_names(lable_path);
+//
+//	//cv::Mat result_image = result.yolov5_result(image, result_array);
+//
+//	//// 查看输出结果
+//	//cv::imshow("C++ + OpenVINO + Yolov5 推理结果", result_image);
+//	//cv::waitKey();
+//
+//	std::vector<DogPoseResult> vec_result = result.yolov5_result(result_array, 0.6);
+//	if (vec_result.size() > 0) {
+//		DogPoseResult poseResult = vec_result[0];
+//		std::cout << poseResult.x << std::endl;
+//		std::cout << poseResult.y << std::endl;
+//		std::cout << poseResult.width << std::endl;
+//		std::cout << poseResult.height << std::endl;
+//		std::cout << poseResult.confidence << std::endl;
+//		std::cout << poseResult.classId << std::endl;
+//		std::cout << poseResult.className << std::endl;
+//
+//
+//		cv::Rect position_boxe;
+//		position_boxe.x = poseResult.x;
+//		position_boxe.y = poseResult.y;
+//		position_boxe.width = poseResult.width;
+//		position_boxe.height = poseResult.height;
+//		cv::rectangle(image, position_boxe, cv::Scalar(0, 0, 255), 2, 8);
+//		cv::rectangle(image, cv::Point(position_boxe.x, position_boxe.y - 20), cv::Point(position_boxe.x, position_boxe.y), cv::Scalar(0, 255, 255), -1);
+//		cv::putText(image, poseResult.className, cv::Point(position_boxe.x, position_boxe.y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, cv::Scalar(0, 0, 0));
+//
+//		cv::imwrite("result.jpg", image);
+//		cv::imshow("show", image);
+//		cv::waitKey();
+//	}
+//}
 \ No newline at end of file
@@ -12,6 +12,9 @@
  
 #include "opencv2\opencv.hpp"
  
+#include "DogPoseDetector.h"
+
+
 using namespace std;
 using namespace cv;
  
@@ -20,6 +23,8 @@ unsigned char *pHwRgb[2] = {nullptr, nullptr};
 int sum1 = 0;
 int sum2 = 0;
  
+DogPoseDetector poseDetector;	// file-wide detector instance: init() is called from main(), detect() from showFrame()
+
  
 mutex m_mutex;
 void saveFrame(AVFrame * gpuFrame, string file_name) {
@@ -46,47 +51,45 @@ void saveFrame(AVFrame * gpuFrame, string file_name) {
 }
  
 mutex m_mutex_show;
+unsigned char *pShowData = nullptr;	// BGR output buffer for showFrame(); cudaMalloc'ed on first use there, accessed under m_mutex_show
+
 void showFrame(AVFrame * gpuFrame) {
 	std::lock_guard<std::mutex> l(m_mutex_show);
  
-	unsigned char *pHwData = nullptr;
-	cudaError_t cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
-
+	// Converts the decoded frame to BGR in pShowData (device buffer allocated once from the first frame's size, then reused) and runs pose detection on it.
+	if (pShowData == nullptr && cudaMalloc((void **)&pShowData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)) != cudaSuccess) {
+		pShowData = nullptr;	// leave the buffer marked unallocated so the next frame retries
+		cout << "cudaMalloc failed !!!" << endl;
+		return;
+	}
 	cuda_common::setColorSpace(ITU709, 0);
-	cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0], (CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height);
-	cudaDeviceSynchronize();
+	cudaError_t cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0], (CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pShowData, gpuFrame->width, gpuFrame->height);
 	if (cudaStatus != cudaSuccess) {
 		cout << "CUDAToBGR failed !!!" << endl;
 		return;
 	}
  
-
-
-	unsigned char * pHwRgb = pHwData;
 	int channel = 3;
 	int width = gpuFrame->width;
 	int height = gpuFrame->height;
  
-	if (pHwRgb != nullptr && channel > 0 && width > 0 && height > 0) {
-		int nSize = channel * height * width;
-		unsigned char* cpu_data = new unsigned char[nSize];
+	if (pShowData != nullptr && channel > 0 && width > 0 && height > 0) {
+		poseDetector.detect(pShowData, width, height);	// the detector is handed the cudaMalloc'ed BGR buffer directly; no host copy is made here
  
-		cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost);
-		cudaDeviceSynchronize();
+		//int nSize = channel * height * width;
+		//unsigned char* cpu_data = new unsigned char[nSize];
  
-		cv::Mat img_(height, width, CV_8UC3, cpu_data);
-		bool bWrite = cv::imwrite("dec0.jpg", img_);
+		//cudaMemcpy(cpu_data, pShowData, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost);
+		//cudaDeviceSynchronize();
  
-		imshow("show", img_);
-		waitKey(0);
+		//cv::Mat img_(height, width, CV_8UC3, cpu_data);
+		//imshow("show", img_);
+		//waitKey(1);
  
-		delete[] cpu_data;
-		cpu_data = nullptr;
+		//delete[] cpu_data;
+		//cpu_data = nullptr;
  
-	}
-
-	cudaFree(pHwData);
-	pHwData = nullptr;
+	}
 }
  
 /**
@@ -108,7 +111,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){
 			// cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl;
 			cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
  
-			saveFrame(gpuFrame, decoder->getName());
+			//saveFrame(gpuFrame, decoder->getName());
 			showFrame(gpuFrame);
 		}
     }
@@ -172,7 +175,8 @@ void decode_finished_cbk(const void* userPtr){
 // string test_uri = "/home/cmhu/data/output_1920x1080.mp4";
 // string test_uri = "rtsp://176.10.0.2:8554/stream";
 // string test_uri = "/mnt/f/fiss/test_data/h265.mp4";
-string test_uri = "rtsp://176.10.0.4:8554/stream";
+//string test_uri = "rtsp://176.10.0.4:8554/stream";
+string test_uri = "f://data/caishenkezhan.mp4";	// hard-coded local test clip; the alternative sources above are commented out
  
 void createDecode(int index){
     FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
@@ -210,16 +214,20 @@ void logFF(void *, int level, const char *fmt, va_list ap)
 int main(int argc, char* argv[]) {
  
     printf("start \n");
-    if (argc != 3) {
-        fprintf(stderr, "./xxx uri gpu_id\n");
-        return -1;
-    }
+    //if (argc != 3) {
+    //    fprintf(stderr, "./xxx uri gpu_id\n");
+    //    return -1;
+    //}
  
-    char* uri = argv[1];
-    char* gpuid = argv[2];
+	char* uri = "F:/dog_trainer_sys/test1/5min.mp4";//argv[1];
+	char* gpuid = "0";//argv[2];
  
     cout << av_version_info() << endl;
  
+	poseDetector.init();
+
+	//namedWindow("show", WINDOW_NORMAL);
+
     //evalQuality(uri, gpuid);
  
  
@@ -253,19 +261,19 @@ int main(int argc, char* argv[]) {
     pDecManager->getResolution(config.name, w,h);
     printf( "%s : %dx%d\n", config.name.c_str() , w,h );
  
-    thread* m_thread = new thread([](void* arg)
-        {
-            while (true)
-            {
-                std::this_thread::sleep_for(std::chrono::milliseconds(5000));
-                FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
-                int count = pDecManager->count();
-                cout << "当前运行路数： " << pDecManager->count() << endl;
-            }
-
-            return (void*)0;
-        }
-    , nullptr);
+    //thread* m_thread = new thread([](void* arg)
+    //    {
+    //        while (true)
+    //        {
+    //            std::this_thread::sleep_for(std::chrono::milliseconds(5000));
+    //            FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
+    //            int count = pDecManager->count();
+    //            cout << "当前运行路数： " << pDecManager->count() << endl;
+    //        }
+
+    //        return (void*)0;
+    //    }
+    //, nullptr);
  
  
     while (getchar() != 'q');