Blame view

src/main.cpp 11.3 KB
aac5773f   hucm   功能基本完成,接口待打磨
1
2
3
  #include "FFNvDecoderManager.h"
  #include <iostream>
  
e41a52bb   Hu Chunming   1.优化数据读取线程;2. 添加A...
4
5
6
7
  #include "cuda_kernels.h"
  
  #include "NvJpegEncoder.h"
  
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
8
9
10
11
12
  #include <pthread.h>
  #include <thread>
  
  #include <chrono>
  
e65720d4   Hu Chunming   优化demo
13
14
15
16
17
18
  // Lazily allocated device buffers that receive the converted BGR image in
  // postDecoded: index 0 is used for decoder "dec1", index 1 for "dec2".
  unsigned char *pHwRgb[2] = {nullptr, nullptr};
  
  // Number of frames delivered so far for "dec1" and "dec2" respectively;
  // the value is also used as the file name of the JPEG written for that frame.
  int sum1 = 0;
  int sum2 = 0;
  
  // Passed as the last argument of saveJpeg in postDecoded. The cudaStreamCreate
  // calls are commented out in this file, so these keep their zero-initialised
  // (namespace-scope) value.
  cudaStream_t stream[2];
e41a52bb   Hu Chunming   1.优化数据读取线程;2. 添加A...
19
  
0b43216c   Hu Chunming   添加重要注释
20
21
22
  /**
   * 注意: gpuFrame 在解码器设置的显卡上,后续操作要十分注意这一点,尤其是多线程情况
   * */
aac5773f   hucm   功能基本完成,接口待打磨
23
24
25
26
  void postDecoded(const void * userPtr, AVFrame * gpuFrame){
      FFNvDecoder* decoder = (FFNvDecoder*)userPtr;
      if (decoder!= nullptr)
      {
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
27
28
          // cout << "decode name: " << decoder->getName() << endl;
  
e65720d4   Hu Chunming   优化demo
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
              // const char* gpu_pixfmt = av_get_pix_fmt_name((AVPixelFormat)gpuFrame->format);
              // cout << "pixfmt: " << gpu_pixfmt << endl;
              // cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
              // cout << "decode successed ✿✿ヽ(°▽°)ノ✿ " << endl;
  
              int sum = sum1;
              if (decoder->getName() == "dec1")
              {
                  sum1 ++ ;
                  sum = sum1;
  
                  if (gpuFrame->format == AV_PIX_FMT_CUDA)
                  {   
                      cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl;
                      cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
                      cudaError_t cudaStatus;
                      if(pHwRgb[0] == nullptr){
                          // cudaStreamCreate(&stream[0]);
                          cuda_common::setColorSpace2( ITU709, 0 );
                          cudaStatus = cudaMalloc((void **)&pHwRgb[0], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
                      }
                      cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[0], gpuFrame->width, gpuFrame->height);
                      cudaDeviceSynchronize();
                      if (cudaStatus != cudaSuccess) {
                          cout << "CUDAToBGR failed !!!" << endl;
                          return;
                      }
  
0a826b3d   Hu Chunming   适应WSL的修改
57
                      string path = "/mnt/f/fiss/data/" + decoder->getName() + "/" + to_string(sum) + ".jpg";
e65720d4   Hu Chunming   优化demo
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
                      saveJpeg(path.c_str(), pHwRgb[0], gpuFrame->width, gpuFrame->height, stream[0]);  // 验证 CUDAToRGB 
                  }
              } else if (decoder->getName() == "dec2") 
              {
                  sum2 ++ ;
                  sum = sum2;
  
                  if (gpuFrame->format == AV_PIX_FMT_CUDA)
                  {   
                      cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl;
                      cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
                      cudaError_t cudaStatus;
                      if(pHwRgb[1] == nullptr){
                          // cudaStreamCreate(&stream[1]);
                          cuda_common::setColorSpace2( ITU709, 0 );
                          cudaStatus = cudaMalloc((void **)&pHwRgb[1], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
                      }
                      cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[1], gpuFrame->width, gpuFrame->height);
                      cudaDeviceSynchronize();
                      if (cudaStatus != cudaSuccess) {
                          cout << "CUDAToBGR failed !!!" << endl;
                          return;
                      }
  
0a826b3d   Hu Chunming   适应WSL的修改
82
                      string path = "/mnt/f/fiss/data/" + decoder->getName() + "/" + to_string(sum) + ".jpg";
e65720d4   Hu Chunming   优化demo
83
84
85
                      saveJpeg(path.c_str(), pHwRgb[1], gpuFrame->width, gpuFrame->height, stream[1]);  // 验证 CUDAToRGB 
                  }
              }
aac5773f   hucm   功能基本完成,接口待打磨
86
87
88
      }
  }
  
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
89
90
91
92
93
94
  // Bookkeeping for the frame-count window kept by postDecoded0.
  // start_time / end_time are both set to get_cur_time() when a window opens.
  long start_time = 0;
  long end_time = 0;
  // True while a window is open; cleared once `count` reaches `count_std`.
  bool count_flag = false;
  // Frames counted in the current window.
  int count = 0;
  // Window length in frames.
  int count_std = 100;
  
bc52e542   Hu Chunming   添加关键帧解码功能
95
  /**
   * Returns the current system_clock time as a timestamp in milliseconds
   * since the clock's epoch.
   */
  static long long get_cur_time(){
      // Read the system clock and express the elapsed time since its epoch
      // with millisecond resolution (finer units are truncated).
      const auto sinceEpoch = std::chrono::system_clock::now().time_since_epoch();
      return std::chrono::duration_cast<std::chrono::milliseconds>(sinceEpoch).count();
  }
  
bc52e542   Hu Chunming   添加关键帧解码功能
103
  // Total frames counted by postDecoded0; printed per frame, used as the JPEG
  // file name, and reported by main at shutdown.
  static int sum = 0;
e65720d4   Hu Chunming   优化demo
104
105
  // Device buffer for the BGR output of postDecoded0; allocated there on first use.
  unsigned char *pHwData = nullptr;
  
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
106
  void postDecoded0(const void * userPtr, AVFrame * gpuFrame){
bc52e542   Hu Chunming   添加关键帧解码功能
107
108
      // std::this_thread::sleep_for(std::chrono::milliseconds(30000));
  
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
109
110
111
112
113
114
115
116
117
118
119
120
121
122
      FFNvDecoder* decoder = (FFNvDecoder*)userPtr;
      if (decoder!= nullptr)
      {
          // cout << "decode name: " << decoder->getName() << endl;
          if (decoder->getName() == "dec")
          {
              if (! count_flag)
              {
                  count_flag = true;
                  count = 0;
                  end_time = start_time = get_cur_time();
              }
              count++;
              sum ++ ;
0a826b3d   Hu Chunming   适应WSL的修改
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
              if (count >= count_std)
              {
                  // end_time = get_cur_time();
                  // long time_using = end_time - start_time;
                  // double time_per_frame = double(time_using)/count_std ;
                  // cout << count_std << "帧用时:" << time_using << "ms 每帧用时:" << time_per_frame << "ms" << endl;
                  cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
                  cout << gpuFrame->pts << endl;
  
                  count_flag = false;
              }
              cout << "帧数:" << sum << endl;
  
              if (gpuFrame->format == AV_PIX_FMT_CUDA)
              {   
                  cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
                  // cout << "gpu id : " << decoder->m_cfg.gpuid.c_str() << endl;
                  cudaError_t cudaStatus;
                  if(pHwData == nullptr){
                      cuda_common::setColorSpace2( ITU709, 0 );
                      cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
                  }
                  cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height);
                  cudaDeviceSynchronize();
                  if (cudaStatus != cudaSuccess) {
                      cout << "CUDAToBGR failed !!!" << endl;
                      return;
                  }
  
                  string path = "/mnt/f/fiss/data/" + to_string(sum) + ".jpg";
                  saveJpeg(path.c_str(), pHwData, gpuFrame->width, gpuFrame->height, nullptr);  // 验证 CUDAToRGB 
              }
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
155
156
157
158
          }
      }
  }
  
6fc86385   ming   代码优化
159
160
161
162
  /**
   * Decode-finished callback: prints the current get_cur_time() timestamp.
   *
   * @param userPtr  unused
   */
  void decode_finished_cbk(const void* userPtr){
      (void)userPtr;
      const long long finishedAt = get_cur_time();
      std::cout << "decode_finish timestamp: " << finishedAt << std::endl;
  }
  
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
163
  // string test_uri = "rtmp://192.168.10.56:1935/objecteye/1";
e65720d4   Hu Chunming   优化demo
164
  // string test_uri = "/home/cmhu/data/output_800x480.mp4";
bc52e542   Hu Chunming   添加关键帧解码功能
165
166
  // string test_uri = "/home/cmhu/data/output_1920x1080.mp4";
  // string test_uri = "rtsp://176.10.0.2:8554/stream";
d384f0e9   Hu Chunming   代码优化
167
168
  // string test_uri = "/mnt/f/fiss/test_data/h265.mp4";
  // Stream URI assigned to every decoder configured in this file
  // (createDecode and main); the commented-out lines above are alternative sources.
  string test_uri = "rtsp://176.10.0.4:8554/stream";
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
169
  
48330793   Hu Chunming   修正解码线程自然结束时解码器内存没...
170
  void createDecode(int index){
aac5773f   hucm   功能基本完成,接口待打磨
171
      FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
7319ea36   Hu Chunming   多显卡设置
172
      MgrDecConfig config;
48330793   Hu Chunming   修正解码线程自然结束时解码器内存没...
173
      config.name = "dec" + to_string(index);
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
174
      config.cfg.uri = test_uri;
7319ea36   Hu Chunming   多显卡设置
175
      config.cfg.post_decoded_cbk = postDecoded;
6fc86385   ming   代码优化
176
      config.cfg.decode_finished_cbk = decode_finished_cbk;
7319ea36   Hu Chunming   多显卡设置
177
      config.cfg.force_tcp = true;
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
178
179
180
  
      if (index % 2 == 0)
      {
6fc86385   ming   代码优化
181
          config.cfg.gpuid = "0";
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
182
183
184
      }
      else
      {
6fc86385   ming   代码优化
185
          config.cfg.gpuid = "0";
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
186
187
      }
      
7319ea36   Hu Chunming   多显卡设置
188
189
190
      FFNvDecoder* decoder = pDecManager->createDecoder(config);
      if (!decoder)
      {
48330793   Hu Chunming   修正解码线程自然结束时解码器内存没...
191
          return ;
7319ea36   Hu Chunming   多显卡设置
192
193
194
      }
      pDecManager->setUserPtr(config.name, decoder);
      pDecManager->startDecodeByName(config.name);
48330793   Hu Chunming   修正解码线程自然结束时解码器内存没...
195
196
  }
  
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
  #define checkCudaErrors(S) do {CUresult  status; \
          status = S; \
          if (status != CUDA_SUCCESS ) std::cout << __LINE__ <<" checkCudaErrors - status = " << status << std::endl; \
          } while (false)
  
  int CheckCUDAProperty( int devId )
  {
      cuInit(0);
  
  	CUdevice dev = devId;
  	size_t memSize = 0;
  	char devName[256] = {0};
  	int major = 0, minor = 0;
  	CUresult rlt = CUDA_SUCCESS;
  
d384f0e9   Hu Chunming   代码优化
212
213
214
215
      rlt = cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
      checkCudaErrors( rlt );
  
      rlt = cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
  	checkCudaErrors( rlt );
  
  	rlt = cuDeviceGetName( devName, sizeof( devName ), dev );
  	checkCudaErrors( rlt );
  
  	printf( "Using GPU Device %d: %s has SM %d.%d compute capability\n",
  		    dev, devName, major, minor );
  
  	rlt = cuDeviceTotalMem( &memSize, dev );
  	checkCudaErrors( rlt );
  
  	printf( "Total amount of global memory:   %4.4f MB\n",
  		   (float)memSize / ( 1024 * 1024 ) );
  
  	return 0;
  }
  
3c7e3e11   Hu Chunming   1.修改日志
233
234
235
236
237
238
  /**
   * Log callback with the signature expected by av_log_set_callback (its
   * registration in main is currently commented out). Formats the message to
   * stdout; the context pointer and the log level are not used.
   *
   * @param level  log level of the message (ignored)
   * @param fmt    printf-style format string
   * @param ap     arguments for fmt
   */
  void logFF(void *, int level, const char *fmt, va_list ap)
  {
      (void)level;
      vfprintf(stdout, fmt, ap);
  }
  
  
48330793   Hu Chunming   修正解码线程自然结束时解码器内存没...
239
240
  int main(){
  
3c7e3e11   Hu Chunming   1.修改日志
241
242
      // av_log_set_callback(&logFF);
  
0a826b3d   Hu Chunming   适应WSL的修改
243
      CheckCUDAProperty(0);
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
244
  
48330793   Hu Chunming   修正解码线程自然结束时解码器内存没...
245
      FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
7319ea36   Hu Chunming   多显卡设置
246
  
bc52e542   Hu Chunming   添加关键帧解码功能
247
248
249
250
251
      // int count = 99;
      // for (size_t i = 0; i < count ; i++)
      // {
      //     createDecode(i);
      // }
48330793   Hu Chunming   修正解码线程自然结束时解码器内存没...
252
253
  
      MgrDecConfig config;
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
254
255
256
      config.name = "dec";
      config.cfg.uri = test_uri;
      config.cfg.post_decoded_cbk = postDecoded0;
6fc86385   ming   代码优化
257
      config.cfg.decode_finished_cbk = decode_finished_cbk;
48330793   Hu Chunming   修正解码线程自然结束时解码器内存没...
258
      config.cfg.force_tcp = true;
0a826b3d   Hu Chunming   适应WSL的修改
259
      config.cfg.gpuid = "0";
7319ea36   Hu Chunming   多显卡设置
260
      FFNvDecoder* dec2 = pDecManager->createDecoder(config);
aac5773f   hucm   功能基本完成,接口待打磨
261
262
263
264
      if (!dec2)
      {
          return 1;
      }
7319ea36   Hu Chunming   多显卡设置
265
      pDecManager->setUserPtr(config.name, dec2);
0a826b3d   Hu Chunming   适应WSL的修改
266
      // pDecManager->setDecKeyframe(config.name, true);
7319ea36   Hu Chunming   多显卡设置
267
268
      pDecManager->startDecodeByName(config.name);
  
d384f0e9   Hu Chunming   代码优化
269
270
271
272
      int w,h;
      pDecManager->getResolution(config.name, w,h);
      printf( "%s : %dx%d\n", config.name.c_str() , w,h );
  
f40cc409   Hu Chunming   优化显存占用。当前在3080显卡上...
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
      pthread_t m_decode_thread;
      pthread_create(&m_decode_thread,0,
          [](void* arg)
          {
              while (true)
              {
                  std::this_thread::sleep_for(std::chrono::milliseconds(5000));
                  FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
                  int count = pDecManager->count();
                  cout << "当前运行路数: " << pDecManager->count() << endl;
                  if (count <= 0)
                  {
                      break;
                  }
              }  
  
              return (void*)0;
          }
      ,nullptr);
  
7319ea36   Hu Chunming   多显卡设置
293
      
48330793   Hu Chunming   修正解码线程自然结束时解码器内存没...
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
      // config.name = "dec0";
      // config.cfg.uri = "rtmp://192.168.10.56:1935/objecteye/1";
      // config.cfg.gpuid = "0";
      // FFNvDecoder* dec0 = pDecManager->createDecoder(config);
      // if (!dec0)
      // {
      //     return 1;
      // }
      // pDecManager->setUserPtr(config.name, dec0);
      // pDecManager->startDecodeByName(config.name);
  
      // config.name = "dec01";
      // config.cfg.uri = "rtmp://192.168.10.56:1935/objecteye/1";
      // config.cfg.gpuid = "0";
      // FFNvDecoder* dec01 = pDecManager->createDecoder(config);
      // if (!dec01)
      // {
      //     return 1;
      // }
      // pDecManager->setUserPtr(config.name, dec01);
      // pDecManager->startDecodeByName(config.name);
aac5773f   hucm   功能基本完成,接口待打磨
315
  
e41a52bb   Hu Chunming   1.优化数据读取线程;2. 添加A...
316
      // while (getchar() != 'q');  
aac5773f   hucm   功能基本完成,接口待打磨
317
  
e41a52bb   Hu Chunming   1.优化数据读取线程;2. 添加A...
318
319
320
      // // pDecManager->closeDecoderByName("dec1");
      // // pDecManager->pauseDecoder("dec1");
      // pDecManager->pauseDecoder("dec2");
aac5773f   hucm   功能基本完成,接口待打磨
321
  
e41a52bb   Hu Chunming   1.优化数据读取线程;2. 添加A...
322
      // while (getchar() != 'q');
aac5773f   hucm   功能基本完成,接口待打磨
323
  
e41a52bb   Hu Chunming   1.优化数据读取线程;2. 添加A...
324
325
      // // pDecManager->resumeDecoder("dec1");
      // pDecManager->resumeDecoder("dec2");
aac5773f   hucm   功能基本完成,接口待打磨
326
  
aac5773f   hucm   功能基本完成,接口待打磨
327
328
      while (getchar() != 'q');
  
bc52e542   Hu Chunming   添加关键帧解码功能
329
330
      cout << "总共帧数:" << sum << endl;
  
aac5773f   hucm   功能基本完成,接口待打磨
331
332
      pDecManager->closeAllDecoder();
  }