Commit f40cc409e0cd8c22099105f9d6358e59db43ab26

Authored by Hu Chunming
1 parent 48330793

优化显存占用。当前在3080显卡上支持106路解码

ffmpeg-4.2.2/libavcodec/cuviddec.c 100644 → 100755
@@ -1012,7 +1012,7 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) @@ -1012,7 +1012,7 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
1012 } 1012 }
1013 1013
1014 ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces; 1014 ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
1015 - ctx->cuparseinfo.ulMaxDisplayDelay = 4; 1015 + ctx->cuparseinfo.ulMaxDisplayDelay = 2;
1016 ctx->cuparseinfo.pUserData = avctx; 1016 ctx->cuparseinfo.pUserData = avctx;
1017 ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence; 1017 ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
1018 ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode; 1018 ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
src/FFCuContextManager.cpp 0 → 100644
  1 +#include "FFCuContextManager.h"
  2 +#include <iostream>
  3 +
  4 +using namespace std;
  5 +
  6 +FFCuContextManager::~FFCuContextManager()
  7 +{
  8 + for(auto iter = ctxMap.begin(); iter != ctxMap.end(); iter++){
  9 + av_buffer_unref(&iter->second);
  10 + }
  11 + ctxMap.clear();
  12 +}
  13 +
  14 +AVBufferRef *FFCuContextManager::getCuCtx(string gpuid)
  15 +{
  16 + AVBufferRef *hw_device_ctx = ctxMap[gpuid];
  17 + if (nullptr == hw_device_ctx)
  18 + {
  19 + // 初始化硬件解码器
  20 + if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_CUDA, gpuid.c_str(), nullptr, 0) < 0)
  21 + {
  22 + cout << "Failed to create specified HW device.";
  23 + return nullptr;
  24 + }
  25 + ctxMap[gpuid] = hw_device_ctx;
  26 + }
  27 + return hw_device_ctx;
  28 +}
0 \ No newline at end of file 29 \ No newline at end of file
src/FFCuContextManager.h 0 → 100644
  1 +
  2 +#include<map>
  3 +#include<string>
  4 +
  5 +extern "C"
  6 +{
  7 + #include <libavcodec/avcodec.h>
  8 + #include <libavdevice/avdevice.h>
  9 + #include <libavformat/avformat.h>
  10 + #include <libavfilter/avfilter.h>
  11 + #include <libavutil/avutil.h>
  12 + #include <libavutil/pixdesc.h>
  13 + #include <libswscale/swscale.h>
  14 +}
  15 +
  16 +using namespace std;
  17 +
  18 +class FFCuContextManager{
  19 +public:
  20 + static FFCuContextManager* getInstance(){
  21 + static FFCuContextManager* singleton = nullptr;
  22 + if (singleton == nullptr){
  23 + singleton = new FFCuContextManager();
  24 + }
  25 + return singleton;
  26 + }
  27 +
  28 + AVBufferRef *getCuCtx(string gpuid);
  29 +
  30 +private:
  31 + FFCuContextManager(){}
  32 + ~FFCuContextManager();
  33 +
  34 +private:
  35 + map<string,AVBufferRef *> ctxMap;
  36 +
  37 +};
0 \ No newline at end of file 38 \ No newline at end of file
src/FFNvDecoder.cpp
@@ -5,6 +5,8 @@ @@ -5,6 +5,8 @@
5 #include <thread> 5 #include <thread>
6 #include <fstream> 6 #include <fstream>
7 7
  8 +#include "FFCuContextManager.h"
  9 +
8 using namespace std; 10 using namespace std;
9 11
10 // 参考博客: https://blog.csdn.net/qq_40116098/article/details/120704340 12 // 参考博客: https://blog.csdn.net/qq_40116098/article/details/120704340
@@ -67,7 +69,8 @@ bool FFNvDecoder::init(FFDecConfig&amp; cfg) @@ -67,7 +69,8 @@ bool FFNvDecoder::init(FFDecConfig&amp; cfg)
67 69
68 bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp) 70 bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp)
69 { 71 {
70 - av_register_all(); 72 + // av_log_set_level(AV_LOG_DEBUG);
  73 +
71 avformat_network_init(); 74 avformat_network_init();
72 75
73 // 打开输入视频文件 76 // 打开输入视频文件
@@ -114,9 +117,17 @@ bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp) @@ -114,9 +117,17 @@ bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp)
114 117
115 hw_pix_fmt = AV_PIX_FMT_CUDA; 118 hw_pix_fmt = AV_PIX_FMT_CUDA;
116 119
  120 + FFCuContextManager* pCtxMgr = FFCuContextManager::getInstance();
  121 + avctx->hw_device_ctx = av_buffer_ref(pCtxMgr->getCuCtx(gpuid));
  122 + if (nullptr == avctx->hw_device_ctx)
  123 + {
  124 + return false;
  125 + }
  126 +
117 // 打开解码器流 127 // 打开解码器流
118 AVDictionary *op = nullptr; 128 AVDictionary *op = nullptr;
119 av_dict_set( &op, "gpu", gpuid, 0 ); 129 av_dict_set( &op, "gpu", gpuid, 0 );
  130 + av_dict_set( &op, "surfaces", "3", 0 );
120 if (avcodec_open2(avctx, vcodec, &op) < 0) { 131 if (avcodec_open2(avctx, vcodec, &op) < 0) {
121 cout << "Failed to open codec for stream" << stream_index; 132 cout << "Failed to open codec for stream" << stream_index;
122 return false; 133 return false;
@@ -231,10 +242,6 @@ void FFNvDecoder::decode_finished() @@ -231,10 +242,6 @@ void FFNvDecoder::decode_finished()
231 { 242 {
232 if (avctx) 243 if (avctx)
233 { 244 {
234 - if (avctx->hw_device_ctx)  
235 - {  
236 - av_buffer_unref(&avctx->hw_device_ctx);  
237 - }  
238 avcodec_free_context(&avctx); 245 avcodec_free_context(&avctx);
239 } 246 }
240 247
src/FFNvDecoderManager.cpp
@@ -112,38 +112,52 @@ bool FFNvDecoderManager::closeDecoderByName(string name){ @@ -112,38 +112,52 @@ bool FFNvDecoderManager::closeDecoderByName(string name){
112 return false; 112 return false;
113 } 113 }
114 114
  115 + m_mutex_erase.lock();
115 auto dec = decoderMap.find(name); 116 auto dec = decoderMap.find(name);
116 if (dec != decoderMap.end()) 117 if (dec != decoderMap.end())
117 { 118 {
118 dec->second->close(); 119 dec->second->close();
119 delete dec->second; 120 delete dec->second;
  121 + dec->second = nullptr;
120 decoderMap.erase(dec); 122 decoderMap.erase(dec);
  123 +
  124 + m_mutex_erase.unlock();
121 return true; 125 return true;
122 } 126 }
123 127
124 - 128 + m_mutex_erase.unlock();
125 cout << "没有找到name为" << name << "的解码器!" << endl; 129 cout << "没有找到name为" << name << "的解码器!" << endl;
126 return false; 130 return false;
127 } 131 }
128 132
129 void FFNvDecoderManager::closeAllDecoder() 133 void FFNvDecoderManager::closeAllDecoder()
130 { 134 {
  135 + m_mutex_erase.lock();
131 for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){ 136 for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){
132 iter->second->close(); 137 iter->second->close();
133 delete iter->second; 138 delete iter->second;
  139 + iter->second = nullptr;
134 } 140 }
135 decoderMap.clear(); 141 decoderMap.clear();
  142 + m_mutex_erase.unlock();
136 } 143 }
137 144
138 void FFNvDecoderManager::closeAllFinishedDecoder() 145 void FFNvDecoderManager::closeAllFinishedDecoder()
139 { 146 {
140 - for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){ 147 + m_mutex_erase.lock();
  148 + for(auto iter = decoderMap.begin(); iter != decoderMap.end(); ){
141 if (iter->second->isFinished()) 149 if (iter->second->isFinished())
142 { 150 {
143 delete iter->second; 151 delete iter->second;
144 - decoderMap.erase(iter); 152 + iter->second = nullptr;
  153 + iter = decoderMap.erase(iter);
  154 + }
  155 + else
  156 + {
  157 + iter++ ;
145 } 158 }
146 } 159 }
  160 + m_mutex_erase.unlock();
147 } 161 }
148 162
149 int FFNvDecoderManager::count() 163 int FFNvDecoderManager::count()
src/FFNvDecoderManager.h
@@ -3,6 +3,8 @@ @@ -3,6 +3,8 @@
3 #include<vector> 3 #include<vector>
4 #include<map> 4 #include<map>
5 5
  6 +#include <mutex>
  7 +
6 using namespace std; 8 using namespace std;
7 9
8 struct MgrDecConfig 10 struct MgrDecConfig
@@ -55,4 +57,6 @@ private: @@ -55,4 +57,6 @@ private:
55 57
56 private: 58 private:
57 map<string, FFNvDecoder*> decoderMap; 59 map<string, FFNvDecoder*> decoderMap;
  60 +
  61 + mutex m_mutex_erase;
58 }; 62 };
59 \ No newline at end of file 63 \ No newline at end of file
src/main.cpp
@@ -5,6 +5,11 @@ @@ -5,6 +5,11 @@
5 5
6 #include "NvJpegEncoder.h" 6 #include "NvJpegEncoder.h"
7 7
  8 +#include <pthread.h>
  9 +#include <thread>
  10 +
  11 +#include <chrono>
  12 +
8 unsigned char *pHwRgb = nullptr; 13 unsigned char *pHwRgb = nullptr;
9 14
10 /** 15 /**
@@ -14,15 +19,21 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ @@ -14,15 +19,21 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){
14 FFNvDecoder* decoder = (FFNvDecoder*)userPtr; 19 FFNvDecoder* decoder = (FFNvDecoder*)userPtr;
15 if (decoder!= nullptr) 20 if (decoder!= nullptr)
16 { 21 {
17 - cout << "decode name: " << decoder->getName() << endl; 22 + // cout << "decode name: " << decoder->getName() << endl;
  23 +
  24 + if (decoder->getName() == "dec1")
  25 + {
  26 + /* code */
  27 + }
  28 +
18 // const char* gpu_pixfmt = av_get_pix_fmt_name((AVPixelFormat)gpuFrame->format); 29 // const char* gpu_pixfmt = av_get_pix_fmt_name((AVPixelFormat)gpuFrame->format);
19 // cout << "pixfmt: " << gpu_pixfmt << endl; 30 // cout << "pixfmt: " << gpu_pixfmt << endl;
20 - cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl; 31 + // cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
21 // cout << "decode successed ✿✿ヽ(°▽°)ノ✿ " << endl; 32 // cout << "decode successed ✿✿ヽ(°▽°)ノ✿ " << endl;
22 33
23 if (gpuFrame->format == AV_PIX_FMT_CUDA) 34 if (gpuFrame->format == AV_PIX_FMT_CUDA)
24 { 35 {
25 - cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl; 36 + // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl;
26 // cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); 37 // cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
27 // cudaError_t cudaStatus; 38 // cudaError_t cudaStatus;
28 // if(pHwRgb == nullptr){ 39 // if(pHwRgb == nullptr){
@@ -42,14 +53,75 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ @@ -42,14 +53,75 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){
42 } 53 }
43 } 54 }
44 55
  56 +long start_time = 0;
  57 +long end_time = 0;
  58 +bool count_flag = false;
  59 +int count = 0;
  60 +int count_std = 100;
  61 +
  62 +long long get_cur_time(){
  63 + // 获取操作系统当前时间点(精确到微秒)
  64 + chrono::time_point<chrono::system_clock, chrono::microseconds> tpMicro
  65 + = chrono::time_point_cast<chrono::microseconds>(chrono::system_clock::now());
  66 + // (微秒精度的)时间点 => (微秒精度的)时间戳
  67 + time_t totalMicroSeconds = tpMicro.time_since_epoch().count();
  68 +
  69 + long long currentTime = ((long long)totalMicroSeconds)/1000;
  70 +
  71 + return currentTime;
  72 +}
  73 +
  74 +int sum = 0;
  75 +void postDecoded0(const void * userPtr, AVFrame * gpuFrame){
  76 + FFNvDecoder* decoder = (FFNvDecoder*)userPtr;
  77 + if (decoder!= nullptr)
  78 + {
  79 + // cout << "decode name: " << decoder->getName() << endl;
  80 + if (decoder->getName() == "dec")
  81 + {
  82 + if (! count_flag)
  83 + {
  84 + count_flag = true;
  85 + count = 0;
  86 + end_time = start_time = get_cur_time();
  87 + }
  88 + count++;
  89 + sum ++ ;
  90 + if (count >= count_std)
  91 + {
  92 + end_time = get_cur_time();
  93 + long time_using = end_time - start_time;
  94 + double time_per_frame = double(time_using)/count_std ;
  95 + cout << count_std << "帧用时:" << time_using << "ms 每帧用时:" << time_per_frame << "ms" << endl;
  96 + cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
  97 + cout << gpuFrame->pts << endl;
  98 +
  99 + count_flag = false;
  100 + }
  101 + }
  102 + }
  103 +}
  104 +
  105 +// string test_uri = "rtmp://192.168.10.56:1935/objecteye/1";
  106 +string test_uri = "/home/cmhu/data/test.mp4";
  107 +
45 void createDecode(int index){ 108 void createDecode(int index){
46 FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); 109 FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
47 MgrDecConfig config; 110 MgrDecConfig config;
48 config.name = "dec" + to_string(index); 111 config.name = "dec" + to_string(index);
49 - config.cfg.uri = "rtsp://176.10.0.4:8554/stream"; 112 + config.cfg.uri = test_uri;
50 config.cfg.post_decoded_cbk = postDecoded; 113 config.cfg.post_decoded_cbk = postDecoded;
51 config.cfg.force_tcp = true; 114 config.cfg.force_tcp = true;
52 - config.cfg.gpuid = "1"; 115 +
  116 + if (index % 2 == 0)
  117 + {
  118 + config.cfg.gpuid = "2";
  119 + }
  120 + else
  121 + {
  122 + config.cfg.gpuid = "1";
  123 + }
  124 +
53 FFNvDecoder* decoder = pDecManager->createDecoder(config); 125 FFNvDecoder* decoder = pDecManager->createDecoder(config);
54 if (!decoder) 126 if (!decoder)
55 { 127 {
@@ -59,21 +131,57 @@ void createDecode(int index){ @@ -59,21 +131,57 @@ void createDecode(int index){
59 pDecManager->startDecodeByName(config.name); 131 pDecManager->startDecodeByName(config.name);
60 } 132 }
61 133
  134 +#define checkCudaErrors(S) do {CUresult status; \
  135 + status = S; \
  136 + if (status != CUDA_SUCCESS ) std::cout << __LINE__ <<" checkCudaErrors - status = " << status << std::endl; \
  137 + } while (false)
  138 +
  139 +int CheckCUDAProperty( int devId )
  140 +{
  141 + cuInit(0);
  142 +
  143 + CUdevice dev = devId;
  144 + size_t memSize = 0;
  145 + char devName[256] = {0};
  146 + int major = 0, minor = 0;
  147 + CUresult rlt = CUDA_SUCCESS;
  148 +
  149 + rlt = cuDeviceComputeCapability( &major, &minor, dev );
  150 + checkCudaErrors( rlt );
  151 +
  152 + rlt = cuDeviceGetName( devName, sizeof( devName ), dev );
  153 + checkCudaErrors( rlt );
  154 +
  155 + printf( "Using GPU Device %d: %s has SM %d.%d compute capability\n",
  156 + dev, devName, major, minor );
  157 +
  158 + rlt = cuDeviceTotalMem( &memSize, dev );
  159 + checkCudaErrors( rlt );
  160 +
  161 + printf( "Total amount of global memory: %4.4f MB\n",
  162 + (float)memSize / ( 1024 * 1024 ) );
  163 +
  164 + return 0;
  165 +}
  166 +
62 int main(){ 167 int main(){
63 168
  169 + CheckCUDAProperty(1);
  170 +
64 FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); 171 FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
65 172
66 - // for (size_t i = 0; i < 20; i++)  
67 - // {  
68 - // createDecode(i);  
69 - // } 173 + int count = 105;
  174 + for (size_t i = 0; i < count ; i++)
  175 + {
  176 + createDecode(i);
  177 + }
70 178
71 MgrDecConfig config; 179 MgrDecConfig config;
72 - config.name = "dec2";  
73 - config.cfg.uri = "/home/cmhu/data/test.mp4";  
74 - config.cfg.post_decoded_cbk = postDecoded; 180 + config.name = "dec";
  181 + config.cfg.uri = test_uri;
  182 + config.cfg.post_decoded_cbk = postDecoded0;
75 config.cfg.force_tcp = true; 183 config.cfg.force_tcp = true;
76 - config.cfg.gpuid = "2"; 184 + config.cfg.gpuid = "1";
77 FFNvDecoder* dec2 = pDecManager->createDecoder(config); 185 FFNvDecoder* dec2 = pDecManager->createDecoder(config);
78 if (!dec2) 186 if (!dec2)
79 { 187 {
@@ -82,6 +190,26 @@ int main(){ @@ -82,6 +190,26 @@ int main(){
82 pDecManager->setUserPtr(config.name, dec2); 190 pDecManager->setUserPtr(config.name, dec2);
83 pDecManager->startDecodeByName(config.name); 191 pDecManager->startDecodeByName(config.name);
84 192
  193 + pthread_t m_decode_thread;
  194 + pthread_create(&m_decode_thread,0,
  195 + [](void* arg)
  196 + {
  197 + while (true)
  198 + {
  199 + std::this_thread::sleep_for(std::chrono::milliseconds(5000));
  200 + FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
  201 + int count = pDecManager->count();
  202 + cout << "当前运行路数: " << pDecManager->count() << endl;
  203 + if (count <= 0)
  204 + {
  205 + break;
  206 + }
  207 + }
  208 +
  209 + return (void*)0;
  210 + }
  211 + ,nullptr);
  212 +
85 213
86 // config.name = "dec0"; 214 // config.name = "dec0";
87 // config.cfg.uri = "rtmp://192.168.10.56:1935/objecteye/1"; 215 // config.cfg.uri = "rtmp://192.168.10.56:1935/objecteye/1";
@@ -116,6 +244,8 @@ int main(){ @@ -116,6 +244,8 @@ int main(){
116 // // pDecManager->resumeDecoder("dec1"); 244 // // pDecManager->resumeDecoder("dec1");
117 // pDecManager->resumeDecoder("dec2"); 245 // pDecManager->resumeDecoder("dec2");
118 246
  247 + cout << "总共帧数:" << sum << endl;
  248 +
119 while (getchar() != 'q'); 249 while (getchar() != 'q');
120 250
121 pDecManager->closeAllDecoder(); 251 pDecManager->closeAllDecoder();