Commit f40cc409e0cd8c22099105f9d6358e59db43ab26

Authored by Hu Chunming
1 parent 48330793

优化显存占用。当前在3080显卡上支持106路解码

ffmpeg-4.2.2/libavcodec/cuviddec.c 100644 → 100755
... ... @@ -1012,7 +1012,7 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
1012 1012 }
1013 1013  
1014 1014 ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
1015   - ctx->cuparseinfo.ulMaxDisplayDelay = 4;
  1015 + ctx->cuparseinfo.ulMaxDisplayDelay = 2;
1016 1016 ctx->cuparseinfo.pUserData = avctx;
1017 1017 ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
1018 1018 ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
... ...
src/FFCuContextManager.cpp 0 → 100644
  1 +#include "FFCuContextManager.h"
  2 +#include <iostream>
  3 +
  4 +using namespace std;
  5 +
  6 +FFCuContextManager::~FFCuContextManager()
  7 +{
  8 + for(auto iter = ctxMap.begin(); iter != ctxMap.end(); iter++){
  9 + av_buffer_unref(&iter->second);
  10 + }
  11 + ctxMap.clear();
  12 +}
  13 +
  14 +AVBufferRef *FFCuContextManager::getCuCtx(string gpuid)
  15 +{
  16 + AVBufferRef *hw_device_ctx = ctxMap[gpuid];
  17 + if (nullptr == hw_device_ctx)
  18 + {
  19 + // 初始化硬件解码器
  20 + if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_CUDA, gpuid.c_str(), nullptr, 0) < 0)
  21 + {
  22 + cout << "Failed to create specified HW device.";
  23 + return nullptr;
  24 + }
  25 + ctxMap[gpuid] = hw_device_ctx;
  26 + }
  27 + return hw_device_ctx;
  28 +}
0 29 \ No newline at end of file
... ...
src/FFCuContextManager.h 0 → 100644
  1 +
  2 +#include<map>
  3 +#include<string>
  4 +
  5 +extern "C"
  6 +{
  7 + #include <libavcodec/avcodec.h>
  8 + #include <libavdevice/avdevice.h>
  9 + #include <libavformat/avformat.h>
  10 + #include <libavfilter/avfilter.h>
  11 + #include <libavutil/avutil.h>
  12 + #include <libavutil/pixdesc.h>
  13 + #include <libswscale/swscale.h>
  14 +}
  15 +
  16 +using namespace std;
  17 +
  18 +class FFCuContextManager{
  19 +public:
  20 + static FFCuContextManager* getInstance(){
  21 + static FFCuContextManager* singleton = nullptr;
  22 + if (singleton == nullptr){
  23 + singleton = new FFCuContextManager();
  24 + }
  25 + return singleton;
  26 + }
  27 +
  28 + AVBufferRef *getCuCtx(string gpuid);
  29 +
  30 +private:
  31 + FFCuContextManager(){}
  32 + ~FFCuContextManager();
  33 +
  34 +private:
  35 + map<string,AVBufferRef *> ctxMap;
  36 +
  37 +};
0 38 \ No newline at end of file
... ...
src/FFNvDecoder.cpp
... ... @@ -5,6 +5,8 @@
5 5 #include <thread>
6 6 #include <fstream>
7 7  
  8 +#include "FFCuContextManager.h"
  9 +
8 10 using namespace std;
9 11  
10 12 // 参考博客: https://blog.csdn.net/qq_40116098/article/details/120704340
... ... @@ -67,7 +69,8 @@ bool FFNvDecoder::init(FFDecConfig& cfg)
67 69  
68 70 bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp)
69 71 {
70   - av_register_all();
  72 + // av_log_set_level(AV_LOG_DEBUG);
  73 +
71 74 avformat_network_init();
72 75  
73 76 // 打开输入视频文件
... ... @@ -114,9 +117,17 @@ bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp)
114 117  
115 118 hw_pix_fmt = AV_PIX_FMT_CUDA;
116 119  
  120 + FFCuContextManager* pCtxMgr = FFCuContextManager::getInstance();
  121 + avctx->hw_device_ctx = av_buffer_ref(pCtxMgr->getCuCtx(gpuid));
  122 + if (nullptr == avctx->hw_device_ctx)
  123 + {
  124 + return false;
  125 + }
  126 +
117 127 // 打开解码器流
118 128 AVDictionary *op = nullptr;
119 129 av_dict_set( &op, "gpu", gpuid, 0 );
  130 + av_dict_set( &op, "surfaces", "3", 0 );
120 131 if (avcodec_open2(avctx, vcodec, &op) < 0) {
121 132 cout << "Failed to open codec for stream" << stream_index;
122 133 return false;
... ... @@ -231,10 +242,6 @@ void FFNvDecoder::decode_finished()
231 242 {
232 243 if (avctx)
233 244 {
234   - if (avctx->hw_device_ctx)
235   - {
236   - av_buffer_unref(&avctx->hw_device_ctx);
237   - }
238 245 avcodec_free_context(&avctx);
239 246 }
240 247  
... ...
src/FFNvDecoderManager.cpp
... ... @@ -112,38 +112,52 @@ bool FFNvDecoderManager::closeDecoderByName(string name){
112 112 return false;
113 113 }
114 114  
  115 + m_mutex_erase.lock();
115 116 auto dec = decoderMap.find(name);
116 117 if (dec != decoderMap.end())
117 118 {
118 119 dec->second->close();
119 120 delete dec->second;
  121 + dec->second = nullptr;
120 122 decoderMap.erase(dec);
  123 +
  124 + m_mutex_erase.unlock();
121 125 return true;
122 126 }
123 127  
124   -
  128 + m_mutex_erase.unlock();
125 129 cout << "没有找到name为" << name << "的解码器!" << endl;
126 130 return false;
127 131 }
128 132  
129 133 void FFNvDecoderManager::closeAllDecoder()
130 134 {
  135 + // Close and free every decoder, then empty the map; the guard releases m_mutex_erase on every exit path.
  136 + std::lock_guard<std::mutex> guard(m_mutex_erase);
131 137 for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){
132 138 iter->second->close();
133 139 delete iter->second;
  140 + iter->second = nullptr;
134 141 }
135 142 decoderMap.clear();
136 143 }
137 144  
138 145 void FFNvDecoderManager::closeAllFinishedDecoder()
139 146 {
140   - for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){
  147 + // Free and erase every decoder whose isFinished() is true; the guard releases m_mutex_erase on every exit path.
  148 + std::lock_guard<std::mutex> guard(m_mutex_erase);
  149 + for(auto iter = decoderMap.begin(); iter != decoderMap.end(); ){
141 150 if (iter->second->isFinished())
142 151 {
143 152 delete iter->second;
144   - decoderMap.erase(iter);
  153 + iter->second = nullptr;
  154 + iter = decoderMap.erase(iter);
  155 + }
  156 + else
  157 + {
  158 + iter++ ;
145 159 }
146 160 }
147 161 }
148 162  
149 163 int FFNvDecoderManager::count()
... ...
src/FFNvDecoderManager.h
... ... @@ -3,6 +3,8 @@
3 3 #include<vector>
4 4 #include<map>
5 5  
  6 +#include <mutex>
  7 +
6 8 using namespace std;
7 9  
8 10 struct MgrDecConfig
... ... @@ -55,4 +57,6 @@ private:
55 57  
56 58 private:
57 59 map<string, FFNvDecoder*> decoderMap;
  60 +
  61 + mutex m_mutex_erase;
58 62 };
59 63 \ No newline at end of file
... ...
src/main.cpp
... ... @@ -5,6 +5,11 @@
5 5  
6 6 #include "NvJpegEncoder.h"
7 7  
  8 +#include <pthread.h>
  9 +#include <thread>
  10 +
  11 +#include <chrono>
  12 +
8 13 unsigned char *pHwRgb = nullptr;
9 14  
10 15 /**
... ... @@ -14,15 +19,21 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){
14 19 FFNvDecoder* decoder = (FFNvDecoder*)userPtr;
15 20 if (decoder!= nullptr)
16 21 {
17   - cout << "decode name: " << decoder->getName() << endl;
  22 + // cout << "decode name: " << decoder->getName() << endl;
  23 +
  24 + if (decoder->getName() == "dec1")
  25 + {
  26 + /* code */
  27 + }
  28 +
18 29 // const char* gpu_pixfmt = av_get_pix_fmt_name((AVPixelFormat)gpuFrame->format);
19 30 // cout << "pixfmt: " << gpu_pixfmt << endl;
20   - cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
  31 + // cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
21 32 // cout << "decode successed ✿✿ヽ(°▽°)ノ✿ " << endl;
22 33  
23 34 if (gpuFrame->format == AV_PIX_FMT_CUDA)
24 35 {
25   - cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl;
  36 + // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl;
26 37 // cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
27 38 // cudaError_t cudaStatus;
28 39 // if(pHwRgb == nullptr){
... ... @@ -42,14 +53,75 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){
42 53 }
43 54 }
44 55  
  56 +long start_time = 0; // get_cur_time() value taken when the current timing window starts
  57 +long end_time = 0; // get_cur_time() value taken when the window completes
  58 +bool count_flag = false; // true while a timing window is in progress
  59 +int count = 0; // frames counted in the current window
  60 +int count_std = 100; // number of frames that make up one window
  61 +
  62 +long long get_cur_time(){ // returns the system_clock time since its epoch, in milliseconds
  63 + // Current system-clock time point, cast to microsecond precision
  64 + chrono::time_point<chrono::system_clock, chrono::microseconds> tpMicro
  65 + = chrono::time_point_cast<chrono::microseconds>(chrono::system_clock::now());
  66 + // Microsecond-precision time point => microsecond count since the clock's epoch
  67 + time_t totalMicroSeconds = tpMicro.time_since_epoch().count(); // NOTE(review): stored in a time_t; confirm it is 64-bit on every target
  68 +
  69 + long long currentTime = ((long long)totalMicroSeconds)/1000; // microseconds -> milliseconds
  70 +
  71 + return currentTime;
  72 +}
  73 +
  74 +int sum = 0; // total frames counted by postDecoded0 for the decoder named "dec"
  75 +void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ // decoded-frame callback: reports elapsed time per count_std frames
  76 + FFNvDecoder* decoder = (FFNvDecoder*)userPtr; // userPtr is interpreted as the decoder that produced the frame
  77 + if (decoder!= nullptr)
  78 + {
  79 + // cout << "decode name: " << decoder->getName() << endl;
  80 + if (decoder->getName() == "dec") // only the decoder named "dec" is measured
  81 + {
  82 + if (! count_flag) // no window in progress: open one and record its start time
  83 + {
  84 + count_flag = true;
  85 + count = 0;
  86 + end_time = start_time = get_cur_time();
  87 + }
  88 + count++;
  89 + sum ++ ;
  90 + if (count >= count_std) // window complete: print elapsed time and frame info
  91 + {
  92 + end_time = get_cur_time();
  93 + long time_using = end_time - start_time; // elapsed milliseconds for this window
  94 + double time_per_frame = double(time_using)/count_std ;
  95 + cout << count_std << "帧用时:" << time_using << "ms 每帧用时:" << time_per_frame << "ms" << endl;
  96 + cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
  97 + cout << gpuFrame->pts << endl;
  98 +
  99 + count_flag = false; // the next frame opens a fresh window
  100 + }
  101 + }
  102 + }
  103 +}
  104 +
  105 +// string test_uri = "rtmp://192.168.10.56:1935/objecteye/1";
  106 +string test_uri = "/home/cmhu/data/test.mp4"; // input URI assigned to config.cfg.uri in createDecode() and main()
  107 +
45 108 void createDecode(int index){
46 109 FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
47 110 MgrDecConfig config;
48 111 config.name = "dec" + to_string(index);
49   - config.cfg.uri = "rtsp://176.10.0.4:8554/stream";
  112 + config.cfg.uri = test_uri;
50 113 config.cfg.post_decoded_cbk = postDecoded;
51 114 config.cfg.force_tcp = true;
52   - config.cfg.gpuid = "1";
  115 +
  116 + if (index % 2 == 0)
  117 + {
  118 + config.cfg.gpuid = "2";
  119 + }
  120 + else
  121 + {
  122 + config.cfg.gpuid = "1";
  123 + }
  124 +
53 125 FFNvDecoder* decoder = pDecManager->createDecoder(config);
54 126 if (!decoder)
55 127 {
... ... @@ -59,21 +131,57 @@ void createDecode(int index){
59 131 pDecManager->startDecodeByName(config.name);
60 132 }
61 133  
  134 +#define checkCudaErrors(S) do {CUresult status; /* evaluates S once; on failure prints the line number and status code, then continues */ \
  135 + status = S; \
  136 + if (status != CUDA_SUCCESS ) std::cout << __LINE__ <<" checkCudaErrors - status = " << status << std::endl; \
  137 + } while (false)
  138 +
  139 +int CheckCUDAProperty( int devId )
  140 +{
  141 + cuInit(0);
  142 +
  143 + CUdevice dev = devId;
  144 + size_t memSize = 0;
  145 + char devName[256] = {0};
  146 + int major = 0, minor = 0;
  147 + CUresult rlt = CUDA_SUCCESS;
  148 +
  149 + rlt = cuDeviceComputeCapability( &major, &minor, dev );
  150 + checkCudaErrors( rlt );
  151 +
  152 + rlt = cuDeviceGetName( devName, sizeof( devName ), dev );
  153 + checkCudaErrors( rlt );
  154 +
  155 + printf( "Using GPU Device %d: %s has SM %d.%d compute capability\n",
  156 + dev, devName, major, minor );
  157 +
  158 + rlt = cuDeviceTotalMem( &memSize, dev );
  159 + checkCudaErrors( rlt );
  160 +
  161 + printf( "Total amount of global memory: %4.4f MB\n",
  162 + (float)memSize / ( 1024 * 1024 ) );
  163 +
  164 + return 0;
  165 +}
  166 +
62 167 int main(){
63 168  
  169 + CheckCUDAProperty(1);
  170 +
64 171 FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
65 172  
66   - // for (size_t i = 0; i < 20; i++)
67   - // {
68   - // createDecode(i);
69   - // }
  173 + int count = 105;
  174 + for (size_t i = 0; i < count ; i++)
  175 + {
  176 + createDecode(i);
  177 + }
70 178  
71 179 MgrDecConfig config;
72   - config.name = "dec2";
73   - config.cfg.uri = "/home/cmhu/data/test.mp4";
74   - config.cfg.post_decoded_cbk = postDecoded;
  180 + config.name = "dec";
  181 + config.cfg.uri = test_uri;
  182 + config.cfg.post_decoded_cbk = postDecoded0;
75 183 config.cfg.force_tcp = true;
76   - config.cfg.gpuid = "2";
  184 + config.cfg.gpuid = "1";
77 185 FFNvDecoder* dec2 = pDecManager->createDecoder(config);
78 186 if (!dec2)
79 187 {
... ... @@ -82,6 +190,26 @@ int main(){
82 190 pDecManager->setUserPtr(config.name, dec2);
83 191 pDecManager->startDecodeByName(config.name);
84 192  
  193 + pthread_t m_decode_thread;
  194 + pthread_create(&m_decode_thread,0,
  195 + [](void* arg)
  196 + {
  197 + while (true)
  198 + {
  199 + std::this_thread::sleep_for(std::chrono::milliseconds(5000));
  200 + FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
  201 + int count = pDecManager->count();
  202 + cout << "当前运行路数: " << pDecManager->count() << endl;
  203 + if (count <= 0)
  204 + {
  205 + break;
  206 + }
  207 + }
  208 +
  209 + return (void*)0;
  210 + }
  211 + ,nullptr);
  212 +
85 213  
86 214 // config.name = "dec0";
87 215 // config.cfg.uri = "rtmp://192.168.10.56:1935/objecteye/1";
... ... @@ -116,6 +244,8 @@ int main(){
116 244 // // pDecManager->resumeDecoder("dec1");
117 245 // pDecManager->resumeDecoder("dec2");
118 246  
  247 + cout << "总共帧数:" << sum << endl;
  248 +
119 249 while (getchar() != 'q');
120 250  
121 251 pDecManager->closeAllDecoder();
... ...