Commit 92989af0db1827cabea63ec2a20ff37eb9ac047d

Authored by ming
1 parent 372e629f

更新解码器

.gitignore
@@ -2,3 +2,5 @@ ffmpeg-4.2.2/ @@ -2,3 +2,5 @@ ffmpeg-4.2.2/
2 .vscode/ 2 .vscode/
3 bin/ 3 bin/
4 .idea/ 4 .idea/
  5 +
  6 +3rdparty/
5 \ No newline at end of file 7 \ No newline at end of file
.vscode/launch.json
@@ -6,7 +6,7 @@ @@ -6,7 +6,7 @@
6 "type": "cppdbg", 6 "type": "cppdbg",
7 "request": "launch", 7 "request": "launch",
8 "program": "${workspaceFolder}/bin/lib/test", 8 "program": "${workspaceFolder}/bin/lib/test",
9 - "args": ["rtsp://122.97.218.170:8604/openUrl/V5nXRHa?params=eyJwcm90b2NhbCI6InJ0c3AiLCJjbGllbnRUeXBlIjoib3Blbl9hcGkiLCJleHByaWVUaW1lIjotMSwicHJvdG9jb2wiOiJydHNwIiwiZXhwaXJlVGltZSI6MzAwLCJlbmFibGVNR0MiOnRydWUsImV4cGFuZCI6InN0YW5kYXJkPXJ0c3Amc3RyZWFtZm9ybT1ydHAiLCJhIjoiMTBjZjM4N2JjY2Y5NDg3YzhjNWYzNjE2M2ViMWUyNTJ8MXwwfDEiLCJ0IjoxfQ==","0"], 9 + "args": ["rtsp","3", "30012"],
10 "stopAtEntry": false, 10 "stopAtEntry": false,
11 "cwd": "${workspaceFolder}/bin/lib", 11 "cwd": "${workspaceFolder}/bin/lib",
12 "environment": [], 12 "environment": [],
README.md
@@ -4,7 +4,7 @@ @@ -4,7 +4,7 @@
4 支持 cuvid 需要安装 nv-codec-headers, 进入 nv-codec-headers 文件夹后以sudo权限make && make install即可 4 支持 cuvid 需要安装 nv-codec-headers, 进入 nv-codec-headers 文件夹后以sudo权限make && make install即可
5 3. 编译ffmpeg 5 3. 编译ffmpeg
6 ~~~ 6 ~~~
7 -./configure --enable-debug --extra-cflags=-g --extra-ldflags=-g --disable-optimizations --disable-stripping --enable-cuda --enable-cuvid --enable-nvenc --disable-x86asm --enable-nonfree --enable-libnpp --extra-cflags=-I/usr/local/cuda-11.7/targets/x86_64-linux/include --extra-cflags=-fPIC --extra-ldflags=-L/usr/local/cuda-11.7/targets/x86_64-linux/lib --enable-shared --enable-pic --enable-ffplay --prefix=../bin 7 +./configure --enable-debug --extra-cflags=-g --extra-ldflags=-g --disable-optimizations --disable-stripping --enable-cuda --enable-cuvid --enable-nvenc --disable-x86asm --enable-nonfree --enable-libnpp --disable-vaapi --extra-cflags=-I/usr/local/cuda-11.7/targets/x86_64-linux/include --extra-cflags=-fPIC --extra-ldflags=-L/usr/local/cuda-11.7/targets/x86_64-linux/lib --enable-shared --enable-pic --enable-ffplay --prefix=../bin
8 ~~~ 8 ~~~
9 其中以下是用于调试的,编译release可以去掉: 9 其中以下是用于调试的,编译release可以去掉:
10 ~~~ 10 ~~~
src/AbstractDecoder.cpp 0 → 100644
  1 +#include "AbstractDecoder.h"
  2 +
  3 +#include "logger.hpp"
  4 +#include "GpuRgbMemory.hpp"
  5 +#include "cuda_kernels.h"
  6 +
  7 +#include "utiltools.hpp"
  8 +
  9 +
  10 +FFImgInfo* AbstractDecoder::snapshot(){
  11 +
  12 + // 锁住停止队列消耗
  13 + std::lock_guard<std::mutex> l(m_snapshot_mutex);
  14 +
  15 + AVFrame * gpuFrame = nullptr;
  16 +
  17 + bool bFirst = true;
  18 + while(true){
  19 + m_queue_mutex.lock();
  20 + if(mFrameQueue.size() <= 0){
  21 + m_queue_mutex.unlock();
  22 + if(bFirst){
  23 + std::this_thread::sleep_for(std::chrono::milliseconds(100));
  24 + bFirst = false;
  25 + continue;
  26 + }else{
  27 + // 再进来说明前面已经等了 100 ms
  28 + // 100 ms都没有等到解码数据,则退出
  29 + return nullptr;
  30 + }
  31 + }
  32 +
  33 + // 队列中数据大于1
  34 + gpuFrame = mFrameQueue.front();
  35 + m_queue_mutex.unlock();
  36 + break;
  37 + }
  38 +
  39 + if (gpuFrame != nullptr && gpuFrame->format == AV_PIX_FMT_CUDA ){
  40 + LOG_DEBUG("decode task: gpuid: {} width: {} height: {}", m_cfg.gpuid, gpuFrame->width, gpuFrame->height);
  41 + GpuRgbMemory* gpuMem = new GpuRgbMemory(3, gpuFrame->width, gpuFrame->height, getName(), m_cfg.gpuid , true);
  42 +
  43 + if (gpuMem->getMem() == nullptr){
  44 + LOG_ERROR("new GpuRgbMemory failed !!!");
  45 + return nullptr;
  46 + }
  47 +
  48 + cudaSetDevice(atoi(m_cfg.gpuid.c_str()));
  49 + cuda_common::setColorSpace( ITU_709, 0 );
  50 + cudaError_t cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], gpuMem->getMem(), gpuFrame->width, gpuFrame->height);
  51 + cudaDeviceSynchronize();
  52 + if (cudaStatus != cudaSuccess) {
  53 + LOG_ERROR("CUDAToBGR failed failed !!!");
  54 + return nullptr;
  55 + }
  56 +
  57 + unsigned char * pHwRgb = gpuMem->getMem();
  58 + int channel = gpuMem->getChannel();
  59 + int width = gpuMem->getWidth();
  60 + int height = gpuMem->getHeight();
  61 +
  62 + if (pHwRgb != nullptr && channel > 0 && width > 0 && height > 0){
  63 + int nSize = channel * height * width;
  64 +
  65 + LOG_INFO("channel:{} height:{} width:{}", channel, height, width);
  66 + // unsigned char* cpu_data = new unsigned char[nSize];
  67 +
  68 + unsigned char* cpu_data = (unsigned char *)av_malloc(nSize * sizeof(unsigned char));
  69 +
  70 + cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost);
  71 + cudaDeviceSynchronize();
  72 +
  73 + delete gpuMem;
  74 + gpuMem = nullptr;
  75 +
  76 + FFImgInfo* imgInfo = new FFImgInfo();
  77 + imgInfo->dec_name = m_dec_name;
  78 + imgInfo->pData = cpu_data;
  79 + imgInfo->height = height;
  80 + imgInfo->width = width;
  81 + imgInfo->timestamp = UtilTools::get_cur_time_ms();
  82 + imgInfo->index = m_index;
  83 +
  84 + m_index++;
  85 +
  86 + return imgInfo;
  87 + }
  88 +
  89 + delete gpuMem;
  90 + gpuMem = nullptr;
  91 + }
  92 +
  93 + return nullptr;
  94 +}
  95 +
  96 +bool AbstractDecoder::isSnapTime(){
  97 + if(m_snap_time_interval <= 0){
  98 + return false;
  99 + }
  100 + long cur_time = UtilTools::get_cur_time_ms();
  101 + if(cur_time - m_last_snap_time > m_snap_time_interval){
  102 + return true;
  103 + }
  104 + return false;
  105 +}
  106 +
  107 +void AbstractDecoder::updateLastSnapTime(){
  108 + m_last_snap_time = UtilTools::get_cur_time_ms();
  109 +}
  110 +
  111 +void AbstractDecoder::setSnapTimeInterval(long interval){
  112 + m_snap_time_interval = interval;
  113 + m_last_snap_time = UtilTools::get_cur_time_ms();
  114 +}
0 \ No newline at end of file 115 \ No newline at end of file
src/AbstractDecoder.h
@@ -15,6 +15,9 @@ extern &quot;C&quot; @@ -15,6 +15,9 @@ extern &quot;C&quot;
15 #include <libavutil/imgutils.h> 15 #include <libavutil/imgutils.h>
16 } 16 }
17 17
  18 +#include <queue>
  19 +#include <mutex>
  20 +
18 using namespace std; 21 using namespace std;
19 22
20 /************************************************** 23 /**************************************************
@@ -32,7 +35,7 @@ typedef void(*POST_DECODE_CALLBACK)(const void * userPtr, AVFrame * gpuFrame); @@ -32,7 +35,7 @@ typedef void(*POST_DECODE_CALLBACK)(const void * userPtr, AVFrame * gpuFrame);
32 35
33 typedef void(*DECODE_FINISHED_CALLBACK)(const void* userPtr); 36 typedef void(*DECODE_FINISHED_CALLBACK)(const void* userPtr);
34 37
35 -typedef bool(*DECODE_REQUEST_STREAM_CALLBACK)(); 38 +typedef bool(*DECODE_REQUEST_STREAM_CALLBACK)(const char* deviceId);
36 39
37 struct FFDecConfig{ 40 struct FFDecConfig{
38 string uri; // 视频地址 41 string uri; // 视频地址
@@ -51,6 +54,15 @@ enum DECODER_TYPE{ @@ -51,6 +54,15 @@ enum DECODER_TYPE{
51 DECODER_TYPE_FFMPEG 54 DECODER_TYPE_FFMPEG
52 }; 55 };
53 56
  57 +struct FFImgInfo{
  58 + string dec_name;
  59 + int width;
  60 + int height;
  61 + unsigned char * pData;
  62 + long timestamp;
  63 + long index;
  64 +};
  65 +
54 class AbstractDecoder { 66 class AbstractDecoder {
55 public: 67 public:
56 virtual ~AbstractDecoder(){}; 68 virtual ~AbstractDecoder(){};
@@ -83,6 +95,14 @@ public: @@ -83,6 +95,14 @@ public:
83 return m_dec_name; 95 return m_dec_name;
84 } 96 }
85 97
  98 + FFImgInfo* snapshot();
  99 +
  100 + bool isSnapTime();
  101 +
  102 + void updateLastSnapTime();
  103 +
  104 + void setSnapTimeInterval(long interval);
  105 +
86 public: 106 public:
87 const void * m_postDecArg; 107 const void * m_postDecArg;
88 POST_DECODE_CALLBACK post_decoded_cbk; 108 POST_DECODE_CALLBACK post_decoded_cbk;
@@ -95,6 +115,14 @@ public: @@ -95,6 +115,14 @@ public:
95 bool m_dec_keyframe; 115 bool m_dec_keyframe;
96 116
97 FFDecConfig m_cfg; 117 FFDecConfig m_cfg;
  118 +
  119 + queue<AVFrame*> mFrameQueue;
  120 + mutex m_queue_mutex;
  121 + mutex m_snapshot_mutex;
  122 +
  123 + long m_snap_time_interval{-1};
  124 + long m_last_snap_time;
  125 + long m_index{0};
98 }; 126 };
99 127
100 #endif // _ABSTRACT_DECODER_H_ 128 #endif // _ABSTRACT_DECODER_H_
101 \ No newline at end of file 129 \ No newline at end of file
src/DrawImageOnGPU.cu 0 → 100644
  1 +#include "cuda_kernels.h"
  2 +
  3 +#include "logger.hpp"
  4 +
  5 +typedef unsigned char uchar;
  6 +typedef unsigned int uint32;
  7 +typedef int int32;
  8 +
  9 +namespace cuda_common
  10 +{
  11 + __global__ void kernel_drawPixel(float* d_srcRGB, int src_width, int src_height,
  12 + int left, int top, int right, int bottom)
  13 + {
  14 + const int x = blockIdx.x * blockDim.x + threadIdx.x;
  15 + const int y = blockIdx.y * blockDim.y + threadIdx.y;
  16 +
  17 + if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right))
  18 + {
  19 + d_srcRGB[(y*src_width) + x] = 0;
  20 + d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255;
  21 + d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0;
  22 + }
  23 + }
  24 +
  25 + cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
  26 + {
  27 + dim3 block(32, 16, 1);
  28 + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
  29 +
  30 + kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom);
  31 +
  32 + cudaError_t cudaStatus = cudaGetLastError();
  33 + if (cudaStatus != cudaSuccess) {
  34 + LOG_ERROR("Draw 32 kernel_memcopy launch failed:{}",cudaGetErrorString(cudaStatus));
  35 + return cudaStatus;
  36 + }
  37 +
  38 + cudaStatus = cudaDeviceSynchronize();
  39 + if (cudaStatus != cudaSuccess) {
  40 + LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus);
  41 + return cudaStatus;
  42 + }
  43 +
  44 + return cudaStatus;
  45 + }
  46 +
  47 + __global__ void kernel_drawPixel(unsigned char* d_srcRGB, int src_width, int src_height,
  48 + int left, int top, int right, int bottom)
  49 + {
  50 + const int x = blockIdx.x * blockDim.x + threadIdx.x;
  51 + const int y = blockIdx.y * blockDim.y + threadIdx.y;
  52 +
  53 + if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right))
  54 + {
  55 + d_srcRGB[(y*src_width) + x] = 0;
  56 + d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255;
  57 + d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0;
  58 + }
  59 + }
  60 +
  61 + cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
  62 + {
  63 + dim3 block(32, 16, 1);
  64 + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
  65 +
  66 + kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom);
  67 +
  68 + cudaError_t cudaStatus = cudaGetLastError();
  69 + if (cudaStatus != cudaSuccess) {
  70 + LOG_ERROR("Draw 68 kernel_memcopy launch failed: {}",cudaGetErrorString(cudaStatus));
  71 + return cudaStatus;
  72 + }
  73 +
  74 + cudaStatus = cudaDeviceSynchronize();
  75 + if (cudaStatus != cudaSuccess) {
  76 + LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus);
  77 + return cudaStatus;
  78 + }
  79 +
  80 + return cudaStatus;
  81 + }
  82 +
  83 + __global__ void kernel_drawLine(float* d_srcRGB, int src_width, int src_height,
  84 + int begin_x, int begin_y, int end_x, int end_y)
  85 + {
  86 + int min_x = end_x < begin_x ? end_x : begin_x;
  87 + int max_x = end_x < begin_x ? begin_x : end_x;
  88 +
  89 + int min_y = end_y < begin_y ? end_y : begin_y;
  90 + int max_y = end_y < begin_y ? begin_y : end_y;
  91 +
  92 + const int x = blockIdx.x * blockDim.x + threadIdx.x;
  93 + const int y = blockIdx.y * blockDim.y + threadIdx.y;
  94 +
  95 + if ((x - begin_x) * (end_y - begin_y) == (end_x - begin_x) * (y - begin_y)
  96 + && min_x <= x && x <= max_x
  97 + && min_y <= y && y <= max_y)
  98 + {
  99 + d_srcRGB[(y*src_width) + x] = 0;
  100 + d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255;
  101 + d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0;
  102 + }
  103 + }
  104 +
  105 + cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y)
  106 + {
  107 + dim3 block(32, 16, 1);
  108 + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
  109 +
  110 + kernel_drawLine << < grid, block >> >(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y);
  111 +
  112 + cudaError_t cudaStatus = cudaGetLastError();
  113 + if (cudaStatus != cudaSuccess) {
  114 + LOG_ERROR("Draw 112 kernel_memcopy launch failed: {}",cudaGetErrorString(cudaStatus));
  115 + return cudaStatus;
  116 + }
  117 +
  118 + cudaStatus = cudaDeviceSynchronize();
  119 + if (cudaStatus != cudaSuccess) {
  120 + LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus);
  121 + return cudaStatus;
  122 + }
  123 +
  124 + return cudaStatus;
  125 + }
  126 +}
0 \ No newline at end of file 127 \ No newline at end of file
src/FFCuContextManager.cpp
1 #include "FFCuContextManager.h" 1 #include "FFCuContextManager.h"
2 -#include <iostream> 2 +
  3 +#include "logger.hpp"
3 4
4 using namespace std; 5 using namespace std;
5 6
@@ -19,7 +20,7 @@ AVBufferRef *FFCuContextManager::getCuCtx(string gpuid) @@ -19,7 +20,7 @@ AVBufferRef *FFCuContextManager::getCuCtx(string gpuid)
19 // 初始化硬件解码器 20 // 初始化硬件解码器
20 if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_CUDA, gpuid.c_str(), nullptr, 0) < 0) 21 if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_CUDA, gpuid.c_str(), nullptr, 0) < 0)
21 { 22 {
22 - av_log(nullptr, AV_LOG_ERROR, "Failed to create specified HW device ! \n"); 23 + LOG_ERROR("Failed to create specified HW device.");
23 return nullptr; 24 return nullptr;
24 } 25 }
25 ctxMap[gpuid] = hw_device_ctx; 26 ctxMap[gpuid] = hw_device_ctx;
src/FFNvDecoder.cpp
@@ -10,6 +10,8 @@ @@ -10,6 +10,8 @@
10 10
11 #include "logger.hpp" 11 #include "logger.hpp"
12 12
  13 +#include "utiltools.hpp"
  14 +
13 using namespace std; 15 using namespace std;
14 16
15 // 参考博客: https://blog.csdn.net/qq_40116098/article/details/120704340 17 // 参考博客: https://blog.csdn.net/qq_40116098/article/details/120704340
@@ -175,18 +177,6 @@ bool FFNvDecoder::start(){ @@ -175,18 +177,6 @@ bool FFNvDecoder::start(){
175 return true; 177 return true;
176 } 178 }
177 179
178 -static long long get_cur_time(){  
179 - // 获取操作系统当前时间点(精确到微秒)  
180 - chrono::time_point<chrono::system_clock, chrono::microseconds> tpMicro  
181 - = chrono::time_point_cast<chrono::microseconds>(chrono::system_clock::now());  
182 - // (微秒精度的)时间点 => (微秒精度的)时间戳  
183 - time_t totalMicroSeconds = tpMicro.time_since_epoch().count();  
184 -  
185 - long long currentTime = ((long long)totalMicroSeconds)/1000;  
186 -  
187 - return currentTime;  
188 -}  
189 -  
190 void FFNvDecoder::decode_thread() 180 void FFNvDecoder::decode_thread()
191 { 181 {
192 AVPacket* pkt ; 182 AVPacket* pkt ;
@@ -202,7 +192,7 @@ void FFNvDecoder::decode_thread() @@ -202,7 +192,7 @@ void FFNvDecoder::decode_thread()
202 } 192 }
203 ,this); 193 ,this);
204 194
205 - // long start_time = get_cur_time(); 195 + // long start_time = UtilTools::get_cur_time_ms();
206 196
207 while (m_bRunning) 197 while (m_bRunning)
208 { 198 {
@@ -214,13 +204,6 @@ void FFNvDecoder::decode_thread() @@ -214,13 +204,6 @@ void FFNvDecoder::decode_thread()
214 continue; 204 continue;
215 } 205 }
216 } 206 }
217 -  
218 - AVFrame * gpuFrame = mFrameQueue.getTail();  
219 - if (gpuFrame == nullptr)  
220 - {  
221 - std::this_thread::sleep_for(std::chrono::milliseconds(1));  
222 - continue;  
223 - }  
224 207
225 int result = av_read_frame(fmt_ctx, pkt); 208 int result = av_read_frame(fmt_ctx, pkt);
226 if (result == AVERROR_EOF || result < 0) 209 if (result == AVERROR_EOF || result < 0)
@@ -247,25 +230,37 @@ void FFNvDecoder::decode_thread() @@ -247,25 +230,37 @@ void FFNvDecoder::decode_thread()
247 if (stream_index == pkt->stream_index){ 230 if (stream_index == pkt->stream_index){
248 result = avcodec_send_packet(avctx, pkt); 231 result = avcodec_send_packet(avctx, pkt);
249 if (result < 0){ 232 if (result < 0){
  233 + av_packet_unref(pkt);
250 LOG_ERROR("{} - Failed to send pkt: {}", m_dec_name, result); 234 LOG_ERROR("{} - Failed to send pkt: {}", m_dec_name, result);
251 continue; 235 continue;
252 } 236 }
253 237
  238 + AVFrame* gpuFrame = av_frame_alloc();
254 result = avcodec_receive_frame(avctx, gpuFrame); 239 result = avcodec_receive_frame(avctx, gpuFrame);
255 if ((result == AVERROR(EAGAIN) || result == AVERROR_EOF) || result < 0){ 240 if ((result == AVERROR(EAGAIN) || result == AVERROR_EOF) || result < 0){
256 LOG_ERROR("{} - Failed to receive frame: {}", m_dec_name, result); 241 LOG_ERROR("{} - Failed to receive frame: {}", m_dec_name, result);
  242 + av_frame_free(&gpuFrame);
  243 + av_packet_unref(pkt);
257 continue; 244 continue;
258 } 245 }
  246 + av_packet_unref(pkt);
259 247
260 - mFrameQueue.addTail(); 248 + if(gpuFrame != nullptr){
  249 + m_queue_mutex.lock();
  250 + if(mFrameQueue.size() <= 10){
  251 + mFrameQueue.push(gpuFrame);
  252 + }else{
  253 + av_frame_free(&gpuFrame);
  254 + }
  255 + m_queue_mutex.unlock();
  256 + }
261 } 257 }
262 av_packet_unref(pkt); 258 av_packet_unref(pkt);
263 } 259 }
264 260
265 m_bRunning = false; 261 m_bRunning = false;
266 262
267 - // long end_time = get_cur_time();  
268 - 263 + // long end_time = UtilTools::get_cur_time_ms();
269 // cout << "解码用时:" << end_time - start_time << endl; 264 // cout << "解码用时:" << end_time - start_time << endl;
270 265
271 if (m_post_decode_thread != 0) 266 if (m_post_decode_thread != 0)
@@ -277,6 +272,13 @@ void FFNvDecoder::decode_thread() @@ -277,6 +272,13 @@ void FFNvDecoder::decode_thread()
277 272
278 decode_finished(); 273 decode_finished();
279 274
  275 + // 清空队列
  276 + while(mFrameQueue.size() > 0){
  277 + AVFrame * gpuFrame = mFrameQueue.front();
  278 + av_frame_free(&gpuFrame);
  279 + mFrameQueue.pop();
  280 + }
  281 +
280 LOG_INFO("{} - decode thread exited.", m_dec_name); 282 LOG_INFO("{} - decode thread exited.", m_dec_name);
281 } 283 }
282 284
@@ -302,24 +304,25 @@ void FFNvDecoder::post_decode_thread(){ @@ -302,24 +304,25 @@ void FFNvDecoder::post_decode_thread(){
302 } 304 }
303 305
304 int index = 0; 306 int index = 0;
305 - while (m_bRunning || mFrameQueue.length() > 0) 307 + while (m_bRunning)
306 { 308 {
307 - AVFrame * gpuFrame = mFrameQueue.getHead();  
308 - if (gpuFrame == nullptr)  
309 - {  
310 - std::this_thread::sleep_for(std::chrono::milliseconds(3));  
311 - continue;  
312 - } 309 + if(mFrameQueue.size() > 0){
  310 + std::lock_guard<std::mutex> l(m_snapshot_mutex);
  311 + // 取队头数据
  312 + m_queue_mutex.lock();
  313 + AVFrame * gpuFrame = mFrameQueue.front();
  314 + mFrameQueue.pop();
  315 + m_queue_mutex.unlock();
  316 + // 跳帧
  317 + if (skip_frame == 1 || index % skip_frame == 0){
  318 + post_decoded_cbk(m_postDecArg, gpuFrame);
  319 + index = 0;
  320 + }
313 321
314 - // 跳帧  
315 - if (skip_frame == 1 || index % skip_frame == 0){  
316 - post_decoded_cbk(m_postDecArg, gpuFrame);  
317 - index = 0;  
318 - }  
319 -  
320 - mFrameQueue.addHead(); 322 + av_frame_free(&gpuFrame);
321 323
322 - index++; 324 + index++;
  325 + }
323 } 326 }
324 327
325 LOG_INFO("post decode thread exited."); 328 LOG_INFO("post decode thread exited.");
@@ -374,7 +377,10 @@ void FFNvDecoder::setDecKeyframe(bool bKeyframe) @@ -374,7 +377,10 @@ void FFNvDecoder::setDecKeyframe(bool bKeyframe)
374 } 377 }
375 378
376 int FFNvDecoder::getCachedQueueLength(){ 379 int FFNvDecoder::getCachedQueueLength(){
377 - return mFrameQueue.length(); 380 + m_queue_mutex.lock();
  381 + int queue_size = mFrameQueue.size();
  382 + m_queue_mutex.lock();
  383 + return queue_size;
378 } 384 }
379 385
380 float FFNvDecoder::fps(){ 386 float FFNvDecoder::fps(){
src/FFNvDecoder.h
1 #include<string> 1 #include<string>
2 #include <pthread.h> 2 #include <pthread.h>
3 3
4 -#include "FrameQueue.h"  
5 -  
6 #include "AbstractDecoder.h" 4 #include "AbstractDecoder.h"
7 5
  6 +#include <mutex>
  7 +
8 using namespace std; 8 using namespace std;
9 9
10 class FFNvDecoder : public AbstractDecoder{ 10 class FFNvDecoder : public AbstractDecoder{
@@ -55,7 +55,6 @@ private: @@ -55,7 +55,6 @@ private:
55 bool m_bFinished; 55 bool m_bFinished;
56 56
57 bool m_bPause; 57 bool m_bPause;
58 - FrameQueue mFrameQueue;  
59 58
60 bool m_bReal; // 是否实时流 59 bool m_bReal; // 是否实时流
61 60
src/FFNvDecoderManager.cpp
@@ -116,11 +116,12 @@ AbstractDecoder* FFNvDecoderManager::getDecoderByName(const string name) @@ -116,11 +116,12 @@ AbstractDecoder* FFNvDecoderManager::getDecoderByName(const string name)
116 return nullptr; 116 return nullptr;
117 } 117 }
118 118
119 -void FFNvDecoderManager::startDecode(AbstractDecoder* dec){ 119 +bool FFNvDecoderManager::startDecode(AbstractDecoder* dec){
120 if (dec != nullptr && !dec->isRunning()) 120 if (dec != nullptr && !dec->isRunning())
121 { 121 {
122 - dec->start(); 122 + return dec->start();
123 } 123 }
  124 + return false;
124 } 125 }
125 126
126 bool FFNvDecoderManager::startDecodeByName(const string name){ 127 bool FFNvDecoderManager::startDecodeByName(const string name){
@@ -486,7 +487,7 @@ FFImgInfo* FFNvDecoderManager::snapshot(const string&amp; uri){ @@ -486,7 +487,7 @@ FFImgInfo* FFNvDecoderManager::snapshot(const string&amp; uri){
486 } 487 }
487 488
488 // 计算解码后原始数据所需缓冲区大小,并分配内存空间 Determine required buffer size and allocate buffer 489 // 计算解码后原始数据所需缓冲区大小,并分配内存空间 Determine required buffer size and allocate buffer
489 - numBytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, codec_ctx->width, codec_ctx->height, 1); 490 + numBytes = av_image_get_buffer_size(AV_PIX_FMT_BGR24, codec_ctx->width, codec_ctx->height, 1);
490 buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t)); 491 buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t));
491 492
492 pFrameRGB = av_frame_alloc(); 493 pFrameRGB = av_frame_alloc();
@@ -560,3 +561,40 @@ void FFNvDecoderManager::releaseFFImgInfo(FFImgInfo* info){ @@ -560,3 +561,40 @@ void FFNvDecoderManager::releaseFFImgInfo(FFImgInfo* info){
560 info = nullptr; 561 info = nullptr;
561 } 562 }
562 } 563 }
  564 +
  565 +FFImgInfo* FFNvDecoderManager::snapshot_in_task(const string name){
  566 + if (name.empty()){
  567 + LOG_ERROR("name 为空!");
  568 + return nullptr;
  569 + }
  570 +
  571 + std::lock_guard<std::mutex> l(m_mutex);
  572 +
  573 + auto dec = decoderMap.find(name);
  574 + if (dec != decoderMap.end()){
  575 + return dec->second->snapshot();
  576 + }
  577 +
  578 + LOG_ERROR("没有找到name为{}的解码器",name);
  579 + return nullptr;
  580 +}
  581 +
  582 +vector<FFImgInfo*> FFNvDecoderManager::timing_snapshot_all(){
  583 +
  584 + closeAllFinishedDecoder();
  585 +
  586 + std::lock_guard<std::mutex> l(m_mutex);
  587 +
  588 + vector<FFImgInfo*> vec;
  589 + for(auto it = decoderMap.begin(); it != decoderMap.end(); ++it){
  590 + if(it->second->isSnapTime()){
  591 + FFImgInfo* imginfo = it->second->snapshot();
  592 + if(imginfo != nullptr){
  593 + vec.push_back(imginfo);
  594 + }
  595 + it->second->updateLastSnapTime();
  596 + }
  597 + }
  598 +
  599 + return vec;
  600 +}
563 \ No newline at end of file 601 \ No newline at end of file
src/FFNvDecoderManager.h
@@ -14,14 +14,9 @@ struct MgrDecConfig @@ -14,14 +14,9 @@ struct MgrDecConfig
14 string name{""}; // 解码器名称 14 string name{""}; // 解码器名称
15 }; 15 };
16 16
17 -struct FFImgInfo{  
18 - int width;  
19 - int height;  
20 - unsigned char * pData;  
21 -};  
22 -  
23 /** 17 /**
24 * 解码器管理类,单例类 18 * 解码器管理类,单例类
  19 + * 谨防死锁
25 **/ 20 **/
26 class FFNvDecoderManager { 21 class FFNvDecoderManager {
27 public: 22 public:
@@ -90,7 +85,7 @@ public: @@ -90,7 +85,7 @@ public:
90 * 返回:void 85 * 返回:void
91 * 备注: 86 * 备注:
92 **************************************************/ 87 **************************************************/
93 - void startDecode(AbstractDecoder*); 88 + bool startDecode(AbstractDecoder*);
94 89
95 /************************************************** 90 /**************************************************
96 * 接口:startAllDecode 91 * 接口:startAllDecode
@@ -257,6 +252,10 @@ public: @@ -257,6 +252,10 @@ public:
257 **************************************************/ 252 **************************************************/
258 void releaseFFImgInfo(FFImgInfo* info); 253 void releaseFFImgInfo(FFImgInfo* info);
259 254
  255 + FFImgInfo* snapshot_in_task(const string name);
  256 +
  257 + vector<FFImgInfo*> timing_snapshot_all();
  258 +
260 private: 259 private:
261 FFNvDecoderManager(){} 260 FFNvDecoderManager(){}
262 261
src/FrameQueue.cpp deleted
1 -#include "FrameQueue.h"  
2 -  
3 -FrameQueue::FrameQueue(/* args */)  
4 -{  
5 - for (size_t i = 0; i < Maxsize; i++)  
6 - {  
7 - base[i] = av_frame_alloc();  
8 - }  
9 -  
10 - front = rear = 0;//头指针和尾指针置为零,队列为空  
11 -}  
12 -  
13 -FrameQueue::~FrameQueue()  
14 -{  
15 - if (base)  
16 - {  
17 - for (size_t i = 0; i < Maxsize; i++)  
18 - {  
19 - if (base[i])  
20 - {  
21 - av_frame_free(&base[i]);  
22 - }  
23 - }  
24 - }  
25 -  
26 - rear = front = 0;  
27 -}  
28 -  
29 -//循环队列的入队  
30 -AVFrame* FrameQueue::getTail()  
31 -{  
32 - //插入一个元素e为Q的新的队尾元素  
33 - if ((rear + 1) % Maxsize == front)  
34 - return nullptr;//队满  
35 - return base[rear];//获取队尾元素  
36 -}  
37 -  
38 -// 将队尾元素添加到队列中  
39 -void FrameQueue::addTail()  
40 -{  
41 - rear = (rear + 1) % Maxsize;//队尾指针加1  
42 -}  
43 -  
44 -//循环队列的出队  
45 -AVFrame* FrameQueue::deQueue()  
46 -{  
47 - //删除Q的队头元素,用e返回其值  
48 - if (front == rear)  
49 - return nullptr;//队空  
50 - AVFrame* e = base[front];//保存队头元素  
51 - front = (front + 1) % Maxsize;//队头指针加1  
52 - return e;  
53 -}  
54 -  
55 -//取循环队列的队头元素  
56 -AVFrame* FrameQueue::getHead()  
57 -{  
58 - //返回Q的队头元素,不修改队头指针  
59 - if (front == rear)  
60 - return nullptr;//队列为空,取元素失败  
61 - return base[front];  
62 -}  
63 -  
64 -void FrameQueue::addHead()  
65 -{  
66 - front = (front + 1) % Maxsize;//队头指针加1  
67 -}  
68 -  
69 -int FrameQueue::length()  
70 -{  
71 - return (rear - front + Maxsize) % Maxsize;  
72 -}  
73 -  
74 -bool FrameQueue::isEmpty()  
75 -{  
76 - if (front == rear)  
77 - return true;  
78 -  
79 - return false;  
80 -}  
81 -  
82 -void FrameQueue::clearQueue()  
83 -{  
84 - rear = front = 0;  
85 -}  
86 \ No newline at end of file 0 \ No newline at end of file
src/FrameQueue.h deleted
1 -#include <iostream>  
2 -#include <atomic>  
3 -  
4 -extern "C"  
5 -{  
6 - #include <libavcodec/avcodec.h>  
7 - #include <libavdevice/avdevice.h>  
8 - #include <libavformat/avformat.h>  
9 - #include <libavfilter/avfilter.h>  
10 - #include <libavutil/avutil.h>  
11 - #include <libavutil/pixdesc.h>  
12 - #include <libswscale/swscale.h>  
13 -}  
14 -  
15 -using namespace std;  
16 -  
17 -#define Maxsize 5 // 循环队列的大小  
18 -  
19 -// 循环队列  
20 -class FrameQueue  
21 -{  
22 -private:  
23 - /* data */  
24 -public:  
25 - FrameQueue(/* args */);  
26 - ~FrameQueue();  
27 -  
28 - AVFrame* getTail();  
29 - void addTail();  
30 - AVFrame* deQueue();  
31 - AVFrame* getHead();  
32 - void addHead();  
33 - void clearQueue();  
34 -  
35 - int length();  
36 - bool isEmpty();  
37 -  
38 -private:  
39 - AVFrame* base[Maxsize];  
40 - atomic<int> front;  
41 - atomic<int> rear;  
42 -};  
43 \ No newline at end of file 0 \ No newline at end of file
src/GpuRgbMemory.hpp 0 → 100644
  1 +#include<string>
  2 +
  3 +#include "cuda_kernels.h"
  4 +#include "define.hpp"
  5 +#include "utiltools.hpp"
  6 +
  7 +using namespace std;
  8 +
  9 +class GpuRgbMemory{
  10 +
  11 +public:
  12 + GpuRgbMemory(int _channel, int _width, int _height, string _id, string _gpuid, bool _isused){
  13 + channel = _channel;
  14 + width = _width;
  15 + height = _height;
  16 + size = channel * width * height;
  17 + isused = _isused;
  18 + id = _id;
  19 + gpuid = _gpuid;
  20 + timestamp = UtilTools::get_cur_time_ms();
  21 +
  22 + cudaSetDevice(atoi(gpuid.c_str()));
  23 + CHECK_CUDA(cudaMalloc((void **)&pHwRgb, size * sizeof(unsigned char)));
  24 + }
  25 +
  26 + ~GpuRgbMemory(){
  27 + if (pHwRgb) {
  28 + cudaSetDevice(atoi(gpuid.c_str()));
  29 + CHECK_CUDA(cudaFree(pHwRgb));
  30 + pHwRgb = nullptr;
  31 + }
  32 + }
  33 +
  34 + int getSize() {
  35 + return size;
  36 + }
  37 +
  38 + bool isIsused() {
  39 + return isused;
  40 + }
  41 +
  42 + void setIsused(bool _isused) {
  43 + isused = _isused;
  44 + // 更新时间戳
  45 + timestamp = UtilTools::get_cur_time_ms();
  46 + }
  47 +
  48 + string getId() {
  49 + return id;
  50 + }
  51 +
  52 + string getGpuId() {
  53 + return gpuid;
  54 + }
  55 +
  56 + unsigned char* getMem(){
  57 + return pHwRgb;
  58 + }
  59 +
  60 + long long getTimesstamp(){
  61 + return timestamp;
  62 + }
  63 +
  64 + int getWidth(){
  65 + return width;
  66 + }
  67 +
  68 + int getHeight(){
  69 + return height;
  70 + }
  71 +
  72 + int getChannel(){
  73 + return channel;
  74 + }
  75 +
  76 +private:
  77 + int size;
  78 + bool isused;
  79 + string id;
  80 + string gpuid;
  81 + unsigned char * pHwRgb{nullptr};
  82 + long long timestamp;
  83 + int width{0};
  84 + int height{0};
  85 + int channel{3};
  86 +};
0 \ No newline at end of file 87 \ No newline at end of file
src/ImageSaveGPU.cpp 0 → 100644
  1 +#include "cuda_kernels.h"
  2 +
  3 +#include "logger.hpp"
  4 +
  5 +
  6 +//int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height)
  7 +//{
  8 +// return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height);
  9 +// //return 0;
  10 +//}
  11 +//
  12 +//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height)
  13 +//{
  14 +// return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height);
  15 +// //return 0;
  16 +//}
  17 +//
  18 +//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB)
  19 +//{
  20 +// return jpegNPP(szOutputFile, d_srcRGB);
  21 +//}
  22 +//
  23 +//int saveJPEG(const char *szOutputFile, float* d_srcRGB)
  24 +//{
  25 +// return jpegNPP(szOutputFile, d_srcRGB);
  26 +//}
  27 +
  28 +int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height)
  29 +{
  30 + cudaError_t cudaStatus = cuda_common::ResizeImage(d_srcRGB, src_width, src_height, d_dstRGB, dst_width, dst_height);
  31 + if (cudaStatus != cudaSuccess) {
  32 + LOG_ERROR("cuda_common::ResizeImage failed: {}",cudaGetErrorString(cudaStatus));
  33 + return -1;
  34 + }
  35 +
  36 + return 0;
  37 +}
  38 +
  39 +//int initTables()
  40 +//{
  41 +// initTable();
  42 +// return 0;
  43 +//}
  44 +//
  45 +//int initTables(int flag, int width, int height)
  46 +//{
  47 +// initTable(0, width, height);
  48 +// return 0;
  49 +//}
  50 +
  51 +int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
  52 +{
  53 + cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom);
  54 + return 0;
  55 +}
  56 +
  57 +int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
  58 +{
  59 + cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom);
  60 + return 0;
  61 +}
  62 +
  63 +int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y)
  64 +{
  65 + cuda_common::DrawLine(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y);
  66 + return 0;
  67 +}
  68 +
  69 +//int releaseJpegSaver()
  70 +//{
  71 +// releaseJpegNPP();
  72 +// return 0;
  73 +//}
  74 +
  75 +int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom)
  76 +{
  77 + cudaError_t cudaStatus = cuda_common::PartMemCopy(d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom);
  78 + if (cudaStatus != cudaSuccess) {
  79 + LOG_ERROR("cuda_common::77 PartMemCopy failed: {} {} {} {} {} {} {}",cudaGetErrorString(cudaStatus), left, top, right, bottom, src_height, d_dstRGB);
  80 + return -1;
  81 + }
  82 +
  83 + return 0;
  84 +}
  85 +//#include <fstream>
  86 +//extern std::ofstream g_os;
  87 +int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB,
  88 + int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h,
  89 + float submeanb, float submeang, float submeanr,
  90 + float varianceb, float varianceg, float variancer)
  91 +{
  92 + //g_os << "cudaMemcpyHostToDevice begin 9" << std::endl;
  93 + cudaError_t cudaStatus = cuda_common::PartMemResizeBatch(
  94 + d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h,
  95 + submeanb, submeang, submeanr,
  96 + varianceb, varianceg, variancer);
  97 + //g_os << "cudaMemcpyHostToDevice end 9" << std::endl;
  98 + if (cudaStatus != cudaSuccess) {
  99 + LOG_ERROR("cuda_common::PartMemResizeBatch failed: {}",cudaGetErrorString(cudaStatus));
  100 + return -1;
  101 + }
  102 +
  103 + return 0;
  104 +}
  105 +
  106 +
  107 +//int PartMemResizeBatch(float * d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB,
  108 +// int count, int* vleft, int * vtop, int* vright, int* vbottom, int dst_w, int dst_h,
  109 +// float submeanb, float submeang, float submeanr,
  110 +// float varianceb, float varianceg, float variancer)
  111 +//
  112 +//{
  113 +// cudaError_t cudaStatus = cuda_common::PartMemResizeBatch(
  114 +// d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h,
  115 +// submeanb, submeang, submeanr,
  116 +// varianceb, varianceg, variancer);
  117 +// if (cudaStatus != cudaSuccess) {
  118 +// fprintf(stderr, "cuda_common::PartMemCopy failed: %s\n", cudaGetErrorString(cudaStatus));
  119 +// return -1;
  120 +// }
  121 +//
  122 +// return 0;
  123 +//}
0 \ No newline at end of file 124 \ No newline at end of file
src/ImageSaveGPU.h 0 → 100644
  1 +/*******************************************************************************************
  2 +* Version: VPT_x64_V2.0.0_20170904
  3 +* CopyRight: 中科院自动化研究所模式识别实验室图像视频组
  4 +* UpdateDate: 20170904
  5 +* Content: 人车物监测跟踪
  6 +********************************************************************************************/
  7 +
#ifndef IMAGESAVEGPU_H_
#define IMAGESAVEGPU_H_

// IMAGESAVEGPU_API marks the functions exported by this library:
// dllexport/dllimport under MSVC, default ELF visibility everywhere else.
#if defined(_MSC_VER)
	#if defined(IMAGESAVEGPU_EXPORTS)
		#define IMAGESAVEGPU_API __declspec(dllexport)
	#else
		#define IMAGESAVEGPU_API __declspec(dllimport)
	#endif
#else
	#define IMAGESAVEGPU_API __attribute__((visibility ("default")))
#endif

// (disabled) Purpose: save an image as a JPEG file.
//   szOutputFile  output image path, e.g. D:\\out.jpg
//   d_srcRGB      input RGB data in GPU memory allocated with cudaMalloc,
//                 laid out as planes: BBBBBB......GGGGGG......RRRRRRRR......
//   img_width     width of the RGB image
//   img_height    height of the RGB image
//
//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height);
//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB);
//
//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height);
//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB);

// Purpose: resize (scale) an image.
IMAGESAVEGPU_API int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height);

// Purpose: copy a rectangular part of an image.
IMAGESAVEGPU_API int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom);

//IMAGESAVEGPU_API int partMemResizeImage(float * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB,
//	int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h,
//	float submeanb, float submeang, float submeanr,
//	float varianceb, float varianceg, float variancer);

// Purpose: crop `count` rectangles out of one image and resize each of them
// into its own destination buffer.
IMAGESAVEGPU_API int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB,
	int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h,
	float submeanb, float submeang, float submeanr,
	float varianceb, float varianceg, float variancer);

//// (disabled) Purpose: initialise the quantisation tables used when saving images on the GPU.
//IMAGESAVEGPU_API int initTables();
//IMAGESAVEGPU_API int initTables(int falg, int width, int height);
//
//// (disabled) Purpose: release resources.
//IMAGESAVEGPU_API int releaseJpegSaver();

// Purpose: draw a snapshot bounding box on the GPU (float image).
IMAGESAVEGPU_API int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);

// Purpose: draw a snapshot bounding box on the GPU (8-bit image).
IMAGESAVEGPU_API int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);

// Purpose: draw a straight line on the GPU.
IMAGESAVEGPU_API int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y);

#endif
src/NV12ToRGB.cu
@@ -257,14 +257,13 @@ namespace cuda_common @@ -257,14 +257,13 @@ namespace cuda_common
257 dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255); 257 dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255);
258 } 258 }
259 259
260 - cudaError_t setColorSpace(e_ColorSpace CSC, float hue) 260 + cudaError_t setColorSpace(FF_ColorSpace CSC, float hue)
261 { 261 {
262 -  
263 float hueSin = sin(hue); 262 float hueSin = sin(hue);
264 float hueCos = cos(hue); 263 float hueCos = cos(hue);
265 264
266 float hueCSC[9]; 265 float hueCSC[9];
267 - if (CSC == ITU601) 266 + if (CSC == ITU_601)
268 { 267 {
269 //CCIR 601 268 //CCIR 601
270 hueCSC[0] = 1.1644f; 269 hueCSC[0] = 1.1644f;
@@ -277,7 +276,7 @@ namespace cuda_common @@ -277,7 +276,7 @@ namespace cuda_common
277 hueCSC[7] = hueCos * 2.0172f; 276 hueCSC[7] = hueCos * 2.0172f;
278 hueCSC[8] = hueSin * -2.0172f; 277 hueCSC[8] = hueSin * -2.0172f;
279 } 278 }
280 - else if (CSC == ITU709) 279 + else if (CSC == ITU_709)
281 { 280 {
282 //CCIR 709 281 //CCIR 709
283 hueCSC[0] = 1.0f; 282 hueCSC[0] = 1.0f;
src/PartMemCopy.cu 0 → 100644
  1 +#include "cuda_kernels.h"
  2 +#include <algorithm>
  3 +typedef unsigned char uchar;
  4 +typedef unsigned int uint32;
  5 +typedef int int32;
  6 +
  7 +#define MAX_SNAPSHOT_WIDTH 320
  8 +#define MAX_SNAPSHOT_HEIGHT 320
  9 +
  10 +namespace cuda_common
  11 +{
  12 + __global__ void kernel_memcopy(unsigned char* d_srcRGB, int src_width, int src_height,
  13 + unsigned char* d_dstRGB, int left, int top, int right, int bottom)
  14 + {
  15 + const int dst_x = blockIdx.x * blockDim.x + threadIdx.x;
  16 + const int dst_y = blockIdx.y * blockDim.y + threadIdx.y;
  17 + const int dst_width = right - left;
  18 + const int dst_height = bottom - top;
  19 + if (dst_x < dst_width && dst_y < dst_height)
  20 + {
  21 + int src_x = left + dst_x;
  22 + int src_y = top + dst_y;
  23 +
  24 + //bgr...bgr...bgr...
  25 + d_dstRGB[(dst_y*dst_width + dst_x) * 3] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3];
  26 + d_dstRGB[(dst_y*dst_width + dst_x)
  27 + * 3 + 1] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3 + 1];
  28 + d_dstRGB[(dst_y*dst_width + dst_x) * 3 + 2] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3 + 2];
  29 +
  30 + //bbb...ggg...rrr...
  31 + //d_dstRGB[(dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(src_y*src_width) + src_x];
  32 + //d_dstRGB[(dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(src_width*src_height) + (src_y*src_width) + src_x];
  33 + //d_dstRGB[(2 * dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(2 * src_width*src_height) + (src_y*src_width) + src_x];
  34 +
  35 + /* memcpy(d_dstRGB + (dst_y*src_width) + dst_x, d_srcRGB + (src_y*src_width) + src_x, sizeof(float));
  36 + memcpy(d_dstRGB + (src_width*src_height) + (dst_y*src_width) + dst_x, d_srcRGB + (src_width*src_height) + (src_y*src_width) + src_x, sizeof(float));
  37 + memcpy(d_dstRGB + (2 * src_width*src_height) + (dst_y*src_width) + dst_x, d_srcRGB + (2 * src_width*src_height) + (src_y*src_width) + src_x, sizeof(float));*/
  38 + }
  39 + }
  40 +
  41 + cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom)
  42 + {
  43 + dim3 block(32, 16, 1);
  44 + dim3 grid(((right - left) + (block.x - 1)) / block.x, ((bottom - top) + (block.y - 1)) / block.y, 1);
  45 +
  46 + kernel_memcopy << < grid, block >> > (d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom);
  47 +
  48 + cudaError_t cudaStatus = cudaGetLastError();
  49 + if (cudaStatus != cudaSuccess) {
  50 + fprintf(stderr, "Part 50 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus));
  51 + return cudaStatus;
  52 + }
  53 + cudaStatus = cudaDeviceSynchronize();
  54 + if (cudaStatus != cudaSuccess) {
  55 + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus);
  56 + return cudaStatus;
  57 + }
  58 + return cudaStatus;
  59 + }
  60 +
  61 +
  62 + // __global__ void kernel_memcopy_mean_variance(float* d_srcRGB, int src_width, int src_height,
  63 + // unsigned char* vd_dstRGB, int count, int * vleft, int* vtop, int* vright, int * vbottom, float submeanb,float submeang, float submeanr, float varianceb,float varianceg, float variancer)
  64 + // {
  65 + // const int dst_x = blockIdx.x * blockDim.x + threadIdx.x;
  66 + // const int dst_y = blockIdx.y * blockDim.y + threadIdx.y;
  67 + // for (int i=0;i<count;i++)
  68 + // {
  69 + // const int left = vleft[i];
  70 + // const int right = vright[i];
  71 + // const int top = vtop[i];
  72 + // const int bottom = vbottom[i];
  73 + //
  74 + // const int dst_width = right - left;
  75 + // const int dst_height = bottom - top;
  76 + //
  77 + //
  78 + // unsigned char * d_dstRGB = vd_dstRGB + i * ;
  79 + //
  80 + // if (dst_x < dst_width && dst_y < dst_height)
  81 + // {
  82 + // int src_x = left + dst_x;
  83 + // int src_y = top + dst_y;
  84 + //
  85 + // d_dstRGB[(dst_y*dst_width) + dst_x] = (d_srcRGB[(src_y*src_width) + src_x] - submeanb)*varianceb;
  86 + // d_dstRGB[(dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (d_srcRGB[(src_width*src_height) + (src_y*src_width) + src_x] -submeang)*varianceg;
  87 + // d_dstRGB[(2 * dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (d_srcRGB[(2 * src_width*src_height) + (src_y*src_width) + src_x] - submeanr) * variancer;
  88 + //
  89 + // }
  90 + // }
  91 + // }
  92 + __global__ void PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel(
  93 + unsigned char * d_srcRGB, int srcimg_width, int srcimg_height,
  94 + int* vleft, int* vtop, int* vright, int * vbottom,
  95 + unsigned char** vd_dstRGB, int count, int *dst_width, int *dst_height,
  96 + float submeanb, float submeang, float submeanr,
  97 + float varianceb, float varianceg, float variancer)
  98 + {
  99 + int i = blockIdx.z;
  100 +
  101 + //for (int i = 0; i<count; i++)
  102 + {
  103 + const int left = vleft[i];
  104 + const int right = vright[i];
  105 + const int top = vtop[i];
  106 + const int bottom = vbottom[i];
  107 + const int cur_dst_width = dst_width[i];
  108 + const int cur_dst_height = dst_height[i];
  109 +
  110 + unsigned char* d_dstRGB = vd_dstRGB[i];
  111 +
  112 + const int src_width = right - left;
  113 + const int src_height = bottom - top;
  114 + const int x = blockIdx.x * blockDim.x + threadIdx.x;// + left;
  115 + const int y = blockIdx.y * blockDim.y + threadIdx.y;//+ top;
  116 + const int dst_x = blockIdx.x * blockDim.x + threadIdx.x;
  117 + const int dst_y = blockIdx.y * blockDim.y + threadIdx.y;
  118 +
  119 + /*if (dst_x == 0 && dst_y == 0)
  120 + printf("%d %d %d %d %d\n", i, vleft[i], vright[i], cur_dst_width, cur_dst_height);*/
  121 +
  122 + unsigned char * src_img = d_srcRGB;
  123 + unsigned char * dst_img = d_dstRGB;
  124 + if (dst_x < cur_dst_width && dst_y < cur_dst_height)
  125 + {
  126 + float fx = (x + 0.5)*src_width / (float)cur_dst_width - 0.5 + left;
  127 + float fy = (y + 0.5)*src_height / (float)cur_dst_height - 0.5 + top;
  128 + int ax = floor(fx);
  129 + int ay = floor(fy);
  130 + if (ax < 0)
  131 + {
  132 + ax = 0;
  133 + }
  134 + if (ax > srcimg_width - 2)
  135 + {
  136 + ax = srcimg_width - 2;
  137 + }
  138 + if (ay < 0) {
  139 + ay = 0;
  140 + }
  141 + if (ay > srcimg_height - 2)
  142 + {
  143 + ay = srcimg_height - 2;
  144 + }
  145 +
  146 + int A = ax + ay*srcimg_width;
  147 + int B = ax + ay*srcimg_width + 1;
  148 + int C = ax + ay*srcimg_width + srcimg_width;
  149 + int D = ax + ay*srcimg_width + srcimg_width + 1;
  150 +
  151 + float w1, w2, w3, w4;
  152 + w1 = fx - ax;
  153 + w2 = 1 - w1;
  154 + w3 = fy - ay;
  155 + w4 = 1 - w3;
  156 + float blue = src_img[A * 3] * w2*w4 + src_img[B * 3] * w1*w4 + src_img[C * 3] * w2*w3 + src_img[D * 3] * w1*w3;
  157 + float green = src_img[A * 3 + 1] * w2*w4 + src_img[B * 3 + 1] * w1*w4
  158 + + src_img[C * 3 + 1] * w2*w3 + src_img[D * 3 + 1] * w1*w3;
  159 + float red = src_img[A * 3 + 2] * w2*w4 + src_img[B * 3 + 2] * w1*w4
  160 + + src_img[C * 3 + 2] * w2*w3 + src_img[D * 3 + 2] * w1*w3;
  161 +
  162 + /*dst_img[(dst_y * dst_width + dst_x) * 3] = (unsigned char)(blue - submeanb)*varianceb;
  163 + dst_img[(dst_y * dst_width + dst_x) * 3 + 1] =(unsigned char) (green - submeang)*varianceg;
  164 + dst_img[(dst_y * dst_width + dst_x) * 3 + 2] = (unsigned char) (red - submeanr)*variancer;*/
  165 +
  166 + if (blue < 0)
  167 + blue = 0;
  168 + else if (blue > 255)
  169 + blue = 255;
  170 +
  171 + if (green < 0)
  172 + green = 0;
  173 + else if (green > 255)
  174 + green = 255;
  175 +
  176 + if (red < 0)
  177 + red = 0;
  178 + else if (red > 255)
  179 + red = 255;
  180 +
  181 + dst_img[(dst_y * cur_dst_width + dst_x) * 3] = (unsigned char)blue;
  182 + dst_img[(dst_y * cur_dst_width + dst_x) * 3 + 1] = (unsigned char)green;
  183 + dst_img[(dst_y * cur_dst_width + dst_x) * 3 + 2] = (unsigned char)red;
  184 +
  185 +
  186 + /*if (src_img[(dst_y * dst_width + dst_x) * 3] < 0)
  187 + src_img[(dst_y * dst_width + dst_x) * 3] = 0;
  188 + else if (src_img[(dst_y * dst_width + dst_x) * 3] > 255)
  189 + src_img[(dst_y * dst_width + dst_x) * 3] = 255;
  190 +
  191 + if (src_img[(dst_y * dst_width + dst_x) * 3 + 1] < 0)
  192 + src_img[(dst_y * dst_width + dst_x) * 3 + 1] = 0;
  193 + else if (src_img[(dst_y * dst_width + dst_x) * 3 + 1] > 255)
  194 + src_img[(dst_y * dst_width + dst_x) * 3 + 1] = 255;
  195 +
  196 + if (src_img[(dst_y * dst_width + dst_x) * 3 + 2] < 0)
  197 + src_img[(dst_y * dst_width + dst_x) * 3 + 2] = 0;
  198 + else if (src_img[(dst_y * dst_width + dst_x) * 3 + 2] > 255)
  199 + src_img[(dst_y * dst_width + dst_x) * 3 + 2] = 255;
  200 +
  201 +
  202 + dst_img[(dst_y * dst_width + dst_x) * 3] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3];
  203 + dst_img[(dst_y * dst_width + dst_x) * 3 + 1] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3 + 1];
  204 + dst_img[(dst_y * dst_width + dst_x) * 3 + 2] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3 + 2];*/
  205 + }
  206 + }
  207 + }
  208 +
  209 + cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, int count, int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h, float submeanb, float submeang, float submeanr,
  210 + float varianceb, float varianceg, float variancer)
  211 + {
  212 + /* cudaEvent_t start, stop;
  213 + float time;
  214 + cudaEventCreate(&start);
  215 + cudaEventCreate(&stop);
  216 + cudaEventRecord(start, 0);*/
  217 +
  218 + dim3 block(32, 16, 1);
  219 + dim3 grid((*std::max_element(dst_w, dst_w+ count) + (block.x - 1)) / block.x, (*std::max_element(dst_h, dst_h + count) + (block.y - 1)) / block.y, count);
  220 +
  221 + int * gpu_left;
  222 + cudaMalloc(&gpu_left, 1000 * sizeof(int));
  223 + cudaMemcpy(gpu_left, left, count * sizeof(int), cudaMemcpyHostToDevice);
  224 +
  225 + int * gpu_right;
  226 + cudaMalloc(&gpu_right, 1000 * sizeof(int));
  227 + cudaMemcpy(gpu_right, right, count * sizeof(int), cudaMemcpyHostToDevice);
  228 +
  229 + int * gpu_top;
  230 + cudaMalloc(&gpu_top, 1000 * sizeof(int));
  231 + cudaMemcpy(gpu_top, top, count * sizeof(int), cudaMemcpyHostToDevice);
  232 +
  233 + int * gpu_bottom;
  234 + cudaMalloc(&gpu_bottom, 1000 * sizeof(int));
  235 + cudaMemcpy(gpu_bottom, bottom, count * sizeof(int), cudaMemcpyHostToDevice);
  236 +
  237 + int * gpu_dst_w;
  238 + cudaMalloc(&gpu_dst_w, 1000 * sizeof(int));
  239 + cudaMemcpy(gpu_dst_w, dst_w, count * sizeof(int), cudaMemcpyHostToDevice);
  240 +
  241 + int * gpu_dst_h;
  242 + cudaMalloc(&gpu_dst_h, 1000 * sizeof(int));
  243 + cudaMemcpy(gpu_dst_h, dst_h, count * sizeof(int), cudaMemcpyHostToDevice);
  244 +
  245 + unsigned char** gpu_dst_rgb;
  246 + cudaMalloc(&gpu_dst_rgb, 1000 * sizeof(unsigned char*));
  247 + cudaMemcpy(gpu_dst_rgb, d_dstRGB, count * sizeof(unsigned char*), cudaMemcpyHostToDevice);
  248 +
  249 + //cudaMemcpy(cpu_personfloat, d_srcRGB, 112*224*2*sizeof(float), cudaMemcpyDeviceToHost);
  250 + // for(int i=0;i<100;i++)
  251 + // {
  252 + // printf("the score is %f\t",cpu_personfloat[i]);
  253 + // }
  254 + PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel << < grid, block >> > (
  255 + d_srcRGB, src_width, src_height,
  256 + gpu_left, gpu_top, gpu_right, gpu_bottom,
  257 + gpu_dst_rgb, count, gpu_dst_w, gpu_dst_h,
  258 + submeanb, submeang, submeanr,
  259 + varianceb, varianceg, variancer);
  260 + cudaFree(gpu_top);
  261 + cudaFree(gpu_bottom);
  262 + cudaFree(gpu_left);
  263 + cudaFree(gpu_right);
  264 + cudaFree(gpu_dst_w);
  265 + cudaFree(gpu_dst_h);
  266 + cudaFree(gpu_dst_rgb);
  267 +
  268 + cudaError_t cudaStatus = cudaGetLastError();
  269 + if (cudaStatus != cudaSuccess) {
  270 + fprintf(stderr, "Part 270 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus));
  271 + return cudaStatus;
  272 + }
  273 + cudaStatus = cudaDeviceSynchronize();
  274 + if (cudaStatus != cudaSuccess) {
  275 + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus);
  276 + return cudaStatus;
  277 + }
  278 +
  279 + /*cudaEventRecord(stop, 0);
  280 + cudaEventSynchronize(stop);
  281 + cudaEventElapsedTime(&time, start, stop);
  282 + cudaEventDestroy(start);
  283 + cudaEventDestroy(stop);
  284 + printf("ºËº¯ÊýÏûºÄʱ¼ä:%f\n", time);*/
  285 +
  286 + return cudaStatus;
  287 + }
  288 +
  289 +}
0 \ No newline at end of file 290 \ No newline at end of file
src/RGB2YUV.cu 0 → 100644
  1 +
  2 +
  3 +#include "cuda_kernels.h"
  4 +
  5 +typedef unsigned char uint8;
  6 +typedef unsigned int uint32;
  7 +typedef int int32;
  8 +
  9 +namespace cuda_common
  10 +{
  11 + __device__ unsigned char clip_value(unsigned char x, unsigned char min_val, unsigned char max_val){
  12 + if (x>max_val){
  13 + return max_val;
  14 + }
  15 + else if (x<min_val){
  16 + return min_val;
  17 + }
  18 + else{
  19 + return x;
  20 + }
  21 + }
  22 +
  23 + __global__ void kernel_rgb2yuv(unsigned char *src_img, unsigned char* Y, unsigned char* u, unsigned char* v,
  24 + int src_width, int src_height, size_t yPitch)
  25 + {
  26 + const int x = blockIdx.x * blockDim.x + threadIdx.x;
  27 + const int y = blockIdx.y * blockDim.y + threadIdx.y;
  28 +
  29 + if (x >= src_width)
  30 + return; //x = width - 1;
  31 +
  32 + if (y >= src_height)
  33 + return; // y = height - 1;
  34 +
  35 + int B = src_img[y * src_width * 3 + x * 3];
  36 + int G = src_img[y * src_width * 3 + x * 3 + 1];
  37 + int R = src_img[y * src_width * 3 + x * 3 + 2];
  38 +
  39 + /*int B = src_img[y * src_width + x];
  40 + int G = src_img[src_width * src_height + y * src_width + x];
  41 + int R = src_img[src_width * src_height * 2 + y * src_width + x];*/
  42 +
  43 + Y[y * yPitch + x] = clip_value((unsigned char)(0.299 * R + 0.587 * G + 0.114 * B), 0, 255);
  44 + u[y * src_width + x] = clip_value((unsigned char)(-0.147 * R - 0.289 * G + 0.436 * B + 128), 0, 255);
  45 + v[y * src_width + x] = clip_value((unsigned char)(0.615 * R - 0.515 * G - 0.100 * B + 128), 0, 255);
  46 +
  47 + //Y[y * yPitch + x] = clip_value((unsigned char)(0.257 * R + 0.504 * G + 0.098 * B + 16), 0, 255);
  48 + //u[y * src_width + x] = clip_value((unsigned char)(-0.148 * R - 0.291 * G + 0.439 * B + 128), 0, 255);
  49 + //v[y * src_width + x] = clip_value((unsigned char)(0.439 * R - 0.368 * G - 0.071 * B + 128), 0, 255);
  50 + }
  51 +
  52 + __global__ void kernel_rgb2yuv(float *src_img, unsigned char* Y, unsigned char* u, unsigned char* v,
  53 + int src_width, int src_height, size_t yPitch)
  54 + {
  55 + const int x = blockIdx.x * blockDim.x + threadIdx.x;
  56 + const int y = blockIdx.y * blockDim.y + threadIdx.y;
  57 +
  58 + if (x >= src_width)
  59 + return; //x = width - 1;
  60 +
  61 + if (y >= src_height)
  62 + return; // y = height - 1;
  63 +
  64 + float B = src_img[y * src_width + x];
  65 + float G = src_img[src_width * src_height + y * src_width + x];
  66 + float R = src_img[src_width * src_height * 2 + y * src_width + x];
  67 +
  68 + Y[y * yPitch + x] = clip_value((unsigned char)(0.299 * R + 0.587 * G + 0.114 * B), 0, 255);
  69 + u[y * src_width + x] = clip_value((unsigned char)(-0.147 * R - 0.289 * G + 0.436 * B + 128), 0, 255);
  70 + v[y * src_width + x] = clip_value((unsigned char)(0.615 * R - 0.515 * G - 0.100 * B + 128), 0, 255);
  71 +
  72 + //Y[y * yPitch + x] = clip_value((unsigned char)(0.257 * R + 0.504 * G + 0.098 * B + 16), 0, 255);
  73 + //u[y * src_width + x] = clip_value((unsigned char)(-0.148 * R - 0.291 * G + 0.439 * B + 128), 0, 255);
  74 + //v[y * src_width + x] = clip_value((unsigned char)(0.439 * R - 0.368 * G - 0.071 * B + 128), 0, 255);
  75 + }
  76 +
  77 + extern "C"
  78 + __global__ void kernel_resize_UV(unsigned char* src_img, unsigned char *dst_img,
  79 + int src_width, int src_height, int dst_width, int dst_height, int nPitch)
  80 + {
  81 + const int x = blockIdx.x * blockDim.x + threadIdx.x;
  82 + const int y = blockIdx.y * blockDim.y + threadIdx.y;
  83 +
  84 + if (x >= dst_width)
  85 + return; //x = width - 1;
  86 +
  87 + if (y >= dst_height)
  88 + return; // y = height - 1;
  89 +
  90 + float fx = (x + 0.5)*src_width / (float)dst_width - 0.5;
  91 + float fy = (y + 0.5)*src_height / (float)dst_height - 0.5;
  92 + int ax = floor(fx);
  93 + int ay = floor(fy);
  94 + if (ax < 0)
  95 + {
  96 + ax = 0;
  97 + }
  98 + else if (ax > src_width - 2)
  99 + {
  100 + ax = src_width - 2;
  101 + }
  102 +
  103 + if (ay < 0){
  104 + ay = 0;
  105 + }
  106 + else if (ay > src_height - 2)
  107 + {
  108 + ay = src_height - 2;
  109 + }
  110 +
  111 + int A = ax + ay*src_width;
  112 + int B = ax + ay*src_width + 1;
  113 + int C = ax + ay*src_width + src_width;
  114 + int D = ax + ay*src_width + src_width + 1;
  115 +
  116 + float w1, w2, w3, w4;
  117 + w1 = fx - ax;
  118 + w2 = 1 - w1;
  119 + w3 = fy - ay;
  120 + w4 = 1 - w3;
  121 +
  122 + unsigned char val = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3;
  123 +
  124 + dst_img[y * nPitch + x] = clip_value(val,0,255);
  125 + }
  126 +
  127 + cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height,
  128 + unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
  129 + unsigned char* U, size_t uPitch, int uWidth, int uHeight,
  130 + unsigned char* V, size_t vPitch, int vWidth, int vHeight)
  131 + {
  132 + unsigned char * u ;
  133 + unsigned char * v ;
  134 +
  135 + cudaError_t cudaStatus;
  136 +
  137 + cudaStatus = cudaMalloc((void**)&u, src_width * src_height * sizeof(unsigned char));
  138 + cudaStatus = cudaMalloc((void**)&v, src_width * src_height * sizeof(unsigned char));
  139 +
  140 + dim3 block(32, 16, 1);
  141 + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
  142 + dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1);
  143 + dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1);
  144 +
  145 + kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch);
  146 +
  147 + cudaStatus = cudaGetLastError();
  148 + if (cudaStatus != cudaSuccess) {
  149 + fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus));
  150 + goto Error;
  151 + }
  152 +
  153 + cudaStatus = cudaDeviceSynchronize();
  154 + if (cudaStatus != cudaSuccess) {
  155 + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus);
  156 + goto Error;
  157 + }
  158 +
  159 + kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch);
  160 +
  161 + cudaStatus = cudaGetLastError();
  162 + if (cudaStatus != cudaSuccess) {
  163 + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
  164 + goto Error;
  165 + }
  166 +
  167 + cudaStatus = cudaDeviceSynchronize();
  168 + if (cudaStatus != cudaSuccess) {
  169 + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
  170 + goto Error;
  171 + }
  172 +
  173 + kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch);
  174 +
  175 + cudaStatus = cudaGetLastError();
  176 + if (cudaStatus != cudaSuccess) {
  177 + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
  178 + goto Error;
  179 + }
  180 +
  181 + cudaStatus = cudaDeviceSynchronize();
  182 + if (cudaStatus != cudaSuccess) {
  183 + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
  184 + goto Error;
  185 + }
  186 +
  187 +Error :
  188 + cudaFree(u);
  189 + cudaFree(v);
  190 +
  191 + return cudaStatus;
  192 + }
  193 +
  194 +
  195 +
  196 + cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height,
  197 + unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
  198 + unsigned char* U, size_t uPitch, int uWidth, int uHeight,
  199 + unsigned char* V, size_t vPitch, int vWidth, int vHeight)
  200 + {
  201 + unsigned char * u;
  202 + unsigned char * v;
  203 +
  204 + cudaError_t cudaStatus;
  205 +
  206 + cudaStatus = cudaMalloc((void**)&u, src_width * src_height * sizeof(unsigned char));
  207 + cudaStatus = cudaMalloc((void**)&v, src_width * src_height * sizeof(unsigned char));
  208 +
  209 + dim3 block(32, 16, 1);
  210 + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
  211 + dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1);
  212 + dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1);
  213 +
  214 + kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch);
  215 +
  216 + cudaStatus = cudaGetLastError();
  217 + if (cudaStatus != cudaSuccess) {
  218 + fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus));
  219 + goto Error;
  220 + }
  221 +
  222 + cudaStatus = cudaDeviceSynchronize();
  223 + if (cudaStatus != cudaSuccess) {
  224 + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus);
  225 + goto Error;
  226 + }
  227 +
  228 + kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch);
  229 +
  230 + cudaStatus = cudaGetLastError();
  231 + if (cudaStatus != cudaSuccess) {
  232 + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
  233 + goto Error;
  234 + }
  235 +
  236 + cudaStatus = cudaDeviceSynchronize();
  237 + if (cudaStatus != cudaSuccess) {
  238 + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
  239 + goto Error;
  240 + }
  241 +
  242 + kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch);
  243 +
  244 + cudaStatus = cudaGetLastError();
  245 + if (cudaStatus != cudaSuccess) {
  246 + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
  247 + goto Error;
  248 + }
  249 +
  250 + cudaStatus = cudaDeviceSynchronize();
  251 + if (cudaStatus != cudaSuccess) {
  252 + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
  253 + goto Error;
  254 + }
  255 +
  256 + Error:
  257 + cudaFree(u);
  258 + cudaFree(v);
  259 +
  260 + return cudaStatus;
  261 + }
  262 +}
  263 +
src/ResizeImage.cu 0 → 100644
  1 +#include "cuda_kernels.h"
  2 +
  3 +typedef unsigned char uchar;
  4 +typedef unsigned int uint32;
  5 +typedef int int32;
  6 +
  7 +namespace cuda_common
  8 +{
  9 + __global__ void kernel_bilinear(float *src_img, float *dst_img,
  10 + int src_width, int src_height, int dst_width, int dst_height)
  11 + {
  12 + const int x = blockIdx.x * blockDim.x + threadIdx.x;
  13 + const int y = blockIdx.y * blockDim.y + threadIdx.y;
  14 +
  15 + if (x < dst_width && y < dst_height)
  16 + {
  17 + float fx = (x + 0.5)*src_width / (float)dst_width - 0.5;
  18 + float fy = (y + 0.5)*src_height / (float)dst_height - 0.5;
  19 + int ax = floor(fx);
  20 + int ay = floor(fy);
  21 + if (ax < 0)
  22 + {
  23 + ax = 0;
  24 + }
  25 + else if (ax > src_width - 2)
  26 + {
  27 + ax = src_width - 2;
  28 + }
  29 +
  30 + if (ay < 0){
  31 + ay = 0;
  32 + }
  33 + else if (ay > src_height - 2)
  34 + {
  35 + ay = src_height - 2;
  36 + }
  37 +
  38 + int A = ax + ay*src_width;
  39 + int B = ax + ay*src_width + 1;
  40 + int C = ax + ay*src_width + src_width;
  41 + int D = ax + ay*src_width + src_width + 1;
  42 +
  43 + float w1, w2, w3, w4;
  44 + w1 = fx - ax;
  45 + w2 = 1 - w1;
  46 + w3 = fy - ay;
  47 + w4 = 1 - w3;
  48 +
  49 + float blue = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3;
  50 +
  51 + float green = src_img[src_width * src_height + A] * w2*w4 + src_img[src_width * src_height + B] * w1*w4
  52 + + src_img[src_width * src_height + C] * w2*w3 + src_img[src_width * src_height + D] * w1*w3;
  53 +
  54 + float red = src_img[src_width * src_height * 2 + A] * w2*w4 + src_img[src_width * src_height * 2 + B] * w1*w4
  55 + + src_img[src_width * src_height * 2 + C] * w2*w3 + src_img[src_width * src_height * 2 + D] * w1*w3;
  56 +
  57 + dst_img[y * dst_width + x] = blue;
  58 + dst_img[dst_width * dst_height + y * dst_width + x] = green;
  59 + dst_img[dst_width * dst_height * 2 + y * dst_width + x] = red;
  60 + }
  61 + }
  62 +
  63 + cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height)
  64 + {
  65 + dim3 block(32, 16, 1);
  66 + dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1);
  67 +
  68 + kernel_bilinear << < grid, block >> >(d_srcRGB, d_dstRGB, src_width, src_height, dst_width, dst_height);
  69 +
  70 + cudaError_t cudaStatus = cudaGetLastError();
  71 + if (cudaStatus != cudaSuccess) {
  72 + fprintf(stderr, "kernel_bilinear launch failed: %s\n", cudaGetErrorString(cudaStatus));
  73 + return cudaStatus;
  74 + }
  75 +
  76 + cudaStatus = cudaDeviceSynchronize();
  77 + if (cudaStatus != cudaSuccess) {
  78 + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus);
  79 + return cudaStatus;
  80 + }
  81 +
  82 + return cudaStatus;
  83 + }
  84 +}
0 \ No newline at end of file 85 \ No newline at end of file
src/common/inc/helper_cuda_drvapi.h
@@ -218,8 +218,7 @@ inline int gpuGetMaxGflopsDeviceIdDRV() @@ -218,8 +218,7 @@ inline int gpuGetMaxGflopsDeviceIdDRV()
218 // Find the best major SM Architecture GPU device 218 // Find the best major SM Architecture GPU device
219 while (current_device < device_count) 219 while (current_device < device_count)
220 { 220 {
221 - checkCudaErrors(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));  
222 - checkCudaErrors(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device)); 221 + checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
223 222
224 if (major > 0 && major < 9999) 223 if (major > 0 && major < 9999)
225 { 224 {
@@ -240,9 +239,7 @@ inline int gpuGetMaxGflopsDeviceIdDRV() @@ -240,9 +239,7 @@ inline int gpuGetMaxGflopsDeviceIdDRV()
240 checkCudaErrors(cuDeviceGetAttribute(&clockRate, 239 checkCudaErrors(cuDeviceGetAttribute(&clockRate,
241 CU_DEVICE_ATTRIBUTE_CLOCK_RATE, 240 CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
242 current_device)); 241 current_device));
243 -  
244 - checkCudaErrors(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));  
245 - checkCudaErrors(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device)); 242 + checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
246 243
247 int computeMode; 244 int computeMode;
248 getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device); 245 getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
@@ -320,9 +317,7 @@ inline int gpuGetMaxGflopsGLDeviceIdDRV() @@ -320,9 +317,7 @@ inline int gpuGetMaxGflopsGLDeviceIdDRV()
320 while (current_device < device_count) 317 while (current_device < device_count)
321 { 318 {
322 checkCudaErrors(cuDeviceGetName(deviceName, 256, current_device)); 319 checkCudaErrors(cuDeviceGetName(deviceName, 256, current_device));
323 -  
324 - checkCudaErrors(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));  
325 - checkCudaErrors(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device)); 320 + checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
326 321
327 #if CUDA_VERSION >= 3020 322 #if CUDA_VERSION >= 3020
328 checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device)); 323 checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device));
@@ -374,9 +369,7 @@ inline int gpuGetMaxGflopsGLDeviceIdDRV() @@ -374,9 +369,7 @@ inline int gpuGetMaxGflopsGLDeviceIdDRV()
374 checkCudaErrors(cuDeviceGetAttribute(&clockRate, 369 checkCudaErrors(cuDeviceGetAttribute(&clockRate,
375 CU_DEVICE_ATTRIBUTE_CLOCK_RATE, 370 CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
376 current_device)); 371 current_device));
377 -  
378 - checkCudaErrors(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));  
379 - checkCudaErrors(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device)); 372 + checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
380 373
381 #if CUDA_VERSION >= 3020 374 #if CUDA_VERSION >= 3020
382 checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device)); 375 checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device));
@@ -507,9 +500,7 @@ inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version, int d @@ -507,9 +500,7 @@ inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version, int d
507 500
508 checkCudaErrors(cuDeviceGet(&cuDevice, devID)); 501 checkCudaErrors(cuDeviceGet(&cuDevice, devID));
509 checkCudaErrors(cuDeviceGetName(name, 100, cuDevice)); 502 checkCudaErrors(cuDeviceGetName(name, 100, cuDevice));
510 -  
511 - checkCudaErrors(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, devID));  
512 - checkCudaErrors(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, devID)); 503 + checkCudaErrors(cuDeviceComputeCapability(&major, &minor, devID));
513 504
514 if ((major > major_version) || 505 if ((major > major_version) ||
515 (major == major_version && minor >= minor_version)) 506 (major == major_version && minor >= minor_version))
src/cuda_kernels.h
@@ -12,15 +12,52 @@ @@ -12,15 +12,52 @@
12 12
13 typedef enum 13 typedef enum
14 { 14 {
15 - ITU601 = 1,  
16 - ITU709 = 2  
17 -} e_ColorSpace; 15 + ITU_601 = 1,
  16 + ITU_709 = 2
  17 +} FF_ColorSpace;
18 18
19 namespace cuda_common 19 namespace cuda_common
20 { 20 {
21 - cudaError_t setColorSpace(e_ColorSpace CSC, float hue); 21 + cudaError_t setColorSpace(FF_ColorSpace CSC, float hue);
22 22
23 cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height); 23 cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height);
24 cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height); 24 cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height);
  25 +
  26 +
  27 + cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height);
  28 +
  29 + cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height,
  30 + unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
  31 + unsigned char* U, size_t uPitch, int uWidth, int uHeight,
  32 + unsigned char* V, size_t vPitch, int vWidth, int vHeight);
  33 +
  34 + cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height,
  35 + unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
  36 + unsigned char* U, size_t uPitch, int uWidth, int uHeight,
  37 + unsigned char* V, size_t vPitch, int vWidth, int vHeight);
  38 +
  39 + cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom);
  40 + // cudaError_t PartMemResize(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int left, int top, int right, int bottom);
  41 +
  42 + cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int srcimg_width, int srcimg_height, unsigned char** d_dstRGB, int count,
  43 + int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h,
  44 + float submeanb, float submeang, float submeanr,
  45 + float varianceb, float varianceg, float variancer);
  46 +
  47 + cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);
  48 + cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);
  49 +
  50 + cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y);
25 } 51 }
26 52
  53 +
  54 +int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height);
  55 +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height);
  56 +
  57 +int jpegNPP(const char *szOutputFile, float* d_srcRGB);
  58 +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB);
  59 +
  60 +int initTable();
  61 +int initTable(int flag, int width, int height);
  62 +int releaseJpegNPP();
  63 +
src/define.hpp
@@ -5,3 +5,9 @@ @@ -5,3 +5,9 @@
5 #define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) 5 #define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
6 6
7 7
  8 +#define CHECK_CUDA(call) \
  9 +{\
  10 + const cudaError_t error_code = call;\
  11 + if (cudaSuccess != error_code)\
  12 + LOG_ERROR("CUDA error, code: {} reason: {}", error_code, cudaGetErrorString(error_code));\
  13 +}
8 \ No newline at end of file 14 \ No newline at end of file
src/gb28181/FFGB28181Decoder.cpp
@@ -10,11 +10,13 @@ extern "C" { @@ -10,11 +10,13 @@ extern "C" {
10 #include "libswscale/swscale.h" 10 #include "libswscale/swscale.h"
11 } 11 }
12 12
13 -#include "../logger.hpp"  
14 -  
15 #include"RTPTcpReceiver.h" 13 #include"RTPTcpReceiver.h"
16 #include"RTPUdpReceiver.h" 14 #include"RTPUdpReceiver.h"
17 15
  16 +#include <cuda_runtime.h>
  17 +
  18 +#include "common_header.h"
  19 +
18 #define ECLOSED 0 20 #define ECLOSED 0
19 #define ECLOSING 1 21 #define ECLOSING 1
20 #define ERUNNING 2 22 #define ERUNNING 2
@@ -36,6 +38,7 @@ FFGB28181Decoder::FFGB28181Decoder() { @@ -36,6 +38,7 @@ FFGB28181Decoder::FFGB28181Decoder() {
36 m_frameSkip = 1; 38 m_frameSkip = 1;
37 m_port = -1; 39 m_port = -1;
38 m_dec_keyframe = false; 40 m_dec_keyframe = false;
  41 + m_post_decode_thread = 0;
39 } 42 }
40 43
41 FFGB28181Decoder::~FFGB28181Decoder() 44 FFGB28181Decoder::~FFGB28181Decoder()
@@ -47,11 +50,6 @@ FFGB28181Decoder::~FFGB28181Decoder() @@ -47,11 +50,6 @@ FFGB28181Decoder::~FFGB28181Decoder()
47 avcodec_free_context(&m_pAVCodecCtx); 50 avcodec_free_context(&m_pAVCodecCtx);
48 } 51 }
49 52
50 - if (m_pAVFrame) {  
51 - av_frame_free(&m_pAVFrame);  
52 - m_pAVFrame = NULL;  
53 - }  
54 -  
55 m_dec_keyframe = false; 53 m_dec_keyframe = false;
56 54
57 LOG_INFO("destroy OK--{}", m_dec_name); 55 LOG_INFO("destroy OK--{}", m_dec_name);
@@ -74,9 +72,22 @@ void FFGB28181Decoder::close(){ @@ -74,9 +72,22 @@ void FFGB28181Decoder::close(){
74 m_rtpPtr = nullptr; 72 m_rtpPtr = nullptr;
75 } 73 }
76 74
77 - LOG_INFO("解码器关闭成功 --{}", m_dec_name); 75 + if (gpu_options) av_dict_free(&gpu_options);
  76 +
  77 + if (m_post_decode_thread != 0)
  78 + {
  79 + pthread_join(m_post_decode_thread,0);
  80 + }
  81 +
  82 + while(mFrameQueue.size() > 0){
  83 + AVFrame * gpuFrame = mFrameQueue.front();
  84 + av_frame_free(&gpuFrame);
  85 + mFrameQueue.pop();
  86 + }
78 87
79 m_status = ECLOSED; 88 m_status = ECLOSED;
  89 +
  90 + LOG_INFO("解码器关闭成功 --{}", m_dec_name);
80 } 91 }
81 92
82 bool FFGB28181Decoder::init(FFDecConfig& cfg){ 93 bool FFGB28181Decoder::init(FFDecConfig& cfg){
@@ -124,7 +135,18 @@ bool FFGB28181Decoder::start() { @@ -124,7 +135,18 @@ bool FFGB28181Decoder::start() {
124 135
125 LOG_INFO("start - {} {}: ", m_dec_name, m_port); 136 LOG_INFO("start - {} {}: ", m_dec_name, m_port);
126 137
127 - return m_rtpPtr->Open((uint16_t)m_port); 138 + bool bRet = m_rtpPtr->Open((uint16_t)m_port);
  139 + if(bRet){
  140 + pthread_create(&m_post_decode_thread,0,
  141 + [](void* arg)
  142 + {
  143 + FFGB28181Decoder* a=(FFGB28181Decoder*)arg;
  144 + a->post_decode_thread();
  145 + return (void*)0;
  146 + }
  147 + ,this);
  148 + }
  149 + return bRet;
128 } 150 }
129 151
130 void FFGB28181Decoder::setDecKeyframe(bool bKeyframe){ 152 void FFGB28181Decoder::setDecKeyframe(bool bKeyframe){
@@ -151,15 +173,12 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i @@ -151,15 +173,12 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i
151 return; 173 return;
152 } 174 }
153 175
154 - AVPacket framePacket = {}, mp4Packet = {}; 176 + AVPacket framePacket = {};
155 av_init_packet(&framePacket); 177 av_init_packet(&framePacket);
156 - av_init_packet(&mp4Packet);  
157 178
158 framePacket.size = len; 179 framePacket.size = len;
159 framePacket.data = (uint8_t*)data; 180 framePacket.data = (uint8_t*)data;
160 181
161 - AVDictionary *gpu_options = nullptr;  
162 -  
163 if (m_pAVCodecCtx == nullptr) { 182 if (m_pAVCodecCtx == nullptr) {
164 LOG_INFO("frame data is zero --{}", m_dec_name); 183 LOG_INFO("frame data is zero --{}", m_dec_name);
165 if (VIDEO_TYPE_H264 == videoType) { 184 if (VIDEO_TYPE_H264 == videoType) {
@@ -192,7 +211,6 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i @@ -192,7 +211,6 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i
192 } 211 }
193 212
194 m_pAVCodecCtx = avcodec_alloc_context3(m_pAVCodec); 213 m_pAVCodecCtx = avcodec_alloc_context3(m_pAVCodec);
195 -  
196 214
197 if (m_gpuid >= 0) { 215 if (m_gpuid >= 0) {
198 char gpui[8] = { 0 }; 216 char gpui[8] = { 0 };
@@ -211,8 +229,6 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i @@ -211,8 +229,6 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i
211 229
212 if (avcodec_open2(m_pAVCodecCtx, m_pAVCodec, &gpu_options) < 0) 230 if (avcodec_open2(m_pAVCodecCtx, m_pAVCodec, &gpu_options) < 0)
213 return; 231 return;
214 -  
215 - m_pAVFrame = av_frame_alloc();  
216 } 232 }
217 233
218 //开始解码 234 //开始解码
@@ -220,6 +236,7 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i @@ -220,6 +236,7 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i
220 if (ret < 0) { 236 if (ret < 0) {
221 //send_exception(RunMessageType::E2002, e_msg); 237 //send_exception(RunMessageType::E2002, e_msg);
222 LOG_ERROR("Real stream视频解码失败,请检查视频设备{}: avcodec_send_packet failed. ret={}", m_dec_name, ret); 238 LOG_ERROR("Real stream视频解码失败,请检查视频设备{}: avcodec_send_packet failed. ret={}", m_dec_name, ret);
  239 + av_packet_unref(&framePacket);
223 return; 240 return;
224 } 241 }
225 242
@@ -228,61 +245,67 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i @@ -228,61 +245,67 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i
228 frameH = m_pAVCodecCtx->height; 245 frameH = m_pAVCodecCtx->height;
229 if (frameW <= 0 || frameH <= 0) { 246 if (frameW <= 0 || frameH <= 0) {
230 LOG_ERROR("[{}] frame W or H is error! ({},{})", m_dec_name, frameW, frameH); 247 LOG_ERROR("[{}] frame W or H is error! ({},{})", m_dec_name, frameW, frameH);
  248 + av_packet_unref(&framePacket);
231 return; 249 return;
232 } 250 }
233 } 251 }
234 // m_fps = m_pAVCodecCtx->pkt_timebase.den == 0 ? 25.0 : av_q2d(m_pAVCodecCtx->pkt_timebase); 252 // m_fps = m_pAVCodecCtx->pkt_timebase.den == 0 ? 25.0 : av_q2d(m_pAVCodecCtx->pkt_timebase);
235 m_fps = av_q2d(m_pAVCodecCtx->framerate); 253 m_fps = av_q2d(m_pAVCodecCtx->framerate);
236 - LOG_DEBUG("frameW {}--frameH {}", frameW, frameH);  
237 - while (ret >= 0) {  
238 - ret = avcodec_receive_frame(m_pAVCodecCtx, m_pAVFrame);  
239 - if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))  
240 - return;  
241 - else if (ret < 0) {  
242 - if (m_frameCount % 10 == 0){  
243 - //send_exception(RunMessageType::E2002, e_msg);  
244 - LOG_ERROR("Real stream视频解码失败,请检查视频设备{}: avcodec_receive_frame failed. ret={}", m_dec_name, ret);  
245 - }  
246 - continue;  
247 - } 254 + // LOG_DEBUG("frameW {}--frameH {}", frameW, frameH);
  255 +
  256 + AVFrame* gpuFrame = av_frame_alloc();
  257 + ret = avcodec_receive_frame(m_pAVCodecCtx, gpuFrame);
  258 + if ((ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) || ret < 0){
  259 + LOG_ERROR("{} - Failed to receive frame: {}", m_dec_name, ret);
  260 + av_packet_unref(&framePacket);
  261 + av_frame_free(&gpuFrame);
  262 + return;
  263 + }
248 264
249 - if (++m_frameCount % m_frameSkip != 0) continue;  
250 -  
251 - if (m_pAVFrame->width != frameW || m_pAVFrame->height != frameH){  
252 - LOG_INFO("AVFrame is inconsistent: width is {}, height is {}; original frameW is {}, frameH is {}--{}", m_pAVFrame->width, m_pAVFrame->height, frameW, frameH , m_dec_name);  
253 - continue;  
254 - }  
255 -  
256 - LOG_DEBUG("curpos is: {}", m_frameCount);  
257 -  
258 - post_decoded_cbk(m_postDecArg, m_pAVFrame);  
259 -  
260 - //LOG_count++;  
261 - //if (LOG_count > 100000) {  
262 - // LOG_INFO("Real frame send_shm_videoframe pts={}-{}", localPts, m_dec_name);  
263 - // //LOG_count = 0;  
264 - //}  
265 - //}  
266 - //catch (GeneralException2& e)  
267 - //{  
268 - // LOG_ERROR("send_shm_videoframe failed! {}--{}--{}", e.err_code(), e.err_msg(), m_dec_name);  
269 - // if (e.err_code() == -666) {  
270 - // this->close();  
271 - // }  
272 - //  
273 - // if (e.err_code() == ERROR_MEMORY) {  
274 - // if (m_frameCount % 10 == 0) {  
275 - // string e_msg;  
276 - // format_string(e_msg, "服务器资源内存分配失败, 在vas模块%s文件%d行出现无法获取内存的情况!", __FILE__, __LINE__);  
277 - // send_exception(RunMessageType::F4001, e_msg);  
278 - // LOG_ERROR("{}", e_msg);  
279 - // }  
280 - // }  
281 - // return;  
282 - //} 265 + av_packet_unref(&framePacket);
  266 +
  267 + if (gpuFrame->width != frameW || gpuFrame->height != frameH){
  268 + LOG_INFO("AVFrame is inconsistent: width is {}, height is {}; original frameW is {}, frameH is {}--{}", gpuFrame->width, gpuFrame->height, frameW, frameH , m_dec_name);
  269 + av_frame_free(&gpuFrame);
  270 + return;
  271 + }
  272 +
  273 + m_queue_mutex.lock();
  274 + if(mFrameQueue.size() <= 10){
  275 + mFrameQueue.push(gpuFrame);
  276 + }else{
  277 + av_frame_free(&gpuFrame);
283 } 278 }
  279 + m_queue_mutex.unlock();
  280 +}
  281 +
  282 +void FFGB28181Decoder::post_decode_thread(){
  283 +
  284 + int index = 0;
  285 + while (isRunning())
  286 + {
  287 + if(mFrameQueue.size() > 0){
  288 + std::lock_guard<std::mutex> l(m_snapshot_mutex);
  289 + // 取队头数据
  290 + m_queue_mutex.lock();
  291 + AVFrame * gpuFrame = mFrameQueue.front();
  292 + mFrameQueue.pop();
  293 + m_queue_mutex.unlock();
  294 + // 跳帧
  295 + if (m_frameSkip == 1 || index % m_frameSkip == 0){
  296 + post_decoded_cbk(m_postDecArg, gpuFrame);
  297 + }
  298 +
  299 + av_frame_free(&gpuFrame);
  300 +
  301 + index++;
  302 + if(index >= 100000){
  303 + index = 0;
  304 + }
  305 + }
  306 + }
284 307
285 - if (gpu_options) av_dict_free(&gpu_options); 308 + LOG_INFO("post decode thread exited.");
286 } 309 }
287 310
288 void FFGB28181Decoder::stream_end_callback() 311 void FFGB28181Decoder::stream_end_callback()
src/gb28181/FFGB28181Decoder.h
@@ -6,6 +6,7 @@ @@ -6,6 +6,7 @@
6 #include "../AbstractDecoder.h" 6 #include "../AbstractDecoder.h"
7 7
8 #include <atomic> 8 #include <atomic>
  9 +#include <mutex>
9 10
10 struct AVFormatContext; 11 struct AVFormatContext;
11 struct AVCodecContext; 12 struct AVCodecContext;
@@ -14,6 +15,7 @@ struct AVFrame; @@ -14,6 +15,7 @@ struct AVFrame;
14 struct AVPacket; 15 struct AVPacket;
15 struct SwsContext; 16 struct SwsContext;
16 17
  18 +using namespace std;
17 19
18 class FFGB28181Decoder: public AbstractDecoder 20 class FFGB28181Decoder: public AbstractDecoder
19 { 21 {
@@ -45,19 +47,16 @@ public: @@ -45,19 +47,16 @@ public:
45 public: 47 public:
46 void stream_callback(int videoType, char* data, int len, int isKey, uint64_t pts, uint64_t localPts); 48 void stream_callback(int videoType, char* data, int len, int isKey, uint64_t pts, uint64_t localPts);
47 void stream_end_callback(); 49 void stream_end_callback();
  50 + void post_decode_thread();
48 51
49 private: 52 private:
50 AVCodecContext* m_pAVCodecCtx {}; 53 AVCodecContext* m_pAVCodecCtx {};
51 const AVCodec* m_pAVCodec {}; 54 const AVCodec* m_pAVCodec {};
52 - AVFrame* m_pAVFrame {};  
53 55
54 int m_gpuid {-1}; 56 int m_gpuid {-1};
55 57
56 RTPReceiver* m_rtpPtr; 58 RTPReceiver* m_rtpPtr;
57 int m_port; 59 int m_port;
58 - uint64_t m_frameCount {};  
59 -  
60 - AVFrame* pFrameRGB {};  
61 60
62 uint64_t m_startPts {}; 61 uint64_t m_startPts {};
63 uint64_t m_lastPts {}; //上一次pts的值 62 uint64_t m_lastPts {}; //上一次pts的值
@@ -71,6 +70,10 @@ private: @@ -71,6 +70,10 @@ private:
71 int log_count {}; 70 int log_count {};
72 71
73 std::atomic_int m_status {}; 72 std::atomic_int m_status {};
  73 +
  74 + AVDictionary *gpu_options = nullptr;
  75 +
  76 + pthread_t m_post_decode_thread;
74 }; 77 };
75 78
76 #endif // _GB28181_DECODER_H_ 79 #endif // _GB28181_DECODER_H_
src/gb28181/RTPReceiver.cpp
1 -#include "RTPReceiver.h" 1 +#include "RTPReceiver.h"
2 #include "rtppacket.h" 2 #include "rtppacket.h"
3 -#include "../logger.hpp"  
4 #include <thread> 3 #include <thread>
5 4
  5 +#include "common_header.h"
  6 +
6 #define BUFFERSIZE_1024 1024 7 #define BUFFERSIZE_1024 1024
7 const int kVideoFrameSize = BUFFERSIZE_1024*BUFFERSIZE_1024*5*2; 8 const int kVideoFrameSize = BUFFERSIZE_1024*BUFFERSIZE_1024*5*2;
8 9
@@ -174,7 +175,7 @@ int RTPReceiver::OnPsProcess() @@ -174,7 +175,7 @@ int RTPReceiver::OnPsProcess()
174 LOG_INFO("[{}] started.", m_deviceID); 175 LOG_INFO("[{}] started.", m_deviceID);
175 while (!m_bPsExit) { 176 while (!m_bPsExit) {
176 m_psFrameMutex.lock(); 177 m_psFrameMutex.lock();
177 - LOG_DEBUG("[{}] PS frame size : {}", m_deviceID, m_psVideoFrames.size()); 178 + // LOG_DEBUG("[{}] PS frame size : {}", m_deviceID, m_psVideoFrames.size());
178 if (m_psVideoFrames.size() <= 0){ 179 if (m_psVideoFrames.size() <= 0){
179 m_psFrameMutex.unlock(); 180 m_psFrameMutex.unlock();
180 std::this_thread::sleep_for(std::chrono::milliseconds(10)); 181 std::this_thread::sleep_for(std::chrono::milliseconds(10));
@@ -257,7 +258,7 @@ int RTPReceiver::ParsePacket(RTPPacket* packet){ @@ -257,7 +258,7 @@ int RTPReceiver::ParsePacket(RTPPacket* packet){
257 break; 258 break;
258 } 259 }
259 260
260 - LOG_DEBUG("[{}] ParsePacket GetPayloadLength", m_deviceID); 261 + // LOG_DEBUG("[{}] ParsePacket GetPayloadLength", m_deviceID);
261 262
262 if (mark) 263 if (mark)
263 { 264 {
@@ -271,7 +272,7 @@ int RTPReceiver::ParsePacket(RTPPacket* packet){ @@ -271,7 +272,7 @@ int RTPReceiver::ParsePacket(RTPPacket* packet){
271 std::lock_guard<std::mutex> l(m_psFrameMutex); 272 std::lock_guard<std::mutex> l(m_psFrameMutex);
272 if (m_psVideoFrames.size() < 100) 273 if (m_psVideoFrames.size() < 100)
273 { 274 {
274 - LOG_DEBUG("[{}]ParsePacket push", m_deviceID); 275 + // LOG_DEBUG("[{}]ParsePacket push", m_deviceID);
275 m_psVideoFrames.push(new Frame(frameBuf, offset, false)); 276 m_psVideoFrames.push(new Frame(frameBuf, offset, false));
276 } 277 }
277 else { 278 else {
src/gb28181/RTPReceiver.h
@@ -32,7 +32,7 @@ typedef void(*CallBack_VodFileEnd)(void* userdata); @@ -32,7 +32,7 @@ typedef void(*CallBack_VodFileEnd)(void* userdata);
32 /** 32 /**
33 * 请求流 33 * 请求流
34 */ 34 */
35 -typedef bool(*CallBack_Request_Stream)(); 35 +typedef bool(*CallBack_Request_Stream)(const char* deviceId);
36 36
37 // 标识帧, 注意buffer需要自己开辟和释放 37 // 标识帧, 注意buffer需要自己开辟和释放
38 struct Frame { 38 struct Frame {
@@ -85,7 +85,7 @@ class RTPReceiver{ @@ -85,7 +85,7 @@ class RTPReceiver{
85 85
86 public: 86 public:
87 RTPReceiver(); 87 RTPReceiver();
88 - ~RTPReceiver(); 88 + virtual ~RTPReceiver();
89 89
90 virtual bool Open(uint16_t localPort) = 0; 90 virtual bool Open(uint16_t localPort) = 0;
91 virtual bool IsOpened() = 0; 91 virtual bool IsOpened() = 0;
src/gb28181/RTPTcpReceiver.cpp
1 #include"RTPTcpReceiver.h" 1 #include"RTPTcpReceiver.h"
2 -#include "../logger.hpp"  
3 2
  3 +#include "common_header.h"
4 4
5 -static long long get_cur_time() {  
6 -  
7 - chrono::time_point<chrono::system_clock, chrono::milliseconds> tpMicro  
8 - = chrono::time_point_cast<chrono::milliseconds>(chrono::system_clock::now());  
9 -  
10 - return tpMicro.time_since_epoch().count();  
11 -}  
12 5
13 // class TcpRTPSession : public RTPSession 6 // class TcpRTPSession : public RTPSession
14 // { 7 // {
@@ -65,7 +58,7 @@ public: @@ -65,7 +58,7 @@ public:
65 LOG_ERROR("Error sending over socket {}, removing destination", sock); 58 LOG_ERROR("Error sending over socket {}, removing destination", sock);
66 DeleteDestination(RTPTCPAddress(sock)); 59 DeleteDestination(RTPTCPAddress(sock));
67 if(nullptr != tcpReceiver && !tcpReceiver->isClosing()){ 60 if(nullptr != tcpReceiver && !tcpReceiver->isClosing()){
68 - tcpReceiver->RequestStream(); 61 + tcpReceiver->ReConnect();
69 } 62 }
70 } 63 }
71 64
@@ -90,6 +83,16 @@ static int rtp_revc_thread_(void* param) @@ -90,6 +83,16 @@ static int rtp_revc_thread_(void* param)
90 return self->OnRtpRecv(); 83 return self->OnRtpRecv();
91 } 84 }
92 85
  86 +static int listen_finish_thread_(void* param)
  87 +{
  88 + if (!param)
  89 + {
  90 + return -1;
  91 + }
  92 +
  93 + RTPTcpReceiver* self = (RTPTcpReceiver*)param;
  94 + return self->ListenFinish();
  95 +}
93 96
94 RTPTcpReceiver::RTPTcpReceiver() 97 RTPTcpReceiver::RTPTcpReceiver()
95 : m_bRtpExit(false) 98 : m_bRtpExit(false)
@@ -143,11 +146,19 @@ bool RTPTcpReceiver::IsOpened(){ @@ -143,11 +146,19 @@ bool RTPTcpReceiver::IsOpened(){
143 } 146 }
144 147
145 void RTPTcpReceiver::Close(){ 148 void RTPTcpReceiver::Close(){
  149 + m_bRtpExit = true;
  150 +
  151 + if(m_listenFinishThread.joinable()){
  152 + m_listenFinishThread.join();
  153 + }
  154 +}
  155 +
  156 +void RTPTcpReceiver::close_task(){
  157 + m_bRtpExit = true;
146 158
147 m_bClosing = true; 159 m_bClosing = true;
148 160
149 m_bAccepted = true; 161 m_bAccepted = true;
150 - m_bRtpExit = true;  
151 162
152 LOG_DEBUG("[{}] 1.", m_deviceID); 163 LOG_DEBUG("[{}] 1.", m_deviceID);
153 164
@@ -207,20 +218,22 @@ int RTPTcpReceiver::initSession(int localPort){ @@ -207,20 +218,22 @@ int RTPTcpReceiver::initSession(int localPort){
207 status = m_rtpSessionPtr->Create(*m_pSessparams, m_pTrans); 218 status = m_rtpSessionPtr->Create(*m_pSessparams, m_pTrans);
208 if (status < 0) 219 if (status < 0)
209 { 220 {
210 - LOG_ERROR("[{}] create session error!!", m_deviceID); 221 + // 若status = -59 ,需运行 export LOGNAME=root ,见 https://blog.csdn.net/m0_37876242/article/details/128588162
  222 + LOG_ERROR("[{}] create session error: {}", m_deviceID, status);
211 return -1; 223 return -1;
212 } 224 }
213 225
214 m_rtpThread = std::thread(rtp_revc_thread_, this); 226 m_rtpThread = std::thread(rtp_revc_thread_, this);
  227 + m_listenFinishThread = std::thread(listen_finish_thread_, this);
215 228
216 InitPS(); 229 InitPS();
217 230
218 - bool bRet = RequestStream();  
219 - if (!bRet)  
220 - {  
221 - LOG_INFO("[{}] 请求流失败!", m_deviceID);  
222 - return -1;  
223 - } 231 + // bool bRet = RequestStream();
  232 + // if (!bRet)
  233 + // {
  234 + // LOG_INFO("[{}] 请求流失败!", m_deviceID);
  235 + // return -1;
  236 + // }
224 237
225 LOG_INFO("[{}] 初始化成功, congratulations !!!", m_deviceID); 238 LOG_INFO("[{}] 初始化成功, congratulations !!!", m_deviceID);
226 239
@@ -240,17 +253,56 @@ int RTPTcpReceiver::OnRtpRecv() @@ -240,17 +253,56 @@ int RTPTcpReceiver::OnRtpRecv()
240 SocketType nServer = -1; 253 SocketType nServer = -1;
241 254
242 LOG_INFO("[{}] Poll started.", m_deviceID); 255 LOG_INFO("[{}] Poll started.", m_deviceID);
243 - int status = -1; 256 + int reconn_times = 0;
  257 + int reaccept_times = 0;
  258 + bool bReconn = false;
244 while(!m_bRtpExit){ 259 while(!m_bRtpExit){
245 while(!m_bAccepted){ 260 while(!m_bAccepted){
  261 + if(m_bRtpExit){
  262 + goto end_flag;
  263 + }
  264 +
  265 + while (!bReconn){
  266 + if(m_bRtpExit){
  267 + goto end_flag;
  268 + }
  269 +
  270 + reconn_times++;
  271 + if(reconn_times > 10){
  272 + // 10次请求都失败,结束任务
  273 + m_bRtpExit = true;
  274 + goto end_flag;
  275 + }
  276 + LOG_DEBUG("[{}] RequestStream...", m_deviceID);
  277 + bReconn = RequestStream();
  278 + if (bReconn){
  279 + LOG_DEBUG("[{}] RequestStream, True", m_deviceID);
  280 + continue;
  281 + }
  282 + LOG_DEBUG("[{}] RequestStream, False", m_deviceID);
  283 +
  284 + std::this_thread::sleep_for(std::chrono::seconds(3));
  285 + }
  286 +
246 LOG_DEBUG("[{}] accepting...", m_deviceID); 287 LOG_DEBUG("[{}] accepting...", m_deviceID);
247 nServer = accept(m_nListener, (sockaddr*)&clientAddr, (socklen_t * ) &nLen); 288 nServer = accept(m_nListener, (sockaddr*)&clientAddr, (socklen_t * ) &nLen);
248 if (-1 == nServer){ 289 if (-1 == nServer){
249 - std::this_thread::sleep_for(std::chrono::milliseconds(10)); 290 + reaccept_times++;
  291 + LOG_DEBUG("[{}] reaccept_times = {}", m_deviceID, reaccept_times);
  292 + if(reaccept_times > 600){
  293 + LOG_DEBUG("[{}] reaccept_times > 600", m_deviceID);
  294 + bReconn = false;
  295 + reaccept_times = 0;
  296 + }
  297 + std::this_thread::sleep_for(std::chrono::milliseconds(50));
250 continue; 298 continue;
251 } 299 }
  300 + LOG_DEBUG("[{}] accept success", m_deviceID);
252 m_rtpSessionPtr->AddDestination(RTPTCPAddress(nServer)); 301 m_rtpSessionPtr->AddDestination(RTPTCPAddress(nServer));
253 m_bAccepted = true; 302 m_bAccepted = true;
  303 + bReconn = false;
  304 + reconn_times = 0;
  305 + reaccept_times = 0;
254 306
255 LOG_INFO("[{}] nServer={}", m_deviceID, nServer); 307 LOG_INFO("[{}] nServer={}", m_deviceID, nServer);
256 break; 308 break;
@@ -265,7 +317,7 @@ int RTPTcpReceiver::OnRtpRecv() @@ -265,7 +317,7 @@ int RTPTcpReceiver::OnRtpRecv()
265 317
266 while ((pack = m_rtpSessionPtr->GetNextPacket()) != NULL) 318 while ((pack = m_rtpSessionPtr->GetNextPacket()) != NULL)
267 { 319 {
268 - LOG_DEBUG("[{}] time: {} ", m_deviceID, get_cur_time()); 320 + // LOG_DEBUG("[{}] time: {} ", m_deviceID, UtilTools::get_cur_time_ms());
269 ParsePacket(pack); 321 ParsePacket(pack);
270 322
271 m_rtpSessionPtr->DeletePacket(pack); 323 m_rtpSessionPtr->DeletePacket(pack);
@@ -279,6 +331,8 @@ int RTPTcpReceiver::OnRtpRecv() @@ -279,6 +331,8 @@ int RTPTcpReceiver::OnRtpRecv()
279 std::this_thread::sleep_for(std::chrono::milliseconds(10)); 331 std::this_thread::sleep_for(std::chrono::milliseconds(10));
280 } 332 }
281 333
  334 +end_flag:
  335 +
282 m_rtpSessionPtr->Destroy(); 336 m_rtpSessionPtr->Destroy();
283 337
284 if(nServer > 0){ 338 if(nServer > 0){
@@ -293,13 +347,18 @@ int RTPTcpReceiver::OnRtpRecv() @@ -293,13 +347,18 @@ int RTPTcpReceiver::OnRtpRecv()
293 return 0; 347 return 0;
294 } 348 }
295 349
296 -bool RTPTcpReceiver::RequestStream(){  
297 - bool bConnect = m_callback_request_stream();  
298 - if(!bConnect){  
299 - Close();  
300 - return false; 350 +int RTPTcpReceiver::ListenFinish(){
  351 + while(!m_bRtpExit){
  352 + std::this_thread::sleep_for(std::chrono::seconds(3));
301 } 353 }
  354 +
  355 + close_task();
  356 +}
  357 +
  358 +bool RTPTcpReceiver::ReConnect(){
302 m_bAccepted = false; 359 m_bAccepted = false;
  360 +}
303 361
304 - return true; 362 +bool RTPTcpReceiver::RequestStream(){
  363 + return m_callback_request_stream(m_deviceID.c_str());
305 } 364 }
306 \ No newline at end of file 365 \ No newline at end of file
src/gb28181/RTPTcpReceiver.h
@@ -57,11 +57,14 @@ public: @@ -57,11 +57,14 @@ public:
57 57
58 public: 58 public:
59 int OnRtpRecv(); 59 int OnRtpRecv();
  60 + bool ReConnect();
  61 + int ListenFinish();
60 bool RequestStream(); 62 bool RequestStream();
61 bool isClosing(); 63 bool isClosing();
62 64
63 private: 65 private:
64 int initSession(int localPort); 66 int initSession(int localPort);
  67 + void close_task();
65 68
66 private: 69 private:
67 70
@@ -77,9 +80,12 @@ private: @@ -77,9 +80,12 @@ private:
77 std::thread m_rtpThread; // RTP接收线程 80 std::thread m_rtpThread; // RTP接收线程
78 SocketType m_nListener; 81 SocketType m_nListener;
79 82
80 - RTPSession* m_rtpSessionPtr; // RTP会话  
81 - RTPSessionParams* m_pSessparams;  
82 - MyTCPTransmitter* m_pTrans; 83 + RTPSession* m_rtpSessionPtr; // RTP会话
  84 + RTPSessionParams* m_pSessparams;
  85 + MyTCPTransmitter* m_pTrans;
  86 +
  87 + std::thread m_listenFinishThread; // RTP接收线程
  88 +
83 }; 89 };
84 90
85 #endif // _RTP_TCP_RECEIVER_H_ 91 #endif // _RTP_TCP_RECEIVER_H_
src/gb28181/RTPUdpReceiver.cpp
@@ -6,7 +6,7 @@ @@ -6,7 +6,7 @@
6 #include <thread> 6 #include <thread>
7 #include <chrono> 7 #include <chrono>
8 8
9 -#include "../logger.hpp" 9 +#include "common_header.h"
10 10
11 using namespace std; 11 using namespace std;
12 12
@@ -42,15 +42,6 @@ private: @@ -42,15 +42,6 @@ private:
42 } 42 }
43 }; 43 };
44 44
45 -  
46 -static long long get_cur_time() {  
47 -  
48 - chrono::time_point<chrono::system_clock, chrono::milliseconds> tpMicro  
49 - = chrono::time_point_cast<chrono::milliseconds>(chrono::system_clock::now());  
50 -  
51 - return tpMicro.time_since_epoch().count();  
52 -}  
53 -  
54 static int rtp_revc_thread_(void* param) 45 static int rtp_revc_thread_(void* param)
55 { 46 {
56 if (!param) 47 if (!param)
@@ -175,7 +166,7 @@ int RTPUdpReceiver::OnRtpRecv() @@ -175,7 +166,7 @@ int RTPUdpReceiver::OnRtpRecv()
175 if (m_rtpSessionPtr->GotoFirstSourceWithData()) 166 if (m_rtpSessionPtr->GotoFirstSourceWithData())
176 { 167 {
177 LOG_INFO("OnRtpRecv GotoFirstSourceWithData --{}", m_deviceID); 168 LOG_INFO("OnRtpRecv GotoFirstSourceWithData --{}", m_deviceID);
178 - last_recv_ts = get_cur_time(); 169 + last_recv_ts = UtilTools::get_cur_time_ms();
179 m_idleCount = 0; 170 m_idleCount = 0;
180 m_noDataCount = 0; 171 m_noDataCount = 0;
181 do 172 do
@@ -261,7 +252,7 @@ int RTPUdpReceiver::OnRtpRecv() @@ -261,7 +252,7 @@ int RTPUdpReceiver::OnRtpRecv()
261 // //若是30000,时长大约 18s 252 // //若是30000,时长大约 18s
262 // if(m_idleCount > 30000) 253 // if(m_idleCount > 30000)
263 // { 254 // {
264 - // uint64_t cts = get_cur_time(); 255 + // uint64_t cts = UtilTools::get_cur_time_ms();
265 // float duration_not_recv = (cts - last_recv_ts) / 1000.0; 256 // float duration_not_recv = (cts - last_recv_ts) / 1000.0;
266 // 257 //
267 // //LOG_ERROR("************I haven't got stream from hik gateway exceed {}s,send eof********{}******", duration_not_recv, m_deviceID); 258 // //LOG_ERROR("************I haven't got stream from hik gateway exceed {}s,send eof********{}******", duration_not_recv, m_deviceID);
src/gb28181/common_header.h 0 → 100644
  1 +#ifndef _COMMON_HEADER_H_
  2 +#define _COMMON_HEADER_H_
  3 +
  4 +
  5 +#include "../logger.hpp"
  6 +#include "../utiltools.hpp"
  7 +
  8 +#endif
0 \ No newline at end of file 9 \ No newline at end of file
src/gb28181/demuxer.h
@@ -8,9 +8,11 @@ @@ -8,9 +8,11 @@
8 { CMpeg2Demux class. } 8 { CMpeg2Demux class. }
9 { } 9 { }
10 {*******************************************************/ 10 {*******************************************************/
  11 +
11 #ifndef _DEMUXER_H_ 12 #ifndef _DEMUXER_H_
12 #define _DEMUXER_H_ 13 #define _DEMUXER_H_
13 14
  15 +
14 #include <stdint.h> 16 #include <stdint.h>
15 #include "buffer.h" 17 #include "buffer.h"
16 18
src/jpegNPP.cpp-1 0 → 100644
  1 +/*
  2 +* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
  3 +*
  4 +* NOTICE TO USER:
  5 +*
  6 +* This source code is subject to NVIDIA ownership rights under U.S. and
  7 +* international Copyright laws.
  8 +*
  9 +* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
  10 +* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
  11 +* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
  12 +* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
  13 +* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
  14 +* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
  15 +* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
  16 +* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  17 +* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
  18 +* OR PERFORMANCE OF THIS SOURCE CODE.
  19 +*
  20 +* U.S. Government End Users. This source code is a "commercial item" as
  21 +* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
  22 +* "commercial computer software" and "commercial computer software
  23 +* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
  24 +* and is provided to the U.S. Government only as a commercial end item.
  25 +* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
  26 +* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
  27 +* source code with only those rights set forth herein.
  28 +*/
  29 +
  30 +// This sample needs at least CUDA 5.5 and a GPU that has at least Compute Capability 2.0
  31 +
  32 +// This sample demonstrates a simple image processing pipeline.
  33 +// First, a JPEG file is huffman decoded and inverse DCT transformed and dequantized.
  34 +// Then the different planes are resized. Finally, the resized image is quantized, forward
  35 +// DCT transformed and huffman encoded.
  36 +
  37 +#include "cuda_kernels.h"
  38 +
  39 +#include <npp.h>
  40 +#include <cuda_runtime.h>
  41 +#include "common/UtilNPP/Exceptions.h"
  42 +
  43 +#include "Endianess.h"
  44 +#include <math.h>
  45 +
  46 +#include <string.h>
  47 +#include <fstream>
  48 +#include <iostream>
  49 +
  50 +#include "common/inc/helper_string.h"
  51 +#include "common/inc/helper_cuda.h"
  52 +//#include "MacroDef.h"
  53 +#include "cuda.h"
  54 +
  55 +using namespace std;
  56 +
// Values that writeFrameHeader() serializes into the frame-header segment
// (marker 0xFF 0xC0). Fields are written one by one, so host padding between
// members never reaches the output stream.
struct FrameHeader
{
    // Sample precision in bits; both initTable() overloads set this to 8.
    unsigned char nSamplePrecision;
    // Image height, written big-endian by writeFrameHeader().
    unsigned short nHeight;
    // Image width, written big-endian by writeFrameHeader().
    unsigned short nWidth;
    // Number of valid entries in the three per-component arrays below.
    unsigned char nComponents;
    // Per-component identifier (1, 2, 3 as configured by initTable()).
    unsigned char aComponentIdentifier[3];
    // Per-component sampling factors: the high nibble feeds the horizontal
    // MCU block count and the low nibble the vertical one (see initTable()).
    unsigned char aSamplingFactors[3];
    // Per-component index of the quantization table to use.
    unsigned char aQuantizationTableSelector[3];
};
  67 +
// Values that writeScanHeader() serializes into the scan-header segment
// (marker 0xFF 0xDA). All members are single bytes.
struct ScanHeader
{
    // Number of valid entries in the two per-component arrays below.
    unsigned char nComponents;
    // Per-component selector; initTable() uses the same ids as the frame header.
    unsigned char aComponentSelector[3];
    // Per-component Huffman table selector byte (initTable() stores 0 and 17).
    unsigned char aHuffmanTablesSelector[3];
    // Written after the component list, in this order: nSs, nSe, nA.
    // initTable() sets them to 0, 63 and 0 respectively.
    unsigned char nSs;
    unsigned char nSe;
    unsigned char nA;
};
  77 +
// One quantization table exactly as it is emitted into the JPEG stream:
// writeQuantizationTable() memcpy's the whole struct, so member order and
// size must not change.
struct QuantizationTable
{
    // Identifier byte that precedes the table (initTable() stores 0 and 1).
    unsigned char nPrecisionAndIdentifier;
    // The 64 quantization steps.
    unsigned char aTable[64];
};
  83 +
// One Huffman table in stream layout. writeHuffmanTable() copies the leading
// bytes of this struct verbatim, and the encoder is handed a pointer to
// aCodes, so member order and size must not change.
struct HuffmanTable
{
    // Class/identifier byte (initTable() stores 0, 1, 16 and 17).
    unsigned char nClassAndIdentifier;
    // Number of codes for each of the 16 bit lengths.
    unsigned char aCodes[16];
    // Symbol values; only the first sum(aCodes) entries are written out.
    unsigned char aTable[256];
};
  90 +
  91 +// Standard luminance quantization table (reference values, currently disabled)
  92 +//unsigned char std_Y_QT[64] =
  93 +//{
  94 +// 16, 11, 10, 16, 24, 40, 51, 61,
  95 +// 12, 12, 14, 19, 26, 58, 60, 55,
  96 +// 14, 13, 16, 24, 40, 57, 69, 56,
  97 +// 14, 17, 22, 29, 51, 87, 80, 62,
  98 +// 18, 22, 37, 56, 68, 109, 103, 77,
  99 +// 24, 35, 55, 64, 81, 104, 113, 92,
  100 +// 49, 64, 78, 87, 103, 121, 120, 101,
  101 +// 72, 92, 95, 98, 112, 100, 103, 99
  102 +//};
  103 +//
  104 +//// Standard chrominance quantization table (reference values, currently disabled)
  105 +//unsigned char std_UV_QT[64] =
  106 +//{
  107 +// 17, 18, 24, 47, 99, 99, 99, 99,
  108 +// 18, 21, 26, 66, 99, 99, 99, 99,
  109 +// 24, 26, 56, 99, 99, 99, 99, 99,
  110 +// 47, 66, 99, 99, 99, 99, 99, 99,
  111 +// 99, 99, 99, 99, 99, 99, 99, 99,
  112 +// 99, 99, 99, 99, 99, 99, 99, 99,
  113 +// 99, 99, 99, 99, 99, 99, 99, 99,
  114 +// 99, 99, 99, 99, 99, 99, 99, 99
  115 +//};
  116 +
  117 +//// Luminance quantization table (unscaled base values of the active std_Y_QT, currently disabled)
  118 +//unsigned char std_Y_QT[64] =
  119 +//{
  120 +// 6, 4, 5, 6, 5, 4, 6, 6,
  121 +// 5, 6, 7, 7, 6, 8, 10, 16,
  122 +// 10, 10, 9, 9, 10, 20, 14, 15,
  123 +// 12, 16, 23, 20, 24, 24, 23, 20,
  124 +// 22, 22, 26, 29, 37, 31, 26, 27,
  125 +// 35, 28, 22, 22, 32, 44, 32, 35,
  126 +// 38, 39, 41, 42, 41, 25, 31, 45,
  127 +// 48, 45, 40, 48, 37, 40, 41, 40
  128 +//};
  129 +//
  130 +//// Chrominance quantization table (unscaled base values of the active std_UV_QT, currently disabled)
  131 +//unsigned char std_UV_QT[64] =
  132 +//{
  133 +// 7, 7, 7, 10, 8, 10, 19, 10,
  134 +// 10, 19, 40, 26, 22, 26, 40, 40,
  135 +// 40, 40, 40, 40, 40, 40, 40, 40,
  136 +// 40, 40, 40, 40, 40, 40, 40, 40,
  137 +// 40, 40, 40, 40, 40, 40, 40, 40,
  138 +// 40, 40, 40, 40, 40, 40, 40, 40,
  139 +// 40, 40, 40, 40, 40, 40, 40, 40,
  140 +// 40, 40, 40, 40, 40, 40, 40, 40
  141 +//};
  142 +
// Scales a base quantization step by 0.75 and truncates toward zero.
// The explicit cast is required: a double inside a braced initializer for
// unsigned char is a narrowing conversion, which is ill-formed since C++11.
// Truncation yields the same values the implicit conversion would have.
static constexpr unsigned char scaleQuantStep075(int nBaseStep)
{
    return static_cast<unsigned char>(0.75 * nBaseStep);
}

// Luminance quantization table: base steps scaled by 0.75.
unsigned char std_Y_QT[64] =
{
    scaleQuantStep075(6),  scaleQuantStep075(4),  scaleQuantStep075(5),  scaleQuantStep075(6),
    scaleQuantStep075(5),  scaleQuantStep075(4),  scaleQuantStep075(6),  scaleQuantStep075(6),

    scaleQuantStep075(5),  scaleQuantStep075(6),  scaleQuantStep075(7),  scaleQuantStep075(7),
    scaleQuantStep075(6),  scaleQuantStep075(8),  scaleQuantStep075(10), scaleQuantStep075(16),

    scaleQuantStep075(10), scaleQuantStep075(10), scaleQuantStep075(9),  scaleQuantStep075(9),
    scaleQuantStep075(10), scaleQuantStep075(20), scaleQuantStep075(14), scaleQuantStep075(15),

    scaleQuantStep075(12), scaleQuantStep075(16), scaleQuantStep075(23), scaleQuantStep075(20),
    scaleQuantStep075(24), scaleQuantStep075(24), scaleQuantStep075(23), scaleQuantStep075(20),

    scaleQuantStep075(22), scaleQuantStep075(22), scaleQuantStep075(26), scaleQuantStep075(29),
    scaleQuantStep075(37), scaleQuantStep075(31), scaleQuantStep075(26), scaleQuantStep075(27),

    scaleQuantStep075(35), scaleQuantStep075(28), scaleQuantStep075(22), scaleQuantStep075(22),
    scaleQuantStep075(32), scaleQuantStep075(44), scaleQuantStep075(32), scaleQuantStep075(35),

    scaleQuantStep075(38), scaleQuantStep075(39), scaleQuantStep075(41), scaleQuantStep075(42),
    scaleQuantStep075(41), scaleQuantStep075(25), scaleQuantStep075(31), scaleQuantStep075(45),

    scaleQuantStep075(48), scaleQuantStep075(45), scaleQuantStep075(40), scaleQuantStep075(48),
    scaleQuantStep075(37), scaleQuantStep075(40), scaleQuantStep075(41), scaleQuantStep075(40)
};

// Chrominance quantization table: the first two rows are base steps scaled by
// 0.75; the remaining rows are the constant 30 (that is, 0.75 * 40).
unsigned char std_UV_QT[64] =
{
    scaleQuantStep075(7),  scaleQuantStep075(7),  scaleQuantStep075(7),  scaleQuantStep075(10),
    scaleQuantStep075(8),  scaleQuantStep075(10), scaleQuantStep075(19), scaleQuantStep075(10),

    scaleQuantStep075(10), scaleQuantStep075(19), scaleQuantStep075(40), scaleQuantStep075(26),
    scaleQuantStep075(22), scaleQuantStep075(26), scaleQuantStep075(40), scaleQuantStep075(40),

    30, 30, 30, 30, 30, 30, 30, 30,
    30, 30, 30, 30, 30, 30, 30, 30,
    30, 30, 30, 30, 30, 30, 30, 30,
    30, 30, 30, 30, 30, 30, 30, 30,
    30, 30, 30, 30, 30, 30, 30, 30,
    30, 30, 30, 30, 30, 30, 30, 30
};
  168 +
// DC Huffman table for the luminance component: number of codes per bit
// length (16 entries) followed by the symbol values those codes map to.
unsigned char STD_DC_Y_NRCODES[16] =
{
    0, 1, 5, 1, 1, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0
};
unsigned char STD_DC_Y_VALUES[12] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};

// DC Huffman table for the chrominance components, same layout as above.
unsigned char STD_DC_UV_NRCODES[16] =
{
    0, 3, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 0, 0, 0, 0, 0
};
unsigned char STD_DC_UV_VALUES[12] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};
  174 +
// AC Huffman table for the luminance component.
// NRCODES holds the number of codes for each of the 16 bit lengths; its
// entries add up to 162, the number of symbols listed in VALUES.
unsigned char STD_AC_Y_NRCODES[16] =
{
    0, 2, 1, 3, 3, 2, 4, 3,
    5, 5, 4, 4, 0, 0, 1, 0x7D
};

// Symbol values in code order.
unsigned char STD_AC_Y_VALUES[162] =
{
    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};
  200 +
// AC Huffman table for the chrominance components.
// NRCODES holds the number of codes for each of the 16 bit lengths; its
// entries add up to 162, the number of symbols listed in VALUES.
unsigned char STD_AC_UV_NRCODES[16] =
{
    0, 2, 1, 2, 4, 4, 3, 4,
    7, 5, 4, 4, 0, 1, 2, 0x77
};

// Symbol values in code order.
unsigned char STD_AC_UV_VALUES[162] =
{
    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};
  226 +
// Integer division of x by d, rounded up for non-negative x and positive d.
int DivUp(int x, int d)
{
    const int nBiased = x + (d - 1);
    return nBiased / d;
}
  231 +
  232 +template<typename T>
  233 +void writeAndAdvance(unsigned char *&pData, T nElement)
  234 +{
  235 + writeBigEndian<T>(pData, nElement);
  236 + pData += sizeof(T);
  237 +}
  238 +
// Emits a two-byte marker (0xFF followed by nMarker) at pData and advances
// the caller's cursor past it.
void writeMarker(unsigned char nMarker, unsigned char *&pData)
{
    pData[0] = 0x0ff;
    pData[1] = nMarker;
    pData += 2;
}
  244 +
  245 +void writeJFIFTag(unsigned char *&pData)
  246 +{
  247 + const char JFIF_TAG[] =
  248 + {
  249 + 0x4a, 0x46, 0x49, 0x46, 0x00,
  250 + 0x01, 0x02,
  251 + 0x00,
  252 + 0x00, 0x01, 0x00, 0x01,
  253 + 0x00, 0x00
  254 + };
  255 +
  256 + writeMarker(0x0e0, pData);
  257 + writeAndAdvance<unsigned short>(pData, sizeof(JFIF_TAG) + sizeof(unsigned short));
  258 + memcpy(pData, JFIF_TAG, sizeof(JFIF_TAG));
  259 + pData += sizeof(JFIF_TAG);
  260 +}
  261 +
  262 +void writeFrameHeader(const FrameHeader &header, unsigned char *&pData)
  263 +{
  264 + unsigned char aTemp[128];
  265 + unsigned char *pTemp = aTemp;
  266 +
  267 + writeAndAdvance<unsigned char>(pTemp, header.nSamplePrecision);
  268 + writeAndAdvance<unsigned short>(pTemp, header.nHeight);
  269 + writeAndAdvance<unsigned short>(pTemp, header.nWidth);
  270 + writeAndAdvance<unsigned char>(pTemp, header.nComponents);
  271 +
  272 + for (int c = 0; c<header.nComponents; ++c)
  273 + {
  274 + writeAndAdvance<unsigned char>(pTemp, header.aComponentIdentifier[c]);
  275 + writeAndAdvance<unsigned char>(pTemp, header.aSamplingFactors[c]);
  276 + writeAndAdvance<unsigned char>(pTemp, header.aQuantizationTableSelector[c]);
  277 + }
  278 +
  279 + unsigned short nLength = (unsigned short)(pTemp - aTemp);
  280 +
  281 + writeMarker(0x0C0, pData);
  282 + writeAndAdvance<unsigned short>(pData, nLength + 2);
  283 + memcpy(pData, aTemp, nLength);
  284 + pData += nLength;
  285 +}
  286 +
  287 +void writeScanHeader(const ScanHeader &header, unsigned char *&pData)
  288 +{
  289 + unsigned char aTemp[128];
  290 + unsigned char *pTemp = aTemp;
  291 +
  292 + writeAndAdvance<unsigned char>(pTemp, header.nComponents);
  293 +
  294 + for (int c = 0; c<header.nComponents; ++c)
  295 + {
  296 + writeAndAdvance<unsigned char>(pTemp, header.aComponentSelector[c]);
  297 + writeAndAdvance<unsigned char>(pTemp, header.aHuffmanTablesSelector[c]);
  298 + }
  299 +
  300 + writeAndAdvance<unsigned char>(pTemp, header.nSs);
  301 + writeAndAdvance<unsigned char>(pTemp, header.nSe);
  302 + writeAndAdvance<unsigned char>(pTemp, header.nA);
  303 +
  304 + unsigned short nLength = (unsigned short)(pTemp - aTemp);
  305 +
  306 + writeMarker(0x0DA, pData);
  307 + writeAndAdvance<unsigned short>(pData, nLength + 2);
  308 + memcpy(pData, aTemp, nLength);
  309 + pData += nLength;
  310 +}
  311 +
  312 +void writeQuantizationTable(const QuantizationTable &table, unsigned char *&pData)
  313 +{
  314 + writeMarker(0x0DB, pData);
  315 + writeAndAdvance<unsigned short>(pData, sizeof(QuantizationTable) + 2);
  316 + memcpy(pData, &table, sizeof(QuantizationTable));
  317 + pData += sizeof(QuantizationTable);
  318 +}
  319 +
  320 +void writeHuffmanTable(const HuffmanTable &table, unsigned char *&pData)
  321 +{
  322 + writeMarker(0x0C4, pData);
  323 +
  324 + // Number of Codes for Bit Lengths [1..16]
  325 + int nCodeCount = 0;
  326 +
  327 + for (int i = 0; i < 16; ++i)
  328 + {
  329 + nCodeCount += table.aCodes[i];
  330 + }
  331 +
  332 + writeAndAdvance<unsigned short>(pData, 17 + nCodeCount + 2);
  333 + memcpy(pData, &table, 17 + nCodeCount);
  334 + pData += 17 + nCodeCount;
  335 +}
  336 +
  337 +bool printfNPPinfo(int cudaVerMajor, int cudaVerMinor)
  338 +{
  339 + const NppLibraryVersion *libVer = nppGetLibVersion();
  340 +
  341 + printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
  342 +
  343 + int driverVersion, runtimeVersion;
  344 + cudaDriverGetVersion(&driverVersion);
  345 + cudaRuntimeGetVersion(&runtimeVersion);
  346 +
  347 + printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000, (driverVersion % 100) / 10);
  348 + printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, (runtimeVersion % 100) / 10);
  349 +
  350 + bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor);
  351 + return bVal;
  352 +}
  353 +
  354 +NppiDCTState *pDCTState;
  355 +FrameHeader oFrameHeader;
  356 +FrameHeader oFrameHeaderFixedSize;
  357 +ScanHeader oScanHeader;
  358 +QuantizationTable aQuantizationTables[4];
  359 +Npp8u *pdQuantizationTables;
  360 +HuffmanTable aHuffmanTables[4];
  361 +HuffmanTable *pHuffmanDCTables;
  362 +HuffmanTable *pHuffmanACTables;
  363 +int nMCUBlocksH;
  364 +int nMCUBlocksV;
  365 +int nMCUBlocksHFixedSize;
  366 +int nMCUBlocksVFixedSize;
  367 +Npp8u *pdScan;
  368 +NppiEncodeHuffmanSpec *apHuffmanDCTable[3];
  369 +NppiEncodeHuffmanSpec *apHuffmanACTable[3];
  370 +unsigned char *pDstJpeg;
  371 +unsigned char *pDstOutput;
  372 +int nRestartInterval;
  373 +
  374 +int initTable()
  375 +{
  376 + NPP_CHECK_NPP(nppiDCTInitAlloc(&pDCTState));
  377 +
  378 + nRestartInterval = -1;
  379 +
  380 + cudaMalloc(&pdQuantizationTables, 64 * 4);
  381 + pHuffmanDCTables = aHuffmanTables;
  382 + pHuffmanACTables = &aHuffmanTables[2];
  383 + memset(aQuantizationTables, 0, 4 * sizeof(QuantizationTable));
  384 + memset(aHuffmanTables, 0, 4 * sizeof(HuffmanTable));
  385 + memset(&oFrameHeader, 0, sizeof(FrameHeader));
  386 +
  387 +
  388 + //????Huffman??
  389 + aHuffmanTables[0].nClassAndIdentifier = 0;
  390 + memcpy(aHuffmanTables[0].aCodes, STD_DC_Y_NRCODES, 16);
  391 + memcpy(aHuffmanTables[0].aTable, STD_DC_Y_VALUES, 12);
  392 +
  393 + aHuffmanTables[1].nClassAndIdentifier = 1;
  394 + memcpy(aHuffmanTables[1].aCodes, STD_DC_UV_NRCODES, 16);
  395 + memcpy(aHuffmanTables[1].aTable, STD_DC_UV_VALUES, 12);
  396 +
  397 + aHuffmanTables[2].nClassAndIdentifier = 16;
  398 + memcpy(aHuffmanTables[2].aCodes, STD_AC_Y_NRCODES, 16);
  399 + memcpy(aHuffmanTables[2].aTable, STD_AC_Y_VALUES, 162);
  400 +
  401 + aHuffmanTables[3].nClassAndIdentifier = 17;
  402 + memcpy(aHuffmanTables[3].aCodes, STD_AC_UV_NRCODES, 16);
  403 + memcpy(aHuffmanTables[3].aTable, STD_AC_UV_VALUES, 162);
  404 +
  405 +
  406 + //????量????
  407 + aQuantizationTables[0].nPrecisionAndIdentifier = 0;
  408 + memcpy(aQuantizationTables[0].aTable, std_Y_QT, 64);
  409 + aQuantizationTables[1].nPrecisionAndIdentifier = 1;
  410 + memcpy(aQuantizationTables[1].aTable, std_UV_QT, 64);
  411 +
  412 + NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables, aQuantizationTables[0].aTable, 64, cudaMemcpyHostToDevice));
  413 + NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables + 64, aQuantizationTables[1].aTable, 64, cudaMemcpyHostToDevice));
  414 +
  415 + oFrameHeader.nSamplePrecision = 8;
  416 + oFrameHeader.nComponents = 3;
  417 + oFrameHeader.aComponentIdentifier[0] = 1;
  418 + oFrameHeader.aComponentIdentifier[1] = 2;
  419 + oFrameHeader.aComponentIdentifier[2] = 3;
  420 + oFrameHeader.aSamplingFactors[0] = 34;
  421 + oFrameHeader.aSamplingFactors[1] = 17;
  422 + oFrameHeader.aSamplingFactors[2] = 17;
  423 + oFrameHeader.aQuantizationTableSelector[0] = 0;
  424 + oFrameHeader.aQuantizationTableSelector[1] = 1;
  425 + oFrameHeader.aQuantizationTableSelector[2] = 1;
  426 +
  427 + for (int i = 0; i < oFrameHeader.nComponents; ++i)
  428 + {
  429 + nMCUBlocksV = max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] & 0x0f);
  430 + nMCUBlocksH = max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] >> 4);
  431 + }
  432 + NPP_CHECK_CUDA(cudaMalloc(&pdScan, 4 << 20));
  433 +
  434 +
  435 +
  436 + oScanHeader.nComponents = 3;
  437 + oScanHeader.aComponentSelector[0] = 1;
  438 + oScanHeader.aComponentSelector[1] = 2;
  439 + oScanHeader.aComponentSelector[2] = 3;
  440 + oScanHeader.aHuffmanTablesSelector[0] = 0;
  441 + oScanHeader.aHuffmanTablesSelector[1] = 17;
  442 + oScanHeader.aHuffmanTablesSelector[2] = 17;
  443 + oScanHeader.nSs = 0;
  444 + oScanHeader.nSe = 63;
  445 + oScanHeader.nA = 0;
  446 +
  447 +
  448 + return 0;
  449 +}
  450 +
  451 +NppiSize aSrcSize[3];
  452 +Npp16s *apdDCT[3];// = { 0, 0, 0 };
  453 +Npp32s aDCTStep[3];
  454 +
  455 +Npp8u *apSrcImage[3];// = { 0, 0, 0 };
  456 +Npp32s aSrcImageStep[3];
  457 +size_t aSrcPitch[3];
  458 +
  459 +
  460 +int releaseJpegNPP()
  461 +{
  462 + nppiDCTFree(pDCTState);
  463 + cudaFree(pdQuantizationTables);
  464 + cudaFree(pdScan);
  465 + for (int i = 0; i < 3; ++i)
  466 + {
  467 + cudaFree(apdDCT[i]);
  468 + cudaFree(apSrcImage[i]);
  469 + }
  470 + return 0;
  471 +}
  472 +
  473 +
  474 +int initTable(int flag, int width, int height)
  475 +{
  476 + //????帧头
  477 + oFrameHeaderFixedSize.nSamplePrecision = 8;
  478 + oFrameHeaderFixedSize.nComponents = 3;
  479 + oFrameHeaderFixedSize.aComponentIdentifier[0] = 1;
  480 + oFrameHeaderFixedSize.aComponentIdentifier[1] = 2;
  481 + oFrameHeaderFixedSize.aComponentIdentifier[2] = 3;
  482 + oFrameHeaderFixedSize.aSamplingFactors[0] = 34;
  483 + oFrameHeaderFixedSize.aSamplingFactors[1] = 17;
  484 + oFrameHeaderFixedSize.aSamplingFactors[2] = 17;
  485 + oFrameHeaderFixedSize.aQuantizationTableSelector[0] = 0;
  486 + oFrameHeaderFixedSize.aQuantizationTableSelector[1] = 1;
  487 + oFrameHeaderFixedSize.aQuantizationTableSelector[2] = 1;
  488 + oFrameHeaderFixedSize.nWidth = width;
  489 + oFrameHeaderFixedSize.nHeight = height;
  490 +
  491 + for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i)
  492 + {
  493 + nMCUBlocksVFixedSize = max(nMCUBlocksVFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f);
  494 + nMCUBlocksHFixedSize = max(nMCUBlocksHFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] >> 4);
  495 + }
  496 +
  497 + for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i)
  498 + {
  499 + NppiSize oBlocks;
  500 + NppiSize oBlocksPerMCU = { oFrameHeaderFixedSize.aSamplingFactors[i] >> 4, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f };
  501 +
  502 + oBlocks.width = (int)ceil((oFrameHeaderFixedSize.nWidth + 7) / 8 *
  503 + static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksHFixedSize);
  504 + oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
  505 +
  506 + oBlocks.height = (int)ceil((oFrameHeaderFixedSize.nHeight + 7) / 8 *
  507 + static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksVFixedSize);
  508 + oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
  509 +
  510 + aSrcSize[i].width = oBlocks.width * 8;
  511 + aSrcSize[i].height = oBlocks.height * 8;
  512 +
  513 + // Allocate Memory
  514 + size_t nPitch;
  515 + NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height));
  516 + aDCTStep[i] = static_cast<Npp32s>(nPitch);
  517 +
  518 + NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height));
  519 +
  520 + aSrcPitch[i] = nPitch;
  521 + aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
  522 + }
  523 +
  524 + return 0;
  525 +}
  526 +
  527 +int jpegNPP(const char *szOutputFile, float* d_srcRGB)
  528 +{
  529 + //RGB2YUV
  530 + cudaError_t cudaStatus;
  531 + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight,
  532 + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
  533 + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
  534 + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
  535 +
  536 + /**
  537 + * Forward DCT, quantization and level shift part of the JPEG encoding.
  538 + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1
  539 + * macro blocks. The new version of the primitive takes the ROI in image pixel size and
  540 + * works with DCT coefficients that are in zig-zag order.
  541 + */
  542 + int k = 0;
  543 + //LOG_INFO("NPP_CHECK_NPP:%d", 1);
  544 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
  545 + apdDCT[0], aDCTStep[0],
  546 + pdQuantizationTables + k * 64,
  547 + aSrcSize[0],
  548 + pDCTState)))
  549 + {
  550 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  551 + return EXIT_FAILURE;
  552 + }
  553 +
  554 + k = 1;
  555 + //LOG_INFO("NPP_CHECK_NPP:%d", 2);
  556 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
  557 + apdDCT[1], aDCTStep[1],
  558 + pdQuantizationTables + k * 64,
  559 + aSrcSize[1],
  560 + pDCTState)))
  561 + {
  562 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  563 + return EXIT_FAILURE;
  564 + }
  565 +
  566 + //LOG_INFO("NPP_CHECK_NPP:%d", 3);
  567 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
  568 + apdDCT[2], aDCTStep[2],
  569 + pdQuantizationTables + k * 64,
  570 + aSrcSize[2],
  571 + pDCTState)))
  572 + {
  573 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  574 + return EXIT_FAILURE;
  575 + }
  576 +
  577 + // Huffman Encoding
  578 +
  579 + Npp32s nScanLength;
  580 + Npp8u *pJpegEncoderTemp;
  581 +
  582 +#if (CUDA_VERSION == 8000)
  583 + Npp32s nTempSize; //when using CUDA8
  584 +#else
  585 + size_t nTempSize; //when using CUDA9
  586 +#endif
  587 + //modified by Junlin 190221
  588 +
  589 + //LOG_INFO("NPP_CHECK_NPP:%d",4);
  590 + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
  591 + {
  592 + printf("nppiEncodeHuffmanGetSize Failed!\n");
  593 + return EXIT_FAILURE;
  594 + }
  595 +
  596 + //LOG_INFO("NPP_CHECK_CUDA:%d",5);
  597 + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
  598 +
  599 + /**
  600 + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
  601 + */
  602 + NppStatus t_status;
  603 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
  604 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
  605 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
  606 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
  607 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
  608 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
  609 +
  610 + /**
  611 + * Huffman Encoding of the JPEG Encoding.
  612 + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
  613 + */
  614 + Npp32s nSs = 0;
  615 + Npp32s nSe = 63;
  616 + Npp32s nH = 0;
  617 + Npp32s nL = 0;
  618 + //LOG_INFO("NPP_CHECK_NPP:%d",6);
  619 + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
  620 + 0, nSs, nSe, nH, nL,
  621 + pdScan, &nScanLength,
  622 + apHuffmanDCTable,
  623 + apHuffmanACTable,
  624 + aSrcSize,
  625 + pJpegEncoderTemp)))
  626 + {
  627 + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n");
  628 + return EXIT_FAILURE;
  629 + }
  630 +
  631 + for (int i = 0; i < 3; ++i)
  632 + {
  633 + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
  634 + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
  635 + }
  636 + // Write JPEG
  637 + pDstJpeg = new unsigned char[4 << 20]{};
  638 + pDstOutput = pDstJpeg;
  639 +
  640 + writeMarker(0x0D8, pDstOutput);
  641 + writeJFIFTag(pDstOutput);
  642 + writeQuantizationTable(aQuantizationTables[0], pDstOutput);
  643 + writeQuantizationTable(aQuantizationTables[1], pDstOutput);
  644 + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
  645 + writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
  646 + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
  647 + writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
  648 + writeFrameHeader(oFrameHeaderFixedSize, pDstOutput);
  649 + writeScanHeader(oScanHeader, pDstOutput);
  650 +
  651 + //LOG_INFO("NPP_CHECK_CUDA:%d",7);
  652 + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
  653 +
  654 + pDstOutput += nScanLength;
  655 + writeMarker(0x0D9, pDstOutput);
  656 + {
  657 + // Write result to file.
  658 + std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
  659 + outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
  660 + }
  661 +
  662 + // Cleanup
  663 + cudaFree(pJpegEncoderTemp);
  664 + delete[] pDstJpeg;
  665 +
  666 +
  667 + return EXIT_SUCCESS;
  668 +}
  669 +
  670 +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB)
  671 +{
  672 + //RGB2YUV
  673 + cudaError_t cudaStatus;
  674 + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight,
  675 + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
  676 + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
  677 + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
  678 +
  679 + /**
  680 + * Forward DCT, quantization and level shift part of the JPEG encoding.
  681 + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1
  682 + * macro blocks. The new version of the primitive takes the ROI in image pixel size and
  683 + * works with DCT coefficients that are in zig-zag order.
  684 + */
  685 + int k = 0;
  686 + //LOG_INFO("NPP_CHECK_NPP:%d", 1);
  687 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
  688 + apdDCT[0], aDCTStep[0],
  689 + pdQuantizationTables + k * 64,
  690 + aSrcSize[0],
  691 + pDCTState)))
  692 + {
  693 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  694 + return EXIT_FAILURE;
  695 + }
  696 +
  697 + k = 1;
  698 + //LOG_INFO("NPP_CHECK_NPP:%d", 2);
  699 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
  700 + apdDCT[1], aDCTStep[1],
  701 + pdQuantizationTables + k * 64,
  702 + aSrcSize[1],
  703 + pDCTState)))
  704 + {
  705 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  706 + return EXIT_FAILURE;
  707 + }
  708 +
  709 + //LOG_INFO("NPP_CHECK_NPP:%d", 3);
  710 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
  711 + apdDCT[2], aDCTStep[2],
  712 + pdQuantizationTables + k * 64,
  713 + aSrcSize[2],
  714 + pDCTState)))
  715 + {
  716 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  717 + return EXIT_FAILURE;
  718 + }
  719 +
  720 + // Huffman Encoding
  721 +
  722 + Npp32s nScanLength;
  723 + Npp8u *pJpegEncoderTemp;
  724 +
  725 +#if (CUDA_VERSION == 8000)
  726 + Npp32s nTempSize; //when using CUDA8
  727 +#else
  728 + size_t nTempSize; //when using CUDA9
  729 +#endif
  730 + //modified by Junlin 190221
  731 +
  732 + //LOG_INFO("NPP_CHECK_NPP:%d",4);
  733 + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
  734 + {
  735 + printf("nppiEncodeHuffmanGetSize Failed!\n");
  736 + return EXIT_FAILURE;
  737 + }
  738 +
  739 + //LOG_INFO("NPP_CHECK_CUDA:%d",5);
  740 + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
  741 +
  742 + /**
  743 + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
  744 + */
  745 + NppStatus t_status;
  746 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
  747 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
  748 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
  749 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
  750 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
  751 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
  752 +
  753 + /**
  754 + * Huffman Encoding of the JPEG Encoding.
  755 + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
  756 + */
  757 + Npp32s nSs = 0;
  758 + Npp32s nSe = 63;
  759 + Npp32s nH = 0;
  760 + Npp32s nL = 0;
  761 + //LOG_INFO("NPP_CHECK_NPP:%d",6);
  762 + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
  763 + 0, nSs, nSe, nH, nL,
  764 + pdScan, &nScanLength,
  765 + apHuffmanDCTable,
  766 + apHuffmanACTable,
  767 + aSrcSize,
  768 + pJpegEncoderTemp)))
  769 + {
  770 + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n");
  771 + return EXIT_FAILURE;
  772 + }
  773 +
  774 + for (int i = 0; i < 3; ++i)
  775 + {
  776 + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
  777 + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
  778 + }
  779 + // Write JPEG
  780 + pDstJpeg = new unsigned char[4 << 20]{};
  781 + pDstOutput = pDstJpeg;
  782 +
  783 + writeMarker(0x0D8, pDstOutput);
  784 + writeJFIFTag(pDstOutput);
  785 + writeQuantizationTable(aQuantizationTables[0], pDstOutput);
  786 + writeQuantizationTable(aQuantizationTables[1], pDstOutput);
  787 + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
  788 + writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
  789 + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
  790 + writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
  791 + writeFrameHeader(oFrameHeaderFixedSize, pDstOutput);
  792 + writeScanHeader(oScanHeader, pDstOutput);
  793 +
  794 + //LOG_INFO("NPP_CHECK_CUDA:%d",7);
  795 + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
  796 +
  797 + pDstOutput += nScanLength;
  798 + writeMarker(0x0D9, pDstOutput);
  799 + {
  800 + // Write result to file.
  801 + std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
  802 + outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
  803 + }
  804 +
  805 + // Cleanup
  806 + cudaFree(pJpegEncoderTemp);
  807 + delete[] pDstJpeg;
  808 +
  809 +
  810 + return EXIT_SUCCESS;
  811 +}
  812 +
  813 +
  814 +int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height)
  815 +{
  816 + NppiSize aSrcSize[3];
  817 + Npp16s *apdDCT[3] = { 0, 0, 0 };
  818 + Npp32s aDCTStep[3];
  819 +
  820 + Npp8u *apSrcImage[3] = { 0, 0, 0 };
  821 + Npp32s aSrcImageStep[3];
  822 + size_t aSrcPitch[3];
  823 +
  824 +
  825 + //????帧头
  826 + oFrameHeader.nWidth = img_width;
  827 + oFrameHeader.nHeight = img_height;
  828 +
  829 + for (int i = 0; i < oFrameHeader.nComponents; ++i)
  830 + {
  831 + NppiSize oBlocks;
  832 + NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f };
  833 +
  834 + oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 *
  835 + static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH);
  836 + oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
  837 +
  838 + oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 *
  839 + static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV);
  840 + oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
  841 +
  842 + aSrcSize[i].width = oBlocks.width * 8;
  843 + aSrcSize[i].height = oBlocks.height * 8;
  844 +
  845 + // Allocate Memory
  846 + size_t nPitch;
  847 + //LOG_INFO("NPP_CHECK_CUDA:%d",1);
  848 + NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height));
  849 + aDCTStep[i] = static_cast<Npp32s>(nPitch);
  850 +
  851 + //LOG_INFO("NPP_CHECK_CUDA:%d",2);
  852 + NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height));
  853 +
  854 + aSrcPitch[i] = nPitch;
  855 + aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
  856 + }
  857 +
  858 + //RGB2YUV
  859 + cudaError_t cudaStatus;
  860 + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height,
  861 + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
  862 + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
  863 + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
  864 +
  865 + /**
  866 + * Forward DCT, quantization and level shift part of the JPEG encoding.
  867 + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1
  868 + * macro blocks. The new version of the primitive takes the ROI in image pixel size and
  869 + * works with DCT coefficients that are in zig-zag order.
  870 + */
  871 + int k = 0;
  872 + //LOG_INFO("NPP_CHECK_CUDA:%d",3);
  873 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
  874 + apdDCT[0], aDCTStep[0],
  875 + pdQuantizationTables + k * 64,
  876 + aSrcSize[0],
  877 + pDCTState)))
  878 + {
  879 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  880 + return EXIT_FAILURE;
  881 + }
  882 + k = 1;
  883 +
  884 + //LOG_INFO("NPP_CHECK_CUDA:%d",4);
  885 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
  886 + apdDCT[1], aDCTStep[1],
  887 + pdQuantizationTables + k * 64,
  888 + aSrcSize[1],
  889 + pDCTState)))
  890 + {
  891 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  892 + return EXIT_FAILURE;
  893 + }
  894 +
  895 + //LOG_INFO("NPP_CHECK_CUDA:%d",5);
  896 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
  897 + apdDCT[2], aDCTStep[2],
  898 + pdQuantizationTables + k * 64,
  899 + aSrcSize[2],
  900 + pDCTState)))
  901 + {
  902 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  903 + return EXIT_FAILURE;
  904 + }
  905 +
  906 + // Huffman Encoding
  907 +
  908 + Npp32s nScanLength;
  909 + Npp8u *pJpegEncoderTemp;
  910 +
  911 +#if (CUDA_VERSION == 8000)
  912 + Npp32s nTempSize; //when using CUDA8
  913 +#else
  914 + size_t nTempSize; //when using CUDA9
  915 +#endif
  916 + //modified by Junlin 190221
  917 +
  918 + //LOG_INFO("NPP_CHECK_CUDA:%d",6);
  919 + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
  920 + {
  921 + printf("nppiEncodeHuffmanGetSize Failed!\n");
  922 + return EXIT_FAILURE;
  923 + }
  924 +
  925 + //LOG_INFO("NPP_CHECK_CUDA:%d",7);
  926 + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
  927 +
  928 + /**
  929 + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
  930 + */
  931 + NppStatus t_status;
  932 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
  933 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
  934 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
  935 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
  936 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
  937 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
  938 +
  939 + /**
  940 + * Huffman Encoding of the JPEG Encoding.
  941 + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
  942 + */
  943 + Npp32s nSs = 0;
  944 + Npp32s nSe = 63;
  945 + Npp32s nH = 0;
  946 + Npp32s nL = 0;
  947 + //LOG_INFO("NPP_CHECK_CUDA:%d",8);
  948 + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
  949 + 0, nSs, nSe, nH, nL,
  950 + pdScan, &nScanLength,
  951 + apHuffmanDCTable,
  952 + apHuffmanACTable,
  953 + aSrcSize,
  954 + pJpegEncoderTemp)))
  955 + {
  956 + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n");
  957 + return EXIT_FAILURE;
  958 + }
  959 +
  960 + for (int i = 0; i < 3; ++i)
  961 + {
  962 + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
  963 + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
  964 + }
  965 + // Write JPEG
  966 + pDstJpeg = new unsigned char[4 << 20]{};
  967 + pDstOutput = pDstJpeg;
  968 +
  969 + writeMarker(0x0D8, pDstOutput);
  970 + writeJFIFTag(pDstOutput);
  971 + writeQuantizationTable(aQuantizationTables[0], pDstOutput);
  972 + writeQuantizationTable(aQuantizationTables[1], pDstOutput);
  973 + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
  974 + writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
  975 + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
  976 + writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
  977 + writeFrameHeader(oFrameHeader, pDstOutput);
  978 + writeScanHeader(oScanHeader, pDstOutput);
  979 +
  980 + //LOG_INFO("NPP_CHECK_CUDA:%d",9);
  981 + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
  982 +
  983 + pDstOutput += nScanLength;
  984 + writeMarker(0x0D9, pDstOutput);
  985 +
  986 + {
  987 + // Write result to file.
  988 + std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
  989 + outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
  990 + }
  991 +
  992 + // Cleanup
  993 + cudaFree(pJpegEncoderTemp);
  994 + delete[] pDstJpeg;
  995 + for (int i = 0; i < 3; ++i)
  996 + {
  997 + cudaFree(apdDCT[i]);
  998 + cudaFree(apSrcImage[i]);
  999 + }
  1000 +
  1001 + return EXIT_SUCCESS;
  1002 +}
  1003 +
  1004 +
  1005 +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height)
  1006 +{
  1007 + NppiSize aSrcSize[3];
  1008 + Npp16s *apdDCT[3] = { 0, 0, 0 };
  1009 + Npp32s aDCTStep[3];
  1010 +
  1011 + Npp8u *apSrcImage[3] = { 0, 0, 0 };
  1012 + Npp32s aSrcImageStep[3];
  1013 + size_t aSrcPitch[3];
  1014 +
  1015 +
  1016 + //????帧头
  1017 + oFrameHeader.nWidth = img_width;
  1018 + oFrameHeader.nHeight = img_height;
  1019 +
  1020 + for (int i = 0; i < oFrameHeader.nComponents; ++i)
  1021 + {
  1022 + NppiSize oBlocks;
  1023 + NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f };
  1024 +
  1025 + oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 *
  1026 + static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH);
  1027 + oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
  1028 +
  1029 + oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 *
  1030 + static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV);
  1031 + oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
  1032 +
  1033 + aSrcSize[i].width = oBlocks.width * 8;
  1034 + aSrcSize[i].height = oBlocks.height * 8;
  1035 +
  1036 + // Allocate Memory
  1037 + size_t nPitch;
  1038 + //LOG_INFO("NPP_CHECK_CUDA:%d",1);
  1039 + NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height));
  1040 + aDCTStep[i] = static_cast<Npp32s>(nPitch);
  1041 +
  1042 + //LOG_INFO("NPP_CHECK_CUDA:%d",2);
  1043 + NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height));
  1044 +
  1045 + aSrcPitch[i] = nPitch;
  1046 + aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
  1047 + }
  1048 +
  1049 + //RGB2YUV
  1050 + cudaError_t cudaStatus;
  1051 + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height,
  1052 + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
  1053 + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
  1054 + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
  1055 +
  1056 + /**
  1057 + * Forward DCT, quantization and level shift part of the JPEG encoding.
  1058 + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1
  1059 + * macro blocks. The new version of the primitive takes the ROI in image pixel size and
  1060 + * works with DCT coefficients that are in zig-zag order.
  1061 + */
  1062 + int k = 0;
  1063 + //LOG_INFO("NPP_CHECK_CUDA:%d",3);
  1064 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
  1065 + apdDCT[0], aDCTStep[0],
  1066 + pdQuantizationTables + k * 64,
  1067 + aSrcSize[0],
  1068 + pDCTState)))
  1069 + {
  1070 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  1071 + return EXIT_FAILURE;
  1072 + }
  1073 + k = 1;
  1074 +
  1075 + //LOG_INFO("NPP_CHECK_CUDA:%d",4);
  1076 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
  1077 + apdDCT[1], aDCTStep[1],
  1078 + pdQuantizationTables + k * 64,
  1079 + aSrcSize[1],
  1080 + pDCTState)))
  1081 + {
  1082 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  1083 + return EXIT_FAILURE;
  1084 + }
  1085 +
  1086 + //LOG_INFO("NPP_CHECK_CUDA:%d",5);
  1087 + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
  1088 + apdDCT[2], aDCTStep[2],
  1089 + pdQuantizationTables + k * 64,
  1090 + aSrcSize[2],
  1091 + pDCTState)))
  1092 + {
  1093 + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
  1094 + return EXIT_FAILURE;
  1095 + }
  1096 +
  1097 + // Huffman Encoding
  1098 +
  1099 + Npp32s nScanLength;
  1100 + Npp8u *pJpegEncoderTemp;
  1101 +
  1102 +#if (CUDA_VERSION == 8000)
  1103 + Npp32s nTempSize; //when using CUDA8
  1104 +#else
  1105 + size_t nTempSize; //when using CUDA9
  1106 +#endif
  1107 + //modified by Junlin 190221
  1108 +
  1109 + //LOG_INFO("NPP_CHECK_CUDA:%d",6);
  1110 + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
  1111 + {
  1112 + printf("nppiEncodeHuffmanGetSize Failed!\n");
  1113 + return EXIT_FAILURE;
  1114 + }
  1115 +
  1116 + //LOG_INFO("NPP_CHECK_CUDA:%d",7);
  1117 + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
  1118 +
  1119 + /**
  1120 + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
  1121 + */
  1122 + NppStatus t_status;
  1123 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
  1124 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
  1125 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
  1126 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
  1127 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
  1128 + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
  1129 +
  1130 + /**
  1131 + * Huffman Encoding of the JPEG Encoding.
  1132 + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
  1133 + */
  1134 + Npp32s nSs = 0;
  1135 + Npp32s nSe = 63;
  1136 + Npp32s nH = 0;
  1137 + Npp32s nL = 0;
  1138 + //LOG_INFO("NPP_CHECK_CUDA:%d",8);
  1139 + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
  1140 + 0, nSs, nSe, nH, nL,
  1141 + pdScan, &nScanLength,
  1142 + apHuffmanDCTable,
  1143 + apHuffmanACTable,
  1144 + aSrcSize,
  1145 + pJpegEncoderTemp)))
  1146 + {
  1147 + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n");
  1148 + return EXIT_FAILURE;
  1149 + }
  1150 +
  1151 + for (int i = 0; i < 3; ++i)
  1152 + {
  1153 + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
  1154 + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
  1155 + }
  1156 + // Write JPEG
  1157 + pDstJpeg = new unsigned char[4 << 20]{};
  1158 + pDstOutput = pDstJpeg;
  1159 +
  1160 + writeMarker(0x0D8, pDstOutput);
  1161 + writeJFIFTag(pDstOutput);
  1162 + writeQuantizationTable(aQuantizationTables[0], pDstOutput);
  1163 + writeQuantizationTable(aQuantizationTables[1], pDstOutput);
  1164 + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
  1165 + writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
  1166 + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
  1167 + writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
  1168 + writeFrameHeader(oFrameHeader, pDstOutput);
  1169 + writeScanHeader(oScanHeader, pDstOutput);
  1170 +
  1171 + //LOG_INFO("NPP_CHECK_CUDA:%d",9);
  1172 + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
  1173 +
  1174 + pDstOutput += nScanLength;
  1175 + writeMarker(0x0D9, pDstOutput);
  1176 +
  1177 + {
  1178 + // Write result to file.
  1179 + std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
  1180 + outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
  1181 + }
  1182 +
  1183 + // Cleanup
  1184 + cudaFree(pJpegEncoderTemp);
  1185 + delete[] pDstJpeg;
  1186 + for (int i = 0; i < 3; ++i)
  1187 + {
  1188 + cudaFree(apdDCT[i]);
  1189 + cudaFree(apSrcImage[i]);
  1190 + }
  1191 +
  1192 + return EXIT_SUCCESS;
  1193 +}
src/main.cpp
@@ -10,8 +10,6 @@ @@ -10,8 +10,6 @@
10 10
11 #include <chrono> 11 #include <chrono>
12 12
13 -  
14 -  
15 #include <unistd.h> 13 #include <unistd.h>
16 14
17 15
@@ -24,6 +22,7 @@ @@ -24,6 +22,7 @@
24 #include "arpa/inet.h" 22 #include "arpa/inet.h"
25 #endif 23 #endif
26 24
  25 +#include "utiltools.hpp"
27 26
28 #define MIN_RTP_PORT 10000 27 #define MIN_RTP_PORT 10000
29 #define MAX_RTP_PORT 60000 28 #define MAX_RTP_PORT 60000
@@ -88,7 +87,7 @@ int sum2 = 0; @@ -88,7 +87,7 @@ int sum2 = 0;
88 87
89 cudaStream_t stream[2]; 88 cudaStream_t stream[2];
90 89
91 -string data_home = "/data/tongtu/"; 90 +string data_home = "/mnt/data/cmhu/tmp/";
92 91
93 92
94 #define checkCudaErrors(S) do {CUresult status; \ 93 #define checkCudaErrors(S) do {CUresult status; \
@@ -183,7 +182,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ @@ -183,7 +182,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){
183 cudaError_t cudaStatus; 182 cudaError_t cudaStatus;
184 if(pHwRgb[0] == nullptr){ 183 if(pHwRgb[0] == nullptr){
185 // cudaStreamCreate(&stream[0]); 184 // cudaStreamCreate(&stream[0]);
186 - cuda_common::setColorSpace( ITU709, 0 ); 185 + cuda_common::setColorSpace( ITU_709, 0 );
187 cudaStatus = cudaMalloc((void **)&pHwRgb[0], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); 186 cudaStatus = cudaMalloc((void **)&pHwRgb[0], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
188 } 187 }
189 cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[0], gpuFrame->width, gpuFrame->height); 188 cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[0], gpuFrame->width, gpuFrame->height);
@@ -208,7 +207,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ @@ -208,7 +207,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){
208 cudaError_t cudaStatus; 207 cudaError_t cudaStatus;
209 if(pHwRgb[1] == nullptr){ 208 if(pHwRgb[1] == nullptr){
210 // cudaStreamCreate(&stream[1]); 209 // cudaStreamCreate(&stream[1]);
211 - cuda_common::setColorSpace( ITU709, 0 ); 210 + cuda_common::setColorSpace( ITU_709, 0 );
212 cudaStatus = cudaMalloc((void **)&pHwRgb[1], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); 211 cudaStatus = cudaMalloc((void **)&pHwRgb[1], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
213 } 212 }
214 cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[1], gpuFrame->width, gpuFrame->height); 213 cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[1], gpuFrame->width, gpuFrame->height);
@@ -231,13 +230,6 @@ bool count_flag = false; @@ -231,13 +230,6 @@ bool count_flag = false;
231 int count = 0; 230 int count = 0;
232 int count_std = 100; 231 int count_std = 100;
233 232
234 -static long long get_cur_time(){  
235 - // 获取操作系统当前时间点(精确到ms)  
236 - chrono::time_point<chrono::system_clock, chrono::milliseconds> tpMicro  
237 - = chrono::time_point_cast<chrono::milliseconds>(chrono::system_clock::now());  
238 -  
239 - return tpMicro.time_since_epoch().count();  
240 -}  
241 233
242 static int sum = 0; 234 static int sum = 0;
243 unsigned char *pHwData = nullptr; 235 unsigned char *pHwData = nullptr;
@@ -255,13 +247,13 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ @@ -255,13 +247,13 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){
255 { 247 {
256 count_flag = true; 248 count_flag = true;
257 count = 0; 249 count = 0;
258 - end_time = start_time = get_cur_time(); 250 + end_time = start_time = UtilTools::get_cur_time_ms();
259 } 251 }
260 count++; 252 count++;
261 sum ++ ; 253 sum ++ ;
262 if (count >= count_std) 254 if (count >= count_std)
263 { 255 {
264 - // end_time = get_cur_time(); 256 + // end_time = UtilTools::get_cur_time_ms();
265 // long time_using = end_time - start_time; 257 // long time_using = end_time - start_time;
266 // double time_per_frame = double(time_using)/count_std ; 258 // double time_per_frame = double(time_using)/count_std ;
267 // cout << count_std << "帧用时:" << time_using << "ms 每帧用时:" << time_per_frame << "ms" << endl; 259 // cout << count_std << "帧用时:" << time_using << "ms 每帧用时:" << time_per_frame << "ms" << endl;
@@ -278,7 +270,7 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ @@ -278,7 +270,7 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){
278 // cout << "gpu id : " << decoder->m_cfg.gpuid.c_str() << endl; 270 // cout << "gpu id : " << decoder->m_cfg.gpuid.c_str() << endl;
279 cudaError_t cudaStatus; 271 cudaError_t cudaStatus;
280 if(pHwData == nullptr){ 272 if(pHwData == nullptr){
281 - cuda_common::setColorSpace( ITU709, 0 ); 273 + cuda_common::setColorSpace( ITU_709, 0 );
282 cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); 274 cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
283 } 275 }
284 cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height); 276 cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height);
@@ -296,10 +288,10 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ @@ -296,10 +288,10 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){
296 } 288 }
297 289
298 void decode_finished_cbk(const void* userPtr){ 290 void decode_finished_cbk(const void* userPtr){
299 - cout << "当前时间戳: " << get_cur_time() << endl; 291 + cout << "当前时间戳: " << UtilTools::get_cur_time_ms() << endl;
300 } 292 }
301 293
302 -bool decode_request_stream_cbk(){ 294 +bool decode_request_stream_cbk(const char* deviceId){
303 cout << "需在此请求流" << endl; 295 cout << "需在此请求流" << endl;
304 return true; 296 return true;
305 } 297 }
@@ -374,7 +366,7 @@ void logFF(void *, int level, const char *fmt, va_list ap) @@ -374,7 +366,7 @@ void logFF(void *, int level, const char *fmt, va_list ap)
374 366
375 int main(int argc, char* argv[]){ 367 int main(int argc, char* argv[]){
376 368
377 - test_uri = argv[1]; 369 + test_uri = "rtsp://admin:admin@123456@192.168.60.176:554/cam/realmonitor?channel=1&subtype=0";//argv[1];
378 char* gpuid = argv[2]; 370 char* gpuid = argv[2];
379 int port = atoi(argv[3]); 371 int port = atoi(argv[3]);
380 cout << test_uri << " gpu_id:" << gpuid << " port:" << port << endl; 372 cout << test_uri << " gpu_id:" << gpuid << " port:" << port << endl;
@@ -393,7 +385,7 @@ int main(int argc, char* argv[]){ @@ -393,7 +385,7 @@ int main(int argc, char* argv[]){
393 std::this_thread::sleep_for(std::chrono::minutes(1)); 385 std::this_thread::sleep_for(std::chrono::minutes(1));
394 FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); 386 FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
395 int count = pDecManager->count(); 387 int count = pDecManager->count();
396 - cout << "当前时间:" << get_cur_time() << " 当前运行路数: " << pDecManager->count() << endl; 388 + cout << "当前时间:" << UtilTools::get_cur_time_ms() << " 当前运行路数: " << pDecManager->count() << endl;
397 } 389 }
398 390
399 return (void*)0; 391 return (void*)0;
src/utiltools.hpp 0 → 100644
  1 +#ifndef _UTIL_TOOLS_HPP_
  2 +#define _UTIL_TOOLS_HPP_
  3 +
  4 +#include<chrono>
  5 +
  6 +using namespace std;
  7 +
namespace UtilTools{

    /**
     * Returns the current std::chrono::system_clock time as a count of
     * milliseconds since that clock's epoch.
     *
     * The result is `long long` rather than `long`: `long` is only 32 bits on
     * some platforms, which cannot hold a millisecond epoch timestamp. The
     * function is `inline` (not `static`) so that including this header from
     * several translation units does not create one private copy per file.
     * Names are fully qualified so the body does not depend on a
     * using-directive being in effect.
     *
     * @return Milliseconds elapsed since the system_clock epoch.
     */
    inline long long get_cur_time_ms() {
        const auto now_ms =
            std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::system_clock::now());
        return static_cast<long long>(now_ms.time_since_epoch().count());
    }

}
  17 +
  18 +#endif
0 \ No newline at end of file 19 \ No newline at end of file