Commit 92989af0db1827cabea63ec2a20ff37eb9ac047d
1 parent
372e629f
更新解码器
Showing
35 changed files
with
2765 additions
and
340 deletions
.gitignore
.vscode/launch.json
@@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
6 | "type": "cppdbg", | 6 | "type": "cppdbg", |
7 | "request": "launch", | 7 | "request": "launch", |
8 | "program": "${workspaceFolder}/bin/lib/test", | 8 | "program": "${workspaceFolder}/bin/lib/test", |
9 | - "args": ["rtsp://122.97.218.170:8604/openUrl/V5nXRHa?params=eyJwcm90b2NhbCI6InJ0c3AiLCJjbGllbnRUeXBlIjoib3Blbl9hcGkiLCJleHByaWVUaW1lIjotMSwicHJvdG9jb2wiOiJydHNwIiwiZXhwaXJlVGltZSI6MzAwLCJlbmFibGVNR0MiOnRydWUsImV4cGFuZCI6InN0YW5kYXJkPXJ0c3Amc3RyZWFtZm9ybT1ydHAiLCJhIjoiMTBjZjM4N2JjY2Y5NDg3YzhjNWYzNjE2M2ViMWUyNTJ8MXwwfDEiLCJ0IjoxfQ==","0"], | 9 | + "args": ["rtsp","3", "30012"], |
10 | "stopAtEntry": false, | 10 | "stopAtEntry": false, |
11 | "cwd": "${workspaceFolder}/bin/lib", | 11 | "cwd": "${workspaceFolder}/bin/lib", |
12 | "environment": [], | 12 | "environment": [], |
README.md
@@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
4 | 支持 cuvid 需要安装 nv-codec-headers, 进入 nv-codec-headers 文件夹后以sudo权限make && make install即可 | 4 | 支持 cuvid 需要安装 nv-codec-headers, 进入 nv-codec-headers 文件夹后以sudo权限make && make install即可 |
5 | 3. 编译ffmpeg | 5 | 3. 编译ffmpeg |
6 | ~~~ | 6 | ~~~ |
7 | -./configure --enable-debug --extra-cflags=-g --extra-ldflags=-g --disable-optimizations --disable-stripping --enable-cuda --enable-cuvid --enable-nvenc --disable-x86asm --enable-nonfree --enable-libnpp --extra-cflags=-I/usr/local/cuda-11.7/targets/x86_64-linux/include --extra-cflags=-fPIC --extra-ldflags=-L/usr/local/cuda-11.7/targets/x86_64-linux/lib --enable-shared --enable-pic --enable-ffplay --prefix=../bin | 7 | +./configure --enable-debug --extra-cflags=-g --extra-ldflags=-g --disable-optimizations --disable-stripping --enable-cuda --enable-cuvid --enable-nvenc --disable-x86asm --enable-nonfree --enable-libnpp --disable-vaapi --extra-cflags=-I/usr/local/cuda-11.7/targets/x86_64-linux/include --extra-cflags=-fPIC --extra-ldflags=-L/usr/local/cuda-11.7/targets/x86_64-linux/lib --enable-shared --enable-pic --enable-ffplay --prefix=../bin |
8 | ~~~ | 8 | ~~~ |
9 | 其中以下是用于调试的,编译release可以去掉: | 9 | 其中以下是用于调试的,编译release可以去掉: |
10 | ~~~ | 10 | ~~~ |
src/AbstractDecoder.cpp
0 → 100644
1 | +#include "AbstractDecoder.h" | ||
2 | + | ||
3 | +#include "logger.hpp" | ||
4 | +#include "GpuRgbMemory.hpp" | ||
5 | +#include "cuda_kernels.h" | ||
6 | + | ||
7 | +#include "utiltools.hpp" | ||
8 | + | ||
9 | + | ||
10 | +FFImgInfo* AbstractDecoder::snapshot(){ | ||
11 | + | ||
12 | + // 锁住停止队列消耗 | ||
13 | + std::lock_guard<std::mutex> l(m_snapshot_mutex); | ||
14 | + | ||
15 | + AVFrame * gpuFrame = nullptr; | ||
16 | + | ||
17 | + bool bFirst = true; | ||
18 | + while(true){ | ||
19 | + m_queue_mutex.lock(); | ||
20 | + if(mFrameQueue.size() <= 0){ | ||
21 | + m_queue_mutex.unlock(); | ||
22 | + if(bFirst){ | ||
23 | + std::this_thread::sleep_for(std::chrono::milliseconds(100)); | ||
24 | + bFirst = false; | ||
25 | + continue; | ||
26 | + }else{ | ||
27 | + // 再进来说明前面已经等了 100 ms | ||
28 | + // 100 ms都没有等到解码数据,则退出 | ||
29 | + return nullptr; | ||
30 | + } | ||
31 | + } | ||
32 | + | ||
33 | + // 队列中数据大于1 | ||
34 | + gpuFrame = mFrameQueue.front(); | ||
35 | + m_queue_mutex.unlock(); | ||
36 | + break; | ||
37 | + } | ||
38 | + | ||
39 | + if (gpuFrame != nullptr && gpuFrame->format == AV_PIX_FMT_CUDA ){ | ||
40 | + LOG_DEBUG("decode task: gpuid: {} width: {} height: {}", m_cfg.gpuid, gpuFrame->width, gpuFrame->height); | ||
41 | + GpuRgbMemory* gpuMem = new GpuRgbMemory(3, gpuFrame->width, gpuFrame->height, getName(), m_cfg.gpuid , true); | ||
42 | + | ||
43 | + if (gpuMem->getMem() == nullptr){ | ||
44 | + LOG_ERROR("new GpuRgbMemory failed !!!"); | ||
45 | + return nullptr; | ||
46 | + } | ||
47 | + | ||
48 | + cudaSetDevice(atoi(m_cfg.gpuid.c_str())); | ||
49 | + cuda_common::setColorSpace( ITU_709, 0 ); | ||
50 | + cudaError_t cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], gpuMem->getMem(), gpuFrame->width, gpuFrame->height); | ||
51 | + cudaDeviceSynchronize(); | ||
52 | + if (cudaStatus != cudaSuccess) { | ||
53 | + LOG_ERROR("CUDAToBGR failed failed !!!"); | ||
54 | + return nullptr; | ||
55 | + } | ||
56 | + | ||
57 | + unsigned char * pHwRgb = gpuMem->getMem(); | ||
58 | + int channel = gpuMem->getChannel(); | ||
59 | + int width = gpuMem->getWidth(); | ||
60 | + int height = gpuMem->getHeight(); | ||
61 | + | ||
62 | + if (pHwRgb != nullptr && channel > 0 && width > 0 && height > 0){ | ||
63 | + int nSize = channel * height * width; | ||
64 | + | ||
65 | + LOG_INFO("channel:{} height:{} width:{}", channel, height, width); | ||
66 | + // unsigned char* cpu_data = new unsigned char[nSize]; | ||
67 | + | ||
68 | + unsigned char* cpu_data = (unsigned char *)av_malloc(nSize * sizeof(unsigned char)); | ||
69 | + | ||
70 | + cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost); | ||
71 | + cudaDeviceSynchronize(); | ||
72 | + | ||
73 | + delete gpuMem; | ||
74 | + gpuMem = nullptr; | ||
75 | + | ||
76 | + FFImgInfo* imgInfo = new FFImgInfo(); | ||
77 | + imgInfo->dec_name = m_dec_name; | ||
78 | + imgInfo->pData = cpu_data; | ||
79 | + imgInfo->height = height; | ||
80 | + imgInfo->width = width; | ||
81 | + imgInfo->timestamp = UtilTools::get_cur_time_ms(); | ||
82 | + imgInfo->index = m_index; | ||
83 | + | ||
84 | + m_index++; | ||
85 | + | ||
86 | + return imgInfo; | ||
87 | + } | ||
88 | + | ||
89 | + delete gpuMem; | ||
90 | + gpuMem = nullptr; | ||
91 | + } | ||
92 | + | ||
93 | + return nullptr; | ||
94 | +} | ||
95 | + | ||
96 | +bool AbstractDecoder::isSnapTime(){ | ||
97 | + if(m_snap_time_interval <= 0){ | ||
98 | + return false; | ||
99 | + } | ||
100 | + long cur_time = UtilTools::get_cur_time_ms(); | ||
101 | + if(cur_time - m_last_snap_time > m_snap_time_interval){ | ||
102 | + return true; | ||
103 | + } | ||
104 | + return false; | ||
105 | +} | ||
106 | + | ||
107 | +void AbstractDecoder::updateLastSnapTime(){ | ||
108 | + m_last_snap_time = UtilTools::get_cur_time_ms(); | ||
109 | +} | ||
110 | + | ||
111 | +void AbstractDecoder::setSnapTimeInterval(long interval){ | ||
112 | + m_snap_time_interval = interval; | ||
113 | + m_last_snap_time = UtilTools::get_cur_time_ms(); | ||
114 | +} | ||
0 | \ No newline at end of file | 115 | \ No newline at end of file |
src/AbstractDecoder.h
@@ -15,6 +15,9 @@ extern "C" | @@ -15,6 +15,9 @@ extern "C" | ||
15 | #include <libavutil/imgutils.h> | 15 | #include <libavutil/imgutils.h> |
16 | } | 16 | } |
17 | 17 | ||
18 | +#include <queue> | ||
19 | +#include <mutex> | ||
20 | + | ||
18 | using namespace std; | 21 | using namespace std; |
19 | 22 | ||
20 | /************************************************** | 23 | /************************************************** |
@@ -32,7 +35,7 @@ typedef void(*POST_DECODE_CALLBACK)(const void * userPtr, AVFrame * gpuFrame); | @@ -32,7 +35,7 @@ typedef void(*POST_DECODE_CALLBACK)(const void * userPtr, AVFrame * gpuFrame); | ||
32 | 35 | ||
33 | typedef void(*DECODE_FINISHED_CALLBACK)(const void* userPtr); | 36 | typedef void(*DECODE_FINISHED_CALLBACK)(const void* userPtr); |
34 | 37 | ||
35 | -typedef bool(*DECODE_REQUEST_STREAM_CALLBACK)(); | 38 | +typedef bool(*DECODE_REQUEST_STREAM_CALLBACK)(const char* deviceId); |
36 | 39 | ||
37 | struct FFDecConfig{ | 40 | struct FFDecConfig{ |
38 | string uri; // 视频地址 | 41 | string uri; // 视频地址 |
@@ -51,6 +54,15 @@ enum DECODER_TYPE{ | @@ -51,6 +54,15 @@ enum DECODER_TYPE{ | ||
51 | DECODER_TYPE_FFMPEG | 54 | DECODER_TYPE_FFMPEG |
52 | }; | 55 | }; |
53 | 56 | ||
57 | +struct FFImgInfo{ | ||
58 | + string dec_name; | ||
59 | + int width; | ||
60 | + int height; | ||
61 | + unsigned char * pData; | ||
62 | + long timestamp; | ||
63 | + long index; | ||
64 | +}; | ||
65 | + | ||
54 | class AbstractDecoder { | 66 | class AbstractDecoder { |
55 | public: | 67 | public: |
56 | virtual ~AbstractDecoder(){}; | 68 | virtual ~AbstractDecoder(){}; |
@@ -83,6 +95,14 @@ public: | @@ -83,6 +95,14 @@ public: | ||
83 | return m_dec_name; | 95 | return m_dec_name; |
84 | } | 96 | } |
85 | 97 | ||
98 | + FFImgInfo* snapshot(); | ||
99 | + | ||
100 | + bool isSnapTime(); | ||
101 | + | ||
102 | + void updateLastSnapTime(); | ||
103 | + | ||
104 | + void setSnapTimeInterval(long interval); | ||
105 | + | ||
86 | public: | 106 | public: |
87 | const void * m_postDecArg; | 107 | const void * m_postDecArg; |
88 | POST_DECODE_CALLBACK post_decoded_cbk; | 108 | POST_DECODE_CALLBACK post_decoded_cbk; |
@@ -95,6 +115,14 @@ public: | @@ -95,6 +115,14 @@ public: | ||
95 | bool m_dec_keyframe; | 115 | bool m_dec_keyframe; |
96 | 116 | ||
97 | FFDecConfig m_cfg; | 117 | FFDecConfig m_cfg; |
118 | + | ||
119 | + queue<AVFrame*> mFrameQueue; | ||
120 | + mutex m_queue_mutex; | ||
121 | + mutex m_snapshot_mutex; | ||
122 | + | ||
123 | + long m_snap_time_interval{-1}; | ||
124 | + long m_last_snap_time; | ||
125 | + long m_index{0}; | ||
98 | }; | 126 | }; |
99 | 127 | ||
100 | #endif // _ABSTRACT_DECODER_H_ | 128 | #endif // _ABSTRACT_DECODER_H_ |
101 | \ No newline at end of file | 129 | \ No newline at end of file |
src/DrawImageOnGPU.cu
0 → 100644
1 | +#include "cuda_kernels.h" | ||
2 | + | ||
3 | +#include "logger.hpp" | ||
4 | + | ||
5 | +typedef unsigned char uchar; | ||
6 | +typedef unsigned int uint32; | ||
7 | +typedef int int32; | ||
8 | + | ||
9 | +namespace cuda_common | ||
10 | +{ | ||
11 | + __global__ void kernel_drawPixel(float* d_srcRGB, int src_width, int src_height, | ||
12 | + int left, int top, int right, int bottom) | ||
13 | + { | ||
14 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | ||
15 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | ||
16 | + | ||
17 | + if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right)) | ||
18 | + { | ||
19 | + d_srcRGB[(y*src_width) + x] = 0; | ||
20 | + d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255; | ||
21 | + d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0; | ||
22 | + } | ||
23 | + } | ||
24 | + | ||
25 | + cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) | ||
26 | + { | ||
27 | + dim3 block(32, 16, 1); | ||
28 | + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); | ||
29 | + | ||
30 | + kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom); | ||
31 | + | ||
32 | + cudaError_t cudaStatus = cudaGetLastError(); | ||
33 | + if (cudaStatus != cudaSuccess) { | ||
34 | + LOG_ERROR("Draw 32 kernel_memcopy launch failed:{}",cudaGetErrorString(cudaStatus)); | ||
35 | + return cudaStatus; | ||
36 | + } | ||
37 | + | ||
38 | + cudaStatus = cudaDeviceSynchronize(); | ||
39 | + if (cudaStatus != cudaSuccess) { | ||
40 | + LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus); | ||
41 | + return cudaStatus; | ||
42 | + } | ||
43 | + | ||
44 | + return cudaStatus; | ||
45 | + } | ||
46 | + | ||
47 | + __global__ void kernel_drawPixel(unsigned char* d_srcRGB, int src_width, int src_height, | ||
48 | + int left, int top, int right, int bottom) | ||
49 | + { | ||
50 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | ||
51 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | ||
52 | + | ||
53 | + if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right)) | ||
54 | + { | ||
55 | + d_srcRGB[(y*src_width) + x] = 0; | ||
56 | + d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255; | ||
57 | + d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0; | ||
58 | + } | ||
59 | + } | ||
60 | + | ||
61 | + cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) | ||
62 | + { | ||
63 | + dim3 block(32, 16, 1); | ||
64 | + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); | ||
65 | + | ||
66 | + kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom); | ||
67 | + | ||
68 | + cudaError_t cudaStatus = cudaGetLastError(); | ||
69 | + if (cudaStatus != cudaSuccess) { | ||
70 | + LOG_ERROR("Draw 68 kernel_memcopy launch failed: {}",cudaGetErrorString(cudaStatus)); | ||
71 | + return cudaStatus; | ||
72 | + } | ||
73 | + | ||
74 | + cudaStatus = cudaDeviceSynchronize(); | ||
75 | + if (cudaStatus != cudaSuccess) { | ||
76 | + LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus); | ||
77 | + return cudaStatus; | ||
78 | + } | ||
79 | + | ||
80 | + return cudaStatus; | ||
81 | + } | ||
82 | + | ||
83 | + __global__ void kernel_drawLine(float* d_srcRGB, int src_width, int src_height, | ||
84 | + int begin_x, int begin_y, int end_x, int end_y) | ||
85 | + { | ||
86 | + int min_x = end_x < begin_x ? end_x : begin_x; | ||
87 | + int max_x = end_x < begin_x ? begin_x : end_x; | ||
88 | + | ||
89 | + int min_y = end_y < begin_y ? end_y : begin_y; | ||
90 | + int max_y = end_y < begin_y ? begin_y : end_y; | ||
91 | + | ||
92 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | ||
93 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | ||
94 | + | ||
95 | + if ((x - begin_x) * (end_y - begin_y) == (end_x - begin_x) * (y - begin_y) | ||
96 | + && min_x <= x && x <= max_x | ||
97 | + && min_y <= y && y <= max_y) | ||
98 | + { | ||
99 | + d_srcRGB[(y*src_width) + x] = 0; | ||
100 | + d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255; | ||
101 | + d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0; | ||
102 | + } | ||
103 | + } | ||
104 | + | ||
105 | + cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y) | ||
106 | + { | ||
107 | + dim3 block(32, 16, 1); | ||
108 | + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); | ||
109 | + | ||
110 | + kernel_drawLine << < grid, block >> >(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y); | ||
111 | + | ||
112 | + cudaError_t cudaStatus = cudaGetLastError(); | ||
113 | + if (cudaStatus != cudaSuccess) { | ||
114 | + LOG_ERROR("Draw 112 kernel_memcopy launch failed: {}",cudaGetErrorString(cudaStatus)); | ||
115 | + return cudaStatus; | ||
116 | + } | ||
117 | + | ||
118 | + cudaStatus = cudaDeviceSynchronize(); | ||
119 | + if (cudaStatus != cudaSuccess) { | ||
120 | + LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus); | ||
121 | + return cudaStatus; | ||
122 | + } | ||
123 | + | ||
124 | + return cudaStatus; | ||
125 | + } | ||
126 | +} | ||
0 | \ No newline at end of file | 127 | \ No newline at end of file |
src/FFCuContextManager.cpp
1 | #include "FFCuContextManager.h" | 1 | #include "FFCuContextManager.h" |
2 | -#include <iostream> | 2 | + |
3 | +#include "logger.hpp" | ||
3 | 4 | ||
4 | using namespace std; | 5 | using namespace std; |
5 | 6 | ||
@@ -19,7 +20,7 @@ AVBufferRef *FFCuContextManager::getCuCtx(string gpuid) | @@ -19,7 +20,7 @@ AVBufferRef *FFCuContextManager::getCuCtx(string gpuid) | ||
19 | // 初始化硬件解码器 | 20 | // 初始化硬件解码器 |
20 | if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_CUDA, gpuid.c_str(), nullptr, 0) < 0) | 21 | if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_CUDA, gpuid.c_str(), nullptr, 0) < 0) |
21 | { | 22 | { |
22 | - av_log(nullptr, AV_LOG_ERROR, "Failed to create specified HW device ! \n"); | 23 | + LOG_ERROR("Failed to create specified HW device."); |
23 | return nullptr; | 24 | return nullptr; |
24 | } | 25 | } |
25 | ctxMap[gpuid] = hw_device_ctx; | 26 | ctxMap[gpuid] = hw_device_ctx; |
src/FFNvDecoder.cpp
@@ -10,6 +10,8 @@ | @@ -10,6 +10,8 @@ | ||
10 | 10 | ||
11 | #include "logger.hpp" | 11 | #include "logger.hpp" |
12 | 12 | ||
13 | +#include "utiltools.hpp" | ||
14 | + | ||
13 | using namespace std; | 15 | using namespace std; |
14 | 16 | ||
15 | // 参考博客: https://blog.csdn.net/qq_40116098/article/details/120704340 | 17 | // 参考博客: https://blog.csdn.net/qq_40116098/article/details/120704340 |
@@ -175,18 +177,6 @@ bool FFNvDecoder::start(){ | @@ -175,18 +177,6 @@ bool FFNvDecoder::start(){ | ||
175 | return true; | 177 | return true; |
176 | } | 178 | } |
177 | 179 | ||
178 | -static long long get_cur_time(){ | ||
179 | - // 获取操作系统当前时间点(精确到微秒) | ||
180 | - chrono::time_point<chrono::system_clock, chrono::microseconds> tpMicro | ||
181 | - = chrono::time_point_cast<chrono::microseconds>(chrono::system_clock::now()); | ||
182 | - // (微秒精度的)时间点 => (微秒精度的)时间戳 | ||
183 | - time_t totalMicroSeconds = tpMicro.time_since_epoch().count(); | ||
184 | - | ||
185 | - long long currentTime = ((long long)totalMicroSeconds)/1000; | ||
186 | - | ||
187 | - return currentTime; | ||
188 | -} | ||
189 | - | ||
190 | void FFNvDecoder::decode_thread() | 180 | void FFNvDecoder::decode_thread() |
191 | { | 181 | { |
192 | AVPacket* pkt ; | 182 | AVPacket* pkt ; |
@@ -202,7 +192,7 @@ void FFNvDecoder::decode_thread() | @@ -202,7 +192,7 @@ void FFNvDecoder::decode_thread() | ||
202 | } | 192 | } |
203 | ,this); | 193 | ,this); |
204 | 194 | ||
205 | - // long start_time = get_cur_time(); | 195 | + // long start_time = UtilTools::get_cur_time_ms(); |
206 | 196 | ||
207 | while (m_bRunning) | 197 | while (m_bRunning) |
208 | { | 198 | { |
@@ -214,13 +204,6 @@ void FFNvDecoder::decode_thread() | @@ -214,13 +204,6 @@ void FFNvDecoder::decode_thread() | ||
214 | continue; | 204 | continue; |
215 | } | 205 | } |
216 | } | 206 | } |
217 | - | ||
218 | - AVFrame * gpuFrame = mFrameQueue.getTail(); | ||
219 | - if (gpuFrame == nullptr) | ||
220 | - { | ||
221 | - std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||
222 | - continue; | ||
223 | - } | ||
224 | 207 | ||
225 | int result = av_read_frame(fmt_ctx, pkt); | 208 | int result = av_read_frame(fmt_ctx, pkt); |
226 | if (result == AVERROR_EOF || result < 0) | 209 | if (result == AVERROR_EOF || result < 0) |
@@ -247,25 +230,37 @@ void FFNvDecoder::decode_thread() | @@ -247,25 +230,37 @@ void FFNvDecoder::decode_thread() | ||
247 | if (stream_index == pkt->stream_index){ | 230 | if (stream_index == pkt->stream_index){ |
248 | result = avcodec_send_packet(avctx, pkt); | 231 | result = avcodec_send_packet(avctx, pkt); |
249 | if (result < 0){ | 232 | if (result < 0){ |
233 | + av_packet_unref(pkt); | ||
250 | LOG_ERROR("{} - Failed to send pkt: {}", m_dec_name, result); | 234 | LOG_ERROR("{} - Failed to send pkt: {}", m_dec_name, result); |
251 | continue; | 235 | continue; |
252 | } | 236 | } |
253 | 237 | ||
238 | + AVFrame* gpuFrame = av_frame_alloc(); | ||
254 | result = avcodec_receive_frame(avctx, gpuFrame); | 239 | result = avcodec_receive_frame(avctx, gpuFrame); |
255 | if ((result == AVERROR(EAGAIN) || result == AVERROR_EOF) || result < 0){ | 240 | if ((result == AVERROR(EAGAIN) || result == AVERROR_EOF) || result < 0){ |
256 | LOG_ERROR("{} - Failed to receive frame: {}", m_dec_name, result); | 241 | LOG_ERROR("{} - Failed to receive frame: {}", m_dec_name, result); |
242 | + av_frame_free(&gpuFrame); | ||
243 | + av_packet_unref(pkt); | ||
257 | continue; | 244 | continue; |
258 | } | 245 | } |
246 | + av_packet_unref(pkt); | ||
259 | 247 | ||
260 | - mFrameQueue.addTail(); | 248 | + if(gpuFrame != nullptr){ |
249 | + m_queue_mutex.lock(); | ||
250 | + if(mFrameQueue.size() <= 10){ | ||
251 | + mFrameQueue.push(gpuFrame); | ||
252 | + }else{ | ||
253 | + av_frame_free(&gpuFrame); | ||
254 | + } | ||
255 | + m_queue_mutex.unlock(); | ||
256 | + } | ||
261 | } | 257 | } |
262 | av_packet_unref(pkt); | 258 | av_packet_unref(pkt); |
263 | } | 259 | } |
264 | 260 | ||
265 | m_bRunning = false; | 261 | m_bRunning = false; |
266 | 262 | ||
267 | - // long end_time = get_cur_time(); | ||
268 | - | 263 | + // long end_time = UtilTools::get_cur_time_ms(); |
269 | // cout << "解码用时:" << end_time - start_time << endl; | 264 | // cout << "解码用时:" << end_time - start_time << endl; |
270 | 265 | ||
271 | if (m_post_decode_thread != 0) | 266 | if (m_post_decode_thread != 0) |
@@ -277,6 +272,13 @@ void FFNvDecoder::decode_thread() | @@ -277,6 +272,13 @@ void FFNvDecoder::decode_thread() | ||
277 | 272 | ||
278 | decode_finished(); | 273 | decode_finished(); |
279 | 274 | ||
275 | + // 清空队列 | ||
276 | + while(mFrameQueue.size() > 0){ | ||
277 | + AVFrame * gpuFrame = mFrameQueue.front(); | ||
278 | + av_frame_free(&gpuFrame); | ||
279 | + mFrameQueue.pop(); | ||
280 | + } | ||
281 | + | ||
280 | LOG_INFO("{} - decode thread exited.", m_dec_name); | 282 | LOG_INFO("{} - decode thread exited.", m_dec_name); |
281 | } | 283 | } |
282 | 284 | ||
@@ -302,24 +304,25 @@ void FFNvDecoder::post_decode_thread(){ | @@ -302,24 +304,25 @@ void FFNvDecoder::post_decode_thread(){ | ||
302 | } | 304 | } |
303 | 305 | ||
304 | int index = 0; | 306 | int index = 0; |
305 | - while (m_bRunning || mFrameQueue.length() > 0) | 307 | + while (m_bRunning) |
306 | { | 308 | { |
307 | - AVFrame * gpuFrame = mFrameQueue.getHead(); | ||
308 | - if (gpuFrame == nullptr) | ||
309 | - { | ||
310 | - std::this_thread::sleep_for(std::chrono::milliseconds(3)); | ||
311 | - continue; | ||
312 | - } | 309 | + if(mFrameQueue.size() > 0){ |
310 | + std::lock_guard<std::mutex> l(m_snapshot_mutex); | ||
311 | + // 取队头数据 | ||
312 | + m_queue_mutex.lock(); | ||
313 | + AVFrame * gpuFrame = mFrameQueue.front(); | ||
314 | + mFrameQueue.pop(); | ||
315 | + m_queue_mutex.unlock(); | ||
316 | + // 跳帧 | ||
317 | + if (skip_frame == 1 || index % skip_frame == 0){ | ||
318 | + post_decoded_cbk(m_postDecArg, gpuFrame); | ||
319 | + index = 0; | ||
320 | + } | ||
313 | 321 | ||
314 | - // 跳帧 | ||
315 | - if (skip_frame == 1 || index % skip_frame == 0){ | ||
316 | - post_decoded_cbk(m_postDecArg, gpuFrame); | ||
317 | - index = 0; | ||
318 | - } | ||
319 | - | ||
320 | - mFrameQueue.addHead(); | 322 | + av_frame_free(&gpuFrame); |
321 | 323 | ||
322 | - index++; | 324 | + index++; |
325 | + } | ||
323 | } | 326 | } |
324 | 327 | ||
325 | LOG_INFO("post decode thread exited."); | 328 | LOG_INFO("post decode thread exited."); |
@@ -374,7 +377,10 @@ void FFNvDecoder::setDecKeyframe(bool bKeyframe) | @@ -374,7 +377,10 @@ void FFNvDecoder::setDecKeyframe(bool bKeyframe) | ||
374 | } | 377 | } |
375 | 378 | ||
376 | int FFNvDecoder::getCachedQueueLength(){ | 379 | int FFNvDecoder::getCachedQueueLength(){ |
377 | - return mFrameQueue.length(); | 380 | + m_queue_mutex.lock(); |
381 | + int queue_size = mFrameQueue.size(); | ||
382 | + m_queue_mutex.lock(); | ||
383 | + return queue_size; | ||
378 | } | 384 | } |
379 | 385 | ||
380 | float FFNvDecoder::fps(){ | 386 | float FFNvDecoder::fps(){ |
src/FFNvDecoder.h
1 | #include<string> | 1 | #include<string> |
2 | #include <pthread.h> | 2 | #include <pthread.h> |
3 | 3 | ||
4 | -#include "FrameQueue.h" | ||
5 | - | ||
6 | #include "AbstractDecoder.h" | 4 | #include "AbstractDecoder.h" |
7 | 5 | ||
6 | +#include <mutex> | ||
7 | + | ||
8 | using namespace std; | 8 | using namespace std; |
9 | 9 | ||
10 | class FFNvDecoder : public AbstractDecoder{ | 10 | class FFNvDecoder : public AbstractDecoder{ |
@@ -55,7 +55,6 @@ private: | @@ -55,7 +55,6 @@ private: | ||
55 | bool m_bFinished; | 55 | bool m_bFinished; |
56 | 56 | ||
57 | bool m_bPause; | 57 | bool m_bPause; |
58 | - FrameQueue mFrameQueue; | ||
59 | 58 | ||
60 | bool m_bReal; // 是否实时流 | 59 | bool m_bReal; // 是否实时流 |
61 | 60 |
src/FFNvDecoderManager.cpp
@@ -116,11 +116,12 @@ AbstractDecoder* FFNvDecoderManager::getDecoderByName(const string name) | @@ -116,11 +116,12 @@ AbstractDecoder* FFNvDecoderManager::getDecoderByName(const string name) | ||
116 | return nullptr; | 116 | return nullptr; |
117 | } | 117 | } |
118 | 118 | ||
119 | -void FFNvDecoderManager::startDecode(AbstractDecoder* dec){ | 119 | +bool FFNvDecoderManager::startDecode(AbstractDecoder* dec){ |
120 | if (dec != nullptr && !dec->isRunning()) | 120 | if (dec != nullptr && !dec->isRunning()) |
121 | { | 121 | { |
122 | - dec->start(); | 122 | + return dec->start(); |
123 | } | 123 | } |
124 | + return false; | ||
124 | } | 125 | } |
125 | 126 | ||
126 | bool FFNvDecoderManager::startDecodeByName(const string name){ | 127 | bool FFNvDecoderManager::startDecodeByName(const string name){ |
@@ -486,7 +487,7 @@ FFImgInfo* FFNvDecoderManager::snapshot(const string& uri){ | @@ -486,7 +487,7 @@ FFImgInfo* FFNvDecoderManager::snapshot(const string& uri){ | ||
486 | } | 487 | } |
487 | 488 | ||
488 | // 计算解码后原始数据所需缓冲区大小,并分配内存空间 Determine required buffer size and allocate buffer | 489 | // 计算解码后原始数据所需缓冲区大小,并分配内存空间 Determine required buffer size and allocate buffer |
489 | - numBytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, codec_ctx->width, codec_ctx->height, 1); | 490 | + numBytes = av_image_get_buffer_size(AV_PIX_FMT_BGR24, codec_ctx->width, codec_ctx->height, 1); |
490 | buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t)); | 491 | buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t)); |
491 | 492 | ||
492 | pFrameRGB = av_frame_alloc(); | 493 | pFrameRGB = av_frame_alloc(); |
@@ -560,3 +561,40 @@ void FFNvDecoderManager::releaseFFImgInfo(FFImgInfo* info){ | @@ -560,3 +561,40 @@ void FFNvDecoderManager::releaseFFImgInfo(FFImgInfo* info){ | ||
560 | info = nullptr; | 561 | info = nullptr; |
561 | } | 562 | } |
562 | } | 563 | } |
564 | + | ||
565 | +FFImgInfo* FFNvDecoderManager::snapshot_in_task(const string name){ | ||
566 | + if (name.empty()){ | ||
567 | + LOG_ERROR("name 为空!"); | ||
568 | + return nullptr; | ||
569 | + } | ||
570 | + | ||
571 | + std::lock_guard<std::mutex> l(m_mutex); | ||
572 | + | ||
573 | + auto dec = decoderMap.find(name); | ||
574 | + if (dec != decoderMap.end()){ | ||
575 | + return dec->second->snapshot(); | ||
576 | + } | ||
577 | + | ||
578 | + LOG_ERROR("没有找到name为{}的解码器",name); | ||
579 | + return nullptr; | ||
580 | +} | ||
581 | + | ||
582 | +vector<FFImgInfo*> FFNvDecoderManager::timing_snapshot_all(){ | ||
583 | + | ||
584 | + closeAllFinishedDecoder(); | ||
585 | + | ||
586 | + std::lock_guard<std::mutex> l(m_mutex); | ||
587 | + | ||
588 | + vector<FFImgInfo*> vec; | ||
589 | + for(auto it = decoderMap.begin(); it != decoderMap.end(); ++it){ | ||
590 | + if(it->second->isSnapTime()){ | ||
591 | + FFImgInfo* imginfo = it->second->snapshot(); | ||
592 | + if(imginfo != nullptr){ | ||
593 | + vec.push_back(imginfo); | ||
594 | + } | ||
595 | + it->second->updateLastSnapTime(); | ||
596 | + } | ||
597 | + } | ||
598 | + | ||
599 | + return vec; | ||
600 | +} | ||
563 | \ No newline at end of file | 601 | \ No newline at end of file |
src/FFNvDecoderManager.h
@@ -14,14 +14,9 @@ struct MgrDecConfig | @@ -14,14 +14,9 @@ struct MgrDecConfig | ||
14 | string name{""}; // 解码器名称 | 14 | string name{""}; // 解码器名称 |
15 | }; | 15 | }; |
16 | 16 | ||
17 | -struct FFImgInfo{ | ||
18 | - int width; | ||
19 | - int height; | ||
20 | - unsigned char * pData; | ||
21 | -}; | ||
22 | - | ||
23 | /** | 17 | /** |
24 | * 解码器管理类,单例类 | 18 | * 解码器管理类,单例类 |
19 | + * 谨防死锁 | ||
25 | **/ | 20 | **/ |
26 | class FFNvDecoderManager { | 21 | class FFNvDecoderManager { |
27 | public: | 22 | public: |
@@ -90,7 +85,7 @@ public: | @@ -90,7 +85,7 @@ public: | ||
90 | * 返回:void | 85 | * 返回:void |
91 | * 备注: | 86 | * 备注: |
92 | **************************************************/ | 87 | **************************************************/ |
93 | - void startDecode(AbstractDecoder*); | 88 | + bool startDecode(AbstractDecoder*); |
94 | 89 | ||
95 | /************************************************** | 90 | /************************************************** |
96 | * 接口:startAllDecode | 91 | * 接口:startAllDecode |
@@ -257,6 +252,10 @@ public: | @@ -257,6 +252,10 @@ public: | ||
257 | **************************************************/ | 252 | **************************************************/ |
258 | void releaseFFImgInfo(FFImgInfo* info); | 253 | void releaseFFImgInfo(FFImgInfo* info); |
259 | 254 | ||
255 | + FFImgInfo* snapshot_in_task(const string name); | ||
256 | + | ||
257 | + vector<FFImgInfo*> timing_snapshot_all(); | ||
258 | + | ||
260 | private: | 259 | private: |
261 | FFNvDecoderManager(){} | 260 | FFNvDecoderManager(){} |
262 | 261 |
src/FrameQueue.cpp deleted
1 | -#include "FrameQueue.h" | ||
2 | - | ||
3 | -FrameQueue::FrameQueue(/* args */) | ||
4 | -{ | ||
5 | - for (size_t i = 0; i < Maxsize; i++) | ||
6 | - { | ||
7 | - base[i] = av_frame_alloc(); | ||
8 | - } | ||
9 | - | ||
10 | - front = rear = 0;//头指针和尾指针置为零,队列为空 | ||
11 | -} | ||
12 | - | ||
13 | -FrameQueue::~FrameQueue() | ||
14 | -{ | ||
15 | - if (base) | ||
16 | - { | ||
17 | - for (size_t i = 0; i < Maxsize; i++) | ||
18 | - { | ||
19 | - if (base[i]) | ||
20 | - { | ||
21 | - av_frame_free(&base[i]); | ||
22 | - } | ||
23 | - } | ||
24 | - } | ||
25 | - | ||
26 | - rear = front = 0; | ||
27 | -} | ||
28 | - | ||
29 | -//循环队列的入队 | ||
30 | -AVFrame* FrameQueue::getTail() | ||
31 | -{ | ||
32 | - //插入一个元素e为Q的新的队尾元素 | ||
33 | - if ((rear + 1) % Maxsize == front) | ||
34 | - return nullptr;//队满 | ||
35 | - return base[rear];//获取队尾元素 | ||
36 | -} | ||
37 | - | ||
38 | -// 将队尾元素添加到队列中 | ||
39 | -void FrameQueue::addTail() | ||
40 | -{ | ||
41 | - rear = (rear + 1) % Maxsize;//队尾指针加1 | ||
42 | -} | ||
43 | - | ||
44 | -//循环队列的出队 | ||
45 | -AVFrame* FrameQueue::deQueue() | ||
46 | -{ | ||
47 | - //删除Q的队头元素,用e返回其值 | ||
48 | - if (front == rear) | ||
49 | - return nullptr;//队空 | ||
50 | - AVFrame* e = base[front];//保存队头元素 | ||
51 | - front = (front + 1) % Maxsize;//队头指针加1 | ||
52 | - return e; | ||
53 | -} | ||
54 | - | ||
55 | -//取循环队列的队头元素 | ||
56 | -AVFrame* FrameQueue::getHead() | ||
57 | -{ | ||
58 | - //返回Q的队头元素,不修改队头指针 | ||
59 | - if (front == rear) | ||
60 | - return nullptr;//队列为空,取元素失败 | ||
61 | - return base[front]; | ||
62 | -} | ||
63 | - | ||
64 | -void FrameQueue::addHead() | ||
65 | -{ | ||
66 | - front = (front + 1) % Maxsize;//队头指针加1 | ||
67 | -} | ||
68 | - | ||
69 | -int FrameQueue::length() | ||
70 | -{ | ||
71 | - return (rear - front + Maxsize) % Maxsize; | ||
72 | -} | ||
73 | - | ||
74 | -bool FrameQueue::isEmpty() | ||
75 | -{ | ||
76 | - if (front == rear) | ||
77 | - return true; | ||
78 | - | ||
79 | - return false; | ||
80 | -} | ||
81 | - | ||
82 | -void FrameQueue::clearQueue() | ||
83 | -{ | ||
84 | - rear = front = 0; | ||
85 | -} | ||
86 | \ No newline at end of file | 0 | \ No newline at end of file |
src/FrameQueue.h deleted
1 | -#include <iostream> | ||
2 | -#include <atomic> | ||
3 | - | ||
4 | -extern "C" | ||
5 | -{ | ||
6 | - #include <libavcodec/avcodec.h> | ||
7 | - #include <libavdevice/avdevice.h> | ||
8 | - #include <libavformat/avformat.h> | ||
9 | - #include <libavfilter/avfilter.h> | ||
10 | - #include <libavutil/avutil.h> | ||
11 | - #include <libavutil/pixdesc.h> | ||
12 | - #include <libswscale/swscale.h> | ||
13 | -} | ||
14 | - | ||
15 | -using namespace std; | ||
16 | - | ||
17 | -#define Maxsize 5 // 循环队列的大小 | ||
18 | - | ||
19 | -// 循环队列 | ||
20 | -class FrameQueue | ||
21 | -{ | ||
22 | -private: | ||
23 | - /* data */ | ||
24 | -public: | ||
25 | - FrameQueue(/* args */); | ||
26 | - ~FrameQueue(); | ||
27 | - | ||
28 | - AVFrame* getTail(); | ||
29 | - void addTail(); | ||
30 | - AVFrame* deQueue(); | ||
31 | - AVFrame* getHead(); | ||
32 | - void addHead(); | ||
33 | - void clearQueue(); | ||
34 | - | ||
35 | - int length(); | ||
36 | - bool isEmpty(); | ||
37 | - | ||
38 | -private: | ||
39 | - AVFrame* base[Maxsize]; | ||
40 | - atomic<int> front; | ||
41 | - atomic<int> rear; | ||
42 | -}; | ||
43 | \ No newline at end of file | 0 | \ No newline at end of file |
src/GpuRgbMemory.hpp
0 → 100644
1 | +#include<string> | ||
2 | + | ||
3 | +#include "cuda_kernels.h" | ||
4 | +#include "define.hpp" | ||
5 | +#include "utiltools.hpp" | ||
6 | + | ||
7 | +using namespace std; | ||
8 | + | ||
9 | +class GpuRgbMemory{ | ||
10 | + | ||
11 | +public: | ||
12 | + GpuRgbMemory(int _channel, int _width, int _height, string _id, string _gpuid, bool _isused){ | ||
13 | + channel = _channel; | ||
14 | + width = _width; | ||
15 | + height = _height; | ||
16 | + size = channel * width * height; | ||
17 | + isused = _isused; | ||
18 | + id = _id; | ||
19 | + gpuid = _gpuid; | ||
20 | + timestamp = UtilTools::get_cur_time_ms(); | ||
21 | + | ||
22 | + cudaSetDevice(atoi(gpuid.c_str())); | ||
23 | + CHECK_CUDA(cudaMalloc((void **)&pHwRgb, size * sizeof(unsigned char))); | ||
24 | + } | ||
25 | + | ||
26 | + ~GpuRgbMemory(){ | ||
27 | + if (pHwRgb) { | ||
28 | + cudaSetDevice(atoi(gpuid.c_str())); | ||
29 | + CHECK_CUDA(cudaFree(pHwRgb)); | ||
30 | + pHwRgb = nullptr; | ||
31 | + } | ||
32 | + } | ||
33 | + | ||
34 | + int getSize() { | ||
35 | + return size; | ||
36 | + } | ||
37 | + | ||
38 | + bool isIsused() { | ||
39 | + return isused; | ||
40 | + } | ||
41 | + | ||
42 | + void setIsused(bool _isused) { | ||
43 | + isused = _isused; | ||
44 | + // 更新时间戳 | ||
45 | + timestamp = UtilTools::get_cur_time_ms(); | ||
46 | + } | ||
47 | + | ||
48 | + string getId() { | ||
49 | + return id; | ||
50 | + } | ||
51 | + | ||
52 | + string getGpuId() { | ||
53 | + return gpuid; | ||
54 | + } | ||
55 | + | ||
56 | + unsigned char* getMem(){ | ||
57 | + return pHwRgb; | ||
58 | + } | ||
59 | + | ||
60 | + long long getTimesstamp(){ | ||
61 | + return timestamp; | ||
62 | + } | ||
63 | + | ||
64 | + int getWidth(){ | ||
65 | + return width; | ||
66 | + } | ||
67 | + | ||
68 | + int getHeight(){ | ||
69 | + return height; | ||
70 | + } | ||
71 | + | ||
72 | + int getChannel(){ | ||
73 | + return channel; | ||
74 | + } | ||
75 | + | ||
76 | +private: | ||
77 | + int size; | ||
78 | + bool isused; | ||
79 | + string id; | ||
80 | + string gpuid; | ||
81 | + unsigned char * pHwRgb{nullptr}; | ||
82 | + long long timestamp; | ||
83 | + int width{0}; | ||
84 | + int height{0}; | ||
85 | + int channel{3}; | ||
86 | +}; | ||
0 | \ No newline at end of file | 87 | \ No newline at end of file |
src/ImageSaveGPU.cpp
0 → 100644
1 | +#include "cuda_kernels.h" | ||
2 | + | ||
3 | +#include "logger.hpp" | ||
4 | + | ||
5 | + | ||
6 | +//int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height) | ||
7 | +//{ | ||
8 | +// return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height); | ||
9 | +// //return 0; | ||
10 | +//} | ||
11 | +// | ||
12 | +//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height) | ||
13 | +//{ | ||
14 | +// return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height); | ||
15 | +// //return 0; | ||
16 | +//} | ||
17 | +// | ||
18 | +//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB) | ||
19 | +//{ | ||
20 | +// return jpegNPP(szOutputFile, d_srcRGB); | ||
21 | +//} | ||
22 | +// | ||
23 | +//int saveJPEG(const char *szOutputFile, float* d_srcRGB) | ||
24 | +//{ | ||
25 | +// return jpegNPP(szOutputFile, d_srcRGB); | ||
26 | +//} | ||
27 | + | ||
28 | +int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height) | ||
29 | +{ | ||
30 | + cudaError_t cudaStatus = cuda_common::ResizeImage(d_srcRGB, src_width, src_height, d_dstRGB, dst_width, dst_height); | ||
31 | + if (cudaStatus != cudaSuccess) { | ||
32 | + LOG_ERROR("cuda_common::ResizeImage failed: {}",cudaGetErrorString(cudaStatus)); | ||
33 | + return -1; | ||
34 | + } | ||
35 | + | ||
36 | + return 0; | ||
37 | +} | ||
38 | + | ||
39 | +//int initTables() | ||
40 | +//{ | ||
41 | +// initTable(); | ||
42 | +// return 0; | ||
43 | +//} | ||
44 | +// | ||
45 | +//int initTables(int flag, int width, int height) | ||
46 | +//{ | ||
47 | +// initTable(0, width, height); | ||
48 | +// return 0; | ||
49 | +//} | ||
50 | + | ||
51 | +int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) | ||
52 | +{ | ||
53 | + cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom); | ||
54 | + return 0; | ||
55 | +} | ||
56 | + | ||
57 | +int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) | ||
58 | +{ | ||
59 | + cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom); | ||
60 | + return 0; | ||
61 | +} | ||
62 | + | ||
63 | +int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y) | ||
64 | +{ | ||
65 | + cuda_common::DrawLine(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y); | ||
66 | + return 0; | ||
67 | +} | ||
68 | + | ||
69 | +//int releaseJpegSaver() | ||
70 | +//{ | ||
71 | +// releaseJpegNPP(); | ||
72 | +// return 0; | ||
73 | +//} | ||
74 | + | ||
75 | +int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom) | ||
76 | +{ | ||
77 | + cudaError_t cudaStatus = cuda_common::PartMemCopy(d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom); | ||
78 | + if (cudaStatus != cudaSuccess) { | ||
79 | + LOG_ERROR("cuda_common::77 PartMemCopy failed: {} {} {} {} {} {} {}",cudaGetErrorString(cudaStatus), left, top, right, bottom, src_height, d_dstRGB); | ||
80 | + return -1; | ||
81 | + } | ||
82 | + | ||
83 | + return 0; | ||
84 | +} | ||
85 | +//#include <fstream> | ||
86 | +//extern std::ofstream g_os; | ||
87 | +int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, | ||
88 | + int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h, | ||
89 | + float submeanb, float submeang, float submeanr, | ||
90 | + float varianceb, float varianceg, float variancer) | ||
91 | +{ | ||
92 | + //g_os << "cudaMemcpyHostToDevice begin 9" << std::endl; | ||
93 | + cudaError_t cudaStatus = cuda_common::PartMemResizeBatch( | ||
94 | + d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h, | ||
95 | + submeanb, submeang, submeanr, | ||
96 | + varianceb, varianceg, variancer); | ||
97 | + //g_os << "cudaMemcpyHostToDevice end 9" << std::endl; | ||
98 | + if (cudaStatus != cudaSuccess) { | ||
99 | + LOG_ERROR("cuda_common::PartMemResizeBatch failed: {}",cudaGetErrorString(cudaStatus)); | ||
100 | + return -1; | ||
101 | + } | ||
102 | + | ||
103 | + return 0; | ||
104 | +} | ||
105 | + | ||
106 | + | ||
107 | +//int PartMemResizeBatch(float * d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, | ||
108 | +// int count, int* vleft, int * vtop, int* vright, int* vbottom, int dst_w, int dst_h, | ||
109 | +// float submeanb, float submeang, float submeanr, | ||
110 | +// float varianceb, float varianceg, float variancer) | ||
111 | +// | ||
112 | +//{ | ||
113 | +// cudaError_t cudaStatus = cuda_common::PartMemResizeBatch( | ||
114 | +// d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h, | ||
115 | +// submeanb, submeang, submeanr, | ||
116 | +// varianceb, varianceg, variancer); | ||
117 | +// if (cudaStatus != cudaSuccess) { | ||
118 | +// fprintf(stderr, "cuda_common::PartMemCopy failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
119 | +// return -1; | ||
120 | +// } | ||
121 | +// | ||
122 | +// return 0; | ||
123 | +//} | ||
0 | \ No newline at end of file | 124 | \ No newline at end of file |
src/ImageSaveGPU.h
0 → 100644
1 | +/******************************************************************************************* | ||
2 | +* Version: VPT_x64_V2.0.0_20170904 | ||
3 | +* CopyRight: 中科院自动化研究所模式识别实验室图像视频组 | ||
4 | +* UpdateDate: 20170904 | ||
5 | +* Content: 人车物监测跟踪 | ||
6 | +********************************************************************************************/ | ||
7 | + | ||
8 | +#ifndef IMAGESAVEGPU_H_ | ||
9 | +#define IMAGESAVEGPU_H_ | ||
10 | + | ||
11 | +#ifdef _MSC_VER | ||
12 | + #ifdef IMAGESAVEGPU_EXPORTS | ||
13 | + #define IMAGESAVEGPU_API __declspec(dllexport) | ||
14 | + #else | ||
15 | + #define IMAGESAVEGPU_API __declspec(dllimport) | ||
16 | + #endif | ||
17 | +#else | ||
18 | +#define IMAGESAVEGPU_API __attribute__((visibility ("default"))) | ||
19 | +#endif | ||
20 | +// 功能:保存成jpeg文件 | ||
21 | +// szOutputFile 输出图片路径,如D:\\out.jpg | ||
22 | +// d_srcRGB 输入RGB数据,由cudaMalloc分配的显存空间,数据排列形式为:BBBBBB......GGGGGG......RRRRRRRR...... | ||
23 | +// img_width RGB数据图片的宽度 | ||
24 | +// img_height RGB数据图片的高度 | ||
25 | +// | ||
26 | +//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height); | ||
27 | +//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB); | ||
28 | +// | ||
29 | +//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height); | ||
30 | +//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB); | ||
31 | + | ||
32 | +// 功能:防缩图像 | ||
33 | +IMAGESAVEGPU_API int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height); | ||
34 | + | ||
35 | +// 功能:部分拷贝数据 | ||
36 | +IMAGESAVEGPU_API int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom); | ||
37 | + | ||
38 | +//IMAGESAVEGPU_API int partMemResizeImage(float * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, | ||
39 | +// int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h, | ||
40 | +// float submeanb, float submeang, float submeanr, | ||
41 | +// float varianceb, float varianceg, float variancer); | ||
42 | + | ||
43 | + | ||
44 | +IMAGESAVEGPU_API int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, | ||
45 | + int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h, | ||
46 | + float submeanb, float submeang, float submeanr, | ||
47 | + float varianceb, float varianceg, float variancer); | ||
48 | + | ||
49 | + | ||
50 | +//// 功能:初始化GPU保存图像的各种量化表 | ||
51 | +//IMAGESAVEGPU_API int initTables(); | ||
52 | +//IMAGESAVEGPU_API int initTables(int falg, int width, int height); | ||
53 | +// | ||
54 | +//// 功能:释放资源 | ||
55 | +//IMAGESAVEGPU_API int releaseJpegSaver(); | ||
56 | + | ||
57 | +// 功能:在GPU中绘制快照包围框 | ||
58 | +IMAGESAVEGPU_API int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); | ||
59 | + | ||
60 | +IMAGESAVEGPU_API int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); | ||
61 | + | ||
62 | +// 功能:在GPU中绘制直线 | ||
63 | +IMAGESAVEGPU_API int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y); | ||
64 | + | ||
65 | +#endif |
src/NV12ToRGB.cu
@@ -257,14 +257,13 @@ namespace cuda_common | @@ -257,14 +257,13 @@ namespace cuda_common | ||
257 | dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255); | 257 | dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255); |
258 | } | 258 | } |
259 | 259 | ||
260 | - cudaError_t setColorSpace(e_ColorSpace CSC, float hue) | 260 | + cudaError_t setColorSpace(FF_ColorSpace CSC, float hue) |
261 | { | 261 | { |
262 | - | ||
263 | float hueSin = sin(hue); | 262 | float hueSin = sin(hue); |
264 | float hueCos = cos(hue); | 263 | float hueCos = cos(hue); |
265 | 264 | ||
266 | float hueCSC[9]; | 265 | float hueCSC[9]; |
267 | - if (CSC == ITU601) | 266 | + if (CSC == ITU_601) |
268 | { | 267 | { |
269 | //CCIR 601 | 268 | //CCIR 601 |
270 | hueCSC[0] = 1.1644f; | 269 | hueCSC[0] = 1.1644f; |
@@ -277,7 +276,7 @@ namespace cuda_common | @@ -277,7 +276,7 @@ namespace cuda_common | ||
277 | hueCSC[7] = hueCos * 2.0172f; | 276 | hueCSC[7] = hueCos * 2.0172f; |
278 | hueCSC[8] = hueSin * -2.0172f; | 277 | hueCSC[8] = hueSin * -2.0172f; |
279 | } | 278 | } |
280 | - else if (CSC == ITU709) | 279 | + else if (CSC == ITU_709) |
281 | { | 280 | { |
282 | //CCIR 709 | 281 | //CCIR 709 |
283 | hueCSC[0] = 1.0f; | 282 | hueCSC[0] = 1.0f; |
src/PartMemCopy.cu
0 → 100644
1 | +#include "cuda_kernels.h" | ||
2 | +#include <algorithm> | ||
3 | +typedef unsigned char uchar; | ||
4 | +typedef unsigned int uint32; | ||
5 | +typedef int int32; | ||
6 | + | ||
7 | +#define MAX_SNAPSHOT_WIDTH 320 | ||
8 | +#define MAX_SNAPSHOT_HEIGHT 320 | ||
9 | + | ||
10 | +namespace cuda_common | ||
11 | +{ | ||
12 | + __global__ void kernel_memcopy(unsigned char* d_srcRGB, int src_width, int src_height, | ||
13 | + unsigned char* d_dstRGB, int left, int top, int right, int bottom) | ||
14 | + { | ||
15 | + const int dst_x = blockIdx.x * blockDim.x + threadIdx.x; | ||
16 | + const int dst_y = blockIdx.y * blockDim.y + threadIdx.y; | ||
17 | + const int dst_width = right - left; | ||
18 | + const int dst_height = bottom - top; | ||
19 | + if (dst_x < dst_width && dst_y < dst_height) | ||
20 | + { | ||
21 | + int src_x = left + dst_x; | ||
22 | + int src_y = top + dst_y; | ||
23 | + | ||
24 | + //bgr...bgr...bgr... | ||
25 | + d_dstRGB[(dst_y*dst_width + dst_x) * 3] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3]; | ||
26 | + d_dstRGB[(dst_y*dst_width + dst_x) | ||
27 | + * 3 + 1] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3 + 1]; | ||
28 | + d_dstRGB[(dst_y*dst_width + dst_x) * 3 + 2] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3 + 2]; | ||
29 | + | ||
30 | + //bbb...ggg...rrr... | ||
31 | + //d_dstRGB[(dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(src_y*src_width) + src_x]; | ||
32 | + //d_dstRGB[(dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(src_width*src_height) + (src_y*src_width) + src_x]; | ||
33 | + //d_dstRGB[(2 * dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(2 * src_width*src_height) + (src_y*src_width) + src_x]; | ||
34 | + | ||
35 | + /* memcpy(d_dstRGB + (dst_y*src_width) + dst_x, d_srcRGB + (src_y*src_width) + src_x, sizeof(float)); | ||
36 | + memcpy(d_dstRGB + (src_width*src_height) + (dst_y*src_width) + dst_x, d_srcRGB + (src_width*src_height) + (src_y*src_width) + src_x, sizeof(float)); | ||
37 | + memcpy(d_dstRGB + (2 * src_width*src_height) + (dst_y*src_width) + dst_x, d_srcRGB + (2 * src_width*src_height) + (src_y*src_width) + src_x, sizeof(float));*/ | ||
38 | + } | ||
39 | + } | ||
40 | + | ||
41 | + cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom) | ||
42 | + { | ||
43 | + dim3 block(32, 16, 1); | ||
44 | + dim3 grid(((right - left) + (block.x - 1)) / block.x, ((bottom - top) + (block.y - 1)) / block.y, 1); | ||
45 | + | ||
46 | + kernel_memcopy << < grid, block >> > (d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom); | ||
47 | + | ||
48 | + cudaError_t cudaStatus = cudaGetLastError(); | ||
49 | + if (cudaStatus != cudaSuccess) { | ||
50 | + fprintf(stderr, "Part 50 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
51 | + return cudaStatus; | ||
52 | + } | ||
53 | + cudaStatus = cudaDeviceSynchronize(); | ||
54 | + if (cudaStatus != cudaSuccess) { | ||
55 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); | ||
56 | + return cudaStatus; | ||
57 | + } | ||
58 | + return cudaStatus; | ||
59 | + } | ||
60 | + | ||
61 | + | ||
62 | + // __global__ void kernel_memcopy_mean_variance(float* d_srcRGB, int src_width, int src_height, | ||
63 | + // unsigned char* vd_dstRGB, int count, int * vleft, int* vtop, int* vright, int * vbottom, float submeanb,float submeang, float submeanr, float varianceb,float varianceg, float variancer) | ||
64 | + // { | ||
65 | + // const int dst_x = blockIdx.x * blockDim.x + threadIdx.x; | ||
66 | + // const int dst_y = blockIdx.y * blockDim.y + threadIdx.y; | ||
67 | + // for (int i=0;i<count;i++) | ||
68 | + // { | ||
69 | + // const int left = vleft[i]; | ||
70 | + // const int right = vright[i]; | ||
71 | + // const int top = vtop[i]; | ||
72 | + // const int bottom = vbottom[i]; | ||
73 | + // | ||
74 | + // const int dst_width = right - left; | ||
75 | + // const int dst_height = bottom - top; | ||
76 | + // | ||
77 | + // | ||
78 | + // unsigned char * d_dstRGB = vd_dstRGB + i * ; | ||
79 | + // | ||
80 | + // if (dst_x < dst_width && dst_y < dst_height) | ||
81 | + // { | ||
82 | + // int src_x = left + dst_x; | ||
83 | + // int src_y = top + dst_y; | ||
84 | + // | ||
85 | + // d_dstRGB[(dst_y*dst_width) + dst_x] = (d_srcRGB[(src_y*src_width) + src_x] - submeanb)*varianceb; | ||
86 | + // d_dstRGB[(dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (d_srcRGB[(src_width*src_height) + (src_y*src_width) + src_x] -submeang)*varianceg; | ||
87 | + // d_dstRGB[(2 * dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (d_srcRGB[(2 * src_width*src_height) + (src_y*src_width) + src_x] - submeanr) * variancer; | ||
88 | + // | ||
89 | + // } | ||
90 | + // } | ||
91 | + // } | ||
92 | + __global__ void PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel( | ||
93 | + unsigned char * d_srcRGB, int srcimg_width, int srcimg_height, | ||
94 | + int* vleft, int* vtop, int* vright, int * vbottom, | ||
95 | + unsigned char** vd_dstRGB, int count, int *dst_width, int *dst_height, | ||
96 | + float submeanb, float submeang, float submeanr, | ||
97 | + float varianceb, float varianceg, float variancer) | ||
98 | + { | ||
99 | + int i = blockIdx.z; | ||
100 | + | ||
101 | + //for (int i = 0; i<count; i++) | ||
102 | + { | ||
103 | + const int left = vleft[i]; | ||
104 | + const int right = vright[i]; | ||
105 | + const int top = vtop[i]; | ||
106 | + const int bottom = vbottom[i]; | ||
107 | + const int cur_dst_width = dst_width[i]; | ||
108 | + const int cur_dst_height = dst_height[i]; | ||
109 | + | ||
110 | + unsigned char* d_dstRGB = vd_dstRGB[i]; | ||
111 | + | ||
112 | + const int src_width = right - left; | ||
113 | + const int src_height = bottom - top; | ||
114 | + const int x = blockIdx.x * blockDim.x + threadIdx.x;// + left; | ||
115 | + const int y = blockIdx.y * blockDim.y + threadIdx.y;//+ top; | ||
116 | + const int dst_x = blockIdx.x * blockDim.x + threadIdx.x; | ||
117 | + const int dst_y = blockIdx.y * blockDim.y + threadIdx.y; | ||
118 | + | ||
119 | + /*if (dst_x == 0 && dst_y == 0) | ||
120 | + printf("%d %d %d %d %d\n", i, vleft[i], vright[i], cur_dst_width, cur_dst_height);*/ | ||
121 | + | ||
122 | + unsigned char * src_img = d_srcRGB; | ||
123 | + unsigned char * dst_img = d_dstRGB; | ||
124 | + if (dst_x < cur_dst_width && dst_y < cur_dst_height) | ||
125 | + { | ||
126 | + float fx = (x + 0.5)*src_width / (float)cur_dst_width - 0.5 + left; | ||
127 | + float fy = (y + 0.5)*src_height / (float)cur_dst_height - 0.5 + top; | ||
128 | + int ax = floor(fx); | ||
129 | + int ay = floor(fy); | ||
130 | + if (ax < 0) | ||
131 | + { | ||
132 | + ax = 0; | ||
133 | + } | ||
134 | + if (ax > srcimg_width - 2) | ||
135 | + { | ||
136 | + ax = srcimg_width - 2; | ||
137 | + } | ||
138 | + if (ay < 0) { | ||
139 | + ay = 0; | ||
140 | + } | ||
141 | + if (ay > srcimg_height - 2) | ||
142 | + { | ||
143 | + ay = srcimg_height - 2; | ||
144 | + } | ||
145 | + | ||
146 | + int A = ax + ay*srcimg_width; | ||
147 | + int B = ax + ay*srcimg_width + 1; | ||
148 | + int C = ax + ay*srcimg_width + srcimg_width; | ||
149 | + int D = ax + ay*srcimg_width + srcimg_width + 1; | ||
150 | + | ||
151 | + float w1, w2, w3, w4; | ||
152 | + w1 = fx - ax; | ||
153 | + w2 = 1 - w1; | ||
154 | + w3 = fy - ay; | ||
155 | + w4 = 1 - w3; | ||
156 | + float blue = src_img[A * 3] * w2*w4 + src_img[B * 3] * w1*w4 + src_img[C * 3] * w2*w3 + src_img[D * 3] * w1*w3; | ||
157 | + float green = src_img[A * 3 + 1] * w2*w4 + src_img[B * 3 + 1] * w1*w4 | ||
158 | + + src_img[C * 3 + 1] * w2*w3 + src_img[D * 3 + 1] * w1*w3; | ||
159 | + float red = src_img[A * 3 + 2] * w2*w4 + src_img[B * 3 + 2] * w1*w4 | ||
160 | + + src_img[C * 3 + 2] * w2*w3 + src_img[D * 3 + 2] * w1*w3; | ||
161 | + | ||
162 | + /*dst_img[(dst_y * dst_width + dst_x) * 3] = (unsigned char)(blue - submeanb)*varianceb; | ||
163 | + dst_img[(dst_y * dst_width + dst_x) * 3 + 1] =(unsigned char) (green - submeang)*varianceg; | ||
164 | + dst_img[(dst_y * dst_width + dst_x) * 3 + 2] = (unsigned char) (red - submeanr)*variancer;*/ | ||
165 | + | ||
166 | + if (blue < 0) | ||
167 | + blue = 0; | ||
168 | + else if (blue > 255) | ||
169 | + blue = 255; | ||
170 | + | ||
171 | + if (green < 0) | ||
172 | + green = 0; | ||
173 | + else if (green > 255) | ||
174 | + green = 255; | ||
175 | + | ||
176 | + if (red < 0) | ||
177 | + red = 0; | ||
178 | + else if (red > 255) | ||
179 | + red = 255; | ||
180 | + | ||
181 | + dst_img[(dst_y * cur_dst_width + dst_x) * 3] = (unsigned char)blue; | ||
182 | + dst_img[(dst_y * cur_dst_width + dst_x) * 3 + 1] = (unsigned char)green; | ||
183 | + dst_img[(dst_y * cur_dst_width + dst_x) * 3 + 2] = (unsigned char)red; | ||
184 | + | ||
185 | + | ||
186 | + /*if (src_img[(dst_y * dst_width + dst_x) * 3] < 0) | ||
187 | + src_img[(dst_y * dst_width + dst_x) * 3] = 0; | ||
188 | + else if (src_img[(dst_y * dst_width + dst_x) * 3] > 255) | ||
189 | + src_img[(dst_y * dst_width + dst_x) * 3] = 255; | ||
190 | + | ||
191 | + if (src_img[(dst_y * dst_width + dst_x) * 3 + 1] < 0) | ||
192 | + src_img[(dst_y * dst_width + dst_x) * 3 + 1] = 0; | ||
193 | + else if (src_img[(dst_y * dst_width + dst_x) * 3 + 1] > 255) | ||
194 | + src_img[(dst_y * dst_width + dst_x) * 3 + 1] = 255; | ||
195 | + | ||
196 | + if (src_img[(dst_y * dst_width + dst_x) * 3 + 2] < 0) | ||
197 | + src_img[(dst_y * dst_width + dst_x) * 3 + 2] = 0; | ||
198 | + else if (src_img[(dst_y * dst_width + dst_x) * 3 + 2] > 255) | ||
199 | + src_img[(dst_y * dst_width + dst_x) * 3 + 2] = 255; | ||
200 | + | ||
201 | + | ||
202 | + dst_img[(dst_y * dst_width + dst_x) * 3] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3]; | ||
203 | + dst_img[(dst_y * dst_width + dst_x) * 3 + 1] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3 + 1]; | ||
204 | + dst_img[(dst_y * dst_width + dst_x) * 3 + 2] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3 + 2];*/ | ||
205 | + } | ||
206 | + } | ||
207 | + } | ||
208 | + | ||
209 | + cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, int count, int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h, float submeanb, float submeang, float submeanr, | ||
210 | + float varianceb, float varianceg, float variancer) | ||
211 | + { | ||
212 | + /* cudaEvent_t start, stop; | ||
213 | + float time; | ||
214 | + cudaEventCreate(&start); | ||
215 | + cudaEventCreate(&stop); | ||
216 | + cudaEventRecord(start, 0);*/ | ||
217 | + | ||
218 | + dim3 block(32, 16, 1); | ||
219 | + dim3 grid((*std::max_element(dst_w, dst_w+ count) + (block.x - 1)) / block.x, (*std::max_element(dst_h, dst_h + count) + (block.y - 1)) / block.y, count); | ||
220 | + | ||
221 | + int * gpu_left; | ||
222 | + cudaMalloc(&gpu_left, 1000 * sizeof(int)); | ||
223 | + cudaMemcpy(gpu_left, left, count * sizeof(int), cudaMemcpyHostToDevice); | ||
224 | + | ||
225 | + int * gpu_right; | ||
226 | + cudaMalloc(&gpu_right, 1000 * sizeof(int)); | ||
227 | + cudaMemcpy(gpu_right, right, count * sizeof(int), cudaMemcpyHostToDevice); | ||
228 | + | ||
229 | + int * gpu_top; | ||
230 | + cudaMalloc(&gpu_top, 1000 * sizeof(int)); | ||
231 | + cudaMemcpy(gpu_top, top, count * sizeof(int), cudaMemcpyHostToDevice); | ||
232 | + | ||
233 | + int * gpu_bottom; | ||
234 | + cudaMalloc(&gpu_bottom, 1000 * sizeof(int)); | ||
235 | + cudaMemcpy(gpu_bottom, bottom, count * sizeof(int), cudaMemcpyHostToDevice); | ||
236 | + | ||
237 | + int * gpu_dst_w; | ||
238 | + cudaMalloc(&gpu_dst_w, 1000 * sizeof(int)); | ||
239 | + cudaMemcpy(gpu_dst_w, dst_w, count * sizeof(int), cudaMemcpyHostToDevice); | ||
240 | + | ||
241 | + int * gpu_dst_h; | ||
242 | + cudaMalloc(&gpu_dst_h, 1000 * sizeof(int)); | ||
243 | + cudaMemcpy(gpu_dst_h, dst_h, count * sizeof(int), cudaMemcpyHostToDevice); | ||
244 | + | ||
245 | + unsigned char** gpu_dst_rgb; | ||
246 | + cudaMalloc(&gpu_dst_rgb, 1000 * sizeof(unsigned char*)); | ||
247 | + cudaMemcpy(gpu_dst_rgb, d_dstRGB, count * sizeof(unsigned char*), cudaMemcpyHostToDevice); | ||
248 | + | ||
249 | + //cudaMemcpy(cpu_personfloat, d_srcRGB, 112*224*2*sizeof(float), cudaMemcpyDeviceToHost); | ||
250 | + // for(int i=0;i<100;i++) | ||
251 | + // { | ||
252 | + // printf("the score is %f\t",cpu_personfloat[i]); | ||
253 | + // } | ||
254 | + PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel << < grid, block >> > ( | ||
255 | + d_srcRGB, src_width, src_height, | ||
256 | + gpu_left, gpu_top, gpu_right, gpu_bottom, | ||
257 | + gpu_dst_rgb, count, gpu_dst_w, gpu_dst_h, | ||
258 | + submeanb, submeang, submeanr, | ||
259 | + varianceb, varianceg, variancer); | ||
260 | + cudaFree(gpu_top); | ||
261 | + cudaFree(gpu_bottom); | ||
262 | + cudaFree(gpu_left); | ||
263 | + cudaFree(gpu_right); | ||
264 | + cudaFree(gpu_dst_w); | ||
265 | + cudaFree(gpu_dst_h); | ||
266 | + cudaFree(gpu_dst_rgb); | ||
267 | + | ||
268 | + cudaError_t cudaStatus = cudaGetLastError(); | ||
269 | + if (cudaStatus != cudaSuccess) { | ||
270 | + fprintf(stderr, "Part 270 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
271 | + return cudaStatus; | ||
272 | + } | ||
273 | + cudaStatus = cudaDeviceSynchronize(); | ||
274 | + if (cudaStatus != cudaSuccess) { | ||
275 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); | ||
276 | + return cudaStatus; | ||
277 | + } | ||
278 | + | ||
279 | + /*cudaEventRecord(stop, 0); | ||
280 | + cudaEventSynchronize(stop); | ||
281 | + cudaEventElapsedTime(&time, start, stop); | ||
282 | + cudaEventDestroy(start); | ||
283 | + cudaEventDestroy(stop); | ||
284 | + printf("ºËº¯ÊýÏûºÄʱ¼ä:%f\n", time);*/ | ||
285 | + | ||
286 | + return cudaStatus; | ||
287 | + } | ||
288 | + | ||
289 | +} | ||
0 | \ No newline at end of file | 290 | \ No newline at end of file |
src/RGB2YUV.cu
0 → 100644
1 | + | ||
2 | + | ||
3 | +#include "cuda_kernels.h" | ||
4 | + | ||
5 | +typedef unsigned char uint8; | ||
6 | +typedef unsigned int uint32; | ||
7 | +typedef int int32; | ||
8 | + | ||
9 | +namespace cuda_common | ||
10 | +{ | ||
11 | + __device__ unsigned char clip_value(unsigned char x, unsigned char min_val, unsigned char max_val){ | ||
12 | + if (x>max_val){ | ||
13 | + return max_val; | ||
14 | + } | ||
15 | + else if (x<min_val){ | ||
16 | + return min_val; | ||
17 | + } | ||
18 | + else{ | ||
19 | + return x; | ||
20 | + } | ||
21 | + } | ||
22 | + | ||
23 | + __global__ void kernel_rgb2yuv(unsigned char *src_img, unsigned char* Y, unsigned char* u, unsigned char* v, | ||
24 | + int src_width, int src_height, size_t yPitch) | ||
25 | + { | ||
26 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | ||
27 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | ||
28 | + | ||
29 | + if (x >= src_width) | ||
30 | + return; //x = width - 1; | ||
31 | + | ||
32 | + if (y >= src_height) | ||
33 | + return; // y = height - 1; | ||
34 | + | ||
35 | + int B = src_img[y * src_width * 3 + x * 3]; | ||
36 | + int G = src_img[y * src_width * 3 + x * 3 + 1]; | ||
37 | + int R = src_img[y * src_width * 3 + x * 3 + 2]; | ||
38 | + | ||
39 | + /*int B = src_img[y * src_width + x]; | ||
40 | + int G = src_img[src_width * src_height + y * src_width + x]; | ||
41 | + int R = src_img[src_width * src_height * 2 + y * src_width + x];*/ | ||
42 | + | ||
43 | + Y[y * yPitch + x] = clip_value((unsigned char)(0.299 * R + 0.587 * G + 0.114 * B), 0, 255); | ||
44 | + u[y * src_width + x] = clip_value((unsigned char)(-0.147 * R - 0.289 * G + 0.436 * B + 128), 0, 255); | ||
45 | + v[y * src_width + x] = clip_value((unsigned char)(0.615 * R - 0.515 * G - 0.100 * B + 128), 0, 255); | ||
46 | + | ||
47 | + //Y[y * yPitch + x] = clip_value((unsigned char)(0.257 * R + 0.504 * G + 0.098 * B + 16), 0, 255); | ||
48 | + //u[y * src_width + x] = clip_value((unsigned char)(-0.148 * R - 0.291 * G + 0.439 * B + 128), 0, 255); | ||
49 | + //v[y * src_width + x] = clip_value((unsigned char)(0.439 * R - 0.368 * G - 0.071 * B + 128), 0, 255); | ||
50 | + } | ||
51 | + | ||
52 | + __global__ void kernel_rgb2yuv(float *src_img, unsigned char* Y, unsigned char* u, unsigned char* v, | ||
53 | + int src_width, int src_height, size_t yPitch) | ||
54 | + { | ||
55 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | ||
56 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | ||
57 | + | ||
58 | + if (x >= src_width) | ||
59 | + return; //x = width - 1; | ||
60 | + | ||
61 | + if (y >= src_height) | ||
62 | + return; // y = height - 1; | ||
63 | + | ||
64 | + float B = src_img[y * src_width + x]; | ||
65 | + float G = src_img[src_width * src_height + y * src_width + x]; | ||
66 | + float R = src_img[src_width * src_height * 2 + y * src_width + x]; | ||
67 | + | ||
68 | + Y[y * yPitch + x] = clip_value((unsigned char)(0.299 * R + 0.587 * G + 0.114 * B), 0, 255); | ||
69 | + u[y * src_width + x] = clip_value((unsigned char)(-0.147 * R - 0.289 * G + 0.436 * B + 128), 0, 255); | ||
70 | + v[y * src_width + x] = clip_value((unsigned char)(0.615 * R - 0.515 * G - 0.100 * B + 128), 0, 255); | ||
71 | + | ||
72 | + //Y[y * yPitch + x] = clip_value((unsigned char)(0.257 * R + 0.504 * G + 0.098 * B + 16), 0, 255); | ||
73 | + //u[y * src_width + x] = clip_value((unsigned char)(-0.148 * R - 0.291 * G + 0.439 * B + 128), 0, 255); | ||
74 | + //v[y * src_width + x] = clip_value((unsigned char)(0.439 * R - 0.368 * G - 0.071 * B + 128), 0, 255); | ||
75 | + } | ||
76 | + | ||
77 | + extern "C" | ||
78 | + __global__ void kernel_resize_UV(unsigned char* src_img, unsigned char *dst_img, | ||
79 | + int src_width, int src_height, int dst_width, int dst_height, int nPitch) | ||
80 | + { | ||
81 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | ||
82 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | ||
83 | + | ||
84 | + if (x >= dst_width) | ||
85 | + return; //x = width - 1; | ||
86 | + | ||
87 | + if (y >= dst_height) | ||
88 | + return; // y = height - 1; | ||
89 | + | ||
90 | + float fx = (x + 0.5)*src_width / (float)dst_width - 0.5; | ||
91 | + float fy = (y + 0.5)*src_height / (float)dst_height - 0.5; | ||
92 | + int ax = floor(fx); | ||
93 | + int ay = floor(fy); | ||
94 | + if (ax < 0) | ||
95 | + { | ||
96 | + ax = 0; | ||
97 | + } | ||
98 | + else if (ax > src_width - 2) | ||
99 | + { | ||
100 | + ax = src_width - 2; | ||
101 | + } | ||
102 | + | ||
103 | + if (ay < 0){ | ||
104 | + ay = 0; | ||
105 | + } | ||
106 | + else if (ay > src_height - 2) | ||
107 | + { | ||
108 | + ay = src_height - 2; | ||
109 | + } | ||
110 | + | ||
111 | + int A = ax + ay*src_width; | ||
112 | + int B = ax + ay*src_width + 1; | ||
113 | + int C = ax + ay*src_width + src_width; | ||
114 | + int D = ax + ay*src_width + src_width + 1; | ||
115 | + | ||
116 | + float w1, w2, w3, w4; | ||
117 | + w1 = fx - ax; | ||
118 | + w2 = 1 - w1; | ||
119 | + w3 = fy - ay; | ||
120 | + w4 = 1 - w3; | ||
121 | + | ||
122 | + unsigned char val = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3; | ||
123 | + | ||
124 | + dst_img[y * nPitch + x] = clip_value(val,0,255); | ||
125 | + } | ||
126 | + | ||
127 | + cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height, | ||
128 | + unsigned char* Y, size_t yPitch, int yWidth, int yHeight, | ||
129 | + unsigned char* U, size_t uPitch, int uWidth, int uHeight, | ||
130 | + unsigned char* V, size_t vPitch, int vWidth, int vHeight) | ||
131 | + { | ||
132 | + unsigned char * u ; | ||
133 | + unsigned char * v ; | ||
134 | + | ||
135 | + cudaError_t cudaStatus; | ||
136 | + | ||
137 | + cudaStatus = cudaMalloc((void**)&u, src_width * src_height * sizeof(unsigned char)); | ||
138 | + cudaStatus = cudaMalloc((void**)&v, src_width * src_height * sizeof(unsigned char)); | ||
139 | + | ||
140 | + dim3 block(32, 16, 1); | ||
141 | + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); | ||
142 | + dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1); | ||
143 | + dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1); | ||
144 | + | ||
145 | + kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch); | ||
146 | + | ||
147 | + cudaStatus = cudaGetLastError(); | ||
148 | + if (cudaStatus != cudaSuccess) { | ||
149 | + fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
150 | + goto Error; | ||
151 | + } | ||
152 | + | ||
153 | + cudaStatus = cudaDeviceSynchronize(); | ||
154 | + if (cudaStatus != cudaSuccess) { | ||
155 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus); | ||
156 | + goto Error; | ||
157 | + } | ||
158 | + | ||
159 | + kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch); | ||
160 | + | ||
161 | + cudaStatus = cudaGetLastError(); | ||
162 | + if (cudaStatus != cudaSuccess) { | ||
163 | + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
164 | + goto Error; | ||
165 | + } | ||
166 | + | ||
167 | + cudaStatus = cudaDeviceSynchronize(); | ||
168 | + if (cudaStatus != cudaSuccess) { | ||
169 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); | ||
170 | + goto Error; | ||
171 | + } | ||
172 | + | ||
173 | + kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch); | ||
174 | + | ||
175 | + cudaStatus = cudaGetLastError(); | ||
176 | + if (cudaStatus != cudaSuccess) { | ||
177 | + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
178 | + goto Error; | ||
179 | + } | ||
180 | + | ||
181 | + cudaStatus = cudaDeviceSynchronize(); | ||
182 | + if (cudaStatus != cudaSuccess) { | ||
183 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); | ||
184 | + goto Error; | ||
185 | + } | ||
186 | + | ||
187 | +Error : | ||
188 | + cudaFree(u); | ||
189 | + cudaFree(v); | ||
190 | + | ||
191 | + return cudaStatus; | ||
192 | + } | ||
193 | + | ||
194 | + | ||
195 | + | ||
196 | + cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height, | ||
197 | + unsigned char* Y, size_t yPitch, int yWidth, int yHeight, | ||
198 | + unsigned char* U, size_t uPitch, int uWidth, int uHeight, | ||
199 | + unsigned char* V, size_t vPitch, int vWidth, int vHeight) | ||
200 | + { | ||
201 | + unsigned char * u; | ||
202 | + unsigned char * v; | ||
203 | + | ||
204 | + cudaError_t cudaStatus; | ||
205 | + | ||
206 | + cudaStatus = cudaMalloc((void**)&u, src_width * src_height * sizeof(unsigned char)); | ||
207 | + cudaStatus = cudaMalloc((void**)&v, src_width * src_height * sizeof(unsigned char)); | ||
208 | + | ||
209 | + dim3 block(32, 16, 1); | ||
210 | + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); | ||
211 | + dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1); | ||
212 | + dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1); | ||
213 | + | ||
214 | + kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch); | ||
215 | + | ||
216 | + cudaStatus = cudaGetLastError(); | ||
217 | + if (cudaStatus != cudaSuccess) { | ||
218 | + fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
219 | + goto Error; | ||
220 | + } | ||
221 | + | ||
222 | + cudaStatus = cudaDeviceSynchronize(); | ||
223 | + if (cudaStatus != cudaSuccess) { | ||
224 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus); | ||
225 | + goto Error; | ||
226 | + } | ||
227 | + | ||
228 | + kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch); | ||
229 | + | ||
230 | + cudaStatus = cudaGetLastError(); | ||
231 | + if (cudaStatus != cudaSuccess) { | ||
232 | + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
233 | + goto Error; | ||
234 | + } | ||
235 | + | ||
236 | + cudaStatus = cudaDeviceSynchronize(); | ||
237 | + if (cudaStatus != cudaSuccess) { | ||
238 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); | ||
239 | + goto Error; | ||
240 | + } | ||
241 | + | ||
242 | + kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch); | ||
243 | + | ||
244 | + cudaStatus = cudaGetLastError(); | ||
245 | + if (cudaStatus != cudaSuccess) { | ||
246 | + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
247 | + goto Error; | ||
248 | + } | ||
249 | + | ||
250 | + cudaStatus = cudaDeviceSynchronize(); | ||
251 | + if (cudaStatus != cudaSuccess) { | ||
252 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); | ||
253 | + goto Error; | ||
254 | + } | ||
255 | + | ||
256 | + Error: | ||
257 | + cudaFree(u); | ||
258 | + cudaFree(v); | ||
259 | + | ||
260 | + return cudaStatus; | ||
261 | + } | ||
262 | +} | ||
263 | + |
src/ResizeImage.cu
0 → 100644
1 | +#include "cuda_kernels.h" | ||
2 | + | ||
3 | +typedef unsigned char uchar; | ||
4 | +typedef unsigned int uint32; | ||
5 | +typedef int int32; | ||
6 | + | ||
7 | +namespace cuda_common | ||
8 | +{ | ||
9 | + __global__ void kernel_bilinear(float *src_img, float *dst_img, | ||
10 | + int src_width, int src_height, int dst_width, int dst_height) | ||
11 | + { | ||
12 | + const int x = blockIdx.x * blockDim.x + threadIdx.x; | ||
13 | + const int y = blockIdx.y * blockDim.y + threadIdx.y; | ||
14 | + | ||
15 | + if (x < dst_width && y < dst_height) | ||
16 | + { | ||
17 | + float fx = (x + 0.5)*src_width / (float)dst_width - 0.5; | ||
18 | + float fy = (y + 0.5)*src_height / (float)dst_height - 0.5; | ||
19 | + int ax = floor(fx); | ||
20 | + int ay = floor(fy); | ||
21 | + if (ax < 0) | ||
22 | + { | ||
23 | + ax = 0; | ||
24 | + } | ||
25 | + else if (ax > src_width - 2) | ||
26 | + { | ||
27 | + ax = src_width - 2; | ||
28 | + } | ||
29 | + | ||
30 | + if (ay < 0){ | ||
31 | + ay = 0; | ||
32 | + } | ||
33 | + else if (ay > src_height - 2) | ||
34 | + { | ||
35 | + ay = src_height - 2; | ||
36 | + } | ||
37 | + | ||
38 | + int A = ax + ay*src_width; | ||
39 | + int B = ax + ay*src_width + 1; | ||
40 | + int C = ax + ay*src_width + src_width; | ||
41 | + int D = ax + ay*src_width + src_width + 1; | ||
42 | + | ||
43 | + float w1, w2, w3, w4; | ||
44 | + w1 = fx - ax; | ||
45 | + w2 = 1 - w1; | ||
46 | + w3 = fy - ay; | ||
47 | + w4 = 1 - w3; | ||
48 | + | ||
49 | + float blue = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3; | ||
50 | + | ||
51 | + float green = src_img[src_width * src_height + A] * w2*w4 + src_img[src_width * src_height + B] * w1*w4 | ||
52 | + + src_img[src_width * src_height + C] * w2*w3 + src_img[src_width * src_height + D] * w1*w3; | ||
53 | + | ||
54 | + float red = src_img[src_width * src_height * 2 + A] * w2*w4 + src_img[src_width * src_height * 2 + B] * w1*w4 | ||
55 | + + src_img[src_width * src_height * 2 + C] * w2*w3 + src_img[src_width * src_height * 2 + D] * w1*w3; | ||
56 | + | ||
57 | + dst_img[y * dst_width + x] = blue; | ||
58 | + dst_img[dst_width * dst_height + y * dst_width + x] = green; | ||
59 | + dst_img[dst_width * dst_height * 2 + y * dst_width + x] = red; | ||
60 | + } | ||
61 | + } | ||
62 | + | ||
63 | + cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height) | ||
64 | + { | ||
65 | + dim3 block(32, 16, 1); | ||
66 | + dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1); | ||
67 | + | ||
68 | + kernel_bilinear << < grid, block >> >(d_srcRGB, d_dstRGB, src_width, src_height, dst_width, dst_height); | ||
69 | + | ||
70 | + cudaError_t cudaStatus = cudaGetLastError(); | ||
71 | + if (cudaStatus != cudaSuccess) { | ||
72 | + fprintf(stderr, "kernel_bilinear launch failed: %s\n", cudaGetErrorString(cudaStatus)); | ||
73 | + return cudaStatus; | ||
74 | + } | ||
75 | + | ||
76 | + cudaStatus = cudaDeviceSynchronize(); | ||
77 | + if (cudaStatus != cudaSuccess) { | ||
78 | + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); | ||
79 | + return cudaStatus; | ||
80 | + } | ||
81 | + | ||
82 | + return cudaStatus; | ||
83 | + } | ||
84 | +} | ||
0 | \ No newline at end of file | 85 | \ No newline at end of file |
src/common/inc/helper_cuda_drvapi.h
@@ -218,8 +218,7 @@ inline int gpuGetMaxGflopsDeviceIdDRV() | @@ -218,8 +218,7 @@ inline int gpuGetMaxGflopsDeviceIdDRV() | ||
218 | // Find the best major SM Architecture GPU device | 218 | // Find the best major SM Architecture GPU device |
219 | while (current_device < device_count) | 219 | while (current_device < device_count) |
220 | { | 220 | { |
221 | - checkCudaErrors(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device)); | ||
222 | - checkCudaErrors(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device)); | 221 | + checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device)); |
223 | 222 | ||
224 | if (major > 0 && major < 9999) | 223 | if (major > 0 && major < 9999) |
225 | { | 224 | { |
@@ -240,9 +239,7 @@ inline int gpuGetMaxGflopsDeviceIdDRV() | @@ -240,9 +239,7 @@ inline int gpuGetMaxGflopsDeviceIdDRV() | ||
240 | checkCudaErrors(cuDeviceGetAttribute(&clockRate, | 239 | checkCudaErrors(cuDeviceGetAttribute(&clockRate, |
241 | CU_DEVICE_ATTRIBUTE_CLOCK_RATE, | 240 | CU_DEVICE_ATTRIBUTE_CLOCK_RATE, |
242 | current_device)); | 241 | current_device)); |
243 | - | ||
244 | - checkCudaErrors(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device)); | ||
245 | - checkCudaErrors(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device)); | 242 | + checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device)); |
246 | 243 | ||
247 | int computeMode; | 244 | int computeMode; |
248 | getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device); | 245 | getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device); |
@@ -320,9 +317,7 @@ inline int gpuGetMaxGflopsGLDeviceIdDRV() | @@ -320,9 +317,7 @@ inline int gpuGetMaxGflopsGLDeviceIdDRV() | ||
320 | while (current_device < device_count) | 317 | while (current_device < device_count) |
321 | { | 318 | { |
322 | checkCudaErrors(cuDeviceGetName(deviceName, 256, current_device)); | 319 | checkCudaErrors(cuDeviceGetName(deviceName, 256, current_device)); |
323 | - | ||
324 | - checkCudaErrors(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device)); | ||
325 | - checkCudaErrors(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device)); | 320 | + checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device)); |
326 | 321 | ||
327 | #if CUDA_VERSION >= 3020 | 322 | #if CUDA_VERSION >= 3020 |
328 | checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device)); | 323 | checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device)); |
@@ -374,9 +369,7 @@ inline int gpuGetMaxGflopsGLDeviceIdDRV() | @@ -374,9 +369,7 @@ inline int gpuGetMaxGflopsGLDeviceIdDRV() | ||
374 | checkCudaErrors(cuDeviceGetAttribute(&clockRate, | 369 | checkCudaErrors(cuDeviceGetAttribute(&clockRate, |
375 | CU_DEVICE_ATTRIBUTE_CLOCK_RATE, | 370 | CU_DEVICE_ATTRIBUTE_CLOCK_RATE, |
376 | current_device)); | 371 | current_device)); |
377 | - | ||
378 | - checkCudaErrors(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device)); | ||
379 | - checkCudaErrors(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device)); | 372 | + checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device)); |
380 | 373 | ||
381 | #if CUDA_VERSION >= 3020 | 374 | #if CUDA_VERSION >= 3020 |
382 | checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device)); | 375 | checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device)); |
@@ -507,9 +500,7 @@ inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version, int d | @@ -507,9 +500,7 @@ inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version, int d | ||
507 | 500 | ||
508 | checkCudaErrors(cuDeviceGet(&cuDevice, devID)); | 501 | checkCudaErrors(cuDeviceGet(&cuDevice, devID)); |
509 | checkCudaErrors(cuDeviceGetName(name, 100, cuDevice)); | 502 | checkCudaErrors(cuDeviceGetName(name, 100, cuDevice)); |
510 | - | ||
511 | - checkCudaErrors(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, devID)); | ||
512 | - checkCudaErrors(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, devID)); | 503 | + checkCudaErrors(cuDeviceComputeCapability(&major, &minor, devID)); |
513 | 504 | ||
514 | if ((major > major_version) || | 505 | if ((major > major_version) || |
515 | (major == major_version && minor >= minor_version)) | 506 | (major == major_version && minor >= minor_version)) |
src/cuda_kernels.h
@@ -12,15 +12,52 @@ | @@ -12,15 +12,52 @@ | ||
12 | 12 | ||
13 | typedef enum | 13 | typedef enum |
14 | { | 14 | { |
15 | - ITU601 = 1, | ||
16 | - ITU709 = 2 | ||
17 | -} e_ColorSpace; | 15 | + ITU_601 = 1, |
16 | + ITU_709 = 2 | ||
17 | +} FF_ColorSpace; | ||
18 | 18 | ||
19 | namespace cuda_common | 19 | namespace cuda_common |
20 | { | 20 | { |
21 | - cudaError_t setColorSpace(e_ColorSpace CSC, float hue); | 21 | + cudaError_t setColorSpace(FF_ColorSpace CSC, float hue); |
22 | 22 | ||
23 | cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height); | 23 | cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height); |
24 | cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height); | 24 | cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height); |
25 | + | ||
26 | + | ||
27 | + cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height); | ||
28 | + | ||
29 | + cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height, | ||
30 | + unsigned char* Y, size_t yPitch, int yWidth, int yHeight, | ||
31 | + unsigned char* U, size_t uPitch, int uWidth, int uHeight, | ||
32 | + unsigned char* V, size_t vPitch, int vWidth, int vHeight); | ||
33 | + | ||
34 | + cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height, | ||
35 | + unsigned char* Y, size_t yPitch, int yWidth, int yHeight, | ||
36 | + unsigned char* U, size_t uPitch, int uWidth, int uHeight, | ||
37 | + unsigned char* V, size_t vPitch, int vWidth, int vHeight); | ||
38 | + | ||
39 | + cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom); | ||
40 | + // cudaError_t PartMemResize(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int left, int top, int right, int bottom); | ||
41 | + | ||
42 | + cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int srcimg_width, int srcimg_height, unsigned char** d_dstRGB, int count, | ||
43 | + int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h, | ||
44 | + float submeanb, float submeang, float submeanr, | ||
45 | + float varianceb, float varianceg, float variancer); | ||
46 | + | ||
47 | + cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); | ||
48 | + cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); | ||
49 | + | ||
50 | + cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y); | ||
25 | } | 51 | } |
26 | 52 | ||
53 | + | ||
54 | +int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height); | ||
55 | +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height); | ||
56 | + | ||
57 | +int jpegNPP(const char *szOutputFile, float* d_srcRGB); | ||
58 | +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB); | ||
59 | + | ||
60 | +int initTable(); | ||
61 | +int initTable(int flag, int width, int height); | ||
62 | +int releaseJpegNPP(); | ||
63 | + |
src/define.hpp
@@ -5,3 +5,9 @@ | @@ -5,3 +5,9 @@ | ||
5 | #define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) | 5 | #define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) |
6 | 6 | ||
7 | 7 | ||
8 | +#define CHECK_CUDA(call) \ | ||
9 | +{\ | ||
10 | + const cudaError_t error_code = call;\ | ||
11 | + if (cudaSuccess != error_code)\ | ||
12 | + LOG_ERROR("CUDA error, code: {} reason: {}", error_code, cudaGetErrorString(error_code));\ | ||
13 | +} | ||
8 | \ No newline at end of file | 14 | \ No newline at end of file |
src/gb28181/FFGB28181Decoder.cpp
@@ -10,11 +10,13 @@ extern "C" { | @@ -10,11 +10,13 @@ extern "C" { | ||
10 | #include "libswscale/swscale.h" | 10 | #include "libswscale/swscale.h" |
11 | } | 11 | } |
12 | 12 | ||
13 | -#include "../logger.hpp" | ||
14 | - | ||
15 | #include"RTPTcpReceiver.h" | 13 | #include"RTPTcpReceiver.h" |
16 | #include"RTPUdpReceiver.h" | 14 | #include"RTPUdpReceiver.h" |
17 | 15 | ||
16 | +#include <cuda_runtime.h> | ||
17 | + | ||
18 | +#include "common_header.h" | ||
19 | + | ||
18 | #define ECLOSED 0 | 20 | #define ECLOSED 0 |
19 | #define ECLOSING 1 | 21 | #define ECLOSING 1 |
20 | #define ERUNNING 2 | 22 | #define ERUNNING 2 |
@@ -36,6 +38,7 @@ FFGB28181Decoder::FFGB28181Decoder() { | @@ -36,6 +38,7 @@ FFGB28181Decoder::FFGB28181Decoder() { | ||
36 | m_frameSkip = 1; | 38 | m_frameSkip = 1; |
37 | m_port = -1; | 39 | m_port = -1; |
38 | m_dec_keyframe = false; | 40 | m_dec_keyframe = false; |
41 | + m_post_decode_thread = 0; | ||
39 | } | 42 | } |
40 | 43 | ||
41 | FFGB28181Decoder::~FFGB28181Decoder() | 44 | FFGB28181Decoder::~FFGB28181Decoder() |
@@ -47,11 +50,6 @@ FFGB28181Decoder::~FFGB28181Decoder() | @@ -47,11 +50,6 @@ FFGB28181Decoder::~FFGB28181Decoder() | ||
47 | avcodec_free_context(&m_pAVCodecCtx); | 50 | avcodec_free_context(&m_pAVCodecCtx); |
48 | } | 51 | } |
49 | 52 | ||
50 | - if (m_pAVFrame) { | ||
51 | - av_frame_free(&m_pAVFrame); | ||
52 | - m_pAVFrame = NULL; | ||
53 | - } | ||
54 | - | ||
55 | m_dec_keyframe = false; | 53 | m_dec_keyframe = false; |
56 | 54 | ||
57 | LOG_INFO("destroy OK--{}", m_dec_name); | 55 | LOG_INFO("destroy OK--{}", m_dec_name); |
@@ -74,9 +72,22 @@ void FFGB28181Decoder::close(){ | @@ -74,9 +72,22 @@ void FFGB28181Decoder::close(){ | ||
74 | m_rtpPtr = nullptr; | 72 | m_rtpPtr = nullptr; |
75 | } | 73 | } |
76 | 74 | ||
77 | - LOG_INFO("解码器关闭成功 --{}", m_dec_name); | 75 | + if (gpu_options) av_dict_free(&gpu_options); |
76 | + | ||
77 | + if (m_post_decode_thread != 0) | ||
78 | + { | ||
79 | + pthread_join(m_post_decode_thread,0); | ||
80 | + } | ||
81 | + | ||
82 | + while(mFrameQueue.size() > 0){ | ||
83 | + AVFrame * gpuFrame = mFrameQueue.front(); | ||
84 | + av_frame_free(&gpuFrame); | ||
85 | + mFrameQueue.pop(); | ||
86 | + } | ||
78 | 87 | ||
79 | m_status = ECLOSED; | 88 | m_status = ECLOSED; |
89 | + | ||
90 | + LOG_INFO("解码器关闭成功 --{}", m_dec_name); | ||
80 | } | 91 | } |
81 | 92 | ||
82 | bool FFGB28181Decoder::init(FFDecConfig& cfg){ | 93 | bool FFGB28181Decoder::init(FFDecConfig& cfg){ |
@@ -124,7 +135,18 @@ bool FFGB28181Decoder::start() { | @@ -124,7 +135,18 @@ bool FFGB28181Decoder::start() { | ||
124 | 135 | ||
125 | LOG_INFO("start - {} {}: ", m_dec_name, m_port); | 136 | LOG_INFO("start - {} {}: ", m_dec_name, m_port); |
126 | 137 | ||
127 | - return m_rtpPtr->Open((uint16_t)m_port); | 138 | + bool bRet = m_rtpPtr->Open((uint16_t)m_port); |
139 | + if(bRet){ | ||
140 | + pthread_create(&m_post_decode_thread,0, | ||
141 | + [](void* arg) | ||
142 | + { | ||
143 | + FFGB28181Decoder* a=(FFGB28181Decoder*)arg; | ||
144 | + a->post_decode_thread(); | ||
145 | + return (void*)0; | ||
146 | + } | ||
147 | + ,this); | ||
148 | + } | ||
149 | + return bRet; | ||
128 | } | 150 | } |
129 | 151 | ||
130 | void FFGB28181Decoder::setDecKeyframe(bool bKeyframe){ | 152 | void FFGB28181Decoder::setDecKeyframe(bool bKeyframe){ |
@@ -151,15 +173,12 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i | @@ -151,15 +173,12 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i | ||
151 | return; | 173 | return; |
152 | } | 174 | } |
153 | 175 | ||
154 | - AVPacket framePacket = {}, mp4Packet = {}; | 176 | + AVPacket framePacket = {}; |
155 | av_init_packet(&framePacket); | 177 | av_init_packet(&framePacket); |
156 | - av_init_packet(&mp4Packet); | ||
157 | 178 | ||
158 | framePacket.size = len; | 179 | framePacket.size = len; |
159 | framePacket.data = (uint8_t*)data; | 180 | framePacket.data = (uint8_t*)data; |
160 | 181 | ||
161 | - AVDictionary *gpu_options = nullptr; | ||
162 | - | ||
163 | if (m_pAVCodecCtx == nullptr) { | 182 | if (m_pAVCodecCtx == nullptr) { |
164 | LOG_INFO("frame data is zero --{}", m_dec_name); | 183 | LOG_INFO("frame data is zero --{}", m_dec_name); |
165 | if (VIDEO_TYPE_H264 == videoType) { | 184 | if (VIDEO_TYPE_H264 == videoType) { |
@@ -192,7 +211,6 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i | @@ -192,7 +211,6 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i | ||
192 | } | 211 | } |
193 | 212 | ||
194 | m_pAVCodecCtx = avcodec_alloc_context3(m_pAVCodec); | 213 | m_pAVCodecCtx = avcodec_alloc_context3(m_pAVCodec); |
195 | - | ||
196 | 214 | ||
197 | if (m_gpuid >= 0) { | 215 | if (m_gpuid >= 0) { |
198 | char gpui[8] = { 0 }; | 216 | char gpui[8] = { 0 }; |
@@ -211,8 +229,6 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i | @@ -211,8 +229,6 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i | ||
211 | 229 | ||
212 | if (avcodec_open2(m_pAVCodecCtx, m_pAVCodec, &gpu_options) < 0) | 230 | if (avcodec_open2(m_pAVCodecCtx, m_pAVCodec, &gpu_options) < 0) |
213 | return; | 231 | return; |
214 | - | ||
215 | - m_pAVFrame = av_frame_alloc(); | ||
216 | } | 232 | } |
217 | 233 | ||
218 | //开始解码 | 234 | //开始解码 |
@@ -220,6 +236,7 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i | @@ -220,6 +236,7 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i | ||
220 | if (ret < 0) { | 236 | if (ret < 0) { |
221 | //send_exception(RunMessageType::E2002, e_msg); | 237 | //send_exception(RunMessageType::E2002, e_msg); |
222 | LOG_ERROR("Real stream视频解码失败,请检查视频设备{}: avcodec_send_packet failed. ret={}", m_dec_name, ret); | 238 | LOG_ERROR("Real stream视频解码失败,请检查视频设备{}: avcodec_send_packet failed. ret={}", m_dec_name, ret); |
239 | + av_packet_unref(&framePacket); | ||
223 | return; | 240 | return; |
224 | } | 241 | } |
225 | 242 | ||
@@ -228,61 +245,67 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i | @@ -228,61 +245,67 @@ void FFGB28181Decoder::stream_callback(int videoType, char* data, int len, int i | ||
228 | frameH = m_pAVCodecCtx->height; | 245 | frameH = m_pAVCodecCtx->height; |
229 | if (frameW <= 0 || frameH <= 0) { | 246 | if (frameW <= 0 || frameH <= 0) { |
230 | LOG_ERROR("[{}] frame W or H is error! ({},{})", m_dec_name, frameW, frameH); | 247 | LOG_ERROR("[{}] frame W or H is error! ({},{})", m_dec_name, frameW, frameH); |
248 | + av_packet_unref(&framePacket); | ||
231 | return; | 249 | return; |
232 | } | 250 | } |
233 | } | 251 | } |
234 | // m_fps = m_pAVCodecCtx->pkt_timebase.den == 0 ? 25.0 : av_q2d(m_pAVCodecCtx->pkt_timebase); | 252 | // m_fps = m_pAVCodecCtx->pkt_timebase.den == 0 ? 25.0 : av_q2d(m_pAVCodecCtx->pkt_timebase); |
235 | m_fps = av_q2d(m_pAVCodecCtx->framerate); | 253 | m_fps = av_q2d(m_pAVCodecCtx->framerate); |
236 | - LOG_DEBUG("frameW {}--frameH {}", frameW, frameH); | ||
237 | - while (ret >= 0) { | ||
238 | - ret = avcodec_receive_frame(m_pAVCodecCtx, m_pAVFrame); | ||
239 | - if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN)) | ||
240 | - return; | ||
241 | - else if (ret < 0) { | ||
242 | - if (m_frameCount % 10 == 0){ | ||
243 | - //send_exception(RunMessageType::E2002, e_msg); | ||
244 | - LOG_ERROR("Real stream视频解码失败,请检查视频设备{}: avcodec_receive_frame failed. ret={}", m_dec_name, ret); | ||
245 | - } | ||
246 | - continue; | ||
247 | - } | 254 | + // LOG_DEBUG("frameW {}--frameH {}", frameW, frameH); |
255 | + | ||
256 | + AVFrame* gpuFrame = av_frame_alloc(); | ||
257 | + ret = avcodec_receive_frame(m_pAVCodecCtx, gpuFrame); | ||
258 | + if ((ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) || ret < 0){ | ||
259 | + LOG_ERROR("{} - Failed to receive frame: {}", m_dec_name, ret); | ||
260 | + av_packet_unref(&framePacket); | ||
261 | + av_frame_free(&gpuFrame); | ||
262 | + return; | ||
263 | + } | ||
248 | 264 | ||
249 | - if (++m_frameCount % m_frameSkip != 0) continue; | ||
250 | - | ||
251 | - if (m_pAVFrame->width != frameW || m_pAVFrame->height != frameH){ | ||
252 | - LOG_INFO("AVFrame is inconsistent: width is {}, height is {}; original frameW is {}, frameH is {}--{}", m_pAVFrame->width, m_pAVFrame->height, frameW, frameH , m_dec_name); | ||
253 | - continue; | ||
254 | - } | ||
255 | - | ||
256 | - LOG_DEBUG("curpos is: {}", m_frameCount); | ||
257 | - | ||
258 | - post_decoded_cbk(m_postDecArg, m_pAVFrame); | ||
259 | - | ||
260 | - //LOG_count++; | ||
261 | - //if (LOG_count > 100000) { | ||
262 | - // LOG_INFO("Real frame send_shm_videoframe pts={}-{}", localPts, m_dec_name); | ||
263 | - // //LOG_count = 0; | ||
264 | - //} | ||
265 | - //} | ||
266 | - //catch (GeneralException2& e) | ||
267 | - //{ | ||
268 | - // LOG_ERROR("send_shm_videoframe failed! {}--{}--{}", e.err_code(), e.err_msg(), m_dec_name); | ||
269 | - // if (e.err_code() == -666) { | ||
270 | - // this->close(); | ||
271 | - // } | ||
272 | - // | ||
273 | - // if (e.err_code() == ERROR_MEMORY) { | ||
274 | - // if (m_frameCount % 10 == 0) { | ||
275 | - // string e_msg; | ||
276 | - // format_string(e_msg, "服务器资源内存分配失败, 在vas模块%s文件%d行出现无法获取内存的情况!", __FILE__, __LINE__); | ||
277 | - // send_exception(RunMessageType::F4001, e_msg); | ||
278 | - // LOG_ERROR("{}", e_msg); | ||
279 | - // } | ||
280 | - // } | ||
281 | - // return; | ||
282 | - //} | 265 | + av_packet_unref(&framePacket); |
266 | + | ||
267 | + if (gpuFrame->width != frameW || gpuFrame->height != frameH){ | ||
268 | + LOG_INFO("AVFrame is inconsistent: width is {}, height is {}; original frameW is {}, frameH is {}--{}", gpuFrame->width, gpuFrame->height, frameW, frameH , m_dec_name); | ||
269 | + av_frame_free(&gpuFrame); | ||
270 | + return; | ||
271 | + } | ||
272 | + | ||
273 | + m_queue_mutex.lock(); | ||
274 | + if(mFrameQueue.size() <= 10){ | ||
275 | + mFrameQueue.push(gpuFrame); | ||
276 | + }else{ | ||
277 | + av_frame_free(&gpuFrame); | ||
283 | } | 278 | } |
279 | + m_queue_mutex.unlock(); | ||
280 | +} | ||
281 | + | ||
282 | +void FFGB28181Decoder::post_decode_thread(){ | ||
283 | + | ||
284 | + int index = 0; | ||
285 | + while (isRunning()) | ||
286 | + { | ||
287 | + if(mFrameQueue.size() > 0){ | ||
288 | + std::lock_guard<std::mutex> l(m_snapshot_mutex); | ||
289 | + // 取队头数据 | ||
290 | + m_queue_mutex.lock(); | ||
291 | + AVFrame * gpuFrame = mFrameQueue.front(); | ||
292 | + mFrameQueue.pop(); | ||
293 | + m_queue_mutex.unlock(); | ||
294 | + // 跳帧 | ||
295 | + if (m_frameSkip == 1 || index % m_frameSkip == 0){ | ||
296 | + post_decoded_cbk(m_postDecArg, gpuFrame); | ||
297 | + } | ||
298 | + | ||
299 | + av_frame_free(&gpuFrame); | ||
300 | + | ||
301 | + index++; | ||
302 | + if(index >= 100000){ | ||
303 | + index = 0; | ||
304 | + } | ||
305 | + } | ||
306 | + } | ||
284 | 307 | ||
285 | - if (gpu_options) av_dict_free(&gpu_options); | 308 | + LOG_INFO("post decode thread exited."); |
286 | } | 309 | } |
287 | 310 | ||
288 | void FFGB28181Decoder::stream_end_callback() | 311 | void FFGB28181Decoder::stream_end_callback() |
src/gb28181/FFGB28181Decoder.h
@@ -6,6 +6,7 @@ | @@ -6,6 +6,7 @@ | ||
6 | #include "../AbstractDecoder.h" | 6 | #include "../AbstractDecoder.h" |
7 | 7 | ||
8 | #include <atomic> | 8 | #include <atomic> |
9 | +#include <mutex> | ||
9 | 10 | ||
10 | struct AVFormatContext; | 11 | struct AVFormatContext; |
11 | struct AVCodecContext; | 12 | struct AVCodecContext; |
@@ -14,6 +15,7 @@ struct AVFrame; | @@ -14,6 +15,7 @@ struct AVFrame; | ||
14 | struct AVPacket; | 15 | struct AVPacket; |
15 | struct SwsContext; | 16 | struct SwsContext; |
16 | 17 | ||
18 | +using namespace std; | ||
17 | 19 | ||
18 | class FFGB28181Decoder: public AbstractDecoder | 20 | class FFGB28181Decoder: public AbstractDecoder |
19 | { | 21 | { |
@@ -45,19 +47,16 @@ public: | @@ -45,19 +47,16 @@ public: | ||
45 | public: | 47 | public: |
46 | void stream_callback(int videoType, char* data, int len, int isKey, uint64_t pts, uint64_t localPts); | 48 | void stream_callback(int videoType, char* data, int len, int isKey, uint64_t pts, uint64_t localPts); |
47 | void stream_end_callback(); | 49 | void stream_end_callback(); |
50 | + void post_decode_thread(); | ||
48 | 51 | ||
49 | private: | 52 | private: |
50 | AVCodecContext* m_pAVCodecCtx {}; | 53 | AVCodecContext* m_pAVCodecCtx {}; |
51 | const AVCodec* m_pAVCodec {}; | 54 | const AVCodec* m_pAVCodec {}; |
52 | - AVFrame* m_pAVFrame {}; | ||
53 | 55 | ||
54 | int m_gpuid {-1}; | 56 | int m_gpuid {-1}; |
55 | 57 | ||
56 | RTPReceiver* m_rtpPtr; | 58 | RTPReceiver* m_rtpPtr; |
57 | int m_port; | 59 | int m_port; |
58 | - uint64_t m_frameCount {}; | ||
59 | - | ||
60 | - AVFrame* pFrameRGB {}; | ||
61 | 60 | ||
62 | uint64_t m_startPts {}; | 61 | uint64_t m_startPts {}; |
63 | uint64_t m_lastPts {}; //上一次pts的值 | 62 | uint64_t m_lastPts {}; //上一次pts的值 |
@@ -71,6 +70,10 @@ private: | @@ -71,6 +70,10 @@ private: | ||
71 | int log_count {}; | 70 | int log_count {}; |
72 | 71 | ||
73 | std::atomic_int m_status {}; | 72 | std::atomic_int m_status {}; |
73 | + | ||
74 | + AVDictionary *gpu_options = nullptr; | ||
75 | + | ||
76 | + pthread_t m_post_decode_thread; | ||
74 | }; | 77 | }; |
75 | 78 | ||
76 | #endif // _GB28181_DECODER_H_ | 79 | #endif // _GB28181_DECODER_H_ |
src/gb28181/RTPReceiver.cpp
1 | -#include "RTPReceiver.h" | 1 | +#include "RTPReceiver.h" |
2 | #include "rtppacket.h" | 2 | #include "rtppacket.h" |
3 | -#include "../logger.hpp" | ||
4 | #include <thread> | 3 | #include <thread> |
5 | 4 | ||
5 | +#include "common_header.h" | ||
6 | + | ||
6 | #define BUFFERSIZE_1024 1024 | 7 | #define BUFFERSIZE_1024 1024 |
7 | const int kVideoFrameSize = BUFFERSIZE_1024*BUFFERSIZE_1024*5*2; | 8 | const int kVideoFrameSize = BUFFERSIZE_1024*BUFFERSIZE_1024*5*2; |
8 | 9 | ||
@@ -174,7 +175,7 @@ int RTPReceiver::OnPsProcess() | @@ -174,7 +175,7 @@ int RTPReceiver::OnPsProcess() | ||
174 | LOG_INFO("[{}] started.", m_deviceID); | 175 | LOG_INFO("[{}] started.", m_deviceID); |
175 | while (!m_bPsExit) { | 176 | while (!m_bPsExit) { |
176 | m_psFrameMutex.lock(); | 177 | m_psFrameMutex.lock(); |
177 | - LOG_DEBUG("[{}] PS frame size : {}", m_deviceID, m_psVideoFrames.size()); | 178 | + // LOG_DEBUG("[{}] PS frame size : {}", m_deviceID, m_psVideoFrames.size()); |
178 | if (m_psVideoFrames.size() <= 0){ | 179 | if (m_psVideoFrames.size() <= 0){ |
179 | m_psFrameMutex.unlock(); | 180 | m_psFrameMutex.unlock(); |
180 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); | 181 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); |
@@ -257,7 +258,7 @@ int RTPReceiver::ParsePacket(RTPPacket* packet){ | @@ -257,7 +258,7 @@ int RTPReceiver::ParsePacket(RTPPacket* packet){ | ||
257 | break; | 258 | break; |
258 | } | 259 | } |
259 | 260 | ||
260 | - LOG_DEBUG("[{}] ParsePacket GetPayloadLength", m_deviceID); | 261 | + // LOG_DEBUG("[{}] ParsePacket GetPayloadLength", m_deviceID); |
261 | 262 | ||
262 | if (mark) | 263 | if (mark) |
263 | { | 264 | { |
@@ -271,7 +272,7 @@ int RTPReceiver::ParsePacket(RTPPacket* packet){ | @@ -271,7 +272,7 @@ int RTPReceiver::ParsePacket(RTPPacket* packet){ | ||
271 | std::lock_guard<std::mutex> l(m_psFrameMutex); | 272 | std::lock_guard<std::mutex> l(m_psFrameMutex); |
272 | if (m_psVideoFrames.size() < 100) | 273 | if (m_psVideoFrames.size() < 100) |
273 | { | 274 | { |
274 | - LOG_DEBUG("[{}]ParsePacket push", m_deviceID); | 275 | + // LOG_DEBUG("[{}]ParsePacket push", m_deviceID); |
275 | m_psVideoFrames.push(new Frame(frameBuf, offset, false)); | 276 | m_psVideoFrames.push(new Frame(frameBuf, offset, false)); |
276 | } | 277 | } |
277 | else { | 278 | else { |
src/gb28181/RTPReceiver.h
@@ -32,7 +32,7 @@ typedef void(*CallBack_VodFileEnd)(void* userdata); | @@ -32,7 +32,7 @@ typedef void(*CallBack_VodFileEnd)(void* userdata); | ||
32 | /** | 32 | /** |
33 | * 请求流 | 33 | * 请求流 |
34 | */ | 34 | */ |
35 | -typedef bool(*CallBack_Request_Stream)(); | 35 | +typedef bool(*CallBack_Request_Stream)(const char* deviceId); |
36 | 36 | ||
37 | // 标识帧, 注意buffer需要自己开辟和释放 | 37 | // 标识帧, 注意buffer需要自己开辟和释放 |
38 | struct Frame { | 38 | struct Frame { |
@@ -85,7 +85,7 @@ class RTPReceiver{ | @@ -85,7 +85,7 @@ class RTPReceiver{ | ||
85 | 85 | ||
86 | public: | 86 | public: |
87 | RTPReceiver(); | 87 | RTPReceiver(); |
88 | - ~RTPReceiver(); | 88 | + virtual ~RTPReceiver(); |
89 | 89 | ||
90 | virtual bool Open(uint16_t localPort) = 0; | 90 | virtual bool Open(uint16_t localPort) = 0; |
91 | virtual bool IsOpened() = 0; | 91 | virtual bool IsOpened() = 0; |
src/gb28181/RTPTcpReceiver.cpp
1 | #include"RTPTcpReceiver.h" | 1 | #include"RTPTcpReceiver.h" |
2 | -#include "../logger.hpp" | ||
3 | 2 | ||
3 | +#include "common_header.h" | ||
4 | 4 | ||
5 | -static long long get_cur_time() { | ||
6 | - | ||
7 | - chrono::time_point<chrono::system_clock, chrono::milliseconds> tpMicro | ||
8 | - = chrono::time_point_cast<chrono::milliseconds>(chrono::system_clock::now()); | ||
9 | - | ||
10 | - return tpMicro.time_since_epoch().count(); | ||
11 | -} | ||
12 | 5 | ||
13 | // class TcpRTPSession : public RTPSession | 6 | // class TcpRTPSession : public RTPSession |
14 | // { | 7 | // { |
@@ -65,7 +58,7 @@ public: | @@ -65,7 +58,7 @@ public: | ||
65 | LOG_ERROR("Error sending over socket {}, removing destination", sock); | 58 | LOG_ERROR("Error sending over socket {}, removing destination", sock); |
66 | DeleteDestination(RTPTCPAddress(sock)); | 59 | DeleteDestination(RTPTCPAddress(sock)); |
67 | if(nullptr != tcpReceiver && !tcpReceiver->isClosing()){ | 60 | if(nullptr != tcpReceiver && !tcpReceiver->isClosing()){ |
68 | - tcpReceiver->RequestStream(); | 61 | + tcpReceiver->ReConnect(); |
69 | } | 62 | } |
70 | } | 63 | } |
71 | 64 | ||
@@ -90,6 +83,16 @@ static int rtp_revc_thread_(void* param) | @@ -90,6 +83,16 @@ static int rtp_revc_thread_(void* param) | ||
90 | return self->OnRtpRecv(); | 83 | return self->OnRtpRecv(); |
91 | } | 84 | } |
92 | 85 | ||
86 | +static int listen_finish_thread_(void* param) | ||
87 | +{ | ||
88 | + if (!param) | ||
89 | + { | ||
90 | + return -1; | ||
91 | + } | ||
92 | + | ||
93 | + RTPTcpReceiver* self = (RTPTcpReceiver*)param; | ||
94 | + return self->ListenFinish(); | ||
95 | +} | ||
93 | 96 | ||
94 | RTPTcpReceiver::RTPTcpReceiver() | 97 | RTPTcpReceiver::RTPTcpReceiver() |
95 | : m_bRtpExit(false) | 98 | : m_bRtpExit(false) |
@@ -143,11 +146,19 @@ bool RTPTcpReceiver::IsOpened(){ | @@ -143,11 +146,19 @@ bool RTPTcpReceiver::IsOpened(){ | ||
143 | } | 146 | } |
144 | 147 | ||
145 | void RTPTcpReceiver::Close(){ | 148 | void RTPTcpReceiver::Close(){ |
149 | + m_bRtpExit = true; | ||
150 | + | ||
151 | + if(m_listenFinishThread.joinable()){ | ||
152 | + m_listenFinishThread.join(); | ||
153 | + } | ||
154 | +} | ||
155 | + | ||
156 | +void RTPTcpReceiver::close_task(){ | ||
157 | + m_bRtpExit = true; | ||
146 | 158 | ||
147 | m_bClosing = true; | 159 | m_bClosing = true; |
148 | 160 | ||
149 | m_bAccepted = true; | 161 | m_bAccepted = true; |
150 | - m_bRtpExit = true; | ||
151 | 162 | ||
152 | LOG_DEBUG("[{}] 1.", m_deviceID); | 163 | LOG_DEBUG("[{}] 1.", m_deviceID); |
153 | 164 | ||
@@ -207,20 +218,22 @@ int RTPTcpReceiver::initSession(int localPort){ | @@ -207,20 +218,22 @@ int RTPTcpReceiver::initSession(int localPort){ | ||
207 | status = m_rtpSessionPtr->Create(*m_pSessparams, m_pTrans); | 218 | status = m_rtpSessionPtr->Create(*m_pSessparams, m_pTrans); |
208 | if (status < 0) | 219 | if (status < 0) |
209 | { | 220 | { |
210 | - LOG_ERROR("[{}] create session error!!", m_deviceID); | 221 | + // 若status = -59 ,需运行 export LOGNAME=root ,见 https://blog.csdn.net/m0_37876242/article/details/128588162 |
222 | + LOG_ERROR("[{}] create session error: {}", m_deviceID, status); | ||
211 | return -1; | 223 | return -1; |
212 | } | 224 | } |
213 | 225 | ||
214 | m_rtpThread = std::thread(rtp_revc_thread_, this); | 226 | m_rtpThread = std::thread(rtp_revc_thread_, this); |
227 | + m_listenFinishThread = std::thread(listen_finish_thread_, this); | ||
215 | 228 | ||
216 | InitPS(); | 229 | InitPS(); |
217 | 230 | ||
218 | - bool bRet = RequestStream(); | ||
219 | - if (!bRet) | ||
220 | - { | ||
221 | - LOG_INFO("[{}] 请求流失败!", m_deviceID); | ||
222 | - return -1; | ||
223 | - } | 231 | + // bool bRet = RequestStream(); |
232 | + // if (!bRet) | ||
233 | + // { | ||
234 | + // LOG_INFO("[{}] 请求流失败!", m_deviceID); | ||
235 | + // return -1; | ||
236 | + // } | ||
224 | 237 | ||
225 | LOG_INFO("[{}] 初始化成功, congratulations !!!", m_deviceID); | 238 | LOG_INFO("[{}] 初始化成功, congratulations !!!", m_deviceID); |
226 | 239 | ||
@@ -240,17 +253,56 @@ int RTPTcpReceiver::OnRtpRecv() | @@ -240,17 +253,56 @@ int RTPTcpReceiver::OnRtpRecv() | ||
240 | SocketType nServer = -1; | 253 | SocketType nServer = -1; |
241 | 254 | ||
242 | LOG_INFO("[{}] Poll started.", m_deviceID); | 255 | LOG_INFO("[{}] Poll started.", m_deviceID); |
243 | - int status = -1; | 256 | + int reconn_times = 0; |
257 | + int reaccept_times = 0; | ||
258 | + bool bReconn = false; | ||
244 | while(!m_bRtpExit){ | 259 | while(!m_bRtpExit){ |
245 | while(!m_bAccepted){ | 260 | while(!m_bAccepted){ |
261 | + if(m_bRtpExit){ | ||
262 | + goto end_flag; | ||
263 | + } | ||
264 | + | ||
265 | + while (!bReconn){ | ||
266 | + if(m_bRtpExit){ | ||
267 | + goto end_flag; | ||
268 | + } | ||
269 | + | ||
270 | + reconn_times++; | ||
271 | + if(reconn_times > 10){ | ||
272 | + // 10次请求都失败,结束任务 | ||
273 | + m_bRtpExit = true; | ||
274 | + goto end_flag; | ||
275 | + } | ||
276 | + LOG_DEBUG("[{}] RequestStream...", m_deviceID); | ||
277 | + bReconn = RequestStream(); | ||
278 | + if (bReconn){ | ||
279 | + LOG_DEBUG("[{}] RequestStream, True", m_deviceID); | ||
280 | + continue; | ||
281 | + } | ||
282 | + LOG_DEBUG("[{}] RequestStream, False", m_deviceID); | ||
283 | + | ||
284 | + std::this_thread::sleep_for(std::chrono::seconds(3)); | ||
285 | + } | ||
286 | + | ||
246 | LOG_DEBUG("[{}] accepting...", m_deviceID); | 287 | LOG_DEBUG("[{}] accepting...", m_deviceID); |
247 | nServer = accept(m_nListener, (sockaddr*)&clientAddr, (socklen_t * ) &nLen); | 288 | nServer = accept(m_nListener, (sockaddr*)&clientAddr, (socklen_t * ) &nLen); |
248 | if (-1 == nServer){ | 289 | if (-1 == nServer){ |
249 | - std::this_thread::sleep_for(std::chrono::milliseconds(10)); | 290 | + reaccept_times++; |
291 | + LOG_DEBUG("[{}] reaccept_times = {}", m_deviceID, reaccept_times); | ||
292 | + if(reaccept_times > 600){ | ||
293 | + LOG_DEBUG("[{}] reaccept_times > 600", m_deviceID); | ||
294 | + bReconn = false; | ||
295 | + reaccept_times = 0; | ||
296 | + } | ||
297 | + std::this_thread::sleep_for(std::chrono::milliseconds(50)); | ||
250 | continue; | 298 | continue; |
251 | } | 299 | } |
300 | + LOG_DEBUG("[{}] accept success", m_deviceID); | ||
252 | m_rtpSessionPtr->AddDestination(RTPTCPAddress(nServer)); | 301 | m_rtpSessionPtr->AddDestination(RTPTCPAddress(nServer)); |
253 | m_bAccepted = true; | 302 | m_bAccepted = true; |
303 | + bReconn = false; | ||
304 | + reconn_times = 0; | ||
305 | + reaccept_times = 0; | ||
254 | 306 | ||
255 | LOG_INFO("[{}] nServer={}", m_deviceID, nServer); | 307 | LOG_INFO("[{}] nServer={}", m_deviceID, nServer); |
256 | break; | 308 | break; |
@@ -265,7 +317,7 @@ int RTPTcpReceiver::OnRtpRecv() | @@ -265,7 +317,7 @@ int RTPTcpReceiver::OnRtpRecv() | ||
265 | 317 | ||
266 | while ((pack = m_rtpSessionPtr->GetNextPacket()) != NULL) | 318 | while ((pack = m_rtpSessionPtr->GetNextPacket()) != NULL) |
267 | { | 319 | { |
268 | - LOG_DEBUG("[{}] time: {} ", m_deviceID, get_cur_time()); | 320 | + // LOG_DEBUG("[{}] time: {} ", m_deviceID, UtilTools::get_cur_time_ms()); |
269 | ParsePacket(pack); | 321 | ParsePacket(pack); |
270 | 322 | ||
271 | m_rtpSessionPtr->DeletePacket(pack); | 323 | m_rtpSessionPtr->DeletePacket(pack); |
@@ -279,6 +331,8 @@ int RTPTcpReceiver::OnRtpRecv() | @@ -279,6 +331,8 @@ int RTPTcpReceiver::OnRtpRecv() | ||
279 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); | 331 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); |
280 | } | 332 | } |
281 | 333 | ||
334 | +end_flag: | ||
335 | + | ||
282 | m_rtpSessionPtr->Destroy(); | 336 | m_rtpSessionPtr->Destroy(); |
283 | 337 | ||
284 | if(nServer > 0){ | 338 | if(nServer > 0){ |
@@ -293,13 +347,18 @@ int RTPTcpReceiver::OnRtpRecv() | @@ -293,13 +347,18 @@ int RTPTcpReceiver::OnRtpRecv() | ||
293 | return 0; | 347 | return 0; |
294 | } | 348 | } |
295 | 349 | ||
296 | -bool RTPTcpReceiver::RequestStream(){ | ||
297 | - bool bConnect = m_callback_request_stream(); | ||
298 | - if(!bConnect){ | ||
299 | - Close(); | ||
300 | - return false; | 350 | +int RTPTcpReceiver::ListenFinish(){ |
351 | + while(!m_bRtpExit){ | ||
352 | + std::this_thread::sleep_for(std::chrono::seconds(3)); | ||
301 | } | 353 | } |
354 | + | ||
355 | + close_task(); | ||
356 | +} | ||
357 | + | ||
358 | +bool RTPTcpReceiver::ReConnect(){ | ||
302 | m_bAccepted = false; | 359 | m_bAccepted = false; |
360 | +} | ||
303 | 361 | ||
304 | - return true; | 362 | +bool RTPTcpReceiver::RequestStream(){ |
363 | + return m_callback_request_stream(m_deviceID.c_str()); | ||
305 | } | 364 | } |
306 | \ No newline at end of file | 365 | \ No newline at end of file |
src/gb28181/RTPTcpReceiver.h
@@ -57,11 +57,14 @@ public: | @@ -57,11 +57,14 @@ public: | ||
57 | 57 | ||
58 | public: | 58 | public: |
59 | int OnRtpRecv(); | 59 | int OnRtpRecv(); |
60 | + bool ReConnect(); | ||
61 | + int ListenFinish(); | ||
60 | bool RequestStream(); | 62 | bool RequestStream(); |
61 | bool isClosing(); | 63 | bool isClosing(); |
62 | 64 | ||
63 | private: | 65 | private: |
64 | int initSession(int localPort); | 66 | int initSession(int localPort); |
67 | + void close_task(); | ||
65 | 68 | ||
66 | private: | 69 | private: |
67 | 70 | ||
@@ -77,9 +80,12 @@ private: | @@ -77,9 +80,12 @@ private: | ||
77 | std::thread m_rtpThread; // RTP接收线程 | 80 | std::thread m_rtpThread; // RTP接收线程 |
78 | SocketType m_nListener; | 81 | SocketType m_nListener; |
79 | 82 | ||
80 | - RTPSession* m_rtpSessionPtr; // RTP会话 | ||
81 | - RTPSessionParams* m_pSessparams; | ||
82 | - MyTCPTransmitter* m_pTrans; | 83 | + RTPSession* m_rtpSessionPtr; // RTP会话 |
84 | + RTPSessionParams* m_pSessparams; | ||
85 | + MyTCPTransmitter* m_pTrans; | ||
86 | + | ||
87 | + std::thread m_listenFinishThread; // RTP接收线程 | ||
88 | + | ||
83 | }; | 89 | }; |
84 | 90 | ||
85 | #endif // _RTP_TCP_RECEIVER_H_ | 91 | #endif // _RTP_TCP_RECEIVER_H_ |
src/gb28181/RTPUdpReceiver.cpp
@@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
6 | #include <thread> | 6 | #include <thread> |
7 | #include <chrono> | 7 | #include <chrono> |
8 | 8 | ||
9 | -#include "../logger.hpp" | 9 | +#include "common_header.h" |
10 | 10 | ||
11 | using namespace std; | 11 | using namespace std; |
12 | 12 | ||
@@ -42,15 +42,6 @@ private: | @@ -42,15 +42,6 @@ private: | ||
42 | } | 42 | } |
43 | }; | 43 | }; |
44 | 44 | ||
45 | - | ||
46 | -static long long get_cur_time() { | ||
47 | - | ||
48 | - chrono::time_point<chrono::system_clock, chrono::milliseconds> tpMicro | ||
49 | - = chrono::time_point_cast<chrono::milliseconds>(chrono::system_clock::now()); | ||
50 | - | ||
51 | - return tpMicro.time_since_epoch().count(); | ||
52 | -} | ||
53 | - | ||
54 | static int rtp_revc_thread_(void* param) | 45 | static int rtp_revc_thread_(void* param) |
55 | { | 46 | { |
56 | if (!param) | 47 | if (!param) |
@@ -175,7 +166,7 @@ int RTPUdpReceiver::OnRtpRecv() | @@ -175,7 +166,7 @@ int RTPUdpReceiver::OnRtpRecv() | ||
175 | if (m_rtpSessionPtr->GotoFirstSourceWithData()) | 166 | if (m_rtpSessionPtr->GotoFirstSourceWithData()) |
176 | { | 167 | { |
177 | LOG_INFO("OnRtpRecv GotoFirstSourceWithData --{}", m_deviceID); | 168 | LOG_INFO("OnRtpRecv GotoFirstSourceWithData --{}", m_deviceID); |
178 | - last_recv_ts = get_cur_time(); | 169 | + last_recv_ts = UtilTools::get_cur_time_ms(); |
179 | m_idleCount = 0; | 170 | m_idleCount = 0; |
180 | m_noDataCount = 0; | 171 | m_noDataCount = 0; |
181 | do | 172 | do |
@@ -261,7 +252,7 @@ int RTPUdpReceiver::OnRtpRecv() | @@ -261,7 +252,7 @@ int RTPUdpReceiver::OnRtpRecv() | ||
261 | // //若是30000,时长大约 18s | 252 | // //若是30000,时长大约 18s |
262 | // if(m_idleCount > 30000) | 253 | // if(m_idleCount > 30000) |
263 | // { | 254 | // { |
264 | - // uint64_t cts = get_cur_time(); | 255 | + // uint64_t cts = UtilTools::get_cur_time_ms(); |
265 | // float duration_not_recv = (cts - last_recv_ts) / 1000.0; | 256 | // float duration_not_recv = (cts - last_recv_ts) / 1000.0; |
266 | // | 257 | // |
267 | // //LOG_ERROR("************I haven't got stream from hik gateway exceed {}s,send eof********{}******", duration_not_recv, m_deviceID); | 258 | // //LOG_ERROR("************I haven't got stream from hik gateway exceed {}s,send eof********{}******", duration_not_recv, m_deviceID); |
src/gb28181/common_header.h
0 → 100644
src/gb28181/demuxer.h
@@ -8,9 +8,11 @@ | @@ -8,9 +8,11 @@ | ||
8 | { CMpeg2Demux class. } | 8 | { CMpeg2Demux class. } |
9 | { } | 9 | { } |
10 | {*******************************************************/ | 10 | {*******************************************************/ |
11 | + | ||
11 | #ifndef _DEMUXER_H_ | 12 | #ifndef _DEMUXER_H_ |
12 | #define _DEMUXER_H_ | 13 | #define _DEMUXER_H_ |
13 | 14 | ||
15 | + | ||
14 | #include <stdint.h> | 16 | #include <stdint.h> |
15 | #include "buffer.h" | 17 | #include "buffer.h" |
16 | 18 |
src/jpegNPP.cpp-1
0 → 100644
1 | +/* | ||
2 | +* Copyright 1993-2015 NVIDIA Corporation. All rights reserved. | ||
3 | +* | ||
4 | +* NOTICE TO USER: | ||
5 | +* | ||
6 | +* This source code is subject to NVIDIA ownership rights under U.S. and | ||
7 | +* international Copyright laws. | ||
8 | +* | ||
9 | +* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE | ||
10 | +* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR | ||
11 | +* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH | ||
12 | +* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF | ||
13 | +* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. | ||
14 | +* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, | ||
15 | +* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS | ||
16 | +* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE | ||
17 | +* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE | ||
18 | +* OR PERFORMANCE OF THIS SOURCE CODE. | ||
19 | +* | ||
20 | +* U.S. Government End Users. This source code is a "commercial item" as | ||
21 | +* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of | ||
22 | +* "commercial computer software" and "commercial computer software | ||
23 | +* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) | ||
24 | +* and is provided to the U.S. Government only as a commercial end item. | ||
25 | +* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through | ||
26 | +* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the | ||
27 | +* source code with only those rights set forth herein. | ||
28 | +*/ | ||
29 | + | ||
30 | +// This sample needs at least CUDA 5.5 and a GPU that has at least Compute Capability 2.0 | ||
31 | + | ||
32 | +// This sample demonstrates a simple image processing pipeline. | ||
33 | +// First, a JPEG file is huffman decoded and inverse DCT transformed and dequantized. | ||
34 | +// Then the different planes are resized. Finally, the resized image is quantized, forward | ||
35 | +// DCT transformed and huffman encoded. | ||
36 | + | ||
37 | +#include "cuda_kernels.h" | ||
38 | + | ||
39 | +#include <npp.h> | ||
40 | +#include <cuda_runtime.h> | ||
41 | +#include "common/UtilNPP/Exceptions.h" | ||
42 | + | ||
43 | +#include "Endianess.h" | ||
44 | +#include <math.h> | ||
45 | + | ||
46 | +#include <string.h> | ||
47 | +#include <fstream> | ||
48 | +#include <iostream> | ||
49 | + | ||
50 | +#include "common/inc/helper_string.h" | ||
51 | +#include "common/inc/helper_cuda.h" | ||
52 | +//#include "MacroDef.h" | ||
53 | +#include "cuda.h" | ||
54 | + | ||
55 | +using namespace std; | ||
56 | + | ||
57 | +struct FrameHeader | ||
58 | +{ | ||
59 | + unsigned char nSamplePrecision; | ||
60 | + unsigned short nHeight; | ||
61 | + unsigned short nWidth; | ||
62 | + unsigned char nComponents; | ||
63 | + unsigned char aComponentIdentifier[3]; | ||
64 | + unsigned char aSamplingFactors[3]; | ||
65 | + unsigned char aQuantizationTableSelector[3]; | ||
66 | +}; | ||
67 | + | ||
68 | +struct ScanHeader | ||
69 | +{ | ||
70 | + unsigned char nComponents; | ||
71 | + unsigned char aComponentSelector[3]; | ||
72 | + unsigned char aHuffmanTablesSelector[3]; | ||
73 | + unsigned char nSs; | ||
74 | + unsigned char nSe; | ||
75 | + unsigned char nA; | ||
76 | +}; | ||
77 | + | ||
78 | +struct QuantizationTable | ||
79 | +{ | ||
80 | + unsigned char nPrecisionAndIdentifier; | ||
81 | + unsigned char aTable[64]; | ||
82 | +}; | ||
83 | + | ||
84 | +struct HuffmanTable | ||
85 | +{ | ||
86 | + unsigned char nClassAndIdentifier; | ||
87 | + unsigned char aCodes[16]; | ||
88 | + unsigned char aTable[256]; | ||
89 | +}; | ||
90 | + | ||
91 | +//??准?炼??藕?量??模?? | ||
92 | +//unsigned char std_Y_QT[64] = | ||
93 | +//{ | ||
94 | +// 16, 11, 10, 16, 24, 40, 51, 61, | ||
95 | +// 12, 12, 14, 19, 26, 58, 60, 55, | ||
96 | +// 14, 13, 16, 24, 40, 57, 69, 56, | ||
97 | +// 14, 17, 22, 29, 51, 87, 80, 62, | ||
98 | +// 18, 22, 37, 56, 68, 109, 103, 77, | ||
99 | +// 24, 35, 55, 64, 81, 104, 113, 92, | ||
100 | +// 49, 64, 78, 87, 103, 121, 120, 101, | ||
101 | +// 72, 92, 95, 98, 112, 100, 103, 99 | ||
102 | +//}; | ||
103 | +// | ||
104 | +////??准色???藕?量??模?? | ||
105 | +//unsigned char std_UV_QT[64] = | ||
106 | +//{ | ||
107 | +// 17, 18, 24, 47, 99, 99, 99, 99, | ||
108 | +// 18, 21, 26, 66, 99, 99, 99, 99, | ||
109 | +// 24, 26, 56, 99, 99, 99, 99, 99, | ||
110 | +// 47, 66, 99, 99, 99, 99, 99, 99, | ||
111 | +// 99, 99, 99, 99, 99, 99, 99, 99, | ||
112 | +// 99, 99, 99, 99, 99, 99, 99, 99, | ||
113 | +// 99, 99, 99, 99, 99, 99, 99, 99, | ||
114 | +// 99, 99, 99, 99, 99, 99, 99, 99 | ||
115 | +//}; | ||
116 | + | ||
117 | +////?炼??藕?量??模?? | ||
118 | +//unsigned char std_Y_QT[64] = | ||
119 | +//{ | ||
120 | +// 6, 4, 5, 6, 5, 4, 6, 6, | ||
121 | +// 5, 6, 7, 7, 6, 8, 10, 16, | ||
122 | +// 10, 10, 9, 9, 10, 20, 14, 15, | ||
123 | +// 12, 16, 23, 20, 24, 24, 23, 20, | ||
124 | +// 22, 22, 26, 29, 37, 31, 26, 27, | ||
125 | +// 35, 28, 22, 22, 32, 44, 32, 35, | ||
126 | +// 38, 39, 41, 42, 41, 25, 31, 45, | ||
127 | +// 48, 45, 40, 48, 37, 40, 41, 40 | ||
128 | +//}; | ||
129 | +// | ||
130 | +////色???藕?量??模?? | ||
131 | +//unsigned char std_UV_QT[64] = | ||
132 | +//{ | ||
133 | +// 7, 7, 7, 10, 8, 10, 19, 10, | ||
134 | +// 10, 19, 40, 26, 22, 26, 40, 40, | ||
135 | +// 40, 40, 40, 40, 40, 40, 40, 40, | ||
136 | +// 40, 40, 40, 40, 40, 40, 40, 40, | ||
137 | +// 40, 40, 40, 40, 40, 40, 40, 40, | ||
138 | +// 40, 40, 40, 40, 40, 40, 40, 40, | ||
139 | +// 40, 40, 40, 40, 40, 40, 40, 40, | ||
140 | +// 40, 40, 40, 40, 40, 40, 40, 40 | ||
141 | +//}; | ||
142 | + | ||
143 | +//?炼??藕?量??模?? | ||
144 | +unsigned char std_Y_QT[64] = | ||
145 | +{ | ||
146 | + 0.75 * 6, 0.75 * 4, 0.75 * 5, 0.75 * 6, 0.75 * 5, 0.75 * 4, 0.75 * 6, 0.75 * 6, | ||
147 | + 0.75 * 5, 0.75 * 6, 0.75 * 7, 0.75 * 7, 0.75 * 6, 0.75 * 8, 0.75 * 10, 0.75 * 16, | ||
148 | + 0.75 * 10, 0.75 * 10, 0.75 * 9, 0.75 * 9, 0.75 * 10, 0.75 * 20, 0.75 * 14, 0.75 * 15, | ||
149 | + 0.75 * 12, 0.75 * 16, 0.75 * 23, 0.75 * 20, 0.75 * 24, 0.75 * 24, 0.75 * 23, 0.75 * 20, | ||
150 | + 0.75 * 22, 0.75 * 22, 0.75 * 26, 0.75 * 29, 0.75 * 37, 0.75 * 31, 0.75 * 26, 0.75 * 27, | ||
151 | + 0.75 * 35, 0.75 * 28, 0.75 * 22, 0.75 * 22, 0.75 * 32, 0.75 * 44, 0.75 * 32, 0.75 * 35, | ||
152 | + 0.75 * 38, 0.75 * 39, 0.75 * 41, 0.75 * 42, 0.75 * 41, 0.75 * 25, 0.75 * 31, 0.75 * 45, | ||
153 | + 0.75 * 48, 0.75 * 45, 0.75 * 40, 0.75 * 48, 0.75 * 37, 0.75 * 40, 0.75 * 41, 0.75 * 40 | ||
154 | +}; | ||
155 | + | ||
156 | +//色???藕?量??模?? | ||
157 | +unsigned char std_UV_QT[64] = | ||
158 | +{ | ||
159 | + 0.75 * 7, 0.75 * 7, 0.75 * 7, 0.75 * 10, 0.75 * 8, 0.75 * 10, 0.75 * 19, 0.75 * 10, | ||
160 | + 0.75 * 10, 0.75 * 19, 0.75 * 40, 0.75 * 26, 0.75 * 22, 0.75 * 26, 0.75 * 40, 0.75 * 40, | ||
161 | + 30, 30, 30, 30, 30, 30, 30, 30, | ||
162 | + 30, 30, 30, 30, 30, 30, 30, 30, | ||
163 | + 30, 30, 30, 30, 30, 30, 30, 30, | ||
164 | + 30, 30, 30, 30, 30, 30, 30, 30, | ||
165 | + 30, 30, 30, 30, 30, 30, 30, 30, | ||
166 | + 30, 30, 30, 30, 30, 30, 30, 30 | ||
167 | +}; | ||
168 | + | ||
169 | +unsigned char STD_DC_Y_NRCODES[16] = { 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; | ||
170 | +unsigned char STD_DC_Y_VALUES[12] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; | ||
171 | + | ||
172 | +unsigned char STD_DC_UV_NRCODES[16] = { 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; | ||
173 | +unsigned char STD_DC_UV_VALUES[12] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; | ||
174 | + | ||
175 | +unsigned char STD_AC_Y_NRCODES[16] = { 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0X7D }; | ||
176 | +unsigned char STD_AC_Y_VALUES[162] = | ||
177 | +{ | ||
178 | + 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, | ||
179 | + 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, | ||
180 | + 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, | ||
181 | + 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, | ||
182 | + 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, | ||
183 | + 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, | ||
184 | + 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, | ||
185 | + 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, | ||
186 | + 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, | ||
187 | + 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, | ||
188 | + 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, | ||
189 | + 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, | ||
190 | + 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, | ||
191 | + 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, | ||
192 | + 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, | ||
193 | + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, | ||
194 | + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, | ||
195 | + 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, | ||
196 | + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, | ||
197 | + 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, | ||
198 | + 0xf9, 0xfa | ||
199 | +}; | ||
200 | + | ||
201 | +unsigned char STD_AC_UV_NRCODES[16] = { 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0X77 }; | ||
202 | +unsigned char STD_AC_UV_VALUES[162] = | ||
203 | +{ | ||
204 | + 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, | ||
205 | + 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, | ||
206 | + 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, | ||
207 | + 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, | ||
208 | + 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, | ||
209 | + 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, | ||
210 | + 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, | ||
211 | + 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, | ||
212 | + 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, | ||
213 | + 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, | ||
214 | + 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, | ||
215 | + 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, | ||
216 | + 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, | ||
217 | + 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, | ||
218 | + 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, | ||
219 | + 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, | ||
220 | + 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, | ||
221 | + 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, | ||
222 | + 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, | ||
223 | + 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, | ||
224 | + 0xf9, 0xfa | ||
225 | +}; | ||
226 | + | ||
227 | +int DivUp(int x, int d) | ||
228 | +{ | ||
229 | + return (x + d - 1) / d; | ||
230 | +} | ||
231 | + | ||
232 | +template<typename T> | ||
233 | +void writeAndAdvance(unsigned char *&pData, T nElement) | ||
234 | +{ | ||
235 | + writeBigEndian<T>(pData, nElement); | ||
236 | + pData += sizeof(T); | ||
237 | +} | ||
238 | + | ||
239 | +void writeMarker(unsigned char nMarker, unsigned char *&pData) | ||
240 | +{ | ||
241 | + *pData++ = 0x0ff; | ||
242 | + *pData++ = nMarker; | ||
243 | +} | ||
244 | + | ||
245 | +void writeJFIFTag(unsigned char *&pData) | ||
246 | +{ | ||
247 | + const char JFIF_TAG[] = | ||
248 | + { | ||
249 | + 0x4a, 0x46, 0x49, 0x46, 0x00, | ||
250 | + 0x01, 0x02, | ||
251 | + 0x00, | ||
252 | + 0x00, 0x01, 0x00, 0x01, | ||
253 | + 0x00, 0x00 | ||
254 | + }; | ||
255 | + | ||
256 | + writeMarker(0x0e0, pData); | ||
257 | + writeAndAdvance<unsigned short>(pData, sizeof(JFIF_TAG) + sizeof(unsigned short)); | ||
258 | + memcpy(pData, JFIF_TAG, sizeof(JFIF_TAG)); | ||
259 | + pData += sizeof(JFIF_TAG); | ||
260 | +} | ||
261 | + | ||
262 | +void writeFrameHeader(const FrameHeader &header, unsigned char *&pData) | ||
263 | +{ | ||
264 | + unsigned char aTemp[128]; | ||
265 | + unsigned char *pTemp = aTemp; | ||
266 | + | ||
267 | + writeAndAdvance<unsigned char>(pTemp, header.nSamplePrecision); | ||
268 | + writeAndAdvance<unsigned short>(pTemp, header.nHeight); | ||
269 | + writeAndAdvance<unsigned short>(pTemp, header.nWidth); | ||
270 | + writeAndAdvance<unsigned char>(pTemp, header.nComponents); | ||
271 | + | ||
272 | + for (int c = 0; c<header.nComponents; ++c) | ||
273 | + { | ||
274 | + writeAndAdvance<unsigned char>(pTemp, header.aComponentIdentifier[c]); | ||
275 | + writeAndAdvance<unsigned char>(pTemp, header.aSamplingFactors[c]); | ||
276 | + writeAndAdvance<unsigned char>(pTemp, header.aQuantizationTableSelector[c]); | ||
277 | + } | ||
278 | + | ||
279 | + unsigned short nLength = (unsigned short)(pTemp - aTemp); | ||
280 | + | ||
281 | + writeMarker(0x0C0, pData); | ||
282 | + writeAndAdvance<unsigned short>(pData, nLength + 2); | ||
283 | + memcpy(pData, aTemp, nLength); | ||
284 | + pData += nLength; | ||
285 | +} | ||
286 | + | ||
287 | +void writeScanHeader(const ScanHeader &header, unsigned char *&pData) | ||
288 | +{ | ||
289 | + unsigned char aTemp[128]; | ||
290 | + unsigned char *pTemp = aTemp; | ||
291 | + | ||
292 | + writeAndAdvance<unsigned char>(pTemp, header.nComponents); | ||
293 | + | ||
294 | + for (int c = 0; c<header.nComponents; ++c) | ||
295 | + { | ||
296 | + writeAndAdvance<unsigned char>(pTemp, header.aComponentSelector[c]); | ||
297 | + writeAndAdvance<unsigned char>(pTemp, header.aHuffmanTablesSelector[c]); | ||
298 | + } | ||
299 | + | ||
300 | + writeAndAdvance<unsigned char>(pTemp, header.nSs); | ||
301 | + writeAndAdvance<unsigned char>(pTemp, header.nSe); | ||
302 | + writeAndAdvance<unsigned char>(pTemp, header.nA); | ||
303 | + | ||
304 | + unsigned short nLength = (unsigned short)(pTemp - aTemp); | ||
305 | + | ||
306 | + writeMarker(0x0DA, pData); | ||
307 | + writeAndAdvance<unsigned short>(pData, nLength + 2); | ||
308 | + memcpy(pData, aTemp, nLength); | ||
309 | + pData += nLength; | ||
310 | +} | ||
311 | + | ||
312 | +void writeQuantizationTable(const QuantizationTable &table, unsigned char *&pData) | ||
313 | +{ | ||
314 | + writeMarker(0x0DB, pData); | ||
315 | + writeAndAdvance<unsigned short>(pData, sizeof(QuantizationTable) + 2); | ||
316 | + memcpy(pData, &table, sizeof(QuantizationTable)); | ||
317 | + pData += sizeof(QuantizationTable); | ||
318 | +} | ||
319 | + | ||
320 | +void writeHuffmanTable(const HuffmanTable &table, unsigned char *&pData) | ||
321 | +{ | ||
322 | + writeMarker(0x0C4, pData); | ||
323 | + | ||
324 | + // Number of Codes for Bit Lengths [1..16] | ||
325 | + int nCodeCount = 0; | ||
326 | + | ||
327 | + for (int i = 0; i < 16; ++i) | ||
328 | + { | ||
329 | + nCodeCount += table.aCodes[i]; | ||
330 | + } | ||
331 | + | ||
332 | + writeAndAdvance<unsigned short>(pData, 17 + nCodeCount + 2); | ||
333 | + memcpy(pData, &table, 17 + nCodeCount); | ||
334 | + pData += 17 + nCodeCount; | ||
335 | +} | ||
336 | + | ||
337 | +bool printfNPPinfo(int cudaVerMajor, int cudaVerMinor) | ||
338 | +{ | ||
339 | + const NppLibraryVersion *libVer = nppGetLibVersion(); | ||
340 | + | ||
341 | + printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build); | ||
342 | + | ||
343 | + int driverVersion, runtimeVersion; | ||
344 | + cudaDriverGetVersion(&driverVersion); | ||
345 | + cudaRuntimeGetVersion(&runtimeVersion); | ||
346 | + | ||
347 | + printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000, (driverVersion % 100) / 10); | ||
348 | + printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, (runtimeVersion % 100) / 10); | ||
349 | + | ||
350 | + bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor); | ||
351 | + return bVal; | ||
352 | +} | ||
353 | + | ||
354 | +NppiDCTState *pDCTState; | ||
355 | +FrameHeader oFrameHeader; | ||
356 | +FrameHeader oFrameHeaderFixedSize; | ||
357 | +ScanHeader oScanHeader; | ||
358 | +QuantizationTable aQuantizationTables[4]; | ||
359 | +Npp8u *pdQuantizationTables; | ||
360 | +HuffmanTable aHuffmanTables[4]; | ||
361 | +HuffmanTable *pHuffmanDCTables; | ||
362 | +HuffmanTable *pHuffmanACTables; | ||
363 | +int nMCUBlocksH; | ||
364 | +int nMCUBlocksV; | ||
365 | +int nMCUBlocksHFixedSize; | ||
366 | +int nMCUBlocksVFixedSize; | ||
367 | +Npp8u *pdScan; | ||
368 | +NppiEncodeHuffmanSpec *apHuffmanDCTable[3]; | ||
369 | +NppiEncodeHuffmanSpec *apHuffmanACTable[3]; | ||
370 | +unsigned char *pDstJpeg; | ||
371 | +unsigned char *pDstOutput; | ||
372 | +int nRestartInterval; | ||
373 | + | ||
374 | +int initTable() | ||
375 | +{ | ||
376 | + NPP_CHECK_NPP(nppiDCTInitAlloc(&pDCTState)); | ||
377 | + | ||
378 | + nRestartInterval = -1; | ||
379 | + | ||
380 | + cudaMalloc(&pdQuantizationTables, 64 * 4); | ||
381 | + pHuffmanDCTables = aHuffmanTables; | ||
382 | + pHuffmanACTables = &aHuffmanTables[2]; | ||
383 | + memset(aQuantizationTables, 0, 4 * sizeof(QuantizationTable)); | ||
384 | + memset(aHuffmanTables, 0, 4 * sizeof(HuffmanTable)); | ||
385 | + memset(&oFrameHeader, 0, sizeof(FrameHeader)); | ||
386 | + | ||
387 | + | ||
388 | + //????Huffman?? | ||
389 | + aHuffmanTables[0].nClassAndIdentifier = 0; | ||
390 | + memcpy(aHuffmanTables[0].aCodes, STD_DC_Y_NRCODES, 16); | ||
391 | + memcpy(aHuffmanTables[0].aTable, STD_DC_Y_VALUES, 12); | ||
392 | + | ||
393 | + aHuffmanTables[1].nClassAndIdentifier = 1; | ||
394 | + memcpy(aHuffmanTables[1].aCodes, STD_DC_UV_NRCODES, 16); | ||
395 | + memcpy(aHuffmanTables[1].aTable, STD_DC_UV_VALUES, 12); | ||
396 | + | ||
397 | + aHuffmanTables[2].nClassAndIdentifier = 16; | ||
398 | + memcpy(aHuffmanTables[2].aCodes, STD_AC_Y_NRCODES, 16); | ||
399 | + memcpy(aHuffmanTables[2].aTable, STD_AC_Y_VALUES, 162); | ||
400 | + | ||
401 | + aHuffmanTables[3].nClassAndIdentifier = 17; | ||
402 | + memcpy(aHuffmanTables[3].aCodes, STD_AC_UV_NRCODES, 16); | ||
403 | + memcpy(aHuffmanTables[3].aTable, STD_AC_UV_VALUES, 162); | ||
404 | + | ||
405 | + | ||
406 | + //????量???? | ||
407 | + aQuantizationTables[0].nPrecisionAndIdentifier = 0; | ||
408 | + memcpy(aQuantizationTables[0].aTable, std_Y_QT, 64); | ||
409 | + aQuantizationTables[1].nPrecisionAndIdentifier = 1; | ||
410 | + memcpy(aQuantizationTables[1].aTable, std_UV_QT, 64); | ||
411 | + | ||
412 | + NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables, aQuantizationTables[0].aTable, 64, cudaMemcpyHostToDevice)); | ||
413 | + NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables + 64, aQuantizationTables[1].aTable, 64, cudaMemcpyHostToDevice)); | ||
414 | + | ||
415 | + oFrameHeader.nSamplePrecision = 8; | ||
416 | + oFrameHeader.nComponents = 3; | ||
417 | + oFrameHeader.aComponentIdentifier[0] = 1; | ||
418 | + oFrameHeader.aComponentIdentifier[1] = 2; | ||
419 | + oFrameHeader.aComponentIdentifier[2] = 3; | ||
420 | + oFrameHeader.aSamplingFactors[0] = 34; | ||
421 | + oFrameHeader.aSamplingFactors[1] = 17; | ||
422 | + oFrameHeader.aSamplingFactors[2] = 17; | ||
423 | + oFrameHeader.aQuantizationTableSelector[0] = 0; | ||
424 | + oFrameHeader.aQuantizationTableSelector[1] = 1; | ||
425 | + oFrameHeader.aQuantizationTableSelector[2] = 1; | ||
426 | + | ||
427 | + for (int i = 0; i < oFrameHeader.nComponents; ++i) | ||
428 | + { | ||
429 | + nMCUBlocksV = max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] & 0x0f); | ||
430 | + nMCUBlocksH = max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] >> 4); | ||
431 | + } | ||
432 | + NPP_CHECK_CUDA(cudaMalloc(&pdScan, 4 << 20)); | ||
433 | + | ||
434 | + | ||
435 | + | ||
436 | + oScanHeader.nComponents = 3; | ||
437 | + oScanHeader.aComponentSelector[0] = 1; | ||
438 | + oScanHeader.aComponentSelector[1] = 2; | ||
439 | + oScanHeader.aComponentSelector[2] = 3; | ||
440 | + oScanHeader.aHuffmanTablesSelector[0] = 0; | ||
441 | + oScanHeader.aHuffmanTablesSelector[1] = 17; | ||
442 | + oScanHeader.aHuffmanTablesSelector[2] = 17; | ||
443 | + oScanHeader.nSs = 0; | ||
444 | + oScanHeader.nSe = 63; | ||
445 | + oScanHeader.nA = 0; | ||
446 | + | ||
447 | + | ||
448 | + return 0; | ||
449 | +} | ||
450 | + | ||
451 | +NppiSize aSrcSize[3]; | ||
452 | +Npp16s *apdDCT[3];// = { 0, 0, 0 }; | ||
453 | +Npp32s aDCTStep[3]; | ||
454 | + | ||
455 | +Npp8u *apSrcImage[3];// = { 0, 0, 0 }; | ||
456 | +Npp32s aSrcImageStep[3]; | ||
457 | +size_t aSrcPitch[3]; | ||
458 | + | ||
459 | + | ||
460 | +int releaseJpegNPP() | ||
461 | +{ | ||
462 | + nppiDCTFree(pDCTState); | ||
463 | + cudaFree(pdQuantizationTables); | ||
464 | + cudaFree(pdScan); | ||
465 | + for (int i = 0; i < 3; ++i) | ||
466 | + { | ||
467 | + cudaFree(apdDCT[i]); | ||
468 | + cudaFree(apSrcImage[i]); | ||
469 | + } | ||
470 | + return 0; | ||
471 | +} | ||
472 | + | ||
473 | + | ||
474 | +int initTable(int flag, int width, int height) | ||
475 | +{ | ||
476 | + //????帧头 | ||
477 | + oFrameHeaderFixedSize.nSamplePrecision = 8; | ||
478 | + oFrameHeaderFixedSize.nComponents = 3; | ||
479 | + oFrameHeaderFixedSize.aComponentIdentifier[0] = 1; | ||
480 | + oFrameHeaderFixedSize.aComponentIdentifier[1] = 2; | ||
481 | + oFrameHeaderFixedSize.aComponentIdentifier[2] = 3; | ||
482 | + oFrameHeaderFixedSize.aSamplingFactors[0] = 34; | ||
483 | + oFrameHeaderFixedSize.aSamplingFactors[1] = 17; | ||
484 | + oFrameHeaderFixedSize.aSamplingFactors[2] = 17; | ||
485 | + oFrameHeaderFixedSize.aQuantizationTableSelector[0] = 0; | ||
486 | + oFrameHeaderFixedSize.aQuantizationTableSelector[1] = 1; | ||
487 | + oFrameHeaderFixedSize.aQuantizationTableSelector[2] = 1; | ||
488 | + oFrameHeaderFixedSize.nWidth = width; | ||
489 | + oFrameHeaderFixedSize.nHeight = height; | ||
490 | + | ||
491 | + for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i) | ||
492 | + { | ||
493 | + nMCUBlocksVFixedSize = max(nMCUBlocksVFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f); | ||
494 | + nMCUBlocksHFixedSize = max(nMCUBlocksHFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] >> 4); | ||
495 | + } | ||
496 | + | ||
497 | + for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i) | ||
498 | + { | ||
499 | + NppiSize oBlocks; | ||
500 | + NppiSize oBlocksPerMCU = { oFrameHeaderFixedSize.aSamplingFactors[i] >> 4, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f }; | ||
501 | + | ||
502 | + oBlocks.width = (int)ceil((oFrameHeaderFixedSize.nWidth + 7) / 8 * | ||
503 | + static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksHFixedSize); | ||
504 | + oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; | ||
505 | + | ||
506 | + oBlocks.height = (int)ceil((oFrameHeaderFixedSize.nHeight + 7) / 8 * | ||
507 | + static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksVFixedSize); | ||
508 | + oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; | ||
509 | + | ||
510 | + aSrcSize[i].width = oBlocks.width * 8; | ||
511 | + aSrcSize[i].height = oBlocks.height * 8; | ||
512 | + | ||
513 | + // Allocate Memory | ||
514 | + size_t nPitch; | ||
515 | + NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height)); | ||
516 | + aDCTStep[i] = static_cast<Npp32s>(nPitch); | ||
517 | + | ||
518 | + NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height)); | ||
519 | + | ||
520 | + aSrcPitch[i] = nPitch; | ||
521 | + aSrcImageStep[i] = static_cast<Npp32s>(nPitch); | ||
522 | + } | ||
523 | + | ||
524 | + return 0; | ||
525 | +} | ||
526 | + | ||
527 | +int jpegNPP(const char *szOutputFile, float* d_srcRGB) | ||
528 | +{ | ||
529 | + //RGB2YUV | ||
530 | + cudaError_t cudaStatus; | ||
531 | + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight, | ||
532 | + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, | ||
533 | + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, | ||
534 | + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height); | ||
535 | + | ||
536 | + /** | ||
537 | + * Forward DCT, quantization and level shift part of the JPEG encoding. | ||
538 | + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 | ||
539 | + * macro blocks. The new version of the primitive takes the ROI in image pixel size and | ||
540 | + * works with DCT coefficients that are in zig-zag order. | ||
541 | + */ | ||
542 | + int k = 0; | ||
543 | + //LOG_INFO("NPP_CHECK_NPP:%d", 1); | ||
544 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], | ||
545 | + apdDCT[0], aDCTStep[0], | ||
546 | + pdQuantizationTables + k * 64, | ||
547 | + aSrcSize[0], | ||
548 | + pDCTState))) | ||
549 | + { | ||
550 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
551 | + return EXIT_FAILURE; | ||
552 | + } | ||
553 | + | ||
554 | + k = 1; | ||
555 | + //LOG_INFO("NPP_CHECK_NPP:%d", 2); | ||
556 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], | ||
557 | + apdDCT[1], aDCTStep[1], | ||
558 | + pdQuantizationTables + k * 64, | ||
559 | + aSrcSize[1], | ||
560 | + pDCTState))) | ||
561 | + { | ||
562 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
563 | + return EXIT_FAILURE; | ||
564 | + } | ||
565 | + | ||
566 | + //LOG_INFO("NPP_CHECK_NPP:%d", 3); | ||
567 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], | ||
568 | + apdDCT[2], aDCTStep[2], | ||
569 | + pdQuantizationTables + k * 64, | ||
570 | + aSrcSize[2], | ||
571 | + pDCTState))) | ||
572 | + { | ||
573 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
574 | + return EXIT_FAILURE; | ||
575 | + } | ||
576 | + | ||
577 | + // Huffman Encoding | ||
578 | + | ||
579 | + Npp32s nScanLength; | ||
580 | + Npp8u *pJpegEncoderTemp; | ||
581 | + | ||
582 | +#if (CUDA_VERSION == 8000) | ||
583 | + Npp32s nTempSize; //when using CUDA8 | ||
584 | +#else | ||
585 | + size_t nTempSize; //when using CUDA9 | ||
586 | +#endif | ||
587 | + //modified by Junlin 190221 | ||
588 | + | ||
589 | + //LOG_INFO("NPP_CHECK_NPP:%d",4); | ||
590 | + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) | ||
591 | + { | ||
592 | + printf("nppiEncodeHuffmanGetSize Failed!\n"); | ||
593 | + return EXIT_FAILURE; | ||
594 | + } | ||
595 | + | ||
596 | + //LOG_INFO("NPP_CHECK_CUDA:%d",5); | ||
597 | + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); | ||
598 | + | ||
599 | + /** | ||
600 | + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. | ||
601 | + */ | ||
602 | + NppStatus t_status; | ||
603 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); | ||
604 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); | ||
605 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); | ||
606 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); | ||
607 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); | ||
608 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); | ||
609 | + | ||
610 | + /** | ||
611 | + * Huffman Encoding of the JPEG Encoding. | ||
612 | + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. | ||
613 | + */ | ||
614 | + Npp32s nSs = 0; | ||
615 | + Npp32s nSe = 63; | ||
616 | + Npp32s nH = 0; | ||
617 | + Npp32s nL = 0; | ||
618 | + //LOG_INFO("NPP_CHECK_NPP:%d",6); | ||
619 | + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, | ||
620 | + 0, nSs, nSe, nH, nL, | ||
621 | + pdScan, &nScanLength, | ||
622 | + apHuffmanDCTable, | ||
623 | + apHuffmanACTable, | ||
624 | + aSrcSize, | ||
625 | + pJpegEncoderTemp))) | ||
626 | + { | ||
627 | + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); | ||
628 | + return EXIT_FAILURE; | ||
629 | + } | ||
630 | + | ||
631 | + for (int i = 0; i < 3; ++i) | ||
632 | + { | ||
633 | + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); | ||
634 | + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); | ||
635 | + } | ||
636 | + // Write JPEG | ||
637 | + pDstJpeg = new unsigned char[4 << 20]{}; | ||
638 | + pDstOutput = pDstJpeg; | ||
639 | + | ||
640 | + writeMarker(0x0D8, pDstOutput); | ||
641 | + writeJFIFTag(pDstOutput); | ||
642 | + writeQuantizationTable(aQuantizationTables[0], pDstOutput); | ||
643 | + writeQuantizationTable(aQuantizationTables[1], pDstOutput); | ||
644 | + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); | ||
645 | + writeHuffmanTable(pHuffmanACTables[0], pDstOutput); | ||
646 | + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); | ||
647 | + writeHuffmanTable(pHuffmanACTables[1], pDstOutput); | ||
648 | + writeFrameHeader(oFrameHeaderFixedSize, pDstOutput); | ||
649 | + writeScanHeader(oScanHeader, pDstOutput); | ||
650 | + | ||
651 | + //LOG_INFO("NPP_CHECK_CUDA:%d",7); | ||
652 | + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); | ||
653 | + | ||
654 | + pDstOutput += nScanLength; | ||
655 | + writeMarker(0x0D9, pDstOutput); | ||
656 | + { | ||
657 | + // Write result to file. | ||
658 | + std::ofstream outputFile(szOutputFile, ios::out | ios::binary); | ||
659 | + outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg)); | ||
660 | + } | ||
661 | + | ||
662 | + // Cleanup | ||
663 | + cudaFree(pJpegEncoderTemp); | ||
664 | + delete[] pDstJpeg; | ||
665 | + | ||
666 | + | ||
667 | + return EXIT_SUCCESS; | ||
668 | +} | ||
669 | + | ||
670 | +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB) | ||
671 | +{ | ||
672 | + //RGB2YUV | ||
673 | + cudaError_t cudaStatus; | ||
674 | + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight, | ||
675 | + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, | ||
676 | + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, | ||
677 | + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height); | ||
678 | + | ||
679 | + /** | ||
680 | + * Forward DCT, quantization and level shift part of the JPEG encoding. | ||
681 | + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 | ||
682 | + * macro blocks. The new version of the primitive takes the ROI in image pixel size and | ||
683 | + * works with DCT coefficients that are in zig-zag order. | ||
684 | + */ | ||
685 | + int k = 0; | ||
686 | + //LOG_INFO("NPP_CHECK_NPP:%d", 1); | ||
687 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], | ||
688 | + apdDCT[0], aDCTStep[0], | ||
689 | + pdQuantizationTables + k * 64, | ||
690 | + aSrcSize[0], | ||
691 | + pDCTState))) | ||
692 | + { | ||
693 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
694 | + return EXIT_FAILURE; | ||
695 | + } | ||
696 | + | ||
697 | + k = 1; | ||
698 | + //LOG_INFO("NPP_CHECK_NPP:%d", 2); | ||
699 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], | ||
700 | + apdDCT[1], aDCTStep[1], | ||
701 | + pdQuantizationTables + k * 64, | ||
702 | + aSrcSize[1], | ||
703 | + pDCTState))) | ||
704 | + { | ||
705 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
706 | + return EXIT_FAILURE; | ||
707 | + } | ||
708 | + | ||
709 | + //LOG_INFO("NPP_CHECK_NPP:%d", 3); | ||
710 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], | ||
711 | + apdDCT[2], aDCTStep[2], | ||
712 | + pdQuantizationTables + k * 64, | ||
713 | + aSrcSize[2], | ||
714 | + pDCTState))) | ||
715 | + { | ||
716 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
717 | + return EXIT_FAILURE; | ||
718 | + } | ||
719 | + | ||
720 | + // Huffman Encoding | ||
721 | + | ||
722 | + Npp32s nScanLength; | ||
723 | + Npp8u *pJpegEncoderTemp; | ||
724 | + | ||
725 | +#if (CUDA_VERSION == 8000) | ||
726 | + Npp32s nTempSize; //when using CUDA8 | ||
727 | +#else | ||
728 | + size_t nTempSize; //when using CUDA9 | ||
729 | +#endif | ||
730 | + //modified by Junlin 190221 | ||
731 | + | ||
732 | + //LOG_INFO("NPP_CHECK_NPP:%d",4); | ||
733 | + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) | ||
734 | + { | ||
735 | + printf("nppiEncodeHuffmanGetSize Failed!\n"); | ||
736 | + return EXIT_FAILURE; | ||
737 | + } | ||
738 | + | ||
739 | + //LOG_INFO("NPP_CHECK_CUDA:%d",5); | ||
740 | + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); | ||
741 | + | ||
742 | + /** | ||
743 | + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. | ||
744 | + */ | ||
745 | + NppStatus t_status; | ||
746 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); | ||
747 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); | ||
748 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); | ||
749 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); | ||
750 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); | ||
751 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); | ||
752 | + | ||
753 | + /** | ||
754 | + * Huffman Encoding of the JPEG Encoding. | ||
755 | + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. | ||
756 | + */ | ||
757 | + Npp32s nSs = 0; | ||
758 | + Npp32s nSe = 63; | ||
759 | + Npp32s nH = 0; | ||
760 | + Npp32s nL = 0; | ||
761 | + //LOG_INFO("NPP_CHECK_NPP:%d",6); | ||
762 | + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, | ||
763 | + 0, nSs, nSe, nH, nL, | ||
764 | + pdScan, &nScanLength, | ||
765 | + apHuffmanDCTable, | ||
766 | + apHuffmanACTable, | ||
767 | + aSrcSize, | ||
768 | + pJpegEncoderTemp))) | ||
769 | + { | ||
770 | + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); | ||
771 | + return EXIT_FAILURE; | ||
772 | + } | ||
773 | + | ||
774 | + for (int i = 0; i < 3; ++i) | ||
775 | + { | ||
776 | + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); | ||
777 | + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); | ||
778 | + } | ||
779 | + // Write JPEG | ||
780 | + pDstJpeg = new unsigned char[4 << 20]{}; | ||
781 | + pDstOutput = pDstJpeg; | ||
782 | + | ||
783 | + writeMarker(0x0D8, pDstOutput); | ||
784 | + writeJFIFTag(pDstOutput); | ||
785 | + writeQuantizationTable(aQuantizationTables[0], pDstOutput); | ||
786 | + writeQuantizationTable(aQuantizationTables[1], pDstOutput); | ||
787 | + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); | ||
788 | + writeHuffmanTable(pHuffmanACTables[0], pDstOutput); | ||
789 | + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); | ||
790 | + writeHuffmanTable(pHuffmanACTables[1], pDstOutput); | ||
791 | + writeFrameHeader(oFrameHeaderFixedSize, pDstOutput); | ||
792 | + writeScanHeader(oScanHeader, pDstOutput); | ||
793 | + | ||
794 | + //LOG_INFO("NPP_CHECK_CUDA:%d",7); | ||
795 | + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); | ||
796 | + | ||
797 | + pDstOutput += nScanLength; | ||
798 | + writeMarker(0x0D9, pDstOutput); | ||
799 | + { | ||
800 | + // Write result to file. | ||
801 | + std::ofstream outputFile(szOutputFile, ios::out | ios::binary); | ||
802 | + outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg)); | ||
803 | + } | ||
804 | + | ||
805 | + // Cleanup | ||
806 | + cudaFree(pJpegEncoderTemp); | ||
807 | + delete[] pDstJpeg; | ||
808 | + | ||
809 | + | ||
810 | + return EXIT_SUCCESS; | ||
811 | +} | ||
812 | + | ||
813 | + | ||
814 | +int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height) | ||
815 | +{ | ||
816 | + NppiSize aSrcSize[3]; | ||
817 | + Npp16s *apdDCT[3] = { 0, 0, 0 }; | ||
818 | + Npp32s aDCTStep[3]; | ||
819 | + | ||
820 | + Npp8u *apSrcImage[3] = { 0, 0, 0 }; | ||
821 | + Npp32s aSrcImageStep[3]; | ||
822 | + size_t aSrcPitch[3]; | ||
823 | + | ||
824 | + | ||
825 | + //????帧头 | ||
826 | + oFrameHeader.nWidth = img_width; | ||
827 | + oFrameHeader.nHeight = img_height; | ||
828 | + | ||
829 | + for (int i = 0; i < oFrameHeader.nComponents; ++i) | ||
830 | + { | ||
831 | + NppiSize oBlocks; | ||
832 | + NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f }; | ||
833 | + | ||
834 | + oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 * | ||
835 | + static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH); | ||
836 | + oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; | ||
837 | + | ||
838 | + oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 * | ||
839 | + static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV); | ||
840 | + oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; | ||
841 | + | ||
842 | + aSrcSize[i].width = oBlocks.width * 8; | ||
843 | + aSrcSize[i].height = oBlocks.height * 8; | ||
844 | + | ||
845 | + // Allocate Memory | ||
846 | + size_t nPitch; | ||
847 | + //LOG_INFO("NPP_CHECK_CUDA:%d",1); | ||
848 | + NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height)); | ||
849 | + aDCTStep[i] = static_cast<Npp32s>(nPitch); | ||
850 | + | ||
851 | + //LOG_INFO("NPP_CHECK_CUDA:%d",2); | ||
852 | + NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height)); | ||
853 | + | ||
854 | + aSrcPitch[i] = nPitch; | ||
855 | + aSrcImageStep[i] = static_cast<Npp32s>(nPitch); | ||
856 | + } | ||
857 | + | ||
858 | + //RGB2YUV | ||
859 | + cudaError_t cudaStatus; | ||
860 | + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height, | ||
861 | + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, | ||
862 | + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, | ||
863 | + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height); | ||
864 | + | ||
865 | + /** | ||
866 | + * Forward DCT, quantization and level shift part of the JPEG encoding. | ||
867 | + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 | ||
868 | + * macro blocks. The new version of the primitive takes the ROI in image pixel size and | ||
869 | + * works with DCT coefficients that are in zig-zag order. | ||
870 | + */ | ||
871 | + int k = 0; | ||
872 | + //LOG_INFO("NPP_CHECK_CUDA:%d",3); | ||
873 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], | ||
874 | + apdDCT[0], aDCTStep[0], | ||
875 | + pdQuantizationTables + k * 64, | ||
876 | + aSrcSize[0], | ||
877 | + pDCTState))) | ||
878 | + { | ||
879 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
880 | + return EXIT_FAILURE; | ||
881 | + } | ||
882 | + k = 1; | ||
883 | + | ||
884 | + //LOG_INFO("NPP_CHECK_CUDA:%d",4); | ||
885 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], | ||
886 | + apdDCT[1], aDCTStep[1], | ||
887 | + pdQuantizationTables + k * 64, | ||
888 | + aSrcSize[1], | ||
889 | + pDCTState))) | ||
890 | + { | ||
891 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
892 | + return EXIT_FAILURE; | ||
893 | + } | ||
894 | + | ||
895 | + //LOG_INFO("NPP_CHECK_CUDA:%d",5); | ||
896 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], | ||
897 | + apdDCT[2], aDCTStep[2], | ||
898 | + pdQuantizationTables + k * 64, | ||
899 | + aSrcSize[2], | ||
900 | + pDCTState))) | ||
901 | + { | ||
902 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
903 | + return EXIT_FAILURE; | ||
904 | + } | ||
905 | + | ||
906 | + // Huffman Encoding | ||
907 | + | ||
908 | + Npp32s nScanLength; | ||
909 | + Npp8u *pJpegEncoderTemp; | ||
910 | + | ||
911 | +#if (CUDA_VERSION == 8000) | ||
912 | + Npp32s nTempSize; //when using CUDA8 | ||
913 | +#else | ||
914 | + size_t nTempSize; //when using CUDA9 | ||
915 | +#endif | ||
916 | + //modified by Junlin 190221 | ||
917 | + | ||
918 | + //LOG_INFO("NPP_CHECK_CUDA:%d",6); | ||
919 | + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) | ||
920 | + { | ||
921 | + printf("nppiEncodeHuffmanGetSize Failed!\n"); | ||
922 | + return EXIT_FAILURE; | ||
923 | + } | ||
924 | + | ||
925 | + //LOG_INFO("NPP_CHECK_CUDA:%d",7); | ||
926 | + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); | ||
927 | + | ||
928 | + /** | ||
929 | + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. | ||
930 | + */ | ||
931 | + NppStatus t_status; | ||
932 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); | ||
933 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); | ||
934 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); | ||
935 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); | ||
936 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); | ||
937 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); | ||
938 | + | ||
939 | + /** | ||
940 | + * Huffman Encoding of the JPEG Encoding. | ||
941 | + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. | ||
942 | + */ | ||
943 | + Npp32s nSs = 0; | ||
944 | + Npp32s nSe = 63; | ||
945 | + Npp32s nH = 0; | ||
946 | + Npp32s nL = 0; | ||
947 | + //LOG_INFO("NPP_CHECK_CUDA:%d",8); | ||
948 | + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, | ||
949 | + 0, nSs, nSe, nH, nL, | ||
950 | + pdScan, &nScanLength, | ||
951 | + apHuffmanDCTable, | ||
952 | + apHuffmanACTable, | ||
953 | + aSrcSize, | ||
954 | + pJpegEncoderTemp))) | ||
955 | + { | ||
956 | + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); | ||
957 | + return EXIT_FAILURE; | ||
958 | + } | ||
959 | + | ||
960 | + for (int i = 0; i < 3; ++i) | ||
961 | + { | ||
962 | + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); | ||
963 | + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); | ||
964 | + } | ||
965 | + // Write JPEG | ||
966 | + pDstJpeg = new unsigned char[4 << 20]{}; | ||
967 | + pDstOutput = pDstJpeg; | ||
968 | + | ||
969 | + writeMarker(0x0D8, pDstOutput); | ||
970 | + writeJFIFTag(pDstOutput); | ||
971 | + writeQuantizationTable(aQuantizationTables[0], pDstOutput); | ||
972 | + writeQuantizationTable(aQuantizationTables[1], pDstOutput); | ||
973 | + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); | ||
974 | + writeHuffmanTable(pHuffmanACTables[0], pDstOutput); | ||
975 | + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); | ||
976 | + writeHuffmanTable(pHuffmanACTables[1], pDstOutput); | ||
977 | + writeFrameHeader(oFrameHeader, pDstOutput); | ||
978 | + writeScanHeader(oScanHeader, pDstOutput); | ||
979 | + | ||
980 | + //LOG_INFO("NPP_CHECK_CUDA:%d",9); | ||
981 | + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); | ||
982 | + | ||
983 | + pDstOutput += nScanLength; | ||
984 | + writeMarker(0x0D9, pDstOutput); | ||
985 | + | ||
986 | + { | ||
987 | + // Write result to file. | ||
988 | + std::ofstream outputFile(szOutputFile, ios::out | ios::binary); | ||
989 | + outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg)); | ||
990 | + } | ||
991 | + | ||
992 | + // Cleanup | ||
993 | + cudaFree(pJpegEncoderTemp); | ||
994 | + delete[] pDstJpeg; | ||
995 | + for (int i = 0; i < 3; ++i) | ||
996 | + { | ||
997 | + cudaFree(apdDCT[i]); | ||
998 | + cudaFree(apSrcImage[i]); | ||
999 | + } | ||
1000 | + | ||
1001 | + return EXIT_SUCCESS; | ||
1002 | +} | ||
1003 | + | ||
1004 | + | ||
1005 | +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height) | ||
1006 | +{ | ||
1007 | + NppiSize aSrcSize[3]; | ||
1008 | + Npp16s *apdDCT[3] = { 0, 0, 0 }; | ||
1009 | + Npp32s aDCTStep[3]; | ||
1010 | + | ||
1011 | + Npp8u *apSrcImage[3] = { 0, 0, 0 }; | ||
1012 | + Npp32s aSrcImageStep[3]; | ||
1013 | + size_t aSrcPitch[3]; | ||
1014 | + | ||
1015 | + | ||
1016 | + //????帧头 | ||
1017 | + oFrameHeader.nWidth = img_width; | ||
1018 | + oFrameHeader.nHeight = img_height; | ||
1019 | + | ||
1020 | + for (int i = 0; i < oFrameHeader.nComponents; ++i) | ||
1021 | + { | ||
1022 | + NppiSize oBlocks; | ||
1023 | + NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f }; | ||
1024 | + | ||
1025 | + oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 * | ||
1026 | + static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH); | ||
1027 | + oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; | ||
1028 | + | ||
1029 | + oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 * | ||
1030 | + static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV); | ||
1031 | + oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; | ||
1032 | + | ||
1033 | + aSrcSize[i].width = oBlocks.width * 8; | ||
1034 | + aSrcSize[i].height = oBlocks.height * 8; | ||
1035 | + | ||
1036 | + // Allocate Memory | ||
1037 | + size_t nPitch; | ||
1038 | + //LOG_INFO("NPP_CHECK_CUDA:%d",1); | ||
1039 | + NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height)); | ||
1040 | + aDCTStep[i] = static_cast<Npp32s>(nPitch); | ||
1041 | + | ||
1042 | + //LOG_INFO("NPP_CHECK_CUDA:%d",2); | ||
1043 | + NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height)); | ||
1044 | + | ||
1045 | + aSrcPitch[i] = nPitch; | ||
1046 | + aSrcImageStep[i] = static_cast<Npp32s>(nPitch); | ||
1047 | + } | ||
1048 | + | ||
1049 | + //RGB2YUV | ||
1050 | + cudaError_t cudaStatus; | ||
1051 | + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height, | ||
1052 | + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, | ||
1053 | + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, | ||
1054 | + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height); | ||
1055 | + | ||
1056 | + /** | ||
1057 | + * Forward DCT, quantization and level shift part of the JPEG encoding. | ||
1058 | + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 | ||
1059 | + * macro blocks. The new version of the primitive takes the ROI in image pixel size and | ||
1060 | + * works with DCT coefficients that are in zig-zag order. | ||
1061 | + */ | ||
1062 | + int k = 0; | ||
1063 | + //LOG_INFO("NPP_CHECK_CUDA:%d",3); | ||
1064 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], | ||
1065 | + apdDCT[0], aDCTStep[0], | ||
1066 | + pdQuantizationTables + k * 64, | ||
1067 | + aSrcSize[0], | ||
1068 | + pDCTState))) | ||
1069 | + { | ||
1070 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
1071 | + return EXIT_FAILURE; | ||
1072 | + } | ||
1073 | + k = 1; | ||
1074 | + | ||
1075 | + //LOG_INFO("NPP_CHECK_CUDA:%d",4); | ||
1076 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], | ||
1077 | + apdDCT[1], aDCTStep[1], | ||
1078 | + pdQuantizationTables + k * 64, | ||
1079 | + aSrcSize[1], | ||
1080 | + pDCTState))) | ||
1081 | + { | ||
1082 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
1083 | + return EXIT_FAILURE; | ||
1084 | + } | ||
1085 | + | ||
1086 | + //LOG_INFO("NPP_CHECK_CUDA:%d",5); | ||
1087 | + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], | ||
1088 | + apdDCT[2], aDCTStep[2], | ||
1089 | + pdQuantizationTables + k * 64, | ||
1090 | + aSrcSize[2], | ||
1091 | + pDCTState))) | ||
1092 | + { | ||
1093 | + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); | ||
1094 | + return EXIT_FAILURE; | ||
1095 | + } | ||
1096 | + | ||
1097 | + // Huffman Encoding | ||
1098 | + | ||
1099 | + Npp32s nScanLength; | ||
1100 | + Npp8u *pJpegEncoderTemp; | ||
1101 | + | ||
1102 | +#if (CUDA_VERSION == 8000) | ||
1103 | + Npp32s nTempSize; //when using CUDA8 | ||
1104 | +#else | ||
1105 | + size_t nTempSize; //when using CUDA9 | ||
1106 | +#endif | ||
1107 | + //modified by Junlin 190221 | ||
1108 | + | ||
1109 | + //LOG_INFO("NPP_CHECK_CUDA:%d",6); | ||
1110 | + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) | ||
1111 | + { | ||
1112 | + printf("nppiEncodeHuffmanGetSize Failed!\n"); | ||
1113 | + return EXIT_FAILURE; | ||
1114 | + } | ||
1115 | + | ||
1116 | + //LOG_INFO("NPP_CHECK_CUDA:%d",7); | ||
1117 | + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); | ||
1118 | + | ||
1119 | + /** | ||
1120 | + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. | ||
1121 | + */ | ||
1122 | + NppStatus t_status; | ||
1123 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); | ||
1124 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); | ||
1125 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); | ||
1126 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); | ||
1127 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); | ||
1128 | + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); | ||
1129 | + | ||
1130 | + /** | ||
1131 | + * Huffman Encoding of the JPEG Encoding. | ||
1132 | + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. | ||
1133 | + */ | ||
1134 | + Npp32s nSs = 0; | ||
1135 | + Npp32s nSe = 63; | ||
1136 | + Npp32s nH = 0; | ||
1137 | + Npp32s nL = 0; | ||
1138 | + //LOG_INFO("NPP_CHECK_CUDA:%d",8); | ||
1139 | + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, | ||
1140 | + 0, nSs, nSe, nH, nL, | ||
1141 | + pdScan, &nScanLength, | ||
1142 | + apHuffmanDCTable, | ||
1143 | + apHuffmanACTable, | ||
1144 | + aSrcSize, | ||
1145 | + pJpegEncoderTemp))) | ||
1146 | + { | ||
1147 | + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); | ||
1148 | + return EXIT_FAILURE; | ||
1149 | + } | ||
1150 | + | ||
1151 | + for (int i = 0; i < 3; ++i) | ||
1152 | + { | ||
1153 | + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); | ||
1154 | + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); | ||
1155 | + } | ||
1156 | + // Write JPEG | ||
1157 | + pDstJpeg = new unsigned char[4 << 20]{}; | ||
1158 | + pDstOutput = pDstJpeg; | ||
1159 | + | ||
1160 | + writeMarker(0x0D8, pDstOutput); | ||
1161 | + writeJFIFTag(pDstOutput); | ||
1162 | + writeQuantizationTable(aQuantizationTables[0], pDstOutput); | ||
1163 | + writeQuantizationTable(aQuantizationTables[1], pDstOutput); | ||
1164 | + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); | ||
1165 | + writeHuffmanTable(pHuffmanACTables[0], pDstOutput); | ||
1166 | + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); | ||
1167 | + writeHuffmanTable(pHuffmanACTables[1], pDstOutput); | ||
1168 | + writeFrameHeader(oFrameHeader, pDstOutput); | ||
1169 | + writeScanHeader(oScanHeader, pDstOutput); | ||
1170 | + | ||
1171 | + //LOG_INFO("NPP_CHECK_CUDA:%d",9); | ||
1172 | + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); | ||
1173 | + | ||
1174 | + pDstOutput += nScanLength; | ||
1175 | + writeMarker(0x0D9, pDstOutput); | ||
1176 | + | ||
1177 | + { | ||
1178 | + // Write result to file. | ||
1179 | + std::ofstream outputFile(szOutputFile, ios::out | ios::binary); | ||
1180 | + outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg)); | ||
1181 | + } | ||
1182 | + | ||
1183 | + // Cleanup | ||
1184 | + cudaFree(pJpegEncoderTemp); | ||
1185 | + delete[] pDstJpeg; | ||
1186 | + for (int i = 0; i < 3; ++i) | ||
1187 | + { | ||
1188 | + cudaFree(apdDCT[i]); | ||
1189 | + cudaFree(apSrcImage[i]); | ||
1190 | + } | ||
1191 | + | ||
1192 | + return EXIT_SUCCESS; | ||
1193 | +} |
src/main.cpp
@@ -10,8 +10,6 @@ | @@ -10,8 +10,6 @@ | ||
10 | 10 | ||
11 | #include <chrono> | 11 | #include <chrono> |
12 | 12 | ||
13 | - | ||
14 | - | ||
15 | #include <unistd.h> | 13 | #include <unistd.h> |
16 | 14 | ||
17 | 15 | ||
@@ -24,6 +22,7 @@ | @@ -24,6 +22,7 @@ | ||
24 | #include "arpa/inet.h" | 22 | #include "arpa/inet.h" |
25 | #endif | 23 | #endif |
26 | 24 | ||
25 | +#include "utiltools.hpp" | ||
27 | 26 | ||
28 | #define MIN_RTP_PORT 10000 | 27 | #define MIN_RTP_PORT 10000 |
29 | #define MAX_RTP_PORT 60000 | 28 | #define MAX_RTP_PORT 60000 |
@@ -88,7 +87,7 @@ int sum2 = 0; | @@ -88,7 +87,7 @@ int sum2 = 0; | ||
88 | 87 | ||
89 | cudaStream_t stream[2]; | 88 | cudaStream_t stream[2]; |
90 | 89 | ||
91 | -string data_home = "/data/tongtu/"; | 90 | +string data_home = "/mnt/data/cmhu/tmp/"; |
92 | 91 | ||
93 | 92 | ||
94 | #define checkCudaErrors(S) do {CUresult status; \ | 93 | #define checkCudaErrors(S) do {CUresult status; \ |
@@ -183,7 +182,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ | @@ -183,7 +182,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ | ||
183 | cudaError_t cudaStatus; | 182 | cudaError_t cudaStatus; |
184 | if(pHwRgb[0] == nullptr){ | 183 | if(pHwRgb[0] == nullptr){ |
185 | // cudaStreamCreate(&stream[0]); | 184 | // cudaStreamCreate(&stream[0]); |
186 | - cuda_common::setColorSpace( ITU709, 0 ); | 185 | + cuda_common::setColorSpace( ITU_709, 0 ); |
187 | cudaStatus = cudaMalloc((void **)&pHwRgb[0], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); | 186 | cudaStatus = cudaMalloc((void **)&pHwRgb[0], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); |
188 | } | 187 | } |
189 | cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[0], gpuFrame->width, gpuFrame->height); | 188 | cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[0], gpuFrame->width, gpuFrame->height); |
@@ -208,7 +207,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ | @@ -208,7 +207,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ | ||
208 | cudaError_t cudaStatus; | 207 | cudaError_t cudaStatus; |
209 | if(pHwRgb[1] == nullptr){ | 208 | if(pHwRgb[1] == nullptr){ |
210 | // cudaStreamCreate(&stream[1]); | 209 | // cudaStreamCreate(&stream[1]); |
211 | - cuda_common::setColorSpace( ITU709, 0 ); | 210 | + cuda_common::setColorSpace( ITU_709, 0 ); |
212 | cudaStatus = cudaMalloc((void **)&pHwRgb[1], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); | 211 | cudaStatus = cudaMalloc((void **)&pHwRgb[1], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); |
213 | } | 212 | } |
214 | cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[1], gpuFrame->width, gpuFrame->height); | 213 | cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[1], gpuFrame->width, gpuFrame->height); |
@@ -231,13 +230,6 @@ bool count_flag = false; | @@ -231,13 +230,6 @@ bool count_flag = false; | ||
231 | int count = 0; | 230 | int count = 0; |
232 | int count_std = 100; | 231 | int count_std = 100; |
233 | 232 | ||
234 | -static long long get_cur_time(){ | ||
235 | - // 获取操作系统当前时间点(精确到ms) | ||
236 | - chrono::time_point<chrono::system_clock, chrono::milliseconds> tpMicro | ||
237 | - = chrono::time_point_cast<chrono::milliseconds>(chrono::system_clock::now()); | ||
238 | - | ||
239 | - return tpMicro.time_since_epoch().count(); | ||
240 | -} | ||
241 | 233 | ||
242 | static int sum = 0; | 234 | static int sum = 0; |
243 | unsigned char *pHwData = nullptr; | 235 | unsigned char *pHwData = nullptr; |
@@ -255,13 +247,13 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ | @@ -255,13 +247,13 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ | ||
255 | { | 247 | { |
256 | count_flag = true; | 248 | count_flag = true; |
257 | count = 0; | 249 | count = 0; |
258 | - end_time = start_time = get_cur_time(); | 250 | + end_time = start_time = UtilTools::get_cur_time_ms(); |
259 | } | 251 | } |
260 | count++; | 252 | count++; |
261 | sum ++ ; | 253 | sum ++ ; |
262 | if (count >= count_std) | 254 | if (count >= count_std) |
263 | { | 255 | { |
264 | - // end_time = get_cur_time(); | 256 | + // end_time = UtilTools::get_cur_time_ms(); |
265 | // long time_using = end_time - start_time; | 257 | // long time_using = end_time - start_time; |
266 | // double time_per_frame = double(time_using)/count_std ; | 258 | // double time_per_frame = double(time_using)/count_std ; |
267 | // cout << count_std << "帧用时:" << time_using << "ms 每帧用时:" << time_per_frame << "ms" << endl; | 259 | // cout << count_std << "帧用时:" << time_using << "ms 每帧用时:" << time_per_frame << "ms" << endl; |
@@ -278,7 +270,7 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ | @@ -278,7 +270,7 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ | ||
278 | // cout << "gpu id : " << decoder->m_cfg.gpuid.c_str() << endl; | 270 | // cout << "gpu id : " << decoder->m_cfg.gpuid.c_str() << endl; |
279 | cudaError_t cudaStatus; | 271 | cudaError_t cudaStatus; |
280 | if(pHwData == nullptr){ | 272 | if(pHwData == nullptr){ |
281 | - cuda_common::setColorSpace( ITU709, 0 ); | 273 | + cuda_common::setColorSpace( ITU_709, 0 ); |
282 | cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); | 274 | cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); |
283 | } | 275 | } |
284 | cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height); | 276 | cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height); |
@@ -296,10 +288,10 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ | @@ -296,10 +288,10 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ | ||
296 | } | 288 | } |
297 | 289 | ||
298 | void decode_finished_cbk(const void* userPtr){ | 290 | void decode_finished_cbk(const void* userPtr){ |
299 | - cout << "当前时间戳: " << get_cur_time() << endl; | 291 | + cout << "当前时间戳: " << UtilTools::get_cur_time_ms() << endl; |
300 | } | 292 | } |
301 | 293 | ||
302 | -bool decode_request_stream_cbk(){ | 294 | +bool decode_request_stream_cbk(const char* deviceId){ |
303 | cout << "需在此请求流" << endl; | 295 | cout << "需在此请求流" << endl; |
304 | return true; | 296 | return true; |
305 | } | 297 | } |
@@ -374,7 +366,7 @@ void logFF(void *, int level, const char *fmt, va_list ap) | @@ -374,7 +366,7 @@ void logFF(void *, int level, const char *fmt, va_list ap) | ||
374 | 366 | ||
375 | int main(int argc, char* argv[]){ | 367 | int main(int argc, char* argv[]){ |
376 | 368 | ||
377 | - test_uri = argv[1]; | 369 | + test_uri = "rtsp://admin:admin@123456@192.168.60.176:554/cam/realmonitor?channel=1&subtype=0";//argv[1]; |
378 | char* gpuid = argv[2]; | 370 | char* gpuid = argv[2]; |
379 | int port = atoi(argv[3]); | 371 | int port = atoi(argv[3]); |
380 | cout << test_uri << " gpu_id:" << gpuid << " port:" << port << endl; | 372 | cout << test_uri << " gpu_id:" << gpuid << " port:" << port << endl; |
@@ -393,7 +385,7 @@ int main(int argc, char* argv[]){ | @@ -393,7 +385,7 @@ int main(int argc, char* argv[]){ | ||
393 | std::this_thread::sleep_for(std::chrono::minutes(1)); | 385 | std::this_thread::sleep_for(std::chrono::minutes(1)); |
394 | FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); | 386 | FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); |
395 | int count = pDecManager->count(); | 387 | int count = pDecManager->count(); |
396 | - cout << "当前时间:" << get_cur_time() << " 当前运行路数: " << pDecManager->count() << endl; | 388 | + cout << "当前时间:" << UtilTools::get_cur_time_ms() << " 当前运行路数: " << pDecManager->count() << endl; |
397 | } | 389 | } |
398 | 390 | ||
399 | return (void*)0; | 391 | return (void*)0; |
src/utiltools.hpp
0 → 100644
1 | +#ifndef _UTIL_TOOLS_HPP_ | ||
2 | +#define _UTIL_TOOLS_HPP_ | ||
3 | + | ||
4 | +#include<chrono> | ||
5 | + | ||
6 | +using namespace std; | ||
7 | + | ||
8 | +namespace UtilTools{ | ||
9 | + | ||
10 | + static long get_cur_time_ms() { | ||
11 | + chrono::time_point<chrono::system_clock, chrono::milliseconds> tpMicro | ||
12 | + = chrono::time_point_cast<chrono::milliseconds>(chrono::system_clock::now()); | ||
13 | + return tpMicro.time_since_epoch().count(); | ||
14 | + } | ||
15 | + | ||
16 | +} | ||
17 | + | ||
18 | +#endif | ||
0 | \ No newline at end of file | 19 | \ No newline at end of file |