From 63e6f7bc581e7aa20757a0da8490155ef38032e6 Mon Sep 17 00:00:00 2001 From: fiss <2657262686@qq.com> Date: Thu, 16 Mar 2023 07:02:38 +0000 Subject: [PATCH] 完成dvpp。但是nv和gb28181的代码弄乱了,需要重构代码 --- .vscode/launch.json | 44 ++++---------------------------------------- .vscode/settings.json | 10 +++++++++- README.md | 7 ++++++- src/AbstractDecoder.cpp | 114 ------------------------------------------------------------------------------------------------------------------ src/AbstractDecoder.h | 128 -------------------------------------------------------------------------------------------------------------------------------- src/DrawImageOnGPU.cu | 126 ------------------------------------------------------------------------------------------------------------------------------ src/FFCuContextManager.cpp | 29 ----------------------------- src/FFCuContextManager.h | 37 ------------------------------------- src/FFNvDecoder.cpp | 388 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/FFNvDecoder.h | 62 -------------------------------------------------------------- src/FFNvDecoderManager.cpp |src/FFNvDecoderManager.h | 268 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/GpuRgbMemory.hpp | 86 -------------------------------------------------------------------------------------- src/ImageSaveGPU.cpp | 123 
--------------------------------------------------------------------------------------------------------------------------- src/ImageSaveGPU.h | 65 ----------------------------------------------------------------- src/Makefile | 91 +++++++++++++++++++++++++++++++++++++++++++------------------------------------------------ src/Makefile.bak | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/Makefile.bak0308 | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/NV12ToRGB.cu | 345 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/NvJpegEncoder.cpp | 90 ------------------------------------------------------------------------------------------ src/NvJpegEncoder.h | 3 --- src/PartMemCopy.cu | 289 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/RGB2YUV.cu | 263 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/ResizeImage.cu | 84 ------------------------------------------------------------------------------------ src/cuda_kernels.h | 63 --------------------------------------------------------------- src/define.hpp | 13 ------------- src/demo/Makefile | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/demo/Makefile.BK0308 | 43 
+++++++++++++++++++++++++++++++++++++++++++ src/demo/main_dvpp.cpp | 349 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/demo/main_nvdec.cpp1 | 452 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/CircularQueue.hpp | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/DvppDec.cpp | 421 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/DvppDec.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/DvppDecoder.cpp |src/dvpp/DvppDecoder.h | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/DvppDecoder2.h | 192 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/DvppDecoderApi.cpp | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/DvppDecoderApi.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/DvppRgbMemory.hpp | 25 +++++++++++++++++++++++++ src/dvpp/DvppSourceManager.cpp | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/DvppSourceManager.h | 36 ++++++++++++++++++++++++++++++++++++ src/dvpp/FFReceiver.cpp | 281 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/FFReceiver.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/Makefile | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/VpcPicConverter.cpp | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/VpcPicConverter.h | 19 +++++++++++++++++++ src/dvpp/depend_headers.h | 38 ++++++++++++++++++++++++++++++++++++++ src/dvpp/dvpp_headers.h | 31 +++++++++++++++++++++++++++++++ src/dvpp/threadsafe_queue.h | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/dvpp/user_mem.h | 33 +++++++++++++++++++++++++++++++++ src/gb28181/FFGB28181Decoder.cpp | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/gb28181/FFGB28181Decoder.h | 6 ++++++ src/gb28181/Makefile | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/gb28181/common_header.h | 4 ++-- 
src/interface/AbstractDecoder.cpp | 25 +++++++++++++++++++++++++ src/interface/AbstractDecoder.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/interface/DeviceRgbMemory.hpp | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/interface/FFNvDecoderManager.cpp | 494 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/interface/FFNvDecoderManager.h | 261 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/interface/Makefile | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/interface/interface_headers.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/interface/logger.hpp | 344 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/interface/utiltools.hpp | 18 ++++++++++++++++++ src/jpegNPP.cpp-1 |src/logger.hpp | 342 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ src/main.cpp | 452 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/nvdecoder/DrawImageOnGPU.cu | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nvdecoder/FFCuContextManager.cpp | 29 +++++++++++++++++++++++++++++ src/nvdecoder/FFCuContextManager.h | 37 +++++++++++++++++++++++++++++++++++++ src/nvdecoder/FFNvDecoder.cpp | 474 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nvdecoder/FFNvDecoder.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nvdecoder/GpuRgbMemory.hpp | 34 ++++++++++++++++++++++++++++++++++ src/nvdecoder/ImageSaveGPU.cpp | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 
src/nvdecoder/ImageSaveGPU.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nvdecoder/NV12ToRGB.cu | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nvdecoder/NvJpegEncoder.cpp | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nvdecoder/NvJpegEncoder.h | 3 +++ src/nvdecoder/PartMemCopy.cu | 289 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nvdecoder/RGB2YUV.cu | 263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nvdecoder/ResizeImage.cu | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nvdecoder/common_header.h | 8 ++++++++ src/nvdecoder/cuda_kernels.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/nvdecoder/define.hpp | 11 +++++++++++ src/nvdecoder/jpegNPP.cpp-1 |src/utiltools.hpp | 18 ------------------ 85 files changed, 8594 insertions(+), 5273 deletions(-) delete mode 100644 src/AbstractDecoder.cpp delete mode 100644 src/AbstractDecoder.h delete mode 100644 src/DrawImageOnGPU.cu delete mode 100644 src/FFCuContextManager.cpp delete mode 100644 src/FFCuContextManager.h delete mode 100644 src/FFNvDecoder.cpp delete mode 100644 src/FFNvDecoder.h delete mode 
100644 src/FFNvDecoderManager.cpp delete mode 100644 src/FFNvDecoderManager.h delete mode 100644 src/GpuRgbMemory.hpp delete mode 100644 src/ImageSaveGPU.cpp delete mode 100644 src/ImageSaveGPU.h create mode 100644 src/Makefile.bak create mode 100644 src/Makefile.bak0308 delete mode 100644 src/NV12ToRGB.cu delete mode 100644 src/NvJpegEncoder.cpp delete mode 100644 src/NvJpegEncoder.h delete mode 100644 src/PartMemCopy.cu delete mode 100644 src/RGB2YUV.cu delete mode 100644 src/ResizeImage.cu delete mode 100644 src/cuda_kernels.h delete mode 100644 src/define.hpp create mode 100644 src/demo/Makefile create mode 100644 src/demo/Makefile.BK0308 create mode 100644 src/demo/main_dvpp.cpp create mode 100644 src/demo/main_nvdec.cpp1 create mode 100644 src/dvpp/CircularQueue.hpp create mode 100644 src/dvpp/DvppDec.cpp create mode 100644 src/dvpp/DvppDec.h create mode 100644 src/dvpp/DvppDecoder.cpp create mode 100644 src/dvpp/DvppDecoder.h create mode 100644 src/dvpp/DvppDecoder2.h create mode 100644 src/dvpp/DvppDecoderApi.cpp create mode 100644 src/dvpp/DvppDecoderApi.h create mode 100644 src/dvpp/DvppRgbMemory.hpp create mode 100644 src/dvpp/DvppSourceManager.cpp create mode 100644 src/dvpp/DvppSourceManager.h create mode 100644 src/dvpp/FFReceiver.cpp create mode 100644 src/dvpp/FFReceiver.h create mode 100644 src/dvpp/Makefile create mode 100644 src/dvpp/VpcPicConverter.cpp create mode 100644 src/dvpp/VpcPicConverter.h create mode 100644 src/dvpp/depend_headers.h create mode 100644 src/dvpp/dvpp_headers.h create mode 100644 src/dvpp/threadsafe_queue.h create mode 100644 src/dvpp/user_mem.h create mode 100644 src/gb28181/Makefile create mode 100644 src/interface/AbstractDecoder.cpp create mode 100644 src/interface/AbstractDecoder.h create mode 100644 src/interface/DeviceRgbMemory.hpp create mode 100644 src/interface/FFNvDecoderManager.cpp create mode 100644 src/interface/FFNvDecoderManager.h create mode 100644 src/interface/Makefile create mode 100644 
src/interface/interface_headers.h create mode 100644 src/interface/logger.hpp create mode 100644 src/interface/utiltools.hpp delete mode 100644 src/jpegNPP.cpp-1 delete mode 100644 src/logger.hpp delete mode 100644 src/main.cpp create mode 100644 src/nvdecoder/DrawImageOnGPU.cu create mode 100644 src/nvdecoder/FFCuContextManager.cpp create mode 100644 src/nvdecoder/FFCuContextManager.h create mode 100644 src/nvdecoder/FFNvDecoder.cpp create mode 100644 src/nvdecoder/FFNvDecoder.h create mode 100644 src/nvdecoder/GpuRgbMemory.hpp create mode 100644 src/nvdecoder/ImageSaveGPU.cpp create mode 100644 src/nvdecoder/ImageSaveGPU.h create mode 100644 src/nvdecoder/NV12ToRGB.cu create mode 100644 src/nvdecoder/NvJpegEncoder.cpp create mode 100644 src/nvdecoder/NvJpegEncoder.h create mode 100644 src/nvdecoder/PartMemCopy.cu create mode 100644 src/nvdecoder/RGB2YUV.cu create mode 100644 src/nvdecoder/ResizeImage.cu create mode 100644 src/nvdecoder/common_header.h create mode 100644 src/nvdecoder/cuda_kernels.h create mode 100644 src/nvdecoder/define.hpp create mode 100644 src/nvdecoder/jpegNPP.cpp-1 delete mode 100644 src/utiltools.hpp diff --git a/.vscode/launch.json b/.vscode/launch.json index cc4d00d..258f7e1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,49 +2,13 @@ "version": "0.2.0", "configurations": [ { - "name": "(gdb) Launch", + "name": "dvpp", "type": "cppdbg", "request": "launch", - "program": "${workspaceFolder}/bin/lib/test", - "args": ["rtsp","3", "30012"], + "program": "${workspaceFolder}/src/build/bin/demo", + "args": ["/home/huchunming/data/woyikewangh265.mp4","0", "0"], "stopAtEntry": false, - "cwd": "${workspaceFolder}/bin/lib", - "environment": [], - "externalConsole": false, - "MIMode": "gdb", - "setupCommands": [ - { - "description": "Enable pretty-printing for gdb", - "text": "-enable-pretty-printing", - "ignoreFailures": true - } - ] - },{ - "name": "ffmpeg", - "type": "cppdbg", - "request": "launch", - "program": 
"${workspaceFolder}/bin/lib/ffmpeg_g", - "args": ["-vsync","0","-hwaccel","cuvid","-hwaccel_device","1","-c:v","h264_cuvid","-i","rtmp://192.168.10.56:1935/objecteye/116","-c:a","copy","-vf","scale_npp=800:480","-c:v","h264","/mnt/data/cmhu/FFNvDecoder/data/output1.mp4"], - "stopAtEntry": false, - "cwd": "${workspaceFolder}/bin/lib", - "environment": [], - "externalConsole": false, - "MIMode": "gdb", - "setupCommands": [ - { - "description": "Enable pretty-printing for gdb", - "text": "-enable-pretty-printing", - "ignoreFailures": true - } - ] - },{ - "name": "jrtp", - "type": "cppdbg", - "request": "launch", - "program": "${workspaceFolder}/bin/lib/jrtp_exe", - "args": ["40030","t"], - "stopAtEntry": false, - "cwd": "${workspaceFolder}/bin/lib", + "cwd": "${workspaceFolder}/src/build/bin", "environment": [], "externalConsole": false, "MIMode": "gdb", diff --git a/.vscode/settings.json b/.vscode/settings.json index 69283fc..ab95e37 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -62,6 +62,14 @@ "cfenv": "cpp", "cinttypes": "cpp", "__nullptr": "cpp", - "list": "cpp" + "list": "cpp", + "hash_map": "cpp", + "hash_set": "cpp", + "complex": "cpp", + "unordered_set": "cpp", + "regex": "cpp", + "shared_mutex": "cpp", + "variant": "cpp", + "ios": "cpp" } } \ No newline at end of file diff --git a/README.md b/README.md index 4f3dc70..ad6bb4d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -#### ffmpeg编译配置 +#### 基于CUDA的ffmpeg编译配置 1. 安装cuda 2. 安装 nv-codec-headers 支持 cuvid 需要安装 nv-codec-headers, 进入 nv-codec-headers 文件夹后以sudo权限make && make install即可 @@ -11,5 +11,10 @@ --enable-debug --extra-cflags=-g --extra-ldflags=-g --disable-optimizations --disable-stripping ~~~ +#### 普通ffmpeg编译配置 +~~~ +./configure --enable-debug --extra-cflags=-g --extra-ldflags=-g --disable-optimizations --disable-stripping --disable-x86asm --enable-nonfree --disable-vaapi --extra-cflags=-fPIC --enable-shared --enable-pic --enable-ffplay --prefix=../bin +~~~ + #### SDK说明 1. 
对外接口主要是 FFNvDecoderManager 类,可支持多个解码器;也可直接使用 FFNvDecoder ,但是不建议,FFNvDecoderManager已经封装了 FFNvDecoder 的接口 \ No newline at end of file diff --git a/src/AbstractDecoder.cpp b/src/AbstractDecoder.cpp deleted file mode 100644 index 0e51524..0000000 --- a/src/AbstractDecoder.cpp +++ /dev/null @@ -1,114 +0,0 @@ -#include "AbstractDecoder.h" - -#include "logger.hpp" -#include "GpuRgbMemory.hpp" -#include "cuda_kernels.h" - -#include "utiltools.hpp" - - -FFImgInfo* AbstractDecoder::snapshot(){ - - // 锁住停止队列消耗 - std::lock_guard l(m_snapshot_mutex); - - AVFrame * gpuFrame = nullptr; - - bool bFirst = true; - while(true){ - m_queue_mutex.lock(); - if(mFrameQueue.size() <= 0){ - m_queue_mutex.unlock(); - if(bFirst){ - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - bFirst = false; - continue; - }else{ - // 再进来说明前面已经等了 100 ms - // 100 ms都没有等到解码数据,则退出 - return nullptr; - } - } - - // 队列中数据大于1 - gpuFrame = mFrameQueue.front(); - m_queue_mutex.unlock(); - break; - } - - if (gpuFrame != nullptr && gpuFrame->format == AV_PIX_FMT_CUDA ){ - LOG_DEBUG("decode task: gpuid: {} width: {} height: {}", m_cfg.gpuid, gpuFrame->width, gpuFrame->height); - GpuRgbMemory* gpuMem = new GpuRgbMemory(3, gpuFrame->width, gpuFrame->height, getName(), m_cfg.gpuid , true); - - if (gpuMem->getMem() == nullptr){ - LOG_ERROR("new GpuRgbMemory failed !!!"); - return nullptr; - } - - cudaSetDevice(atoi(m_cfg.gpuid.c_str())); - cuda_common::setColorSpace( ITU_709, 0 ); - cudaError_t cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], gpuMem->getMem(), gpuFrame->width, gpuFrame->height); - cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - LOG_ERROR("CUDAToBGR failed failed !!!"); - return nullptr; - } - - unsigned char * pHwRgb = gpuMem->getMem(); - int channel = gpuMem->getChannel(); - int width = gpuMem->getWidth(); - int height = gpuMem->getHeight(); - - if (pHwRgb != nullptr && channel > 0 
&& width > 0 && height > 0){ - int nSize = channel * height * width; - - LOG_INFO("channel:{} height:{} width:{}", channel, height, width); - // unsigned char* cpu_data = new unsigned char[nSize]; - - unsigned char* cpu_data = (unsigned char *)av_malloc(nSize * sizeof(unsigned char)); - - cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - - delete gpuMem; - gpuMem = nullptr; - - FFImgInfo* imgInfo = new FFImgInfo(); - imgInfo->dec_name = m_dec_name; - imgInfo->pData = cpu_data; - imgInfo->height = height; - imgInfo->width = width; - imgInfo->timestamp = UtilTools::get_cur_time_ms(); - imgInfo->index = m_index; - - m_index++; - - return imgInfo; - } - - delete gpuMem; - gpuMem = nullptr; - } - - return nullptr; -} - -bool AbstractDecoder::isSnapTime(){ - if(m_snap_time_interval <= 0){ - return false; - } - long cur_time = UtilTools::get_cur_time_ms(); - if(cur_time - m_last_snap_time > m_snap_time_interval){ - return true; - } - return false; -} - -void AbstractDecoder::updateLastSnapTime(){ - m_last_snap_time = UtilTools::get_cur_time_ms(); -} - -void AbstractDecoder::setSnapTimeInterval(long interval){ - m_snap_time_interval = interval; - m_last_snap_time = UtilTools::get_cur_time_ms(); -} \ No newline at end of file diff --git a/src/AbstractDecoder.h b/src/AbstractDecoder.h deleted file mode 100644 index b5a5665..0000000 --- a/src/AbstractDecoder.h +++ /dev/null @@ -1,128 +0,0 @@ -#ifndef _ABSTRACT_DECODER_H_ -#define _ABSTRACT_DECODER_H_ - -#include - -extern "C" -{ - #include - #include - #include - #include - #include - #include - #include - #include -} - -#include -#include - -using namespace std; - -/************************************************** -* 接口:DXDECODER_CALLBACK -* 功能:解码数据回调接口 -* 参数:const dx_void * userPtr 用户自定义数据 -* AVFrame * gpuFrame 解码结果帧数据,在设置的对应的gpu上,要十分注意这一点,尤其是多线程情况 -* 返回:无 -* 备注:当解码库数据源为实时流时(RTSP/GB28181),本接 -* 口内不可进行阻塞/耗时操作。当解码库数据源为 -* 非实时流时(本地/网络文件),本接口可以进行 -* 阻塞/耗时操作 
-**************************************************/ -typedef void(*POST_DECODE_CALLBACK)(const void * userPtr, AVFrame * gpuFrame); - -typedef void(*DECODE_FINISHED_CALLBACK)(const void* userPtr); - -typedef bool(*DECODE_REQUEST_STREAM_CALLBACK)(const char* deviceId); - -struct FFDecConfig{ - string uri; // 视频地址 - POST_DECODE_CALLBACK post_decoded_cbk; // 解码数据回调接口 - DECODE_FINISHED_CALLBACK decode_finished_cbk; // 解码线程结束后的回调接口 - string gpuid; // gpu id - bool force_tcp{true}; // 是否指定使用tcp连接 - int skip_frame{1}; // 跳帧数 - - int port; // gb28181接收数据的端口号 - DECODE_REQUEST_STREAM_CALLBACK request_stream_cbk; // gb28181请求流 -}; - -enum DECODER_TYPE{ - DECODER_TYPE_GB28181, - DECODER_TYPE_FFMPEG -}; - -struct FFImgInfo{ - string dec_name; - int width; - int height; - unsigned char * pData; - long timestamp; - long index; -}; - -class AbstractDecoder { -public: - virtual ~AbstractDecoder(){}; - virtual bool init(FFDecConfig& cfg) = 0; - virtual void close() = 0; - virtual bool start() = 0; - virtual void pause() = 0; - virtual void resume() = 0; - - virtual void setDecKeyframe(bool bKeyframe) = 0; - - virtual bool isRunning() = 0; - virtual bool isFinished() = 0; - virtual bool isPausing() = 0; - virtual bool getResolution( int &width, int &height ) = 0; - - virtual bool isSurport(FFDecConfig& cfg) = 0; - - virtual int getCachedQueueLength() = 0; - - virtual float fps() = 0; - - virtual DECODER_TYPE getDecoderType() = 0; - - void setName(string nm){ - m_dec_name = nm; - } - - string getName(){ - return m_dec_name; - } - - FFImgInfo* snapshot(); - - bool isSnapTime(); - - void updateLastSnapTime(); - - void setSnapTimeInterval(long interval); - -public: - const void * m_postDecArg; - POST_DECODE_CALLBACK post_decoded_cbk; - const void * m_finishedDecArg; - DECODE_FINISHED_CALLBACK decode_finished_cbk; - -public: - string m_dec_name; - - bool m_dec_keyframe; - - FFDecConfig m_cfg; - - queue mFrameQueue; - mutex m_queue_mutex; - mutex m_snapshot_mutex; - - long 
m_snap_time_interval{-1}; - long m_last_snap_time; - long m_index{0}; -}; - -#endif // _ABSTRACT_DECODER_H_ \ No newline at end of file diff --git a/src/DrawImageOnGPU.cu b/src/DrawImageOnGPU.cu deleted file mode 100644 index 8770cea..0000000 --- a/src/DrawImageOnGPU.cu +++ /dev/null @@ -1,126 +0,0 @@ -#include "cuda_kernels.h" - -#include "logger.hpp" - -typedef unsigned char uchar; -typedef unsigned int uint32; -typedef int int32; - -namespace cuda_common -{ - __global__ void kernel_drawPixel(float* d_srcRGB, int src_width, int src_height, - int left, int top, int right, int bottom) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right)) - { - d_srcRGB[(y*src_width) + x] = 0; - d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255; - d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0; - } - } - - cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) - { - dim3 block(32, 16, 1); - dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); - - kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom); - - cudaError_t cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - LOG_ERROR("Draw 32 kernel_memcopy launch failed:{}",cudaGetErrorString(cudaStatus)); - return cudaStatus; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus); - return cudaStatus; - } - - return cudaStatus; - } - - __global__ void kernel_drawPixel(unsigned char* d_srcRGB, int src_width, int src_height, - int left, int top, int right, int bottom) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * 
blockDim.y + threadIdx.y; - - if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right)) - { - d_srcRGB[(y*src_width) + x] = 0; - d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255; - d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0; - } - } - - cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) - { - dim3 block(32, 16, 1); - dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); - - kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom); - - cudaError_t cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - LOG_ERROR("Draw 68 kernel_memcopy launch failed: {}",cudaGetErrorString(cudaStatus)); - return cudaStatus; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus); - return cudaStatus; - } - - return cudaStatus; - } - - __global__ void kernel_drawLine(float* d_srcRGB, int src_width, int src_height, - int begin_x, int begin_y, int end_x, int end_y) - { - int min_x = end_x < begin_x ? end_x : begin_x; - int max_x = end_x < begin_x ? begin_x : end_x; - - int min_y = end_y < begin_y ? end_y : begin_y; - int max_y = end_y < begin_y ? 
begin_y : end_y; - - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if ((x - begin_x) * (end_y - begin_y) == (end_x - begin_x) * (y - begin_y) - && min_x <= x && x <= max_x - && min_y <= y && y <= max_y) - { - d_srcRGB[(y*src_width) + x] = 0; - d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255; - d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0; - } - } - - cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y) - { - dim3 block(32, 16, 1); - dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); - - kernel_drawLine << < grid, block >> >(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y); - - cudaError_t cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - LOG_ERROR("Draw 112 kernel_memcopy launch failed: {}",cudaGetErrorString(cudaStatus)); - return cudaStatus; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus); - return cudaStatus; - } - - return cudaStatus; - } -} \ No newline at end of file diff --git a/src/FFCuContextManager.cpp b/src/FFCuContextManager.cpp deleted file mode 100644 index db097d6..0000000 --- a/src/FFCuContextManager.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include "FFCuContextManager.h" - -#include "logger.hpp" - -using namespace std; - -FFCuContextManager::~FFCuContextManager() -{ - for(auto iter = ctxMap.begin(); iter != ctxMap.end(); iter++){ - av_buffer_unref(&iter->second); - } - ctxMap.clear(); -} - -AVBufferRef *FFCuContextManager::getCuCtx(string gpuid) -{ - AVBufferRef *hw_device_ctx = ctxMap[gpuid]; - if (nullptr == hw_device_ctx) - { - // 初始化硬件解码器 - if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_CUDA, gpuid.c_str(), nullptr, 0) < 0) - { - LOG_ERROR("Failed to create specified HW 
device."); - return nullptr; - } - ctxMap[gpuid] = hw_device_ctx; - } - return hw_device_ctx; -} \ No newline at end of file diff --git a/src/FFCuContextManager.h b/src/FFCuContextManager.h deleted file mode 100644 index 3050641..0000000 --- a/src/FFCuContextManager.h +++ /dev/null @@ -1,37 +0,0 @@ - -#include -#include - -extern "C" -{ - #include - #include - #include - #include - #include - #include - #include -} - -using namespace std; - -class FFCuContextManager{ -public: - static FFCuContextManager* getInstance(){ - static FFCuContextManager* singleton = nullptr; - if (singleton == nullptr){ - singleton = new FFCuContextManager(); - } - return singleton; - } - - AVBufferRef *getCuCtx(string gpuid); - -private: - FFCuContextManager(){} - ~FFCuContextManager(); - -private: - map ctxMap; - -}; \ No newline at end of file diff --git a/src/FFNvDecoder.cpp b/src/FFNvDecoder.cpp deleted file mode 100644 index 9aff5fd..0000000 --- a/src/FFNvDecoder.cpp +++ /dev/null @@ -1,388 +0,0 @@ -#include "FFNvDecoder.h" - -#include -#include -#include - -#include - -#include "FFCuContextManager.h" - -#include "logger.hpp" - -#include "utiltools.hpp" - -using namespace std; - -// 参考博客: https://blog.csdn.net/qq_40116098/article/details/120704340 - -static AVPixelFormat get_hw_format(AVCodecContext *avctx, const AVPixelFormat *pix_fmts) -{ - FFNvDecoder* _this = (FFNvDecoder*)avctx->opaque; - - const AVPixelFormat *p; - - for (p = pix_fmts; *p != -1; p++) { - if (*p == _this->getHwPixFmt()) - return *p; - } - - LOG_ERROR("Failed to get HW surface format"); - return AV_PIX_FMT_NONE; -} - -FFNvDecoder::FFNvDecoder() -{ - // 初始化解码对象 - fmt_ctx = nullptr; - avctx = nullptr; - m_bRunning = false; - - stream = nullptr; - stream_index = -1; - hw_pix_fmt = AV_PIX_FMT_NONE; - m_dec_name = ""; - - m_bPause = false; - m_bReal = true; - - m_decode_thread = 0; - m_post_decode_thread = 0; - - m_bFinished = false; - m_dec_keyframe = false; - m_fps = 0.0; -} - -FFNvDecoder::~FFNvDecoder() -{ - 
m_dec_keyframe = false; -} - -bool FFNvDecoder::init(FFDecConfig& cfg) -{ - m_cfg = cfg; - - fstream infile(cfg.uri); - if (infile.is_open()){ - m_bReal = false; - infile.close(); - }else { - m_bReal = true; - } - - post_decoded_cbk = cfg.post_decoded_cbk; - decode_finished_cbk = cfg.decode_finished_cbk; - - return init(cfg.uri.c_str(), cfg.gpuid.c_str(),cfg.force_tcp); -} - -bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp) -{ - // av_log_set_level(AV_LOG_DEBUG); - - avformat_network_init(); - - // 打开输入视频文件 - AVDictionary *options = nullptr; - av_dict_set( &options, "bufsize", "655360", 0 ); - av_dict_set( &options, "rtsp_transport", force_tcp ? "tcp" : "udp", 0 ); - // av_dict_set( &options, "listen_timeout", "30", 0 ); // 单位为s - av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒 - - fmt_ctx = avformat_alloc_context(); - const char* input_file = uri; - if (avformat_open_input(&fmt_ctx, input_file, nullptr, &options) != 0) { - LOG_ERROR("Cannot open input file:{}",input_file); - return false; - } - - // 查找流信息 - if (avformat_find_stream_info(fmt_ctx, nullptr) < 0) { - LOG_ERROR("Cannot find input stream information"); - return false; - } - - // 查找视频流信息 - AVCodec *decoder = nullptr; - stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0); - if (stream_index < 0) { - LOG_ERROR("Cannot find a video stream in the input file"); - return false; - } - - string cuvid_dec_name = string(decoder->name) + "_cuvid"; - AVCodec *vcodec = avcodec_find_decoder_by_name(cuvid_dec_name.c_str()); - if (!(avctx = avcodec_alloc_context3(vcodec))) - return (bool)AVERROR(ENOMEM); - - // 得到视频流对象 - stream = fmt_ctx->streams[stream_index]; - if (avcodec_parameters_to_context(avctx, stream->codecpar) < 0) - return false; - - m_fps = av_q2d(stream ->avg_frame_rate); - - avctx->opaque = this; - // 设置解码器管理器的像素格式回调函数 - avctx->get_format = get_hw_format; - - hw_pix_fmt = AV_PIX_FMT_CUDA; - - FFCuContextManager* pCtxMgr = 
FFCuContextManager::getInstance(); - - AVBufferRef *hw_device_ctx = pCtxMgr->getCuCtx(gpuid); - if(nullptr == hw_device_ctx){ - av_log(nullptr, AV_LOG_ERROR, "create CUDA context failed ! \n"); - return false; - } - avctx->hw_device_ctx = av_buffer_ref(hw_device_ctx); - if (nullptr == avctx->hw_device_ctx) - { - return false; - } - - // 打开解码器流 - AVDictionary *op = nullptr; - av_dict_set( &op, "gpu", gpuid, 0 ); - // av_dict_set( &op, "surfaces", "5", 0 ); - if (avcodec_open2(avctx, vcodec, &op) < 0) { - LOG_ERROR("Failed to open codec for stream"); - return false; - } - - return true; -} - -bool FFNvDecoder::isSurport(FFDecConfig& cfg) -{ - bool bRet = init(cfg); - decode_finished(); - return bRet; -} - -bool FFNvDecoder::start(){ - - m_bRunning = true; - - pthread_create(&m_decode_thread,0, - [](void* arg) - { - FFNvDecoder* a=(FFNvDecoder*)arg; - a->decode_thread(); - return (void*)0; - } - ,this); - - return true; -} - -void FFNvDecoder::decode_thread() -{ - AVPacket* pkt ; - pkt = av_packet_alloc(); - av_init_packet( pkt ); - - pthread_create(&m_post_decode_thread,0, - [](void* arg) - { - FFNvDecoder* a=(FFNvDecoder*)arg; - a->post_decode_thread(); - return (void*)0; - } - ,this); - - // long start_time = UtilTools::get_cur_time_ms(); - - while (m_bRunning) - { - if (!m_bReal) - { - if (m_bPause) - { - std::this_thread::sleep_for(std::chrono::milliseconds(3)); - continue; - } - } - - int result = av_read_frame(fmt_ctx, pkt); - if (result == AVERROR_EOF || result < 0) - { - LOG_ERROR("Failed to read frame!"); - break; - } - - if (m_dec_keyframe && !(pkt->flags & AV_PKT_FLAG_KEY)) { - av_packet_unref(pkt); - continue; - } - - if (m_bReal) - { - if (m_bPause) - { - av_packet_unref(pkt); - std::this_thread::sleep_for(std::chrono::milliseconds(3)); - continue; - } - } - - if (stream_index == pkt->stream_index){ - result = avcodec_send_packet(avctx, pkt); - if (result < 0){ - av_packet_unref(pkt); - LOG_ERROR("{} - Failed to send pkt: {}", m_dec_name, result); - 
continue; - } - - AVFrame* gpuFrame = av_frame_alloc(); - result = avcodec_receive_frame(avctx, gpuFrame); - if ((result == AVERROR(EAGAIN) || result == AVERROR_EOF) || result < 0){ - LOG_ERROR("{} - Failed to receive frame: {}", m_dec_name, result); - av_frame_free(&gpuFrame); - av_packet_unref(pkt); - continue; - } - av_packet_unref(pkt); - - if(gpuFrame != nullptr){ - m_queue_mutex.lock(); - if(mFrameQueue.size() <= 10){ - mFrameQueue.push(gpuFrame); - }else{ - av_frame_free(&gpuFrame); - } - m_queue_mutex.unlock(); - } - } - av_packet_unref(pkt); - } - - m_bRunning = false; - - // long end_time = UtilTools::get_cur_time_ms(); - // cout << "解码用时:" << end_time - start_time << endl; - - if (m_post_decode_thread != 0) - { - pthread_join(m_post_decode_thread,0); - } - - decode_finished_cbk(m_finishedDecArg); - - decode_finished(); - - // 清空队列 - while(mFrameQueue.size() > 0){ - AVFrame * gpuFrame = mFrameQueue.front(); - av_frame_free(&gpuFrame); - mFrameQueue.pop(); - } - - LOG_INFO("{} - decode thread exited.", m_dec_name); -} - -void FFNvDecoder::decode_finished(){ - if (avctx) - { - avcodec_free_context(&avctx); - } - - if (fmt_ctx) - { - avformat_close_input(&fmt_ctx); - } - - m_bFinished = true; - m_dec_keyframe = false; -} - -void FFNvDecoder::post_decode_thread(){ - int skip_frame = m_cfg.skip_frame; - if (skip_frame <= 0){ - skip_frame = 1; - } - - int index = 0; - while (m_bRunning) - { - if(mFrameQueue.size() > 0){ - std::lock_guard l(m_snapshot_mutex); - // 取队头数据 - m_queue_mutex.lock(); - AVFrame * gpuFrame = mFrameQueue.front(); - mFrameQueue.pop(); - m_queue_mutex.unlock(); - // 跳帧 - if (skip_frame == 1 || index % skip_frame == 0){ - post_decoded_cbk(m_postDecArg, gpuFrame); - index = 0; - } - - av_frame_free(&gpuFrame); - - index++; - } - } - - LOG_INFO("post decode thread exited."); -} - -void FFNvDecoder::close(){ - m_bRunning=false; - if(m_decode_thread != 0){ - pthread_join(m_decode_thread,0); - } - m_dec_keyframe = false; -} - -AVPixelFormat 
FFNvDecoder::getHwPixFmt(){ - return hw_pix_fmt; -} - -bool FFNvDecoder::isRunning(){ - return m_bRunning; -} - -bool FFNvDecoder::isFinished(){ - return m_bFinished; -} - -bool FFNvDecoder::isPausing(){ - return m_bPause; -} - -bool FFNvDecoder::getResolution( int &width, int &height ){ - if (avctx != nullptr) - { - width = avctx->width; - height = avctx->height; - return true; - } - - return false; -} - -void FFNvDecoder::pause(){ - m_bPause = true; -} - -void FFNvDecoder::resume(){ - m_bPause = false; -} - -void FFNvDecoder::setDecKeyframe(bool bKeyframe) -{ - m_dec_keyframe = bKeyframe; -} - -int FFNvDecoder::getCachedQueueLength(){ - m_queue_mutex.lock(); - int queue_size = mFrameQueue.size(); - m_queue_mutex.lock(); - return queue_size; -} - -float FFNvDecoder::fps(){ - return m_fps; -} diff --git a/src/FFNvDecoder.h b/src/FFNvDecoder.h deleted file mode 100644 index 68d2a2f..0000000 --- a/src/FFNvDecoder.h +++ /dev/null @@ -1,62 +0,0 @@ -#include -#include - -#include "AbstractDecoder.h" - -#include - -using namespace std; - -class FFNvDecoder : public AbstractDecoder{ -public: - FFNvDecoder(); - ~FFNvDecoder(); - bool init(FFDecConfig& cfg); - void close(); - bool start(); - void pause(); - void resume(); - - void setDecKeyframe(bool bKeyframe); - - bool isRunning(); - bool isFinished(); - bool isPausing(); - bool getResolution( int &width, int &height ); - - bool isSurport(FFDecConfig& cfg); - - int getCachedQueueLength(); - - float fps(); - - DECODER_TYPE getDecoderType(){ return DECODER_TYPE_FFMPEG; } - -public: - AVPixelFormat getHwPixFmt(); - -private: - void decode_thread(); - void post_decode_thread(); - bool init(const char* uri, const char* gpuid, bool force_tcp); - void decode_finished(); - -private: - AVStream* stream; - AVCodecContext *avctx; - int stream_index; - AVFormatContext *fmt_ctx; - AVPixelFormat hw_pix_fmt; - - pthread_t m_decode_thread; - pthread_t m_post_decode_thread; - - bool m_bRunning; - bool m_bFinished; - - bool m_bPause; - - 
bool m_bReal; // 是否实时流 - - float m_fps; -}; \ No newline at end of file diff --git a/src/FFNvDecoderManager.cpp b/src/FFNvDecoderManager.cpp deleted file mode 100644 index b15ef22..0000000 --- a/src/FFNvDecoderManager.cpp +++ /dev/null @@ -1,600 +0,0 @@ -#include "FFNvDecoderManager.h" - -#include "FFNvDecoder.h" -#include "./gb28181/FFGB28181Decoder.h" - -#include "logger.hpp" - -using namespace std; - - -AbstractDecoder* FFNvDecoderManager::createDecoder(MgrDecConfig config){ - - closeAllFinishedDecoder(); - - if (config.cfg.post_decoded_cbk == nullptr || config.cfg.decode_finished_cbk== nullptr){ - return nullptr; - } - - std::lock_guard l(m_mutex); - - auto it = decoderMap.find(config.name); - if (it != decoderMap.end()){ - LOG_ERROR("已存在name为{}的解码器", config.name); - return nullptr; - } - - AbstractDecoder* dec = nullptr; - if(DECODER_TYPE_FFMPEG == config.dec_type){ - dec = new FFNvDecoder(); - }else if(DECODER_TYPE_GB28181 == config.dec_type){ - dec = new FFGB28181Decoder(); - } - - if (dec == nullptr){ - LOG_ERROR("没有指定解码器类型"); - return nullptr; - } - - bool bRet= dec->init(config.cfg); - if (bRet) - { - dec->setName(config.name) ; - decoderMap[config.name] = dec; - - LOG_INFO("[{}][{}]- 解码器初始化成功",config.name, config.cfg.uri); - return dec; - } - - // 创建失败,关闭解码器 - dec->close(); - delete dec; - - LOG_ERROR("[{}][{}]- 解码器初始化失败!",config.name, config.cfg.uri); - return nullptr; -} - -bool FFNvDecoderManager::setPostDecArg(const string name, const void * userPtr) -{ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - dec->second->m_postDecArg = userPtr; - return true; - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -bool FFNvDecoderManager::setFinishedDecArg(const string name, const void * userPtr) -{ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = 
decoderMap.find(name); - if (dec != decoderMap.end()) - { - dec->second->m_finishedDecArg = userPtr; - return true; - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -AbstractDecoder* FFNvDecoderManager::getDecoderByName(const string name) -{ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return nullptr; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - return dec->second; - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return nullptr; -} - -bool FFNvDecoderManager::startDecode(AbstractDecoder* dec){ - if (dec != nullptr && !dec->isRunning()) - { - return dec->start(); - } - return false; -} - -bool FFNvDecoderManager::startDecodeByName(const string name){ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - return dec->second->start(); - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -void FFNvDecoderManager::startAllDecode(){ - - std::lock_guard l(m_mutex); - - for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){ - if (!iter->second->isRunning()) - { - iter->second->start(); - } - } -} - -bool FFNvDecoderManager::closeDecoderByName(const string name){ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - dec->second->close(); - delete dec->second; - dec->second = nullptr; - decoderMap.erase(dec); - - return true; - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -void FFNvDecoderManager::closeAllDecoder() -{ - std::lock_guard l(m_mutex); - - for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){ - iter->second->close(); - delete iter->second; - iter->second = nullptr; - } - decoderMap.clear(); -} - -void FFNvDecoderManager::closeAllFinishedDecoder() -{ - std::lock_guard l(m_mutex); - - 
for(auto iter = decoderMap.begin(); iter != decoderMap.end(); ){ - if (iter->second->isFinished()) - { - delete iter->second; - iter->second = nullptr; - iter = decoderMap.erase(iter); - } - else - { - iter++ ; - } - } -} - -int FFNvDecoderManager::count() -{ - closeAllFinishedDecoder(); - - std::lock_guard l(m_mutex); - return decoderMap.size(); -} - -bool FFNvDecoderManager::pauseDecoder(const string name) -{ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - dec->second->pause(); - return true; - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -bool FFNvDecoderManager::resumeDecoder(const string name) -{ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - dec->second->resume(); - return true; - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -bool FFNvDecoderManager::isSurport(MgrDecConfig& config) -{ - { - std::lock_guard l(m_mutex); - - auto it = decoderMap.find(config.name); - if (it != decoderMap.end()){ - LOG_ERROR("已存在name所标记的解码器"); - return false; - } - } - - AbstractDecoder* dec = nullptr; - if(config.dec_type = DECODER_TYPE_FFMPEG){ - dec = new FFNvDecoder(); - }else if(config.dec_type = DECODER_TYPE_GB28181){ - dec = new FFGB28181Decoder(); - } - - if (dec == nullptr){ - LOG_ERROR("没有指定解码器类型"); - return false; - } - - bool bRet = dec->isSurport(config.cfg); - delete dec; - dec = nullptr; - - return bRet; -} - -bool FFNvDecoderManager::isRunning(const string name){ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - return dec->second->isRunning(); - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -bool FFNvDecoderManager::isFinished(const string 
name){ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - return dec->second->isFinished(); - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -bool FFNvDecoderManager::isPausing(const string name){ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - return dec->second->isPausing(); - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -bool FFNvDecoderManager::setDecKeyframe(const string name, bool bKeyframe) -{ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - dec->second->setDecKeyframe(bKeyframe); - return true; - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -bool FFNvDecoderManager::getResolution(const string name, int &width, int &height) -{ - if (name.empty()) - { - LOG_ERROR("name 为空!"); - return false; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()) - { - dec->second->getResolution(width, height); - return true; - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return false; -} - -vector FFNvDecoderManager::getAllDecodeName(){ - - closeAllFinishedDecoder(); - - std::lock_guard l(m_mutex); - - vector decode_names; - for(auto it = decoderMap.begin(); it != decoderMap.end(); ++it){ - decode_names.push_back(it->first); - } - return decode_names; -} - -int FFNvDecoderManager::getCachedQueueLength(const string name){ - if (name.empty()){ - LOG_ERROR("name 为空!"); - return -1; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()){ - return dec->second->getCachedQueueLength(); - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return -1; -} - -FFImgInfo* 
FFNvDecoderManager::snapshot(const string& uri){ - if (uri.empty()){ - return nullptr; - } - - AVFormatContext* ifmt_ctx = nullptr; - AVCodecContext* codec_ctx = nullptr; - AVCodec* codec = nullptr; - AVPacket* pkt = nullptr; - AVFrame *frame = nullptr; - AVFrame *pFrameRGB = nullptr; - int video_index = -1; - AVStream* st = nullptr; - SwsContext *img_convert_ctx = nullptr; - uint8_t *buffer = nullptr; - int numBytes = 0; - int index = 0; - - FFImgInfo* imgInfo = nullptr; - - //av_register_all(); - avformat_network_init(); - - // 参数设置 - AVDictionary *options = nullptr; - av_dict_set( &options, "bufsize", "655360", 0 ); - av_dict_set( &options, "rtsp_transport", "tcp", 0 ); - av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒 - - ///打开输入的流 - int ret = avformat_open_input(&ifmt_ctx, uri.c_str(), nullptr, &options); - if (ret != 0){ - printf("Couldn't open input stream.\n"); - goto end_flag ; - } - - //查找流信息 - if (avformat_find_stream_info(ifmt_ctx, nullptr) < 0){ - printf("Couldn't find stream information.\n"); - goto end_flag ; - } - - //找到视频流索引 - video_index = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0); - - st = ifmt_ctx->streams[video_index]; - - //找到解码器 - codec = avcodec_find_decoder(st->codecpar->codec_id); - if (!codec){ - fprintf(stderr, "Codec not found\n"); - goto end_flag ; - } - - //申请AVCodecContext - codec_ctx = avcodec_alloc_context3(codec); - if (!codec_ctx){ - goto end_flag ; - } - - avcodec_parameters_to_context(codec_ctx, ifmt_ctx->streams[video_index]->codecpar); - - //打开解码器 - if ((ret = avcodec_open2(codec_ctx, codec, nullptr) < 0)){ - goto end_flag ; - } - - // 计算解码后原始数据所需缓冲区大小,并分配内存空间 Determine required buffer size and allocate buffer - numBytes = av_image_get_buffer_size(AV_PIX_FMT_BGR24, codec_ctx->width, codec_ctx->height, 1); - buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t)); - - pFrameRGB = av_frame_alloc(); - av_image_fill_arrays(pFrameRGB->data, pFrameRGB->linesize, buffer, 
AV_PIX_FMT_BGR24, codec_ctx->width, codec_ctx->height, 1); - - img_convert_ctx = sws_getContext(codec_ctx->width, codec_ctx->height,codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height, AV_PIX_FMT_BGR24, - SWS_BICUBIC, nullptr, nullptr, nullptr); - - pkt = av_packet_alloc(); - frame = av_frame_alloc(); - while (av_read_frame(ifmt_ctx, pkt) >= 0){ - if (pkt->stream_index == video_index){ - int ret = avcodec_send_packet(codec_ctx, pkt); - if (ret >= 0){ - ret = avcodec_receive_frame(codec_ctx, frame); - if ((ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) || ret < 0){ - LOG_ERROR("Failed to receive frame: {}",ret); - continue; - } - - index ++ ; - - if (index >= 5){ - // 取解码出来的第三帧,应该可以一定程度优化花屏问题 - sws_scale(img_convert_ctx, (const unsigned char* const*)frame->data, frame->linesize, 0, codec_ctx->height, pFrameRGB->data, pFrameRGB->linesize); - - imgInfo = new FFImgInfo(); - imgInfo->pData = buffer; - imgInfo->height = codec_ctx->height; - imgInfo->width = codec_ctx->width; - - break; - } - } - } - av_packet_unref(pkt); - } - -end_flag: - if (codec_ctx != nullptr){ - avcodec_close(codec_ctx); - avcodec_free_context(&codec_ctx); - } - - if (ifmt_ctx != nullptr){ - avformat_close_input(&ifmt_ctx); - } - - if (frame != nullptr){ - av_frame_free(&frame); - } - - if (pFrameRGB != nullptr){ - av_frame_free(&pFrameRGB); - } - - if (pkt != nullptr){ - av_packet_free(&pkt); - } - - return imgInfo; -} - -void FFNvDecoderManager::releaseFFImgInfo(FFImgInfo* info){ - if(nullptr != info){ - if(info->pData != nullptr){ - av_free(info->pData); - info->pData = nullptr; - } - delete info; - info = nullptr; - } -} - -FFImgInfo* FFNvDecoderManager::snapshot_in_task(const string name){ - if (name.empty()){ - LOG_ERROR("name 为空!"); - return nullptr; - } - - std::lock_guard l(m_mutex); - - auto dec = decoderMap.find(name); - if (dec != decoderMap.end()){ - return dec->second->snapshot(); - } - - LOG_ERROR("没有找到name为{}的解码器",name); - return nullptr; -} - -vector 
FFNvDecoderManager::timing_snapshot_all(){ - - closeAllFinishedDecoder(); - - std::lock_guard l(m_mutex); - - vector vec; - for(auto it = decoderMap.begin(); it != decoderMap.end(); ++it){ - if(it->second->isSnapTime()){ - FFImgInfo* imginfo = it->second->snapshot(); - if(imginfo != nullptr){ - vec.push_back(imginfo); - } - it->second->updateLastSnapTime(); - } - } - - return vec; -} \ No newline at end of file diff --git a/src/FFNvDecoderManager.h b/src/FFNvDecoderManager.h deleted file mode 100644 index 685b1f9..0000000 --- a/src/FFNvDecoderManager.h +++ /dev/null @@ -1,268 +0,0 @@ -#include "AbstractDecoder.h" -#include -#include -#include - -#include - -using namespace std; - -struct MgrDecConfig -{ - DECODER_TYPE dec_type; // 解码器类型 - FFDecConfig cfg; // 解码器配置 - string name{""}; // 解码器名称 -}; - -/** - * 解码器管理类,单例类 - * 谨防死锁 - **/ -class FFNvDecoderManager { -public: - /************************************************** - * 接口:getInstance - * 功能:获取解码器管理者实例 - * 参数:无 - * 返回:成功返回 解码器管理者实例, 失败返回 nullptr - * 备注:调用其他接口前,需要先调用该接口获取管理者实例 - **************************************************/ - static FFNvDecoderManager* getInstance(){ - static FFNvDecoderManager* singleton = nullptr; - if (singleton == nullptr){ - singleton = new FFNvDecoderManager(); - } - return singleton; - } - - ~FFNvDecoderManager() - { - closeAllDecoder(); - } - - /************************************************** - * 接口:createDecoder - * 功能:根据配置信息创建解码器 - * 参数:MgrDecConfig& config 解码器配置信息 - * 返回:成功返回解码器, 失败返回 nullptr - * 备注: - **************************************************/ - AbstractDecoder* createDecoder(MgrDecConfig config); - - /************************************************** - * 接口:setPostDecArg - * 功能:设置解码数据回调接口的用户自定义参数 - * 参数:string name 解码器名称 - * const void * userPtr 用户自定义的要传到解码数据回调接口的数据 - * 返回:设置成功返回true,失败返回false - * 备注: - **************************************************/ - bool setPostDecArg(const string name, const void * userPtr); - - 
/************************************************** - * 接口:setFinishedDecArg - * 功能:设置解码结束回调接口的用户自定义参数 - * 参数:string name 解码器名称 - * const void * userPtr 用户自定义的要传到解码数据回调接口的数据 - * 返回:设置成功返回true,失败返回false - * 备注: - **************************************************/ - bool setFinishedDecArg(const string name, const void * userPtr); - - /************************************************** - * 接口:getDecoderByName - * 功能:根据解码器名称返回解码器对象指针 - * 参数:const string name 解码器名称 - * 返回:成功返回对应的解码器对象的指针,失败返回nullptr - * 备注: - **************************************************/ - AbstractDecoder* getDecoderByName(const string name); - - /************************************************** - * 接口:startDecode - * 功能:启动解码 - * 参数:FFNvDecoder* 解码器指针 - * 返回:void - * 备注: - **************************************************/ - bool startDecode(AbstractDecoder*); - - /************************************************** - * 接口:startAllDecode - * 功能:启动全部解码 - * 参数:void - * 返回:void - * 备注: - **************************************************/ - void startAllDecode(); - - /************************************************** - * 接口:startDecodeByName - * 功能:启动名称对应的解码器 - * 参数:string name 解码器名称 - * 返回:成功返回true,失败返回false - * 备注: - **************************************************/ - bool startDecodeByName(const string name); - - /************************************************** - * 接口:closeDecoderByName - * 功能:关闭解码器名称对应的解码 - * 参数:const string name 解码器名称 - * 返回:成功返回true,失败返回false - * 备注: - **************************************************/ - bool closeDecoderByName(const string name); - - /************************************************** - * 接口:closeAllDecoder - * 功能:关闭全部解码器 - * 参数:void - * 返回:void - * 备注: - **************************************************/ - void closeAllDecoder(); - - /************************************************** - * 接口:closeAllDecoderByGpuid - * 功能:关闭某张显卡撒花姑娘的全部解码器 - * 参数:const string gpuid gpu的id - * 返回:void - * 备注: - **************************************************/ - 
void closeAllDecoderByGpuid(const string gpuid); - - /************************************************** - * 接口:pauseDecoder - * 功能:暂停指定名称的解码器 - * 参数:const string name 解码器名称 - * 返回:成功返回true,失败返回false - * 备注: - **************************************************/ - bool pauseDecoder(const string name); - - /************************************************** - * 接口:pauseDecoder - * 功能:恢复指定名称的解码器 - * 参数:const string name 解码器名称 - * 返回:成功返回true,失败返回false - * 备注: - **************************************************/ - bool resumeDecoder(const string name); - - /************************************************** - * 接口:isSurport - * 功能:是否支持指定配置的解码 - * 参数:FFDecConfig& cfg 解码器配置 - * 返回:支持返回true,不支持返回false - * 备注: - **************************************************/ - bool isSurport(MgrDecConfig& config); - - /************************************************** - * 接口:isRunning - * 功能:根据解码器名称判断解码器是否正在运行 - * 参数:const string name 解码器名称 - * 返回:正在运行返回true,否则返回false - * 备注: - **************************************************/ - bool isRunning(const string name); - - /************************************************** - * 接口:isFinished - * 功能:根据解码器名称判断解码器是否已经结束 - * 参数:const string name 解码器名称 - * 返回:正在运行返回true,否则返回false - * 备注: - **************************************************/ - bool isFinished(const string name); - - /************************************************** - * 接口:isPausing - * 功能:根据解码器名称判断解码器是否暂停 - * 参数:const string name 解码器名称 - * 返回:正在运行返回true,否则返回false - * 备注: - **************************************************/ - bool isPausing(const string name); - - /************************************************** - * 接口:count - * 功能:获取正在运行的解码器数量 - * 参数:void - * 返回:正在运行的解码器数量 - * 备注: - **************************************************/ - int count(); - - /************************************************** - * 接口:setDecKeyframe - * 功能:设置是否只解码关键帧。默认全解 - * 参数:const string name 解码器名称 - * bool bKeyframe 是否只解码关键帧。true,只解码关键帧;false,普通的全解码 - * 返回:bool 成功返回true,失败返回false - * 备注: - 
**************************************************/ - bool setDecKeyframe(const string name, bool bKeyframe); - - /************************************************** - * 接口:getResolution - * 功能:获取视频分辨率 - * 参数:const string name 解码器名称 - * int &width 从 width 返回视频宽度 - * int &height 从 height 返回视频高度 - * 返回:bool 成功获取返回true,失败返回false - * 备注: - **************************************************/ - bool getResolution(const string name, int &width, int &height); - - /************************************************** - * 接口:getAllDecodeName - * 功能:获取全部解码器名称 - * 参数:void - * 返回:vector 返回全部解码器名称 - * 备注: - **************************************************/ - vector getAllDecodeName(); - - /************************************************** - * 接口:getCachedQueueLength - * 功能:获取解码缓冲队列当前长度 - * 参数:const string name 解码器名称 - * 返回:int 解码缓冲队列当前长度 - * 备注: - **************************************************/ - int getCachedQueueLength(const string name); - - /************************************************** - * 接口:snapshot - * 功能:获取视频快照 - * 参数:const string& uri 视频地址 - * 返回:FFImgInfo* 快照信息 - * 备注: - **************************************************/ - FFImgInfo* snapshot(const string& uri); - - /************************************************** - * 接口:releaseFFImgInfo - * 功能:释放视频快照信息 - * 参数:FFImgInfo* info 视频快照信息 - * 返回:void - * 备注: - **************************************************/ - void releaseFFImgInfo(FFImgInfo* info); - - FFImgInfo* snapshot_in_task(const string name); - - vector timing_snapshot_all(); - -private: - FFNvDecoderManager(){} - - void closeAllFinishedDecoder(); - -private: - map decoderMap; - - mutex m_mutex; -}; \ No newline at end of file diff --git a/src/GpuRgbMemory.hpp b/src/GpuRgbMemory.hpp deleted file mode 100644 index 8e3d15b..0000000 --- a/src/GpuRgbMemory.hpp +++ /dev/null @@ -1,86 +0,0 @@ -#include - -#include "cuda_kernels.h" -#include "define.hpp" -#include "utiltools.hpp" - -using namespace std; - -class GpuRgbMemory{ - -public: - GpuRgbMemory(int 
_channel, int _width, int _height, string _id, string _gpuid, bool _isused){ - channel = _channel; - width = _width; - height = _height; - size = channel * width * height; - isused = _isused; - id = _id; - gpuid = _gpuid; - timestamp = UtilTools::get_cur_time_ms(); - - cudaSetDevice(atoi(gpuid.c_str())); - CHECK_CUDA(cudaMalloc((void **)&pHwRgb, size * sizeof(unsigned char))); - } - - ~GpuRgbMemory(){ - if (pHwRgb) { - cudaSetDevice(atoi(gpuid.c_str())); - CHECK_CUDA(cudaFree(pHwRgb)); - pHwRgb = nullptr; - } - } - - int getSize() { - return size; - } - - bool isIsused() { - return isused; - } - - void setIsused(bool _isused) { - isused = _isused; - // 更新时间戳 - timestamp = UtilTools::get_cur_time_ms(); - } - - string getId() { - return id; - } - - string getGpuId() { - return gpuid; - } - - unsigned char* getMem(){ - return pHwRgb; - } - - long long getTimesstamp(){ - return timestamp; - } - - int getWidth(){ - return width; - } - - int getHeight(){ - return height; - } - - int getChannel(){ - return channel; - } - -private: - int size; - bool isused; - string id; - string gpuid; - unsigned char * pHwRgb{nullptr}; - long long timestamp; - int width{0}; - int height{0}; - int channel{3}; -}; \ No newline at end of file diff --git a/src/ImageSaveGPU.cpp b/src/ImageSaveGPU.cpp deleted file mode 100644 index 9382a27..0000000 --- a/src/ImageSaveGPU.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include "cuda_kernels.h" - -#include "logger.hpp" - - -//int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height) -//{ -// return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height); -// //return 0; -//} -// -//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height) -//{ -// return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height); -// //return 0; -//} -// -//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB) -//{ -// return jpegNPP(szOutputFile, d_srcRGB); -//} -// -//int saveJPEG(const char 
*szOutputFile, float* d_srcRGB) -//{ -// return jpegNPP(szOutputFile, d_srcRGB); -//} - -int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height) -{ - cudaError_t cudaStatus = cuda_common::ResizeImage(d_srcRGB, src_width, src_height, d_dstRGB, dst_width, dst_height); - if (cudaStatus != cudaSuccess) { - LOG_ERROR("cuda_common::ResizeImage failed: {}",cudaGetErrorString(cudaStatus)); - return -1; - } - - return 0; -} - -//int initTables() -//{ -// initTable(); -// return 0; -//} -// -//int initTables(int flag, int width, int height) -//{ -// initTable(0, width, height); -// return 0; -//} - -int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) -{ - cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom); - return 0; -} - -int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) -{ - cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom); - return 0; -} - -int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y) -{ - cuda_common::DrawLine(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y); - return 0; -} - -//int releaseJpegSaver() -//{ -// releaseJpegNPP(); -// return 0; -//} - -int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom) -{ - cudaError_t cudaStatus = cuda_common::PartMemCopy(d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom); - if (cudaStatus != cudaSuccess) { - LOG_ERROR("cuda_common::77 PartMemCopy failed: {} {} {} {} {} {} {}",cudaGetErrorString(cudaStatus), left, top, right, bottom, src_height, d_dstRGB); - return -1; - } - - return 0; -} -//#include -//extern std::ofstream g_os; -int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, 
unsigned char** d_dstRGB, - int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h, - float submeanb, float submeang, float submeanr, - float varianceb, float varianceg, float variancer) -{ - //g_os << "cudaMemcpyHostToDevice begin 9" << std::endl; - cudaError_t cudaStatus = cuda_common::PartMemResizeBatch( - d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h, - submeanb, submeang, submeanr, - varianceb, varianceg, variancer); - //g_os << "cudaMemcpyHostToDevice end 9" << std::endl; - if (cudaStatus != cudaSuccess) { - LOG_ERROR("cuda_common::PartMemResizeBatch failed: {}",cudaGetErrorString(cudaStatus)); - return -1; - } - - return 0; -} - - -//int PartMemResizeBatch(float * d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, -// int count, int* vleft, int * vtop, int* vright, int* vbottom, int dst_w, int dst_h, -// float submeanb, float submeang, float submeanr, -// float varianceb, float varianceg, float variancer) -// -//{ -// cudaError_t cudaStatus = cuda_common::PartMemResizeBatch( -// d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h, -// submeanb, submeang, submeanr, -// varianceb, varianceg, variancer); -// if (cudaStatus != cudaSuccess) { -// fprintf(stderr, "cuda_common::PartMemCopy failed: %s\n", cudaGetErrorString(cudaStatus)); -// return -1; -// } -// -// return 0; -//} \ No newline at end of file diff --git a/src/ImageSaveGPU.h b/src/ImageSaveGPU.h deleted file mode 100644 index 272a6d2..0000000 --- a/src/ImageSaveGPU.h +++ /dev/null @@ -1,65 +0,0 @@ -/******************************************************************************************* -* Version: VPT_x64_V2.0.0_20170904 -* CopyRight: 中科院自动化研究所模式识别实验室图像视频组 -* UpdateDate: 20170904 -* Content: 人车物监测跟踪 -********************************************************************************************/ - -#ifndef IMAGESAVEGPU_H_ -#define IMAGESAVEGPU_H_ - -#ifdef _MSC_VER - 
#ifdef IMAGESAVEGPU_EXPORTS - #define IMAGESAVEGPU_API __declspec(dllexport) - #else - #define IMAGESAVEGPU_API __declspec(dllimport) - #endif -#else -#define IMAGESAVEGPU_API __attribute__((visibility ("default"))) -#endif -// 功能:保存成jpeg文件 -// szOutputFile 输出图片路径,如D:\\out.jpg -// d_srcRGB 输入RGB数据,由cudaMalloc分配的显存空间,数据排列形式为:BBBBBB......GGGGGG......RRRRRRRR...... -// img_width RGB数据图片的宽度 -// img_height RGB数据图片的高度 -// -//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height); -//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB); -// -//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height); -//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB); - -// 功能:防缩图像 -IMAGESAVEGPU_API int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height); - -// 功能:部分拷贝数据 -IMAGESAVEGPU_API int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom); - -//IMAGESAVEGPU_API int partMemResizeImage(float * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, -// int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h, -// float submeanb, float submeang, float submeanr, -// float varianceb, float varianceg, float variancer); - - -IMAGESAVEGPU_API int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, - int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h, - float submeanb, float submeang, float submeanr, - float varianceb, float varianceg, float variancer); - - -//// 功能:初始化GPU保存图像的各种量化表 -//IMAGESAVEGPU_API int initTables(); -//IMAGESAVEGPU_API int initTables(int falg, int width, int height); -// -//// 功能:释放资源 -//IMAGESAVEGPU_API int releaseJpegSaver(); - -// 功能:在GPU中绘制快照包围框 -IMAGESAVEGPU_API int 
drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); - -IMAGESAVEGPU_API int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); - -// 功能:在GPU中绘制直线 -IMAGESAVEGPU_API int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y); - -#endif diff --git a/src/Makefile b/src/Makefile index 3da0ec5..2daafeb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,67 +1,62 @@ -XX = g++ +# 项目根目录 +TOP_DIR:=$(patsubst %/, %, $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +export TOP_DIR -CUDA_ROOT = /usr/local/cuda-11.1 -NVCC = $(CUDA_ROOT)/bin/nvcc +# 各项目录 +BUILD_DIR:=$(TOP_DIR)/build +BIN_DIR:=$(BUILD_DIR)/bin +export BUILD_DIR - -PROJECT_ROOT= /mnt/data/cmhu/FFNvDecoder +PROJECT_ROOT= /home/huchunming/FFNvDecoder DEPEND_DIR = $(PROJECT_ROOT)/bin SRC_ROOT = $(PROJECT_ROOT)/src THIRDPARTY_ROOT = $(PROJECT_ROOT)/3rdparty - - -TARGET= $(DEPEND_DIR)/lib/test - - SPDLOG_ROOT = $(THIRDPARTY_ROOT)/spdlog-1.9.2/release -JRTP_ROOT = $(THIRDPARTY_ROOT)/jrtp_export - - -INCLUDE= -I $(DEPEND_DIR)/include \ - -I $(CUDA_ROOT)/include \ - -I $(SRC_ROOT)/common/inc \ - -I $(SRC_ROOT)/common/UtilNPP \ - -I $(SRC_ROOT)\ - -I $(SPDLOG_ROOT)/include \ - -I $(SRC_ROOT)/gb28181 \ - -I $(JRTP_ROOT)/jrtplib/include/jrtplib3 \ - -I $(JRTP_ROOT)/jthread/include/jthread -LIBSPATH= -L $(DEPEND_DIR)/lib -lavformat -lavcodec -lswscale -lavutil -lavfilter -lswresample -lavdevice \ - -L $(CUDA_ROOT)/lib64 -lcuda -lcudart -lnvcuvid -lcurand -lcublas -lnvjpeg \ - -L $(SPDLOG_ROOT) -l:libspdlog.a \ - -L $(JRTP_ROOT)/jthread/lib -l:libjthread.a \ - -L $(JRTP_ROOT)/jrtplib/lib -l:libjrtp.a +#编译器 +CXX:=g++ +CXXFLAGS:=-std=c++11 -Wall -Wextra -Og -g +INCS:=-I $(TOP_DIR) \ + -I $(SPDLOG_ROOT)/include \ -CFLAGS= -g -fPIC -O0 $(INCLUDE) -pthread -lrt -lz -std=c++11 -fvisibility=hidden -Wl,-Bsymbolic -ldl - # -DUNICODE -D_UNICODE +MACROS:= +export CXX 
CXXFLAGS INCS MACROS -NFLAGS_LIB=-g -c -shared -Xcompiler -fPIC -Xcompiler -fvisibility=hidden -NFLAGS = $(NFLAGS_LIB) $(INCLUDE) -std=c++11 +# 链接器 +LD:=g++ +LDFLAGS:= +LIBS:= -L $(SPDLOG_ROOT)/lib -l:libspdlog.a \ -SRCS:=$(wildcard $(SRC_ROOT)/*.cpp) \ - $(wildcard $(SRC_ROOT)/gb28181/*.cpp) -OBJS = $(patsubst %.cpp, %.o, $(notdir $(SRCS))) +# 各个模块 +MODULES:= dvpp interface demo -CU_SOURCES = $(wildcard ${SRC_ROOT}/*.cu) -CU_OBJS = $(patsubst %.cu, %.o, $(notdir $(CU_SOURCES))) +# 各个模块对应的库 +MODULE_LIBS:=$(BUILD_DIR)/dvpp/lib/libdvpp.a\ + $(BUILD_DIR)/interface/lib/interface.a\ +# 最终目标文件 +TARGET:=$(BIN_DIR)/test -$(TARGET):$(OBJS) $(CU_OBJS) - rm -f $(TARGET) - $(XX) -o $@ $^ $(CFLAGS) $(LIBSPATH) $(LIBS) -Wwrite-strings - rm -f *.o +# 默认最终目标 +.PHONY:all +all:$(TARGET) -%.o:$(SRC_ROOT)/%.cpp - $(XX) $(CFLAGS) -c $< +# 最终目标依赖关系 +$(TARGET):FORCE | $(BIN_DIR) + @for n in $(MODULES); do make -s -f $(TOP_DIR)/$$n/Makefile MODULE=$$n || exit "$$?"; done + @echo -e "\e[32m""Linking executable $(TARGET)""\e[0m" +#@$(LD) $(LDFLAGS) -o $@ $(MODULE_LIBS) $(LIBS) -%.o:$(SRC_ROOT)/gb28181/%.cpp - $(XX) $(CFLAGS) -c $< +# 若没有bin目录则自动生成 +$(BIN_DIR): + @mkdir -p $@ -%.o:$(SRC_ROOT)/%.cu - @echo "#######################CU_OBJS:$@###############" - $(NVCC) $(NFLAGS) -o $@ $< +# 强制执行命令 +.PHONY:FORCE +FORCE: +# make clean直接删除整个build目录 +.PHONY:clean clean: - rm -f *.o $(TARGET) \ No newline at end of file + @rm -rf $(BUILD_DIR) diff --git a/src/Makefile.bak b/src/Makefile.bak new file mode 100644 index 0000000..2e225eb --- /dev/null +++ b/src/Makefile.bak @@ -0,0 +1,71 @@ +XX = g++ + +CUDA_ROOT = /usr/local/cuda-11.1 +NVCC = $(CUDA_ROOT)/bin/nvcc + + +PROJECT_ROOT= /home/huchunming/FFNvDecoder + +DEPEND_DIR = $(PROJECT_ROOT)/bin +SRC_ROOT = $(PROJECT_ROOT)/src +THIRDPARTY_ROOT = $(PROJECT_ROOT)/3rdparty + + +TARGET= $(DEPEND_DIR)/lib/test + + +SPDLOG_ROOT = $(THIRDPARTY_ROOT)/spdlog-1.9.2/release +JRTP_ROOT = $(THIRDPARTY_ROOT)/jrtp_export + + +INCLUDE= -I $(DEPEND_DIR)/include \ + -I 
$(CUDA_ROOT)/include \ + -I $(SRC_ROOT)/common/inc \ + -I $(SRC_ROOT)/common/UtilNPP \ + -I $(SRC_ROOT)\ + -I $(SPDLOG_ROOT)/include \ + -I $(SRC_ROOT)/gb28181 \ + -I $(JRTP_ROOT)/jrtplib/include/jrtplib3 \ + -I $(JRTP_ROOT)/jthread/include/jthread + +LIBSPATH= -L $(DEPEND_DIR)/lib -lavformat -lavcodec -lswscale -lavutil -lavfilter -lswresample -lavdevice \ + -L $(CUDA_ROOT)/lib64 -lcuda -lcudart -lnvcuvid -lcurand -lcublas -lnvjpeg \ + -L $(SPDLOG_ROOT) -l:libspdlog.a \ + -L $(JRTP_ROOT)/jthread/lib -l:libjthread.a \ + -L $(JRTP_ROOT)/jrtplib/lib -l:libjrtp.a + +CFLAGS= -g -fPIC -O0 $(INCLUDE) -pthread -lrt -lz -std=c++11 -fvisibility=hidden -Wl,-Bsymbolic -ldl + # -DUNICODE -D_UNICODE + +NFLAGS_LIB=-g -c -shared -Xcompiler -fPIC -Xcompiler -fvisibility=hidden +NFLAGS = $(NFLAGS_LIB) $(INCLUDE) -std=c++11 + +SRCS:=$(wildcard $(SRC_ROOT)/nvdecoder/*.cpp) \ + $(wildcard $(SRC_ROOT)/gb28181/*.cpp) \ + $(wildcard $(SRC_ROOT)/dvpp/*.cpp) +OBJS = $(patsubst %.cpp, %.o, $(notdir $(SRCS))) + +CU_SOURCES = $(wildcard ${SRC_ROOT}/*.cu) +CU_OBJS = $(patsubst %.cu, %.o, $(notdir $(CU_SOURCES))) + + +$(TARGET):$(OBJS) $(CU_OBJS) + rm -f $(TARGET) + $(XX) -o $@ $^ $(CFLAGS) $(LIBSPATH) $(LIBS) -Wwrite-strings + rm -f *.o + +# %.o:$(SRC_ROOT)/nvdecoder/%.cpp +# $(XX) $(CFLAGS) -c $< + +%.o:$(SRC_ROOT)/gb28181/%.cpp + $(XX) $(CFLAGS) -c $< + +%.o:$(SRC_ROOT)/dvpp/%.cpp + $(XX) $(CFLAGS) -c $< + +%.o:$(SRC_ROOT)/%.cu + @echo "#######################CU_OBJS:$@###############" + $(NVCC) $(NFLAGS) -o $@ $< + +clean: + rm -f *.o $(TARGET) \ No newline at end of file diff --git a/src/Makefile.bak0308 b/src/Makefile.bak0308 new file mode 100644 index 0000000..b8f7d89 --- /dev/null +++ b/src/Makefile.bak0308 @@ -0,0 +1,62 @@ +# 项目根目录 +TOP_DIR:=$(patsubst %/, %, $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +export TOP_DIR + +# 各项目录 +BUILD_DIR:=$(TOP_DIR)/build +BIN_DIR:=$(BUILD_DIR)/bin +export BUILD_DIR + +PROJECT_ROOT= /home/huchunming/FFNvDecoder + +DEPEND_DIR = $(PROJECT_ROOT)/bin 
+SRC_ROOT = $(PROJECT_ROOT)/src +THIRDPARTY_ROOT = $(PROJECT_ROOT)/3rdparty +SPDLOG_ROOT = $(THIRDPARTY_ROOT)/spdlog-1.9.2/release + +#编译器 +CXX:=g++ +CXXFLAGS:=-std=c++11 -Wall -Wextra -Og -g +INCS:=-I $(TOP_DIR) \ + -I $(SPDLOG_ROOT)/include \ + +MACROS:= +export CXX CXXFLAGS INCS MACROS + +# 链接器 +LD:=g++ +LDFLAGS:= +LIBS:= -L $(SPDLOG_ROOT)/lib -l:libspdlog.a \ + +# 各个模块 +MODULES:= dvpp interface + +# 各个模块对应的库 +MODULE_LIBS:=$(BUILD_DIR)/dvpp/lib/libdvpp.a\ + $(BUILD_DIR)/interface/lib/interface.a\ + +# 最终目标文件 +TARGET:=$(BIN_DIR)/test + +# 默认最终目标 +.PHONY:all +all:$(TARGET) + +# 最终目标依赖关系 +$(TARGET):FORCE | $(BIN_DIR) + @for n in $(MODULES); do make -s -f $(TOP_DIR)/$$n/Makefile MODULE=$$n || exit "$$?"; done + @echo -e "\e[32m""Linking executable $(TARGET)""\e[0m" +#@$(LD) $(LDFLAGS) -o $@ $(MODULE_LIBS) $(LIBS) + +# 若没有bin目录则自动生成 +$(BIN_DIR): + @mkdir -p $@ + +# 强制执行命令 +.PHONY:FORCE +FORCE: + +# make clean直接删除整个build目录 +.PHONY:clean +clean: + @rm -rf $(BUILD_DIR) diff --git a/src/NV12ToRGB.cu b/src/NV12ToRGB.cu deleted file mode 100644 index 58e1dff..0000000 --- a/src/NV12ToRGB.cu +++ /dev/null @@ -1,345 +0,0 @@ - -#include "cuda_kernels.h" - -#include -#include "common/inc/helper_cuda_drvapi.h" - -typedef unsigned char uint8; -typedef unsigned int uint32; -typedef int int32; - -#define COLOR_COMPONENT_MASK 0x3FF -#define COLOR_COMPONENT_BIT_SIZE 10 - -namespace cuda_common -{ - -#define MUL(x,y) ((x)*(y)) - - __constant__ float constHueColorSpaceMat2[9]; //默认分配到0卡上,未找到分配到指定卡上设置方法,当前也未用到,先注释掉 - - __device__ void YUV2RGB2(uint32 *yuvi, float *red, float *green, float *blue) - { - float luma, chromaCb, chromaCr; - - // Prepare for hue adjustment - luma = (float)yuvi[0]; - chromaCb = (float)((int32)yuvi[1] - 512.0f); - chromaCr = (float)((int32)yuvi[2] - 512.0f); - - - // Convert YUV To RGB with hue adjustment - *red = MUL(luma, constHueColorSpaceMat2[0]) + - MUL(chromaCb, constHueColorSpaceMat2[1]) + - MUL(chromaCr, constHueColorSpaceMat2[2]); - *green = MUL(luma, 
constHueColorSpaceMat2[3]) + - MUL(chromaCb, constHueColorSpaceMat2[4]) + - MUL(chromaCr, constHueColorSpaceMat2[5]); - *blue = MUL(luma, constHueColorSpaceMat2[6]) + - MUL(chromaCb, constHueColorSpaceMat2[7]) + - MUL(chromaCr, constHueColorSpaceMat2[8]); - - } - - __device__ unsigned char clip_v(int x, int min_val, int max_val) { - if (x>max_val) { - return max_val; - } - else if (x= width) - { - //printf("x >= width\n"); - //*flag = -1; - return; //x = width - 1; - } - //return; //x = width - 1; - - if (y >= height) - { - //printf("y >= height\n"); - //*flag = -1; - return; // y = height - 1; - } - - // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. - // if we move to texture we could read 4 luminance values - yuv101010Pel[0] = (srcImageU8[y * processingPitch + x]) << 2; - yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]) << 2; - - uint32 chromaOffset = processingPitch * height; - int32 y_chroma = y >> 1; - - if (y & 1) // odd scanline ? 
- { - uint32 chromaCb; - uint32 chromaCr; - - chromaCb = srcImageU8[chromaOffset + y_chroma * processingPitch + x]; - chromaCr = srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1]; - - if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically - { - chromaCb = (chromaCb + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x] + 1) >> 1; - chromaCr = (chromaCr + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x + 1] + 1) >> 1; - } - - yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - - yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - } - else - { - yuv101010Pel[0] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x] << (COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[0] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - - yuv101010Pel[1] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x] << (COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[1] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - } - - // this steps performs the color conversion - uint32 yuvi[6]; - float red[2], green[2], blue[2]; - - yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK); - yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); - yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); - - yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK); - yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); - yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); - - // YUV to RGB Transformation conversion - YUV2RGB2(&yuvi[0], &red[0], &green[0], &blue[0]); - YUV2RGB2(&yuvi[3], &red[1], 
&green[1], &blue[1]); - - - dstImage[y * width * 3 + x * 3] = clip_v(blue[0] * 0.25,0 ,255); - dstImage[y * width * 3 + x * 3 + 3] = clip_v(blue[1] * 0.25,0, 255); - - dstImage[width * y * 3 + x * 3 + 1] = clip_v(green[0] * 0.25,0 ,255); - dstImage[width * y * 3 + x * 3 + 4] = clip_v(green[1] * 0.25,0, 255); - - dstImage[width * y * 3 + x * 3 + 2] = clip_v(red[0] * 0.25, 0, 255); - dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255); - - - //dstImage[y * width * 3 + x * 3] = blue[0] * 0.25; - //dstImage[y * width * 3 + x * 3 + 3] = blue[1] * 0.25; - - //dstImage[width * y * 3 + x * 3 + 1] =green[0] * 0.25; - //dstImage[width * y * 3 + x * 3 + 4] = green[1] * 0.25; - - //dstImage[width * y * 3 + x * 3 + 2] = red[0] * 0.25; - //dstImage[width * y * 3 + x * 3 + 5] = red[1] * 0.25; - - // Clamp the results to BBBBBB....GGGGGGG.......RRRRRRR.... - // dstImage[y * width + x] = blue[0] * 0.25; - // dstImage[y * width + x + 1] = blue[1] * 0.25; - - // dstImage[width * height + y * width + x] = green[0] * 0.25; - // dstImage[width * height + y * width + x + 1] = green[1] * 0.25; - - // dstImage[width * height * 2 + y * width + x] = red[0] * 0.25; - // dstImage[width * height * 2 + y * width + x + 1] = red[1] * 0.25; - return; - - } - - // CUDA kernel for outputing the final RGB output from NV12; - extern "C" - __global__ void CUDAToBGR_drvapi(uint32 *dataY, uint32 *dataUV, size_t pitchY, size_t pitchUV, unsigned char *dstImage, int width, int height) - { - - int32 x, y; - - // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread - x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); - y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= width) - { - return; - } - - if (y >= height) - { - return; - } - - uint32 yuv101010Pel[2]; - uint8 *srcImageU8_Y = (uint8 *)dataY; - uint8 *srcImageU8_UV = (uint8 *)dataUV; - - // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. 
- // if we move to texture we could read 4 luminance values - yuv101010Pel[0] = (srcImageU8_Y[y * pitchY + x]) << 2; - yuv101010Pel[1] = (srcImageU8_Y[y * pitchY + x + 1]) << 2; - - int32 y_chroma = y >> 1; - - if (y & 1) // odd scanline ? - { - uint32 chromaCb; - uint32 chromaCr; - - chromaCb = srcImageU8_UV[y_chroma * pitchUV + x]; - chromaCr = srcImageU8_UV[y_chroma * pitchUV + x + 1]; - - if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically - { - chromaCb = (chromaCb + srcImageU8_UV[(y_chroma + 1) * pitchUV + x] + 1) >> 1; - chromaCr = (chromaCr + srcImageU8_UV[(y_chroma + 1) * pitchUV + x + 1] + 1) >> 1; - } - - yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - - yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - } - else - { - yuv101010Pel[0] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x] << (COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[0] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - - yuv101010Pel[1] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x] << (COLOR_COMPONENT_BIT_SIZE + 2)); - yuv101010Pel[1] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); - } - - // this steps performs the color conversion - uint32 yuvi[6]; - float red[2], green[2], blue[2]; - - yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK); - yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); - yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); - - yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK); - yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); - yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); - - // YUV to RGB Transformation conversion - 
YUV2RGB2(&yuvi[0], &red[0], &green[0], &blue[0]); - YUV2RGB2(&yuvi[3], &red[1], &green[1], &blue[1]); - - - dstImage[y * width * 3 + x * 3] = clip_v(blue[0] * 0.25,0 ,255); - dstImage[y * width * 3 + x * 3 + 3] = clip_v(blue[1] * 0.25,0, 255); - - dstImage[width * y * 3 + x * 3 + 1] = clip_v(green[0] * 0.25,0 ,255); - dstImage[width * y * 3 + x * 3 + 4] = clip_v(green[1] * 0.25,0, 255); - - dstImage[width * y * 3 + x * 3 + 2] = clip_v(red[0] * 0.25, 0, 255); - dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255); - } - - cudaError_t setColorSpace(FF_ColorSpace CSC, float hue) - { - float hueSin = sin(hue); - float hueCos = cos(hue); - - float hueCSC[9]; - if (CSC == ITU_601) - { - //CCIR 601 - hueCSC[0] = 1.1644f; - hueCSC[1] = hueSin * 1.5960f; - hueCSC[2] = hueCos * 1.5960f; - hueCSC[3] = 1.1644f; - hueCSC[4] = (hueCos * -0.3918f) - (hueSin * 0.8130f); - hueCSC[5] = (hueSin * 0.3918f) - (hueCos * 0.8130f); - hueCSC[6] = 1.1644f; - hueCSC[7] = hueCos * 2.0172f; - hueCSC[8] = hueSin * -2.0172f; - } - else if (CSC == ITU_709) - { - //CCIR 709 - hueCSC[0] = 1.0f; - hueCSC[1] = hueSin * 1.57480f; - hueCSC[2] = hueCos * 1.57480f; - hueCSC[3] = 1.0; - hueCSC[4] = (hueCos * -0.18732f) - (hueSin * 0.46812f); - hueCSC[5] = (hueSin * 0.18732f) - (hueCos * 0.46812f); - hueCSC[6] = 1.0f; - hueCSC[7] = hueCos * 1.85560f; - hueCSC[8] = hueSin * -1.85560f; - } - - cudaError_t cudaStatus = cudaMemcpyToSymbol(constHueColorSpaceMat2, hueCSC, 9 * sizeof(float), 0, cudaMemcpyHostToDevice); - float tmpf[9]; - memset(tmpf, 0, 9 * sizeof(float)); - cudaMemcpyFromSymbol(tmpf, constHueColorSpaceMat2, 9 * sizeof(float), 0, ::cudaMemcpyDefault); - cudaDeviceSynchronize(); - - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMemcpyToSymbol failed: %s\n", cudaGetErrorString(cudaStatus)); - } - - return cudaStatus; - } - - cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height) - { - dim3 block(32, 16, 1); - dim3 
grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1); - NV12ToRGB_drvapi2 << < grid, block >> >((uint32 *)d_srcNV12, nSourcePitch, d_dstRGB, width, height); - cudaError_t cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus)); - return cudaStatus; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching NV12ToRGB_drvapi !\n", cudaStatus); - return cudaStatus; - } - - return cudaStatus; - } - - cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height) - { - dim3 block(32, 16, 1); - dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1); - CUDAToBGR_drvapi << < grid, block >> >((uint32 *)dataY, (uint32 *)dataUV, pitchY, pitchUV, d_dstRGB, width, height); - cudaError_t cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus)); - return cudaStatus; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching NV12ToRGB_drvapi !\n", cudaStatus); - return cudaStatus; - } - - return cudaStatus; - } -} \ No newline at end of file diff --git a/src/NvJpegEncoder.cpp b/src/NvJpegEncoder.cpp deleted file mode 100644 index 7ee0727..0000000 --- a/src/NvJpegEncoder.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "NvJpegEncoder.h" - -#include -#include -#include - - -#define CHECK_NVJPEG(S) do {nvjpegStatus_t status; \ - status = S; \ - if (status != NVJPEG_STATUS_SUCCESS ) std::cout << __LINE__ <<" CHECK_NVJPEG - status = " << status << std::endl; \ - } while (false) - - -int saveJpeg(const char * filepath, unsigned char* 
d_srcBGR, int width, int height, cudaStream_t stream) -{ - nvjpegHandle_t nvjpeg_handle; - nvjpegEncoderState_t encoder_state; - nvjpegEncoderParams_t encoder_params; - - cudaEvent_t ev_start, ev_end; - cudaEventCreate(&ev_start); - cudaEventCreate(&ev_end); - - nvjpegImage_t input; - nvjpegInputFormat_t input_format = NVJPEG_INPUT_BGRI; - int image_width = width; - int image_height = height; - - // int channel_size = image_width * image_height; - // for (int i = 0; i < 3; i++) - // { - // input.pitch[i] = image_width; - // (cudaMalloc((void**)&(input.channel[i]), channel_size)); - // (cudaMemset(input.channel[i], 50 * 40 * i, channel_size)); - // } - - input.channel[0] = d_srcBGR; - input.pitch[0] = image_width * 3; - - nvjpegBackend_t backend = NVJPEG_BACKEND_DEFAULT; - - CHECK_NVJPEG(nvjpegCreate(backend, nullptr, &nvjpeg_handle)); - - CHECK_NVJPEG(nvjpegEncoderParamsCreate(nvjpeg_handle, &encoder_params, stream)); - CHECK_NVJPEG(nvjpegEncoderStateCreate(nvjpeg_handle, &encoder_state, stream)); - - // set params - CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(encoder_params, nvjpegJpegEncoding_t::NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN, stream)); - CHECK_NVJPEG(nvjpegEncoderParamsSetOptimizedHuffman(encoder_params, 1, stream)); - CHECK_NVJPEG(nvjpegEncoderParamsSetQuality(encoder_params, 70, stream)); - CHECK_NVJPEG(nvjpegEncoderParamsSetSamplingFactors(encoder_params, nvjpegChromaSubsampling_t::NVJPEG_CSS_420, stream)); - - cudaEventRecord(ev_start); - CHECK_NVJPEG(nvjpegEncodeImage(nvjpeg_handle, encoder_state, encoder_params, &input, input_format, image_width, image_height, stream)); - cudaEventRecord(ev_end); - - std::vector obuffer; - size_t length; - CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream( - nvjpeg_handle, - encoder_state, - NULL, - &length, - stream)); - - obuffer.resize(length); - CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream( - nvjpeg_handle, - encoder_state, - obuffer.data(), - &length, - stream)); - - cudaEventSynchronize(ev_end); - - // 用完销毁,避免显存泄露 - 
nvjpegEncoderParamsDestroy(encoder_params); - nvjpegEncoderStateDestroy(encoder_state); - nvjpegDestroy(nvjpeg_handle); - - float ms; - cudaEventElapsedTime(&ms, ev_start, ev_end); - // std::cout << "time spend " << ms << " ms" << std::endl; - - std::ofstream outputFile(filepath, std::ios::out | std::ios::binary); - outputFile.write(reinterpret_cast(obuffer.data()), static_cast(length)); - outputFile.close(); - - return 0; -} \ No newline at end of file diff --git a/src/NvJpegEncoder.h b/src/NvJpegEncoder.h deleted file mode 100644 index 3c27ba8..0000000 --- a/src/NvJpegEncoder.h +++ /dev/null @@ -1,3 +0,0 @@ -#include - -int saveJpeg(const char * filepath, unsigned char* d_srcBGR, int width, int height, cudaStream_t stream); \ No newline at end of file diff --git a/src/PartMemCopy.cu b/src/PartMemCopy.cu deleted file mode 100644 index 396765b..0000000 --- a/src/PartMemCopy.cu +++ /dev/null @@ -1,289 +0,0 @@ -#include "cuda_kernels.h" -#include -typedef unsigned char uchar; -typedef unsigned int uint32; -typedef int int32; - -#define MAX_SNAPSHOT_WIDTH 320 -#define MAX_SNAPSHOT_HEIGHT 320 - -namespace cuda_common -{ - __global__ void kernel_memcopy(unsigned char* d_srcRGB, int src_width, int src_height, - unsigned char* d_dstRGB, int left, int top, int right, int bottom) - { - const int dst_x = blockIdx.x * blockDim.x + threadIdx.x; - const int dst_y = blockIdx.y * blockDim.y + threadIdx.y; - const int dst_width = right - left; - const int dst_height = bottom - top; - if (dst_x < dst_width && dst_y < dst_height) - { - int src_x = left + dst_x; - int src_y = top + dst_y; - - //bgr...bgr...bgr... - d_dstRGB[(dst_y*dst_width + dst_x) * 3] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3]; - d_dstRGB[(dst_y*dst_width + dst_x) - * 3 + 1] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3 + 1]; - d_dstRGB[(dst_y*dst_width + dst_x) * 3 + 2] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3 + 2]; - - //bbb...ggg...rrr... 
- //d_dstRGB[(dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(src_y*src_width) + src_x]; - //d_dstRGB[(dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(src_width*src_height) + (src_y*src_width) + src_x]; - //d_dstRGB[(2 * dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(2 * src_width*src_height) + (src_y*src_width) + src_x]; - - /* memcpy(d_dstRGB + (dst_y*src_width) + dst_x, d_srcRGB + (src_y*src_width) + src_x, sizeof(float)); - memcpy(d_dstRGB + (src_width*src_height) + (dst_y*src_width) + dst_x, d_srcRGB + (src_width*src_height) + (src_y*src_width) + src_x, sizeof(float)); - memcpy(d_dstRGB + (2 * src_width*src_height) + (dst_y*src_width) + dst_x, d_srcRGB + (2 * src_width*src_height) + (src_y*src_width) + src_x, sizeof(float));*/ - } - } - - cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom) - { - dim3 block(32, 16, 1); - dim3 grid(((right - left) + (block.x - 1)) / block.x, ((bottom - top) + (block.y - 1)) / block.y, 1); - - kernel_memcopy << < grid, block >> > (d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom); - - cudaError_t cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "Part 50 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus)); - return cudaStatus; - } - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); - return cudaStatus; - } - return cudaStatus; - } - - - // __global__ void kernel_memcopy_mean_variance(float* d_srcRGB, int src_width, int src_height, - // unsigned char* vd_dstRGB, int count, int * vleft, int* vtop, int* vright, int * vbottom, float submeanb,float submeang, float submeanr, float varianceb,float varianceg, float variancer) - // { - // const int dst_x = blockIdx.x * 
blockDim.x + threadIdx.x; - // const int dst_y = blockIdx.y * blockDim.y + threadIdx.y; - // for (int i=0;i srcimg_width - 2) - { - ax = srcimg_width - 2; - } - if (ay < 0) { - ay = 0; - } - if (ay > srcimg_height - 2) - { - ay = srcimg_height - 2; - } - - int A = ax + ay*srcimg_width; - int B = ax + ay*srcimg_width + 1; - int C = ax + ay*srcimg_width + srcimg_width; - int D = ax + ay*srcimg_width + srcimg_width + 1; - - float w1, w2, w3, w4; - w1 = fx - ax; - w2 = 1 - w1; - w3 = fy - ay; - w4 = 1 - w3; - float blue = src_img[A * 3] * w2*w4 + src_img[B * 3] * w1*w4 + src_img[C * 3] * w2*w3 + src_img[D * 3] * w1*w3; - float green = src_img[A * 3 + 1] * w2*w4 + src_img[B * 3 + 1] * w1*w4 - + src_img[C * 3 + 1] * w2*w3 + src_img[D * 3 + 1] * w1*w3; - float red = src_img[A * 3 + 2] * w2*w4 + src_img[B * 3 + 2] * w1*w4 - + src_img[C * 3 + 2] * w2*w3 + src_img[D * 3 + 2] * w1*w3; - - /*dst_img[(dst_y * dst_width + dst_x) * 3] = (unsigned char)(blue - submeanb)*varianceb; - dst_img[(dst_y * dst_width + dst_x) * 3 + 1] =(unsigned char) (green - submeang)*varianceg; - dst_img[(dst_y * dst_width + dst_x) * 3 + 2] = (unsigned char) (red - submeanr)*variancer;*/ - - if (blue < 0) - blue = 0; - else if (blue > 255) - blue = 255; - - if (green < 0) - green = 0; - else if (green > 255) - green = 255; - - if (red < 0) - red = 0; - else if (red > 255) - red = 255; - - dst_img[(dst_y * cur_dst_width + dst_x) * 3] = (unsigned char)blue; - dst_img[(dst_y * cur_dst_width + dst_x) * 3 + 1] = (unsigned char)green; - dst_img[(dst_y * cur_dst_width + dst_x) * 3 + 2] = (unsigned char)red; - - - /*if (src_img[(dst_y * dst_width + dst_x) * 3] < 0) - src_img[(dst_y * dst_width + dst_x) * 3] = 0; - else if (src_img[(dst_y * dst_width + dst_x) * 3] > 255) - src_img[(dst_y * dst_width + dst_x) * 3] = 255; - - if (src_img[(dst_y * dst_width + dst_x) * 3 + 1] < 0) - src_img[(dst_y * dst_width + dst_x) * 3 + 1] = 0; - else if (src_img[(dst_y * dst_width + dst_x) * 3 + 1] > 255) - src_img[(dst_y * 
dst_width + dst_x) * 3 + 1] = 255; - - if (src_img[(dst_y * dst_width + dst_x) * 3 + 2] < 0) - src_img[(dst_y * dst_width + dst_x) * 3 + 2] = 0; - else if (src_img[(dst_y * dst_width + dst_x) * 3 + 2] > 255) - src_img[(dst_y * dst_width + dst_x) * 3 + 2] = 255; - - - dst_img[(dst_y * dst_width + dst_x) * 3] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3]; - dst_img[(dst_y * dst_width + dst_x) * 3 + 1] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3 + 1]; - dst_img[(dst_y * dst_width + dst_x) * 3 + 2] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3 + 2];*/ - } - } - } - - cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, int count, int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h, float submeanb, float submeang, float submeanr, - float varianceb, float varianceg, float variancer) - { - /* cudaEvent_t start, stop; - float time; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0);*/ - - dim3 block(32, 16, 1); - dim3 grid((*std::max_element(dst_w, dst_w+ count) + (block.x - 1)) / block.x, (*std::max_element(dst_h, dst_h + count) + (block.y - 1)) / block.y, count); - - int * gpu_left; - cudaMalloc(&gpu_left, 1000 * sizeof(int)); - cudaMemcpy(gpu_left, left, count * sizeof(int), cudaMemcpyHostToDevice); - - int * gpu_right; - cudaMalloc(&gpu_right, 1000 * sizeof(int)); - cudaMemcpy(gpu_right, right, count * sizeof(int), cudaMemcpyHostToDevice); - - int * gpu_top; - cudaMalloc(&gpu_top, 1000 * sizeof(int)); - cudaMemcpy(gpu_top, top, count * sizeof(int), cudaMemcpyHostToDevice); - - int * gpu_bottom; - cudaMalloc(&gpu_bottom, 1000 * sizeof(int)); - cudaMemcpy(gpu_bottom, bottom, count * sizeof(int), cudaMemcpyHostToDevice); - - int * gpu_dst_w; - cudaMalloc(&gpu_dst_w, 1000 * sizeof(int)); - cudaMemcpy(gpu_dst_w, dst_w, count * sizeof(int), cudaMemcpyHostToDevice); - - int * gpu_dst_h; - cudaMalloc(&gpu_dst_h, 1000 * sizeof(int)); - 
cudaMemcpy(gpu_dst_h, dst_h, count * sizeof(int), cudaMemcpyHostToDevice); - - unsigned char** gpu_dst_rgb; - cudaMalloc(&gpu_dst_rgb, 1000 * sizeof(unsigned char*)); - cudaMemcpy(gpu_dst_rgb, d_dstRGB, count * sizeof(unsigned char*), cudaMemcpyHostToDevice); - - //cudaMemcpy(cpu_personfloat, d_srcRGB, 112*224*2*sizeof(float), cudaMemcpyDeviceToHost); - // for(int i=0;i<100;i++) - // { - // printf("the score is %f\t",cpu_personfloat[i]); - // } - PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel << < grid, block >> > ( - d_srcRGB, src_width, src_height, - gpu_left, gpu_top, gpu_right, gpu_bottom, - gpu_dst_rgb, count, gpu_dst_w, gpu_dst_h, - submeanb, submeang, submeanr, - varianceb, varianceg, variancer); - cudaFree(gpu_top); - cudaFree(gpu_bottom); - cudaFree(gpu_left); - cudaFree(gpu_right); - cudaFree(gpu_dst_w); - cudaFree(gpu_dst_h); - cudaFree(gpu_dst_rgb); - - cudaError_t cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "Part 270 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus)); - return cudaStatus; - } - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); - return cudaStatus; - } - - /*cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&time, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - printf("˺ʱ:%f\n", time);*/ - - return cudaStatus; - } - -} \ No newline at end of file diff --git a/src/RGB2YUV.cu b/src/RGB2YUV.cu deleted file mode 100644 index 7202c3a..0000000 --- a/src/RGB2YUV.cu +++ /dev/null @@ -1,263 +0,0 @@ - - -#include "cuda_kernels.h" - -typedef unsigned char uint8; -typedef unsigned int uint32; -typedef int int32; - -namespace cuda_common -{ - __device__ unsigned char clip_value(unsigned char x, unsigned char min_val, unsigned char max_val){ - if (x>max_val){ - return max_val; - } - else if (x= 
src_width) - return; //x = width - 1; - - if (y >= src_height) - return; // y = height - 1; - - int B = src_img[y * src_width * 3 + x * 3]; - int G = src_img[y * src_width * 3 + x * 3 + 1]; - int R = src_img[y * src_width * 3 + x * 3 + 2]; - - /*int B = src_img[y * src_width + x]; - int G = src_img[src_width * src_height + y * src_width + x]; - int R = src_img[src_width * src_height * 2 + y * src_width + x];*/ - - Y[y * yPitch + x] = clip_value((unsigned char)(0.299 * R + 0.587 * G + 0.114 * B), 0, 255); - u[y * src_width + x] = clip_value((unsigned char)(-0.147 * R - 0.289 * G + 0.436 * B + 128), 0, 255); - v[y * src_width + x] = clip_value((unsigned char)(0.615 * R - 0.515 * G - 0.100 * B + 128), 0, 255); - - //Y[y * yPitch + x] = clip_value((unsigned char)(0.257 * R + 0.504 * G + 0.098 * B + 16), 0, 255); - //u[y * src_width + x] = clip_value((unsigned char)(-0.148 * R - 0.291 * G + 0.439 * B + 128), 0, 255); - //v[y * src_width + x] = clip_value((unsigned char)(0.439 * R - 0.368 * G - 0.071 * B + 128), 0, 255); - } - - __global__ void kernel_rgb2yuv(float *src_img, unsigned char* Y, unsigned char* u, unsigned char* v, - int src_width, int src_height, size_t yPitch) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= src_width) - return; //x = width - 1; - - if (y >= src_height) - return; // y = height - 1; - - float B = src_img[y * src_width + x]; - float G = src_img[src_width * src_height + y * src_width + x]; - float R = src_img[src_width * src_height * 2 + y * src_width + x]; - - Y[y * yPitch + x] = clip_value((unsigned char)(0.299 * R + 0.587 * G + 0.114 * B), 0, 255); - u[y * src_width + x] = clip_value((unsigned char)(-0.147 * R - 0.289 * G + 0.436 * B + 128), 0, 255); - v[y * src_width + x] = clip_value((unsigned char)(0.615 * R - 0.515 * G - 0.100 * B + 128), 0, 255); - - //Y[y * yPitch + x] = clip_value((unsigned char)(0.257 * R + 0.504 * G + 0.098 * B + 16), 0, 255); - //u[y * 
src_width + x] = clip_value((unsigned char)(-0.148 * R - 0.291 * G + 0.439 * B + 128), 0, 255); - //v[y * src_width + x] = clip_value((unsigned char)(0.439 * R - 0.368 * G - 0.071 * B + 128), 0, 255); - } - - extern "C" - __global__ void kernel_resize_UV(unsigned char* src_img, unsigned char *dst_img, - int src_width, int src_height, int dst_width, int dst_height, int nPitch) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= dst_width) - return; //x = width - 1; - - if (y >= dst_height) - return; // y = height - 1; - - float fx = (x + 0.5)*src_width / (float)dst_width - 0.5; - float fy = (y + 0.5)*src_height / (float)dst_height - 0.5; - int ax = floor(fx); - int ay = floor(fy); - if (ax < 0) - { - ax = 0; - } - else if (ax > src_width - 2) - { - ax = src_width - 2; - } - - if (ay < 0){ - ay = 0; - } - else if (ay > src_height - 2) - { - ay = src_height - 2; - } - - int A = ax + ay*src_width; - int B = ax + ay*src_width + 1; - int C = ax + ay*src_width + src_width; - int D = ax + ay*src_width + src_width + 1; - - float w1, w2, w3, w4; - w1 = fx - ax; - w2 = 1 - w1; - w3 = fy - ay; - w4 = 1 - w3; - - unsigned char val = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3; - - dst_img[y * nPitch + x] = clip_value(val,0,255); - } - - cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height, - unsigned char* Y, size_t yPitch, int yWidth, int yHeight, - unsigned char* U, size_t uPitch, int uWidth, int uHeight, - unsigned char* V, size_t vPitch, int vWidth, int vHeight) - { - unsigned char * u ; - unsigned char * v ; - - cudaError_t cudaStatus; - - cudaStatus = cudaMalloc((void**)&u, src_width * src_height * sizeof(unsigned char)); - cudaStatus = cudaMalloc((void**)&v, src_width * src_height * sizeof(unsigned char)); - - dim3 block(32, 16, 1); - dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); - dim3 grid1((uWidth 
+ (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1); - dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1); - - kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch); - - cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus)); - goto Error; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus); - goto Error; - } - - kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch); - - cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); - goto Error; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); - goto Error; - } - - kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch); - - cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); - goto Error; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); - goto Error; - } - -Error : - cudaFree(u); - cudaFree(v); - - return cudaStatus; - } - - - - cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height, - unsigned char* Y, size_t yPitch, int yWidth, int yHeight, - unsigned char* U, size_t uPitch, int uWidth, int uHeight, - unsigned char* V, size_t vPitch, int vWidth, int vHeight) - { - unsigned char * u; - unsigned char * v; - - 
cudaError_t cudaStatus; - - cudaStatus = cudaMalloc((void**)&u, src_width * src_height * sizeof(unsigned char)); - cudaStatus = cudaMalloc((void**)&v, src_width * src_height * sizeof(unsigned char)); - - dim3 block(32, 16, 1); - dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); - dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1); - dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1); - - kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch); - - cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus)); - goto Error; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus); - goto Error; - } - - kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch); - - cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); - goto Error; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); - goto Error; - } - - kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch); - - cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); - goto Error; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); - goto Error; - } - - Error: - cudaFree(u); - 
cudaFree(v); - - return cudaStatus; - } -} - diff --git a/src/ResizeImage.cu b/src/ResizeImage.cu deleted file mode 100644 index fdc6961..0000000 --- a/src/ResizeImage.cu +++ /dev/null @@ -1,84 +0,0 @@ -#include "cuda_kernels.h" - -typedef unsigned char uchar; -typedef unsigned int uint32; -typedef int int32; - -namespace cuda_common -{ - __global__ void kernel_bilinear(float *src_img, float *dst_img, - int src_width, int src_height, int dst_width, int dst_height) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < dst_width && y < dst_height) - { - float fx = (x + 0.5)*src_width / (float)dst_width - 0.5; - float fy = (y + 0.5)*src_height / (float)dst_height - 0.5; - int ax = floor(fx); - int ay = floor(fy); - if (ax < 0) - { - ax = 0; - } - else if (ax > src_width - 2) - { - ax = src_width - 2; - } - - if (ay < 0){ - ay = 0; - } - else if (ay > src_height - 2) - { - ay = src_height - 2; - } - - int A = ax + ay*src_width; - int B = ax + ay*src_width + 1; - int C = ax + ay*src_width + src_width; - int D = ax + ay*src_width + src_width + 1; - - float w1, w2, w3, w4; - w1 = fx - ax; - w2 = 1 - w1; - w3 = fy - ay; - w4 = 1 - w3; - - float blue = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3; - - float green = src_img[src_width * src_height + A] * w2*w4 + src_img[src_width * src_height + B] * w1*w4 - + src_img[src_width * src_height + C] * w2*w3 + src_img[src_width * src_height + D] * w1*w3; - - float red = src_img[src_width * src_height * 2 + A] * w2*w4 + src_img[src_width * src_height * 2 + B] * w1*w4 - + src_img[src_width * src_height * 2 + C] * w2*w3 + src_img[src_width * src_height * 2 + D] * w1*w3; - - dst_img[y * dst_width + x] = blue; - dst_img[dst_width * dst_height + y * dst_width + x] = green; - dst_img[dst_width * dst_height * 2 + y * dst_width + x] = red; - } - } - - cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* 
d_dstRGB, int dst_width, int dst_height) - { - dim3 block(32, 16, 1); - dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1); - - kernel_bilinear << < grid, block >> >(d_srcRGB, d_dstRGB, src_width, src_height, dst_width, dst_height); - - cudaError_t cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "kernel_bilinear launch failed: %s\n", cudaGetErrorString(cudaStatus)); - return cudaStatus; - } - - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); - return cudaStatus; - } - - return cudaStatus; - } -} \ No newline at end of file diff --git a/src/cuda_kernels.h b/src/cuda_kernels.h deleted file mode 100644 index cd1eb00..0000000 --- a/src/cuda_kernels.h +++ /dev/null @@ -1,63 +0,0 @@ -#pragma once -#include "cuda_runtime.h" -#include "device_launch_parameters.h" - -#include -#include - -#include -#include - -#include - -typedef enum -{ - ITU_601 = 1, - ITU_709 = 2 -} FF_ColorSpace; - -namespace cuda_common -{ - cudaError_t setColorSpace(FF_ColorSpace CSC, float hue); - - cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height); - cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height); - - - cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height); - - cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height, - unsigned char* Y, size_t yPitch, int yWidth, int yHeight, - unsigned char* U, size_t uPitch, int uWidth, int uHeight, - unsigned char* V, size_t vPitch, int vWidth, int vHeight); - - cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height, - unsigned char* Y, size_t yPitch, int yWidth, int yHeight, - unsigned char* U, 
size_t uPitch, int uWidth, int uHeight, - unsigned char* V, size_t vPitch, int vWidth, int vHeight); - - cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom); - // cudaError_t PartMemResize(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int left, int top, int right, int bottom); - - cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int srcimg_width, int srcimg_height, unsigned char** d_dstRGB, int count, - int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h, - float submeanb, float submeang, float submeanr, - float varianceb, float varianceg, float variancer); - - cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); - cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); - - cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y); -} - - -int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height); -int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height); - -int jpegNPP(const char *szOutputFile, float* d_srcRGB); -int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB); - -int initTable(); -int initTable(int flag, int width, int height); -int releaseJpegNPP(); - diff --git a/src/define.hpp b/src/define.hpp deleted file mode 100644 index 26fcc61..0000000 --- a/src/define.hpp +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include - -#define __FILENAME__ (strrchr(__FILE__, '/') ? 
strrchr(__FILE__, '/') + 1 : __FILE__) - - -#define CHECK_CUDA(call) \ -{\ - const cudaError_t error_code = call;\ - if (cudaSuccess != error_code)\ - LOG_ERROR("CUDA error, code: {} reason: {}", error_code, cudaGetErrorString(error_code));\ -} \ No newline at end of file diff --git a/src/demo/Makefile b/src/demo/Makefile new file mode 100644 index 0000000..25c49e5 --- /dev/null +++ b/src/demo/Makefile @@ -0,0 +1,61 @@ +XX = g++ + + +PROJECT_ROOT= /home/huchunming/FFNvDecoder + +DEPEND_DIR = $(PROJECT_ROOT)/bin +SRC_ROOT = $(PROJECT_ROOT)/src +THIRDPARTY_ROOT = $(PROJECT_ROOT)/3rdparty + + +TARGET= /home/huchunming/FFNvDecoder/src/build/bin/demo + + +SPDLOG_ROOT = $(THIRDPARTY_ROOT)/spdlog-1.9.2/release +JRTP_ROOT = $(THIRDPARTY_ROOT)/jrtp_export + + +include_dir=-I/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/runtime/include +lib_dir=-L/usr/lib \ + -L/usr/local/lib \ + -L/usr/local/Ascend/driver/lib64 \ + -L/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/atc/lib64\ + -L/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/runtime/lib64 \ + -L/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/runtime/lib64/stub \ + -L/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/lib64 \ + -L/usr/local/Ascend/driver/lib64/driver + +lib=-lacl_dvpp -lascendcl -lmmpa -lglog -lgflags -lpthread -lz -lacl_dvpp_mpi -lruntime -lascendalog -lc_sec -lmsprofiler -lgert -lge_executor -lge_common \ + -lgraph -lascend_protobuf -lprofapi -lerror_manager -lexe_graph -lregister -lplatform + +DEFS = -DENABLE_DVPP_INTERFACE + +INCLUDE= -I $(SRC_ROOT)/interface \ + -I $(SRC_ROOT)/dvpp \ + +LIBSPATH= + +LIBS= -lavformat -lavcodec -lswscale -lavutil -lavfilter -lswresample -lavdevice + +CXXFLAGS= -g -O0 -fPIC $(INCLUDE) $(include_dir) $(DEFS) -lpthread -lrt -lz -fexceptions -std=c++11 -fvisibility=hidden -Wl,-Bsymbolic -ldl + +SRCS:=$(wildcard $(SRC_ROOT)/demo/*.cpp) +OBJS = $(patsubst %.cpp, %.o, $(notdir $(SRCS))) + +OBJ_ROOT = /home/huchunming/FFNvDecoder/src/build +DVPP_SRCS:=$(wildcard 
$(OBJ_ROOT)/dvpp/obj/*.o) +INTEFACE_SRCS:=$(wildcard $(OBJ_ROOT)/interface/obj/*.o) + + +$(TARGET):$(OBJS) $(INTEFACE_SRCS) $(DVPP_SRCS) + rm -f $(TARGET) + @echo -e "\e[33m""Building object $@""\e[0m" + $(XX) -o $@ $^ $(CXXFLAGS) $(LIBS) $(lib_dir) $(lib) -Wwrite-strings + rm -f *.o + +%.o:$(SRC_ROOT)/demo/%.cpp + $(XX) $(CFLAGS) -c $< + + +clean: + rm -f *.o $(TARGET) \ No newline at end of file diff --git a/src/demo/Makefile.BK0308 b/src/demo/Makefile.BK0308 new file mode 100644 index 0000000..e096cc9 --- /dev/null +++ b/src/demo/Makefile.BK0308 @@ -0,0 +1,43 @@ +XX = g++ + + +PROJECT_ROOT= /home/huchunming/FFNvDecoder + +DEPEND_DIR = $(PROJECT_ROOT)/bin +SRC_ROOT = $(PROJECT_ROOT)/src +THIRDPARTY_ROOT = $(PROJECT_ROOT)/3rdparty + + +TARGET= /home/huchunming/FFNvDecoder/src/build/bin/test + + +SPDLOG_ROOT = $(THIRDPARTY_ROOT)/spdlog-1.9.2/release +JRTP_ROOT = $(THIRDPARTY_ROOT)/jrtp_export + + +INCLUDE= -I $(SRC_ROOT)/interface \ + +LIBSPATH= -L /home/huchunming/FFNvDecoder/src/build/interface/lib -l:interface.a \ + -L /home/huchunming/FFNvDecoder/src/build/dvpp/lib -l:libdvpp.a \ + + +LIBS= -lavformat -lavcodec -lswscale -lavutil -lavfilter -lswresample -lavdevice + +CFLAGS= -g -fPIC -O0 $(INCLUDE) -pthread -lrt -lz -std=c++11 -fvisibility=hidden -Wl,-Bsymbolic -ldl + # -DUNICODE -D_UNICODE + +SRCS:=$(wildcard $(SRC_ROOT)/demo/*.cpp) +OBJS = $(patsubst %.cpp, %.o, $(notdir $(SRCS))) + + +$(TARGET):$(OBJS) $(CU_OBJS) + rm -f $(TARGET) + $(XX) -o $@ $^ $(CFLAGS) $(LIBSPATH) $(LIBS) -Wwrite-strings + rm -f *.o + +%.o:$(SRC_ROOT)/demo/%.cpp + $(XX) $(CFLAGS) -c $< + + +clean: + rm -f *.o $(TARGET) \ No newline at end of file diff --git a/src/demo/main_dvpp.cpp b/src/demo/main_dvpp.cpp new file mode 100644 index 0000000..6a9e8e3 --- /dev/null +++ b/src/demo/main_dvpp.cpp @@ -0,0 +1,349 @@ +#include +#include +#include +#include +#include + + +#ifdef _WIN32 +#include "Winsock2.h" +#pragma comment(lib, "ws2_32.lib") +#endif + +#ifdef __linux__ +#include "arpa/inet.h" 
+#endif + +#include "../interface/FFNvDecoderManager.h" +#include "../interface/utiltools.hpp" + +#define MIN_RTP_PORT 10000 +#define MAX_RTP_PORT 60000 + +// ȡ MIN_RTP_PORT(10000)~MAX_RTP_PORT(60000)֮�������˿�(ż���������������˿ڿ���) +int allocRtpPort() { + + static int s_rtpPort = MIN_RTP_PORT; + if (MIN_RTP_PORT == s_rtpPort) + { + srand((unsigned int)time(NULL)); + s_rtpPort = MIN_RTP_PORT + (rand() % MIN_RTP_PORT); + } + + if (s_rtpPort % 2) + ++s_rtpPort; + + while (true) + { + s_rtpPort += 2; + s_rtpPort = s_rtpPort >= MAX_RTP_PORT ? MIN_RTP_PORT : s_rtpPort; + + int i = 0; + for (; i < 2; i++) + { + sockaddr_in sRecvAddr; + int s = socket(AF_INET, SOCK_DGRAM, 0); + + sRecvAddr.sin_family = AF_INET; + sRecvAddr.sin_addr.s_addr = htonl(INADDR_ANY); + sRecvAddr.sin_port = htons(s_rtpPort + i); + + int nResult = bind(s, (sockaddr *)&sRecvAddr, sizeof(sRecvAddr)); + if (nResult != 0) + { + break; + } + + nResult = close(s); + if (nResult != 0) + { + printf("closesocket failed:%d\n", nResult); + break; + } + } + + if (i == 2) + break; + } + + return s_rtpPort; +} + + + + + +unsigned char *pHwRgb[2] = {nullptr, nullptr}; + +int sum1 = 0; +int sum2 = 0; + + +string data_home = "/mnt/data/cmhu/tmp/"; + + + + +/** + * 注意: gpuFrame 在解码器设置的显卡上,后续操作要十分注意这一点,尤其是多线程情况 + * */ +void postDecoded(const void * userPtr, DeviceRgbMemory* devFrame){ + AbstractDecoder* decoder = (AbstractDecoder*)userPtr; + if (decoder!= nullptr) + { + // cout << "decode name: " << decoder->getName() << endl; + + // const char* gpu_pixfmt = av_get_pix_fmt_name((AVPixelFormat)gpuFrame->format); + // cout << "pixfmt: " << gpu_pixfmt << endl; + // cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl; + // cout << "decode successed ✿✿ヽ(°▽°)ノ✿ " << endl; + + int sum = sum1; + if (decoder->getName() == "dec0") + { + sum1 ++ ; + sum = sum1; + } else if (decoder->getName() == "dec2") + { + sum2 ++ ; + sum = sum2; + } + } +} + +long 
start_time = 0; +long end_time = 0; +bool count_flag = false; +int count = 0; +int count_std = 100; + + +static int sum = 0; +unsigned char *pHwData = nullptr; + +void postDecoded0(const void * userPtr, DeviceRgbMemory* devFrame){ + // std::this_thread::sleep_for(std::chrono::milliseconds(30000)); + + AbstractDecoder* decoder = (AbstractDecoder*)userPtr; + if (decoder!= nullptr) + { + // cout << "decode name: " << decoder->getName() << endl; + if (decoder->getName() == "dec") + { + if (! count_flag) + { + count_flag = true; + count = 0; + end_time = start_time = UtilTools::get_cur_time_ms(); + } + count++; + sum ++ ; + if (count >= count_std) + { + // end_time = UtilTools::get_cur_time_ms(); + // long time_using = end_time - start_time; + // double time_per_frame = double(time_using)/count_std ; + // cout << count_std << "帧用时:" << time_using << "ms 每帧用时:" << time_per_frame << "ms" << endl; + cout << decoder->getName() << " keyframe: " << devFrame->isKeyFrame() << " width: " << devFrame->getWidth() << " height: "<< devFrame->getHeight() << endl; + // cout << gpuFrame->pts << endl; + + count_flag = false; + } + // cout << "帧数:" << sum << endl; + } + } +} + +void decode_finished_cbk(const void* userPtr){ + cout << "当前时间戳: " << UtilTools::get_cur_time_ms() << endl; +} + +bool decode_request_stream_cbk(const char* deviceId){ + cout << "需在此请求流" << endl; + return true; +} + +// string test_uri = "rtmp://192.168.10.56:1935/objecteye/1"; +// string test_uri = "/home/cmhu/data/output_800x480.mp4"; +// string test_uri = "/home/cmhu/data/output_1920x1080.mp4"; +// string test_uri = "rtsp://176.10.0.2:8554/stream"; +// string test_uri = "/mnt/f/fiss/test_data/h265.mp4"; +// string test_uri = "rtsp://176.10.0.4:8554/stream"; +string test_uri = "rtsp://admin:admin@123456@192.168.60.176:554/cam/realmonitor?channel=1&subtype=0"; + +void createDecode(int index, const char* gpu_id){ + FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); + MgrDecConfig config; + 
config.name = "dec" + to_string(index); + config.cfg.uri = test_uri; + config.cfg.post_decoded_cbk = postDecoded; + config.cfg.decode_finished_cbk = decode_finished_cbk; + config.cfg.force_tcp = true; + config.dec_type = DECODER_TYPE_FFMPEG; + + config.cfg.gpuid = gpu_id; + // if (index % 2 == 0) + // { + // config.cfg.gpuid = "0"; + // } + // else + // { + // config.cfg.gpuid = "0"; + // } + + AbstractDecoder* decoder = pDecManager->createDecoder(config); + if (!decoder) + { + return ; + } + pDecManager->setPostDecArg(config.name, decoder); + pDecManager->setFinishedDecArg(config.name, decoder); + pDecManager->startDecodeByName(config.name); +} + +void createGB28181Decode(int index, char* gpu_id, int port){ + FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); + MgrDecConfig config; + config.name = "dec" + to_string(index); + config.cfg.uri = config.name; + config.cfg.post_decoded_cbk = postDecoded; + config.cfg.decode_finished_cbk = decode_finished_cbk; + config.cfg.request_stream_cbk = decode_request_stream_cbk; + config.cfg.force_tcp = true; + + config.dec_type = DECODER_TYPE_GB28181; + config.cfg.port = port;//allocRtpPort(); + + config.cfg.gpuid = gpu_id; + + AbstractDecoder* decoder = pDecManager->createDecoder(config); + if (!decoder) + { + return ; + } + pDecManager->setPostDecArg(config.name, decoder); + pDecManager->setFinishedDecArg(config.name, decoder); + pDecManager->startDecodeByName(config.name); +} + +void createDvppDecoder(int index, char* devId, int channelId){ + FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); + MgrDecConfig config; + config.name = "dec" + to_string(index); + config.cfg.uri = test_uri; + config.cfg.post_decoded_cbk = postDecoded; + config.cfg.decode_finished_cbk = decode_finished_cbk; + config.cfg.force_tcp = true; + config.dec_type = DECODER_TYPE_DVPP; + + config.cfg.gpuid = devId; + + AbstractDecoder* decoder = pDecManager->createDecoder(config); + if (!decoder) + { + return ; + } + 
pDecManager->setPostDecArg(config.name, decoder); + pDecManager->setFinishedDecArg(config.name, decoder); + pDecManager->startDecodeByName(config.name); +} + +void logFF(void *, int level, const char *fmt, va_list ap) +{ + vfprintf(stdout, fmt, ap); +} + + +int main(int argc, char* argv[]){ + + test_uri = argv[1]; + char* gpuid = argv[2]; + int port = atoi(argv[3]); + cout << test_uri << " gpu_id:" << gpuid << " port:" << port << endl; + + // av_log_set_callback(&logFF); + + // CheckCUDAProperty(atoi(gpuid)); + + pthread_t m_decode_thread; + pthread_create(&m_decode_thread,0, + [](void* arg) + { + // cudaSetDevice(atoi(gpuid)); + while (true) + { + std::this_thread::sleep_for(std::chrono::minutes(1)); + FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); + int count = pDecManager->count(); + cout << "当前时间:" << UtilTools::get_cur_time_ms() << " 当前运行路数: " << pDecManager->count() << endl; + } + + return (void*)0; + } + ,nullptr); + + + FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); + int i = 0; + + createDvppDecoder(i, gpuid, 0); + + while (true) + { + int ch = getchar(); + if (ch == 'q') + { + break; + } + + switch (ch) + { + case 'f': + case 'F': + createDecode(i, gpuid); + i++; + break; + case 'g': + case 'G': + createGB28181Decode(i, gpuid, port); + i++; + break; + case 'd': + case 'D': + createDvppDecoder(i, gpuid, 0); + i++; + break; + case 'r': + case 'R': + pDecManager->resumeDecoder("dec0"); + break; + case 'p': + case 'P': + pDecManager->pauseDecoder("dec0"); + break; + + case 'c': + case 'C': + i--; + pDecManager->closeDecoderByName("dec" + to_string(i)); + break; + + case 'i': + case 'I': + { + int w,h; + pDecManager->getResolution("dec0", w,h); + printf( "%s : %dx%d\n", "dec0" , w,h ); + } + break; + + default: + break; + } + + /* code */ + } + + cout << "总共帧数:" << sum << endl; + pDecManager->closeAllDecoder(); +} \ No newline at end of file diff --git a/src/demo/main_nvdec.cpp1 b/src/demo/main_nvdec.cpp1 new file 
mode 100644 index 0000000..be0094d --- /dev/null +++ b/src/demo/main_nvdec.cpp1 @@ -0,0 +1,452 @@ +// #include "FFNvDecoderManager.h" +// #include + +// #include "cuda_kernels.h" + +// #include "NvJpegEncoder.h" + +// #include +// #include + +// #include + +// #include + + +// #ifdef _WIN32 +// #include "Winsock2.h" +// #pragma comment(lib, "ws2_32.lib") +// #endif + +// #ifdef __linux__ +// #include "arpa/inet.h" +// #endif + +// #include "utiltools.hpp" + +// #define MIN_RTP_PORT 10000 +// #define MAX_RTP_PORT 60000 + +// // ȡ MIN_RTP_PORT(10000)~MAX_RTP_PORT(60000)֮�������˿�(ż���������������˿ڿ���) +// int allocRtpPort() { + +// static int s_rtpPort = MIN_RTP_PORT; +// if (MIN_RTP_PORT == s_rtpPort) +// { +// srand((unsigned int)time(NULL)); +// s_rtpPort = MIN_RTP_PORT + (rand() % MIN_RTP_PORT); +// } + +// if (s_rtpPort % 2) +// ++s_rtpPort; + +// while (true) +// { +// s_rtpPort += 2; +// s_rtpPort = s_rtpPort >= MAX_RTP_PORT ? MIN_RTP_PORT : s_rtpPort; + +// int i = 0; +// for (; i < 2; i++) +// { +// sockaddr_in sRecvAddr; +// int s = socket(AF_INET, SOCK_DGRAM, 0); + +// sRecvAddr.sin_family = AF_INET; +// sRecvAddr.sin_addr.s_addr = htonl(INADDR_ANY); +// sRecvAddr.sin_port = htons(s_rtpPort + i); + +// int nResult = bind(s, (sockaddr *)&sRecvAddr, sizeof(sRecvAddr)); +// if (nResult != 0) +// { +// break; +// } + +// nResult = close(s); +// if (nResult != 0) +// { +// printf("closesocket failed:%d\n", nResult); +// break; +// } +// } + +// if (i == 2) +// break; +// } + +// return s_rtpPort; +// } + + + + + +// unsigned char *pHwRgb[2] = {nullptr, nullptr}; + +// int sum1 = 0; +// int sum2 = 0; + +// cudaStream_t stream[2]; + +// string data_home = "/mnt/data/cmhu/tmp/"; + + +// #define checkCudaErrors(S) do {CUresult status; \ +// status = S; \ +// if (status != CUDA_SUCCESS ) std::cout << __LINE__ <<" checkCudaErrors - status = " << status << std::endl; \ +// } while (false) + + +// static void gpu_helper(int gpuid) +// { +// cudaSetDevice(gpuid); + +// 
// int *dn; +// // cudaMalloc((void **)&dn, 1 * sizeof(int)); + +// size_t free_byte; +// size_t total_byte; + +// CUresult cuda_status = cuMemGetInfo(&free_byte, &total_byte); + +// const char *pStr = nullptr; +// if (CUDA_SUCCESS != cuda_status) { +// cuGetErrorString(cuda_status, &pStr); +// printf("Error: cudaMemGetInfo fails, %s \n", pStr); +// return; +// } + +// double free_db = (double)free_byte; +// double total_db = (double)total_byte; +// double used_db_1 = (total_db - free_db) / 1024.0 / 1024.0; + +// std::cout <<"显存已使用 " << used_db_1 << " MB\n"; + +// // cudaFree(dn); +// } + +// int CheckCUDAProperty( int devId ) +// { +// cuInit(0); + +// CUdevice dev = devId; +// size_t memSize = 0; +// char devName[256] = {0}; +// int major = 0, minor = 0; +// CUresult rlt = CUDA_SUCCESS; + +// rlt = cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev); +// checkCudaErrors( rlt ); + +// rlt = cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev); +// checkCudaErrors( rlt ); + +// rlt = cuDeviceGetName( devName, sizeof( devName ), dev ); +// checkCudaErrors( rlt ); + +// printf( "Using GPU Device %d: %s has SM %d.%d compute capability\n", +// dev, devName, major, minor ); + +// rlt = cuDeviceTotalMem( &memSize, dev ); +// checkCudaErrors( rlt ); + +// printf( "Total amount of global memory: %4.4f MB\n", +// (float)memSize / ( 1024 * 1024 ) ); + +// return 0; +// } + +// /** +// * 注意: gpuFrame 在解码器设置的显卡上,后续操作要十分注意这一点,尤其是多线程情况 +// * */ +// void postDecoded(const void * userPtr, AVFrame * gpuFrame){ +// AbstractDecoder* decoder = (AbstractDecoder*)userPtr; +// if (decoder!= nullptr) +// { +// // cout << "decode name: " << decoder->getName() << endl; + +// // const char* gpu_pixfmt = av_get_pix_fmt_name((AVPixelFormat)gpuFrame->format); +// // cout << "pixfmt: " << gpu_pixfmt << endl; +// // cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl; +// // 
cout << "decode successed ✿✿ヽ(°▽°)ノ✿ " << endl; + +// int sum = sum1; +// if (decoder->getName() == "dec0") +// { +// sum1 ++ ; +// sum = sum1; + +// if (gpuFrame->format == AV_PIX_FMT_CUDA) +// { +// // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl; +// cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); +// cudaError_t cudaStatus; +// if(pHwRgb[0] == nullptr){ +// // cudaStreamCreate(&stream[0]); +// cuda_common::setColorSpace( ITU_709, 0 ); +// cudaStatus = cudaMalloc((void **)&pHwRgb[0], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); +// } +// cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[0], gpuFrame->width, gpuFrame->height); +// cudaDeviceSynchronize(); +// if (cudaStatus != cudaSuccess) { +// cout << "CUDAToBGR failed !!!" << endl; +// return; +// } + +// string path = data_home + decoder->getName() + ".jpg"; +// saveJpeg(path.c_str(), pHwRgb[0], gpuFrame->width, gpuFrame->height, stream[0]); // 验证 CUDAToRGB +// } +// } else if (decoder->getName() == "dec2") +// { +// sum2 ++ ; +// sum = sum2; + +// if (gpuFrame->format == AV_PIX_FMT_CUDA) +// { +// // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl; +// cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); +// cudaError_t cudaStatus; +// if(pHwRgb[1] == nullptr){ +// // cudaStreamCreate(&stream[1]); +// cuda_common::setColorSpace( ITU_709, 0 ); +// cudaStatus = cudaMalloc((void **)&pHwRgb[1], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); +// } +// cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[1], gpuFrame->width, gpuFrame->height); +// cudaDeviceSynchronize(); +// if (cudaStatus != cudaSuccess) { +// cout << "CUDAToBGR failed !!!" 
<< endl; +// return; +// } + +// string path = data_home + decoder->getName() + ".jpg"; +// saveJpeg(path.c_str(), pHwRgb[1], gpuFrame->width, gpuFrame->height, stream[1]); // 验证 CUDAToRGB +// } +// } +// } +// } + +// long start_time = 0; +// long end_time = 0; +// bool count_flag = false; +// int count = 0; +// int count_std = 100; + + +// static int sum = 0; +// unsigned char *pHwData = nullptr; + +// void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ +// // std::this_thread::sleep_for(std::chrono::milliseconds(30000)); + +// AbstractDecoder* decoder = (AbstractDecoder*)userPtr; +// if (decoder!= nullptr) +// { +// // cout << "decode name: " << decoder->getName() << endl; +// if (decoder->getName() == "dec") +// { +// if (! count_flag) +// { +// count_flag = true; +// count = 0; +// end_time = start_time = UtilTools::get_cur_time_ms(); +// } +// count++; +// sum ++ ; +// if (count >= count_std) +// { +// // end_time = UtilTools::get_cur_time_ms(); +// // long time_using = end_time - start_time; +// // double time_per_frame = double(time_using)/count_std ; +// // cout << count_std << "帧用时:" << time_using << "ms 每帧用时:" << time_per_frame << "ms" << endl; +// cout << decoder->getName() << " keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl; +// // cout << gpuFrame->pts << endl; + +// count_flag = false; +// } +// // cout << "帧数:" << sum << endl; + +// if (gpuFrame->format == AV_PIX_FMT_CUDA) +// { +// cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); +// // cout << "gpu id : " << decoder->m_cfg.gpuid.c_str() << endl; +// cudaError_t cudaStatus; +// if(pHwData == nullptr){ +// cuda_common::setColorSpace( ITU_709, 0 ); +// cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); +// } +// cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, 
gpuFrame->width, gpuFrame->height); +// cudaDeviceSynchronize(); +// if (cudaStatus != cudaSuccess) { +// cout << "CUDAToBGR failed !!!" << endl; +// return; +// } + +// string path = data_home + decoder->getName() + ".jpg"; +// saveJpeg(path.c_str(), pHwData, gpuFrame->width, gpuFrame->height, nullptr); // 验证 CUDAToRGB +// } +// } +// } +// } + +// void decode_finished_cbk(const void* userPtr){ +// cout << "当前时间戳: " << UtilTools::get_cur_time_ms() << endl; +// } + +// bool decode_request_stream_cbk(const char* deviceId){ +// cout << "需在此请求流" << endl; +// return true; +// } + +// // string test_uri = "rtmp://192.168.10.56:1935/objecteye/1"; +// // string test_uri = "/home/cmhu/data/output_800x480.mp4"; +// // string test_uri = "/home/cmhu/data/output_1920x1080.mp4"; +// // string test_uri = "rtsp://176.10.0.2:8554/stream"; +// // string test_uri = "/mnt/f/fiss/test_data/h265.mp4"; +// string test_uri = "rtsp://176.10.0.4:8554/stream"; + +// void createDecode(int index, const char* gpu_id){ +// FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); +// MgrDecConfig config; +// config.name = "dec" + to_string(index); +// config.cfg.uri = test_uri; +// config.cfg.post_decoded_cbk = postDecoded; +// config.cfg.decode_finished_cbk = decode_finished_cbk; +// config.cfg.force_tcp = true; +// config.dec_type = DECODER_TYPE_FFMPEG; + +// config.cfg.gpuid = gpu_id; +// // if (index % 2 == 0) +// // { +// // config.cfg.gpuid = "0"; +// // } +// // else +// // { +// // config.cfg.gpuid = "0"; +// // } + +// AbstractDecoder* decoder = pDecManager->createDecoder(config); +// if (!decoder) +// { +// return ; +// } +// pDecManager->setPostDecArg(config.name, decoder); +// pDecManager->setFinishedDecArg(config.name, decoder); +// pDecManager->startDecodeByName(config.name); +// } + +// void createGB28181Decode(int index, char* gpu_id, int port){ +// FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); +// MgrDecConfig config; +// config.name = "dec" + 
to_string(index); +// config.cfg.uri = config.name; +// config.cfg.post_decoded_cbk = postDecoded; +// config.cfg.decode_finished_cbk = decode_finished_cbk; +// config.cfg.request_stream_cbk = decode_request_stream_cbk; +// config.cfg.force_tcp = true; + +// config.dec_type = DECODER_TYPE_GB28181; +// config.cfg.port = port;//allocRtpPort(); + +// config.cfg.gpuid = gpu_id; + +// AbstractDecoder* decoder = pDecManager->createDecoder(config); +// if (!decoder) +// { +// return ; +// } +// pDecManager->setPostDecArg(config.name, decoder); +// pDecManager->setFinishedDecArg(config.name, decoder); +// pDecManager->startDecodeByName(config.name); +// } + +// void logFF(void *, int level, const char *fmt, va_list ap) +// { +// vfprintf(stdout, fmt, ap); +// } + + +// int main(int argc, char* argv[]){ + +// test_uri = "rtsp://admin:admin@123456@192.168.60.176:554/cam/realmonitor?channel=1&subtype=0";//argv[1]; +// char* gpuid = argv[2]; +// int port = atoi(argv[3]); +// cout << test_uri << " gpu_id:" << gpuid << " port:" << port << endl; + +// // av_log_set_callback(&logFF); + +// CheckCUDAProperty(atoi(gpuid)); + +// pthread_t m_decode_thread; +// pthread_create(&m_decode_thread,0, +// [](void* arg) +// { +// // cudaSetDevice(atoi(gpuid)); +// while (true) +// { +// std::this_thread::sleep_for(std::chrono::minutes(1)); +// FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); +// int count = pDecManager->count(); +// cout << "当前时间:" << UtilTools::get_cur_time_ms() << " 当前运行路数: " << pDecManager->count() << endl; +// } + +// return (void*)0; +// } +// ,nullptr); + + +// FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); +// int i = 0; + +// while (true) +// { +// int ch = getchar(); +// if (ch == 'q') +// { +// break; +// } + +// switch (ch) +// { +// case 'f': +// case 'F': +// createDecode(i, gpuid); +// i++; +// break; +// case 'g': +// case 'G': +// createGB28181Decode(i, gpuid, port); +// i++; +// break; +// case 'r': +// case 'R': +// 
pDecManager->resumeDecoder("dec0"); +// break; +// case 'p': +// case 'P': +// pDecManager->pauseDecoder("dec0"); +// break; + +// case 'c': +// case 'C': +// i--; +// pDecManager->closeDecoderByName("dec" + to_string(i)); +// break; + +// case 'i': +// case 'I': +// { +// int w,h; +// pDecManager->getResolution("dec0", w,h); +// printf( "%s : %dx%d\n", "dec0" , w,h ); +// } +// break; + +// default: +// break; +// } + +// /* code */ +// } + +// cout << "总共帧数:" << sum << endl; +// pDecManager->closeAllDecoder(); +// } \ No newline at end of file diff --git a/src/dvpp/CircularQueue.hpp b/src/dvpp/CircularQueue.hpp new file mode 100644 index 0000000..368291c --- /dev/null +++ b/src/dvpp/CircularQueue.hpp @@ -0,0 +1,138 @@ +#ifndef __CIRCULAR_QUEUE_HPP__ +#define __CIRCULAR_QUEUE_HPP__ + +#include +#include +#include +#include + +using namespace std; + + +// 循环队列 +template +class CircularQueue +{ +private: + /* data */ +public: + CircularQueue(); + ~CircularQueue(); + + bool init(vector data); + T getTail(); + void addTail(); + T deQueue(); + T getHead(); + void addHead(); + void clearQueue(); + + int length(); + bool isEmpty(); + +private: + vector base; + atomic front; + atomic rear; + mutex m_mutex; + int max_size; +}; + + +template +CircularQueue::CircularQueue() +{ + front = rear = 0;//头指针和尾指针置为零,队列为空 +} + +template +CircularQueue::~CircularQueue() +{ + base.clear(); + rear = front = 0; +} + +template +bool CircularQueue::init(vector data){ + base = data; + front = rear = 0;//头指针和尾指针置为零,队列为空 + max_size = data.size(); + + return true; +} + +//循环队列的入队 +template +T CircularQueue::getTail() +{ + std::lock_guard l(m_mutex); + //插入一个元素e为Q的新的队尾元素 + if ((rear + 1) % max_size == front) + return nullptr;//队满 + return base[rear];//获取队尾元素 +} + +// 将队尾元素添加到队列中 +template +void CircularQueue::addTail() +{ + std::lock_guard l(m_mutex); + rear = (rear + 1) % max_size;//队尾指针加1 +} + +//循环队列的出队 +template +T CircularQueue::deQueue() +{ + std::lock_guard l(m_mutex); + 
//删除Q的队头元素,用e返回其值 + if (front == rear) + return nullptr;//队空 + T e = base[front];//保存队头元素 + front = (front + 1) % max_size;//队头指针加1 + return e; +} + +//取循环队列的队头元素 +template +T CircularQueue::getHead() +{ + std::lock_guard l(m_mutex); + //返回Q的队头元素,不修改队头指针 + if (front == rear) + return nullptr;//队列为空,取元素失败 + return base[front]; +} + +template +void CircularQueue::addHead() +{ + std::lock_guard l(m_mutex); + front = (front + 1) % max_size;//队头指针加1 +} + +template +int CircularQueue::length() +{ + std::lock_guard l(m_mutex); + return (rear - front + max_size) % max_size; +} + +template +bool CircularQueue::isEmpty() +{ + std::lock_guard l(m_mutex); + if (front == rear) + return true; + + return false; +} + +template +void CircularQueue::clearQueue() +{ + std::lock_guard l(m_mutex); + rear = front = 0; +} + +#endif \ No newline at end of file diff --git a/src/dvpp/DvppDec.cpp b/src/dvpp/DvppDec.cpp new file mode 100644 index 0000000..dfea4be --- /dev/null +++ b/src/dvpp/DvppDec.cpp @@ -0,0 +1,421 @@ +#include "DvppDec.h" +#include "DvppSourceManager.h" + +#define CHECK_AND_RETURN(ret, message) \ + if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message; return ret;} +#define CHECK_NOT_RETURN(ret, message) \ + if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message;} +#define CHECK_AND_RETURN_NOVALUE(ret, message) \ + if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message; return;} + +struct Vdec_CallBack_UserData { + uint64_t frameId; + long startTime; + long sendTime; + // void* vdecOutputBuf; + DvppDec* self; + shared_ptr inBufNode; + Vdec_CallBack_UserData() { + frameId = 0; + } +}; + +#ifdef TEST_DECODER +static void *vdecHostAddr = nullptr; +#endif + +static const int g_pkt_size = 1024 * 1024; + + DvppDec::DvppDec(){ + m_decode_thread = 0; + } + + 
DvppDec::~DvppDec(){ + + } + + bool DvppDec::init_vdpp(DvppDecConfig cfg){ + cout << "Init device....\n"; + + m_dvpp_deviceId = atoi(cfg.dev_id.c_str()); + + if(cfg.codec_id == 0){ + // 66:Baseline,77:Main,>=100:High + if(cfg.profile == 77){ + enType = H264_MAIN_LEVEL; + }else if(cfg.profile < 77){ + enType = H264_BASELINE_LEVEL; + }else{ + enType = H264_HIGH_LEVEL; + } + }else if(cfg.codec_id == 1){ + // h265只有main + enType = H265_MAIN_LEVEL; + }else { + cout << "codec_id is not supported!" << endl; + return false; + } + + post_decoded_cbk = cfg.post_decoded_cbk; + m_pktQueueptr = cfg.pktQueueptr; + + // DvppSourceManager 创建时包含 aclInit,析构时包含 aclFinalize + DvppSourceManager* pSrcMgr = DvppSourceManager::getInstance(); + m_context = pSrcMgr->getContext(m_dvpp_deviceId); + m_dvpp_channel = pSrcMgr->getChannel(m_dvpp_deviceId); + if(m_dvpp_channel < 0){ + cout << "该设备channel已经用完了" << endl; + return false; + } + + cout << "devProgram start, device: " << m_dvpp_deviceId << endl; + int ret = aclrtSetCurrentContext(m_context); + if (ret != ACL_ERROR_NONE) { + cout << "aclrtSetCurrentContext failed" << endl; + return false; + } + + // queue_size 最小应大于16,否则关键帧之间距离太远的时候会导致回调函数与循环队列卡死 + for (size_t i = 0; i < 20; i++){ + void *vdecInputbuf = nullptr; + int ret = acldvppMalloc((void **)&vdecInputbuf, g_pkt_size); + if(ret != ACL_ERROR_NONE){ + cout << "acldvppMalloc failed" << endl; + return false;; + } + m_vec_vdec.push_back(vdecInputbuf); + } + + if(!m_vdecQueue.init(m_vec_vdec)){ + return false; + } + + ret = picConverter.init(m_context); + if(!ret){ + picConverter.release(); + } + + m_vdec_out_size = cfg.width * cfg.height * 3 / 2; + m_dec_name = cfg.dec_name; + + cout << "init vdpp success!" 
<< endl; + return true; +} + +bool DvppDec::start(){ + m_bRunning = true; + + pthread_create(&m_decode_thread,0, + [](void* arg) + { + DvppDec* a=(DvppDec*)arg; + a->decode_thread(); + return (void*)0; + } + ,this); + + return true; +} + +static void *ReportThd(void *arg) +{ + DvppDec *self = (DvppDec *)arg; + if(nullptr != self){ + self->doProcessReport(); + } + return (void *)0; +} + +void DvppDec::doProcessReport(){ + // aclrtContext thdContext = nullptr; + // CHECK_AND_RETURN_NOVALUE(aclrtCreateContext(&thdContext, m_dvpp_deviceId), "aclrtCreateContext failed"); + + CHECK_AND_RETURN_NOVALUE(aclrtSetCurrentContext(m_context), "aclrtSetCurrentContext failed"); + // 阻塞等待vdec线程开始 + + int ret; + while (!m_bExitReportThd) { + ret = aclrtProcessReport(1000); + if (ret != ACL_ERROR_NONE) { + cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", aclrtProcessReport failed, ret: " << ret << endl; + } + } + + // CHECK_AND_RETURN_NOVALUE(aclrtDestroyContext(thdContext), "aclrtDestroyContext failed"); +} + +static int count_frame = 0; +static long lastts = 0; +static void VdecCallback(acldvppStreamDesc *input, acldvppPicDesc *output, void *pUserData) +{ + cout << "VdecCallback: " << UtilTools::get_cur_time_ms() - lastts << endl; + lastts = UtilTools::get_cur_time_ms(); + + Vdec_CallBack_UserData *userData = (Vdec_CallBack_UserData *) pUserData; + DvppDec* self = userData->self; + if(self != nullptr){ + + self->doVdppVdecCallBack(input, output); + } + + delete userData; + userData = nullptr; +} + +void DvppDec::doVdppVdecCallBack(acldvppStreamDesc *input, acldvppPicDesc *output){ + + CHECK_AND_RETURN_NOVALUE(aclrtSetCurrentContext(m_context), "aclrtSetCurrentContext failed"); + + void *inputDataDev = acldvppGetStreamDescData(input); + void *outputDataDev = acldvppGetPicDescData(output); + uint32_t outputSize = acldvppGetPicDescSize(output); + uint32_t width = acldvppGetPicDescWidth(output); + uint32_t height = acldvppGetPicDescHeight(output); + + cout << 
"width = " << width << " height = " << height << " data_size:" << outputSize << endl; + + if (!m_bPause) + { + DvppRgbMemory* rgbMem = picConverter.convert2bgr(output, width, height, false); + post_decoded_cbk(m_postDecArg, rgbMem); +#ifdef TEST_DECODER + if(rgbMem != nullptr){ + // D2H + if(vdecHostAddr == nullptr){ + CHECK_NOT_RETURN(aclrtMallocHost(&vdecHostAddr, width * height * 3), "aclrtMallocHost failed"); + } + uint32_t data_size = rgbMem->getSize(); + CHECK_AND_RETURN_NOVALUE(aclrtMemcpy(vdecHostAddr, data_size, rgbMem->getMem(), data_size, ACL_MEMCPY_DEVICE_TO_HOST), "D2H aclrtMemcpy failed"); + + // 保存vdec结果 + if(count_frame > 45 && count_frame < 50) + { + string file_name = "./yuv_pic/vdec_out"+ m_dec_name +".rgb" ; + FILE *outputFile = fopen(file_name.c_str(), "a"); + if(outputFile){ + fwrite(vdecHostAddr, data_size, sizeof(char), outputFile); + fclose(outputFile); + } + } + else if(count_frame > 50 && vdecHostAddr != nullptr){ + CHECK_NOT_RETURN(aclrtFreeHost(vdecHostAddr), "aclrtFreeHost failed"); + vdecHostAddr = nullptr; + } + count_frame++; + } +#endif + + }else{ + std::this_thread::sleep_for(std::chrono::milliseconds(3)); + } + + acldvppFree((uint8_t*)outputDataDev); + outputDataDev = nullptr; + + m_vdecQueue.addHead(); + + CHECK_AND_RETURN_NOVALUE(acldvppDestroyStreamDesc(input), "acldvppDestroyStreamDesc failed"); + CHECK_AND_RETURN_NOVALUE(acldvppDestroyPicDesc(output), "acldvppDestroyPicDesc failed"); + + cout << "callback exit." 
<< endl; +} + +void DvppDec::close(){ + m_bRunning=false; + + if(m_decode_thread != 0){ + pthread_join(m_decode_thread,0); + } +} + +bool DvppDec::sendVdecEos(aclvdecChannelDesc *vdecChannelDesc){ + // create stream desc + acldvppStreamDesc *streamInputDesc = acldvppCreateStreamDesc(); + if (streamInputDesc == nullptr) { + cout << "fail to create input stream desc" << endl; + return false; + } + aclError ret = acldvppSetStreamDescEos(streamInputDesc, 1); + if (ret != ACL_SUCCESS) { + cout << "fail to set eos for stream desc, errorCode = " << static_cast(ret) << endl; + (void)acldvppDestroyStreamDesc(streamInputDesc); + return false; + } + + // send vdec eos frame. when all vdec callback are completed, aclvdecSendFrame can be returned. + cout << "send eos" << endl; + ret = aclvdecSendFrame(vdecChannelDesc, streamInputDesc, nullptr, nullptr, nullptr); + if (ret != ACL_SUCCESS) { + cout << "fail to send eos frame, ret=" << ret << endl; + (void)acldvppDestroyStreamDesc(streamInputDesc); + return false; + } + (void)acldvppDestroyStreamDesc(streamInputDesc); + + return true; +} + +void DvppDec::releaseResource(){ + + for(int i = 0; i < m_vec_vdec.size(); i++){ + if(m_vec_vdec[i] != nullptr){ + acldvppFree((uint8_t*)m_vec_vdec[i]); + m_vec_vdec[i] = nullptr; + } + } + m_vec_vdec.clear(); + + DvppSourceManager* pSrcMgr = DvppSourceManager::getInstance(); + pSrcMgr->releaseChannel(m_dvpp_deviceId, m_dvpp_channel); +} + +void DvppDec::decode_thread(){ + + long startTime = UtilTools::get_cur_time_ms(); + + int ret = -1; + + // dvpp解码参数 + CHECK_AND_RETURN_NOVALUE(aclrtSetCurrentContext(m_context), "aclrtSetCurrentContext failed"); + + pthread_t report_thread; + ret = pthread_create(&report_thread, nullptr, ReportThd, (void *)this); + if(ret != 0){ + cout << "pthread_create failed" << endl; + return; + } + + // 创建aclvdecChannelDesc类型的数据 + aclvdecChannelDesc *vdecChannelDesc = aclvdecCreateChannelDesc(); + if (vdecChannelDesc == nullptr) { + cout << "aclvdecCreateChannelDesc 
failed"; + return; + } + // 创建 channel dec结构体 + // 通道ID在dvpp层面为0~31 + CHECK_AND_RETURN_NOVALUE(aclvdecSetChannelDescChannelId(vdecChannelDesc, m_dvpp_channel), "aclvdecSetChannelDescChannelId failed"); + CHECK_AND_RETURN_NOVALUE(aclvdecSetChannelDescThreadId(vdecChannelDesc, report_thread), "aclvdecSetChannelDescThreadId failed"); + CHECK_AND_RETURN_NOVALUE(aclvdecSetChannelDescCallback(vdecChannelDesc, VdecCallback), "aclvdecSetChannelDescCallback failed"); + CHECK_AND_RETURN_NOVALUE(aclvdecSetChannelDescEnType(vdecChannelDesc, enType), "aclvdecSetChannelDescEnType failed"); + CHECK_AND_RETURN_NOVALUE(aclvdecSetChannelDescOutPicFormat(vdecChannelDesc, PIXEL_FORMAT_YUV_SEMIPLANAR_420), "aclvdecSetChannelDescOutPicFormat failed"); + CHECK_AND_RETURN_NOVALUE(aclvdecCreateChannel(vdecChannelDesc), "aclvdecCreateChannel failed"); + + uint64_t frame_count = 0; + bool bBreak = false; + while (m_bRunning) + { + int ret = sentFrame(vdecChannelDesc, frame_count); + if(ret == 2){ + break; + bBreak = true; + }else if(ret == 1){ + continue; + } + + frame_count++; + } + + // 尽量保证数据全部解码完成 + // int sum = 0; + // if(!bBreak){ + // while(!m_pktQueueptr->isEmpty()){ + // int ret = sentFrame(vdecChannelDesc, frame_count); + // if(ret == 2){ + // break; + // } + // sum++; + // if(sum > 10){ + // // 避免卡死 + // break; + // } + // } + // } + + + sendVdecEos(vdecChannelDesc); + + CHECK_NOT_RETURN(aclvdecDestroyChannel(vdecChannelDesc), "aclvdecDestroyChannel failed"); + CHECK_NOT_RETURN(aclvdecDestroyChannelDesc(vdecChannelDesc), "aclvdecDestroyChannelDesc failed"); + + // report_thread 需后于destroy退出 + m_bRunning = false; + m_bExitReportThd = true; + CHECK_NOT_RETURN(pthread_join(report_thread, nullptr), "pthread_join failed"); + + cout << "decode thread exit." 
<< endl; +} + +int DvppDec::sentFrame(aclvdecChannelDesc *vdecChannelDesc, uint64_t frame_count){ + + AVPacket * pkt = m_pktQueueptr->getHead(); + if(pkt == nullptr){ + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + // cout << "getTail failed" << endl; + // continue; + return 1; + } + // 解码 + void *vdecInputbuf = m_vdecQueue.getTail(); + if(vdecInputbuf == nullptr){ + std::this_thread::sleep_for(std::chrono::milliseconds(3)); + // cout << "getTail failed" << endl; + // continue; + return 1; + } + + int ret = aclrtMemcpy(vdecInputbuf, pkt->size, pkt->data, pkt->size, ACL_MEMCPY_HOST_TO_DEVICE); + if(ACL_ERROR_NONE != ret){ + cout << "aclrtMemcpy failed" << endl; + // break; + return 2; + } + + void *vdecOutputBuf = nullptr; + ret = acldvppMalloc((void **)&vdecOutputBuf, m_vdec_out_size); + if(ret != ACL_ERROR_NONE){ + cout << "acldvppMalloc failed" << endl; + // break; + return 2; + } + + /************ 解码*************/ + acldvppStreamDesc *input_stream_desc = acldvppCreateStreamDesc(); + if (input_stream_desc == nullptr) { cout << "acldvppCreateStreamDesc error" << endl; } + acldvppPicDesc *output_pic_desc = acldvppCreatePicDesc(); + if (output_pic_desc == nullptr) { cout<< "acldvppCreatePicDesc error" << endl; } + CHECK_NOT_RETURN(acldvppSetStreamDescData(input_stream_desc, vdecInputbuf), "acldvppSetStreamDescData failed"); + CHECK_NOT_RETURN(acldvppSetStreamDescSize(input_stream_desc, pkt->size), "acldvppSetStreamDescSize failed"); + CHECK_NOT_RETURN(acldvppSetPicDescData(output_pic_desc, vdecOutputBuf), "acldvppSetPicDescData failed"); + CHECK_NOT_RETURN(acldvppSetPicDescSize(output_pic_desc, m_vdec_out_size), "acldvppSetPicDescSize failed"); + + Vdec_CallBack_UserData *user_data = NULL; + user_data = new Vdec_CallBack_UserData; + user_data->frameId = frame_count; + // user_data->startTime = startTime; + user_data->sendTime = UtilTools::get_cur_time_ms(); + user_data->self = this; + // user_data->inBufNode = bufNode; + cout << "send frame" << endl; 
+ CHECK_NOT_RETURN(aclvdecSendFrame(vdecChannelDesc, input_stream_desc, output_pic_desc, nullptr, reinterpret_cast(user_data)), + "aclvdecSendFrame failed"); + + m_vdecQueue.addTail(); + + m_pktQueueptr->addHead(); + av_packet_unref(pkt); + + return 0; +} + + +void DvppDec::setPostDecArg(const void* postDecArg){ + m_postDecArg = postDecArg; +} + +void DvppDec::pause(){ + m_bPause = true; +} + +void DvppDec::resume(){ + m_bPause = false; +} \ No newline at end of file diff --git a/src/dvpp/DvppDec.h b/src/dvpp/DvppDec.h new file mode 100644 index 0000000..08bde3a --- /dev/null +++ b/src/dvpp/DvppDec.h @@ -0,0 +1,80 @@ +#include +#include + +#include "dvpp_headers.h" +#include "depend_headers.h" +#include "user_mem.h" +#include "CircularQueue.hpp" +#include "VpcPicConverter.h" +#include "FFReceiver.h" + +#include + +using namespace std; + +#define TEST_DECODER + + +struct DvppDecConfig{ + string dec_name; + POST_DECODE_CALLBACK post_decoded_cbk; // 解码数据回调接口 + string dev_id; // gpu id + bool force_tcp{true}; // 是否指定使用tcp连接 + int skip_frame{1}; // 跳帧数 + int codec_id; // 0 : h264 1:h265 + int profile; + CircularQueue *pktQueueptr; + + int width; + int height; +}; + + +class DvppDec { +public: + DvppDec(); + ~DvppDec(); + bool init_vdpp(DvppDecConfig cfg); + void setPostDecArg(const void* postDecArg); + bool start(); + void close(); + void pause(); + void resume(); + +public: + void doProcessReport(); + void doVdppVdecCallBack(acldvppStreamDesc *input, acldvppPicDesc *output); + +private: + void decode_thread(); + void releaseResource(); + bool sendVdecEos(aclvdecChannelDesc *vdecChannelDesc); + int sentFrame(aclvdecChannelDesc *vdecChannelDesc, uint64_t frame_count); + +private: + + bool m_bRunning{false}; + bool m_bPause{false}; + bool m_bExitReportThd{false}; + + int m_dvpp_deviceId {-1}; + int m_dvpp_channel {-1}; + aclrtContext m_context; + acldvppStreamFormat enType; + + pthread_t m_decode_thread; + + DvppDecConfig m_cfg; + string m_dec_name; + + vector m_vec_vdec; 
+ CircularQueue m_vdecQueue; + CircularQueue *m_pktQueueptr; + + const void * m_postDecArg; + POST_DECODE_CALLBACK post_decoded_cbk; + + VpcPicConverter picConverter; + + int m_vdec_out_size {-1}; +}; \ No newline at end of file diff --git a/src/dvpp/DvppDecoder.cpp b/src/dvpp/DvppDecoder.cpp new file mode 100644 index 0000000..efa52ea --- /dev/null +++ b/src/dvpp/DvppDecoder.cpp @@ -0,0 +1,640 @@ +#include "DvppDecoder.h" +#include "DvppSourceManager.h" + +#define CHECK_AND_RETURN(ret, message) \ + if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message; return ret;} +#define CHECK_NOT_RETURN(ret, message) \ + if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message;} +#define CHECK_AND_RETURN_NOVALUE(ret, message) \ + if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message; return;} + + + +struct Vdec_CallBack_UserData { + uint64_t frameId; + long startTime; + long sendTime; + // void* vdecOutputBuf; + DvppDecoder* self; + shared_ptr inBufNode; + Vdec_CallBack_UserData() { + frameId = 0; + } +}; + + +const int g_pkt_que_size = 10; +const int g_pkt_size = 1024 * 1024; + +#ifdef TEST_DECODER +void *vdecHostAddr; +#endif + +static long GetCurTimeUs(){ + chrono::time_point tpMicro + = chrono::time_point_cast(chrono::system_clock::now()); + + return tpMicro.time_since_epoch().count(); +} + +DvppDecoder::DvppDecoder() +{ + // 初始化解码对象 + fmt_ctx = nullptr; + m_bRunning = false; + + stream = nullptr; + stream_index = -1; + pix_fmt = AV_PIX_FMT_NONE; + m_dec_name = ""; + + m_bPause = false; + m_bReal = true; + + m_decode_thread = 0; + m_post_decode_thread = 0; + + m_bFinished = false; + m_dec_keyframe = false; + m_fps = 0.0; +} + +DvppDecoder::~DvppDecoder() +{ + m_dec_keyframe = false; + releaseResource(); +} + +bool DvppDecoder::init_FFmpeg(const char* 
uri, bool force_tcp){ + +#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 9, 100) + av_register_all(); +#endif +#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 10, 100) + avcodec_register_all(); +#endif + + avformat_network_init(); + + // 打开输入视频文件 + AVDictionary *options = nullptr; + av_dict_set( &options, "bufsize", "655360", 0 ); + av_dict_set( &options, "rtsp_transport", force_tcp ? "tcp" : "udp", 0 ); + // av_dict_set( &options, "listen_timeout", "30", 0 ); // 单位为s + av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒 + + fmt_ctx = avformat_alloc_context(); + const char* input_file = uri; + if (avformat_open_input(&fmt_ctx, input_file, nullptr, &options) != 0) { + cout << "Cannot open input file:" << input_file << endl; + return false; + } + av_dump_format(fmt_ctx, 0, input_file, 0); + + // 查找流信息 + if (avformat_find_stream_info(fmt_ctx, nullptr) < 0) { + cout << "Cannot find input stream information" << endl; + return false; + } + + // 查找视频流信息 + AVCodec *decoder = nullptr; + stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0); + if (stream_index < 0) { + cout << "Cannot find a video stream in the input file" << endl; + return false; + } + AVCodec *vcodec = avcodec_find_decoder(decoder->id); + + AVCodecContext *avctx = avcodec_alloc_context3(vcodec); + if(avctx == nullptr){ + cout << "alloc AVCodecContext failed." 
<< endl; + return false; + } + + do{ + // 得到视频流对象 + AVStream* stream = fmt_ctx->streams[stream_index]; + AVCodecParameters *codecpar = stream->codecpar; + if (avcodec_parameters_to_context(avctx, codecpar) < 0) + break; + + const AVBitStreamFilter * filter = nullptr; + if(codecpar->codec_id == AV_CODEC_ID_H264){ + // 66:Baseline,77:Main,>=100:High + if(codecpar->profile == 77){ + enType = H264_MAIN_LEVEL; + }else if(codecpar->profile < 77){ + enType = H264_BASELINE_LEVEL; + }else{ + enType = H264_HIGH_LEVEL; + } + filter = av_bsf_get_by_name("h264_mp4toannexb"); + }else if(codecpar->codec_id == AV_CODEC_ID_HEVC){ + // h265只有main + enType = H265_MAIN_LEVEL; + filter = av_bsf_get_by_name("hevc_mp4toannexb"); + }else { + cout << "codec_id is not supported!" << endl; + break; + } + + int ret = av_bsf_alloc(filter, &h264bsfc); + if (ret < 0){ + break; + } + + avcodec_parameters_copy(h264bsfc->par_in, codecpar); + av_bsf_init(h264bsfc); + + frame_width = codecpar->width; + frame_height = codecpar->height; + pix_fmt = (AVPixelFormat)codecpar->format; + m_fps = av_q2d(stream ->avg_frame_rate); + + m_vdec_out_size = frame_width * frame_height * 3 /2; + + cout << "frame_width = " << frame_width << " frame_height = " << frame_height << " fps = " << m_fps << " m_vdec_out_size:" << m_vdec_out_size << endl; + + cout << "init ffmpeg success!" 
<< endl; + + return true; + }while(0); + + avcodec_free_context(&avctx); + + return false; +} + +static void *ReportThd(void *arg) +{ + DvppDecoder *self = (DvppDecoder *)arg; + if(nullptr != self){ + self->doProcessReport(); + } + return (void *)0; +} + +void DvppDecoder::doProcessReport(){ + // aclrtContext thdContext = nullptr; + // CHECK_AND_RETURN_NOVALUE(aclrtCreateContext(&thdContext, m_dvpp_deviceId), "aclrtCreateContext failed"); + + CHECK_AND_RETURN_NOVALUE(aclrtSetCurrentContext(m_context), "aclrtSetCurrentContext failed"); + // 阻塞等待vdec线程开始 + + int ret; + while (m_bRunning) { + ret = aclrtProcessReport(1000); + if (ret != ACL_ERROR_NONE) { + cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", aclrtProcessReport failed, ret: " << ret << endl; + } + } + + // CHECK_AND_RETURN_NOVALUE(aclrtDestroyContext(thdContext), "aclrtDestroyContext failed"); +} + +int count_frame = 0; +long lastts = 0; +static void VdecCallback(acldvppStreamDesc *input, acldvppPicDesc *output, void *pUserData) +{ + cout << "VdecCallback: " << GetCurTimeUs() - lastts << endl; + lastts = GetCurTimeUs(); + + Vdec_CallBack_UserData *userData = (Vdec_CallBack_UserData *) pUserData; + DvppDecoder* self = userData->self; + if(self != nullptr){ + + self->doVdppVdecCallBack(input, output, self); + } + + delete userData; + userData = nullptr; +} + +void DvppDecoder::doVdppVdecCallBack(acldvppStreamDesc *input, acldvppPicDesc *output, DvppDecoder *self){ + + CHECK_AND_RETURN_NOVALUE(aclrtSetCurrentContext(m_context), "aclrtSetCurrentContext failed"); + + void *inputDataDev = acldvppGetStreamDescData(input); + void *outputDataDev = acldvppGetPicDescData(output); + uint32_t outputSize = acldvppGetPicDescSize(output); + uint32_t width = acldvppGetPicDescWidth(output); + uint32_t height = acldvppGetPicDescHeight(output); + + cout << "width = " << width << " height = " << height << " data_size:" << outputSize << endl; + + if (!m_bPause) + { + DeviceRgbMemory* rgbMem = 
picConverter.convert2bgr(output, width, height, false); +#ifdef TEST_DECODER + if(rgbMem != nullptr){ + // D2H + uint32_t data_size = rgbMem->getSize(); + CHECK_AND_RETURN_NOVALUE(aclrtMemcpy(vdecHostAddr, data_size, rgbMem->getMem(), data_size, ACL_MEMCPY_DEVICE_TO_HOST), "D2H aclrtMemcpy failed"); + + // 保存vdec结果 + if(count_frame > 45 && count_frame < 50) + { + string file_name = "./yuv_pic/vdec_out"+ getName() +".rgb" ; + FILE *outputFile = fopen(file_name.c_str(), "a"); + if(outputFile){ + fwrite(vdecHostAddr, data_size, sizeof(char), outputFile); + fclose(outputFile); + } + } + count_frame++; + } +#endif + + }else{ + std::this_thread::sleep_for(std::chrono::milliseconds(3)); + } + + cout << "callback acldvppFree." << endl; + + acldvppFree((uint8_t*)outputDataDev); + outputDataDev = nullptr; + + m_vdecQueue.addHead(); + + CHECK_AND_RETURN_NOVALUE(acldvppDestroyStreamDesc(input), "acldvppDestroyStreamDesc failed"); + CHECK_AND_RETURN_NOVALUE(acldvppDestroyPicDesc(output), "acldvppDestroyPicDesc failed"); + + cout << "callback exit." 
<< endl; +} + +bool DvppDecoder::init_vdpp(int devId){ + cout << "Init device....\n"; + // DvppSourceManager 创建时包含 aclInit,析构时包含 aclFinalize + DvppSourceManager* pSrcMgr = DvppSourceManager::getInstance(); + m_context = pSrcMgr->getContext(m_dvpp_deviceId); + m_dvpp_channel = pSrcMgr->getChannel(m_dvpp_deviceId); + if(m_dvpp_channel < 0){ + cout << "该设备channel已经用完了" << endl; + return false; + } + + cout << "devProgram start, device: " << m_dvpp_deviceId << endl; + int ret = aclrtSetCurrentContext(m_context); + if (ret != ACL_ERROR_NONE) { + cout << "aclrtSetCurrentContext failed" << endl; + return false; + } + + // queue_size 最小应大于16,否则关键帧之间距离太远的时候会导致回调函数与循环队列卡死 + for (size_t i = 0; i < 20; i++){ + void *vdecInputbuf = nullptr; + int ret = acldvppMalloc((void **)&vdecInputbuf, g_pkt_size); + if(ret != ACL_ERROR_NONE){ + cout << "acldvppMalloc failed" << endl; + return false;; + } + m_vec_vdec.push_back(vdecInputbuf); + } + + if(!m_vdecQueue.init(m_vec_vdec)){ + return false; + } + +#ifdef TEST_DECODER + CHECK_NOT_RETURN(aclrtMallocHost(&vdecHostAddr, frame_width * frame_height * 3), "aclrtMallocHost failed"); +#endif + + cout << "init vdpp success!" 
<< endl; + return true; +} + +bool DvppDecoder::init(FFDecConfig& cfg){ + m_cfg = cfg; + + fstream infile(cfg.uri); + if (infile.is_open()){ + m_bReal = false; + infile.close(); + }else { + m_bReal = true; + } + + post_decoded_cbk = cfg.post_decoded_cbk; + decode_finished_cbk = cfg.decode_finished_cbk; + + bool ret = init_FFmpeg(cfg.uri.c_str(), cfg.force_tcp); + if(!ret){ + return false; + } + + m_dvpp_deviceId = atoi(cfg.gpuid.c_str()); + ret = init_vdpp(m_dvpp_deviceId); + if (!ret) + { + releaseFFmpeg(); + } + + ret = picConverter.init(m_context); + if(!ret){ + picConverter.release(); + } + + return ret; +} + +bool DvppDecoder::start(){ + m_bRunning = true; + + pthread_create(&m_decode_thread,0, + [](void* arg) + { + DvppDecoder* a=(DvppDecoder*)arg; + a->decode_thread(); + return (void*)0; + } + ,this); + + return true; +} + +void DvppDecoder::close(){ + m_bRunning=false; + + if(m_decode_thread != 0){ + pthread_join(m_decode_thread,0); + } + +#ifdef TEST_DECODER + if(vdecHostAddr != nullptr){ + CHECK_NOT_RETURN(aclrtFreeHost(vdecHostAddr), "aclrtFreeHost failed"); + } +#endif +} + +bool DvppDecoder::sendVdecEos(aclvdecChannelDesc *vdecChannelDesc){ + // create stream desc + acldvppStreamDesc *streamInputDesc = acldvppCreateStreamDesc(); + if (streamInputDesc == nullptr) { + cout << "fail to create input stream desc" << endl; + return false; + } + aclError ret = acldvppSetStreamDescEos(streamInputDesc, 1); + if (ret != ACL_SUCCESS) { + cout << "fail to set eos for stream desc, errorCode = " << static_cast(ret) << endl; + (void)acldvppDestroyStreamDesc(streamInputDesc); + return false; + } + + // send vdec eos frame. when all vdec callback are completed, aclvdecSendFrame can be returned. 
+ ret = aclvdecSendFrame(vdecChannelDesc, streamInputDesc, nullptr, nullptr, nullptr); + if (ret != ACL_SUCCESS) { + cout << "fail to send eos frame, ret=" << ret << endl; + (void)acldvppDestroyStreamDesc(streamInputDesc); + return false; + } + (void)acldvppDestroyStreamDesc(streamInputDesc); + + return true; +} + +void DvppDecoder::releaseFFmpeg(){ + m_dec_keyframe = false; + if(h264bsfc){ + av_bsf_free(&h264bsfc); + h264bsfc = nullptr; + } + if (fmt_ctx) + { + avformat_close_input(&fmt_ctx); + fmt_ctx = nullptr; + } +} + +void DvppDecoder::releaseResource(){ + releaseFFmpeg(); + + for(int i = 0; i < m_vec_vdec.size(); i++){ + if(m_vec_vdec[i] != nullptr){ + acldvppFree((uint8_t*)m_vec_vdec[i]); + m_vec_vdec[i] = nullptr; + } + } + m_vec_vdec.clear(); + + DvppSourceManager* pSrcMgr = DvppSourceManager::getInstance(); + pSrcMgr->releaseChannel(m_dvpp_deviceId, m_dvpp_channel); +} + +void DvppDecoder::decode_thread(){ + + int frame_count = 0; + long startTime = GetCurTimeUs(); + + int ret = -1; + + // dvpp解码参数 + CHECK_AND_RETURN_NOVALUE(aclrtSetCurrentContext(m_context), "aclrtSetCurrentContext failed"); + + pthread_t report_thread; + ret = pthread_create(&report_thread, nullptr, ReportThd, (void *)this); + if(ret != 0){ + cout << "pthread_create failed" << endl; + return; + } + + // 创建aclvdecChannelDesc类型的数据 + aclvdecChannelDesc *vdecChannelDesc = aclvdecCreateChannelDesc(); + if (vdecChannelDesc == nullptr) { + cout << "aclvdecCreateChannelDesc failed"; + return; + } + // 创建 channel dec结构体 + // 通道ID在dvpp层面为0~31 + CHECK_AND_RETURN_NOVALUE(aclvdecSetChannelDescChannelId(vdecChannelDesc, m_dvpp_channel), "aclvdecSetChannelDescChannelId failed"); + CHECK_AND_RETURN_NOVALUE(aclvdecSetChannelDescThreadId(vdecChannelDesc, report_thread), "aclvdecSetChannelDescThreadId failed"); + CHECK_AND_RETURN_NOVALUE(aclvdecSetChannelDescCallback(vdecChannelDesc, VdecCallback), "aclvdecSetChannelDescCallback failed"); + 
CHECK_AND_RETURN_NOVALUE(aclvdecSetChannelDescEnType(vdecChannelDesc, enType), "aclvdecSetChannelDescEnType failed"); + CHECK_AND_RETURN_NOVALUE(aclvdecSetChannelDescOutPicFormat(vdecChannelDesc, PIXEL_FORMAT_YUV_SEMIPLANAR_420), "aclvdecSetChannelDescOutPicFormat failed"); + CHECK_AND_RETURN_NOVALUE(aclvdecCreateChannel(vdecChannelDesc), "aclvdecCreateChannel failed"); + + AVPacket* pkt ; + pkt = av_packet_alloc(); + av_init_packet( pkt ); + + acldvppStreamDesc *input_stream_desc = nullptr; + acldvppPicDesc *output_pic_desc = nullptr; + + void *vdecInputbuf = nullptr; + void *vdecOutputBuf = nullptr; + while (m_bRunning) + { + if (!m_bReal) + { + if (m_bPause) + { + std::this_thread::sleep_for(std::chrono::milliseconds(3)); + continue; + } + } + + int result = av_read_frame(fmt_ctx, pkt); + if (result == AVERROR_EOF || result < 0) + { + cout << "Failed to read frame!" << endl; + break; + } + + if (m_dec_keyframe && !(pkt->flags & AV_PKT_FLAG_KEY)) { + av_packet_unref(pkt); + continue; + } + + if (stream_index == pkt->stream_index){ + + ret = av_bsf_send_packet(h264bsfc, pkt); + if(ret < 0) { + cout << "av_bsf_send_packet error" << endl; + } + + while ((ret = av_bsf_receive_packet(h264bsfc, pkt)) == 0) { + // 解码 + + if(pkt->size > g_pkt_size){ + cout << "pkt size 大于 预设" << endl; + break; + } + + if(!m_bRunning){ + break; + } + + vdecInputbuf = m_vdecQueue.getTail(); + if(vdecInputbuf == nullptr){ + std::this_thread::sleep_for(std::chrono::milliseconds(3)); + // cout << "getTail failed" << endl; + continue; + } + + ret = aclrtMemcpy(vdecInputbuf, pkt->size, pkt->data, pkt->size, ACL_MEMCPY_HOST_TO_DEVICE); + if(ACL_ERROR_NONE != ret){ + cout << "aclrtMemcpy failed" << endl; + goto end_flag; + } + + ret = acldvppMalloc((void **)&vdecOutputBuf, m_vdec_out_size); + if(ret != ACL_ERROR_NONE){ + cout << "acldvppMalloc failed" << endl; + goto end_flag; + } + + /************ 解码*************/ + input_stream_desc = acldvppCreateStreamDesc(); + if (input_stream_desc == 
nullptr) { cout << "acldvppCreateStreamDesc error" << endl; } + output_pic_desc = acldvppCreatePicDesc(); + if (output_pic_desc == nullptr) { cout<< "acldvppCreatePicDesc error" << endl; } + CHECK_NOT_RETURN(acldvppSetStreamDescData(input_stream_desc, vdecInputbuf), "acldvppSetStreamDescData failed"); + CHECK_NOT_RETURN(acldvppSetStreamDescSize(input_stream_desc, pkt->size), "acldvppSetStreamDescSize failed"); + CHECK_NOT_RETURN(acldvppSetPicDescData(output_pic_desc, vdecOutputBuf), "acldvppSetPicDescData failed"); + CHECK_NOT_RETURN(acldvppSetPicDescSize(output_pic_desc, m_vdec_out_size), "acldvppSetPicDescSize failed"); + + Vdec_CallBack_UserData *user_data = NULL; + user_data = new Vdec_CallBack_UserData; + user_data->frameId = frame_count; + user_data->startTime = startTime; + user_data->sendTime = GetCurTimeUs(); + user_data->self = this; + // user_data->inBufNode = bufNode; + cout << "send frame" << endl; + CHECK_NOT_RETURN(aclvdecSendFrame(vdecChannelDesc, input_stream_desc, output_pic_desc, nullptr, reinterpret_cast(user_data)), + "aclvdecSendFrame failed"); + + frame_count++; + + m_vdecQueue.addTail(); + + vdecInputbuf = nullptr; + vdecOutputBuf = nullptr; + } + /****************************/ + } + av_packet_unref(pkt); + } + +end_flag: + + av_packet_free(&pkt); + + sendVdecEos(vdecChannelDesc); + + CHECK_NOT_RETURN(aclvdecDestroyChannel(vdecChannelDesc), "aclvdecDestroyChannel failed"); + CHECK_NOT_RETURN(aclvdecDestroyChannelDesc(vdecChannelDesc), "aclvdecDestroyChannelDesc failed"); + + // report_thread 需后于destroy退出 + m_bRunning = false; + CHECK_NOT_RETURN(pthread_join(report_thread, nullptr), "pthread_join failed"); + + if(m_vdecQueue.length() > 0){ + cout << m_vdecQueue.length() << endl; + } + + if(vdecOutputBuf != nullptr){ + acldvppFree((uint8_t*)vdecOutputBuf); + vdecOutputBuf = nullptr; + } + + cout << "read thread exit." 
<< endl; +} + +float DvppDecoder::fps(){ + return m_fps; +} + +bool DvppDecoder::isSurport(FFDecConfig& cfg){ + bool bRet = init(cfg); + return bRet; +} + +bool DvppDecoder::getResolution( int &width, int &height ){ + width = frame_width; + height = frame_height; + return true; +} + +void DvppDecoder::pause(){ + m_bPause = true; +} + +void DvppDecoder::resume(){ + m_bPause = false; +} + +void DvppDecoder::setDecKeyframe(bool bKeyframe) +{ + m_dec_keyframe = bKeyframe; +} + +bool DvppDecoder::isRunning(){ + return m_bRunning; +} + +bool DvppDecoder::isFinished(){ + return m_bFinished; +} + +bool DvppDecoder::isPausing(){ + return m_bPause; +} + +int DvppDecoder::getCachedQueueLength(){ + // TODO + return 0; +} + +FFImgInfo* DvppDecoder::snapshot(){ + // TODO + return nullptr; +} + +void DvppDecoder::setPostDecArg(const void* postDecArg){ + m_postDecArg = postDecArg; +} + +void DvppDecoder::setFinishedDecArg(const void* finishedDecArg){ + m_finishedDecArg = finishedDecArg; +} \ No newline at end of file diff --git a/src/dvpp/DvppDecoder.h b/src/dvpp/DvppDecoder.h new file mode 100644 index 0000000..db5064b --- /dev/null +++ b/src/dvpp/DvppDecoder.h @@ -0,0 +1,111 @@ +#include +#include + +#include "dvpp_headers.h" +#include "depend_headers.h" +#include "user_mem.h" +#include "CircularQueue.hpp" +#include "VpcPicConverter.h" + +#include + +using namespace std; + +#define TEST_DECODER + + +class DvppDecoder{ +public: + DvppDecoder(); + ~DvppDecoder(); + bool init(FFDecConfig& cfg); + void close(); + bool start(); + void pause(); + void resume(); + + void setDecKeyframe(bool bKeyframe); + + bool isRunning(); + bool isFinished(); + bool isPausing(); + bool getResolution( int &width, int &height ); + + bool isSurport(FFDecConfig& cfg); + + int getCachedQueueLength(); + + float fps(); + + DECODER_TYPE getDecoderType(){ return DECODER_TYPE_DVPP; } + + void setName(string nm){ + m_dec_name = nm; + } + + string getName(){ + return m_dec_name; + } + + FFImgInfo* snapshot(); + 
+ void setPostDecArg(const void* postDecArg); + void setFinishedDecArg(const void* finishedDecArg); + +public: + void doProcessReport(); + void doVdppVdecCallBack(acldvppStreamDesc *input, acldvppPicDesc *output, DvppDecoder *self); + +private: + void decode_thread(); + void post_decode_thread(); + void releaseFFmpeg(); + void releaseResource(); + bool init_FFmpeg(const char* uri, bool force_tcp); + bool init_vdpp(int _deviceId); + + bool sendVdecEos(aclvdecChannelDesc *vdecChannelDesc); + +private: + AVStream* stream; + int stream_index; + AVFormatContext *fmt_ctx; + AVPixelFormat pix_fmt; + uint32_t m_vdec_out_size{0}; + int frame_width{0}; + int frame_height{0}; + + int m_dvpp_deviceId {-1}; + int m_dvpp_channel {-1}; + + pthread_t m_decode_thread; + pthread_t m_post_decode_thread; + + bool m_bRunning; + bool m_bFinished; + + bool m_bPause; + + bool m_bReal; // 是否实时流 + + float m_fps; + + FFDecConfig m_cfg; + string m_dec_name; + bool m_dec_keyframe; + + AVBSFContext * h264bsfc{nullptr}; + + aclrtContext m_context; + acldvppStreamFormat enType; + + vector m_vec_vdec; + CircularQueue m_vdecQueue; + + const void * m_postDecArg; + POST_DECODE_CALLBACK post_decoded_cbk; + const void * m_finishedDecArg; + DECODE_FINISHED_CALLBACK decode_finished_cbk; + + VpcPicConverter picConverter; +}; \ No newline at end of file diff --git a/src/dvpp/DvppDecoder2.h b/src/dvpp/DvppDecoder2.h new file mode 100644 index 0000000..d044f9b --- /dev/null +++ b/src/dvpp/DvppDecoder2.h @@ -0,0 +1,192 @@ +#include + +#include "depend_headers.h" +#include "CircularQueue.hpp" +#include "FFReceiver.h" +#include "DvppDec.h" + +using namespace std; + +class DvppDecoder2{ +public: + DvppDecoder2(); + ~DvppDecoder2(); + bool init(FFDecConfig cfg); + void close(); + bool start(); + void pause(); + void resume(); + + void setDecKeyframe(bool bKeyframe); + + bool isRunning(); + bool isFinished(); + bool isPausing(); + bool getResolution( int &width, int &height ); + + bool isSurport(FFDecConfig& cfg); 
+ + float fps(); + + void setName(string nm){ + m_dec_name = nm; + } + + string getName(){ + return m_dec_name; + } + + FFImgInfo* snapshot(); + + void setPostDecArg(const void* postDecArg); + void setFinishedDecArg(const void* finishedDecArg); + + int getCachedQueueLength(); + +public: + void taskFinishing(); + +private: + + FFDecConfig m_cfg; + string m_dec_name; + + CircularQueue *m_pktQueueptr; + FFReceiver m_receiver; + DvppDec m_decoder; + + const void * m_finishedDecArg; + DECODE_FINISHED_CALLBACK decode_finished_cbk; + +}; + +void receiver_finish_cbk(const void* userPtr){ + if(userPtr != nullptr){ + DvppDecoder2* self = (DvppDecoder2*)userPtr; + self->taskFinishing(); + } +} + +DvppDecoder2::DvppDecoder2(){ + m_pktQueueptr = new CircularQueue(); +} + +DvppDecoder2::~DvppDecoder2(){ + delete m_pktQueueptr; + m_pktQueueptr = nullptr; +} + +bool DvppDecoder2::init(FFDecConfig cfg){ + + ReceiverConfig receiver_config; + receiver_config.uri = cfg.uri.c_str(); + receiver_config.dec_name = cfg.dec_name; + receiver_config.force_tcp = cfg.force_tcp; + receiver_config.pktQueueptr = m_pktQueueptr; + receiver_config.receiver_finished_cbk = receiver_finish_cbk; + AVCodecContext* avctx = m_receiver.init_FFmpeg(receiver_config); + if(avctx == nullptr){ + return false; + } + m_receiver.setFinishCbkArg(this); + + DvppDecConfig dec_cfg; + if(avctx->codec_id == AV_CODEC_ID_H264){ + dec_cfg.codec_id = 0; + }else if(avctx->codec_id == AV_CODEC_ID_HEVC){ + dec_cfg.codec_id = 1; + }else { + return false; + } + dec_cfg.dec_name = cfg.dec_name; + dec_cfg.post_decoded_cbk = cfg.post_decoded_cbk; + dec_cfg.dev_id = cfg.gpuid; + dec_cfg.force_tcp = cfg.force_tcp; + dec_cfg.skip_frame = cfg.skip_frame; + dec_cfg.profile = avctx->profile; + dec_cfg.pktQueueptr = m_pktQueueptr; + dec_cfg.width = avctx->width; + dec_cfg.height = avctx->height; + bool bRet = m_decoder.init_vdpp(dec_cfg); + if(!bRet){ + return false; + } + + m_cfg = cfg; + + decode_finished_cbk = cfg.decode_finished_cbk; + 
+ return true; +} + +bool DvppDecoder2::isSurport(FFDecConfig& cfg){ + return true; +} + +bool DvppDecoder2::start(){ + m_receiver.start(); + m_decoder.start(); + return true; +} + +void DvppDecoder2::close(){ + m_receiver.close(); +} + +void DvppDecoder2::setPostDecArg(const void* postDecArg){ + m_decoder.setPostDecArg(postDecArg); +} + +void DvppDecoder2::setFinishedDecArg(const void* finishedDecArg){ + m_finishedDecArg = finishedDecArg; +} + +void DvppDecoder2::pause(){ + m_receiver.pause(); +} + +void DvppDecoder2::resume(){ + m_receiver.resume(); +} + +void DvppDecoder2::setDecKeyframe(bool bKeyframe){ + m_receiver.setDecKeyframe(bKeyframe); +} + +bool DvppDecoder2::isRunning(){ + return m_receiver.isRunning(); +} + +bool DvppDecoder2::isFinished(){ + return m_receiver.isFinished(); +} + +bool DvppDecoder2::isPausing(){ + return m_receiver.isPausing(); +} + +bool DvppDecoder2::getResolution(int &width, int &height){ + return m_receiver.getResolution(width, height); +} + +float DvppDecoder2::fps(){ + return m_receiver.fps(); +} + +FFImgInfo* DvppDecoder2::snapshot(){ + // TODO + return nullptr; +} + +int DvppDecoder2::getCachedQueueLength(){ + return 0; +} + +void DvppDecoder2::taskFinishing(){ + // receiver 中读取线程结束时执行 + m_decoder.close(); + decode_finished_cbk(m_finishedDecArg); + + LOG_INFO("[{}]- task finished.", m_dec_name); + +} \ No newline at end of file diff --git a/src/dvpp/DvppDecoderApi.cpp b/src/dvpp/DvppDecoderApi.cpp new file mode 100644 index 0000000..fd2f54e --- /dev/null +++ b/src/dvpp/DvppDecoderApi.cpp @@ -0,0 +1,133 @@ +#include "DvppDecoderApi.h" +#include "DvppDecoder2.h" + +DvppDecoderApi::DvppDecoderApi(){ + m_pDecoder = nullptr; +} + +DvppDecoderApi::~DvppDecoderApi(){ + if(m_pDecoder != nullptr){ + delete m_pDecoder; + m_pDecoder = nullptr; + } +} + +bool DvppDecoderApi::init(FFDecConfig& cfg){ + m_pDecoder = new DvppDecoder2(); + if(m_pDecoder != nullptr){ + return m_pDecoder->init(cfg); + } + return false; +} + +void 
DvppDecoderApi::close(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->close(); + } +} + +bool DvppDecoderApi::start(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->start(); + } + return false; +} + +void DvppDecoderApi::pause(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->pause(); + } +} + +void DvppDecoderApi::resume(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->resume(); + } +} + +void DvppDecoderApi::setDecKeyframe(bool bKeyframe){ + if(m_pDecoder != nullptr){ + return m_pDecoder->setDecKeyframe(bKeyframe); + } +} + +bool DvppDecoderApi::isRunning(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->isRunning(); + } + return false; +} + +bool DvppDecoderApi::isFinished(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->isFinished(); + } + return false; +} + +bool DvppDecoderApi::isPausing(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->isPausing(); + } + return false; +} + +bool DvppDecoderApi::getResolution(int &width, int &height){ + if(m_pDecoder != nullptr){ + return m_pDecoder->getResolution(width, height); + } + return false; +} + +bool DvppDecoderApi::isSurport(FFDecConfig& cfg){ + if(m_pDecoder != nullptr){ + return m_pDecoder->isSurport(cfg); + } + return false; +} + +float DvppDecoderApi::fps(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->fps(); + } + return 0.0; +} + +int DvppDecoderApi::getCachedQueueLength(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->getCachedQueueLength(); + } + return 0; +} + +void DvppDecoderApi::setName(string nm){ + if(m_pDecoder != nullptr){ + return m_pDecoder->setName(nm); + } +} + +string DvppDecoderApi::getName(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->getName(); + } + return nullptr; +} + +FFImgInfo* DvppDecoderApi::snapshot(){ + if(m_pDecoder != nullptr){ + return m_pDecoder->snapshot(); + } + return nullptr; +} + +void DvppDecoderApi::setPostDecArg(const void* postDecArg){ + if(m_pDecoder != nullptr){ + return m_pDecoder->setPostDecArg(postDecArg); + } +} + +void 
DvppDecoderApi::setFinishedDecArg(const void* finishedDecArg){ + if(m_pDecoder != nullptr){ + return m_pDecoder->setFinishedDecArg(finishedDecArg); + } +} \ No newline at end of file diff --git a/src/dvpp/DvppDecoderApi.h b/src/dvpp/DvppDecoderApi.h new file mode 100644 index 0000000..c465e03 --- /dev/null +++ b/src/dvpp/DvppDecoderApi.h @@ -0,0 +1,44 @@ +#include +#include + +#include "depend_headers.h" +#include "../interface/AbstractDecoder.h" + +using namespace std; + +class DvppDecoder2; + +class DvppDecoderApi : public AbstractDecoder{ +public: + DvppDecoderApi(); + ~DvppDecoderApi(); + bool init(FFDecConfig& cfg); + void close(); + bool start(); + void pause(); + void resume(); + + void setDecKeyframe(bool bKeyframe); + + bool isRunning(); + bool isFinished(); + bool isPausing(); + bool getResolution( int &width, int &height ); + + bool isSurport(FFDecConfig& cfg); + + int getCachedQueueLength(); + + float fps(); + + FFImgInfo* snapshot(); + + DECODER_TYPE getDecoderType(){ return DECODER_TYPE_DVPP; } + void setName(string nm); + string getName(); + + void setPostDecArg(const void* postDecArg); + void setFinishedDecArg(const void* finishedDecArg); +private: + DvppDecoder2* m_pDecoder; +}; \ No newline at end of file diff --git a/src/dvpp/DvppRgbMemory.hpp b/src/dvpp/DvppRgbMemory.hpp new file mode 100644 index 0000000..b6bc750 --- /dev/null +++ b/src/dvpp/DvppRgbMemory.hpp @@ -0,0 +1,25 @@ +#include + +#include "dvpp_headers.h" + +using namespace std; + +class DvppRgbMemory : public DeviceRgbMemory +{ +public: + DvppRgbMemory(int _channel, int _width, int _height, int _size, string _id, string _dev_id, bool _key_frame) + :DeviceRgbMemory(_channel, _width, _height, _id, _dev_id, _key_frame, false){ + data_size = _size; + int ret = acldvppMalloc((void **)&pHwRgb, data_size); + if(ret != ACL_ERROR_NONE){ + cout << "acldvppMalloc failed" << endl; + } + } + + ~DvppRgbMemory(){ + if (pHwRgb) { + acldvppFree((uint8_t*)pHwRgb); + pHwRgb = nullptr; + } + } +}; \ No 
newline at end of file diff --git a/src/dvpp/DvppSourceManager.cpp b/src/dvpp/DvppSourceManager.cpp new file mode 100644 index 0000000..a3a0f35 --- /dev/null +++ b/src/dvpp/DvppSourceManager.cpp @@ -0,0 +1,63 @@ +#include "DvppSourceManager.h" + +#include "dvpp_headers.h" +#include "depend_headers.h" + +using namespace std; + +DvppSourceManager::~DvppSourceManager() +{ + for(auto iter = ctxMap.begin(); iter != ctxMap.end(); iter++){ + aclError ret = aclrtDestroyContext(iter->second); + if(ret != ACL_ERROR_NONE){ + LOG_ERROR("aclrtDestroyContext failed !"); + continue; + } + } + ctxMap.clear(); + channelMap.clear(); + + aclFinalize(); +} + +aclrtContext DvppSourceManager::getContext(int devId) +{ + aclrtContext ctx = ctxMap[devId]; + if (ctx == nullptr) + { + // 初始化硬件解码器 + aclError ret = aclrtSetDevice(devId); + if(ret != ACL_ERROR_NONE){ + // cout << "aclrtSetDevice failed" << endl; + LOG_ERROR("aclrtSetDevice failed !"); + return nullptr; + } + + ret = aclrtCreateContext(&ctx, devId); + if (ret != ACL_ERROR_NONE) { + // cout << "aclrtCreateContext failed " << endl; + LOG_ERROR("aclrtCreateContext failed !"); + return nullptr; + } + ctxMap[devId] = ctx; + } + return ctx; +} + +int DvppSourceManager::getChannel(int devId){ + // channel 最大值暂定为32, 华为没有接口获取最大channel,只有文档说明 + for(int iChannel = 0; iChannel < 32; iChannel++){ + string channelKey = "channel_" + to_string(devId) + "_" + to_string(iChannel) ; + auto it = channelMap.find(channelKey); + if(it == channelMap.end()){ + channelMap[channelKey] = iChannel; + return iChannel; + } + } + return -1; +} + +void DvppSourceManager::releaseChannel(int devId, int iChannel){ + string channelKey = "channel_" + to_string(devId) + "_" + to_string(iChannel) ; + channelMap.erase(channelKey); +} \ No newline at end of file diff --git a/src/dvpp/DvppSourceManager.h b/src/dvpp/DvppSourceManager.h new file mode 100644 index 0000000..36a4b07 --- /dev/null +++ b/src/dvpp/DvppSourceManager.h @@ -0,0 +1,36 @@ + +#include +#include + 
+#include "dvpp_headers.h" + +using namespace std; + +class DvppSourceManager{ +public: + static DvppSourceManager* getInstance(){ + static DvppSourceManager* singleton = nullptr; + if (singleton == nullptr){ + singleton = new DvppSourceManager(); + int ret = aclInit(nullptr); + if (ret != ACL_ERROR_NONE) { + cout << "aclInit failed" << endl; + return nullptr; + } + } + return singleton; + } + + aclrtContext getContext(int devId); + + int getChannel(int devId); + void releaseChannel(int devId, int channel); + +private: + DvppSourceManager(){} + ~DvppSourceManager(); + +private: + map ctxMap; + map channelMap; +}; \ No newline at end of file diff --git a/src/dvpp/FFReceiver.cpp b/src/dvpp/FFReceiver.cpp new file mode 100644 index 0000000..84ae526 --- /dev/null +++ b/src/dvpp/FFReceiver.cpp @@ -0,0 +1,281 @@ +#include "FFReceiver.h" +#include + +const int g_pkt_size = 1024 * 1024; // 单个AVPacket大小的最大值 + +FFReceiver::FFReceiver(/* args */) +{ + fmt_ctx = nullptr; + m_bRunning = false; + + stream = nullptr; + stream_index = -1; + pix_fmt = AV_PIX_FMT_NONE; + m_dec_name = ""; + + m_bPause = false; + m_bReal = true; + + m_bFinished = false; + m_dec_keyframe = false; + m_fps = 0.0; + + m_read_thread = 0; +} + +FFReceiver::~FFReceiver() +{ + releaseFFmpeg(); +} + +AVCodecContext* FFReceiver::init_FFmpeg(ReceiverConfig config){ + +#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 9, 100) + av_register_all(); +#endif +#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 10, 100) + avcodec_register_all(); +#endif + + avformat_network_init(); + + const char* uri = config.uri; + fstream infile(uri); + if (infile.is_open()){ + m_bReal = false; + infile.close(); + }else { + m_bReal = true; + } + + m_dec_name = config.dec_name; + m_pktQueueptr = config.pktQueueptr; + receiver_finished_cbk = config.receiver_finished_cbk; + + // 打开输入视频文件 + AVDictionary *options = nullptr; + av_dict_set( &options, "bufsize", "655360", 0 ); + av_dict_set( &options, "rtsp_transport", config.force_tcp ? 
"tcp" : "udp", 0 ); + av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒 + + fmt_ctx = avformat_alloc_context(); + const char* input_file = uri; + if (avformat_open_input(&fmt_ctx, input_file, nullptr, &options) != 0) { + LOG_ERROR("[{}]- Cannot open input file: {}", m_dec_name, input_file); + return nullptr; + } + av_dump_format(fmt_ctx, 0, input_file, 0); + + // 查找流信息 + if (avformat_find_stream_info(fmt_ctx, nullptr) < 0) { + LOG_ERROR("[{}]- Cannot find input stream information!", m_dec_name); + return nullptr; + } + + // 查找视频流信息 + AVCodec *decoder = nullptr; + stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0); + if (stream_index < 0) { + LOG_ERROR("[{}]- Cannot find a video stream in the input file!", m_dec_name); + return nullptr; + } + AVCodec *vcodec = avcodec_find_decoder(decoder->id); + + avctx = avcodec_alloc_context3(vcodec); + if(avctx == nullptr){ + LOG_ERROR("[{}]- alloc AVCodecContext failed!", m_dec_name); + return nullptr; + } + + do{ + // 得到视频流对象 + AVStream* stream = fmt_ctx->streams[stream_index]; + AVCodecParameters *codecpar = stream->codecpar; + if (avcodec_parameters_to_context(avctx, codecpar) < 0) + break; + + const AVBitStreamFilter * filter = nullptr; + if(codecpar->codec_id == AV_CODEC_ID_H264){ + filter = av_bsf_get_by_name("h264_mp4toannexb"); + }else if(codecpar->codec_id == AV_CODEC_ID_HEVC){ + filter = av_bsf_get_by_name("hevc_mp4toannexb"); + }else { + LOG_ERROR("[{}]- codec_id is not supported!", m_dec_name); + break; + } + + int ret = av_bsf_alloc(filter, &h264bsfc); + if (ret < 0){ + break; + } + + avcodec_parameters_copy(h264bsfc->par_in, codecpar); + av_bsf_init(h264bsfc); + + frame_width = codecpar->width; + frame_height = codecpar->height; + pix_fmt = (AVPixelFormat)codecpar->format; + m_fps = av_q2d(stream ->avg_frame_rate); + + LOG_INFO("[{}]- init ffmpeg success! 
input:{} frame_width:{} frame_height:{} fps:{} ", m_dec_name, input_file, frame_width, frame_height, m_fps); + + for(int i = 0; i<5; i++){ + AVPacket* pkt = av_packet_alloc(); + av_init_packet( pkt ); + m_vec_pkt.push_back(pkt); + } + m_pktQueueptr->init(m_vec_pkt); + + return avctx; + }while(0); + + LOG_ERROR("[{}]- init ffmpeg failed ! input:{} ", m_dec_name); + + releaseFFmpeg(); + + return nullptr; +} + +void FFReceiver::releaseFFmpeg(){ + m_dec_keyframe = false; + if(h264bsfc){ + av_bsf_free(&h264bsfc); + h264bsfc = nullptr; + } + if (fmt_ctx){ + avformat_close_input(&fmt_ctx); + fmt_ctx = nullptr; + } + if(avctx){ + avcodec_free_context(&avctx); + avctx = nullptr; + } + + for(int i = 0; i < m_vec_pkt.size(); i++){ + av_packet_free(&m_vec_pkt[i]); + } +} + +void FFReceiver::read_thread(){ + + int frame_count = 0; + int ret = -1; + while (m_bRunning) + { + if (!m_bReal) + { + if (m_bPause) + { + std::this_thread::sleep_for(std::chrono::milliseconds(3)); + continue; + } + } + + AVPacket* pkt = m_pktQueueptr->getTail(); + if(pkt == nullptr){ + std::this_thread::sleep_for(std::chrono::milliseconds(3)); + continue; + } + + int result = av_read_frame(fmt_ctx, pkt); + if (result == AVERROR_EOF || result < 0) + { + LOG_ERROR("[{}]- Failed to read frame!", m_dec_name); + break; + } + + if (m_dec_keyframe && !(pkt->flags & AV_PKT_FLAG_KEY)) { + av_packet_unref(pkt); + continue; + } + + if (stream_index == pkt->stream_index){ + + ret = av_bsf_send_packet(h264bsfc, pkt); + if(ret < 0) { + LOG_ERROR("[{}]- av_bsf_send_packet error!", m_dec_name); + } + + while ((ret = av_bsf_receive_packet(h264bsfc, pkt)) == 0) { + if(pkt->size > g_pkt_size){ + LOG_ERROR("[{}]- pkt size 大于最大预设值!", m_dec_name); + break; + } + + if(!m_bRunning){ + break; + } + + m_pktQueueptr->addTail(); + + frame_count++; + } + } + } + + LOG_INFO("[{}]- read thread exit.", m_dec_name); + + receiver_finished_cbk(m_finishedReceiveArg); +} + +bool FFReceiver::start(){ + m_bRunning = true; + + 
pthread_create(&m_read_thread,0, + [](void* arg) + { + FFReceiver* a=(FFReceiver*)arg; + a->read_thread(); + return (void*)0; + } + ,this); + + return true; +} + +void FFReceiver::close(){ + m_bRunning=false; + + if(m_read_thread != 0){ + pthread_join(m_read_thread,0); + } +} + +float FFReceiver::fps(){ + return m_fps; +} + +bool FFReceiver::getResolution( int &width, int &height ){ + width = frame_width; + height = frame_height; + return true; +} + +void FFReceiver::pause(){ + m_bPause = true; +} + +void FFReceiver::resume(){ + m_bPause = false; +} + +void FFReceiver::setDecKeyframe(bool bKeyframe) +{ + m_dec_keyframe = bKeyframe; +} + +bool FFReceiver::isRunning(){ + return m_bRunning; +} + +bool FFReceiver::isFinished(){ + return m_bFinished; +} + +bool FFReceiver::isPausing(){ + return m_bPause; +} + +void FFReceiver::setFinishCbkArg(const void* userPtr){ + m_finishedReceiveArg = userPtr; +} \ No newline at end of file diff --git a/src/dvpp/FFReceiver.h b/src/dvpp/FFReceiver.h new file mode 100644 index 0000000..a380628 --- /dev/null +++ b/src/dvpp/FFReceiver.h @@ -0,0 +1,81 @@ +#ifndef __FFRECEIVER_H__ +#define __FFRECEIVER_H__ + +#include "depend_headers.h" +#include "CircularQueue.hpp" + +typedef void(*RECEIVER_FINISHED_CALLBACK)(const void* userPtr); + +struct ReceiverConfig{ + const char* uri; + string dec_name; + bool force_tcp; + CircularQueue *pktQueueptr; + RECEIVER_FINISHED_CALLBACK receiver_finished_cbk; // 解码线程结束后的回调接口 +}; + +class FFReceiver +{ +public: + FFReceiver(/* args */); + ~FFReceiver(); + + AVCodecContext* init_FFmpeg(ReceiverConfig config); + void releaseFFmpeg(); + void close(); + bool start(); + + void pause(); + void resume(); + void setDecKeyframe(bool bKeyframe); + bool isRunning(); + bool isFinished(); + bool isPausing(); + bool getResolution( int &width, int &height ); + float fps(); + + void setName(string nm){ + m_dec_name = nm; + } + + void setFinishCbkArg(const void* userPtr); + +private: + void read_thread(); + +private: + 
string m_dec_name; + + AVStream* stream; + int stream_index; + AVFormatContext *fmt_ctx; + AVPixelFormat pix_fmt; + int frame_width{0}; + int frame_height{0}; + + pthread_t m_read_thread; + + bool m_bRunning; + bool m_bFinished; + + bool m_bPause; + + bool m_bReal; // 是否实时流 + + float m_fps; + + FFDecConfig m_cfg; + bool m_dec_keyframe; + + AVCodecContext *avctx{nullptr}; + AVBSFContext * h264bsfc{nullptr}; + + vector m_vec_pkt; + CircularQueue *m_pktQueueptr; + + const void * m_finishedReceiveArg; + RECEIVER_FINISHED_CALLBACK receiver_finished_cbk; +}; + + +#endif \ No newline at end of file diff --git a/src/dvpp/Makefile b/src/dvpp/Makefile new file mode 100644 index 0000000..1f044f5 --- /dev/null +++ b/src/dvpp/Makefile @@ -0,0 +1,66 @@ +# 各项目录 +LIB_DIR:=$(BUILD_DIR)/$(MODULE)/lib +DEP_DIR:=$(BUILD_DIR)/$(MODULE)/.dep +OBJ_DIR:=$(BUILD_DIR)/$(MODULE)/obj +SRC_DIR:=$(TOP_DIR)/$(MODULE) + +# 源文件以及中间目标文件和依赖文件 +SRCS:=$(notdir $(wildcard $(SRC_DIR)/*.cpp)) +OBJS:=$(addprefix $(OBJ_DIR)/, $(patsubst %.cpp, %.o, $(SRCS))) +DEPS:=$(addprefix $(DEP_DIR)/, $(patsubst %.cpp, %.d,a $(SRCS))) + +# 自动生成头文件依赖选项 +DEPFLAGS=-MT $@ -MMD -MP -MF $(DEP_DIR)/$*.d + +DEFS = -DENABLE_DVPP_INTERFACE + +# 最终目标文件 +TARGET:=$(LIB_DIR)/lib$(MODULE).a + +export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/runtime/lib64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/lib64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/driver:$LD_LIBRARY_PATH + +include_dir=-I/usr/local/Ascend/ascend-toolkit/latest/acllib/include +lib_dir=-L/usr/lib -L/usr/local/lib -L/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64 -L/usr/local/Ascend/driver/lib64 -L/usr/local/Ascend/ascend-toolkit/latest/atc/lib64 +lib=-lacl_dvpp -lascendcl -lmmpa -lglog -lgflags -lpthread -lz + +CXXFLAGS= -g -O0 -fPIC $(include_dir) $(DEFS) -lpthread -lrt -lz -fexceptions -std=c++11 -fvisibility=hidden -Wl,-Bsymbolic -ldl + # -DUNICODE -D_UNICODE + 
+# 默认最终目标 +.PHONY:all +all:$(TARGET) + +# 生成最终目标 +$(TARGET):$(OBJS) | $(LIB_DIR) + @echo -e "\e[32m""Linking static library $(TARGET)""\e[0m" + @ar -rc $@ $^ + +# 若没有lib目录则自动生成 +$(LIB_DIR): + @mkdir -p $@ + +# 生成中间目标文件 +$(OBJ_DIR)/%.o:$(SRC_DIR)/%.cpp $(DEP_DIR)/%.d | $(OBJ_DIR) $(DEP_DIR) + @echo -e "\e[33m""Building object $@""\e[0m" + @$(CXX) -c $(DEPFLAGS) $(CXXFLAGS) $(INCS) $(LDFLAGS) $(lib_dir) $(lib) $(MACROS) -o $@ $< + +# 若没有obj目录则自动生成 +$(OBJ_DIR): + @mkdir -p $@ + +# 若没有.dep目录则自动生成 +$(DEP_DIR): + @mkdir -p $@ + +# 依赖文件会在生成中间文件的时候自动生成,这里只是为了防止报错 +$(DEPS): + +# 引入中间目标文件头文件依赖关系 +include $(wildcard $(DEPS)) + +# 直接删除组件build目录 +.PHONY:clean +clean: + @rm -rf $(BUILD_DIR)/$(MODULE) diff --git a/src/dvpp/VpcPicConverter.cpp b/src/dvpp/VpcPicConverter.cpp new file mode 100644 index 0000000..7af3508 --- /dev/null +++ b/src/dvpp/VpcPicConverter.cpp @@ -0,0 +1,83 @@ +#include "VpcPicConverter.h" +#include "depend_headers.h" + +#define ALIGN_UP(val, align) (((val) % (align) == 0) ? (val) : (((val) / (align) + 1) * (align))) + +bool VpcPicConverter::init(aclrtContext context){ + + aclrtSetCurrentContext(context); + aclrtCreateStream(&stream_); + + // 3. 创建图片数据处理通道时的通道描述信息,dvppChannelDesc_是acldvppChannelDesc类型 + dvppChannelDesc_ = acldvppCreateChannelDesc(); + + // 4. 创建图片数据处理的通道。 + int ret = acldvppCreateChannel(dvppChannelDesc_); + if(ret != ACL_ERROR_NONE){ + LOG_ERROR("acldvppCreateChannel failed !"); + return false; + } + + ret = acldvppSetChannelDescMode(dvppChannelDesc_, DVPP_CHNMODE_VPC); + if(ret != ACL_ERROR_NONE){ + LOG_ERROR("acldvppSetChannelDescMode failed !"); + return false; + } + + return true; +} + +DvppRgbMemory* VpcPicConverter::convert2bgr(acldvppPicDesc *inputDesc_, int out_width, int out_height, bool key_frame){ + + // 8. 
创建色域转换的输出图片的描述信息,并设置各属性值, 输出的宽和高要求和输入一致 + // 如果色域转换的输出图片作为模型推理的输入,则输出图片的宽高要与模型要求的宽高保持一致 + // outputDesc_是acldvppPicDesc类型 + int out_buf_width = ALIGN_UP(out_width, 16) * 3; + int out_buf_height = ALIGN_UP(out_height, 2); + int out_buf_size = out_buf_width * out_buf_height; + + DvppRgbMemory* rgbMem = new DvppRgbMemory(3, out_buf_width, out_buf_height, out_buf_size, "", to_string(m_devId), key_frame); + void *outBufferDev_ = (void*)rgbMem->getMem(); + + acldvppPicDesc *outputDesc_= acldvppCreatePicDesc(); + acldvppSetPicDescData(outputDesc_, outBufferDev_); + acldvppSetPicDescFormat(outputDesc_, PIXEL_FORMAT_BGR_888); + acldvppSetPicDescWidth(outputDesc_, out_width); + acldvppSetPicDescHeight(outputDesc_, out_height); + acldvppSetPicDescWidthStride(outputDesc_, out_buf_width); + acldvppSetPicDescHeightStride(outputDesc_, out_buf_height); + acldvppSetPicDescSize(outputDesc_, out_buf_size); + + + + aclError ret = ACL_ERROR_NONE; + do{ + // 9. 执行异步色域转换,再调用aclrtSynchronizeStream接口阻塞程序运行,直到指定Stream中的所有任务都完成 + ret = acldvppVpcConvertColorAsync(dvppChannelDesc_, inputDesc_, outputDesc_, stream_); + if(ret != ACL_ERROR_NONE){ + LOG_ERROR("acldvppVpcConvertColorAsync failed - out_width:{} out_height:{} out_buf_width:{} out_buf_height:{} out_buf_size:{}", out_width, out_height, out_buf_width, out_buf_height, out_buf_size); + break; + } + ret = aclrtSynchronizeStream(stream_); + if(ret != ACL_ERROR_NONE){ + LOG_ERROR("aclrtSynchronizeStream failed - out_width:{} out_height:{} out_buf_width:{} out_buf_height:{} out_buf_size:{}", out_width, out_height, out_buf_width, out_buf_height, out_buf_size); + break; + } + }while(0); + + // 10. 
色域转换结束后,释放资源,包括输入/输出图片的描述信息、输入/输出内存 + // acldvppDestroyPicDesc(inputDesc_); + acldvppDestroyPicDesc(outputDesc_); + + if(ret != ACL_ERROR_NONE){ + delete rgbMem; + rgbMem = nullptr; + } + + return rgbMem; +} + +void VpcPicConverter::release(){ + aclrtDestroyStream(stream_); + // aclrtDestroyContext(context_); +} \ No newline at end of file diff --git a/src/dvpp/VpcPicConverter.h b/src/dvpp/VpcPicConverter.h new file mode 100644 index 0000000..6d0d859 --- /dev/null +++ b/src/dvpp/VpcPicConverter.h @@ -0,0 +1,19 @@ +#include "dvpp_headers.h" +#include "depend_headers.h" +#include "DvppRgbMemory.hpp" + + +class VpcPicConverter{ +public: + bool init(aclrtContext context); + + DvppRgbMemory* convert2bgr(acldvppPicDesc *input, int out_width, int out_height, bool key_frame); + + void release(); + +private: + aclrtContext context_; + aclrtStream stream_; + int m_devId; + acldvppChannelDesc *dvppChannelDesc_ ; +}; \ No newline at end of file diff --git a/src/dvpp/depend_headers.h b/src/dvpp/depend_headers.h new file mode 100644 index 0000000..84788e3 --- /dev/null +++ b/src/dvpp/depend_headers.h @@ -0,0 +1,38 @@ +#ifndef __DEPEND_HEADERS_H__ +#define __DEPEND_HEADERS_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* +* 依赖模块外部的代码或库 +* 不要在此处添加模块内部的头文件 +*/ + +// ffmpeg 是c库 所以编译的时候要加入从 extern导入的C 来声明否则连接失败 +extern "C" { +#include "libavutil/imgutils.h" +#include "libavutil/samplefmt.h" +#include "libavformat/avformat.h" +#include "libavcodec/avcodec.h" +} + + +#include "../interface/logger.hpp" +#include "../interface/DeviceRgbMemory.hpp" +#include "../interface/interface_headers.h" +#include "../interface/utiltools.hpp" + +#endif \ No newline at end of file diff --git a/src/dvpp/dvpp_headers.h b/src/dvpp/dvpp_headers.h new file mode 100644 index 0000000..49e0e61 --- /dev/null +++ b/src/dvpp/dvpp_headers.h @@ -0,0 +1,31 @@ +/* +* 模块内部的头文件请在此处添加 +*/ + +#ifndef __DVPP_HEADERS_H__ 
+#define __DVPP_HEADERS_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "acl/acl_mdl.h" +#include "acl/acl_base.h" +#include "acl/acl_rt.h" +#include "acl/acl.h" +#include "acl/ops/acl_dvpp.h" + + +#endif + diff --git a/src/dvpp/threadsafe_queue.h b/src/dvpp/threadsafe_queue.h new file mode 100644 index 0000000..5a5b0f9 --- /dev/null +++ b/src/dvpp/threadsafe_queue.h @@ -0,0 +1,128 @@ + +#ifndef __THREADSAFE_QUEUE_H__ +#define __THREADSAFE_QUEUE_H__ + +#include +#include +#include +#include + +#include +using std::queue; +using namespace std; + +template +class ThreadedQueue : public queue { +public: + ThreadedQueue(); + ~ThreadedQueue(); + bool empty() const; + size_t size() const; + void push(const T& val); + void push(T& val); + bool pop(); + T& front(); + const T& front() const; + T& back(); + const T& back() const; + + void Put(T &data); + + T Take(); + void Get(T &data); + bool GetEmpty(); + + condition_variable *condition; + mutex *lock; +}; + +template +ThreadedQueue::ThreadedQueue() { + lock = new mutex; + condition = new condition_variable; +} + +template +ThreadedQueue::~ThreadedQueue() { + if(condition != nullptr){ + delete condition; + condition = nullptr; + } + if(lock != nullptr){ + delete lock; + lock = nullptr; + } +} + +template +T ThreadedQueue:: Take() +{ + std::unique_lock lk(*this->lock); // lock is a mutex pointer: dereference it, exactly as Put()/Get()/GetEmpty() do + this->condition->wait(lk, [this]{return !this->empty();}); + T val = this->front(); + this->pop(); + return val; +} + +template +void ThreadedQueue:: Put(T &data) +{ + std::unique_lock lk(*lock); + this->push(data); + this->condition->notify_one(); + return; +} + +template +void ThreadedQueue:: Get(T &data) +{ + std::unique_lock lk(*lock); + this->condition->wait(lk, [this]{return !this->empty();}); + data = this->front(); + this->pop(); +} + +template +bool ThreadedQueue::GetEmpty() +{ + std::unique_lock lk(*lock); + this->condition->wait(lk, 
[this]{return !this->empty();}); + return true; +} + + +template +bool ThreadedQueue::empty() const { + bool result = queue::empty(); + return result; +} + +template +size_t ThreadedQueue::size() const { + size_t result = queue::size(); + return result; +} + +template +void ThreadedQueue::push(T& val) { + queue::push(val); +} + + +template +T& ThreadedQueue::front() { + T& result = queue::front(); + return result; +} + +template +bool ThreadedQueue::pop() { + bool result = false; + if(!queue::empty()) { + queue::pop(); + result = true; + } + return result; +} + +#endif diff --git a/src/dvpp/user_mem.h b/src/dvpp/user_mem.h new file mode 100644 index 0000000..e6a7d11 --- /dev/null +++ b/src/dvpp/user_mem.h @@ -0,0 +1,33 @@ +#ifndef __USER_MEM_H__ +#define __USER_MEM_H__ + +#include +#include +#include +#include +#include "threadsafe_queue.h" + +#define ALIGN_MEM(val, align) (((val) % (align) == 0) ? (val) : (((val) / (align) + 1) * (align))) + +using namespace std; + +typedef enum { + RTSP_MEM, + VDEC_MEM, +} MemType; + +class MemNode{ +public: + uint8_t *bufAddr; + MemType memType; + + MemNode(){ + std::cout << "构造" << endl; + } + + ~MemNode(){ + std::cout << "析构" << std::endl; + } +} ; + +#endif \ No newline at end of file diff --git a/src/gb28181/FFGB28181Decoder.cpp b/src/gb28181/FFGB28181Decoder.cpp index 2207c98..68d4b8a 100644 --- a/src/gb28181/FFGB28181Decoder.cpp +++ b/src/gb28181/FFGB28181Decoder.cpp @@ -17,6 +17,9 @@ extern "C" { #include "common_header.h" +#include "../nvdecoder/GpuRgbMemory.hpp" +#include "../nvdecoder/cuda_kernels.h" + #define ECLOSED 0 #define ECLOSING 1 #define ERUNNING 2 @@ -365,4 +368,90 @@ bool FFGB28181Decoder::isSurport(FFDecConfig& cfg){ int FFGB28181Decoder::getCachedQueueLength(){ return m_rtpPtr->GetPsFrameListSize(); +} + +FFImgInfo* FFGB28181Decoder::snapshot(){ + + // 锁住停止队列消耗 + std::lock_guard l(m_snapshot_mutex); + + AVFrame * gpuFrame = nullptr; + + bool bFirst = true; + while(true){ + m_queue_mutex.lock(); + 
if(mFrameQueue.size() <= 0){ + m_queue_mutex.unlock(); + if(bFirst){ + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + bFirst = false; + continue; + }else{ + // 再进来说明前面已经等了 100 ms + // 100 ms都没有等到解码数据,则退出 + return nullptr; + } + } + + // 队列中数据大于1 + gpuFrame = mFrameQueue.front(); + m_queue_mutex.unlock(); + break; + } + + if (gpuFrame != nullptr && gpuFrame->format == AV_PIX_FMT_CUDA ){ + LOG_DEBUG("decode task: gpuid: {} width: {} height: {}", m_cfg.gpuid, gpuFrame->width, gpuFrame->height); + GpuRgbMemory* gpuMem = new GpuRgbMemory(3, gpuFrame->width, gpuFrame->height, getName(), m_cfg.gpuid , true); + + if (gpuMem->getMem() == nullptr){ + LOG_ERROR("new GpuRgbMemory failed !!!"); + return nullptr; + } + + cudaSetDevice(atoi(m_cfg.gpuid.c_str())); + cuda_common::setColorSpace( ITU_709, 0 ); + cudaError_t cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], gpuMem->getMem(), gpuFrame->width, gpuFrame->height); + cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + LOG_ERROR("CUDAToBGR failed failed !!!"); + return nullptr; + } + + unsigned char * pHwRgb = gpuMem->getMem(); + int channel = gpuMem->getChannel(); + int width = gpuMem->getWidth(); + int height = gpuMem->getHeight(); + + if (pHwRgb != nullptr && channel > 0 && width > 0 && height > 0){ + int nSize = channel * height * width; + + LOG_INFO("channel:{} height:{} width:{}", channel, height, width); + // unsigned char* cpu_data = new unsigned char[nSize]; + + unsigned char* cpu_data = (unsigned char *)av_malloc(nSize * sizeof(unsigned char)); + + cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + + delete gpuMem; + gpuMem = nullptr; + + FFImgInfo* imgInfo = new FFImgInfo(); + imgInfo->dec_name = m_dec_name; + imgInfo->pData = cpu_data; + imgInfo->height = height; + imgInfo->width = width; + imgInfo->timestamp = 
UtilTools::get_cur_time_ms(); + imgInfo->index = m_index; + + m_index++; + + return imgInfo; + } + + delete gpuMem; + gpuMem = nullptr; + } + + return nullptr; } \ No newline at end of file diff --git a/src/gb28181/FFGB28181Decoder.h b/src/gb28181/FFGB28181Decoder.h index 1f31a5b..9fee58f 100644 --- a/src/gb28181/FFGB28181Decoder.h +++ b/src/gb28181/FFGB28181Decoder.h @@ -44,6 +44,8 @@ public: DECODER_TYPE getDecoderType(){ return DECODER_TYPE_GB28181; } + FFImgInfo* snapshot(); + public: void stream_callback(int videoType, char* data, int len, int isKey, uint64_t pts, uint64_t localPts); void stream_end_callback(); @@ -74,6 +76,10 @@ private: AVDictionary *gpu_options = nullptr; pthread_t m_post_decode_thread; + + queue mFrameQueue; + mutex m_queue_mutex; + mutex m_snapshot_mutex; }; #endif // _GB28181_DECODER_H_ diff --git a/src/gb28181/Makefile b/src/gb28181/Makefile new file mode 100644 index 0000000..46094f5 --- /dev/null +++ b/src/gb28181/Makefile @@ -0,0 +1,53 @@ +# Directories +LIB_DIR:=$(BUILD_DIR)/$(MODULE)/lib +DEP_DIR:=$(BUILD_DIR)/$(MODULE)/.dep +OBJ_DIR:=$(BUILD_DIR)/$(MODULE)/obj +SRC_DIR:=$(TOP_DIR)/$(MODULE) + +# Source files, intermediate object files and dependency files +SRCS:=$(notdir $(wildcard $(SRC_DIR)/*.cpp)) +OBJS:=$(addprefix $(OBJ_DIR)/, $(patsubst %.cpp, %.o, $(SRCS))) +DEPS:=$(addprefix $(DEP_DIR)/, $(patsubst %.cpp, %.d, $(SRCS))) + +# Flags that auto-generate header dependency files +DEPFLAGS=-MT $@ -MMD -MP -MF $(DEP_DIR)/$*.d + +# Final target file +TARGET:=$(LIB_DIR)/$(MODULE).a + +# Default goal +.PHONY:all +all:$(TARGET) + +# Build the final target +$(TARGET):$(OBJS) | $(LIB_DIR) + @echo -e "\e[32m""Linking static library $(TARGET)""\e[0m" + @ar -rc $@ $^ + +# Create the lib directory if it does not exist +$(LIB_DIR): + @mkdir -p $@ + +# Build the intermediate object files +$(OBJ_DIR)/%.o:$(SRC_DIR)/%.cpp $(DEP_DIR)/%.d | $(OBJ_DIR) $(DEP_DIR) + @echo -e "\e[33m""Building object $@""\e[0m" + @$(CXX) -c $(DEPFLAGS) $(CXXFLAGS) $(INCS) $(MACROS) -o $@ $< + +# Create the obj directory if it does not exist +$(OBJ_DIR): + @mkdir -p $@ + +# Create the .dep directory if it does not exist +$(DEP_DIR): + @mkdir -p $@ + +# Dependency files are generated while the objects are compiled; this empty rule only keeps make from reporting an error +$(DEPS): + +# 
引入中间目标文件头文件依赖关系 +include $(wildcard $(DEPS)) + +# 直接删除组件build目录 +.PHONY:clean +clean: + @rm -rf $(BUILD_DIR)/$(MODULE) diff --git a/src/gb28181/common_header.h b/src/gb28181/common_header.h index 2f0c324..d5feed8 100644 --- a/src/gb28181/common_header.h +++ b/src/gb28181/common_header.h @@ -2,7 +2,7 @@ #define _COMMON_HEADER_H_ -#include "../logger.hpp" -#include "../utiltools.hpp" +#include "../interface/logger.hpp" +#include "../interface/utiltools.hpp" #endif \ No newline at end of file diff --git a/src/interface/AbstractDecoder.cpp b/src/interface/AbstractDecoder.cpp new file mode 100644 index 0000000..244dd45 --- /dev/null +++ b/src/interface/AbstractDecoder.cpp @@ -0,0 +1,25 @@ +#include "AbstractDecoder.h" + +#include "logger.hpp" +#include "utiltools.hpp" + + +bool AbstractDecoder::isSnapTime(){ + if(m_snap_time_interval <= 0){ + return false; + } + long cur_time = UtilTools::get_cur_time_ms(); + if(cur_time - m_last_snap_time > m_snap_time_interval){ + return true; + } + return false; +} + +void AbstractDecoder::updateLastSnapTime(){ + m_last_snap_time = UtilTools::get_cur_time_ms(); +} + +void AbstractDecoder::setSnapTimeInterval(long interval){ + m_snap_time_interval = interval; + m_last_snap_time = UtilTools::get_cur_time_ms(); +} \ No newline at end of file diff --git a/src/interface/AbstractDecoder.h b/src/interface/AbstractDecoder.h new file mode 100644 index 0000000..9f4cb3f --- /dev/null +++ b/src/interface/AbstractDecoder.h @@ -0,0 +1,54 @@ +#ifndef _ABSTRACT_DECODER_H_ +#define _ABSTRACT_DECODER_H_ + +#include "interface_headers.h" + +using namespace std; + +class AbstractDecoder{ +public: + virtual ~AbstractDecoder(){}; + virtual bool init(FFDecConfig& cfg) = 0; + virtual void close() = 0; + virtual bool start() = 0; + virtual void pause() = 0; + virtual void resume() = 0; + + virtual void setDecKeyframe(bool bKeyframe) = 0; + + virtual bool isRunning() = 0; + virtual bool isFinished() = 0; + virtual bool isPausing() = 0; + virtual bool 
getResolution( int &width, int &height ) = 0; + + virtual bool isSurport(FFDecConfig& cfg) = 0; + + virtual int getCachedQueueLength() = 0; + + virtual float fps() = 0; + + virtual DECODER_TYPE getDecoderType() = 0; + + virtual FFImgInfo* snapshot() = 0; + + virtual void setName(string nm) = 0; + + virtual string getName() = 0; + + virtual void setPostDecArg(const void* postDecArg) = 0; + virtual void setFinishedDecArg(const void* finishedDecArg) = 0; + +public: + bool isSnapTime(); + + void updateLastSnapTime(); + + void setSnapTimeInterval(long interval); + +public: + long m_snap_time_interval{-1}; + long m_last_snap_time; + long m_index{0}; +}; + +#endif // _ABSTRACT_DECODER_H_ \ No newline at end of file diff --git a/src/interface/DeviceRgbMemory.hpp b/src/interface/DeviceRgbMemory.hpp new file mode 100644 index 0000000..b5a3125 --- /dev/null +++ b/src/interface/DeviceRgbMemory.hpp @@ -0,0 +1,86 @@ +#ifndef __DEVICE_RGB_MEMORY_H__ +#define __DEVICE_RGB_MEMORY_H__ + +#include + +#include "utiltools.hpp" + +using namespace std; + +class DeviceRgbMemory{ + +public: + DeviceRgbMemory(int _channel, int _width, int _height, string _id, string _dev_id, bool _key_frame, bool _isused){ + channel = _channel; + width = _width; + height = _height; + data_size = channel * width * height; + isused = _isused; + id = _id; + device_id = _dev_id; + key_frame = _key_frame; + timestamp = UtilTools::get_cur_time_ms(); + } + + virtual ~DeviceRgbMemory(){} + + int getSize() { + return data_size; + } + + bool isIsused() { + return isused; + } + + void setIsused(bool _isused) { + isused = _isused; + // 更新时间戳 + timestamp = UtilTools::get_cur_time_ms(); + } + + string getId() { + return id; + } + + string getDeviceId() { + return device_id; + } + + unsigned char* getMem(){ + return pHwRgb; + } + + long long getTimesstamp(){ + return timestamp; + } + + int getWidth(){ + return width; + } + + int getHeight(){ + return height; + } + + int getChannel(){ + return channel; + } + + bool 
isKeyFrame(){ + return key_frame; + } + +public: + int data_size; + bool isused; + string id; + string device_id; + unsigned char * pHwRgb{nullptr}; + long long timestamp; + int width{0}; + int height{0}; + int channel{3}; + bool key_frame; +}; + +#endif \ No newline at end of file diff --git a/src/interface/FFNvDecoderManager.cpp b/src/interface/FFNvDecoderManager.cpp new file mode 100644 index 0000000..a32c4bb --- /dev/null +++ b/src/interface/FFNvDecoderManager.cpp @@ -0,0 +1,494 @@ +#include "FFNvDecoderManager.h" + +#ifdef USE_NVDEC +#include "../nvdecoder/FFNvDecoder.h" +#include "../gb28181/FFGB28181Decoder.h" +#endif + +#ifdef USE_DVPP +#include "./dvpp/DvppDecoderApi.h" +#endif + +#include "logger.hpp" + +using namespace std; + + +AbstractDecoder* FFNvDecoderManager::createDecoder(MgrDecConfig config){ + + closeAllFinishedDecoder(); + + if (config.cfg.post_decoded_cbk == nullptr || config.cfg.decode_finished_cbk== nullptr){ + return nullptr; + } + + std::lock_guard l(m_mutex); + + auto it = decoderMap.find(config.name); + if (it != decoderMap.end()){ + LOG_ERROR("已存在name为{}的解码器", config.name); + return nullptr; + } + + AbstractDecoder* dec = nullptr; +#ifdef USE_NVDEC + if(DECODER_TYPE_FFMPEG == config.dec_type){ + dec = new FFNvDecoder(); + } + + if(DECODER_TYPE_GB28181 == config.dec_type){ + dec = new FFGB28181Decoder(); + } +#endif + +#ifdef USE_DVPP + if(DECODER_TYPE_DVPP == config.dec_type){ + dec = new DvppDecoderApi(); + } +#endif + + if (dec == nullptr){ + LOG_ERROR("没有指定解码器类型"); + return nullptr; + } + + bool bRet= dec->init(config.cfg); + if (bRet) + { + dec->setName(config.name) ; + decoderMap[config.name] = dec; + + LOG_INFO("[{}][{}]- 解码器初始化成功",config.name, config.cfg.uri); + return dec; + } + + // 创建失败,关闭解码器 + dec->close(); + delete dec; + + LOG_ERROR("[{}][{}]- 解码器初始化失败!",config.name, config.cfg.uri); + return nullptr; +} + +bool FFNvDecoderManager::setPostDecArg(const string name, const void * userPtr) +{ + if (name.empty()) + { + 
LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + dec->second->setPostDecArg(userPtr); + return true; + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +bool FFNvDecoderManager::setFinishedDecArg(const string name, const void * userPtr) +{ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + dec->second->setFinishedDecArg(userPtr); + return true; + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +AbstractDecoder* FFNvDecoderManager::getDecoderByName(const string name) +{ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return nullptr; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + return dec->second; + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return nullptr; +} + +bool FFNvDecoderManager::startDecode(AbstractDecoder* dec){ + if (dec != nullptr && !dec->isRunning()) + { + return dec->start(); + } + return false; +} + +bool FFNvDecoderManager::startDecodeByName(const string name){ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + return dec->second->start(); + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +void FFNvDecoderManager::startAllDecode(){ + + std::lock_guard l(m_mutex); + + for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){ + if (!iter->second->isRunning()) + { + iter->second->start(); + } + } +} + +bool FFNvDecoderManager::closeDecoderByName(const string name){ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + dec->second->close(); + delete dec->second; + 
dec->second = nullptr; + decoderMap.erase(dec); + + return true; + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +void FFNvDecoderManager::closeAllDecoder() +{ + std::lock_guard l(m_mutex); + + for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){ + iter->second->close(); + delete iter->second; + iter->second = nullptr; + } + decoderMap.clear(); +} + +void FFNvDecoderManager::closeAllFinishedDecoder() +{ + std::lock_guard l(m_mutex); + + for(auto iter = decoderMap.begin(); iter != decoderMap.end(); ){ + if (iter->second->isFinished()) + { + delete iter->second; + iter->second = nullptr; + iter = decoderMap.erase(iter); + } + else + { + iter++ ; + } + } +} + +int FFNvDecoderManager::count() +{ + closeAllFinishedDecoder(); + + std::lock_guard l(m_mutex); + return decoderMap.size(); +} + +bool FFNvDecoderManager::pauseDecoder(const string name) +{ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + dec->second->pause(); + return true; + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +bool FFNvDecoderManager::resumeDecoder(const string name) +{ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + dec->second->resume(); + return true; + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +bool FFNvDecoderManager::isSurport(MgrDecConfig& config) +{ + { + std::lock_guard l(m_mutex); + + auto it = decoderMap.find(config.name); + if (it != decoderMap.end()){ + LOG_ERROR("已存在name所标记的解码器"); + return false; + } + } + + AbstractDecoder* dec = nullptr; +#ifdef USE_NVDEC + if(DECODER_TYPE_FFMPEG == config.dec_type){ + dec = new FFNvDecoder(); + } + + if(DECODER_TYPE_GB28181 == config.dec_type){ + dec = new FFGB28181Decoder(); + } +#endif + +#ifdef USE_DVPP + if(DECODER_TYPE_DVPP 
== config.dec_type){ + dec = new DvppDecoderApi(); + } +#endif + + if (dec == nullptr){ + LOG_ERROR("没有指定解码器类型"); + return false; + } + + bool bRet = dec->isSurport(config.cfg); + delete dec; + dec = nullptr; + + return bRet; +} + +bool FFNvDecoderManager::isRunning(const string name){ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + return dec->second->isRunning(); + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +bool FFNvDecoderManager::isFinished(const string name){ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + return dec->second->isFinished(); + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +bool FFNvDecoderManager::isPausing(const string name){ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + return dec->second->isPausing(); + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +bool FFNvDecoderManager::setDecKeyframe(const string name, bool bKeyframe) +{ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + dec->second->setDecKeyframe(bKeyframe); + return true; + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +bool FFNvDecoderManager::getResolution(const string name, int &width, int &height) +{ + if (name.empty()) + { + LOG_ERROR("name 为空!"); + return false; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()) + { + dec->second->getResolution(width, height); + return true; + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return false; +} + +vector 
FFNvDecoderManager::getAllDecodeName(){ + + closeAllFinishedDecoder(); + + std::lock_guard l(m_mutex); + + vector decode_names; + for(auto it = decoderMap.begin(); it != decoderMap.end(); ++it){ + decode_names.push_back(it->first); + } + return decode_names; +} + +int FFNvDecoderManager::getCachedQueueLength(const string name){ + if (name.empty()){ + LOG_ERROR("name 为空!"); + return -1; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()){ + return dec->second->getCachedQueueLength(); + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return -1; +} + +void FFNvDecoderManager::releaseFFImgInfo(FFImgInfo* info){ + if(nullptr != info){ + if(info->pData != nullptr){ + free(info->pData); + info->pData = nullptr; + } + delete info; + info = nullptr; + } +} + +FFImgInfo* FFNvDecoderManager::snapshot_in_task(const string name){ + if (name.empty()){ + LOG_ERROR("name 为空!"); + return nullptr; + } + + std::lock_guard l(m_mutex); + + auto dec = decoderMap.find(name); + if (dec != decoderMap.end()){ + return dec->second->snapshot(); + } + + LOG_ERROR("没有找到name为{}的解码器",name); + return nullptr; +} + +vector FFNvDecoderManager::timing_snapshot_all(){ + + closeAllFinishedDecoder(); + + std::lock_guard l(m_mutex); + + vector vec; + for(auto it = decoderMap.begin(); it != decoderMap.end(); ++it){ + if(it->second->isSnapTime()){ + FFImgInfo* imginfo = it->second->snapshot(); + if(imginfo != nullptr){ + vec.push_back(imginfo); + } + it->second->updateLastSnapTime(); + } + } + + return vec; +} \ No newline at end of file diff --git a/src/interface/FFNvDecoderManager.h b/src/interface/FFNvDecoderManager.h new file mode 100644 index 0000000..bb1c0de --- /dev/null +++ b/src/interface/FFNvDecoderManager.h @@ -0,0 +1,261 @@ +#include "AbstractDecoder.h" +#include +#include +#include + +#include + +using namespace std; + +struct MgrDecConfig +{ + DECODER_TYPE dec_type; // 解码器类型 + FFDecConfig cfg; // 解码器配置 + string name{""}; // 解码器名称 +}; + +// 
#define USE_NVDEC +#define USE_DVPP +/** + * Decoder manager class (singleton). + * Beware of deadlocks. + **/ +class FFNvDecoderManager { +public: + /************************************************** + * Interface: getInstance + * Purpose: obtain the decoder manager instance + * Params: none + * Returns: the decoder manager instance (never nullptr) + * Note: call this first to obtain the manager before using any other interface + **************************************************/ + static FFNvDecoderManager* getInstance(){ + /* A function-local static is initialized exactly once, and that + * initialization is thread-safe since C++11, unlike the previous + * unsynchronized null check. As before, the instance is never deleted. */ + static FFNvDecoderManager* singleton = new FFNvDecoderManager(); + return singleton; + } + + ~FFNvDecoderManager() + { + closeAllDecoder(); + } + + /************************************************** + * Interface: createDecoder + * Purpose: create a decoder from the given configuration + * Params: MgrDecConfig config decoder configuration + * Returns: the decoder on success, nullptr on failure + * Note: + **************************************************/ + AbstractDecoder* createDecoder(MgrDecConfig config); + + /************************************************** + * Interface: setPostDecArg + * Purpose: set the user-defined argument passed to the decoded-data callback + * Params: string name decoder name + * const void * userPtr user data to pass to the decoded-data callback + * Returns: true on success, false on failure + * Note: + **************************************************/ + bool setPostDecArg(const string name, const void * userPtr); + + /************************************************** + * Interface: setFinishedDecArg + * Purpose: set the user-defined argument passed to the decode-finished callback + * Params: string name decoder name + * const void * userPtr user data to pass to the decode-finished callback + * Returns: true on success, false on failure + * Note: + **************************************************/ + bool setFinishedDecArg(const string name, const void * userPtr); + + /************************************************** + * Interface: getDecoderByName + * Purpose: look up a decoder object by its name + * Params: const string name decoder name + * Returns: pointer to the matching decoder on success, nullptr on failure + * Note: + **************************************************/ + AbstractDecoder* getDecoderByName(const string name); + + /************************************************** + * Interface: startDecode + * Purpose: start decoding + * Params: AbstractDecoder* decoder pointer + * Returns: bool (presumably true on success - confirm against the implementation) + * Note: + **************************************************/ + bool 
startDecode(AbstractDecoder*); + + /************************************************** + * 接口:startAllDecode + * 功能:启动全部解码 + * 参数:void + * 返回:void + * 备注: + **************************************************/ + void startAllDecode(); + + /************************************************** + * 接口:startDecodeByName + * 功能:启动名称对应的解码器 + * 参数:string name 解码器名称 + * 返回:成功返回true,失败返回false + * 备注: + **************************************************/ + bool startDecodeByName(const string name); + + /************************************************** + * 接口:closeDecoderByName + * 功能:关闭解码器名称对应的解码 + * 参数:const string name 解码器名称 + * 返回:成功返回true,失败返回false + * 备注: + **************************************************/ + bool closeDecoderByName(const string name); + + /************************************************** + * 接口:closeAllDecoder + * 功能:关闭全部解码器 + * 参数:void + * 返回:void + * 备注: + **************************************************/ + void closeAllDecoder(); + + /************************************************** + * 接口:closeAllDecoderByGpuid + * 功能:关闭某张显卡撒花姑娘的全部解码器 + * 参数:const string gpuid gpu的id + * 返回:void + * 备注: + **************************************************/ + void closeAllDecoderByGpuid(const string gpuid); + + /************************************************** + * 接口:pauseDecoder + * 功能:暂停指定名称的解码器 + * 参数:const string name 解码器名称 + * 返回:成功返回true,失败返回false + * 备注: + **************************************************/ + bool pauseDecoder(const string name); + + /************************************************** + * 接口:pauseDecoder + * 功能:恢复指定名称的解码器 + * 参数:const string name 解码器名称 + * 返回:成功返回true,失败返回false + * 备注: + **************************************************/ + bool resumeDecoder(const string name); + + /************************************************** + * 接口:isSurport + * 功能:是否支持指定配置的解码 + * 参数:FFDecConfig& cfg 解码器配置 + * 返回:支持返回true,不支持返回false + * 备注: + **************************************************/ + bool isSurport(MgrDecConfig& config); + + 
/************************************************** + * 接口:isRunning + * 功能:根据解码器名称判断解码器是否正在运行 + * 参数:const string name 解码器名称 + * 返回:正在运行返回true,否则返回false + * 备注: + **************************************************/ + bool isRunning(const string name); + + /************************************************** + * 接口:isFinished + * 功能:根据解码器名称判断解码器是否已经结束 + * 参数:const string name 解码器名称 + * 返回:正在运行返回true,否则返回false + * 备注: + **************************************************/ + bool isFinished(const string name); + + /************************************************** + * 接口:isPausing + * 功能:根据解码器名称判断解码器是否暂停 + * 参数:const string name 解码器名称 + * 返回:正在运行返回true,否则返回false + * 备注: + **************************************************/ + bool isPausing(const string name); + + /************************************************** + * 接口:count + * 功能:获取正在运行的解码器数量 + * 参数:void + * 返回:正在运行的解码器数量 + * 备注: + **************************************************/ + int count(); + + /************************************************** + * 接口:setDecKeyframe + * 功能:设置是否只解码关键帧。默认全解 + * 参数:const string name 解码器名称 + * bool bKeyframe 是否只解码关键帧。true,只解码关键帧;false,普通的全解码 + * 返回:bool 成功返回true,失败返回false + * 备注: + **************************************************/ + bool setDecKeyframe(const string name, bool bKeyframe); + + /************************************************** + * 接口:getResolution + * 功能:获取视频分辨率 + * 参数:const string name 解码器名称 + * int &width 从 width 返回视频宽度 + * int &height 从 height 返回视频高度 + * 返回:bool 成功获取返回true,失败返回false + * 备注: + **************************************************/ + bool getResolution(const string name, int &width, int &height); + + /************************************************** + * 接口:getAllDecodeName + * 功能:获取全部解码器名称 + * 参数:void + * 返回:vector 返回全部解码器名称 + * 备注: + **************************************************/ + vector getAllDecodeName(); + + /************************************************** + * 接口:getCachedQueueLength + * 功能:获取解码缓冲队列当前长度 + * 参数:const string name 
解码器名称 + * 返回:int 解码缓冲队列当前长度 + * 备注: + **************************************************/ + int getCachedQueueLength(const string name); + + /************************************************** + * 接口:releaseFFImgInfo + * 功能:释放视频快照信息 + * 参数:FFImgInfo* info 视频快照信息 + * 返回:void + * 备注: + **************************************************/ + void releaseFFImgInfo(FFImgInfo* info); + + FFImgInfo* snapshot_in_task(const string name); + + vector timing_snapshot_all(); + +private: + FFNvDecoderManager(){} + + void closeAllFinishedDecoder(); + +private: + map decoderMap; + + mutex m_mutex; +}; \ No newline at end of file diff --git a/src/interface/Makefile b/src/interface/Makefile new file mode 100644 index 0000000..60c3103 --- /dev/null +++ b/src/interface/Makefile @@ -0,0 +1,55 @@ +# 各项目录 +LIB_DIR:=$(BUILD_DIR)/$(MODULE)/lib +DEP_DIR:=$(BUILD_DIR)/$(MODULE)/.dep +OBJ_DIR:=$(BUILD_DIR)/$(MODULE)/obj +SRC_DIR:=$(TOP_DIR)/$(MODULE) + +# 源文件以及中间目标文件和依赖文件 +SRCS:=$(notdir $(wildcard $(SRC_DIR)/*.cpp)) +OBJS:=$(addprefix $(OBJ_DIR)/, $(patsubst %.cpp, %.o, $(SRCS))) +DEPS:=$(addprefix $(DEP_DIR)/, $(patsubst %.cpp, %.d,a $(SRCS))) + +# 自动生成头文件依赖选项 +DEPFLAGS=-MT $@ -MMD -MP -MF $(DEP_DIR)/$*.d + +# 最终目标文件 +TARGET:=$(LIB_DIR)/$(MODULE).a + +MODULE_LIBS:=$(BUILD_DIR)/dvpp/lib/libdvpp.a\ + +# 默认最终目标 +.PHONY:all +all:$(TARGET) + +# 生成最终目标 +$(TARGET):$(OBJS) | $(LIB_DIR) + @echo -e "\e[32m""Linking static library $(TARGET)""\e[0m" + @ar -rc $@ $^ + +# 若没有lib目录则自动生成 +$(LIB_DIR): + @mkdir -p $@ + +# 生成中间目标文件 +$(OBJ_DIR)/%.o:$(SRC_DIR)/%.cpp $(DEP_DIR)/%.d | $(OBJ_DIR) $(DEP_DIR) + @echo -e "\e[33m""Building object $@""\e[0m" + @$(CXX) -c $(DEPFLAGS) $(CXXFLAGS) $(INCS) $(MACROS) -o $@ $(MODULE_LIBS) $< + +# 若没有obj目录则自动生成 +$(OBJ_DIR): + @mkdir -p $@ + +# 若没有.dep目录则自动生成 +$(DEP_DIR): + @mkdir -p $@ + +# 依赖文件会在生成中间文件的时候自动生成,这里只是为了防止报错 +$(DEPS): + +# 引入中间目标文件头文件依赖关系 +include $(wildcard $(DEPS)) + +# 直接删除组件build目录 +.PHONY:clean +clean: + @rm -rf $(BUILD_DIR)/$(MODULE) diff --git 
a/src/interface/interface_headers.h b/src/interface/interface_headers.h new file mode 100644 index 0000000..43edcc9 --- /dev/null +++ b/src/interface/interface_headers.h @@ -0,0 +1,59 @@ +#ifndef _INTERFACE_HEADERS_H_ +#define _INTERFACE_HEADERS_H_ + + +#include +#include +#include + +#include "DeviceRgbMemory.hpp" + +using namespace std; + +/************************************************** +* 接口:DXDECODER_CALLBACK +* 功能:解码数据回调接口 +* 参数:const dx_void * userPtr 用户自定义数据 +* AVFrame * gpuFrame 解码结果帧数据,在设置的对应的gpu上,要十分注意这一点,尤其是多线程情况 +* 返回:无 +* 备注:当解码库数据源为实时流时(RTSP/GB28181),本接 +* 口内不可进行阻塞/耗时操作。当解码库数据源为 +* 非实时流时(本地/网络文件),本接口可以进行 +* 阻塞/耗时操作 +**************************************************/ +typedef void(*POST_DECODE_CALLBACK)(const void * userPtr, DeviceRgbMemory* devFrame); + +typedef void(*DECODE_FINISHED_CALLBACK)(const void* userPtr); + +typedef bool(*DECODE_REQUEST_STREAM_CALLBACK)(const char* deviceId); + +struct FFDecConfig{ + string uri; // 视频地址 + POST_DECODE_CALLBACK post_decoded_cbk; // 解码数据回调接口 + DECODE_FINISHED_CALLBACK decode_finished_cbk; // 解码线程结束后的回调接口 + string gpuid; // gpu id + bool force_tcp{true}; // 是否指定使用tcp连接 + int skip_frame{1}; // 跳帧数 + string dec_name; + + int port; // gb28181接收数据的端口号 + DECODE_REQUEST_STREAM_CALLBACK request_stream_cbk; // gb28181请求流 +}; + +enum DECODER_TYPE{ + DECODER_TYPE_GB28181, + DECODER_TYPE_FFMPEG, + DECODER_TYPE_DVPP +}; + +struct FFImgInfo{ + string dec_name; + int width; + int height; + unsigned char * pData; + int data_type; // 默认0=rgb, 1=nv12 + long timestamp; + long index; +}; + +#endif \ No newline at end of file diff --git a/src/interface/logger.hpp b/src/interface/logger.hpp new file mode 100644 index 0000000..1d67fea --- /dev/null +++ b/src/interface/logger.hpp @@ -0,0 +1,344 @@ +/* + * @Author: yangzilong + * @Date: 2021-12-21 11:07:11 + * @Last Modified by: yangzilong + * @Email: yangzilong@objecteye.com + * @Description: + */ + +#pragma once + +// #include "define.hpp" +#include +#include +#include +#include 
+#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) + +#define LOG_TRACE_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_TRACE(logger, __VA_ARGS__);} +#define LOG_DEBUG_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_DEBUG(logger, __VA_ARGS__);} +#define LOG_WARN_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_WARN(logger, __VA_ARGS__);} +#define LOG_ERROR_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_ERROR(logger, __VA_ARGS__);} +#define LOG_INFO_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_INFO(logger, __VA_ARGS__);} +#define LOG_CRITICAL_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_CRITICAL(logger, __VA_ARGS__);} + + +// use fmt lib, e.g. LOG_WARN("warn log, {1}, {1}, {2}", 1, 2); +#define LOG_TRACE(msg, ...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::trace, msg, ##__VA_ARGS__) +#define LOG_DEBUG(msg, ...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::debug, msg, ##__VA_ARGS__) +#define LOG_INFO(msg,...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::info, msg, ##__VA_ARGS__) +#define LOG_WARN(msg,...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::warn, msg, ##__VA_ARGS__) +#define LOG_ERROR(msg,...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::err, msg, ##__VA_ARGS__) +#define LOG_FATAL(msg,...) 
spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::critical, msg, ##__VA_ARGS__) + + + +namespace spdlog +{ + namespace sinks + { + template + class easy_file_sink final : public base_sink + { + public: + easy_file_sink(filename_t base_filename, size_t max_size, size_t max_keep_days = 0) + : base_filename_(std::move(base_filename)) + , max_size_(max_size) + , max_keep_days_(max_keep_days) + { + auto now = log_clock::now(); + auto filename = gen_filename_by_daliy(base_filename_, now_tm(now)); + + file_helper_.open(filename, false); + current_size_ = file_helper_.size(); + rotation_tp_ = next_rotation_tp_(); + + if (max_keep_days_ > 0) + { + filespath_q_.push_back(std::move(std::set())); + filespath_q_[filespath_q_.size() - 1].insert(filename); + } + } + + filename_t filename() + { + std::lock_guard lock(base_sink::mutex_); + return file_helper_.filename(); + } + + protected: + void sink_it_(const details::log_msg &msg) override + { + memory_buf_t formatted; + base_sink::formatter_->format(msg, formatted); + current_size_ += formatted.size(); + + auto time = msg.time; + if (time >= rotation_tp_) + { + file_helper_.close(); + auto filename = gen_filename_by_daliy(base_filename_, now_tm(time)); + file_helper_.open(filename, false); + current_size_ = file_helper_.size(); + rotation_tp_ = next_rotation_tp_(); + + { + filespath_q_.push_back(std::move(std::set())); + filespath_q_[filespath_q_.size() - 1].emplace(filename); + } + + // Do the cleaning only at the end because it might throw on failure. + if (max_keep_days_ > 0 && filespath_q_.size() > max_keep_days_) + delete_old_(); + } + else if (current_size_ >= max_size_) + { + file_helper_.close(); + auto src_name = gen_filename_by_daliy(base_filename_, now_tm(time)); + auto target_name = gen_filename_by_filesize(base_filename_, now_tm(time), filespath_q_[filespath_q_.size() - 1].size()); + + // rename file if failed then us `target_name` as src_name. 
+ if (!rename_file_(src_name, target_name)) + { + details::os::sleep_for_millis(200); + if (!rename_file_(src_name, target_name)) + { + fprintf(stderr, "%s:%d rename %s to %s failed\n", __FILENAME__, __LINE__, src_name.c_str(), target_name.c_str()); + src_name = target_name; + } + } + + filespath_q_[filespath_q_.size() - 1].emplace(src_name); + if (src_name != target_name) + filespath_q_[filespath_q_.size() - 1].emplace(target_name); + + file_helper_.open(src_name, false); + current_size_ = file_helper_.size(); + rotation_tp_ = next_rotation_tp_(); + } + + file_helper_.write(formatted); + + + } + + void flush_() override + { + file_helper_.flush(); + } + + private: + + tm now_tm(log_clock::time_point tp) + { + time_t tnow = log_clock::to_time_t(tp); + return spdlog::details::os::localtime(tnow); + } + + /** + * @brief Get next day tm. + * + * @return log_clock::time_point + */ + log_clock::time_point next_rotation_tp_() + { + auto now = log_clock::now(); + tm date = now_tm(now); + date.tm_hour = 0; + date.tm_min = 0; + date.tm_sec = 0; + auto rotation_time = log_clock::from_time_t(std::mktime(&date)); + if (rotation_time > now) + return rotation_time; + return {rotation_time + std::chrono::hours(24)}; + } + + // Delete the file N rotations ago. + // Throw spdlog_ex on failure to delete the old file. 
+ void delete_old_() + { + for (auto iter = filespath_q_.begin(); iter != filespath_q_.end();) + { + if (filespath_q_.size() <= max_keep_days_) + break; + + for (auto it = iter->begin(); it != iter->end(); ++it) + { + bool ok = details::os::remove_if_exists(*it) == 0; + if (!ok) + throw_spdlog_ex("Failed removing daily file " + details::os::filename_to_str(*it), errno); + } + iter = filespath_q_.erase(iter); /* erase() invalidates iter; resume from the iterator it returns */ + } + } + + /* Build "<base>_YYYY_MM_DD<ext>" from the given local date. */ + static filename_t gen_filename_by_daliy(const filename_t &filename, const tm &now_tm) + { + filename_t basename, ext; + std::tie(basename, ext) = details::file_helper::split_by_extension(filename); + return fmt::format(SPDLOG_FILENAME_T("{}_{:04d}_{:02d}_{:02d}{}"), + basename, + now_tm.tm_year + 1900, + now_tm.tm_mon + 1, + now_tm.tm_mday, + ext); + } + + /* Build "<base>_YYYY_MM_DD_HHMMSS.<idx><ext>" from the given local time and index. */ + static filename_t gen_filename_by_filesize(const filename_t &filename, const tm &now_tm, const int &idx) + { + filename_t basename, ext; + std::tie(basename, ext) = details::file_helper::split_by_extension(filename); + return fmt::format(SPDLOG_FILENAME_T("{}_{:04d}_{:02d}_{:02d}_{:02d}{:02d}{:02d}.{:d}{}"), + basename, + now_tm.tm_year + 1900, + now_tm.tm_mon + 1, + now_tm.tm_mday, + now_tm.tm_hour, + now_tm.tm_min, + now_tm.tm_sec, + idx, + ext); + } + + static bool rename_file_(const filename_t &src_filename, const filename_t &target_filename) + { + (void)details::os::remove(target_filename); + return details::os::rename(src_filename, target_filename) == 0; + } + + filename_t base_filename_; + log_clock::time_point rotation_tp_; + details::file_helper file_helper_; + std::size_t max_size_; + std::size_t max_keep_days_; + std::size_t current_size_; + // std::vector<> filespath_q_; + std::vector> filespath_q_; + }; + + using easy_file_sink_mt = easy_file_sink; + using easy_file_sink_st = easy_file_sink; + + } // namespace sinks + + template + inline std::shared_ptr easy_logger_mt( + const std::string &logger_name, const filename_t &filename, size_t max_size, size_t max_keep_days = -1) + { + 
return Factory::template create(logger_name, filename, max_size, max_keep_days); + } + + template + inline std::shared_ptr easy_logger_st( + const std::string &logger_name, const filename_t &filename, size_t max_size, size_t max_keep_days = -1) + { + return Factory::template create(logger_name, filename, max_size, max_keep_days); + } + +} // namespace spdlog + + +enum class LogLevel +{ + CLOSE = -1, + TRACE = 0, + DEBUG = 1, + INFO = 2, + WARN = 3, + ERROR = 4, + FATAL = 5, +}; + + +class LoggerGenerator +{ +public: + static LoggerGenerator* get_instance() + { + static LoggerGenerator logger; + return &logger; + } + + void destory(LoggerGenerator *ptr) + { + if (ptr != nullptr && ptr != get_instance()) /* the singleton is a function-local static and must never be deleted */ + { + delete ptr; + ptr = nullptr; + } + } + + std::shared_ptr gen_logger(const LogLevel &level, const std::string &logger_name, + const std::string &file_path, size_t max_file_size, size_t max_keep_days) + { + spdlog::level::level_enum spd_level = spdlog::level::info; /* defined fallback when level matches no LogLevel enumerator */ + if (LogLevel::TRACE == level) + spd_level = spdlog::level::trace; + else if (LogLevel::DEBUG == level) + spd_level = spdlog::level::debug; + else if (LogLevel::INFO == level) + spd_level = spdlog::level::info; + else if (LogLevel::WARN == level) + spd_level = spdlog::level::warn; + else if (LogLevel::ERROR == level) + spd_level = spdlog::level::err; + else if (LogLevel::FATAL == level) + spd_level = spdlog::level::critical; + else if (LogLevel::CLOSE == level) + spd_level = spdlog::level::off; + + auto sink_ptr = std::make_shared(file_path, max_file_size, max_keep_days); + auto logger = std::make_shared(logger_name, sink_ptr); + logger->set_level(spd_level); + logger->set_pattern("%s(%#): [%L %D %T.%e %P %t %!] 
%v"); + + return logger; + } + + void set_default_logger(const LogLevel &level, const std::string &logger_name, + const std::string &file_name, size_t max_file_size, size_t max_keep_days) + { + + auto logger = gen_logger(level, logger_name, file_name, max_file_size, max_keep_days); + spdlog::set_default_logger(logger); + spdlog::set_level(logger->level()); + spdlog::set_pattern("%s(%#): [%L %D %T.%e %P %t %!] %v"); + + spdlog::flush_on(spdlog::level::trace); + spdlog::flush_every(std::chrono::seconds(1)); + } + +}; + + +static void set_default_logger(const LogLevel &level, const std::string &logger_name, + const std::string &file_path, size_t max_file_size, size_t max_keep_days) +{ + static LoggerGenerator loggerGenerator; + loggerGenerator.set_default_logger(level, logger_name, file_path, max_file_size, max_keep_days); +} + + +static std::shared_ptr get_simple_logger(const LogLevel &level, const std::string &logger_name, + const std::string &file_path, size_t max_file_size, size_t max_keep_days) +{ + static LoggerGenerator loggerGenerator; + return loggerGenerator.gen_logger(level, logger_name, file_path, max_file_size, max_keep_days); +} diff --git a/src/interface/utiltools.hpp b/src/interface/utiltools.hpp new file mode 100644 index 0000000..8caff91 --- /dev/null +++ b/src/interface/utiltools.hpp @@ -0,0 +1,18 @@ +#ifndef _UTIL_TOOLS_HPP_ +#define _UTIL_TOOLS_HPP_ + +#include + +using namespace std; + +namespace UtilTools{ + + static long get_cur_time_ms() { + chrono::time_point tpMicro + = chrono::time_point_cast(chrono::system_clock::now()); + return tpMicro.time_since_epoch().count(); + } + +} + +#endif \ No newline at end of file diff --git a/src/jpegNPP.cpp-1 b/src/jpegNPP.cpp-1 deleted file mode 100644 index f0bf2e6..0000000 --- a/src/jpegNPP.cpp-1 +++ /dev/null @@ -1,1193 +0,0 @@ -/* -* Copyright 1993-2015 NVIDIA Corporation. All rights reserved. -* -* NOTICE TO USER: -* -* This source code is subject to NVIDIA ownership rights under U.S. 
and -* international Copyright laws. -* -* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE -* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR -* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH -* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF -* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. -* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, -* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -* OR PERFORMANCE OF THIS SOURCE CODE. -* -* U.S. Government End Users. This source code is a "commercial item" as -* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of -* "commercial computer software" and "commercial computer software -* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) -* and is provided to the U.S. Government only as a commercial end item. -* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through -* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the -* source code with only those rights set forth herein. -*/ - -// This sample needs at least CUDA 5.5 and a GPU that has at least Compute Capability 2.0 - -// This sample demonstrates a simple image processing pipeline. -// First, a JPEG file is huffman decoded and inverse DCT transformed and dequantized. -// Then the different planes are resized. Finally, the resized image is quantized, forward -// DCT transformed and huffman encoded. 
- -#include "cuda_kernels.h" - -#include -#include -#include "common/UtilNPP/Exceptions.h" - -#include "Endianess.h" -#include - -#include -#include -#include - -#include "common/inc/helper_string.h" -#include "common/inc/helper_cuda.h" -//#include "MacroDef.h" -#include "cuda.h" - -using namespace std; - -struct FrameHeader -{ - unsigned char nSamplePrecision; - unsigned short nHeight; - unsigned short nWidth; - unsigned char nComponents; - unsigned char aComponentIdentifier[3]; - unsigned char aSamplingFactors[3]; - unsigned char aQuantizationTableSelector[3]; -}; - -struct ScanHeader -{ - unsigned char nComponents; - unsigned char aComponentSelector[3]; - unsigned char aHuffmanTablesSelector[3]; - unsigned char nSs; - unsigned char nSe; - unsigned char nA; -}; - -struct QuantizationTable -{ - unsigned char nPrecisionAndIdentifier; - unsigned char aTable[64]; -}; - -struct HuffmanTable -{ - unsigned char nClassAndIdentifier; - unsigned char aCodes[16]; - unsigned char aTable[256]; -}; - -//??准?炼??藕?量??模?? -//unsigned char std_Y_QT[64] = -//{ -// 16, 11, 10, 16, 24, 40, 51, 61, -// 12, 12, 14, 19, 26, 58, 60, 55, -// 14, 13, 16, 24, 40, 57, 69, 56, -// 14, 17, 22, 29, 51, 87, 80, 62, -// 18, 22, 37, 56, 68, 109, 103, 77, -// 24, 35, 55, 64, 81, 104, 113, 92, -// 49, 64, 78, 87, 103, 121, 120, 101, -// 72, 92, 95, 98, 112, 100, 103, 99 -//}; -// -////??准色???藕?量??模?? -//unsigned char std_UV_QT[64] = -//{ -// 17, 18, 24, 47, 99, 99, 99, 99, -// 18, 21, 26, 66, 99, 99, 99, 99, -// 24, 26, 56, 99, 99, 99, 99, 99, -// 47, 66, 99, 99, 99, 99, 99, 99, -// 99, 99, 99, 99, 99, 99, 99, 99, -// 99, 99, 99, 99, 99, 99, 99, 99, -// 99, 99, 99, 99, 99, 99, 99, 99, -// 99, 99, 99, 99, 99, 99, 99, 99 -//}; - -////?炼??藕?量??模?? 
-//unsigned char std_Y_QT[64] = -//{ -// 6, 4, 5, 6, 5, 4, 6, 6, -// 5, 6, 7, 7, 6, 8, 10, 16, -// 10, 10, 9, 9, 10, 20, 14, 15, -// 12, 16, 23, 20, 24, 24, 23, 20, -// 22, 22, 26, 29, 37, 31, 26, 27, -// 35, 28, 22, 22, 32, 44, 32, 35, -// 38, 39, 41, 42, 41, 25, 31, 45, -// 48, 45, 40, 48, 37, 40, 41, 40 -//}; -// -////色???藕?量??模?? -//unsigned char std_UV_QT[64] = -//{ -// 7, 7, 7, 10, 8, 10, 19, 10, -// 10, 19, 40, 26, 22, 26, 40, 40, -// 40, 40, 40, 40, 40, 40, 40, 40, -// 40, 40, 40, 40, 40, 40, 40, 40, -// 40, 40, 40, 40, 40, 40, 40, 40, -// 40, 40, 40, 40, 40, 40, 40, 40, -// 40, 40, 40, 40, 40, 40, 40, 40, -// 40, 40, 40, 40, 40, 40, 40, 40 -//}; - -//?炼??藕?量??模?? -unsigned char std_Y_QT[64] = -{ - 0.75 * 6, 0.75 * 4, 0.75 * 5, 0.75 * 6, 0.75 * 5, 0.75 * 4, 0.75 * 6, 0.75 * 6, - 0.75 * 5, 0.75 * 6, 0.75 * 7, 0.75 * 7, 0.75 * 6, 0.75 * 8, 0.75 * 10, 0.75 * 16, - 0.75 * 10, 0.75 * 10, 0.75 * 9, 0.75 * 9, 0.75 * 10, 0.75 * 20, 0.75 * 14, 0.75 * 15, - 0.75 * 12, 0.75 * 16, 0.75 * 23, 0.75 * 20, 0.75 * 24, 0.75 * 24, 0.75 * 23, 0.75 * 20, - 0.75 * 22, 0.75 * 22, 0.75 * 26, 0.75 * 29, 0.75 * 37, 0.75 * 31, 0.75 * 26, 0.75 * 27, - 0.75 * 35, 0.75 * 28, 0.75 * 22, 0.75 * 22, 0.75 * 32, 0.75 * 44, 0.75 * 32, 0.75 * 35, - 0.75 * 38, 0.75 * 39, 0.75 * 41, 0.75 * 42, 0.75 * 41, 0.75 * 25, 0.75 * 31, 0.75 * 45, - 0.75 * 48, 0.75 * 45, 0.75 * 40, 0.75 * 48, 0.75 * 37, 0.75 * 40, 0.75 * 41, 0.75 * 40 -}; - -//色???藕?量??模?? 
-unsigned char std_UV_QT[64] = -{ - 0.75 * 7, 0.75 * 7, 0.75 * 7, 0.75 * 10, 0.75 * 8, 0.75 * 10, 0.75 * 19, 0.75 * 10, - 0.75 * 10, 0.75 * 19, 0.75 * 40, 0.75 * 26, 0.75 * 22, 0.75 * 26, 0.75 * 40, 0.75 * 40, - 30, 30, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 30, 30, 30, 30, 30 -}; - -unsigned char STD_DC_Y_NRCODES[16] = { 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; -unsigned char STD_DC_Y_VALUES[12] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; - -unsigned char STD_DC_UV_NRCODES[16] = { 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; -unsigned char STD_DC_UV_VALUES[12] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; - -unsigned char STD_AC_Y_NRCODES[16] = { 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0X7D }; -unsigned char STD_AC_Y_VALUES[162] = -{ - 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, - 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, - 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, - 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, - 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, - 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, - 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, - 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, - 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, - 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, - 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, - 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, - 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, - 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, - 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, - 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, - 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, - 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, - 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, - 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, - 0xf9, 0xfa -}; - -unsigned char STD_AC_UV_NRCODES[16] = { 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 
2, 0X77 }; -unsigned char STD_AC_UV_VALUES[162] = -{ - 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, - 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, - 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, - 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, - 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, - 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, - 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, - 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, - 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, - 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, - 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, - 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, - 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, - 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, - 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, - 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, - 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, - 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, - 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, - 0xf9, 0xfa -}; - -int DivUp(int x, int d) -{ - return (x + d - 1) / d; -} - -template -void writeAndAdvance(unsigned char *&pData, T nElement) -{ - writeBigEndian(pData, nElement); - pData += sizeof(T); -} - -void writeMarker(unsigned char nMarker, unsigned char *&pData) -{ - *pData++ = 0x0ff; - *pData++ = nMarker; -} - -void writeJFIFTag(unsigned char *&pData) -{ - const char JFIF_TAG[] = - { - 0x4a, 0x46, 0x49, 0x46, 0x00, - 0x01, 0x02, - 0x00, - 0x00, 0x01, 0x00, 0x01, - 0x00, 0x00 - }; - - writeMarker(0x0e0, pData); - writeAndAdvance(pData, sizeof(JFIF_TAG) + sizeof(unsigned short)); - memcpy(pData, JFIF_TAG, sizeof(JFIF_TAG)); - pData += sizeof(JFIF_TAG); -} - -void writeFrameHeader(const FrameHeader &header, unsigned char *&pData) -{ - unsigned char aTemp[128]; - unsigned char *pTemp = aTemp; - - writeAndAdvance(pTemp, header.nSamplePrecision); - writeAndAdvance(pTemp, header.nHeight); - writeAndAdvance(pTemp, 
header.nWidth); - writeAndAdvance(pTemp, header.nComponents); - - for (int c = 0; c(pTemp, header.aComponentIdentifier[c]); - writeAndAdvance(pTemp, header.aSamplingFactors[c]); - writeAndAdvance(pTemp, header.aQuantizationTableSelector[c]); - } - - unsigned short nLength = (unsigned short)(pTemp - aTemp); - - writeMarker(0x0C0, pData); - writeAndAdvance(pData, nLength + 2); - memcpy(pData, aTemp, nLength); - pData += nLength; -} - -void writeScanHeader(const ScanHeader &header, unsigned char *&pData) -{ - unsigned char aTemp[128]; - unsigned char *pTemp = aTemp; - - writeAndAdvance(pTemp, header.nComponents); - - for (int c = 0; c(pTemp, header.aComponentSelector[c]); - writeAndAdvance(pTemp, header.aHuffmanTablesSelector[c]); - } - - writeAndAdvance(pTemp, header.nSs); - writeAndAdvance(pTemp, header.nSe); - writeAndAdvance(pTemp, header.nA); - - unsigned short nLength = (unsigned short)(pTemp - aTemp); - - writeMarker(0x0DA, pData); - writeAndAdvance(pData, nLength + 2); - memcpy(pData, aTemp, nLength); - pData += nLength; -} - -void writeQuantizationTable(const QuantizationTable &table, unsigned char *&pData) -{ - writeMarker(0x0DB, pData); - writeAndAdvance(pData, sizeof(QuantizationTable) + 2); - memcpy(pData, &table, sizeof(QuantizationTable)); - pData += sizeof(QuantizationTable); -} - -void writeHuffmanTable(const HuffmanTable &table, unsigned char *&pData) -{ - writeMarker(0x0C4, pData); - - // Number of Codes for Bit Lengths [1..16] - int nCodeCount = 0; - - for (int i = 0; i < 16; ++i) - { - nCodeCount += table.aCodes[i]; - } - - writeAndAdvance(pData, 17 + nCodeCount + 2); - memcpy(pData, &table, 17 + nCodeCount); - pData += 17 + nCodeCount; -} - -bool printfNPPinfo(int cudaVerMajor, int cudaVerMinor) -{ - const NppLibraryVersion *libVer = nppGetLibVersion(); - - printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build); - - int driverVersion, runtimeVersion; - cudaDriverGetVersion(&driverVersion); - 
cudaRuntimeGetVersion(&runtimeVersion); - - printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000, (driverVersion % 100) / 10); - printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, (runtimeVersion % 100) / 10); - - bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor); - return bVal; -} - -NppiDCTState *pDCTState; -FrameHeader oFrameHeader; -FrameHeader oFrameHeaderFixedSize; -ScanHeader oScanHeader; -QuantizationTable aQuantizationTables[4]; -Npp8u *pdQuantizationTables; -HuffmanTable aHuffmanTables[4]; -HuffmanTable *pHuffmanDCTables; -HuffmanTable *pHuffmanACTables; -int nMCUBlocksH; -int nMCUBlocksV; -int nMCUBlocksHFixedSize; -int nMCUBlocksVFixedSize; -Npp8u *pdScan; -NppiEncodeHuffmanSpec *apHuffmanDCTable[3]; -NppiEncodeHuffmanSpec *apHuffmanACTable[3]; -unsigned char *pDstJpeg; -unsigned char *pDstOutput; -int nRestartInterval; - -int initTable() -{ - NPP_CHECK_NPP(nppiDCTInitAlloc(&pDCTState)); - - nRestartInterval = -1; - - cudaMalloc(&pdQuantizationTables, 64 * 4); - pHuffmanDCTables = aHuffmanTables; - pHuffmanACTables = &aHuffmanTables[2]; - memset(aQuantizationTables, 0, 4 * sizeof(QuantizationTable)); - memset(aHuffmanTables, 0, 4 * sizeof(HuffmanTable)); - memset(&oFrameHeader, 0, sizeof(FrameHeader)); - - - //????Huffman?? - aHuffmanTables[0].nClassAndIdentifier = 0; - memcpy(aHuffmanTables[0].aCodes, STD_DC_Y_NRCODES, 16); - memcpy(aHuffmanTables[0].aTable, STD_DC_Y_VALUES, 12); - - aHuffmanTables[1].nClassAndIdentifier = 1; - memcpy(aHuffmanTables[1].aCodes, STD_DC_UV_NRCODES, 16); - memcpy(aHuffmanTables[1].aTable, STD_DC_UV_VALUES, 12); - - aHuffmanTables[2].nClassAndIdentifier = 16; - memcpy(aHuffmanTables[2].aCodes, STD_AC_Y_NRCODES, 16); - memcpy(aHuffmanTables[2].aTable, STD_AC_Y_VALUES, 162); - - aHuffmanTables[3].nClassAndIdentifier = 17; - memcpy(aHuffmanTables[3].aCodes, STD_AC_UV_NRCODES, 16); - memcpy(aHuffmanTables[3].aTable, STD_AC_UV_VALUES, 162); - - - //????量???? 
- aQuantizationTables[0].nPrecisionAndIdentifier = 0; - memcpy(aQuantizationTables[0].aTable, std_Y_QT, 64); - aQuantizationTables[1].nPrecisionAndIdentifier = 1; - memcpy(aQuantizationTables[1].aTable, std_UV_QT, 64); - - NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables, aQuantizationTables[0].aTable, 64, cudaMemcpyHostToDevice)); - NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables + 64, aQuantizationTables[1].aTable, 64, cudaMemcpyHostToDevice)); - - oFrameHeader.nSamplePrecision = 8; - oFrameHeader.nComponents = 3; - oFrameHeader.aComponentIdentifier[0] = 1; - oFrameHeader.aComponentIdentifier[1] = 2; - oFrameHeader.aComponentIdentifier[2] = 3; - oFrameHeader.aSamplingFactors[0] = 34; - oFrameHeader.aSamplingFactors[1] = 17; - oFrameHeader.aSamplingFactors[2] = 17; - oFrameHeader.aQuantizationTableSelector[0] = 0; - oFrameHeader.aQuantizationTableSelector[1] = 1; - oFrameHeader.aQuantizationTableSelector[2] = 1; - - for (int i = 0; i < oFrameHeader.nComponents; ++i) - { - nMCUBlocksV = max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] & 0x0f); - nMCUBlocksH = max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] >> 4); - } - NPP_CHECK_CUDA(cudaMalloc(&pdScan, 4 << 20)); - - - - oScanHeader.nComponents = 3; - oScanHeader.aComponentSelector[0] = 1; - oScanHeader.aComponentSelector[1] = 2; - oScanHeader.aComponentSelector[2] = 3; - oScanHeader.aHuffmanTablesSelector[0] = 0; - oScanHeader.aHuffmanTablesSelector[1] = 17; - oScanHeader.aHuffmanTablesSelector[2] = 17; - oScanHeader.nSs = 0; - oScanHeader.nSe = 63; - oScanHeader.nA = 0; - - - return 0; -} - -NppiSize aSrcSize[3]; -Npp16s *apdDCT[3];// = { 0, 0, 0 }; -Npp32s aDCTStep[3]; - -Npp8u *apSrcImage[3];// = { 0, 0, 0 }; -Npp32s aSrcImageStep[3]; -size_t aSrcPitch[3]; - - -int releaseJpegNPP() -{ - nppiDCTFree(pDCTState); - cudaFree(pdQuantizationTables); - cudaFree(pdScan); - for (int i = 0; i < 3; ++i) - { - cudaFree(apdDCT[i]); - cudaFree(apSrcImage[i]); - } - return 0; -} - - -int initTable(int flag, int 
width, int height) -{ - //????帧头 - oFrameHeaderFixedSize.nSamplePrecision = 8; - oFrameHeaderFixedSize.nComponents = 3; - oFrameHeaderFixedSize.aComponentIdentifier[0] = 1; - oFrameHeaderFixedSize.aComponentIdentifier[1] = 2; - oFrameHeaderFixedSize.aComponentIdentifier[2] = 3; - oFrameHeaderFixedSize.aSamplingFactors[0] = 34; - oFrameHeaderFixedSize.aSamplingFactors[1] = 17; - oFrameHeaderFixedSize.aSamplingFactors[2] = 17; - oFrameHeaderFixedSize.aQuantizationTableSelector[0] = 0; - oFrameHeaderFixedSize.aQuantizationTableSelector[1] = 1; - oFrameHeaderFixedSize.aQuantizationTableSelector[2] = 1; - oFrameHeaderFixedSize.nWidth = width; - oFrameHeaderFixedSize.nHeight = height; - - for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i) - { - nMCUBlocksVFixedSize = max(nMCUBlocksVFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f); - nMCUBlocksHFixedSize = max(nMCUBlocksHFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] >> 4); - } - - for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i) - { - NppiSize oBlocks; - NppiSize oBlocksPerMCU = { oFrameHeaderFixedSize.aSamplingFactors[i] >> 4, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f }; - - oBlocks.width = (int)ceil((oFrameHeaderFixedSize.nWidth + 7) / 8 * - static_cast(oBlocksPerMCU.width) / nMCUBlocksHFixedSize); - oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; - - oBlocks.height = (int)ceil((oFrameHeaderFixedSize.nHeight + 7) / 8 * - static_cast(oBlocksPerMCU.height) / nMCUBlocksVFixedSize); - oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; - - aSrcSize[i].width = oBlocks.width * 8; - aSrcSize[i].height = oBlocks.height * 8; - - // Allocate Memory - size_t nPitch; - NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height)); - aDCTStep[i] = static_cast(nPitch); - - NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height)); - - 
aSrcPitch[i] = nPitch; - aSrcImageStep[i] = static_cast(nPitch); - } - - return 0; -} - -int jpegNPP(const char *szOutputFile, float* d_srcRGB) -{ - //RGB2YUV - cudaError_t cudaStatus; - cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight, - apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, - apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, - apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height); - - /** - * Forward DCT, quantization and level shift part of the JPEG encoding. - * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 - * macro blocks. The new version of the primitive takes the ROI in image pixel size and - * works with DCT coefficients that are in zig-zag order. - */ - int k = 0; - //LOG_INFO("NPP_CHECK_NPP:%d", 1); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], - apdDCT[0], aDCTStep[0], - pdQuantizationTables + k * 64, - aSrcSize[0], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - - k = 1; - //LOG_INFO("NPP_CHECK_NPP:%d", 2); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], - apdDCT[1], aDCTStep[1], - pdQuantizationTables + k * 64, - aSrcSize[1], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - - //LOG_INFO("NPP_CHECK_NPP:%d", 3); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], - apdDCT[2], aDCTStep[2], - pdQuantizationTables + k * 64, - aSrcSize[2], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - - // Huffman Encoding - - Npp32s nScanLength; - Npp8u *pJpegEncoderTemp; - -#if (CUDA_VERSION == 8000) - Npp32s nTempSize; //when using CUDA8 -#else - size_t nTempSize; //when using CUDA9 -#endif - //modified 
by Junlin 190221 - - //LOG_INFO("NPP_CHECK_NPP:%d",4); - if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) - { - printf("nppiEncodeHuffmanGetSize Failed!\n"); - return EXIT_FAILURE; - } - - //LOG_INFO("NPP_CHECK_CUDA:%d",5); - NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); - - /** - * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. - */ - NppStatus t_status; - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); - - /** - * Huffman Encoding of the JPEG Encoding. - * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. 
- */ - Npp32s nSs = 0; - Npp32s nSe = 63; - Npp32s nH = 0; - Npp32s nL = 0; - //LOG_INFO("NPP_CHECK_NPP:%d",6); - if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, - 0, nSs, nSe, nH, nL, - pdScan, &nScanLength, - apHuffmanDCTable, - apHuffmanACTable, - aSrcSize, - pJpegEncoderTemp))) - { - printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); - return EXIT_FAILURE; - } - - for (int i = 0; i < 3; ++i) - { - nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); - nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); - } - // Write JPEG - pDstJpeg = new unsigned char[4 << 20]{}; - pDstOutput = pDstJpeg; - - writeMarker(0x0D8, pDstOutput); - writeJFIFTag(pDstOutput); - writeQuantizationTable(aQuantizationTables[0], pDstOutput); - writeQuantizationTable(aQuantizationTables[1], pDstOutput); - writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); - writeHuffmanTable(pHuffmanACTables[0], pDstOutput); - writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); - writeHuffmanTable(pHuffmanACTables[1], pDstOutput); - writeFrameHeader(oFrameHeaderFixedSize, pDstOutput); - writeScanHeader(oScanHeader, pDstOutput); - - //LOG_INFO("NPP_CHECK_CUDA:%d",7); - NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); - - pDstOutput += nScanLength; - writeMarker(0x0D9, pDstOutput); - { - // Write result to file. 
- std::ofstream outputFile(szOutputFile, ios::out | ios::binary); - outputFile.write(reinterpret_cast(pDstJpeg), static_cast(pDstOutput - pDstJpeg)); - } - - // Cleanup - cudaFree(pJpegEncoderTemp); - delete[] pDstJpeg; - - - return EXIT_SUCCESS; -} - -int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB) -{ - //RGB2YUV - cudaError_t cudaStatus; - cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight, - apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, - apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, - apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height); - - /** - * Forward DCT, quantization and level shift part of the JPEG encoding. - * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 - * macro blocks. The new version of the primitive takes the ROI in image pixel size and - * works with DCT coefficients that are in zig-zag order. - */ - int k = 0; - //LOG_INFO("NPP_CHECK_NPP:%d", 1); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], - apdDCT[0], aDCTStep[0], - pdQuantizationTables + k * 64, - aSrcSize[0], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - - k = 1; - //LOG_INFO("NPP_CHECK_NPP:%d", 2); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], - apdDCT[1], aDCTStep[1], - pdQuantizationTables + k * 64, - aSrcSize[1], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - - //LOG_INFO("NPP_CHECK_NPP:%d", 3); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], - apdDCT[2], aDCTStep[2], - pdQuantizationTables + k * 64, - aSrcSize[2], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - - // Huffman Encoding - - Npp32s 
nScanLength; - Npp8u *pJpegEncoderTemp; - -#if (CUDA_VERSION == 8000) - Npp32s nTempSize; //when using CUDA8 -#else - size_t nTempSize; //when using CUDA9 -#endif - //modified by Junlin 190221 - - //LOG_INFO("NPP_CHECK_NPP:%d",4); - if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) - { - printf("nppiEncodeHuffmanGetSize Failed!\n"); - return EXIT_FAILURE; - } - - //LOG_INFO("NPP_CHECK_CUDA:%d",5); - NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); - - /** - * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. - */ - NppStatus t_status; - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); - - /** - * Huffman Encoding of the JPEG Encoding. - * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. 
- */ - Npp32s nSs = 0; - Npp32s nSe = 63; - Npp32s nH = 0; - Npp32s nL = 0; - //LOG_INFO("NPP_CHECK_NPP:%d",6); - if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, - 0, nSs, nSe, nH, nL, - pdScan, &nScanLength, - apHuffmanDCTable, - apHuffmanACTable, - aSrcSize, - pJpegEncoderTemp))) - { - printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); - return EXIT_FAILURE; - } - - for (int i = 0; i < 3; ++i) - { - nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); - nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); - } - // Write JPEG - pDstJpeg = new unsigned char[4 << 20]{}; - pDstOutput = pDstJpeg; - - writeMarker(0x0D8, pDstOutput); - writeJFIFTag(pDstOutput); - writeQuantizationTable(aQuantizationTables[0], pDstOutput); - writeQuantizationTable(aQuantizationTables[1], pDstOutput); - writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); - writeHuffmanTable(pHuffmanACTables[0], pDstOutput); - writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); - writeHuffmanTable(pHuffmanACTables[1], pDstOutput); - writeFrameHeader(oFrameHeaderFixedSize, pDstOutput); - writeScanHeader(oScanHeader, pDstOutput); - - //LOG_INFO("NPP_CHECK_CUDA:%d",7); - NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); - - pDstOutput += nScanLength; - writeMarker(0x0D9, pDstOutput); - { - // Write result to file. 
- std::ofstream outputFile(szOutputFile, ios::out | ios::binary); - outputFile.write(reinterpret_cast(pDstJpeg), static_cast(pDstOutput - pDstJpeg)); - } - - // Cleanup - cudaFree(pJpegEncoderTemp); - delete[] pDstJpeg; - - - return EXIT_SUCCESS; -} - - -int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height) -{ - NppiSize aSrcSize[3]; - Npp16s *apdDCT[3] = { 0, 0, 0 }; - Npp32s aDCTStep[3]; - - Npp8u *apSrcImage[3] = { 0, 0, 0 }; - Npp32s aSrcImageStep[3]; - size_t aSrcPitch[3]; - - - //????帧头 - oFrameHeader.nWidth = img_width; - oFrameHeader.nHeight = img_height; - - for (int i = 0; i < oFrameHeader.nComponents; ++i) - { - NppiSize oBlocks; - NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f }; - - oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 * - static_cast(oBlocksPerMCU.width) / nMCUBlocksH); - oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; - - oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 * - static_cast(oBlocksPerMCU.height) / nMCUBlocksV); - oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; - - aSrcSize[i].width = oBlocks.width * 8; - aSrcSize[i].height = oBlocks.height * 8; - - // Allocate Memory - size_t nPitch; - //LOG_INFO("NPP_CHECK_CUDA:%d",1); - NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height)); - aDCTStep[i] = static_cast(nPitch); - - //LOG_INFO("NPP_CHECK_CUDA:%d",2); - NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height)); - - aSrcPitch[i] = nPitch; - aSrcImageStep[i] = static_cast(nPitch); - } - - //RGB2YUV - cudaError_t cudaStatus; - cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height, - apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, - apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, - apSrcImage[2], aSrcPitch[2], 
aSrcSize[2].width, aSrcSize[2].height); - - /** - * Forward DCT, quantization and level shift part of the JPEG encoding. - * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 - * macro blocks. The new version of the primitive takes the ROI in image pixel size and - * works with DCT coefficients that are in zig-zag order. - */ - int k = 0; - //LOG_INFO("NPP_CHECK_CUDA:%d",3); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], - apdDCT[0], aDCTStep[0], - pdQuantizationTables + k * 64, - aSrcSize[0], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - k = 1; - - //LOG_INFO("NPP_CHECK_CUDA:%d",4); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], - apdDCT[1], aDCTStep[1], - pdQuantizationTables + k * 64, - aSrcSize[1], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - - //LOG_INFO("NPP_CHECK_CUDA:%d",5); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], - apdDCT[2], aDCTStep[2], - pdQuantizationTables + k * 64, - aSrcSize[2], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - - // Huffman Encoding - - Npp32s nScanLength; - Npp8u *pJpegEncoderTemp; - -#if (CUDA_VERSION == 8000) - Npp32s nTempSize; //when using CUDA8 -#else - size_t nTempSize; //when using CUDA9 -#endif - //modified by Junlin 190221 - - //LOG_INFO("NPP_CHECK_CUDA:%d",6); - if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) - { - printf("nppiEncodeHuffmanGetSize Failed!\n"); - return EXIT_FAILURE; - } - - //LOG_INFO("NPP_CHECK_CUDA:%d",7); - NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); - - /** - * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. 
- */ - NppStatus t_status; - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); - - /** - * Huffman Encoding of the JPEG Encoding. - * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. - */ - Npp32s nSs = 0; - Npp32s nSe = 63; - Npp32s nH = 0; - Npp32s nL = 0; - //LOG_INFO("NPP_CHECK_CUDA:%d",8); - if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, - 0, nSs, nSe, nH, nL, - pdScan, &nScanLength, - apHuffmanDCTable, - apHuffmanACTable, - aSrcSize, - pJpegEncoderTemp))) - { - printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); - return EXIT_FAILURE; - } - - for (int i = 0; i < 3; ++i) - { - nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); - nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); - } - // Write JPEG - pDstJpeg = new unsigned char[4 << 20]{}; - pDstOutput = pDstJpeg; - - writeMarker(0x0D8, pDstOutput); - writeJFIFTag(pDstOutput); - writeQuantizationTable(aQuantizationTables[0], pDstOutput); - writeQuantizationTable(aQuantizationTables[1], pDstOutput); - writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); - writeHuffmanTable(pHuffmanACTables[0], pDstOutput); - writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); - writeHuffmanTable(pHuffmanACTables[1], pDstOutput); - writeFrameHeader(oFrameHeader, pDstOutput); - writeScanHeader(oScanHeader, pDstOutput); - 
- //LOG_INFO("NPP_CHECK_CUDA:%d",9); - NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); - - pDstOutput += nScanLength; - writeMarker(0x0D9, pDstOutput); - - { - // Write result to file. - std::ofstream outputFile(szOutputFile, ios::out | ios::binary); - outputFile.write(reinterpret_cast(pDstJpeg), static_cast(pDstOutput - pDstJpeg)); - } - - // Cleanup - cudaFree(pJpegEncoderTemp); - delete[] pDstJpeg; - for (int i = 0; i < 3; ++i) - { - cudaFree(apdDCT[i]); - cudaFree(apSrcImage[i]); - } - - return EXIT_SUCCESS; -} - - -int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height) -{ - NppiSize aSrcSize[3]; - Npp16s *apdDCT[3] = { 0, 0, 0 }; - Npp32s aDCTStep[3]; - - Npp8u *apSrcImage[3] = { 0, 0, 0 }; - Npp32s aSrcImageStep[3]; - size_t aSrcPitch[3]; - - - //????帧头 - oFrameHeader.nWidth = img_width; - oFrameHeader.nHeight = img_height; - - for (int i = 0; i < oFrameHeader.nComponents; ++i) - { - NppiSize oBlocks; - NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f }; - - oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 * - static_cast(oBlocksPerMCU.width) / nMCUBlocksH); - oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; - - oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 * - static_cast(oBlocksPerMCU.height) / nMCUBlocksV); - oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; - - aSrcSize[i].width = oBlocks.width * 8; - aSrcSize[i].height = oBlocks.height * 8; - - // Allocate Memory - size_t nPitch; - //LOG_INFO("NPP_CHECK_CUDA:%d",1); - NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height)); - aDCTStep[i] = static_cast(nPitch); - - //LOG_INFO("NPP_CHECK_CUDA:%d",2); - NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height)); - - aSrcPitch[i] = nPitch; - aSrcImageStep[i] 
= static_cast(nPitch); - } - - //RGB2YUV - cudaError_t cudaStatus; - cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height, - apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, - apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, - apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height); - - /** - * Forward DCT, quantization and level shift part of the JPEG encoding. - * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 - * macro blocks. The new version of the primitive takes the ROI in image pixel size and - * works with DCT coefficients that are in zig-zag order. - */ - int k = 0; - //LOG_INFO("NPP_CHECK_CUDA:%d",3); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], - apdDCT[0], aDCTStep[0], - pdQuantizationTables + k * 64, - aSrcSize[0], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - k = 1; - - //LOG_INFO("NPP_CHECK_CUDA:%d",4); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], - apdDCT[1], aDCTStep[1], - pdQuantizationTables + k * 64, - aSrcSize[1], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - - //LOG_INFO("NPP_CHECK_CUDA:%d",5); - if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], - apdDCT[2], aDCTStep[2], - pdQuantizationTables + k * 64, - aSrcSize[2], - pDCTState))) - { - printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); - return EXIT_FAILURE; - } - - // Huffman Encoding - - Npp32s nScanLength; - Npp8u *pJpegEncoderTemp; - -#if (CUDA_VERSION == 8000) - Npp32s nTempSize; //when using CUDA8 -#else - size_t nTempSize; //when using CUDA9 -#endif - //modified by Junlin 190221 - - //LOG_INFO("NPP_CHECK_CUDA:%d",6); - if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) - { - 
printf("nppiEncodeHuffmanGetSize Failed!\n"); - return EXIT_FAILURE; - } - - //LOG_INFO("NPP_CHECK_CUDA:%d",7); - NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); - - /** - * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. - */ - NppStatus t_status; - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); - t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); - - /** - * Huffman Encoding of the JPEG Encoding. - * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. 
- */ - Npp32s nSs = 0; - Npp32s nSe = 63; - Npp32s nH = 0; - Npp32s nL = 0; - //LOG_INFO("NPP_CHECK_CUDA:%d",8); - if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, - 0, nSs, nSe, nH, nL, - pdScan, &nScanLength, - apHuffmanDCTable, - apHuffmanACTable, - aSrcSize, - pJpegEncoderTemp))) - { - printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); - return EXIT_FAILURE; - } - - for (int i = 0; i < 3; ++i) - { - nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); - nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); - } - // Write JPEG - pDstJpeg = new unsigned char[4 << 20]{}; - pDstOutput = pDstJpeg; - - writeMarker(0x0D8, pDstOutput); - writeJFIFTag(pDstOutput); - writeQuantizationTable(aQuantizationTables[0], pDstOutput); - writeQuantizationTable(aQuantizationTables[1], pDstOutput); - writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); - writeHuffmanTable(pHuffmanACTables[0], pDstOutput); - writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); - writeHuffmanTable(pHuffmanACTables[1], pDstOutput); - writeFrameHeader(oFrameHeader, pDstOutput); - writeScanHeader(oScanHeader, pDstOutput); - - //LOG_INFO("NPP_CHECK_CUDA:%d",9); - NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); - - pDstOutput += nScanLength; - writeMarker(0x0D9, pDstOutput); - - { - // Write result to file. 
- std::ofstream outputFile(szOutputFile, ios::out | ios::binary); - outputFile.write(reinterpret_cast(pDstJpeg), static_cast(pDstOutput - pDstJpeg)); - } - - // Cleanup - cudaFree(pJpegEncoderTemp); - delete[] pDstJpeg; - for (int i = 0; i < 3; ++i) - { - cudaFree(apdDCT[i]); - cudaFree(apSrcImage[i]); - } - - return EXIT_SUCCESS; -} diff --git a/src/logger.hpp b/src/logger.hpp deleted file mode 100644 index d249c3c..0000000 --- a/src/logger.hpp +++ /dev/null @@ -1,342 +0,0 @@ -/* - * @Author: yangzilong - * @Date: 2021-12-21 11:07:11 - * @Last Modified by: yangzilong - * @Email: yangzilong@objecteye.com - * @Description: - */ - -#pragma once - -#include "define.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#define LOG_TRACE_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_TRACE(logger, __VA_ARGS__);} -#define LOG_DEBUG_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_DEBUG(logger, __VA_ARGS__);} -#define LOG_WARN_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_WARN(logger, __VA_ARGS__);} -#define LOG_ERROR_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_ERROR(logger, __VA_ARGS__);} -#define LOG_INFO_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_INFO(logger, __VA_ARGS__);} -#define LOG_CRITICAL_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_CRITICAL(logger, __VA_ARGS__);} - - -// use fmt lib, e.g. LOG_WARN("warn log, {1}, {1}, {2}", 1, 2); -#define LOG_TRACE(msg, ...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::trace, msg, ##__VA_ARGS__) -#define LOG_DEBUG(msg, ...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::debug, msg, ##__VA_ARGS__) -#define LOG_INFO(msg,...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::info, msg, ##__VA_ARGS__) -#define LOG_WARN(msg,...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::warn, msg, ##__VA_ARGS__) -#define LOG_ERROR(msg,...) 
spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::err, msg, ##__VA_ARGS__) -#define LOG_FATAL(msg,...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::critical, msg, ##__VA_ARGS__) - - - -namespace spdlog -{ - namespace sinks - { - template - class easy_file_sink final : public base_sink - { - public: - easy_file_sink(filename_t base_filename, size_t max_size, size_t max_keep_days = 0) - : base_filename_(std::move(base_filename)) - , max_size_(max_size) - , max_keep_days_(max_keep_days) - { - auto now = log_clock::now(); - auto filename = gen_filename_by_daliy(base_filename_, now_tm(now)); - - file_helper_.open(filename, false); - current_size_ = file_helper_.size(); - rotation_tp_ = next_rotation_tp_(); - - if (max_keep_days_ > 0) - { - filespath_q_.push_back(std::move(std::set())); - filespath_q_[filespath_q_.size() - 1].insert(filename); - } - } - - filename_t filename() - { - std::lock_guard lock(base_sink::mutex_); - return file_helper_.filename(); - } - - protected: - void sink_it_(const details::log_msg &msg) override - { - memory_buf_t formatted; - base_sink::formatter_->format(msg, formatted); - current_size_ += formatted.size(); - - auto time = msg.time; - if (time >= rotation_tp_) - { - file_helper_.close(); - auto filename = gen_filename_by_daliy(base_filename_, now_tm(time)); - file_helper_.open(filename, false); - current_size_ = file_helper_.size(); - rotation_tp_ = next_rotation_tp_(); - - { - filespath_q_.push_back(std::move(std::set())); - filespath_q_[filespath_q_.size() - 1].emplace(filename); - } - - // Do the cleaning only at the end because it might throw on failure. 
- if (max_keep_days_ > 0 && filespath_q_.size() > max_keep_days_) - delete_old_(); - } - else if (current_size_ >= max_size_) - { - file_helper_.close(); - auto src_name = gen_filename_by_daliy(base_filename_, now_tm(time)); - auto target_name = gen_filename_by_filesize(base_filename_, now_tm(time), filespath_q_[filespath_q_.size() - 1].size()); - - // rename file if failed then us `target_name` as src_name. - if (!rename_file_(src_name, target_name)) - { - details::os::sleep_for_millis(200); - if (!rename_file_(src_name, target_name)) - { - fprintf(stderr, "%s:%d rename %s to %s failed\n", __FILENAME__, __LINE__, src_name.c_str(), target_name.c_str()); - src_name = target_name; - } - } - - filespath_q_[filespath_q_.size() - 1].emplace(src_name); - if (src_name != target_name) - filespath_q_[filespath_q_.size() - 1].emplace(target_name); - - file_helper_.open(src_name, false); - current_size_ = file_helper_.size(); - rotation_tp_ = next_rotation_tp_(); - } - - file_helper_.write(formatted); - - - } - - void flush_() override - { - file_helper_.flush(); - } - - private: - - tm now_tm(log_clock::time_point tp) - { - time_t tnow = log_clock::to_time_t(tp); - return spdlog::details::os::localtime(tnow); - } - - /** - * @brief Get next day tm. - * - * @return log_clock::time_point - */ - log_clock::time_point next_rotation_tp_() - { - auto now = log_clock::now(); - tm date = now_tm(now); - date.tm_hour = 0; - date.tm_min = 0; - date.tm_sec = 0; - auto rotation_time = log_clock::from_time_t(std::mktime(&date)); - if (rotation_time > now) - return rotation_time; - return {rotation_time + std::chrono::hours(24)}; - } - - // Delete the file N rotations ago. - // Throw spdlog_ex on failure to delete the old file. 
- void delete_old_() - { - for (auto iter = filespath_q_.begin(); iter != filespath_q_.end();) - { - if (filespath_q_.size() <= max_keep_days_) - break; - - for (auto it = iter->begin(); it != iter->end(); ++it) - { - bool ok = details::os::remove_if_exists(*it) == 0; - if (!ok) - throw_spdlog_ex("Failed removing daily file " + details::os::filename_to_str(*it), errno); - } - filespath_q_.erase(iter); - } - } - - /* */ - static filename_t gen_filename_by_daliy(const filename_t &filename, const tm &now_tm) - { - filename_t basename, ext; - std::tie(basename, ext) = details::file_helper::split_by_extension(filename); - return fmt::format(SPDLOG_FILENAME_T("{}_{:04d}_{:02d}_{:02d}{}"), - basename, - now_tm.tm_year + 1900, - now_tm.tm_mon + 1, - now_tm.tm_mday, - ext); - } - - // - static filename_t gen_filename_by_filesize(const filename_t &filename, const tm &now_tm, const int &idx) - { - filename_t basename, ext; - std::tie(basename, ext) = details::file_helper::split_by_extension(filename); - return fmt::format(SPDLOG_FILENAME_T("{}_{:04d}_{:02d}_{:02d}_{:02d}{:02d}{:02d}.{:d}{}"), - basename, - now_tm.tm_year + 1900, - now_tm.tm_mon + 1, - now_tm.tm_mday, - now_tm.tm_hour, - now_tm.tm_min, - now_tm.tm_sec, - idx, - ext); - } - - static bool rename_file_(const filename_t &src_filename, const filename_t &target_filename) - { - (void)details::os::remove(target_filename); - return details::os::rename(src_filename, target_filename) == 0; - } - - filename_t base_filename_; - log_clock::time_point rotation_tp_; - details::file_helper file_helper_; - std::size_t max_size_; - std::size_t max_keep_days_; - std::size_t current_size_; - // std::vector<> filespath_q_; - std::vector> filespath_q_; - }; - - using easy_file_sink_mt = easy_file_sink; - using easy_file_sink_st = easy_file_sink; - - } // namespace sinks - - template - inline std::shared_ptr easy_logger_mt( - const std::string &logger_name, const filename_t &filename, size_t max_size, size_t max_keep_days = -1) - { - 
return Factory::template create(logger_name, filename, max_size, max_keep_days); - } - - template - inline std::shared_ptr easy_logger_st( - const std::string &logger_name, const filename_t &filename, size_t max_size, size_t max_keep_days = -1) - { - return Factory::template create(logger_name, filename, max_size, max_keep_days); - } - -} // namespace spdlog - - -enum class LogLevel -{ - CLOSE = -1, - TRACE = 0, - DEBUG = 1, - INFO = 2, - WARN = 3, - ERROR = 4, - FATAL = 5, -}; - - -class LoggerGenerator -{ -public: - static LoggerGenerator* get_instance() - { - static LoggerGenerator logger; - return &logger; - } - - void destory(LoggerGenerator *ptr) - { - if (ptr != nullptr) - { - delete ptr; - ptr = nullptr; - } - } - - std::shared_ptr gen_logger(const LogLevel &level, const std::string &logger_name, - const std::string &file_path, size_t max_file_size, size_t max_keep_days) - { - spdlog::level::level_enum spd_level; - if (LogLevel::TRACE == level) - spd_level = spdlog::level::trace; - else if (LogLevel::DEBUG == level) - spd_level = spdlog::level::debug; - else if (LogLevel::INFO == level) - spd_level = spdlog::level::info; - else if (LogLevel::WARN == level) - spd_level = spdlog::level::warn; - else if (LogLevel::ERROR == level) - spd_level = spdlog::level::err; - else if (LogLevel::FATAL == level) - spd_level = spdlog::level::critical; - else if (LogLevel::CLOSE == level) - spd_level = spdlog::level::off; - - auto sink_ptr = std::make_shared(file_path, max_file_size, max_keep_days); - auto logger = std::make_shared(logger_name, sink_ptr); - logger->set_level(spd_level); - logger->set_pattern("%s(%#): [%L %D %T.%e %P %t %!] 
%v"); - - return logger; - } - - void set_default_logger(const LogLevel &level, const std::string &logger_name, - const std::string &file_name, size_t max_file_size, size_t max_keep_days) - { - - auto logger = gen_logger(level, logger_name, file_name, max_file_size, max_keep_days); - spdlog::set_default_logger(logger); - spdlog::set_level(logger->level()); - spdlog::set_pattern("%s(%#): [%L %D %T.%e %P %t %!] %v"); - - spdlog::flush_on(spdlog::level::trace); - spdlog::flush_every(std::chrono::seconds(1)); - } - -}; - - -static void set_default_logger(const LogLevel &level, const std::string &logger_name, - const std::string &file_path, size_t max_file_size, size_t max_keep_days) -{ - static LoggerGenerator loggerGenerator; - loggerGenerator.set_default_logger(level, logger_name, file_path, max_file_size, max_keep_days); -} - - -static std::shared_ptr get_simple_logger(const LogLevel &level, const std::string &logger_name, - const std::string &file_path, size_t max_file_size, size_t max_keep_days) -{ - static LoggerGenerator loggerGenerator; - return loggerGenerator.gen_logger(level, logger_name, file_path, max_file_size, max_keep_days); -} diff --git a/src/main.cpp b/src/main.cpp deleted file mode 100644 index d24e8f4..0000000 --- a/src/main.cpp +++ /dev/null @@ -1,452 +0,0 @@ -#include "FFNvDecoderManager.h" -#include - -#include "cuda_kernels.h" - -#include "NvJpegEncoder.h" - -#include -#include - -#include - -#include - - -#ifdef _WIN32 -#include "Winsock2.h" -#pragma comment(lib, "ws2_32.lib") -#endif - -#ifdef __linux__ -#include "arpa/inet.h" -#endif - -#include "utiltools.hpp" - -#define MIN_RTP_PORT 10000 -#define MAX_RTP_PORT 60000 - -// ȡ MIN_RTP_PORT(10000)~MAX_RTP_PORT(60000)֮�������˿�(ż���������������˿ڿ���) -int allocRtpPort() { - - static int s_rtpPort = MIN_RTP_PORT; - if (MIN_RTP_PORT == s_rtpPort) - { - srand((unsigned int)time(NULL)); - s_rtpPort = MIN_RTP_PORT + (rand() % MIN_RTP_PORT); - } - - if (s_rtpPort % 2) - ++s_rtpPort; - - while (true) 
- { - s_rtpPort += 2; - s_rtpPort = s_rtpPort >= MAX_RTP_PORT ? MIN_RTP_PORT : s_rtpPort; - - int i = 0; - for (; i < 2; i++) - { - sockaddr_in sRecvAddr; - int s = socket(AF_INET, SOCK_DGRAM, 0); - - sRecvAddr.sin_family = AF_INET; - sRecvAddr.sin_addr.s_addr = htonl(INADDR_ANY); - sRecvAddr.sin_port = htons(s_rtpPort + i); - - int nResult = bind(s, (sockaddr *)&sRecvAddr, sizeof(sRecvAddr)); - if (nResult != 0) - { - break; - } - - nResult = close(s); - if (nResult != 0) - { - printf("closesocket failed:%d\n", nResult); - break; - } - } - - if (i == 2) - break; - } - - return s_rtpPort; -} - - - - - -unsigned char *pHwRgb[2] = {nullptr, nullptr}; - -int sum1 = 0; -int sum2 = 0; - -cudaStream_t stream[2]; - -string data_home = "/mnt/data/cmhu/tmp/"; - - -#define checkCudaErrors(S) do {CUresult status; \ - status = S; \ - if (status != CUDA_SUCCESS ) std::cout << __LINE__ <<" checkCudaErrors - status = " << status << std::endl; \ - } while (false) - - -static void gpu_helper(int gpuid) -{ - cudaSetDevice(gpuid); - - // int *dn; - // cudaMalloc((void **)&dn, 1 * sizeof(int)); - - size_t free_byte; - size_t total_byte; - - CUresult cuda_status = cuMemGetInfo(&free_byte, &total_byte); - - const char *pStr = nullptr; - if (CUDA_SUCCESS != cuda_status) { - cuGetErrorString(cuda_status, &pStr); - printf("Error: cudaMemGetInfo fails, %s \n", pStr); - return; - } - - double free_db = (double)free_byte; - double total_db = (double)total_byte; - double used_db_1 = (total_db - free_db) / 1024.0 / 1024.0; - - std::cout <<"显存已使用 " << used_db_1 << " MB\n"; - - // cudaFree(dn); -} - -int CheckCUDAProperty( int devId ) -{ - cuInit(0); - - CUdevice dev = devId; - size_t memSize = 0; - char devName[256] = {0}; - int major = 0, minor = 0; - CUresult rlt = CUDA_SUCCESS; - - rlt = cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev); - checkCudaErrors( rlt ); - - rlt = cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev); - 
checkCudaErrors( rlt ); - - rlt = cuDeviceGetName( devName, sizeof( devName ), dev ); - checkCudaErrors( rlt ); - - printf( "Using GPU Device %d: %s has SM %d.%d compute capability\n", - dev, devName, major, minor ); - - rlt = cuDeviceTotalMem( &memSize, dev ); - checkCudaErrors( rlt ); - - printf( "Total amount of global memory: %4.4f MB\n", - (float)memSize / ( 1024 * 1024 ) ); - - return 0; -} - -/** - * 注意: gpuFrame 在解码器设置的显卡上,后续操作要十分注意这一点,尤其是多线程情况 - * */ -void postDecoded(const void * userPtr, AVFrame * gpuFrame){ - AbstractDecoder* decoder = (AbstractDecoder*)userPtr; - if (decoder!= nullptr) - { - // cout << "decode name: " << decoder->getName() << endl; - - // const char* gpu_pixfmt = av_get_pix_fmt_name((AVPixelFormat)gpuFrame->format); - // cout << "pixfmt: " << gpu_pixfmt << endl; - // cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl; - // cout << "decode successed ✿✿ヽ(°▽°)ノ✿ " << endl; - - int sum = sum1; - if (decoder->getName() == "dec0") - { - sum1 ++ ; - sum = sum1; - - if (gpuFrame->format == AV_PIX_FMT_CUDA) - { - // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl; - cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); - cudaError_t cudaStatus; - if(pHwRgb[0] == nullptr){ - // cudaStreamCreate(&stream[0]); - cuda_common::setColorSpace( ITU_709, 0 ); - cudaStatus = cudaMalloc((void **)&pHwRgb[0], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); - } - cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[0], gpuFrame->width, gpuFrame->height); - cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - cout << "CUDAToBGR failed !!!" 
<< endl; - return; - } - - string path = data_home + decoder->getName() + ".jpg"; - saveJpeg(path.c_str(), pHwRgb[0], gpuFrame->width, gpuFrame->height, stream[0]); // 验证 CUDAToRGB - } - } else if (decoder->getName() == "dec2") - { - sum2 ++ ; - sum = sum2; - - if (gpuFrame->format == AV_PIX_FMT_CUDA) - { - // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl; - cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); - cudaError_t cudaStatus; - if(pHwRgb[1] == nullptr){ - // cudaStreamCreate(&stream[1]); - cuda_common::setColorSpace( ITU_709, 0 ); - cudaStatus = cudaMalloc((void **)&pHwRgb[1], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); - } - cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[1], gpuFrame->width, gpuFrame->height); - cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - cout << "CUDAToBGR failed !!!" << endl; - return; - } - - string path = data_home + decoder->getName() + ".jpg"; - saveJpeg(path.c_str(), pHwRgb[1], gpuFrame->width, gpuFrame->height, stream[1]); // 验证 CUDAToRGB - } - } - } -} - -long start_time = 0; -long end_time = 0; -bool count_flag = false; -int count = 0; -int count_std = 100; - - -static int sum = 0; -unsigned char *pHwData = nullptr; - -void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ - // std::this_thread::sleep_for(std::chrono::milliseconds(30000)); - - AbstractDecoder* decoder = (AbstractDecoder*)userPtr; - if (decoder!= nullptr) - { - // cout << "decode name: " << decoder->getName() << endl; - if (decoder->getName() == "dec") - { - if (! 
count_flag) - { - count_flag = true; - count = 0; - end_time = start_time = UtilTools::get_cur_time_ms(); - } - count++; - sum ++ ; - if (count >= count_std) - { - // end_time = UtilTools::get_cur_time_ms(); - // long time_using = end_time - start_time; - // double time_per_frame = double(time_using)/count_std ; - // cout << count_std << "帧用时:" << time_using << "ms 每帧用时:" << time_per_frame << "ms" << endl; - cout << decoder->getName() << " keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl; - // cout << gpuFrame->pts << endl; - - count_flag = false; - } - // cout << "帧数:" << sum << endl; - - if (gpuFrame->format == AV_PIX_FMT_CUDA) - { - cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); - // cout << "gpu id : " << decoder->m_cfg.gpuid.c_str() << endl; - cudaError_t cudaStatus; - if(pHwData == nullptr){ - cuda_common::setColorSpace( ITU_709, 0 ); - cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); - } - cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height); - cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - cout << "CUDAToBGR failed !!!" 
<< endl; - return; - } - - string path = data_home + decoder->getName() + ".jpg"; - saveJpeg(path.c_str(), pHwData, gpuFrame->width, gpuFrame->height, nullptr); // 验证 CUDAToRGB - } - } - } -} - -void decode_finished_cbk(const void* userPtr){ - cout << "当前时间戳: " << UtilTools::get_cur_time_ms() << endl; -} - -bool decode_request_stream_cbk(const char* deviceId){ - cout << "需在此请求流" << endl; - return true; -} - -// string test_uri = "rtmp://192.168.10.56:1935/objecteye/1"; -// string test_uri = "/home/cmhu/data/output_800x480.mp4"; -// string test_uri = "/home/cmhu/data/output_1920x1080.mp4"; -// string test_uri = "rtsp://176.10.0.2:8554/stream"; -// string test_uri = "/mnt/f/fiss/test_data/h265.mp4"; -string test_uri = "rtsp://176.10.0.4:8554/stream"; - -void createDecode(int index, const char* gpu_id){ - FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); - MgrDecConfig config; - config.name = "dec" + to_string(index); - config.cfg.uri = test_uri; - config.cfg.post_decoded_cbk = postDecoded; - config.cfg.decode_finished_cbk = decode_finished_cbk; - config.cfg.force_tcp = true; - config.dec_type = DECODER_TYPE_FFMPEG; - - config.cfg.gpuid = gpu_id; - // if (index % 2 == 0) - // { - // config.cfg.gpuid = "0"; - // } - // else - // { - // config.cfg.gpuid = "0"; - // } - - AbstractDecoder* decoder = pDecManager->createDecoder(config); - if (!decoder) - { - return ; - } - pDecManager->setPostDecArg(config.name, decoder); - pDecManager->setFinishedDecArg(config.name, decoder); - pDecManager->startDecodeByName(config.name); -} - -void createGB28181Decode(int index, char* gpu_id, int port){ - FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); - MgrDecConfig config; - config.name = "dec" + to_string(index); - config.cfg.uri = config.name; - config.cfg.post_decoded_cbk = postDecoded; - config.cfg.decode_finished_cbk = decode_finished_cbk; - config.cfg.request_stream_cbk = decode_request_stream_cbk; - config.cfg.force_tcp = true; - - 
config.dec_type = DECODER_TYPE_GB28181; - config.cfg.port = port;//allocRtpPort(); - - config.cfg.gpuid = gpu_id; - - AbstractDecoder* decoder = pDecManager->createDecoder(config); - if (!decoder) - { - return ; - } - pDecManager->setPostDecArg(config.name, decoder); - pDecManager->setFinishedDecArg(config.name, decoder); - pDecManager->startDecodeByName(config.name); -} - -void logFF(void *, int level, const char *fmt, va_list ap) -{ - vfprintf(stdout, fmt, ap); -} - - -int main(int argc, char* argv[]){ - - test_uri = "rtsp://admin:admin@123456@192.168.60.176:554/cam/realmonitor?channel=1&subtype=0";//argv[1]; - char* gpuid = argv[2]; - int port = atoi(argv[3]); - cout << test_uri << " gpu_id:" << gpuid << " port:" << port << endl; - - // av_log_set_callback(&logFF); - - CheckCUDAProperty(atoi(gpuid)); - - pthread_t m_decode_thread; - pthread_create(&m_decode_thread,0, - [](void* arg) - { - // cudaSetDevice(atoi(gpuid)); - while (true) - { - std::this_thread::sleep_for(std::chrono::minutes(1)); - FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); - int count = pDecManager->count(); - cout << "当前时间:" << UtilTools::get_cur_time_ms() << " 当前运行路数: " << pDecManager->count() << endl; - } - - return (void*)0; - } - ,nullptr); - - - FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance(); - int i = 0; - - while (true) - { - int ch = getchar(); - if (ch == 'q') - { - break; - } - - switch (ch) - { - case 'f': - case 'F': - createDecode(i, gpuid); - i++; - break; - case 'g': - case 'G': - createGB28181Decode(i, gpuid, port); - i++; - break; - case 'r': - case 'R': - pDecManager->resumeDecoder("dec0"); - break; - case 'p': - case 'P': - pDecManager->pauseDecoder("dec0"); - break; - - case 'c': - case 'C': - i--; - pDecManager->closeDecoderByName("dec" + to_string(i)); - break; - - case 'i': - case 'I': - { - int w,h; - pDecManager->getResolution("dec0", w,h); - printf( "%s : %dx%d\n", "dec0" , w,h ); - } - break; - - default: - break; - } - - /* 
code */ - } - - cout << "总共帧数:" << sum << endl; - pDecManager->closeAllDecoder(); -} \ No newline at end of file diff --git a/src/nvdecoder/DrawImageOnGPU.cu b/src/nvdecoder/DrawImageOnGPU.cu new file mode 100644 index 0000000..8770cea --- /dev/null +++ b/src/nvdecoder/DrawImageOnGPU.cu @@ -0,0 +1,126 @@ +#include "cuda_kernels.h" + +#include "logger.hpp" + +typedef unsigned char uchar; +typedef unsigned int uint32; +typedef int int32; + +namespace cuda_common +{ + __global__ void kernel_drawPixel(float* d_srcRGB, int src_width, int src_height, + int left, int top, int right, int bottom) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right)) + { + d_srcRGB[(y*src_width) + x] = 0; + d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255; + d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0; + } + } + + cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) + { + dim3 block(32, 16, 1); + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); + + kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom); + + cudaError_t cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + LOG_ERROR("Draw 32 kernel_memcopy launch failed:{}",cudaGetErrorString(cudaStatus)); + return cudaStatus; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus); + return cudaStatus; + } + + return cudaStatus; + } + + __global__ void kernel_drawPixel(unsigned char* d_srcRGB, int src_width, int src_height, + int left, int top, int right, int bottom) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = 
blockIdx.y * blockDim.y + threadIdx.y; + + if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right)) + { + d_srcRGB[(y*src_width) + x] = 0; + d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255; + d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0; + } + } + + cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) + { + dim3 block(32, 16, 1); + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); + + kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom); + + cudaError_t cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + LOG_ERROR("Draw 68 kernel_memcopy launch failed: {}",cudaGetErrorString(cudaStatus)); + return cudaStatus; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus); + return cudaStatus; + } + + return cudaStatus; + } + + __global__ void kernel_drawLine(float* d_srcRGB, int src_width, int src_height, + int begin_x, int begin_y, int end_x, int end_y) + { + int min_x = end_x < begin_x ? end_x : begin_x; + int max_x = end_x < begin_x ? begin_x : end_x; + + int min_y = end_y < begin_y ? end_y : begin_y; + int max_y = end_y < begin_y ? 
begin_y : end_y; + + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if ((x - begin_x) * (end_y - begin_y) == (end_x - begin_x) * (y - begin_y) + && min_x <= x && x <= max_x + && min_y <= y && y <= max_y) + { + d_srcRGB[(y*src_width) + x] = 0; + d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255; + d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0; + } + } + + cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y) + { + dim3 block(32, 16, 1); + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); + + kernel_drawLine << < grid, block >> >(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y); + + cudaError_t cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + LOG_ERROR("Draw 112 kernel_memcopy launch failed: {}",cudaGetErrorString(cudaStatus)); + return cudaStatus; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus); + return cudaStatus; + } + + return cudaStatus; + } +} \ No newline at end of file diff --git a/src/nvdecoder/FFCuContextManager.cpp b/src/nvdecoder/FFCuContextManager.cpp new file mode 100644 index 0000000..9ae930c --- /dev/null +++ b/src/nvdecoder/FFCuContextManager.cpp @@ -0,0 +1,29 @@ +#include "FFCuContextManager.h" + +#include "common_header.h" + +using namespace std; + +FFCuContextManager::~FFCuContextManager() +{ + for(auto iter = ctxMap.begin(); iter != ctxMap.end(); iter++){ + av_buffer_unref(&iter->second); + } + ctxMap.clear(); +} + +AVBufferRef *FFCuContextManager::getCuCtx(string gpuid) +{ + AVBufferRef *hw_device_ctx = ctxMap[gpuid]; + if (nullptr == hw_device_ctx) + { + // 初始化硬件解码器 + if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_CUDA, gpuid.c_str(), nullptr, 0) < 0) + { + 
LOG_ERROR("Failed to create specified HW device."); + return nullptr; + } + ctxMap[gpuid] = hw_device_ctx; + } + return hw_device_ctx; +} \ No newline at end of file diff --git a/src/nvdecoder/FFCuContextManager.h b/src/nvdecoder/FFCuContextManager.h new file mode 100644 index 0000000..3050641 --- /dev/null +++ b/src/nvdecoder/FFCuContextManager.h @@ -0,0 +1,37 @@ + +#include +#include + +extern "C" +{ + #include + #include + #include + #include + #include + #include + #include +} + +using namespace std; + +class FFCuContextManager{ +public: + static FFCuContextManager* getInstance(){ + static FFCuContextManager* singleton = nullptr; + if (singleton == nullptr){ + singleton = new FFCuContextManager(); + } + return singleton; + } + + AVBufferRef *getCuCtx(string gpuid); + +private: + FFCuContextManager(){} + ~FFCuContextManager(); + +private: + map ctxMap; + +}; \ No newline at end of file diff --git a/src/nvdecoder/FFNvDecoder.cpp b/src/nvdecoder/FFNvDecoder.cpp new file mode 100644 index 0000000..3ebcd6c --- /dev/null +++ b/src/nvdecoder/FFNvDecoder.cpp @@ -0,0 +1,474 @@ +#include "FFNvDecoder.h" + +#include +#include +#include + +#include + +#include "FFCuContextManager.h" + +#include "common_header.h" + +#include "GpuRgbMemory.hpp" +#include "cuda_kernels.h" + +using namespace std; + +// 参考博客: https://blog.csdn.net/qq_40116098/article/details/120704340 + +static AVPixelFormat get_hw_format(AVCodecContext *avctx, const AVPixelFormat *pix_fmts) +{ + FFNvDecoder* _this = (FFNvDecoder*)avctx->opaque; + + const AVPixelFormat *p; + + for (p = pix_fmts; *p != -1; p++) { + if (*p == _this->getHwPixFmt()) + return *p; + } + + LOG_ERROR("Failed to get HW surface format"); + return AV_PIX_FMT_NONE; +} + +FFNvDecoder::FFNvDecoder() +{ + // 初始化解码对象 + fmt_ctx = nullptr; + avctx = nullptr; + m_bRunning = false; + + stream = nullptr; + stream_index = -1; + hw_pix_fmt = AV_PIX_FMT_NONE; + m_dec_name = ""; + + m_bPause = false; + m_bReal = true; + + m_decode_thread = 0; + 
m_post_decode_thread = 0; + + m_bFinished = false; + m_dec_keyframe = false; + m_fps = 0.0; +} + +FFNvDecoder::~FFNvDecoder() +{ + m_dec_keyframe = false; +} + +bool FFNvDecoder::init(FFDecConfig& cfg) +{ + m_cfg = cfg; + + fstream infile(cfg.uri); + if (infile.is_open()){ + m_bReal = false; + infile.close(); + }else { + m_bReal = true; + } + + post_decoded_cbk = cfg.post_decoded_cbk; + decode_finished_cbk = cfg.decode_finished_cbk; + + return init(cfg.uri.c_str(), cfg.gpuid.c_str(),cfg.force_tcp); +} + +bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp) +{ + // av_log_set_level(AV_LOG_DEBUG); + + avformat_network_init(); + + // 打开输入视频文件 + AVDictionary *options = nullptr; + av_dict_set( &options, "bufsize", "655360", 0 ); + av_dict_set( &options, "rtsp_transport", force_tcp ? "tcp" : "udp", 0 ); + // av_dict_set( &options, "listen_timeout", "30", 0 ); // 单位为s + av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒 + + fmt_ctx = avformat_alloc_context(); + const char* input_file = uri; + if (avformat_open_input(&fmt_ctx, input_file, nullptr, &options) != 0) { + LOG_ERROR("Cannot open input file:{}",input_file); + return false; + } + + // 查找流信息 + if (avformat_find_stream_info(fmt_ctx, nullptr) < 0) { + LOG_ERROR("Cannot find input stream information"); + return false; + } + + // 查找视频流信息 + AVCodec *decoder = nullptr; + stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0); + if (stream_index < 0) { + LOG_ERROR("Cannot find a video stream in the input file"); + return false; + } + + string cuvid_dec_name = string(decoder->name) + "_cuvid"; + AVCodec *vcodec = avcodec_find_decoder_by_name(cuvid_dec_name.c_str()); + if (!(avctx = avcodec_alloc_context3(vcodec))) + return (bool)AVERROR(ENOMEM); + + // 得到视频流对象 + stream = fmt_ctx->streams[stream_index]; + if (avcodec_parameters_to_context(avctx, stream->codecpar) < 0) + return false; + + m_fps = av_q2d(stream ->avg_frame_rate); + + avctx->opaque = this; + // 
设置解码器管理器的像素格式回调函数 + avctx->get_format = get_hw_format; + + hw_pix_fmt = AV_PIX_FMT_CUDA; + + FFCuContextManager* pCtxMgr = FFCuContextManager::getInstance(); + + AVBufferRef *hw_device_ctx = pCtxMgr->getCuCtx(gpuid); + if(nullptr == hw_device_ctx){ + av_log(nullptr, AV_LOG_ERROR, "create CUDA context failed ! \n"); + return false; + } + avctx->hw_device_ctx = av_buffer_ref(hw_device_ctx); + if (nullptr == avctx->hw_device_ctx) + { + return false; + } + + // 打开解码器流 + AVDictionary *op = nullptr; + av_dict_set( &op, "gpu", gpuid, 0 ); + // av_dict_set( &op, "surfaces", "5", 0 ); + if (avcodec_open2(avctx, vcodec, &op) < 0) { + LOG_ERROR("Failed to open codec for stream"); + return false; + } + + return true; +} + +bool FFNvDecoder::isSurport(FFDecConfig& cfg) +{ + bool bRet = init(cfg); + decode_finished(); + return bRet; +} + +bool FFNvDecoder::start(){ + + m_bRunning = true; + + pthread_create(&m_decode_thread,0, + [](void* arg) + { + FFNvDecoder* a=(FFNvDecoder*)arg; + a->decode_thread(); + return (void*)0; + } + ,this); + + return true; +} + +void FFNvDecoder::decode_thread() +{ + AVPacket* pkt ; + pkt = av_packet_alloc(); + av_init_packet( pkt ); + + pthread_create(&m_post_decode_thread,0, + [](void* arg) + { + FFNvDecoder* a=(FFNvDecoder*)arg; + a->post_decode_thread(); + return (void*)0; + } + ,this); + + // long start_time = UtilTools::get_cur_time_ms(); + + while (m_bRunning) + { + if (!m_bReal) + { + if (m_bPause) + { + std::this_thread::sleep_for(std::chrono::milliseconds(3)); + continue; + } + } + + int result = av_read_frame(fmt_ctx, pkt); + if (result == AVERROR_EOF || result < 0) + { + LOG_ERROR("Failed to read frame!"); + break; + } + + if (m_dec_keyframe && !(pkt->flags & AV_PKT_FLAG_KEY)) { + av_packet_unref(pkt); + continue; + } + + if (stream_index == pkt->stream_index){ + result = avcodec_send_packet(avctx, pkt); + if (result < 0){ + av_packet_unref(pkt); + LOG_ERROR("{} - Failed to send pkt: {}", m_dec_name, result); + continue; + } + + AVFrame* 
gpuFrame = av_frame_alloc(); + result = avcodec_receive_frame(avctx, gpuFrame); + if ((result == AVERROR(EAGAIN) || result == AVERROR_EOF) || result < 0){ + LOG_ERROR("{} - Failed to receive frame: {}", m_dec_name, result); + av_frame_free(&gpuFrame); + av_packet_unref(pkt); + continue; + } + av_packet_unref(pkt); + + if (m_bReal){ + if (m_bPause){ + av_frame_free(&gpuFrame); + std::this_thread::sleep_for(std::chrono::milliseconds(3)); + continue; + } + } + + if(gpuFrame != nullptr){ + m_queue_mutex.lock(); + if(mFrameQueue.size() <= 10){ + mFrameQueue.push(gpuFrame); + }else{ + av_frame_free(&gpuFrame); + } + m_queue_mutex.unlock(); + } + } + av_packet_unref(pkt); + } + + m_bRunning = false; + av_packet_free(&pkt); + + // long end_time = UtilTools::get_cur_time_ms(); + // cout << "解码用时:" << end_time - start_time << endl; + + if (m_post_decode_thread != 0) + { + pthread_join(m_post_decode_thread,0); + } + + decode_finished_cbk(m_finishedDecArg); + + decode_finished(); + + // 清空队列 + while(mFrameQueue.size() > 0){ + AVFrame * gpuFrame = mFrameQueue.front(); + av_frame_free(&gpuFrame); + mFrameQueue.pop(); + } + + LOG_INFO("{} - decode thread exited.", m_dec_name); +} + +void FFNvDecoder::decode_finished(){ + if (avctx) + { + avcodec_free_context(&avctx); + } + + if (fmt_ctx) + { + avformat_close_input(&fmt_ctx); + } + + m_bFinished = true; + m_dec_keyframe = false; +} + +void FFNvDecoder::post_decode_thread(){ + int skip_frame = m_cfg.skip_frame; + if (skip_frame <= 0){ + skip_frame = 1; + } + + int index = 0; + while (m_bRunning) + { + if(mFrameQueue.size() > 0){ + std::lock_guard l(m_snapshot_mutex); + // 取队头数据 + m_queue_mutex.lock(); + AVFrame * gpuFrame = mFrameQueue.front(); + mFrameQueue.pop(); + m_queue_mutex.unlock(); + // 跳帧 + if (skip_frame == 1 || index % skip_frame == 0){ + post_decoded_cbk(m_postDecArg, gpuFrame); + index = 0; + } + + av_frame_free(&gpuFrame); + + index++; + } + } + + LOG_INFO("post decode thread exited."); +} + +void 
FFNvDecoder::close(){ + m_bRunning=false; + if(m_decode_thread != 0){ + pthread_join(m_decode_thread,0); + } + m_dec_keyframe = false; +} + +AVPixelFormat FFNvDecoder::getHwPixFmt(){ + return hw_pix_fmt; +} + +bool FFNvDecoder::isRunning(){ + return m_bRunning; +} + +bool FFNvDecoder::isFinished(){ + return m_bFinished; +} + +bool FFNvDecoder::isPausing(){ + return m_bPause; +} + +bool FFNvDecoder::getResolution( int &width, int &height ){ + if (avctx != nullptr) + { + width = avctx->width; + height = avctx->height; + return true; + } + + return false; +} + +void FFNvDecoder::pause(){ + m_bPause = true; +} + +void FFNvDecoder::resume(){ + m_bPause = false; +} + +void FFNvDecoder::setDecKeyframe(bool bKeyframe) +{ + m_dec_keyframe = bKeyframe; +} + +int FFNvDecoder::getCachedQueueLength(){ + m_queue_mutex.lock(); + int queue_size = mFrameQueue.size(); + m_queue_mutex.lock(); + return queue_size; +} + +float FFNvDecoder::fps(){ + return m_fps; +} + +FFImgInfo* FFNvDecoder::snapshot(){ + + // 锁住停止队列消耗 + std::lock_guard l(m_snapshot_mutex); + + AVFrame * gpuFrame = nullptr; + + bool bFirst = true; + while(true){ + m_queue_mutex.lock(); + if(mFrameQueue.size() <= 0){ + m_queue_mutex.unlock(); + if(bFirst){ + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + bFirst = false; + continue; + }else{ + // 再进来说明前面已经等了 100 ms + // 100 ms都没有等到解码数据,则退出 + return nullptr; + } + } + + // 队列中数据大于1 + gpuFrame = mFrameQueue.front(); + m_queue_mutex.unlock(); + break; + } + + if (gpuFrame != nullptr && gpuFrame->format == AV_PIX_FMT_CUDA ){ + LOG_DEBUG("decode task: gpuid: {} width: {} height: {}", m_cfg.gpuid, gpuFrame->width, gpuFrame->height); + GpuRgbMemory* gpuMem = new GpuRgbMemory(3, gpuFrame->width, gpuFrame->height, getName(), m_cfg.gpuid , true); + + if (gpuMem->getMem() == nullptr){ + LOG_ERROR("new GpuRgbMemory failed !!!"); + return nullptr; + } + + cudaSetDevice(atoi(m_cfg.gpuid.c_str())); + cuda_common::setColorSpace( ITU_709, 0 ); + cudaError_t cudaStatus = 
cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], gpuMem->getMem(), gpuFrame->width, gpuFrame->height); + cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + LOG_ERROR("CUDAToBGR failed failed !!!"); + return nullptr; + } + + unsigned char * pHwRgb = gpuMem->getMem(); + int channel = gpuMem->getChannel(); + int width = gpuMem->getWidth(); + int height = gpuMem->getHeight(); + + if (pHwRgb != nullptr && channel > 0 && width > 0 && height > 0){ + int nSize = channel * height * width; + + LOG_INFO("channel:{} height:{} width:{}", channel, height, width); + // unsigned char* cpu_data = new unsigned char[nSize]; + + unsigned char* cpu_data = (unsigned char *)av_malloc(nSize * sizeof(unsigned char)); + + cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + + delete gpuMem; + gpuMem = nullptr; + + FFImgInfo* imgInfo = new FFImgInfo(); + imgInfo->dec_name = m_dec_name; + imgInfo->pData = cpu_data; + imgInfo->height = height; + imgInfo->width = width; + imgInfo->timestamp = UtilTools::get_cur_time_ms(); + imgInfo->index = m_index; + + m_index++; + + return imgInfo; + } + + delete gpuMem; + gpuMem = nullptr; + } + + return nullptr; +} \ No newline at end of file diff --git a/src/nvdecoder/FFNvDecoder.h b/src/nvdecoder/FFNvDecoder.h new file mode 100644 index 0000000..4bc12e9 --- /dev/null +++ b/src/nvdecoder/FFNvDecoder.h @@ -0,0 +1,68 @@ +#include +#include + +#include "../AbstractDecoder.h" + +#include + +using namespace std; + +class FFNvDecoder : public AbstractDecoder{ +public: + FFNvDecoder(); + ~FFNvDecoder(); + bool init(FFDecConfig& cfg); + void close(); + bool start(); + void pause(); + void resume(); + + void setDecKeyframe(bool bKeyframe); + + bool isRunning(); + bool isFinished(); + bool isPausing(); + bool getResolution( int &width, int &height ); + + bool isSurport(FFDecConfig& cfg); + + int 
getCachedQueueLength(); + + float fps(); + + DECODER_TYPE getDecoderType(){ return DECODER_TYPE_FFMPEG; } + + FFImgInfo* snapshot(); + +public: + AVPixelFormat getHwPixFmt(); + +private: + void decode_thread(); + void post_decode_thread(); + bool init(const char* uri, const char* gpuid, bool force_tcp); + void decode_finished(); + +private: + AVStream* stream; + AVCodecContext *avctx; + int stream_index; + AVFormatContext *fmt_ctx; + AVPixelFormat hw_pix_fmt; + + pthread_t m_decode_thread; + pthread_t m_post_decode_thread; + + bool m_bRunning; + bool m_bFinished; + + bool m_bPause; + + bool m_bReal; // 是否实时流 + + float m_fps; + + queue mFrameQueue; + mutex m_queue_mutex; + mutex m_snapshot_mutex; +}; \ No newline at end of file diff --git a/src/nvdecoder/GpuRgbMemory.hpp b/src/nvdecoder/GpuRgbMemory.hpp new file mode 100644 index 0000000..31be476 --- /dev/null +++ b/src/nvdecoder/GpuRgbMemory.hpp @@ -0,0 +1,34 @@ +#include + +#include "../DeviceRgbMemory.hpp" +#include "cuda_kernels.h" +#include "define.hpp" +#include "utiltools.hpp" + +using namespace std; + +class GpuRgbMemory : public DeviceRgbMemory{ + +public: + GpuRgbMemory(int _channel, int _width, int _height, string _id, string _gpuid, bool _isused) + :DeviceRgbMemory(_channel, _width, _height, _id, _gpuid, _isused){ + gpuid = _gpuid; + cudaSetDevice(atoi(gpuid.c_str())); + CHECK_CUDA(cudaMalloc((void **)&pHwRgb, data_size * sizeof(unsigned char))); + } + + ~GpuRgbMemory(){ + if (pHwRgb) { + cudaSetDevice(atoi(gpuid.c_str())); + CHECK_CUDA(cudaFree(pHwRgb)); + pHwRgb = nullptr; + } + } + + string getGpuId() { + return gpuid; + } + +private: + string gpuid; +}; \ No newline at end of file diff --git a/src/nvdecoder/ImageSaveGPU.cpp b/src/nvdecoder/ImageSaveGPU.cpp new file mode 100644 index 0000000..dde9b64 --- /dev/null +++ b/src/nvdecoder/ImageSaveGPU.cpp @@ -0,0 +1,123 @@ +#include "cuda_kernels.h" + +#include "common_header.h" + + +//int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, 
int img_height) +//{ +// return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height); +// //return 0; +//} +// +//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height) +//{ +// return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height); +// //return 0; +//} +// +//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB) +//{ +// return jpegNPP(szOutputFile, d_srcRGB); +//} +// +//int saveJPEG(const char *szOutputFile, float* d_srcRGB) +//{ +// return jpegNPP(szOutputFile, d_srcRGB); +//} + +int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height) +{ + cudaError_t cudaStatus = cuda_common::ResizeImage(d_srcRGB, src_width, src_height, d_dstRGB, dst_width, dst_height); + if (cudaStatus != cudaSuccess) { + LOG_ERROR("cuda_common::ResizeImage failed: {}",cudaGetErrorString(cudaStatus)); + return -1; + } + + return 0; +} + +//int initTables() +//{ +// initTable(); +// return 0; +//} +// +//int initTables(int flag, int width, int height) +//{ +// initTable(0, width, height); +// return 0; +//} + +int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) +{ + cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom); + return 0; +} + +int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom) +{ + cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom); + return 0; +} + +int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y) +{ + cuda_common::DrawLine(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y); + return 0; +} + +//int releaseJpegSaver() +//{ +// releaseJpegNPP(); +// return 0; +//} + +int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom) +{ + 
cudaError_t cudaStatus = cuda_common::PartMemCopy(d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom); + if (cudaStatus != cudaSuccess) { + LOG_ERROR("cuda_common::77 PartMemCopy failed: {} {} {} {} {} {} {}",cudaGetErrorString(cudaStatus), left, top, right, bottom, src_height, d_dstRGB); + return -1; + } + + return 0; +} +//#include +//extern std::ofstream g_os; +int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, + int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h, + float submeanb, float submeang, float submeanr, + float varianceb, float varianceg, float variancer) +{ + //g_os << "cudaMemcpyHostToDevice begin 9" << std::endl; + cudaError_t cudaStatus = cuda_common::PartMemResizeBatch( + d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h, + submeanb, submeang, submeanr, + varianceb, varianceg, variancer); + //g_os << "cudaMemcpyHostToDevice end 9" << std::endl; + if (cudaStatus != cudaSuccess) { + LOG_ERROR("cuda_common::PartMemResizeBatch failed: {}",cudaGetErrorString(cudaStatus)); + return -1; + } + + return 0; +} + + +//int PartMemResizeBatch(float * d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, +// int count, int* vleft, int * vtop, int* vright, int* vbottom, int dst_w, int dst_h, +// float submeanb, float submeang, float submeanr, +// float varianceb, float varianceg, float variancer) +// +//{ +// cudaError_t cudaStatus = cuda_common::PartMemResizeBatch( +// d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h, +// submeanb, submeang, submeanr, +// varianceb, varianceg, variancer); +// if (cudaStatus != cudaSuccess) { +// fprintf(stderr, "cuda_common::PartMemCopy failed: %s\n", cudaGetErrorString(cudaStatus)); +// return -1; +// } +// +// return 0; +//} \ No newline at end of file diff --git a/src/nvdecoder/ImageSaveGPU.h b/src/nvdecoder/ImageSaveGPU.h 
new file mode 100644 index 0000000..272a6d2 --- /dev/null +++ b/src/nvdecoder/ImageSaveGPU.h @@ -0,0 +1,65 @@ +/******************************************************************************************* +* Version: VPT_x64_V2.0.0_20170904 +* CopyRight: 中科院自动化研究所模式识别实验室图像视频组 +* UpdateDate: 20170904 +* Content: 人车物监测跟踪 +********************************************************************************************/ + +#ifndef IMAGESAVEGPU_H_ +#define IMAGESAVEGPU_H_ + +#ifdef _MSC_VER + #ifdef IMAGESAVEGPU_EXPORTS + #define IMAGESAVEGPU_API __declspec(dllexport) + #else + #define IMAGESAVEGPU_API __declspec(dllimport) + #endif +#else +#define IMAGESAVEGPU_API __attribute__((visibility ("default"))) +#endif +// 功能:保存成jpeg文件 +// szOutputFile 输出图片路径,如D:\\out.jpg +// d_srcRGB 输入RGB数据,由cudaMalloc分配的显存空间,数据排列形式为:BBBBBB......GGGGGG......RRRRRRRR...... +// img_width RGB数据图片的宽度 +// img_height RGB数据图片的高度 +// +//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height); +//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB); +// +//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height); +//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB); + +// 功能:防缩图像 +IMAGESAVEGPU_API int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height); + +// 功能:部分拷贝数据 +IMAGESAVEGPU_API int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom); + +//IMAGESAVEGPU_API int partMemResizeImage(float * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, +// int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h, +// float submeanb, float submeang, float submeanr, +// float varianceb, float varianceg, float variancer); + + +IMAGESAVEGPU_API int PartMemResizeBatch(unsigned char * d_srcRGB, int 
src_width, int src_height, unsigned char** d_dstRGB, + int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h, + float submeanb, float submeang, float submeanr, + float varianceb, float varianceg, float variancer); + + +//// 功能:初始化GPU保存图像的各种量化表 +//IMAGESAVEGPU_API int initTables(); +//IMAGESAVEGPU_API int initTables(int falg, int width, int height); +// +//// 功能:释放资源 +//IMAGESAVEGPU_API int releaseJpegSaver(); + +// 功能:在GPU中绘制快照包围框 +IMAGESAVEGPU_API int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); + +IMAGESAVEGPU_API int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); + +// 功能:在GPU中绘制直线 +IMAGESAVEGPU_API int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y); + +#endif diff --git a/src/nvdecoder/NV12ToRGB.cu b/src/nvdecoder/NV12ToRGB.cu new file mode 100644 index 0000000..58e1dff --- /dev/null +++ b/src/nvdecoder/NV12ToRGB.cu @@ -0,0 +1,345 @@ + +#include "cuda_kernels.h" + +#include +#include "common/inc/helper_cuda_drvapi.h" + +typedef unsigned char uint8; +typedef unsigned int uint32; +typedef int int32; + +#define COLOR_COMPONENT_MASK 0x3FF +#define COLOR_COMPONENT_BIT_SIZE 10 + +namespace cuda_common +{ + +#define MUL(x,y) ((x)*(y)) + + __constant__ float constHueColorSpaceMat2[9]; //默认分配到0卡上,未找到分配到指定卡上设置方法,当前也未用到,先注释掉 + + __device__ void YUV2RGB2(uint32 *yuvi, float *red, float *green, float *blue) + { + float luma, chromaCb, chromaCr; + + // Prepare for hue adjustment + luma = (float)yuvi[0]; + chromaCb = (float)((int32)yuvi[1] - 512.0f); + chromaCr = (float)((int32)yuvi[2] - 512.0f); + + + // Convert YUV To RGB with hue adjustment + *red = MUL(luma, constHueColorSpaceMat2[0]) + + MUL(chromaCb, constHueColorSpaceMat2[1]) + + MUL(chromaCr, constHueColorSpaceMat2[2]); + *green = MUL(luma, constHueColorSpaceMat2[3]) + + MUL(chromaCb, 
constHueColorSpaceMat2[4]) + + MUL(chromaCr, constHueColorSpaceMat2[5]); + *blue = MUL(luma, constHueColorSpaceMat2[6]) + + MUL(chromaCb, constHueColorSpaceMat2[7]) + + MUL(chromaCr, constHueColorSpaceMat2[8]); + + } + + __device__ unsigned char clip_v(int x, int min_val, int max_val) { + if (x>max_val) { + return max_val; + } + else if (x= width) + { + //printf("x >= width\n"); + //*flag = -1; + return; //x = width - 1; + } + //return; //x = width - 1; + + if (y >= height) + { + //printf("y >= height\n"); + //*flag = -1; + return; // y = height - 1; + } + + // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. + // if we move to texture we could read 4 luminance values + yuv101010Pel[0] = (srcImageU8[y * processingPitch + x]) << 2; + yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]) << 2; + + uint32 chromaOffset = processingPitch * height; + int32 y_chroma = y >> 1; + + if (y & 1) // odd scanline ? + { + uint32 chromaCb; + uint32 chromaCr; + + chromaCb = srcImageU8[chromaOffset + y_chroma * processingPitch + x]; + chromaCr = srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1]; + + if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically + { + chromaCb = (chromaCb + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x] + 1) >> 1; + chromaCr = (chromaCr + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x + 1] + 1) >> 1; + } + + yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + + yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + } + else + { + yuv101010Pel[0] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x] << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[0] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) 
+ 2)); + + yuv101010Pel[1] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x] << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[1] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + } + + // this steps performs the color conversion + uint32 yuvi[6]; + float red[2], green[2], blue[2]; + + yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK); + yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); + yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); + + yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK); + yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); + yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); + + // YUV to RGB Transformation conversion + YUV2RGB2(&yuvi[0], &red[0], &green[0], &blue[0]); + YUV2RGB2(&yuvi[3], &red[1], &green[1], &blue[1]); + + + dstImage[y * width * 3 + x * 3] = clip_v(blue[0] * 0.25,0 ,255); + dstImage[y * width * 3 + x * 3 + 3] = clip_v(blue[1] * 0.25,0, 255); + + dstImage[width * y * 3 + x * 3 + 1] = clip_v(green[0] * 0.25,0 ,255); + dstImage[width * y * 3 + x * 3 + 4] = clip_v(green[1] * 0.25,0, 255); + + dstImage[width * y * 3 + x * 3 + 2] = clip_v(red[0] * 0.25, 0, 255); + dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255); + + + //dstImage[y * width * 3 + x * 3] = blue[0] * 0.25; + //dstImage[y * width * 3 + x * 3 + 3] = blue[1] * 0.25; + + //dstImage[width * y * 3 + x * 3 + 1] =green[0] * 0.25; + //dstImage[width * y * 3 + x * 3 + 4] = green[1] * 0.25; + + //dstImage[width * y * 3 + x * 3 + 2] = red[0] * 0.25; + //dstImage[width * y * 3 + x * 3 + 5] = red[1] * 0.25; + + // Clamp the results to BBBBBB....GGGGGGG.......RRRRRRR.... 
+ // dstImage[y * width + x] = blue[0] * 0.25; + // dstImage[y * width + x + 1] = blue[1] * 0.25; + + // dstImage[width * height + y * width + x] = green[0] * 0.25; + // dstImage[width * height + y * width + x + 1] = green[1] * 0.25; + + // dstImage[width * height * 2 + y * width + x] = red[0] * 0.25; + // dstImage[width * height * 2 + y * width + x + 1] = red[1] * 0.25; + return; + + } + + // CUDA kernel for outputing the final RGB output from NV12; + extern "C" + __global__ void CUDAToBGR_drvapi(uint32 *dataY, uint32 *dataUV, size_t pitchY, size_t pitchUV, unsigned char *dstImage, int width, int height) + { + + int32 x, y; + + // Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread + x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1); + y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= width) + { + return; + } + + if (y >= height) + { + return; + } + + uint32 yuv101010Pel[2]; + uint8 *srcImageU8_Y = (uint8 *)dataY; + uint8 *srcImageU8_UV = (uint8 *)dataUV; + + // Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way. + // if we move to texture we could read 4 luminance values + yuv101010Pel[0] = (srcImageU8_Y[y * pitchY + x]) << 2; + yuv101010Pel[1] = (srcImageU8_Y[y * pitchY + x + 1]) << 2; + + int32 y_chroma = y >> 1; + + if (y & 1) // odd scanline ? 
+ { + uint32 chromaCb; + uint32 chromaCr; + + chromaCb = srcImageU8_UV[y_chroma * pitchUV + x]; + chromaCr = srcImageU8_UV[y_chroma * pitchUV + x + 1]; + + if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically + { + chromaCb = (chromaCb + srcImageU8_UV[(y_chroma + 1) * pitchUV + x] + 1) >> 1; + chromaCr = (chromaCr + srcImageU8_UV[(y_chroma + 1) * pitchUV + x + 1] + 1) >> 1; + } + + yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + + yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + } + else + { + yuv101010Pel[0] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x] << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[0] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + + yuv101010Pel[1] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x] << (COLOR_COMPONENT_BIT_SIZE + 2)); + yuv101010Pel[1] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2)); + } + + // this steps performs the color conversion + uint32 yuvi[6]; + float red[2], green[2], blue[2]; + + yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK); + yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); + yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); + + yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK); + yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK); + yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK); + + // YUV to RGB Transformation conversion + YUV2RGB2(&yuvi[0], &red[0], &green[0], &blue[0]); + YUV2RGB2(&yuvi[3], &red[1], &green[1], &blue[1]); + + + dstImage[y * width * 3 + x * 3] = clip_v(blue[0] * 0.25,0 ,255); + dstImage[y * width * 3 + x * 3 + 3] = clip_v(blue[1] * 0.25,0, 255); + + 
dstImage[width * y * 3 + x * 3 + 1] = clip_v(green[0] * 0.25,0 ,255); + dstImage[width * y * 3 + x * 3 + 4] = clip_v(green[1] * 0.25,0, 255); + + dstImage[width * y * 3 + x * 3 + 2] = clip_v(red[0] * 0.25, 0, 255); + dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255); + } + + cudaError_t setColorSpace(FF_ColorSpace CSC, float hue) + { + float hueSin = sin(hue); + float hueCos = cos(hue); + + float hueCSC[9]; + if (CSC == ITU_601) + { + //CCIR 601 + hueCSC[0] = 1.1644f; + hueCSC[1] = hueSin * 1.5960f; + hueCSC[2] = hueCos * 1.5960f; + hueCSC[3] = 1.1644f; + hueCSC[4] = (hueCos * -0.3918f) - (hueSin * 0.8130f); + hueCSC[5] = (hueSin * 0.3918f) - (hueCos * 0.8130f); + hueCSC[6] = 1.1644f; + hueCSC[7] = hueCos * 2.0172f; + hueCSC[8] = hueSin * -2.0172f; + } + else if (CSC == ITU_709) + { + //CCIR 709 + hueCSC[0] = 1.0f; + hueCSC[1] = hueSin * 1.57480f; + hueCSC[2] = hueCos * 1.57480f; + hueCSC[3] = 1.0; + hueCSC[4] = (hueCos * -0.18732f) - (hueSin * 0.46812f); + hueCSC[5] = (hueSin * 0.18732f) - (hueCos * 0.46812f); + hueCSC[6] = 1.0f; + hueCSC[7] = hueCos * 1.85560f; + hueCSC[8] = hueSin * -1.85560f; + } + + cudaError_t cudaStatus = cudaMemcpyToSymbol(constHueColorSpaceMat2, hueCSC, 9 * sizeof(float), 0, cudaMemcpyHostToDevice); + float tmpf[9]; + memset(tmpf, 0, 9 * sizeof(float)); + cudaMemcpyFromSymbol(tmpf, constHueColorSpaceMat2, 9 * sizeof(float), 0, ::cudaMemcpyDefault); + cudaDeviceSynchronize(); + + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpyToSymbol failed: %s\n", cudaGetErrorString(cudaStatus)); + } + + return cudaStatus; + } + + cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height) + { + dim3 block(32, 16, 1); + dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1); + NV12ToRGB_drvapi2 << < grid, block >> >((uint32 *)d_srcNV12, nSourcePitch, d_dstRGB, width, height); + cudaError_t cudaStatus = cudaGetLastError(); + if 
(cudaStatus != cudaSuccess) { + fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus)); + return cudaStatus; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching NV12ToRGB_drvapi !\n", cudaStatus); + return cudaStatus; + } + + return cudaStatus; + } + + cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height) + { + dim3 block(32, 16, 1); + dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1); + CUDAToBGR_drvapi << < grid, block >> >((uint32 *)dataY, (uint32 *)dataUV, pitchY, pitchUV, d_dstRGB, width, height); + cudaError_t cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus)); + return cudaStatus; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching NV12ToRGB_drvapi !\n", cudaStatus); + return cudaStatus; + } + + return cudaStatus; + } +} \ No newline at end of file diff --git a/src/nvdecoder/NvJpegEncoder.cpp b/src/nvdecoder/NvJpegEncoder.cpp new file mode 100644 index 0000000..7ee0727 --- /dev/null +++ b/src/nvdecoder/NvJpegEncoder.cpp @@ -0,0 +1,90 @@ +#include "NvJpegEncoder.h" + +#include +#include +#include + + +#define CHECK_NVJPEG(S) do {nvjpegStatus_t status; \ + status = S; \ + if (status != NVJPEG_STATUS_SUCCESS ) std::cout << __LINE__ <<" CHECK_NVJPEG - status = " << status << std::endl; \ + } while (false) + + +int saveJpeg(const char * filepath, unsigned char* d_srcBGR, int width, int height, cudaStream_t stream) +{ + nvjpegHandle_t nvjpeg_handle; + nvjpegEncoderState_t encoder_state; + nvjpegEncoderParams_t encoder_params; + + cudaEvent_t ev_start, ev_end; + 
cudaEventCreate(&ev_start); + cudaEventCreate(&ev_end); + + nvjpegImage_t input; + nvjpegInputFormat_t input_format = NVJPEG_INPUT_BGRI; + int image_width = width; + int image_height = height; + + // int channel_size = image_width * image_height; + // for (int i = 0; i < 3; i++) + // { + // input.pitch[i] = image_width; + // (cudaMalloc((void**)&(input.channel[i]), channel_size)); + // (cudaMemset(input.channel[i], 50 * 40 * i, channel_size)); + // } + + input.channel[0] = d_srcBGR; + input.pitch[0] = image_width * 3; + + nvjpegBackend_t backend = NVJPEG_BACKEND_DEFAULT; + + CHECK_NVJPEG(nvjpegCreate(backend, nullptr, &nvjpeg_handle)); + + CHECK_NVJPEG(nvjpegEncoderParamsCreate(nvjpeg_handle, &encoder_params, stream)); + CHECK_NVJPEG(nvjpegEncoderStateCreate(nvjpeg_handle, &encoder_state, stream)); + + // set params + CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(encoder_params, nvjpegJpegEncoding_t::NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN, stream)); + CHECK_NVJPEG(nvjpegEncoderParamsSetOptimizedHuffman(encoder_params, 1, stream)); + CHECK_NVJPEG(nvjpegEncoderParamsSetQuality(encoder_params, 70, stream)); + CHECK_NVJPEG(nvjpegEncoderParamsSetSamplingFactors(encoder_params, nvjpegChromaSubsampling_t::NVJPEG_CSS_420, stream)); + + cudaEventRecord(ev_start); + CHECK_NVJPEG(nvjpegEncodeImage(nvjpeg_handle, encoder_state, encoder_params, &input, input_format, image_width, image_height, stream)); + cudaEventRecord(ev_end); + + std::vector obuffer; + size_t length; + CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream( + nvjpeg_handle, + encoder_state, + NULL, + &length, + stream)); + + obuffer.resize(length); + CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream( + nvjpeg_handle, + encoder_state, + obuffer.data(), + &length, + stream)); + + cudaEventSynchronize(ev_end); + + // 用完销毁,避免显存泄露 + nvjpegEncoderParamsDestroy(encoder_params); + nvjpegEncoderStateDestroy(encoder_state); + nvjpegDestroy(nvjpeg_handle); + + float ms; + cudaEventElapsedTime(&ms, ev_start, ev_end); + // std::cout << 
"time spend " << ms << " ms" << std::endl; + + std::ofstream outputFile(filepath, std::ios::out | std::ios::binary); + outputFile.write(reinterpret_cast(obuffer.data()), static_cast(length)); + outputFile.close(); + + return 0; +} \ No newline at end of file diff --git a/src/nvdecoder/NvJpegEncoder.h b/src/nvdecoder/NvJpegEncoder.h new file mode 100644 index 0000000..3c27ba8 --- /dev/null +++ b/src/nvdecoder/NvJpegEncoder.h @@ -0,0 +1,3 @@ +#include + +int saveJpeg(const char * filepath, unsigned char* d_srcBGR, int width, int height, cudaStream_t stream); \ No newline at end of file diff --git a/src/nvdecoder/PartMemCopy.cu b/src/nvdecoder/PartMemCopy.cu new file mode 100644 index 0000000..396765b --- /dev/null +++ b/src/nvdecoder/PartMemCopy.cu @@ -0,0 +1,289 @@ +#include "cuda_kernels.h" +#include +typedef unsigned char uchar; +typedef unsigned int uint32; +typedef int int32; + +#define MAX_SNAPSHOT_WIDTH 320 +#define MAX_SNAPSHOT_HEIGHT 320 + +namespace cuda_common +{ + __global__ void kernel_memcopy(unsigned char* d_srcRGB, int src_width, int src_height, + unsigned char* d_dstRGB, int left, int top, int right, int bottom) + { + const int dst_x = blockIdx.x * blockDim.x + threadIdx.x; + const int dst_y = blockIdx.y * blockDim.y + threadIdx.y; + const int dst_width = right - left; + const int dst_height = bottom - top; + if (dst_x < dst_width && dst_y < dst_height) + { + int src_x = left + dst_x; + int src_y = top + dst_y; + + //bgr...bgr...bgr... + d_dstRGB[(dst_y*dst_width + dst_x) * 3] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3]; + d_dstRGB[(dst_y*dst_width + dst_x) + * 3 + 1] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3 + 1]; + d_dstRGB[(dst_y*dst_width + dst_x) * 3 + 2] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3 + 2]; + + //bbb...ggg...rrr... 
+ //d_dstRGB[(dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(src_y*src_width) + src_x]; + //d_dstRGB[(dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(src_width*src_height) + (src_y*src_width) + src_x]; + //d_dstRGB[(2 * dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(2 * src_width*src_height) + (src_y*src_width) + src_x]; + + /* memcpy(d_dstRGB + (dst_y*src_width) + dst_x, d_srcRGB + (src_y*src_width) + src_x, sizeof(float)); + memcpy(d_dstRGB + (src_width*src_height) + (dst_y*src_width) + dst_x, d_srcRGB + (src_width*src_height) + (src_y*src_width) + src_x, sizeof(float)); + memcpy(d_dstRGB + (2 * src_width*src_height) + (dst_y*src_width) + dst_x, d_srcRGB + (2 * src_width*src_height) + (src_y*src_width) + src_x, sizeof(float));*/ + } + } + + cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom) + { + dim3 block(32, 16, 1); + dim3 grid(((right - left) + (block.x - 1)) / block.x, ((bottom - top) + (block.y - 1)) / block.y, 1); + + kernel_memcopy << < grid, block >> > (d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom); + + cudaError_t cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "Part 50 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus)); + return cudaStatus; + } + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); + return cudaStatus; + } + return cudaStatus; + } + + + // __global__ void kernel_memcopy_mean_variance(float* d_srcRGB, int src_width, int src_height, + // unsigned char* vd_dstRGB, int count, int * vleft, int* vtop, int* vright, int * vbottom, float submeanb,float submeang, float submeanr, float varianceb,float varianceg, float variancer) + // { + // const int dst_x = blockIdx.x * 
blockDim.x + threadIdx.x; + // const int dst_y = blockIdx.y * blockDim.y + threadIdx.y; + // for (int i=0;i srcimg_width - 2) + { + ax = srcimg_width - 2; + } + if (ay < 0) { + ay = 0; + } + if (ay > srcimg_height - 2) + { + ay = srcimg_height - 2; + } + + int A = ax + ay*srcimg_width; + int B = ax + ay*srcimg_width + 1; + int C = ax + ay*srcimg_width + srcimg_width; + int D = ax + ay*srcimg_width + srcimg_width + 1; + + float w1, w2, w3, w4; + w1 = fx - ax; + w2 = 1 - w1; + w3 = fy - ay; + w4 = 1 - w3; + float blue = src_img[A * 3] * w2*w4 + src_img[B * 3] * w1*w4 + src_img[C * 3] * w2*w3 + src_img[D * 3] * w1*w3; + float green = src_img[A * 3 + 1] * w2*w4 + src_img[B * 3 + 1] * w1*w4 + + src_img[C * 3 + 1] * w2*w3 + src_img[D * 3 + 1] * w1*w3; + float red = src_img[A * 3 + 2] * w2*w4 + src_img[B * 3 + 2] * w1*w4 + + src_img[C * 3 + 2] * w2*w3 + src_img[D * 3 + 2] * w1*w3; + + /*dst_img[(dst_y * dst_width + dst_x) * 3] = (unsigned char)(blue - submeanb)*varianceb; + dst_img[(dst_y * dst_width + dst_x) * 3 + 1] =(unsigned char) (green - submeang)*varianceg; + dst_img[(dst_y * dst_width + dst_x) * 3 + 2] = (unsigned char) (red - submeanr)*variancer;*/ + + if (blue < 0) + blue = 0; + else if (blue > 255) + blue = 255; + + if (green < 0) + green = 0; + else if (green > 255) + green = 255; + + if (red < 0) + red = 0; + else if (red > 255) + red = 255; + + dst_img[(dst_y * cur_dst_width + dst_x) * 3] = (unsigned char)blue; + dst_img[(dst_y * cur_dst_width + dst_x) * 3 + 1] = (unsigned char)green; + dst_img[(dst_y * cur_dst_width + dst_x) * 3 + 2] = (unsigned char)red; + + + /*if (src_img[(dst_y * dst_width + dst_x) * 3] < 0) + src_img[(dst_y * dst_width + dst_x) * 3] = 0; + else if (src_img[(dst_y * dst_width + dst_x) * 3] > 255) + src_img[(dst_y * dst_width + dst_x) * 3] = 255; + + if (src_img[(dst_y * dst_width + dst_x) * 3 + 1] < 0) + src_img[(dst_y * dst_width + dst_x) * 3 + 1] = 0; + else if (src_img[(dst_y * dst_width + dst_x) * 3 + 1] > 255) + src_img[(dst_y * 
dst_width + dst_x) * 3 + 1] = 255; + + if (src_img[(dst_y * dst_width + dst_x) * 3 + 2] < 0) + src_img[(dst_y * dst_width + dst_x) * 3 + 2] = 0; + else if (src_img[(dst_y * dst_width + dst_x) * 3 + 2] > 255) + src_img[(dst_y * dst_width + dst_x) * 3 + 2] = 255; + + + dst_img[(dst_y * dst_width + dst_x) * 3] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3]; + dst_img[(dst_y * dst_width + dst_x) * 3 + 1] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3 + 1]; + dst_img[(dst_y * dst_width + dst_x) * 3 + 2] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3 + 2];*/ + } + } + } + + cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, int count, int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h, float submeanb, float submeang, float submeanr, + float varianceb, float varianceg, float variancer) + { + /* cudaEvent_t start, stop; + float time; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0);*/ + + dim3 block(32, 16, 1); + dim3 grid((*std::max_element(dst_w, dst_w+ count) + (block.x - 1)) / block.x, (*std::max_element(dst_h, dst_h + count) + (block.y - 1)) / block.y, count); + + int * gpu_left; + cudaMalloc(&gpu_left, 1000 * sizeof(int)); + cudaMemcpy(gpu_left, left, count * sizeof(int), cudaMemcpyHostToDevice); + + int * gpu_right; + cudaMalloc(&gpu_right, 1000 * sizeof(int)); + cudaMemcpy(gpu_right, right, count * sizeof(int), cudaMemcpyHostToDevice); + + int * gpu_top; + cudaMalloc(&gpu_top, 1000 * sizeof(int)); + cudaMemcpy(gpu_top, top, count * sizeof(int), cudaMemcpyHostToDevice); + + int * gpu_bottom; + cudaMalloc(&gpu_bottom, 1000 * sizeof(int)); + cudaMemcpy(gpu_bottom, bottom, count * sizeof(int), cudaMemcpyHostToDevice); + + int * gpu_dst_w; + cudaMalloc(&gpu_dst_w, 1000 * sizeof(int)); + cudaMemcpy(gpu_dst_w, dst_w, count * sizeof(int), cudaMemcpyHostToDevice); + + int * gpu_dst_h; + cudaMalloc(&gpu_dst_h, 1000 * sizeof(int)); + 
cudaMemcpy(gpu_dst_h, dst_h, count * sizeof(int), cudaMemcpyHostToDevice); + + unsigned char** gpu_dst_rgb; + cudaMalloc(&gpu_dst_rgb, 1000 * sizeof(unsigned char*)); + cudaMemcpy(gpu_dst_rgb, d_dstRGB, count * sizeof(unsigned char*), cudaMemcpyHostToDevice); + + //cudaMemcpy(cpu_personfloat, d_srcRGB, 112*224*2*sizeof(float), cudaMemcpyDeviceToHost); + // for(int i=0;i<100;i++) + // { + // printf("the score is %f\t",cpu_personfloat[i]); + // } + PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel << < grid, block >> > ( + d_srcRGB, src_width, src_height, + gpu_left, gpu_top, gpu_right, gpu_bottom, + gpu_dst_rgb, count, gpu_dst_w, gpu_dst_h, + submeanb, submeang, submeanr, + varianceb, varianceg, variancer); + cudaFree(gpu_top); + cudaFree(gpu_bottom); + cudaFree(gpu_left); + cudaFree(gpu_right); + cudaFree(gpu_dst_w); + cudaFree(gpu_dst_h); + cudaFree(gpu_dst_rgb); + + cudaError_t cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "Part 270 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus)); + return cudaStatus; + } + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); + return cudaStatus; + } + + /*cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&time, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + printf("˺ʱ:%f\n", time);*/ + + return cudaStatus; + } + +} \ No newline at end of file diff --git a/src/nvdecoder/RGB2YUV.cu b/src/nvdecoder/RGB2YUV.cu new file mode 100644 index 0000000..7202c3a --- /dev/null +++ b/src/nvdecoder/RGB2YUV.cu @@ -0,0 +1,263 @@ + + +#include "cuda_kernels.h" + +typedef unsigned char uint8; +typedef unsigned int uint32; +typedef int int32; + +namespace cuda_common +{ + __device__ unsigned char clip_value(unsigned char x, unsigned char min_val, unsigned char max_val){ + if (x>max_val){ + return 
max_val; + } + else if (x= src_width) + return; //x = width - 1; + + if (y >= src_height) + return; // y = height - 1; + + int B = src_img[y * src_width * 3 + x * 3]; + int G = src_img[y * src_width * 3 + x * 3 + 1]; + int R = src_img[y * src_width * 3 + x * 3 + 2]; + + /*int B = src_img[y * src_width + x]; + int G = src_img[src_width * src_height + y * src_width + x]; + int R = src_img[src_width * src_height * 2 + y * src_width + x];*/ + + Y[y * yPitch + x] = clip_value((unsigned char)(0.299 * R + 0.587 * G + 0.114 * B), 0, 255); + u[y * src_width + x] = clip_value((unsigned char)(-0.147 * R - 0.289 * G + 0.436 * B + 128), 0, 255); + v[y * src_width + x] = clip_value((unsigned char)(0.615 * R - 0.515 * G - 0.100 * B + 128), 0, 255); + + //Y[y * yPitch + x] = clip_value((unsigned char)(0.257 * R + 0.504 * G + 0.098 * B + 16), 0, 255); + //u[y * src_width + x] = clip_value((unsigned char)(-0.148 * R - 0.291 * G + 0.439 * B + 128), 0, 255); + //v[y * src_width + x] = clip_value((unsigned char)(0.439 * R - 0.368 * G - 0.071 * B + 128), 0, 255); + } + + __global__ void kernel_rgb2yuv(float *src_img, unsigned char* Y, unsigned char* u, unsigned char* v, + int src_width, int src_height, size_t yPitch) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= src_width) + return; //x = width - 1; + + if (y >= src_height) + return; // y = height - 1; + + float B = src_img[y * src_width + x]; + float G = src_img[src_width * src_height + y * src_width + x]; + float R = src_img[src_width * src_height * 2 + y * src_width + x]; + + Y[y * yPitch + x] = clip_value((unsigned char)(0.299 * R + 0.587 * G + 0.114 * B), 0, 255); + u[y * src_width + x] = clip_value((unsigned char)(-0.147 * R - 0.289 * G + 0.436 * B + 128), 0, 255); + v[y * src_width + x] = clip_value((unsigned char)(0.615 * R - 0.515 * G - 0.100 * B + 128), 0, 255); + + //Y[y * yPitch + x] = clip_value((unsigned char)(0.257 * R + 0.504 * G + 0.098 * B 
+ 16), 0, 255); + //u[y * src_width + x] = clip_value((unsigned char)(-0.148 * R - 0.291 * G + 0.439 * B + 128), 0, 255); + //v[y * src_width + x] = clip_value((unsigned char)(0.439 * R - 0.368 * G - 0.071 * B + 128), 0, 255); + } + + extern "C" + __global__ void kernel_resize_UV(unsigned char* src_img, unsigned char *dst_img, + int src_width, int src_height, int dst_width, int dst_height, int nPitch) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= dst_width) + return; //x = width - 1; + + if (y >= dst_height) + return; // y = height - 1; + + float fx = (x + 0.5)*src_width / (float)dst_width - 0.5; + float fy = (y + 0.5)*src_height / (float)dst_height - 0.5; + int ax = floor(fx); + int ay = floor(fy); + if (ax < 0) + { + ax = 0; + } + else if (ax > src_width - 2) + { + ax = src_width - 2; + } + + if (ay < 0){ + ay = 0; + } + else if (ay > src_height - 2) + { + ay = src_height - 2; + } + + int A = ax + ay*src_width; + int B = ax + ay*src_width + 1; + int C = ax + ay*src_width + src_width; + int D = ax + ay*src_width + src_width + 1; + + float w1, w2, w3, w4; + w1 = fx - ax; + w2 = 1 - w1; + w3 = fy - ay; + w4 = 1 - w3; + + unsigned char val = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3; + + dst_img[y * nPitch + x] = clip_value(val,0,255); + } + + cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height, + unsigned char* Y, size_t yPitch, int yWidth, int yHeight, + unsigned char* U, size_t uPitch, int uWidth, int uHeight, + unsigned char* V, size_t vPitch, int vWidth, int vHeight) + { + unsigned char * u ; + unsigned char * v ; + + cudaError_t cudaStatus; + + cudaStatus = cudaMalloc((void**)&u, src_width * src_height * sizeof(unsigned char)); + cudaStatus = cudaMalloc((void**)&v, src_width * src_height * sizeof(unsigned char)); + + dim3 block(32, 16, 1); + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / 
block.y, 1); + dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1); + dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1); + + kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch); + + cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus)); + goto Error; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus); + goto Error; + } + + kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch); + + cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); + goto Error; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); + goto Error; + } + + kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch); + + cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); + goto Error; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); + goto Error; + } + +Error : + cudaFree(u); + cudaFree(v); + + return cudaStatus; + } + + + + cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height, + unsigned char* Y, size_t yPitch, int yWidth, int yHeight, + unsigned char* U, size_t uPitch, int uWidth, int uHeight, + unsigned char* V, size_t vPitch, int vWidth, int vHeight) + { + unsigned 
char * u; + unsigned char * v; + + cudaError_t cudaStatus; + + cudaStatus = cudaMalloc((void**)&u, src_width * src_height * sizeof(unsigned char)); + cudaStatus = cudaMalloc((void**)&v, src_width * src_height * sizeof(unsigned char)); + + dim3 block(32, 16, 1); + dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1); + dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1); + dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1); + + kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch); + + cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus)); + goto Error; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus); + goto Error; + } + + kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch); + + cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); + goto Error; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); + goto Error; + } + + kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch); + + cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus)); + goto Error; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus); + goto Error; + } + + 
Error: + cudaFree(u); + cudaFree(v); + + return cudaStatus; + } +} + diff --git a/src/nvdecoder/ResizeImage.cu b/src/nvdecoder/ResizeImage.cu new file mode 100644 index 0000000..fdc6961 --- /dev/null +++ b/src/nvdecoder/ResizeImage.cu @@ -0,0 +1,84 @@ +#include "cuda_kernels.h" + +typedef unsigned char uchar; +typedef unsigned int uint32; +typedef int int32; + +namespace cuda_common +{ + __global__ void kernel_bilinear(float *src_img, float *dst_img, + int src_width, int src_height, int dst_width, int dst_height) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x < dst_width && y < dst_height) + { + float fx = (x + 0.5)*src_width / (float)dst_width - 0.5; + float fy = (y + 0.5)*src_height / (float)dst_height - 0.5; + int ax = floor(fx); + int ay = floor(fy); + if (ax < 0) + { + ax = 0; + } + else if (ax > src_width - 2) + { + ax = src_width - 2; + } + + if (ay < 0){ + ay = 0; + } + else if (ay > src_height - 2) + { + ay = src_height - 2; + } + + int A = ax + ay*src_width; + int B = ax + ay*src_width + 1; + int C = ax + ay*src_width + src_width; + int D = ax + ay*src_width + src_width + 1; + + float w1, w2, w3, w4; + w1 = fx - ax; + w2 = 1 - w1; + w3 = fy - ay; + w4 = 1 - w3; + + float blue = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3; + + float green = src_img[src_width * src_height + A] * w2*w4 + src_img[src_width * src_height + B] * w1*w4 + + src_img[src_width * src_height + C] * w2*w3 + src_img[src_width * src_height + D] * w1*w3; + + float red = src_img[src_width * src_height * 2 + A] * w2*w4 + src_img[src_width * src_height * 2 + B] * w1*w4 + + src_img[src_width * src_height * 2 + C] * w2*w3 + src_img[src_width * src_height * 2 + D] * w1*w3; + + dst_img[y * dst_width + x] = blue; + dst_img[dst_width * dst_height + y * dst_width + x] = green; + dst_img[dst_width * dst_height * 2 + y * dst_width + x] = red; + } + } + + cudaError_t ResizeImage(float* 
d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height) + { + dim3 block(32, 16, 1); + dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1); + + kernel_bilinear << < grid, block >> >(d_srcRGB, d_dstRGB, src_width, src_height, dst_width, dst_height); + + cudaError_t cudaStatus = cudaGetLastError(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "kernel_bilinear launch failed: %s\n", cudaGetErrorString(cudaStatus)); + return cudaStatus; + } + + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus); + return cudaStatus; + } + + return cudaStatus; + } +} \ No newline at end of file diff --git a/src/nvdecoder/common_header.h b/src/nvdecoder/common_header.h new file mode 100644 index 0000000..d5feed8 --- /dev/null +++ b/src/nvdecoder/common_header.h @@ -0,0 +1,8 @@ +#ifndef _COMMON_HEADER_H_ +#define _COMMON_HEADER_H_ + + +#include "../interface/logger.hpp" +#include "../interface/utiltools.hpp" + +#endif \ No newline at end of file diff --git a/src/nvdecoder/cuda_kernels.h b/src/nvdecoder/cuda_kernels.h new file mode 100644 index 0000000..cd1eb00 --- /dev/null +++ b/src/nvdecoder/cuda_kernels.h @@ -0,0 +1,63 @@ +#pragma once +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include + +#include +#include + +#include + +typedef enum +{ + ITU_601 = 1, + ITU_709 = 2 +} FF_ColorSpace; + +namespace cuda_common +{ + cudaError_t setColorSpace(FF_ColorSpace CSC, float hue); + + cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height); + cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height); + + + cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, 
int dst_height); + + cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height, + unsigned char* Y, size_t yPitch, int yWidth, int yHeight, + unsigned char* U, size_t uPitch, int uWidth, int uHeight, + unsigned char* V, size_t vPitch, int vWidth, int vHeight); + + cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height, + unsigned char* Y, size_t yPitch, int yWidth, int yHeight, + unsigned char* U, size_t uPitch, int uWidth, int uHeight, + unsigned char* V, size_t vPitch, int vWidth, int vHeight); + + cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom); + // cudaError_t PartMemResize(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int left, int top, int right, int bottom); + + cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int srcimg_width, int srcimg_height, unsigned char** d_dstRGB, int count, + int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h, + float submeanb, float submeang, float submeanr, + float varianceb, float varianceg, float variancer); + + cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); + cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom); + + cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y); +} + + +int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height); +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height); + +int jpegNPP(const char *szOutputFile, float* d_srcRGB); +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB); + +int initTable(); +int initTable(int flag, int width, int height); +int releaseJpegNPP(); + diff --git a/src/nvdecoder/define.hpp b/src/nvdecoder/define.hpp new file mode 100644 index 
0000000..2eaafe0 --- /dev/null +++ b/src/nvdecoder/define.hpp @@ -0,0 +1,11 @@ +#pragma once + +#include + + +#define CHECK_CUDA(call) \ +{\ + const cudaError_t error_code = call;\ + if (cudaSuccess != error_code)\ + LOG_ERROR("CUDA error, code: {} reason: {}", error_code, cudaGetErrorString(error_code));\ +} \ No newline at end of file diff --git a/src/nvdecoder/jpegNPP.cpp-1 b/src/nvdecoder/jpegNPP.cpp-1 new file mode 100644 index 0000000..f0bf2e6 --- /dev/null +++ b/src/nvdecoder/jpegNPP.cpp-1 @@ -0,0 +1,1193 @@ +/* +* Copyright 1993-2015 NVIDIA Corporation. All rights reserved. +* +* NOTICE TO USER: +* +* This source code is subject to NVIDIA ownership rights under U.S. and +* international Copyright laws. +* +* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +* OR PERFORMANCE OF THIS SOURCE CODE. +* +* U.S. Government End Users. This source code is a "commercial item" as +* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +* "commercial computer software" and "commercial computer software +* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +* and is provided to the U.S. Government only as a commercial end item. +* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +* source code with only those rights set forth herein. 
+*/ + +// This sample needs at least CUDA 5.5 and a GPU that has at least Compute Capability 2.0 + +// This sample demonstrates a simple image processing pipeline. +// First, a JPEG file is huffman decoded and inverse DCT transformed and dequantized. +// Then the different planes are resized. Finally, the resized image is quantized, forward +// DCT transformed and huffman encoded. + +#include "cuda_kernels.h" + +#include +#include +#include "common/UtilNPP/Exceptions.h" + +#include "Endianess.h" +#include + +#include +#include +#include + +#include "common/inc/helper_string.h" +#include "common/inc/helper_cuda.h" +//#include "MacroDef.h" +#include "cuda.h" + +using namespace std; + +struct FrameHeader // fields serialized in this order by writeFrameHeader() after marker bytes 0xFF 0xC0 +{ + unsigned char nSamplePrecision; + unsigned short nHeight; + unsigned short nWidth; + unsigned char nComponents; + unsigned char aComponentIdentifier[3]; + unsigned char aSamplingFactors[3]; // initTable() reads (>> 4) as horizontal and (& 0x0f) as vertical blocks per MCU + unsigned char aQuantizationTableSelector[3]; +}; + +struct ScanHeader // fields serialized by writeScanHeader() after marker bytes 0xFF 0xDA +{ + unsigned char nComponents; + unsigned char aComponentSelector[3]; + unsigned char aHuffmanTablesSelector[3]; + unsigned char nSs; + unsigned char nSe; + unsigned char nA; +}; + +struct QuantizationTable // copied byte-for-byte (sizeof(QuantizationTable)) by writeQuantizationTable() after marker bytes 0xFF 0xDB +{ + unsigned char nPrecisionAndIdentifier; + unsigned char aTable[64]; +}; + +struct HuffmanTable // writeHuffmanTable() copies the first 17 + sum(aCodes) bytes after marker bytes 0xFF 0xC4 +{ + unsigned char nClassAndIdentifier; + unsigned char aCodes[16]; // number of codes for each bit length 1..16 + unsigned char aTable[256]; +}; + +//Standard Y (luma) quantization table (disabled; original comment was mis-encoded) +//unsigned char std_Y_QT[64] = +//{ +// 16, 11, 10, 16, 24, 40, 51, 61, +// 12, 12, 14, 19, 26, 58, 60, 55, +// 14, 13, 16, 24, 40, 57, 69, 56, +// 14, 17, 22, 29, 51, 87, 80, 62, +// 18, 22, 37, 56, 68, 109, 103, 77, +// 24, 35, 55, 64, 81, 104, 113, 92, +// 49, 64, 78, 87, 103, 121, 120, 101, +// 72, 92, 95, 98, 112, 100, 103, 99 +//}; +// +////Standard UV (chroma) quantization table (disabled; original comment was mis-encoded)
+//unsigned char std_UV_QT[64] = +//{ +// 17, 18, 24, 47, 99, 99, 99, 99, +// 18, 21, 26, 66, 99, 99, 99, 99, +// 24, 26, 56, 99, 99, 99, 99, 99, +// 47, 66, 99, 99, 99, 99, 99, 99, +// 99, 99, 99, 99, 99, 99, 99, 99, +// 99, 99, 99, 99, 99, 99, 99, 99, +// 99, 99, 99, 99, 99, 99, 99, 99, +// 99, 99, 99, 99, 99, 99, 99, 99 +//}; + +////Y (luma) quantization table, unscaled variant (disabled) +//unsigned char std_Y_QT[64] = +//{ +// 6, 4, 5, 6, 5, 4, 6, 6, +// 5, 6, 7, 7, 6, 8, 10, 16, +// 10, 10, 9, 9, 10, 20, 14, 15, +// 12, 16, 23, 20, 24, 24, 23, 20, +// 22, 22, 26, 29, 37, 31, 26, 27, +// 35, 28, 22, 22, 32, 44, 32, 35, +// 38, 39, 41, 42, 41, 25, 31, 45, +// 48, 45, 40, 48, 37, 40, 41, 40 +//}; +// +////UV (chroma) quantization table, unscaled variant (disabled) +//unsigned char std_UV_QT[64] = +//{ +// 7, 7, 7, 10, 8, 10, 19, 10, +// 10, 19, 40, 26, 22, 26, 40, 40, +// 40, 40, 40, 40, 40, 40, 40, 40, +// 40, 40, 40, 40, 40, 40, 40, 40, +// 40, 40, 40, 40, 40, 40, 40, 40, +// 40, 40, 40, 40, 40, 40, 40, 40, +// 40, 40, 40, 40, 40, 40, 40, 40, +// 40, 40, 40, 40, 40, 40, 40, 40 +//}; + +//Active Y (luma) quantization table: the values above scaled by 0.75; initTable() copies it into aQuantizationTables[0].aTable. NOTE(review): each entry is a double product implicitly converted to unsigned char (fraction dropped), and brace-init narrowing may be rejected by C++11 compilers - confirm. +unsigned char std_Y_QT[64] = +{ + 0.75 * 6, 0.75 * 4, 0.75 * 5, 0.75 * 6, 0.75 * 5, 0.75 * 4, 0.75 * 6, 0.75 * 6, + 0.75 * 5, 0.75 * 6, 0.75 * 7, 0.75 * 7, 0.75 * 6, 0.75 * 8, 0.75 * 10, 0.75 * 16, + 0.75 * 10, 0.75 * 10, 0.75 * 9, 0.75 * 9, 0.75 * 10, 0.75 * 20, 0.75 * 14, 0.75 * 15, + 0.75 * 12, 0.75 * 16, 0.75 * 23, 0.75 * 20, 0.75 * 24, 0.75 * 24, 0.75 * 23, 0.75 * 20, + 0.75 * 22, 0.75 * 22, 0.75 * 26, 0.75 * 29, 0.75 * 37, 0.75 * 31, 0.75 * 26, 0.75 * 27, + 0.75 * 35, 0.75 * 28, 0.75 * 22, 0.75 * 22, 0.75 * 32, 0.75 * 44, 0.75 * 32, 0.75 * 35, + 0.75 * 38, 0.75 * 39, 0.75 * 41, 0.75 * 42, 0.75 * 41, 0.75 * 25, 0.75 * 31, 0.75 * 45, + 0.75 * 48, 0.75 * 45, 0.75 * 40, 0.75 * 48, 0.75 * 37, 0.75 * 40, 0.75 * 41, 0.75 * 40 +}; + +//Active UV (chroma) quantization table; initTable() copies it into aQuantizationTables[1].aTable
+unsigned char std_UV_QT[64] = +{ + 0.75 * 7, 0.75 * 7, 0.75 * 7, 0.75 * 10, 0.75 * 8, 0.75 * 10, 0.75 * 19, 0.75 * 10, + 0.75 * 10, 0.75 * 19, 0.75 * 40, 0.75 * 26, 0.75 * 22, 0.75 * 26, 0.75 * 40, 0.75 * 40, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30 +}; + +unsigned char STD_DC_Y_NRCODES[16] = { 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +unsigned char STD_DC_Y_VALUES[12] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + +unsigned char STD_DC_UV_NRCODES[16] = { 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; +unsigned char STD_DC_UV_VALUES[12] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + +unsigned char STD_AC_Y_NRCODES[16] = { 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0X7D }; +unsigned char STD_AC_Y_VALUES[162] = +{ + 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, + 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, + 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, + 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, + 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, + 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, + 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, + 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa +}; + +unsigned char STD_AC_UV_NRCODES[16] = { 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 
2, 0X77 }; +unsigned char STD_AC_UV_VALUES[162] = +{ + 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, + 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, + 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, + 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, + 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, + 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, + 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, + 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, + 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, + 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, + 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa +}; + +int DivUp(int x, int d) +{ + return (x + d - 1) / d; +} + +template +void writeAndAdvance(unsigned char *&pData, T nElement) +{ + writeBigEndian(pData, nElement); + pData += sizeof(T); +} + +void writeMarker(unsigned char nMarker, unsigned char *&pData) +{ + *pData++ = 0x0ff; + *pData++ = nMarker; +} + +void writeJFIFTag(unsigned char *&pData) +{ + const char JFIF_TAG[] = + { + 0x4a, 0x46, 0x49, 0x46, 0x00, + 0x01, 0x02, + 0x00, + 0x00, 0x01, 0x00, 0x01, + 0x00, 0x00 + }; + + writeMarker(0x0e0, pData); + writeAndAdvance(pData, sizeof(JFIF_TAG) + sizeof(unsigned short)); + memcpy(pData, JFIF_TAG, sizeof(JFIF_TAG)); + pData += sizeof(JFIF_TAG); +} + +void writeFrameHeader(const FrameHeader &header, unsigned char *&pData) +{ + unsigned char aTemp[128]; + unsigned char *pTemp = aTemp; + + writeAndAdvance(pTemp, header.nSamplePrecision); + writeAndAdvance(pTemp, header.nHeight); + writeAndAdvance(pTemp, 
header.nWidth); + writeAndAdvance(pTemp, header.nComponents); + + for (int c = 0; c(pTemp, header.aComponentIdentifier[c]); + writeAndAdvance(pTemp, header.aSamplingFactors[c]); + writeAndAdvance(pTemp, header.aQuantizationTableSelector[c]); + } + + unsigned short nLength = (unsigned short)(pTemp - aTemp); + + writeMarker(0x0C0, pData); + writeAndAdvance(pData, nLength + 2); + memcpy(pData, aTemp, nLength); + pData += nLength; +} + +void writeScanHeader(const ScanHeader &header, unsigned char *&pData) +{ + unsigned char aTemp[128]; + unsigned char *pTemp = aTemp; + + writeAndAdvance(pTemp, header.nComponents); + + for (int c = 0; c(pTemp, header.aComponentSelector[c]); + writeAndAdvance(pTemp, header.aHuffmanTablesSelector[c]); + } + + writeAndAdvance(pTemp, header.nSs); + writeAndAdvance(pTemp, header.nSe); + writeAndAdvance(pTemp, header.nA); + + unsigned short nLength = (unsigned short)(pTemp - aTemp); + + writeMarker(0x0DA, pData); + writeAndAdvance(pData, nLength + 2); + memcpy(pData, aTemp, nLength); + pData += nLength; +} + +void writeQuantizationTable(const QuantizationTable &table, unsigned char *&pData) +{ + writeMarker(0x0DB, pData); + writeAndAdvance(pData, sizeof(QuantizationTable) + 2); + memcpy(pData, &table, sizeof(QuantizationTable)); + pData += sizeof(QuantizationTable); +} + +void writeHuffmanTable(const HuffmanTable &table, unsigned char *&pData) +{ + writeMarker(0x0C4, pData); + + // Number of Codes for Bit Lengths [1..16] + int nCodeCount = 0; + + for (int i = 0; i < 16; ++i) + { + nCodeCount += table.aCodes[i]; + } + + writeAndAdvance(pData, 17 + nCodeCount + 2); + memcpy(pData, &table, 17 + nCodeCount); + pData += 17 + nCodeCount; +} + +bool printfNPPinfo(int cudaVerMajor, int cudaVerMinor) +{ + const NppLibraryVersion *libVer = nppGetLibVersion(); + + printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build); + + int driverVersion, runtimeVersion; + cudaDriverGetVersion(&driverVersion); + 
cudaRuntimeGetVersion(&runtimeVersion); + + printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000, (driverVersion % 100) / 10); + printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, (runtimeVersion % 100) / 10); + + bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor); + return bVal; +} + +NppiDCTState *pDCTState; +FrameHeader oFrameHeader; +FrameHeader oFrameHeaderFixedSize; +ScanHeader oScanHeader; +QuantizationTable aQuantizationTables[4]; +Npp8u *pdQuantizationTables; +HuffmanTable aHuffmanTables[4]; +HuffmanTable *pHuffmanDCTables; +HuffmanTable *pHuffmanACTables; +int nMCUBlocksH; +int nMCUBlocksV; +int nMCUBlocksHFixedSize; +int nMCUBlocksVFixedSize; +Npp8u *pdScan; +NppiEncodeHuffmanSpec *apHuffmanDCTable[3]; +NppiEncodeHuffmanSpec *apHuffmanACTable[3]; +unsigned char *pDstJpeg; +unsigned char *pDstOutput; +int nRestartInterval; + +int initTable() +{ + NPP_CHECK_NPP(nppiDCTInitAlloc(&pDCTState)); + + nRestartInterval = -1; + + cudaMalloc(&pdQuantizationTables, 64 * 4); + pHuffmanDCTables = aHuffmanTables; + pHuffmanACTables = &aHuffmanTables[2]; + memset(aQuantizationTables, 0, 4 * sizeof(QuantizationTable)); + memset(aHuffmanTables, 0, 4 * sizeof(HuffmanTable)); + memset(&oFrameHeader, 0, sizeof(FrameHeader)); + + + //????Huffman?? + aHuffmanTables[0].nClassAndIdentifier = 0; + memcpy(aHuffmanTables[0].aCodes, STD_DC_Y_NRCODES, 16); + memcpy(aHuffmanTables[0].aTable, STD_DC_Y_VALUES, 12); + + aHuffmanTables[1].nClassAndIdentifier = 1; + memcpy(aHuffmanTables[1].aCodes, STD_DC_UV_NRCODES, 16); + memcpy(aHuffmanTables[1].aTable, STD_DC_UV_VALUES, 12); + + aHuffmanTables[2].nClassAndIdentifier = 16; + memcpy(aHuffmanTables[2].aCodes, STD_AC_Y_NRCODES, 16); + memcpy(aHuffmanTables[2].aTable, STD_AC_Y_VALUES, 162); + + aHuffmanTables[3].nClassAndIdentifier = 17; + memcpy(aHuffmanTables[3].aCodes, STD_AC_UV_NRCODES, 16); + memcpy(aHuffmanTables[3].aTable, STD_AC_UV_VALUES, 162); + + + //????量???? 
+ aQuantizationTables[0].nPrecisionAndIdentifier = 0; + memcpy(aQuantizationTables[0].aTable, std_Y_QT, 64); + aQuantizationTables[1].nPrecisionAndIdentifier = 1; + memcpy(aQuantizationTables[1].aTable, std_UV_QT, 64); + + NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables, aQuantizationTables[0].aTable, 64, cudaMemcpyHostToDevice)); + NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables + 64, aQuantizationTables[1].aTable, 64, cudaMemcpyHostToDevice)); + + oFrameHeader.nSamplePrecision = 8; + oFrameHeader.nComponents = 3; + oFrameHeader.aComponentIdentifier[0] = 1; + oFrameHeader.aComponentIdentifier[1] = 2; + oFrameHeader.aComponentIdentifier[2] = 3; + oFrameHeader.aSamplingFactors[0] = 34; + oFrameHeader.aSamplingFactors[1] = 17; + oFrameHeader.aSamplingFactors[2] = 17; + oFrameHeader.aQuantizationTableSelector[0] = 0; + oFrameHeader.aQuantizationTableSelector[1] = 1; + oFrameHeader.aQuantizationTableSelector[2] = 1; + + for (int i = 0; i < oFrameHeader.nComponents; ++i) + { + nMCUBlocksV = max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] & 0x0f); + nMCUBlocksH = max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] >> 4); + } + NPP_CHECK_CUDA(cudaMalloc(&pdScan, 4 << 20)); + + + + oScanHeader.nComponents = 3; + oScanHeader.aComponentSelector[0] = 1; + oScanHeader.aComponentSelector[1] = 2; + oScanHeader.aComponentSelector[2] = 3; + oScanHeader.aHuffmanTablesSelector[0] = 0; + oScanHeader.aHuffmanTablesSelector[1] = 17; + oScanHeader.aHuffmanTablesSelector[2] = 17; + oScanHeader.nSs = 0; + oScanHeader.nSe = 63; + oScanHeader.nA = 0; + + + return 0; +} + +NppiSize aSrcSize[3]; +Npp16s *apdDCT[3];// = { 0, 0, 0 }; +Npp32s aDCTStep[3]; + +Npp8u *apSrcImage[3];// = { 0, 0, 0 }; +Npp32s aSrcImageStep[3]; +size_t aSrcPitch[3]; + + +int releaseJpegNPP() +{ + nppiDCTFree(pDCTState); + cudaFree(pdQuantizationTables); + cudaFree(pdScan); + for (int i = 0; i < 3; ++i) + { + cudaFree(apdDCT[i]); + cudaFree(apSrcImage[i]); + } + return 0; +} + + +int initTable(int flag, int 
width, int height) +{ + //????帧头 + oFrameHeaderFixedSize.nSamplePrecision = 8; + oFrameHeaderFixedSize.nComponents = 3; + oFrameHeaderFixedSize.aComponentIdentifier[0] = 1; + oFrameHeaderFixedSize.aComponentIdentifier[1] = 2; + oFrameHeaderFixedSize.aComponentIdentifier[2] = 3; + oFrameHeaderFixedSize.aSamplingFactors[0] = 34; + oFrameHeaderFixedSize.aSamplingFactors[1] = 17; + oFrameHeaderFixedSize.aSamplingFactors[2] = 17; + oFrameHeaderFixedSize.aQuantizationTableSelector[0] = 0; + oFrameHeaderFixedSize.aQuantizationTableSelector[1] = 1; + oFrameHeaderFixedSize.aQuantizationTableSelector[2] = 1; + oFrameHeaderFixedSize.nWidth = width; + oFrameHeaderFixedSize.nHeight = height; + + for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i) + { + nMCUBlocksVFixedSize = max(nMCUBlocksVFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f); + nMCUBlocksHFixedSize = max(nMCUBlocksHFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] >> 4); + } + + for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i) + { + NppiSize oBlocks; + NppiSize oBlocksPerMCU = { oFrameHeaderFixedSize.aSamplingFactors[i] >> 4, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f }; + + oBlocks.width = (int)ceil((oFrameHeaderFixedSize.nWidth + 7) / 8 * + static_cast(oBlocksPerMCU.width) / nMCUBlocksHFixedSize); + oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; + + oBlocks.height = (int)ceil((oFrameHeaderFixedSize.nHeight + 7) / 8 * + static_cast(oBlocksPerMCU.height) / nMCUBlocksVFixedSize); + oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; + + aSrcSize[i].width = oBlocks.width * 8; + aSrcSize[i].height = oBlocks.height * 8; + + // Allocate Memory + size_t nPitch; + NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height)); + aDCTStep[i] = static_cast(nPitch); + + NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height)); + + 
aSrcPitch[i] = nPitch; + aSrcImageStep[i] = static_cast(nPitch); + } + + return 0; +} + +int jpegNPP(const char *szOutputFile, float* d_srcRGB) +{ + //RGB2YUV + cudaError_t cudaStatus; + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight, + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height); + + /** + * Forward DCT, quantization and level shift part of the JPEG encoding. + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 + * macro blocks. The new version of the primitive takes the ROI in image pixel size and + * works with DCT coefficients that are in zig-zag order. + */ + int k = 0; + //LOG_INFO("NPP_CHECK_NPP:%d", 1); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], + apdDCT[0], aDCTStep[0], + pdQuantizationTables + k * 64, + aSrcSize[0], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + + k = 1; + //LOG_INFO("NPP_CHECK_NPP:%d", 2); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], + apdDCT[1], aDCTStep[1], + pdQuantizationTables + k * 64, + aSrcSize[1], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + + //LOG_INFO("NPP_CHECK_NPP:%d", 3); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], + apdDCT[2], aDCTStep[2], + pdQuantizationTables + k * 64, + aSrcSize[2], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + + // Huffman Encoding + + Npp32s nScanLength; + Npp8u *pJpegEncoderTemp; + +#if (CUDA_VERSION == 8000) + Npp32s nTempSize; //when using CUDA8 +#else + size_t nTempSize; //when using CUDA9 +#endif + //modified 
by Junlin 190221 + + //LOG_INFO("NPP_CHECK_NPP:%d",4); + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) + { + printf("nppiEncodeHuffmanGetSize Failed!\n"); + return EXIT_FAILURE; + } + + //LOG_INFO("NPP_CHECK_CUDA:%d",5); + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); + + /** + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. + */ + NppStatus t_status; + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); + + /** + * Huffman Encoding of the JPEG Encoding. + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. 
+ */ + Npp32s nSs = 0; + Npp32s nSe = 63; + Npp32s nH = 0; + Npp32s nL = 0; + //LOG_INFO("NPP_CHECK_NPP:%d",6); + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, + 0, nSs, nSe, nH, nL, + pdScan, &nScanLength, + apHuffmanDCTable, + apHuffmanACTable, + aSrcSize, + pJpegEncoderTemp))) + { + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); + return EXIT_FAILURE; + } + + for (int i = 0; i < 3; ++i) + { + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); + } + // Write JPEG + pDstJpeg = new unsigned char[4 << 20]{}; + pDstOutput = pDstJpeg; + + writeMarker(0x0D8, pDstOutput); + writeJFIFTag(pDstOutput); + writeQuantizationTable(aQuantizationTables[0], pDstOutput); + writeQuantizationTable(aQuantizationTables[1], pDstOutput); + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); + writeHuffmanTable(pHuffmanACTables[0], pDstOutput); + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); + writeHuffmanTable(pHuffmanACTables[1], pDstOutput); + writeFrameHeader(oFrameHeaderFixedSize, pDstOutput); + writeScanHeader(oScanHeader, pDstOutput); + + //LOG_INFO("NPP_CHECK_CUDA:%d",7); + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); + + pDstOutput += nScanLength; + writeMarker(0x0D9, pDstOutput); + { + // Write result to file. 
+ std::ofstream outputFile(szOutputFile, ios::out | ios::binary); + outputFile.write(reinterpret_cast(pDstJpeg), static_cast(pDstOutput - pDstJpeg)); + } + + // Cleanup + cudaFree(pJpegEncoderTemp); + delete[] pDstJpeg; + + + return EXIT_SUCCESS; +} + +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB) +{ + //RGB2YUV + cudaError_t cudaStatus; + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight, + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height); + + /** + * Forward DCT, quantization and level shift part of the JPEG encoding. + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 + * macro blocks. The new version of the primitive takes the ROI in image pixel size and + * works with DCT coefficients that are in zig-zag order. + */ + int k = 0; + //LOG_INFO("NPP_CHECK_NPP:%d", 1); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], + apdDCT[0], aDCTStep[0], + pdQuantizationTables + k * 64, + aSrcSize[0], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + + k = 1; + //LOG_INFO("NPP_CHECK_NPP:%d", 2); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], + apdDCT[1], aDCTStep[1], + pdQuantizationTables + k * 64, + aSrcSize[1], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + + //LOG_INFO("NPP_CHECK_NPP:%d", 3); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], + apdDCT[2], aDCTStep[2], + pdQuantizationTables + k * 64, + aSrcSize[2], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + + // Huffman Encoding + + Npp32s 
nScanLength; + Npp8u *pJpegEncoderTemp; + +#if (CUDA_VERSION == 8000) + Npp32s nTempSize; //when using CUDA8 +#else + size_t nTempSize; //when using CUDA9 +#endif + //modified by Junlin 190221 + + //LOG_INFO("NPP_CHECK_NPP:%d",4); + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) + { + printf("nppiEncodeHuffmanGetSize Failed!\n"); + return EXIT_FAILURE; + } + + //LOG_INFO("NPP_CHECK_CUDA:%d",5); + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); + + /** + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. + */ + NppStatus t_status; + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); + + /** + * Huffman Encoding of the JPEG Encoding. + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. 
+ */ + Npp32s nSs = 0; + Npp32s nSe = 63; + Npp32s nH = 0; + Npp32s nL = 0; + //LOG_INFO("NPP_CHECK_NPP:%d",6); + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, + 0, nSs, nSe, nH, nL, + pdScan, &nScanLength, + apHuffmanDCTable, + apHuffmanACTable, + aSrcSize, + pJpegEncoderTemp))) + { + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); + return EXIT_FAILURE; + } + + for (int i = 0; i < 3; ++i) + { + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); + } + // Write JPEG + pDstJpeg = new unsigned char[4 << 20]{}; + pDstOutput = pDstJpeg; + + writeMarker(0x0D8, pDstOutput); + writeJFIFTag(pDstOutput); + writeQuantizationTable(aQuantizationTables[0], pDstOutput); + writeQuantizationTable(aQuantizationTables[1], pDstOutput); + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); + writeHuffmanTable(pHuffmanACTables[0], pDstOutput); + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); + writeHuffmanTable(pHuffmanACTables[1], pDstOutput); + writeFrameHeader(oFrameHeaderFixedSize, pDstOutput); + writeScanHeader(oScanHeader, pDstOutput); + + //LOG_INFO("NPP_CHECK_CUDA:%d",7); + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); + + pDstOutput += nScanLength; + writeMarker(0x0D9, pDstOutput); + { + // Write result to file. 
+ std::ofstream outputFile(szOutputFile, ios::out | ios::binary); + outputFile.write(reinterpret_cast(pDstJpeg), static_cast(pDstOutput - pDstJpeg)); + } + + // Cleanup + cudaFree(pJpegEncoderTemp); + delete[] pDstJpeg; + + + return EXIT_SUCCESS; +} + + +int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height) +{ + NppiSize aSrcSize[3]; + Npp16s *apdDCT[3] = { 0, 0, 0 }; + Npp32s aDCTStep[3]; + + Npp8u *apSrcImage[3] = { 0, 0, 0 }; + Npp32s aSrcImageStep[3]; + size_t aSrcPitch[3]; + + + //????帧头 + oFrameHeader.nWidth = img_width; + oFrameHeader.nHeight = img_height; + + for (int i = 0; i < oFrameHeader.nComponents; ++i) + { + NppiSize oBlocks; + NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f }; + + oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 * + static_cast(oBlocksPerMCU.width) / nMCUBlocksH); + oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; + + oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 * + static_cast(oBlocksPerMCU.height) / nMCUBlocksV); + oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; + + aSrcSize[i].width = oBlocks.width * 8; + aSrcSize[i].height = oBlocks.height * 8; + + // Allocate Memory + size_t nPitch; + //LOG_INFO("NPP_CHECK_CUDA:%d",1); + NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height)); + aDCTStep[i] = static_cast(nPitch); + + //LOG_INFO("NPP_CHECK_CUDA:%d",2); + NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height)); + + aSrcPitch[i] = nPitch; + aSrcImageStep[i] = static_cast(nPitch); + } + + //RGB2YUV + cudaError_t cudaStatus; + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height, + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, + apSrcImage[2], aSrcPitch[2], 
aSrcSize[2].width, aSrcSize[2].height); + + /** + * Forward DCT, quantization and level shift part of the JPEG encoding. + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 + * macro blocks. The new version of the primitive takes the ROI in image pixel size and + * works with DCT coefficients that are in zig-zag order. + */ + int k = 0; + //LOG_INFO("NPP_CHECK_CUDA:%d",3); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], + apdDCT[0], aDCTStep[0], + pdQuantizationTables + k * 64, + aSrcSize[0], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + k = 1; + + //LOG_INFO("NPP_CHECK_CUDA:%d",4); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], + apdDCT[1], aDCTStep[1], + pdQuantizationTables + k * 64, + aSrcSize[1], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + + //LOG_INFO("NPP_CHECK_CUDA:%d",5); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], + apdDCT[2], aDCTStep[2], + pdQuantizationTables + k * 64, + aSrcSize[2], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + + // Huffman Encoding + + Npp32s nScanLength; + Npp8u *pJpegEncoderTemp; + +#if (CUDA_VERSION == 8000) + Npp32s nTempSize; //when using CUDA8 +#else + size_t nTempSize; //when using CUDA9 +#endif + //modified by Junlin 190221 + + //LOG_INFO("NPP_CHECK_CUDA:%d",6); + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) + { + printf("nppiEncodeHuffmanGetSize Failed!\n"); + return EXIT_FAILURE; + } + + //LOG_INFO("NPP_CHECK_CUDA:%d",7); + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); + + /** + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. 
+ */ + NppStatus t_status; + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); + + /** + * Huffman Encoding of the JPEG Encoding. + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. + */ + Npp32s nSs = 0; + Npp32s nSe = 63; + Npp32s nH = 0; + Npp32s nL = 0; + //LOG_INFO("NPP_CHECK_CUDA:%d",8); + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, + 0, nSs, nSe, nH, nL, + pdScan, &nScanLength, + apHuffmanDCTable, + apHuffmanACTable, + aSrcSize, + pJpegEncoderTemp))) + { + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); + return EXIT_FAILURE; + } + + for (int i = 0; i < 3; ++i) + { + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); + } + // Write JPEG + pDstJpeg = new unsigned char[4 << 20]{}; + pDstOutput = pDstJpeg; + + writeMarker(0x0D8, pDstOutput); + writeJFIFTag(pDstOutput); + writeQuantizationTable(aQuantizationTables[0], pDstOutput); + writeQuantizationTable(aQuantizationTables[1], pDstOutput); + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); + writeHuffmanTable(pHuffmanACTables[0], pDstOutput); + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); + writeHuffmanTable(pHuffmanACTables[1], pDstOutput); + writeFrameHeader(oFrameHeader, pDstOutput); + writeScanHeader(oScanHeader, pDstOutput); + 
+ //LOG_INFO("NPP_CHECK_CUDA:%d",9); + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); + + pDstOutput += nScanLength; + writeMarker(0x0D9, pDstOutput); + + { + // Write result to file. + std::ofstream outputFile(szOutputFile, ios::out | ios::binary); + outputFile.write(reinterpret_cast(pDstJpeg), static_cast(pDstOutput - pDstJpeg)); + } + + // Cleanup + cudaFree(pJpegEncoderTemp); + delete[] pDstJpeg; + for (int i = 0; i < 3; ++i) + { + cudaFree(apdDCT[i]); + cudaFree(apSrcImage[i]); + } + + return EXIT_SUCCESS; +} + + +int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height) +{ + NppiSize aSrcSize[3]; + Npp16s *apdDCT[3] = { 0, 0, 0 }; + Npp32s aDCTStep[3]; + + Npp8u *apSrcImage[3] = { 0, 0, 0 }; + Npp32s aSrcImageStep[3]; + size_t aSrcPitch[3]; + + + //????帧头 + oFrameHeader.nWidth = img_width; + oFrameHeader.nHeight = img_height; + + for (int i = 0; i < oFrameHeader.nComponents; ++i) + { + NppiSize oBlocks; + NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f }; + + oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 * + static_cast(oBlocksPerMCU.width) / nMCUBlocksH); + oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; + + oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 * + static_cast(oBlocksPerMCU.height) / nMCUBlocksV); + oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; + + aSrcSize[i].width = oBlocks.width * 8; + aSrcSize[i].height = oBlocks.height * 8; + + // Allocate Memory + size_t nPitch; + //LOG_INFO("NPP_CHECK_CUDA:%d",1); + NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height)); + aDCTStep[i] = static_cast(nPitch); + + //LOG_INFO("NPP_CHECK_CUDA:%d",2); + NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height)); + + aSrcPitch[i] = nPitch; + aSrcImageStep[i] 
= static_cast(nPitch); + } + + //RGB2YUV + cudaError_t cudaStatus; + cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height, + apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height, + apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height, + apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height); + + /** + * Forward DCT, quantization and level shift part of the JPEG encoding. + * Input is expected in 8x8 macro blocks and output is expected to be in 64x1 + * macro blocks. The new version of the primitive takes the ROI in image pixel size and + * works with DCT coefficients that are in zig-zag order. + */ + int k = 0; + //LOG_INFO("NPP_CHECK_CUDA:%d",3); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0], + apdDCT[0], aDCTStep[0], + pdQuantizationTables + k * 64, + aSrcSize[0], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + k = 1; + + //LOG_INFO("NPP_CHECK_CUDA:%d",4); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1], + apdDCT[1], aDCTStep[1], + pdQuantizationTables + k * 64, + aSrcSize[1], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + + //LOG_INFO("NPP_CHECK_CUDA:%d",5); + if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2], + apdDCT[2], aDCTStep[2], + pdQuantizationTables + k * 64, + aSrcSize[2], + pDCTState))) + { + printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n"); + return EXIT_FAILURE; + } + + // Huffman Encoding + + Npp32s nScanLength; + Npp8u *pJpegEncoderTemp; + +#if (CUDA_VERSION == 8000) + Npp32s nTempSize; //when using CUDA8 +#else + size_t nTempSize; //when using CUDA9 +#endif + //modified by Junlin 190221 + + //LOG_INFO("NPP_CHECK_CUDA:%d",6); + if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize))) + { + 
printf("nppiEncodeHuffmanGetSize Failed!\n"); + return EXIT_FAILURE; + } + + //LOG_INFO("NPP_CHECK_CUDA:%d",7); + NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize)); + + /** + * Allocates memory and creates a Huffman table in a format that is suitable for the encoder. + */ + NppStatus t_status; + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]); + t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]); + + /** + * Huffman Encoding of the JPEG Encoding. + * Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan. 
+ */ + Npp32s nSs = 0; + Npp32s nSe = 63; + Npp32s nH = 0; + Npp32s nL = 0; + //LOG_INFO("NPP_CHECK_CUDA:%d",8); + if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep, + 0, nSs, nSe, nH, nL, + pdScan, &nScanLength, + apHuffmanDCTable, + apHuffmanACTable, + aSrcSize, + pJpegEncoderTemp))) + { + printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); + return EXIT_FAILURE; + } + + for (int i = 0; i < 3; ++i) + { + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]); + nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]); + } + // Write JPEG + pDstJpeg = new unsigned char[4 << 20]{}; + pDstOutput = pDstJpeg; + + writeMarker(0x0D8, pDstOutput); + writeJFIFTag(pDstOutput); + writeQuantizationTable(aQuantizationTables[0], pDstOutput); + writeQuantizationTable(aQuantizationTables[1], pDstOutput); + writeHuffmanTable(pHuffmanDCTables[0], pDstOutput); + writeHuffmanTable(pHuffmanACTables[0], pDstOutput); + writeHuffmanTable(pHuffmanDCTables[1], pDstOutput); + writeHuffmanTable(pHuffmanACTables[1], pDstOutput); + writeFrameHeader(oFrameHeader, pDstOutput); + writeScanHeader(oScanHeader, pDstOutput); + + //LOG_INFO("NPP_CHECK_CUDA:%d",9); + NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost)); + + pDstOutput += nScanLength; + writeMarker(0x0D9, pDstOutput); + + { + // Write result to file. 
+ std::ofstream outputFile(szOutputFile, ios::out | ios::binary); + outputFile.write(reinterpret_cast(pDstJpeg), static_cast(pDstOutput - pDstJpeg)); + } + + // Cleanup + cudaFree(pJpegEncoderTemp); + delete[] pDstJpeg; + for (int i = 0; i < 3; ++i) + { + cudaFree(apdDCT[i]); + cudaFree(apSrcImage[i]); + } + + return EXIT_SUCCESS; +} diff --git a/src/utiltools.hpp b/src/utiltools.hpp deleted file mode 100644 index 8caff91..0000000 --- a/src/utiltools.hpp +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _UTIL_TOOLS_HPP_ -#define _UTIL_TOOLS_HPP_ - -#include - -using namespace std; - -namespace UtilTools{ - - static long get_cur_time_ms() { - chrono::time_point tpMicro - = chrono::time_point_cast(chrono::system_clock::now()); - return tpMicro.time_since_epoch().count(); - } - -} - -#endif \ No newline at end of file -- libgit2 0.21.4