From 63e6f7bc581e7aa20757a0da8490155ef38032e6 Mon Sep 17 00:00:00 2001
From: fiss <2657262686@qq.com>
Date: Thu, 16 Mar 2023 07:02:38 +0000
Subject: [PATCH] 完成dvpp。但是nv和gb28181的代码弄乱了，需要重构代码

---
 .vscode/launch.json                  |   44 ++++----------------------------------------
 .vscode/settings.json                |   10 +++++++++-
 README.md                            |    7 ++++++-
 src/AbstractDecoder.cpp              |  114 ------------------------------------------------------------------------------------------------------------------
 src/AbstractDecoder.h                |  128 --------------------------------------------------------------------------------------------------------------------------------
 src/DrawImageOnGPU.cu                |  126 ------------------------------------------------------------------------------------------------------------------------------
 src/FFCuContextManager.cpp           |   29 -----------------------------
 src/FFCuContextManager.h             |   37 -------------------------------------
 src/FFNvDecoder.cpp                  |  388 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 src/FFNvDecoder.h                    |   62 --------------------------------------------------------------
 src/FFNvDecoderManager.cpp           |  600 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 src/FFNvDecoderManager.h             |  268 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 src/GpuRgbMemory.hpp                 |   86 --------------------------------------------------------------------------------------
 src/ImageSaveGPU.cpp                 |  123 ---------------------------------------------------------------------------------------------------------------------------
 src/ImageSaveGPU.h                   |   65 -----------------------------------------------------------------
 src/Makefile                         |   91 +++++++++++++++++++++++++++++++++++++++++++------------------------------------------------
 src/Makefile.bak                     |   71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/Makefile.bak0308                 |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/NV12ToRGB.cu                     |  345 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 src/NvJpegEncoder.cpp                |   90 ------------------------------------------------------------------------------------------
 src/NvJpegEncoder.h                  |    3 ---
 src/PartMemCopy.cu                   |  289 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 src/RGB2YUV.cu                       |  263 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 src/ResizeImage.cu                   |   84 ------------------------------------------------------------------------------------
 src/cuda_kernels.h                   |   63 ---------------------------------------------------------------
 src/define.hpp                       |   13 -------------
 src/demo/Makefile                    |   61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/demo/Makefile.BK0308             |   43 +++++++++++++++++++++++++++++++++++++++++++
 src/demo/main_dvpp.cpp               |  349 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/demo/main_nvdec.cpp1             |  452 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/CircularQueue.hpp           |  138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/DvppDec.cpp                 |  421 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/DvppDec.h                   |   80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/DvppDecoder.cpp             |  640 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/DvppDecoder.h               |  111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/DvppDecoder2.h              |  192 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/DvppDecoderApi.cpp          |  133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/DvppDecoderApi.h            |   44 ++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/DvppRgbMemory.hpp           |   25 +++++++++++++++++++++++++
 src/dvpp/DvppSourceManager.cpp       |   63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/DvppSourceManager.h         |   36 ++++++++++++++++++++++++++++++++++++
 src/dvpp/FFReceiver.cpp              |  281 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/FFReceiver.h                |   81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/Makefile                    |   66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/VpcPicConverter.cpp         |   83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/VpcPicConverter.h           |   19 +++++++++++++++++++
 src/dvpp/depend_headers.h            |   38 ++++++++++++++++++++++++++++++++++++++
 src/dvpp/dvpp_headers.h              |   31 +++++++++++++++++++++++++++++++
 src/dvpp/threadsafe_queue.h          |  128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/dvpp/user_mem.h                  |   33 +++++++++++++++++++++++++++++++++
 src/gb28181/FFGB28181Decoder.cpp     |   89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/gb28181/FFGB28181Decoder.h       |    6 ++++++
 src/gb28181/Makefile                 |   53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/gb28181/common_header.h          |    4 ++--
 src/interface/AbstractDecoder.cpp    |   25 +++++++++++++++++++++++++
 src/interface/AbstractDecoder.h      |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/interface/DeviceRgbMemory.hpp    |   86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/interface/FFNvDecoderManager.cpp |  494 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/interface/FFNvDecoderManager.h   |  261 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/interface/Makefile               |   55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/interface/interface_headers.h    |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/interface/logger.hpp             |  344 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/interface/utiltools.hpp          |   18 ++++++++++++++++++
 src/jpegNPP.cpp-1                    | 1193 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 src/logger.hpp                       |  342 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 src/main.cpp                         |  452 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 src/nvdecoder/DrawImageOnGPU.cu      |  126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/FFCuContextManager.cpp |   29 +++++++++++++++++++++++++++++
 src/nvdecoder/FFCuContextManager.h   |   37 +++++++++++++++++++++++++++++++++++++
 src/nvdecoder/FFNvDecoder.cpp        |  474 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/FFNvDecoder.h          |   68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/GpuRgbMemory.hpp       |   34 ++++++++++++++++++++++++++++++++++
 src/nvdecoder/ImageSaveGPU.cpp       |  123 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/ImageSaveGPU.h         |   65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/NV12ToRGB.cu           |  345 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/NvJpegEncoder.cpp      |   90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/NvJpegEncoder.h        |    3 +++
 src/nvdecoder/PartMemCopy.cu         |  289 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/RGB2YUV.cu             |  263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/ResizeImage.cu         |   84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/common_header.h        |    8 ++++++++
 src/nvdecoder/cuda_kernels.h         |   63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/nvdecoder/define.hpp             |   11 +++++++++++
 src/nvdecoder/jpegNPP.cpp-1          | 1193 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/utiltools.hpp                    |   18 ------------------
 85 files changed, 8594 insertions(+), 5273 deletions(-)
 delete mode 100644 src/AbstractDecoder.cpp
 delete mode 100644 src/AbstractDecoder.h
 delete mode 100644 src/DrawImageOnGPU.cu
 delete mode 100644 src/FFCuContextManager.cpp
 delete mode 100644 src/FFCuContextManager.h
 delete mode 100644 src/FFNvDecoder.cpp
 delete mode 100644 src/FFNvDecoder.h
 delete mode 100644 src/FFNvDecoderManager.cpp
 delete mode 100644 src/FFNvDecoderManager.h
 delete mode 100644 src/GpuRgbMemory.hpp
 delete mode 100644 src/ImageSaveGPU.cpp
 delete mode 100644 src/ImageSaveGPU.h
 create mode 100644 src/Makefile.bak
 create mode 100644 src/Makefile.bak0308
 delete mode 100644 src/NV12ToRGB.cu
 delete mode 100644 src/NvJpegEncoder.cpp
 delete mode 100644 src/NvJpegEncoder.h
 delete mode 100644 src/PartMemCopy.cu
 delete mode 100644 src/RGB2YUV.cu
 delete mode 100644 src/ResizeImage.cu
 delete mode 100644 src/cuda_kernels.h
 delete mode 100644 src/define.hpp
 create mode 100644 src/demo/Makefile
 create mode 100644 src/demo/Makefile.BK0308
 create mode 100644 src/demo/main_dvpp.cpp
 create mode 100644 src/demo/main_nvdec.cpp1
 create mode 100644 src/dvpp/CircularQueue.hpp
 create mode 100644 src/dvpp/DvppDec.cpp
 create mode 100644 src/dvpp/DvppDec.h
 create mode 100644 src/dvpp/DvppDecoder.cpp
 create mode 100644 src/dvpp/DvppDecoder.h
 create mode 100644 src/dvpp/DvppDecoder2.h
 create mode 100644 src/dvpp/DvppDecoderApi.cpp
 create mode 100644 src/dvpp/DvppDecoderApi.h
 create mode 100644 src/dvpp/DvppRgbMemory.hpp
 create mode 100644 src/dvpp/DvppSourceManager.cpp
 create mode 100644 src/dvpp/DvppSourceManager.h
 create mode 100644 src/dvpp/FFReceiver.cpp
 create mode 100644 src/dvpp/FFReceiver.h
 create mode 100644 src/dvpp/Makefile
 create mode 100644 src/dvpp/VpcPicConverter.cpp
 create mode 100644 src/dvpp/VpcPicConverter.h
 create mode 100644 src/dvpp/depend_headers.h
 create mode 100644 src/dvpp/dvpp_headers.h
 create mode 100644 src/dvpp/threadsafe_queue.h
 create mode 100644 src/dvpp/user_mem.h
 create mode 100644 src/gb28181/Makefile
 create mode 100644 src/interface/AbstractDecoder.cpp
 create mode 100644 src/interface/AbstractDecoder.h
 create mode 100644 src/interface/DeviceRgbMemory.hpp
 create mode 100644 src/interface/FFNvDecoderManager.cpp
 create mode 100644 src/interface/FFNvDecoderManager.h
 create mode 100644 src/interface/Makefile
 create mode 100644 src/interface/interface_headers.h
 create mode 100644 src/interface/logger.hpp
 create mode 100644 src/interface/utiltools.hpp
 delete mode 100644 src/jpegNPP.cpp-1
 delete mode 100644 src/logger.hpp
 delete mode 100644 src/main.cpp
 create mode 100644 src/nvdecoder/DrawImageOnGPU.cu
 create mode 100644 src/nvdecoder/FFCuContextManager.cpp
 create mode 100644 src/nvdecoder/FFCuContextManager.h
 create mode 100644 src/nvdecoder/FFNvDecoder.cpp
 create mode 100644 src/nvdecoder/FFNvDecoder.h
 create mode 100644 src/nvdecoder/GpuRgbMemory.hpp
 create mode 100644 src/nvdecoder/ImageSaveGPU.cpp
 create mode 100644 src/nvdecoder/ImageSaveGPU.h
 create mode 100644 src/nvdecoder/NV12ToRGB.cu
 create mode 100644 src/nvdecoder/NvJpegEncoder.cpp
 create mode 100644 src/nvdecoder/NvJpegEncoder.h
 create mode 100644 src/nvdecoder/PartMemCopy.cu
 create mode 100644 src/nvdecoder/RGB2YUV.cu
 create mode 100644 src/nvdecoder/ResizeImage.cu
 create mode 100644 src/nvdecoder/common_header.h
 create mode 100644 src/nvdecoder/cuda_kernels.h
 create mode 100644 src/nvdecoder/define.hpp
 create mode 100644 src/nvdecoder/jpegNPP.cpp-1
 delete mode 100644 src/utiltools.hpp

diff --git a/.vscode/launch.json b/.vscode/launch.json
index cc4d00d..258f7e1 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -2,49 +2,13 @@
     "version": "0.2.0",
     "configurations": [
       {
-        "name": "(gdb) Launch",
+        "name": "dvpp",
         "type": "cppdbg",
         "request": "launch",
-        "program": "${workspaceFolder}/bin/lib/test",
-        "args": ["rtsp","3", "30012"],
+        "program": "${workspaceFolder}/src/build/bin/demo",
+        "args": ["/home/huchunming/data/woyikewangh265.mp4","0", "0"],
         "stopAtEntry": false,
-        "cwd": "${workspaceFolder}/bin/lib",
-        "environment": [],
-        "externalConsole": false,
-        "MIMode": "gdb",
-        "setupCommands": [
-          {
-            "description": "Enable pretty-printing for gdb",
-            "text": "-enable-pretty-printing",
-            "ignoreFailures": true
-          }
-        ]
-      },{
-        "name": "ffmpeg",
-        "type": "cppdbg",
-        "request": "launch",
-        "program": "${workspaceFolder}/bin/lib/ffmpeg_g",
-        "args": ["-vsync","0","-hwaccel","cuvid","-hwaccel_device","1","-c:v","h264_cuvid","-i","rtmp://192.168.10.56:1935/objecteye/116","-c:a","copy","-vf","scale_npp=800:480","-c:v","h264","/mnt/data/cmhu/FFNvDecoder/data/output1.mp4"],
-        "stopAtEntry": false,
-        "cwd": "${workspaceFolder}/bin/lib",
-        "environment": [],
-        "externalConsole": false,
-        "MIMode": "gdb",
-        "setupCommands": [
-          {
-            "description": "Enable pretty-printing for gdb",
-            "text": "-enable-pretty-printing",
-            "ignoreFailures": true
-          }
-        ]
-      },{
-        "name": "jrtp",
-        "type": "cppdbg",
-        "request": "launch",
-        "program": "${workspaceFolder}/bin/lib/jrtp_exe",
-        "args": ["40030","t"],
-        "stopAtEntry": false,
-        "cwd": "${workspaceFolder}/bin/lib",
+        "cwd": "${workspaceFolder}/src/build/bin",
         "environment": [],
         "externalConsole": false,
         "MIMode": "gdb",
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 69283fc..ab95e37 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -62,6 +62,14 @@
         "cfenv": "cpp",
         "cinttypes": "cpp",
         "__nullptr": "cpp",
-        "list": "cpp"
+        "list": "cpp",
+        "hash_map": "cpp",
+        "hash_set": "cpp",
+        "complex": "cpp",
+        "unordered_set": "cpp",
+        "regex": "cpp",
+        "shared_mutex": "cpp",
+        "variant": "cpp",
+        "ios": "cpp"
     }
 }
\ No newline at end of file
diff --git a/README.md b/README.md
index 4f3dc70..ad6bb4d 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-#### ffmpeg编译配置
+#### 基于CUDA的ffmpeg编译配置
 1. 安装cuda
 2. 安装 nv-codec-headers
     支持 cuvid 需要安装 nv-codec-headers， 进入 nv-codec-headers 文件夹后以sudo权限make && make install即可
@@ -11,5 +11,10 @@
 --enable-debug --extra-cflags=-g --extra-ldflags=-g --disable-optimizations --disable-stripping
 ~~~
 
+#### 普通ffmpeg编译配置
+~~~
+./configure --enable-debug --extra-cflags=-g --extra-ldflags=-g --disable-optimizations --disable-stripping --disable-x86asm --enable-nonfree --disable-vaapi  --extra-cflags=-fPIC --enable-shared --enable-pic --enable-ffplay --prefix=../bin
+~~~
+
 #### SDK说明
 1. 对外接口主要是 FFNvDecoderManager 类，可支持多个解码器；也可直接使用 FFNvDecoder ，但是不建议，FFNvDecoderManager已经封装了 FFNvDecoder 的接口
\ No newline at end of file
diff --git a/src/AbstractDecoder.cpp b/src/AbstractDecoder.cpp
deleted file mode 100644
index 0e51524..0000000
--- a/src/AbstractDecoder.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-#include "AbstractDecoder.h"
-
-#include "logger.hpp"
-#include "GpuRgbMemory.hpp"
-#include "cuda_kernels.h"
-
-#include "utiltools.hpp"
-
-
-FFImgInfo* AbstractDecoder::snapshot(){
-
-	// 锁住停止队列消耗
-	std::lock_guard<std::mutex> l(m_snapshot_mutex);
-
-	AVFrame * gpuFrame = nullptr;
-
-	bool bFirst = true;
-	while(true){
-		m_queue_mutex.lock();
-		if(mFrameQueue.size() <= 0){
-			m_queue_mutex.unlock();
-			if(bFirst){
-				std::this_thread::sleep_for(std::chrono::milliseconds(100));
-				bFirst = false;
-				continue;
-			}else{
-				// 再进来说明前面已经等了 100 ms
-				// 100 ms都没有等到解码数据，则退出
-				return nullptr;
-			}
-		}
-
-		// 队列中数据大于1 
-		gpuFrame = mFrameQueue.front();
-		m_queue_mutex.unlock();
-		break;
-	}
-
-	if (gpuFrame != nullptr && gpuFrame->format == AV_PIX_FMT_CUDA ){
-		LOG_DEBUG("decode task: gpuid: {}  width: {} height: {}", m_cfg.gpuid, gpuFrame->width, gpuFrame->height);
-		GpuRgbMemory* gpuMem = new GpuRgbMemory(3, gpuFrame->width, gpuFrame->height, getName(), m_cfg.gpuid , true);
-
-		if (gpuMem->getMem() == nullptr){
-			LOG_ERROR("new GpuRgbMemory failed !!!");
-			return nullptr;
-		}
-		
-		cudaSetDevice(atoi(m_cfg.gpuid.c_str()));
-		cuda_common::setColorSpace( ITU_709, 0 );
-		cudaError_t cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], gpuMem->getMem(), gpuFrame->width, gpuFrame->height);
-		cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			LOG_ERROR("CUDAToBGR failed failed !!!");
-			return nullptr;
-		}
-
-		unsigned char * pHwRgb = gpuMem->getMem();
-		int channel = gpuMem->getChannel();
-		int width = gpuMem->getWidth();
-		int height = gpuMem->getHeight();
-
-		if (pHwRgb != nullptr && channel > 0 && width > 0 && height > 0){
-			int nSize = channel * height * width;
-
-			LOG_INFO("channel:{} height:{} width:{}", channel, height, width);
-			// unsigned char* cpu_data = new unsigned char[nSize];
-
-            unsigned char* cpu_data = (unsigned char *)av_malloc(nSize * sizeof(unsigned char));
-
-			cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost);
-			cudaDeviceSynchronize();
-
-			delete gpuMem;
-			gpuMem = nullptr;
-
-			FFImgInfo* imgInfo = new FFImgInfo();
-			imgInfo->dec_name = m_dec_name;
-			imgInfo->pData = cpu_data;
-			imgInfo->height = height;
-			imgInfo->width = width;
-			imgInfo->timestamp = UtilTools::get_cur_time_ms();
-			imgInfo->index = m_index;
-
-			m_index++;
-
-			return imgInfo;
-		}
-
-		delete gpuMem;
-		gpuMem = nullptr;
-	}
-
-	return nullptr;
-}
-
-bool AbstractDecoder::isSnapTime(){
-	if(m_snap_time_interval <= 0){
-		return false;
-	}
-	long cur_time = UtilTools::get_cur_time_ms();
-	if(cur_time - m_last_snap_time > m_snap_time_interval){
-		return true;
-	}
-	return false;
-}
-
-void AbstractDecoder::updateLastSnapTime(){
-	m_last_snap_time = UtilTools::get_cur_time_ms();
-}
-
-void AbstractDecoder::setSnapTimeInterval(long interval){
-	m_snap_time_interval = interval;
-	m_last_snap_time = UtilTools::get_cur_time_ms();
-}
\ No newline at end of file
diff --git a/src/AbstractDecoder.h b/src/AbstractDecoder.h
deleted file mode 100644
index b5a5665..0000000
--- a/src/AbstractDecoder.h
+++ /dev/null
@@ -1,128 +0,0 @@
-#ifndef _ABSTRACT_DECODER_H_
-#define _ABSTRACT_DECODER_H_
-
-#include<string>
-
-extern "C"
-{
-	#include <libavcodec/avcodec.h> 
-	#include <libavdevice/avdevice.h> 
-	#include <libavformat/avformat.h> 
-	#include <libavfilter/avfilter.h> 
-	#include <libavutil/avutil.h> 
-    #include <libavutil/pixdesc.h> 
-	#include <libswscale/swscale.h>
-    #include <libavutil/imgutils.h>
-}
-
-#include <queue>
-#include <mutex>
-
-using namespace std;
-
-/**************************************************
-* 接口：DXDECODER_CALLBACK
-* 功能：解码数据回调接口
-* 参数：const dx_void * userPtr 用户自定义数据
-*       AVFrame * gpuFrame 解码结果帧数据，在设置的对应的gpu上，要十分注意这一点，尤其是多线程情况
-* 返回：无
-* 备注：当解码库数据源为实时流时(RTSP/GB28181)，本接
-*       口内不可进行阻塞/耗时操作。当解码库数据源为
-*       非实时流时（本地/网络文件），本接口可以进行
-*       阻塞/耗时操作
-**************************************************/
-typedef void(*POST_DECODE_CALLBACK)(const void * userPtr, AVFrame * gpuFrame);
-
-typedef void(*DECODE_FINISHED_CALLBACK)(const void* userPtr);
-
-typedef bool(*DECODE_REQUEST_STREAM_CALLBACK)(const char* deviceId);
-
-struct FFDecConfig{
-    string uri;                             // 视频地址
-    POST_DECODE_CALLBACK post_decoded_cbk;  // 解码数据回调接口
-    DECODE_FINISHED_CALLBACK decode_finished_cbk; // 解码线程结束后的回调接口
-    string gpuid;                           // gpu id
-    bool force_tcp{true};                   // 是否指定使用tcp连接
-    int skip_frame{1};                      // 跳帧数
-
-    int port;                               // gb28181接收数据的端口号
-    DECODE_REQUEST_STREAM_CALLBACK request_stream_cbk;  // gb28181请求流
-};
-
-enum DECODER_TYPE{ 
-    DECODER_TYPE_GB28181, 
-    DECODER_TYPE_FFMPEG
-};
-
-struct FFImgInfo{
-    string dec_name;
-    int width;
-    int height;
-    unsigned char * pData;
-    long timestamp;
-    long index;
-};
-
-class AbstractDecoder {
-public:
-    virtual ~AbstractDecoder(){};
-    virtual bool init(FFDecConfig& cfg) = 0;
-    virtual void close() = 0;
-    virtual bool start() = 0;
-    virtual void pause() = 0;
-    virtual void resume() = 0;
-            
-    virtual void setDecKeyframe(bool bKeyframe) = 0;
-            
-    virtual bool isRunning() = 0;
-    virtual bool isFinished() = 0;
-    virtual bool isPausing() = 0;
-    virtual bool getResolution( int &width, int &height ) = 0;
-
-    virtual bool isSurport(FFDecConfig& cfg) = 0;
-            
-    virtual int getCachedQueueLength() = 0;
-
-    virtual float fps() = 0;
-
-    virtual DECODER_TYPE getDecoderType() = 0;
-
-    void setName(string nm){
-        m_dec_name = nm;
-    }
-
-    string getName(){
-        return m_dec_name;
-    }
-
-    FFImgInfo* snapshot();
-
-    bool isSnapTime();
-    
-    void updateLastSnapTime();
-
-    void setSnapTimeInterval(long interval);
-
-public:
-    const void * m_postDecArg;
-    POST_DECODE_CALLBACK post_decoded_cbk;
-    const void * m_finishedDecArg;
-    DECODE_FINISHED_CALLBACK decode_finished_cbk;
-
-public:
-    string m_dec_name;
-
-    bool m_dec_keyframe;
-
-    FFDecConfig m_cfg;
-
-    queue<AVFrame*> mFrameQueue;
-    mutex m_queue_mutex;
-    mutex m_snapshot_mutex;
-
-    long m_snap_time_interval{-1};
-    long m_last_snap_time;
-    long m_index{0};
-};
-
-#endif // _ABSTRACT_DECODER_H_
\ No newline at end of file
diff --git a/src/DrawImageOnGPU.cu b/src/DrawImageOnGPU.cu
deleted file mode 100644
index 8770cea..0000000
--- a/src/DrawImageOnGPU.cu
+++ /dev/null
@@ -1,126 +0,0 @@
-#include "cuda_kernels.h"
-
-#include "logger.hpp"
-
-typedef unsigned char   uchar;
-typedef unsigned int    uint32;
-typedef int             int32;
-
-namespace cuda_common
-{
-	__global__ void kernel_drawPixel(float* d_srcRGB, int src_width, int src_height,
-		int left, int top, int right, int bottom)
-	{
-		const int x = blockIdx.x * blockDim.x + threadIdx.x;
-		const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-		if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right))
-		{
-			d_srcRGB[(y*src_width) + x] = 0;
-			d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255;
-			d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0;
-		}
-	}
-
-	cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
-	{
-		dim3 block(32, 16, 1);
-		dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
-
-		kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom);
-
-		cudaError_t cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			LOG_ERROR("Draw 32 kernel_memcopy launch failed:{}",cudaGetErrorString(cudaStatus));
-			return cudaStatus;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus);
-			return cudaStatus;
-		}
-
-		return cudaStatus;
-	}
-
-	__global__ void kernel_drawPixel(unsigned char* d_srcRGB, int src_width, int src_height,
-		int left, int top, int right, int bottom)
-	{
-		const int x = blockIdx.x * blockDim.x + threadIdx.x;
-		const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-		if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right))
-		{
-			d_srcRGB[(y*src_width) + x] = 0;
-			d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255;
-			d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0;
-		}
-	}
-
-	cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
-	{
-		dim3 block(32, 16, 1);
-		dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
-
-		kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom);
-
-		cudaError_t cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			LOG_ERROR("Draw 68 kernel_memcopy launch failed: {}",cudaGetErrorString(cudaStatus));
-			return cudaStatus;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus);
-			return cudaStatus;
-		}
-
-		return cudaStatus;
-	}
-
-	__global__ void kernel_drawLine(float* d_srcRGB, int src_width, int src_height,
-		int begin_x, int begin_y, int end_x, int end_y)
-	{
-		int min_x = end_x < begin_x ? end_x : begin_x;
-		int max_x = end_x < begin_x ? begin_x : end_x;
-
-		int min_y = end_y < begin_y ? end_y : begin_y;
-		int max_y = end_y < begin_y ? begin_y : end_y;
-
-		const int x = blockIdx.x * blockDim.x + threadIdx.x;
-		const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-		if ((x - begin_x) * (end_y - begin_y) == (end_x - begin_x) * (y - begin_y)
-			&& min_x <= x && x <= max_x
-			&& min_y <= y && y <= max_y)
-		{
-			d_srcRGB[(y*src_width) + x] = 0;
-			d_srcRGB[(src_width*src_height) + (y*src_width) + x] = 255;
-			d_srcRGB[(2 * src_width*src_height) + (y*src_width) + x] = 0;
-		}
-	}
-
-	cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y)
-	{
-		dim3 block(32, 16, 1);
-		dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
-
-		kernel_drawLine << < grid, block >> >(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y);
-
-		cudaError_t cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			LOG_ERROR("Draw 112 kernel_memcopy launch failed: {}",cudaGetErrorString(cudaStatus));
-			return cudaStatus;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching kernel_bilinear!", cudaStatus);
-			return cudaStatus;
-		}
-
-		return cudaStatus;
-	}
-}
\ No newline at end of file
diff --git a/src/FFCuContextManager.cpp b/src/FFCuContextManager.cpp
deleted file mode 100644
index db097d6..0000000
--- a/src/FFCuContextManager.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#include "FFCuContextManager.h"
-
-#include "logger.hpp"
-
-using namespace std;
-
-FFCuContextManager::~FFCuContextManager()
-{
-    for(auto iter = ctxMap.begin(); iter != ctxMap.end(); iter++){
-        av_buffer_unref(&iter->second);
-    }
-    ctxMap.clear();
-}
-
-AVBufferRef *FFCuContextManager::getCuCtx(string gpuid)
-{
-     AVBufferRef *hw_device_ctx = ctxMap[gpuid];
-     if (nullptr == hw_device_ctx)
-     {
-        // 初始化硬件解码器
-        if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_CUDA, gpuid.c_str(), nullptr, 0) < 0) 
-        {
-            LOG_ERROR("Failed to create specified HW device.");
-            return nullptr;
-        }
-        ctxMap[gpuid] = hw_device_ctx;
-     }
-     return hw_device_ctx;
-}
\ No newline at end of file
diff --git a/src/FFCuContextManager.h b/src/FFCuContextManager.h
deleted file mode 100644
index 3050641..0000000
--- a/src/FFCuContextManager.h
+++ /dev/null
@@ -1,37 +0,0 @@
-
-#include<map>
-#include<string>
-
-extern "C"
-{
-	#include <libavcodec/avcodec.h> 
-	#include <libavdevice/avdevice.h> 
-	#include <libavformat/avformat.h> 
-	#include <libavfilter/avfilter.h> 
-	#include <libavutil/avutil.h> 
-    #include <libavutil/pixdesc.h> 
-	#include <libswscale/swscale.h>
-}
-
-using namespace std;
-
-class FFCuContextManager{
-public:
-    static FFCuContextManager* getInstance(){
-		static FFCuContextManager* singleton = nullptr;
-		if (singleton == nullptr){
-			singleton = new FFCuContextManager();
-		}
-		return singleton;
-	}
-
-    AVBufferRef *getCuCtx(string gpuid);
-
-private:
-    FFCuContextManager(){}
-	~FFCuContextManager();
-
-private:
-    map<string,AVBufferRef *> ctxMap;
-
-};
\ No newline at end of file
diff --git a/src/FFNvDecoder.cpp b/src/FFNvDecoder.cpp
deleted file mode 100644
index 9aff5fd..0000000
--- a/src/FFNvDecoder.cpp
+++ /dev/null
@@ -1,388 +0,0 @@
-#include "FFNvDecoder.h"
-
-#include <chrono>
-#include <thread>
-#include <fstream>
-
-#include <chrono>
-
-#include "FFCuContextManager.h"
-
-#include "logger.hpp"
-
-#include "utiltools.hpp"
-
-using namespace std;
-
-// 参考博客： https://blog.csdn.net/qq_40116098/article/details/120704340
-
-static AVPixelFormat get_hw_format(AVCodecContext *avctx, const AVPixelFormat *pix_fmts)
-{
-	FFNvDecoder* _this = (FFNvDecoder*)avctx->opaque;
-
-	const AVPixelFormat *p;
-
-	for (p = pix_fmts; *p != -1; p++) {
-		if (*p == _this->getHwPixFmt())
-			return *p;
-	}
-
-	LOG_ERROR("Failed to get HW surface format");
-	return AV_PIX_FMT_NONE;
-}
-
-FFNvDecoder::FFNvDecoder()
-{
-	// 初始化解码对象
-	fmt_ctx = nullptr;
-	avctx = nullptr;
-	m_bRunning = false;
-
-	stream = nullptr;
-    stream_index = -1;
-    hw_pix_fmt = AV_PIX_FMT_NONE;
-    m_dec_name = "";
-
-	m_bPause = false;
-	m_bReal = true;
-
-	m_decode_thread = 0;
-	m_post_decode_thread = 0;
-
-	m_bFinished = false;
-	m_dec_keyframe = false;
-	m_fps = 0.0;
-}
-
-FFNvDecoder::~FFNvDecoder()
-{
-	m_dec_keyframe = false;
-}
-
-bool FFNvDecoder::init(FFDecConfig& cfg)
-{
-	m_cfg = cfg;
-
-	fstream infile(cfg.uri);
-	if (infile.is_open()){
-		m_bReal = false;
-		infile.close();
-	}else {
-		m_bReal = true;
-	}
-
-	post_decoded_cbk = cfg.post_decoded_cbk;
-    decode_finished_cbk = cfg.decode_finished_cbk;
-
-	return init(cfg.uri.c_str(), cfg.gpuid.c_str(),cfg.force_tcp);
-}
-
-bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp)
-{
-	// av_log_set_level(AV_LOG_DEBUG);
-
-	avformat_network_init();
-
-	// 打开输入视频文件
-	AVDictionary *options = nullptr;
-	av_dict_set( &options, "bufsize", "655360", 0 );
-	av_dict_set( &options, "rtsp_transport", force_tcp ? "tcp" : "udp", 0 );
-	// av_dict_set( &options, "listen_timeout", "30", 0 ); // 单位为s
-	av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒
-	
-	fmt_ctx = avformat_alloc_context();
-	const char* input_file = uri;
-	if (avformat_open_input(&fmt_ctx, input_file, nullptr, &options) != 0) {
-		LOG_ERROR("Cannot open input file:{}",input_file);
-		return false;
-	}
-
-	// 查找流信息
-	if (avformat_find_stream_info(fmt_ctx, nullptr) < 0) {
-		LOG_ERROR("Cannot find input stream information");
-		return false;
-	}
-
-	// 查找视频流信息
-	AVCodec *decoder = nullptr;
-	stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0);
-	if (stream_index < 0) {
-		LOG_ERROR("Cannot find a video stream in the input file");
-		return false;
-	}
-
-	string cuvid_dec_name = string(decoder->name) + "_cuvid";
-	AVCodec *vcodec = avcodec_find_decoder_by_name(cuvid_dec_name.c_str());
-	if (!(avctx = avcodec_alloc_context3(vcodec)))
-		return (bool)AVERROR(ENOMEM);
-
-	// 得到视频流对象
-	stream = fmt_ctx->streams[stream_index];
-	if (avcodec_parameters_to_context(avctx, stream->codecpar) < 0)
-		return false;
-
-	m_fps = av_q2d(stream ->avg_frame_rate);
-
-	avctx->opaque = this;
-	// 设置解码器管理器的像素格式回调函数
-	avctx->get_format = get_hw_format;
-
-	hw_pix_fmt = AV_PIX_FMT_CUDA;
-
-	FFCuContextManager* pCtxMgr = FFCuContextManager::getInstance();
-
-	AVBufferRef *hw_device_ctx = pCtxMgr->getCuCtx(gpuid);
-	if(nullptr == hw_device_ctx){
-		av_log(nullptr, AV_LOG_ERROR, "create CUDA context failed ! \n");
-		return false;
-	}
-	avctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
-	if (nullptr == avctx->hw_device_ctx)
-	{
-		return false;
-	}
-
-	// 打开解码器流
-	AVDictionary *op = nullptr;
-	av_dict_set( &op, "gpu", gpuid, 0 );
-	// av_dict_set( &op, "surfaces", "5", 0 );
-	if (avcodec_open2(avctx, vcodec, &op) < 0) {
-		LOG_ERROR("Failed to open codec for stream");
-		return false;
-	}
-	
-	return true;
-}
-
-bool FFNvDecoder::isSurport(FFDecConfig& cfg)
-{
-	bool bRet = init(cfg);
-    decode_finished();
-    return bRet;
-}
-
-bool FFNvDecoder::start(){
-
-	m_bRunning = true;
-
-	pthread_create(&m_decode_thread,0,
-        [](void* arg)
-        {
-            FFNvDecoder* a=(FFNvDecoder*)arg;
-            a->decode_thread();
-            return (void*)0;
-        }
-    ,this);
-
-	return true;
-}
-
-void FFNvDecoder::decode_thread()
-{
-	AVPacket* pkt ;
-	pkt = av_packet_alloc();
-	av_init_packet( pkt );
-
-	pthread_create(&m_post_decode_thread,0,
-        [](void* arg)
-        {
-            FFNvDecoder* a=(FFNvDecoder*)arg;
-            a->post_decode_thread();
-            return (void*)0;
-        }
-    ,this);
-
-	// long start_time = UtilTools::get_cur_time_ms();
-
-	while (m_bRunning)
-	{
-		if (!m_bReal)
-		{
-			if (m_bPause)
-			{
-				std::this_thread::sleep_for(std::chrono::milliseconds(3));
-				continue;
-			}
-		}
-		
-		int result = av_read_frame(fmt_ctx, pkt);
-		if (result == AVERROR_EOF || result < 0)
-		{
-			LOG_ERROR("Failed to read frame!");
-			break;
-		}
-
-		if (m_dec_keyframe && !(pkt->flags & AV_PKT_FLAG_KEY)) {
-			av_packet_unref(pkt);
-			continue;
-		}
-
-		if (m_bReal)
-		{
-			if (m_bPause)
-			{
-				av_packet_unref(pkt);
-				std::this_thread::sleep_for(std::chrono::milliseconds(3));
-				continue;
-			}
-		}
-
-		if (stream_index == pkt->stream_index){
-			result = avcodec_send_packet(avctx, pkt);
-			if (result < 0){
-				av_packet_unref(pkt);
-				LOG_ERROR("{} - Failed to send pkt: {}", m_dec_name, result);
-				continue;
-			}
-
-			AVFrame* gpuFrame = av_frame_alloc();
-			result = avcodec_receive_frame(avctx, gpuFrame);
-			if ((result == AVERROR(EAGAIN) || result == AVERROR_EOF) || result < 0){
-				LOG_ERROR("{} - Failed to receive frame: {}", m_dec_name, result);
-				av_frame_free(&gpuFrame); 
-				av_packet_unref(pkt);
-				continue;
-			}
-			av_packet_unref(pkt);
-
-			if(gpuFrame != nullptr){
-				m_queue_mutex.lock();
-				if(mFrameQueue.size() <= 10){
-					mFrameQueue.push(gpuFrame);
-				}else{
-					av_frame_free(&gpuFrame); 
-				}
-				m_queue_mutex.unlock();
-			}
-		}
-		av_packet_unref(pkt);
-	}
-
-	m_bRunning = false;
-
-	// long end_time = UtilTools::get_cur_time_ms();
-	// cout << "解码用时：" << end_time - start_time << endl;
-
-	if (m_post_decode_thread != 0)
-	{
-		pthread_join(m_post_decode_thread,0);
-	}
-
-	decode_finished_cbk(m_finishedDecArg);
-
-	decode_finished();
-
-	// 清空队列
-	while(mFrameQueue.size() > 0){
-		AVFrame * gpuFrame = mFrameQueue.front();
-		av_frame_free(&gpuFrame); 
-		mFrameQueue.pop();
-	}
-
-	LOG_INFO("{} - decode thread exited.", m_dec_name);
-}
-
-void FFNvDecoder::decode_finished(){
-	if (avctx)
-	{
-		avcodec_free_context(&avctx);
-	}
-	
-	if (fmt_ctx)
-	{
-		avformat_close_input(&fmt_ctx);
-	}
-
-	m_bFinished = true;
-	m_dec_keyframe = false;
-}
-
-void FFNvDecoder::post_decode_thread(){
-	int skip_frame = m_cfg.skip_frame;
-	if (skip_frame <= 0){
-		skip_frame = 1;
-	}
-	
-	int index = 0;
-	while (m_bRunning)
-	{
-		if(mFrameQueue.size() > 0){
-			std::lock_guard<std::mutex> l(m_snapshot_mutex);
-			// 取队头数据
-			m_queue_mutex.lock();
-			AVFrame * gpuFrame = mFrameQueue.front();
-			mFrameQueue.pop();
-			m_queue_mutex.unlock();
-			// 跳帧
-			if (skip_frame == 1 || index % skip_frame == 0){
-				post_decoded_cbk(m_postDecArg, gpuFrame);
-				index = 0;
-			}
-
-			av_frame_free(&gpuFrame); 
-
-			index++;
-		}
-	}
-
-	LOG_INFO("post decode thread exited.");
-}
-
-void FFNvDecoder::close(){
-	m_bRunning=false;
-	if(m_decode_thread != 0){
-		pthread_join(m_decode_thread,0);
-	}
-	m_dec_keyframe = false;
-}
-
-AVPixelFormat FFNvDecoder::getHwPixFmt(){
-	return hw_pix_fmt;
-}
-
-bool FFNvDecoder::isRunning(){
-	return m_bRunning;
-}
-
-bool FFNvDecoder::isFinished(){
-	return m_bFinished;
-}
-
-bool FFNvDecoder::isPausing(){
-	return m_bPause;
-}
-
-bool FFNvDecoder::getResolution( int &width, int &height ){
-	if (avctx != nullptr)
-	{
-		width = avctx->width;
-		height = avctx->height;
-		return true;
-	}
-	
-	return false;
-}
-
-void FFNvDecoder::pause(){
-	m_bPause = true;
-}
-
-void FFNvDecoder::resume(){
-	m_bPause = false;
-}
-
-void FFNvDecoder::setDecKeyframe(bool bKeyframe)
-{
-	m_dec_keyframe = bKeyframe;
-}
-
-int FFNvDecoder::getCachedQueueLength(){
-	m_queue_mutex.lock();
-	int queue_size = mFrameQueue.size(); 
-	m_queue_mutex.lock();
-	return queue_size;
-}
-
-float FFNvDecoder::fps(){
-	return m_fps;
-}
diff --git a/src/FFNvDecoder.h b/src/FFNvDecoder.h
deleted file mode 100644
index 68d2a2f..0000000
--- a/src/FFNvDecoder.h
+++ /dev/null
@@ -1,62 +0,0 @@
-#include<string>
-#include <pthread.h>
-
-#include "AbstractDecoder.h"
-
-#include <mutex>
-
-using namespace std;
-
-class FFNvDecoder : public AbstractDecoder{
-public:
-    FFNvDecoder();
-    ~FFNvDecoder();
-    bool init(FFDecConfig& cfg);
-    void close();
-    bool start();
-    void pause();
-    void resume();
-
-    void setDecKeyframe(bool bKeyframe);
-
-    bool isRunning();
-    bool isFinished();
-    bool isPausing();
-    bool getResolution( int &width, int &height );
-
-    bool isSurport(FFDecConfig& cfg);
-
-    int getCachedQueueLength();
-
-    float fps();
-
-    DECODER_TYPE getDecoderType(){ return DECODER_TYPE_FFMPEG; }
-
-public:
-    AVPixelFormat getHwPixFmt();
-
-private:
-    void decode_thread();
-    void post_decode_thread();
-    bool init(const char* uri, const char* gpuid, bool force_tcp);
-    void decode_finished();
-
-private:
-    AVStream* stream;
-    AVCodecContext *avctx;
-    int stream_index;
-    AVFormatContext *fmt_ctx;
-    AVPixelFormat hw_pix_fmt;
-
-    pthread_t m_decode_thread;
-    pthread_t m_post_decode_thread;
-    
-    bool m_bRunning;
-    bool m_bFinished;
-
-    bool m_bPause;
-
-    bool m_bReal; // 是否实时流
-
-    float m_fps;
-};
\ No newline at end of file
diff --git a/src/FFNvDecoderManager.cpp b/src/FFNvDecoderManager.cpp
deleted file mode 100644
index b15ef22..0000000
--- a/src/FFNvDecoderManager.cpp
+++ /dev/null
@@ -1,600 +0,0 @@
-#include "FFNvDecoderManager.h"
-
-#include "FFNvDecoder.h"
-#include "./gb28181/FFGB28181Decoder.h"
-
-#include "logger.hpp"
-
-using namespace std;
-
-
-AbstractDecoder* FFNvDecoderManager::createDecoder(MgrDecConfig config){
-
-    closeAllFinishedDecoder();
-
-    if (config.cfg.post_decoded_cbk == nullptr || config.cfg.decode_finished_cbk== nullptr){
-        return nullptr;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto it = decoderMap.find(config.name);
-    if (it != decoderMap.end()){
-        LOG_ERROR("已存在name为{}的解码器", config.name);
-        return nullptr;
-    }
-
-    AbstractDecoder* dec = nullptr;
-    if(DECODER_TYPE_FFMPEG == config.dec_type){
-        dec = new FFNvDecoder();
-    }else if(DECODER_TYPE_GB28181 == config.dec_type){
-        dec = new FFGB28181Decoder();
-    }
-    
-    if (dec == nullptr){
-        LOG_ERROR("没有指定解码器类型");
-        return nullptr;
-    }
-    
-    bool bRet= dec->init(config.cfg);
-    if (bRet)
-    {
-        dec->setName(config.name) ;
-        decoderMap[config.name] = dec;
-
-        LOG_INFO("[{}][{}]- 解码器初始化成功",config.name, config.cfg.uri);
-        return dec;
-    }
-    
-    // 创建失败，关闭解码器
-    dec->close();
-    delete dec;
-
-    LOG_ERROR("[{}][{}]- 解码器初始化失败！",config.name, config.cfg.uri);
-    return nullptr;
-}
-
-bool FFNvDecoderManager::setPostDecArg(const string name, const void * userPtr)
-{
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        dec->second->m_postDecArg = userPtr;
-        return true;
-    }
-
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-bool FFNvDecoderManager::setFinishedDecArg(const string name, const void * userPtr)
-{
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        dec->second->m_finishedDecArg = userPtr;
-        return true;
-    }
-
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-AbstractDecoder* FFNvDecoderManager::getDecoderByName(const string name)
-{
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return nullptr;
-    }
-    
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        return dec->second;
-    }
-
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return nullptr;
-}
-
-bool FFNvDecoderManager::startDecode(AbstractDecoder* dec){
-    if (dec != nullptr && !dec->isRunning())
-    {
-        return dec->start();
-    }
-    return false;
-}
-
-bool FFNvDecoderManager::startDecodeByName(const string name){
-     if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        return dec->second->start();
-    }
-
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-void FFNvDecoderManager::startAllDecode(){
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){
-        if (!iter->second->isRunning())
-        {
-            iter->second->start();
-        }
-    }
-}
-
-bool FFNvDecoderManager::closeDecoderByName(const string name){
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        dec->second->close();
-        delete dec->second;
-        dec->second = nullptr;
-        decoderMap.erase(dec);
-
-        return true;
-    }
-    
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-void FFNvDecoderManager::closeAllDecoder()
-{
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    for(auto iter = decoderMap.begin(); iter != decoderMap.end(); iter++){
-        iter->second->close();
-        delete iter->second;
-        iter->second = nullptr;
-    }
-    decoderMap.clear();
-}
-
-void FFNvDecoderManager::closeAllFinishedDecoder()
-{
-    std::lock_guard<std::mutex> l(m_mutex);
-
-     for(auto iter = decoderMap.begin(); iter != decoderMap.end(); ){
-        if (iter->second->isFinished())
-        {
-            delete iter->second;
-            iter->second = nullptr;
-            iter = decoderMap.erase(iter);
-        }
-        else
-        {
-            iter++ ;
-        }
-    }
-}
-
-int FFNvDecoderManager::count()
-{
-    closeAllFinishedDecoder();
-
-    std::lock_guard<std::mutex> l(m_mutex);
-    return decoderMap.size();
-}
-
-bool FFNvDecoderManager::pauseDecoder(const string name)
-{
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        dec->second->pause();
-        return true;
-    }
-    
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-bool FFNvDecoderManager::resumeDecoder(const string name)
-{
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        dec->second->resume();
-        return true;
-    }
-    
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-bool FFNvDecoderManager::isSurport(MgrDecConfig& config)
-{
-    {
-        std::lock_guard<std::mutex> l(m_mutex);
-
-        auto it = decoderMap.find(config.name);
-        if (it != decoderMap.end()){
-            LOG_ERROR("已存在name所标记的解码器");
-            return false;
-        }
-    }
-
-    AbstractDecoder* dec = nullptr;
-    if(config.dec_type = DECODER_TYPE_FFMPEG){
-        dec = new FFNvDecoder();
-    }else if(config.dec_type = DECODER_TYPE_GB28181){
-        dec = new FFGB28181Decoder();
-    }
-    
-    if (dec == nullptr){
-        LOG_ERROR("没有指定解码器类型");
-        return false;
-    }
-
-    bool bRet = dec->isSurport(config.cfg);
-    delete dec;
-    dec = nullptr;
-
-    return bRet;
-}
-
-bool FFNvDecoderManager::isRunning(const string name){
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        return dec->second->isRunning();
-    }
-    
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-bool FFNvDecoderManager::isFinished(const string name){
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        return dec->second->isFinished();
-    }
-    
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-bool FFNvDecoderManager::isPausing(const string name){
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        return dec->second->isPausing();
-    }
-    
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-bool FFNvDecoderManager::setDecKeyframe(const string name, bool bKeyframe)
-{
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        dec->second->setDecKeyframe(bKeyframe);
-        return true;
-    }
-    
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-bool FFNvDecoderManager::getResolution(const string name, int &width, int &height)
-{
-    if (name.empty())
-    {
-        LOG_ERROR("name 为空!");
-        return false;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end())
-    {
-        dec->second->getResolution(width, height);
-        return true;
-    }
-    
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return false;
-}
-
-vector<string> FFNvDecoderManager::getAllDecodeName(){
-    
-    closeAllFinishedDecoder();
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    vector<string> decode_names;
-    for(auto it = decoderMap.begin(); it != decoderMap.end(); ++it){
-        decode_names.push_back(it->first);
-    }
-    return decode_names;
-}
-
-int FFNvDecoderManager::getCachedQueueLength(const string name){
-    if (name.empty()){
-        LOG_ERROR("name 为空!");
-        return -1;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end()){
-        return dec->second->getCachedQueueLength();
-    }
-    
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return -1;
-}
-
-FFImgInfo* FFNvDecoderManager::snapshot(const string& uri){
-    if (uri.empty()){
-        return nullptr;
-    }
- 
-    AVFormatContext* ifmt_ctx = nullptr;
-	AVCodecContext* codec_ctx = nullptr;
-	AVCodec* codec = nullptr;
-	AVPacket* pkt = nullptr;
-	AVFrame *frame = nullptr;
-	AVFrame *pFrameRGB = nullptr;	
-	int video_index = -1;
-	AVStream* st = nullptr;
-	SwsContext *img_convert_ctx = nullptr;
-	uint8_t *buffer = nullptr;
-    int numBytes = 0;
-	int index = 0;
-
-	FFImgInfo* imgInfo = nullptr;
- 
- 	//av_register_all();
-	avformat_network_init();
-
-	// 参数设置
-	AVDictionary *options = nullptr;
-	av_dict_set( &options, "bufsize", "655360", 0 );
-	av_dict_set( &options, "rtsp_transport", "tcp", 0 );
-	av_dict_set( &options, "stimeout", "30000000", 0 ); // 单位为 百万分之一秒
-	
-	///打开输入的流
-	int ret = avformat_open_input(&ifmt_ctx, uri.c_str(), nullptr, &options);
-	if (ret != 0){
-		printf("Couldn't open input stream.\n");
-		goto end_flag ;
-	}
- 
-	//查找流信息
-	if (avformat_find_stream_info(ifmt_ctx, nullptr) < 0){
-		printf("Couldn't find stream information.\n");
-		goto end_flag ;
-	}
- 
-	//找到视频流索引
-    video_index = av_find_best_stream(ifmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
- 
-    st = ifmt_ctx->streams[video_index];
-    
-    //找到解码器
-    codec = avcodec_find_decoder(st->codecpar->codec_id);
-    if (!codec){
-        fprintf(stderr, "Codec not found\n");
-        goto end_flag ;
-    }
- 
-    //申请AVCodecContext
-    codec_ctx = avcodec_alloc_context3(codec);
-    if (!codec_ctx){
-        goto end_flag ;
-    }
- 
-	avcodec_parameters_to_context(codec_ctx, ifmt_ctx->streams[video_index]->codecpar);
- 
-    //打开解码器
-    if ((ret = avcodec_open2(codec_ctx, codec, nullptr) < 0)){
-        goto end_flag ;
-    }
-	
-    // 计算解码后原始数据所需缓冲区大小，并分配内存空间 Determine required buffer size and allocate buffer
-    numBytes = av_image_get_buffer_size(AV_PIX_FMT_BGR24, codec_ctx->width, codec_ctx->height, 1);
-    buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t));
-    
-	pFrameRGB = av_frame_alloc();
-    av_image_fill_arrays(pFrameRGB->data, pFrameRGB->linesize, buffer, AV_PIX_FMT_BGR24, codec_ctx->width, codec_ctx->height, 1);
-
-	img_convert_ctx = sws_getContext(codec_ctx->width, codec_ctx->height,codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height, AV_PIX_FMT_BGR24, 
-		   SWS_BICUBIC, nullptr, nullptr, nullptr);
- 
-	pkt = av_packet_alloc();
-	frame = av_frame_alloc();
-	while (av_read_frame(ifmt_ctx, pkt) >= 0){
-		if (pkt->stream_index == video_index){
-			int ret = avcodec_send_packet(codec_ctx, pkt);
-			if (ret >= 0){
-				ret = avcodec_receive_frame(codec_ctx, frame);
-				if ((ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) || ret < 0){
-					LOG_ERROR("Failed to receive frame: {}",ret);
-					continue;
-				}
-
-				index ++ ;
-
-				if (index >= 5){
-					// 取解码出来的第三帧，应该可以一定程度优化花屏问题
-					sws_scale(img_convert_ctx, (const unsigned char* const*)frame->data, frame->linesize, 0, codec_ctx->height, pFrameRGB->data, pFrameRGB->linesize);
-
-					imgInfo = new FFImgInfo();
-					imgInfo->pData = buffer;
-					imgInfo->height = codec_ctx->height;
-					imgInfo->width = codec_ctx->width;
-
-					break;
-				}
-			}
-		}
-		av_packet_unref(pkt);
-	}
-
-end_flag:
-	if (codec_ctx != nullptr){
-		avcodec_close(codec_ctx);
-		avcodec_free_context(&codec_ctx);
-	}
-	
-	if (ifmt_ctx != nullptr){
-		avformat_close_input(&ifmt_ctx);
-	}
-	
-	if (frame != nullptr){
-		av_frame_free(&frame);
-	}
-
-	if (pFrameRGB != nullptr){
-		av_frame_free(&pFrameRGB);
-	}
-
-	if (pkt != nullptr){
-		av_packet_free(&pkt);
-	}
-
-	return imgInfo;
-}
-
-void FFNvDecoderManager::releaseFFImgInfo(FFImgInfo* info){
-	if(nullptr != info){
-		if(info->pData != nullptr){
-			av_free(info->pData);
-			info->pData = nullptr;
-		}
-		delete info;
-		info = nullptr;
-	}
-}
-
-FFImgInfo* FFNvDecoderManager::snapshot_in_task(const string name){
-    if (name.empty()){
-        LOG_ERROR("name 为空!");
-        return nullptr;
-    }
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    auto dec = decoderMap.find(name);
-    if (dec != decoderMap.end()){
-        return dec->second->snapshot();
-    }
-    
-    LOG_ERROR("没有找到name为{}的解码器",name);
-    return nullptr;
-}
-
-vector<FFImgInfo*> FFNvDecoderManager::timing_snapshot_all(){
-
-    closeAllFinishedDecoder();
-
-    std::lock_guard<std::mutex> l(m_mutex);
-
-    vector<FFImgInfo*> vec;
-    for(auto it = decoderMap.begin(); it != decoderMap.end(); ++it){
-        if(it->second->isSnapTime()){
-            FFImgInfo* imginfo = it->second->snapshot();
-            if(imginfo != nullptr){
-                vec.push_back(imginfo);
-            }
-            it->second->updateLastSnapTime();
-        }
-    }
-
-    return vec;
-}
\ No newline at end of file
diff --git a/src/FFNvDecoderManager.h b/src/FFNvDecoderManager.h
deleted file mode 100644
index 685b1f9..0000000
--- a/src/FFNvDecoderManager.h
+++ /dev/null
@@ -1,268 +0,0 @@
-#include "AbstractDecoder.h"
-#include<iostream>
-#include<vector>
-#include<map>
-
-#include <mutex>
-
-using namespace std;
-
-struct MgrDecConfig
-{
-	DECODER_TYPE dec_type;	// 解码器类型
-    FFDecConfig cfg;    // 解码器配置
-    string name{""};    // 解码器名称
-};
-
-/**
- * 解码器管理类，单例类
- * 谨防死锁
- **/
-class FFNvDecoderManager {
-public:
-    /**************************************************
-	* 接口：getInstance
-	* 功能：获取解码器管理者实例
-	* 参数：无
-	* 返回：成功返回 解码器管理者实例， 失败返回 nullptr
-	* 备注：调用其他接口前，需要先调用该接口获取管理者实例
-	**************************************************/
-    static FFNvDecoderManager* getInstance(){
-		static FFNvDecoderManager* singleton = nullptr;
-		if (singleton == nullptr){
-			singleton = new FFNvDecoderManager();
-		}
-		return singleton;
-	}
-
-    ~FFNvDecoderManager()
-    {
-        closeAllDecoder();
-    }
-
-    /**************************************************
-	* 接口：createDecoder
-	* 功能：根据配置信息创建解码器
-	* 参数：MgrDecConfig& config 解码器配置信息
-	* 返回：成功返回解码器， 失败返回 nullptr
-	* 备注：
-	**************************************************/
-    AbstractDecoder* createDecoder(MgrDecConfig config);
-
-    /**************************************************
-	* 接口：setPostDecArg
-	* 功能：设置解码数据回调接口的用户自定义参数
-	* 参数：string name 解码器名称
-    *       const void * userPtr  用户自定义的要传到解码数据回调接口的数据
-	* 返回：设置成功返回true，失败返回false
-	* 备注：
-	**************************************************/
-    bool setPostDecArg(const string name, const void * userPtr);
-
-	/**************************************************
-	* 接口：setFinishedDecArg
-	* 功能：设置解码结束回调接口的用户自定义参数
-	* 参数：string name 解码器名称
-    *       const void * userPtr  用户自定义的要传到解码数据回调接口的数据
-	* 返回：设置成功返回true，失败返回false
-	* 备注：
-	**************************************************/
-    bool setFinishedDecArg(const string name, const void * userPtr);
-
-    /**************************************************
-	* 接口：getDecoderByName
-	* 功能：根据解码器名称返回解码器对象指针
-	* 参数：const string name 解码器名称
-	* 返回：成功返回对应的解码器对象的指针，失败返回nullptr
-	* 备注：
-	**************************************************/
-    AbstractDecoder* getDecoderByName(const string name);
-
-    /**************************************************
-	* 接口：startDecode
-	* 功能：启动解码
-	* 参数：FFNvDecoder* 解码器指针
-	* 返回：void
-	* 备注：
-	**************************************************/
-    bool startDecode(AbstractDecoder*);
-
-    /**************************************************
-	* 接口：startAllDecode
-	* 功能：启动全部解码
-	* 参数：void
-	* 返回：void
-	* 备注：
-	**************************************************/
-    void startAllDecode();
-
-    /**************************************************
-	* 接口：startDecodeByName
-	* 功能：启动名称对应的解码器
-	* 参数：string name 解码器名称
-	* 返回：成功返回true，失败返回false
-	* 备注：
-	**************************************************/
-    bool startDecodeByName(const string name);
-
-    /**************************************************
-	* 接口：closeDecoderByName
-	* 功能：关闭解码器名称对应的解码
-	* 参数：const string name 解码器名称
-	* 返回：成功返回true，失败返回false
-	* 备注：
-	**************************************************/
-    bool closeDecoderByName(const string name);
-
-    /**************************************************
-	* 接口：closeAllDecoder
-	* 功能：关闭全部解码器
-	* 参数：void
-	* 返回：void
-	* 备注：
-	**************************************************/
-    void closeAllDecoder();
-
-	/**************************************************
-	* 接口：closeAllDecoderByGpuid
-	* 功能：关闭某张显卡撒花姑娘的全部解码器
-	* 参数：const string gpuid gpu的id
-	* 返回：void
-	* 备注：
-	**************************************************/
-    void closeAllDecoderByGpuid(const string gpuid);
-
-    /**************************************************
-	* 接口：pauseDecoder
-	* 功能：暂停指定名称的解码器
-	* 参数：const string name 解码器名称
-	* 返回：成功返回true，失败返回false
-	* 备注：
-	**************************************************/
-    bool pauseDecoder(const string name);
-
-    /**************************************************
-	* 接口：pauseDecoder
-	* 功能：恢复指定名称的解码器
-	* 参数：const string name 解码器名称
-	* 返回：成功返回true，失败返回false
-	* 备注：
-	**************************************************/
-    bool resumeDecoder(const string name);
-
-    /**************************************************
-	* 接口：isSurport
-	* 功能：是否支持指定配置的解码
-	* 参数：FFDecConfig& cfg 解码器配置
-	* 返回：支持返回true，不支持返回false
-	* 备注：
-	**************************************************/
-    bool isSurport(MgrDecConfig& config);
-
-    /**************************************************
-	* 接口：isRunning
-	* 功能：根据解码器名称判断解码器是否正在运行
-	* 参数：const string name 解码器名称
-	* 返回：正在运行返回true，否则返回false
-	* 备注：
-	**************************************************/
-    bool isRunning(const string name);
-
-	/**************************************************
-	* 接口：isFinished
-	* 功能：根据解码器名称判断解码器是否已经结束
-	* 参数：const string name 解码器名称
-	* 返回：正在运行返回true，否则返回false
-	* 备注：
-	**************************************************/
-    bool isFinished(const string name);
-
-	/**************************************************
-	* 接口：isPausing
-	* 功能：根据解码器名称判断解码器是否暂停
-	* 参数：const string name 解码器名称
-	* 返回：正在运行返回true，否则返回false
-	* 备注：
-	**************************************************/
-    bool isPausing(const string name);
-
-    /**************************************************
-	* 接口：count
-	* 功能：获取正在运行的解码器数量
-	* 参数：void
-	* 返回：正在运行的解码器数量
-	* 备注：
-	**************************************************/
-    int count();
-
-	/**************************************************
-	* 接口：setDecKeyframe
-	* 功能：设置是否只解码关键帧。默认全解
-	* 参数：const string name 解码器名称
-	*		bool bKeyframe   是否只解码关键帧。true，只解码关键帧；false,普通的全解码
-	* 返回：bool 成功返回true,失败返回false
-	* 备注：
-	**************************************************/
-	bool setDecKeyframe(const string name, bool bKeyframe);
-
-	/**************************************************
-	* 接口：getResolution
-	* 功能：获取视频分辨率
-	* 参数：const string name 解码器名称
-	*		int &width   从 width 返回视频宽度
-	*		int &height	 从 height 返回视频高度
-	* 返回：bool 成功获取返回true,失败返回false
-	* 备注：
-	**************************************************/
-	bool getResolution(const string name, int &width, int &height);
-
-	/**************************************************
-	* 接口：getAllDecodeName
-	* 功能：获取全部解码器名称
-	* 参数：void
-	* 返回：vector<string> 返回全部解码器名称
-	* 备注：
-	**************************************************/
-	vector<string> getAllDecodeName();
-
-	/**************************************************
-	* 接口：getCachedQueueLength
-	* 功能：获取解码缓冲队列当前长度
-	* 参数：const string name 解码器名称
-	* 返回：int 解码缓冲队列当前长度
-	* 备注：
-	**************************************************/
-	int getCachedQueueLength(const string name);
-
-	/**************************************************
-	* 接口：snapshot
-	* 功能：获取视频快照
-	* 参数：const string& uri 视频地址
-	* 返回：FFImgInfo* 快照信息
-	* 备注：
-	**************************************************/
-	FFImgInfo* snapshot(const string& uri);
-
-	/**************************************************
-	* 接口：releaseFFImgInfo
-	* 功能：释放视频快照信息
-	* 参数：FFImgInfo* info 视频快照信息
-	* 返回：void
-	* 备注：
-	**************************************************/
-	void releaseFFImgInfo(FFImgInfo* info);
-
-	FFImgInfo* snapshot_in_task(const string name);
-
-	vector<FFImgInfo*> timing_snapshot_all();
-
-private:
-    FFNvDecoderManager(){}
-    
-    void closeAllFinishedDecoder();
-
-private:
-    map<string, AbstractDecoder*> decoderMap;
-
-    mutex m_mutex;
-};
\ No newline at end of file
diff --git a/src/GpuRgbMemory.hpp b/src/GpuRgbMemory.hpp
deleted file mode 100644
index 8e3d15b..0000000
--- a/src/GpuRgbMemory.hpp
+++ /dev/null
@@ -1,86 +0,0 @@
-#include<string>
-
-#include "cuda_kernels.h"
-#include "define.hpp"
-#include "utiltools.hpp"
-
-using namespace std;
-
-class GpuRgbMemory{
-
-public:
-     GpuRgbMemory(int _channel, int _width, int _height, string _id, string _gpuid, bool _isused){
-        channel = _channel;
-        width = _width;
-        height = _height;
-        size = channel * width * height;
-        isused = _isused;
-        id = _id;
-        gpuid = _gpuid;
-        timestamp = UtilTools::get_cur_time_ms();
-
-        cudaSetDevice(atoi(gpuid.c_str()));
-        CHECK_CUDA(cudaMalloc((void **)&pHwRgb, size * sizeof(unsigned char)));
-    }
-
-    ~GpuRgbMemory(){
-        if (pHwRgb) {
-            cudaSetDevice(atoi(gpuid.c_str()));
-            CHECK_CUDA(cudaFree(pHwRgb));
-            pHwRgb = nullptr;
-        }
-    }
-    
-    int getSize() {
-        return size;
-    }
-    
-    bool isIsused() {
-        return isused;
-    }
-
-    void setIsused(bool _isused) {
-        isused = _isused;
-        // 更新时间戳
-        timestamp = UtilTools::get_cur_time_ms();
-    }
-
-    string getId() {
-        return id;
-    }
-
-    string getGpuId() {
-        return gpuid;
-    }
-
-    unsigned char* getMem(){
-        return pHwRgb;
-    }
-
-    long long getTimesstamp(){
-        return timestamp;
-    }
-
-    int getWidth(){
-        return width;
-    }
-
-    int getHeight(){
-        return height;
-    }
-
-    int getChannel(){
-        return channel;
-    }
-
-private:
-    int size;
-    bool isused;
-    string id;
-    string gpuid;
-    unsigned char * pHwRgb{nullptr};
-    long long timestamp;
-    int width{0};
-    int height{0};
-    int channel{3};
-};
\ No newline at end of file
diff --git a/src/ImageSaveGPU.cpp b/src/ImageSaveGPU.cpp
deleted file mode 100644
index 9382a27..0000000
--- a/src/ImageSaveGPU.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-#include "cuda_kernels.h"
-
-#include "logger.hpp"
-
-
-//int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height)
-//{
-//	return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height);
-//	//return 0;
-//}
-//
-//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height)
-//{
-//	return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height);
-//	//return 0;
-//}
-//
-//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB)
-//{
-//	return jpegNPP(szOutputFile, d_srcRGB);
-//}
-//
-//int saveJPEG(const char *szOutputFile, float* d_srcRGB)
-//{
-//	return jpegNPP(szOutputFile, d_srcRGB);
-//}
-
-int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height)
-{
-	cudaError_t cudaStatus = cuda_common::ResizeImage(d_srcRGB, src_width, src_height, d_dstRGB, dst_width, dst_height);
-	if (cudaStatus != cudaSuccess) {
-		LOG_ERROR("cuda_common::ResizeImage failed: {}",cudaGetErrorString(cudaStatus));
-		return -1;
-	}
-
-	return 0;
-}
-
-//int initTables()
-//{
-//	initTable();
-//	return 0;
-//}
-//
-//int initTables(int flag, int width, int height)
-//{
-//	initTable(0, width, height);
-//	return 0;
-//}
-
-int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
-{
-	cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom);
-	return 0;
-}
-
-int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
-{
-	cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom);
-	return 0;
-}
-
-int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y)
-{
-	cuda_common::DrawLine(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y);
-	return 0;
-}
-
-//int releaseJpegSaver()
-//{
-//	releaseJpegNPP();
-//	return 0;
-//}
-
-int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom)
-{
-	cudaError_t cudaStatus = cuda_common::PartMemCopy(d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom);
-	if (cudaStatus != cudaSuccess) {
-		LOG_ERROR("cuda_common::77 PartMemCopy failed: {} {} {} {} {} {} {}",cudaGetErrorString(cudaStatus), left, top, right, bottom, src_height, d_dstRGB);
-		return -1;
-	}
-
-	return 0;
-}
-//#include <fstream>
-//extern std::ofstream g_os;
-int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB,
-	int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h,
-	float submeanb, float submeang, float submeanr,
-	float varianceb, float varianceg, float variancer)
-{
-	//g_os << "cudaMemcpyHostToDevice begin 9" << std::endl;
-	cudaError_t cudaStatus = cuda_common::PartMemResizeBatch(
-		d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h,
-		submeanb, submeang, submeanr,
-		varianceb, varianceg, variancer);
-	//g_os << "cudaMemcpyHostToDevice end 9" << std::endl;
-	if (cudaStatus != cudaSuccess) {
-		LOG_ERROR("cuda_common::PartMemResizeBatch failed: {}",cudaGetErrorString(cudaStatus));
-		return -1;
-	}
-
-	return 0;
-}
-
-
-//int PartMemResizeBatch(float * d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, 
-//	int count, int* vleft, int * vtop, int* vright, int* vbottom, int dst_w, int dst_h,
-//	float submeanb, float submeang, float submeanr,
-//	float varianceb, float varianceg, float variancer)
-//
-//{
-//	cudaError_t cudaStatus = cuda_common::PartMemResizeBatch(
-//		d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h,
-//		submeanb, submeang, submeanr,
-//		varianceb, varianceg, variancer);
-//	if (cudaStatus != cudaSuccess) {
-//		fprintf(stderr, "cuda_common::PartMemCopy failed: %s\n", cudaGetErrorString(cudaStatus));
-//		return -1;
-//	}
-//
-//	return 0;
-//}
\ No newline at end of file
diff --git a/src/ImageSaveGPU.h b/src/ImageSaveGPU.h
deleted file mode 100644
index 272a6d2..0000000
--- a/src/ImageSaveGPU.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*******************************************************************************************
-* Version: VPT_x64_V2.0.0_20170904
-* CopyRight: 中科院自动化研究所模式识别实验室图像视频组
-* UpdateDate: 20170904
-* Content: 人车物监测跟踪
-********************************************************************************************/
-
-#ifndef IMAGESAVEGPU_H_
-#define IMAGESAVEGPU_H_
-
-#ifdef _MSC_VER
-	#ifdef IMAGESAVEGPU_EXPORTS
-		#define IMAGESAVEGPU_API __declspec(dllexport)
-	#else
-		#define IMAGESAVEGPU_API __declspec(dllimport)
-	#endif
-#else
-#define IMAGESAVEGPU_API __attribute__((visibility ("default")))
-#endif
-// 功能：保存成jpeg文件
-// szOutputFile		输出图片路径，如D:\\out.jpg
-// d_srcRGB			输入RGB数据，由cudaMalloc分配的显存空间，数据排列形式为：BBBBBB......GGGGGG......RRRRRRRR......
-// img_width		RGB数据图片的宽度
-// img_height		RGB数据图片的高度
-//
-//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height);
-//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB);
-//
-//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height);
-//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB);
-
-// 功能：防缩图像
-IMAGESAVEGPU_API int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height);
-
-// 功能：部分拷贝数据
-IMAGESAVEGPU_API int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom);
-
-//IMAGESAVEGPU_API int partMemResizeImage(float * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB,
-//	int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h,
-//	float submeanb, float submeang, float submeanr,
-//	float varianceb, float varianceg, float variancer);
-
-
-IMAGESAVEGPU_API int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB,
-	int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h,
-	float submeanb, float submeang, float submeanr,
-	float varianceb, float varianceg, float variancer);
-
-
-//// 功能：初始化GPU保存图像的各种量化表
-//IMAGESAVEGPU_API int initTables();
-//IMAGESAVEGPU_API int initTables(int falg, int width, int height);
-//
-//// 功能：释放资源
-//IMAGESAVEGPU_API int releaseJpegSaver();
-
-// 功能：在GPU中绘制快照包围框
-IMAGESAVEGPU_API int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);
-
-IMAGESAVEGPU_API int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);
-
-// 功能：在GPU中绘制直线
-IMAGESAVEGPU_API int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y);
-
-#endif
diff --git a/src/Makefile b/src/Makefile
index 3da0ec5..2daafeb 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,67 +1,62 @@
-XX = g++
+# 项目根目录
+TOP_DIR:=$(patsubst %/, %, $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+export TOP_DIR
 
-CUDA_ROOT = /usr/local/cuda-11.1
-NVCC = $(CUDA_ROOT)/bin/nvcc
+# 各项目录
+BUILD_DIR:=$(TOP_DIR)/build
+BIN_DIR:=$(BUILD_DIR)/bin
+export BUILD_DIR
 
-
-PROJECT_ROOT= /mnt/data/cmhu/FFNvDecoder
+PROJECT_ROOT= /home/huchunming/FFNvDecoder
 
 DEPEND_DIR = $(PROJECT_ROOT)/bin
 SRC_ROOT = $(PROJECT_ROOT)/src
 THIRDPARTY_ROOT = $(PROJECT_ROOT)/3rdparty
-
-
-TARGET= $(DEPEND_DIR)/lib/test
-
-
 SPDLOG_ROOT = $(THIRDPARTY_ROOT)/spdlog-1.9.2/release
-JRTP_ROOT = $(THIRDPARTY_ROOT)/jrtp_export
-
-
-INCLUDE= -I $(DEPEND_DIR)/include \
-  -I $(CUDA_ROOT)/include \
-  -I $(SRC_ROOT)/common/inc \
-  -I $(SRC_ROOT)/common/UtilNPP \
-  -I $(SRC_ROOT)\
-  -I $(SPDLOG_ROOT)/include \
-  -I $(SRC_ROOT)/gb28181 \
-  -I $(JRTP_ROOT)/jrtplib/include/jrtplib3 \
-  -I $(JRTP_ROOT)/jthread/include/jthread
 
-LIBSPATH= -L $(DEPEND_DIR)/lib -lavformat -lavcodec -lswscale -lavutil -lavfilter -lswresample -lavdevice \
-   -L $(CUDA_ROOT)/lib64 -lcuda -lcudart -lnvcuvid -lcurand -lcublas -lnvjpeg \
-   -L $(SPDLOG_ROOT) -l:libspdlog.a \
-   -L $(JRTP_ROOT)/jthread/lib -l:libjthread.a \
-   -L $(JRTP_ROOT)/jrtplib/lib -l:libjrtp.a
+#编译器
+CXX:=g++
+CXXFLAGS:=-std=c++11 -Wall -Wextra -Og -g 
+INCS:=-I $(TOP_DIR) \
+		-I $(SPDLOG_ROOT)/include \
 
-CFLAGS= -g -fPIC -O0 $(INCLUDE) -pthread -lrt -lz -std=c++11 -fvisibility=hidden -Wl,-Bsymbolic -ldl
-	# -DUNICODE -D_UNICODE
+MACROS:=
+export CXX CXXFLAGS INCS MACROS
 
-NFLAGS_LIB=-g -c -shared -Xcompiler -fPIC -Xcompiler -fvisibility=hidden
-NFLAGS = $(NFLAGS_LIB) $(INCLUDE) -std=c++11
+# 链接器
+LD:=g++
+LDFLAGS:=
+LIBS:= -L $(SPDLOG_ROOT)/lib -l:libspdlog.a \
 
-SRCS:=$(wildcard $(SRC_ROOT)/*.cpp) \
-		$(wildcard $(SRC_ROOT)/gb28181/*.cpp)
-OBJS = $(patsubst %.cpp, %.o, $(notdir $(SRCS)))
+# 各个模块
+MODULES:= dvpp interface demo
 
-CU_SOURCES = $(wildcard ${SRC_ROOT}/*.cu)
-CU_OBJS = $(patsubst %.cu, %.o, $(notdir $(CU_SOURCES)))
+# 各个模块对应的库
+MODULE_LIBS:=$(BUILD_DIR)/dvpp/lib/libdvpp.a\
+			$(BUILD_DIR)/interface/lib/interface.a\
 
+# 最终目标文件
+TARGET:=$(BIN_DIR)/test
 
-$(TARGET):$(OBJS) $(CU_OBJS)
-	rm -f $(TARGET)
-	$(XX) -o $@ $^ $(CFLAGS)  $(LIBSPATH) $(LIBS) -Wwrite-strings
-	rm -f *.o
+# 默认最终目标
+.PHONY:all
+all:$(TARGET)
 
-%.o:$(SRC_ROOT)/%.cpp
-	$(XX) $(CFLAGS) -c $<
+# Final target: rebuild every module via its own Makefile (FORCE is phony, so this recipe runs on each invocation)
+$(TARGET):FORCE | $(BIN_DIR)
+	@for n in $(MODULES); do $(MAKE) -s -f $(TOP_DIR)/$$n/Makefile MODULE=$$n || exit "$$?"; done
+	@printf '\033[32mLinking executable %s\033[0m\n' "$(TARGET)"
+#@$(LD) $(LDFLAGS) -o $@ $(MODULE_LIBS) $(LIBS)
 
-%.o:$(SRC_ROOT)/gb28181/%.cpp
-	$(XX) $(CFLAGS) -c $<
+# 若没有bin目录则自动生成
+$(BIN_DIR):
+	@mkdir -p $@
 
-%.o:$(SRC_ROOT)/%.cu
-	@echo "#######################CU_OBJS:$@###############"
-	$(NVCC) $(NFLAGS) -o $@ $< 
+# 强制执行命令
+.PHONY:FORCE
+FORCE:
 
+# make clean直接删除整个build目录
+.PHONY:clean
 clean:
-	rm -f *.o $(TARGET)
\ No newline at end of file
+	@rm -rf $(BUILD_DIR)
diff --git a/src/Makefile.bak b/src/Makefile.bak
new file mode 100644
index 0000000..2e225eb
--- /dev/null
+++ b/src/Makefile.bak
@@ -0,0 +1,71 @@
+XX = g++
+
+CUDA_ROOT = /usr/local/cuda-11.1
+NVCC = $(CUDA_ROOT)/bin/nvcc
+
+
+PROJECT_ROOT= /home/huchunming/FFNvDecoder
+
+DEPEND_DIR = $(PROJECT_ROOT)/bin
+SRC_ROOT = $(PROJECT_ROOT)/src
+THIRDPARTY_ROOT = $(PROJECT_ROOT)/3rdparty
+
+
+TARGET= $(DEPEND_DIR)/lib/test
+
+
+SPDLOG_ROOT = $(THIRDPARTY_ROOT)/spdlog-1.9.2/release
+JRTP_ROOT = $(THIRDPARTY_ROOT)/jrtp_export
+
+
+INCLUDE= -I $(DEPEND_DIR)/include \
+  -I $(CUDA_ROOT)/include \
+  -I $(SRC_ROOT)/common/inc \
+  -I $(SRC_ROOT)/common/UtilNPP \
+  -I $(SRC_ROOT)\
+  -I $(SPDLOG_ROOT)/include \
+  -I $(SRC_ROOT)/gb28181 \
+  -I $(JRTP_ROOT)/jrtplib/include/jrtplib3 \
+  -I $(JRTP_ROOT)/jthread/include/jthread
+
+LIBSPATH= -L $(DEPEND_DIR)/lib -lavformat -lavcodec -lswscale -lavutil -lavfilter -lswresample -lavdevice \
+   -L $(CUDA_ROOT)/lib64 -lcuda -lcudart -lnvcuvid -lcurand -lcublas -lnvjpeg \
+   -L $(SPDLOG_ROOT) -l:libspdlog.a \
+   -L $(JRTP_ROOT)/jthread/lib -l:libjthread.a \
+   -L $(JRTP_ROOT)/jrtplib/lib -l:libjrtp.a
+
+CFLAGS= -g -fPIC -O0 $(INCLUDE) -pthread -lrt -lz -std=c++11 -fvisibility=hidden -Wl,-Bsymbolic -ldl
+	# -DUNICODE -D_UNICODE
+
+NFLAGS_LIB=-g -c -shared -Xcompiler -fPIC -Xcompiler -fvisibility=hidden
+NFLAGS = $(NFLAGS_LIB) $(INCLUDE) -std=c++11
+
+SRCS:=$(wildcard $(SRC_ROOT)/nvdecoder/*.cpp) \
+		$(wildcard $(SRC_ROOT)/gb28181/*.cpp) \
+    $(wildcard $(SRC_ROOT)/dvpp/*.cpp) 
+OBJS = $(patsubst %.cpp, %.o, $(notdir $(SRCS)))
+
+CU_SOURCES = $(wildcard ${SRC_ROOT}/*.cu)
+CU_OBJS = $(patsubst %.cu, %.o, $(notdir $(CU_SOURCES)))
+
+
+$(TARGET):$(OBJS) $(CU_OBJS)
+	rm -f $(TARGET)
+	$(XX) -o $@ $^ $(CFLAGS)  $(LIBSPATH) $(LIBS) -Wwrite-strings
+	rm -f *.o
+
+# %.o:$(SRC_ROOT)/nvdecoder/%.cpp
+# 	$(XX) $(CFLAGS) -c $<
+
+%.o:$(SRC_ROOT)/gb28181/%.cpp
+	$(XX) $(CFLAGS) -c $<
+
+%.o:$(SRC_ROOT)/dvpp/%.cpp
+	$(XX) $(CFLAGS) -c $<
+
+%.o:$(SRC_ROOT)/%.cu
+	@echo "#######################CU_OBJS:$@###############"
+	$(NVCC) $(NFLAGS) -o $@ $< 
+
+clean:
+	rm -f *.o $(TARGET)
\ No newline at end of file
diff --git a/src/Makefile.bak0308 b/src/Makefile.bak0308
new file mode 100644
index 0000000..b8f7d89
--- /dev/null
+++ b/src/Makefile.bak0308
@@ -0,0 +1,62 @@
+# 项目根目录
+TOP_DIR:=$(patsubst %/, %, $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+export TOP_DIR
+
+# 各项目录
+BUILD_DIR:=$(TOP_DIR)/build
+BIN_DIR:=$(BUILD_DIR)/bin
+export BUILD_DIR
+
+PROJECT_ROOT= /home/huchunming/FFNvDecoder
+
+DEPEND_DIR = $(PROJECT_ROOT)/bin
+SRC_ROOT = $(PROJECT_ROOT)/src
+THIRDPARTY_ROOT = $(PROJECT_ROOT)/3rdparty
+SPDLOG_ROOT = $(THIRDPARTY_ROOT)/spdlog-1.9.2/release
+
+#编译器
+CXX:=g++
+CXXFLAGS:=-std=c++11 -Wall -Wextra -Og -g 
+INCS:=-I $(TOP_DIR) \
+		-I $(SPDLOG_ROOT)/include \
+
+MACROS:=
+export CXX CXXFLAGS INCS MACROS
+
+# 链接器
+LD:=g++
+LDFLAGS:=
+LIBS:= -L $(SPDLOG_ROOT)/lib -l:libspdlog.a \
+
+# 各个模块
+MODULES:= dvpp interface
+
+# 各个模块对应的库
+MODULE_LIBS:=$(BUILD_DIR)/dvpp/lib/libdvpp.a\
+			$(BUILD_DIR)/interface/lib/interface.a\
+
+# 最终目标文件
+TARGET:=$(BIN_DIR)/test
+
+# 默认最终目标
+.PHONY:all
+all:$(TARGET)
+
+# Final target: rebuild every module via its own Makefile (FORCE is phony, so this recipe runs on each invocation)
+$(TARGET):FORCE | $(BIN_DIR)
+	@for n in $(MODULES); do $(MAKE) -s -f $(TOP_DIR)/$$n/Makefile MODULE=$$n || exit "$$?"; done
+	@printf '\033[32mLinking executable %s\033[0m\n' "$(TARGET)"
+#@$(LD) $(LDFLAGS) -o $@ $(MODULE_LIBS) $(LIBS)
+
+# 若没有bin目录则自动生成
+$(BIN_DIR):
+	@mkdir -p $@
+
+# 强制执行命令
+.PHONY:FORCE
+FORCE:
+
+# make clean直接删除整个build目录
+.PHONY:clean
+clean:
+	@rm -rf $(BUILD_DIR)
diff --git a/src/NV12ToRGB.cu b/src/NV12ToRGB.cu
deleted file mode 100644
index 58e1dff..0000000
--- a/src/NV12ToRGB.cu
+++ /dev/null
@@ -1,345 +0,0 @@
-
-#include "cuda_kernels.h"
-
-#include <builtin_types.h>
-#include "common/inc/helper_cuda_drvapi.h"
-
-typedef unsigned char   uint8;
-typedef unsigned int    uint32;
-typedef int             int32;
-
-#define COLOR_COMPONENT_MASK            0x3FF
-#define COLOR_COMPONENT_BIT_SIZE        10
-
-namespace cuda_common
-{
-
-#define MUL(x,y)    ((x)*(y))
-
-	__constant__ float  constHueColorSpaceMat2[9];  //默认分配到0卡上，未找到分配到指定卡上设置方法，当前也未用到，先注释掉
-
-	__device__ void YUV2RGB2(uint32 *yuvi, float *red, float *green, float *blue)
-	{
-		float luma, chromaCb, chromaCr;
-
-		// Prepare for hue adjustment
-		luma = (float)yuvi[0];
-		chromaCb = (float)((int32)yuvi[1] - 512.0f);
-		chromaCr = (float)((int32)yuvi[2] - 512.0f);
-
-
-		// Convert YUV To RGB with hue adjustment
-		*red = MUL(luma, constHueColorSpaceMat2[0]) +
-			MUL(chromaCb, constHueColorSpaceMat2[1]) +
-			MUL(chromaCr, constHueColorSpaceMat2[2]);
-		*green = MUL(luma, constHueColorSpaceMat2[3]) +
-			MUL(chromaCb, constHueColorSpaceMat2[4]) +
-			MUL(chromaCr, constHueColorSpaceMat2[5]);
-		*blue = MUL(luma, constHueColorSpaceMat2[6]) +
-			MUL(chromaCb, constHueColorSpaceMat2[7]) +
-			MUL(chromaCr, constHueColorSpaceMat2[8]);
-
-	}
-
-	__device__ unsigned char clip_v(int x, int min_val, int  max_val) {
-		if (x>max_val) {
-			return max_val;
-		}
-		else if (x<min_val) {
-			return min_val;
-		}
-		else {
-			return x;
-		}
-	}
-	// CUDA kernel for outputing the final RGB output from NV12;
-	extern "C"
-		__global__ void NV12ToRGB_drvapi2(uint32 *srcImage, size_t nSourcePitch, unsigned char *dstImage, int width, int height)
-	{
-
-		int32 x, y;
-		uint32 yuv101010Pel[2];
-		uint32 processingPitch = ((width)+63) & ~63;
-		uint8 *srcImageU8 = (uint8 *)srcImage;
-
-		processingPitch = nSourcePitch;
-
-		// Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
-		x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
-		y = blockIdx.y *  blockDim.y + threadIdx.y;
-
-		if (x >= width)
-		{
-			//printf("x >= width\n");
-			//*flag = -1;
-			return; //x = width - 1;
-		}
-			//return; //x = width - 1;
-
-		if (y >= height)
-		{
-			//printf("y >= height\n");
-			//*flag = -1;
-			return; // y = height - 1;
-		}
-
-		// Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
-		// if we move to texture we could read 4 luminance values
-		yuv101010Pel[0] = (srcImageU8[y * processingPitch + x]) << 2;
-		yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]) << 2;
-
-		uint32 chromaOffset = processingPitch * height;
-		int32 y_chroma = y >> 1;
-
-		if (y & 1)  // odd scanline ?
-		{
-			uint32 chromaCb;
-			uint32 chromaCr;
-
-			chromaCb = srcImageU8[chromaOffset + y_chroma * processingPitch + x];
-			chromaCr = srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1];
-
-			if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
-			{
-				chromaCb = (chromaCb + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x] + 1) >> 1;
-				chromaCr = (chromaCr + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x + 1] + 1) >> 1;
-			}
-
-			yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2));
-			yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-
-			yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2));
-			yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-		}
-		else
-		{
-			yuv101010Pel[0] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x] << (COLOR_COMPONENT_BIT_SIZE + 2));
-			yuv101010Pel[0] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-
-			yuv101010Pel[1] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x] << (COLOR_COMPONENT_BIT_SIZE + 2));
-			yuv101010Pel[1] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-		}
-
-		// this steps performs the color conversion
-		uint32 yuvi[6];
-		float red[2], green[2], blue[2];
-
-		yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK);
-		yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
-		yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
-
-		yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK);
-		yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
-		yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
-
-		// YUV to RGB Transformation conversion
-		YUV2RGB2(&yuvi[0], &red[0], &green[0], &blue[0]);
-		YUV2RGB2(&yuvi[3], &red[1], &green[1], &blue[1]);
-
-
-		dstImage[y * width * 3 + x * 3] = clip_v(blue[0] * 0.25,0 ,255);
-		dstImage[y * width * 3 + x * 3 + 3] = clip_v(blue[1] * 0.25,0, 255);
-
-		dstImage[width * y * 3 + x * 3 + 1] = clip_v(green[0] * 0.25,0 ,255);
-		dstImage[width * y * 3 + x * 3 + 4] = clip_v(green[1] * 0.25,0, 255);
-
-		dstImage[width * y * 3 + x * 3 + 2] = clip_v(red[0] * 0.25, 0, 255);
-		dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255);
-
-
-		//dstImage[y * width * 3 + x * 3] = blue[0] * 0.25;
-		//dstImage[y * width * 3 + x * 3 + 3] = blue[1] * 0.25;
-
-		//dstImage[width * y * 3 + x * 3 + 1] =green[0] * 0.25;
-		//dstImage[width * y * 3 + x * 3 + 4] = green[1] * 0.25;
-
-		//dstImage[width * y * 3 + x * 3 + 2] = red[0] * 0.25;
-		//dstImage[width * y * 3 + x * 3 + 5] = red[1] * 0.25;
-
-		// Clamp the results to BBBBBB....GGGGGGG.......RRRRRRR....
-		//              dstImage[y * width + x] = blue[0] * 0.25;
-		//              dstImage[y * width + x + 1] = blue[1] * 0.25;
-
-		//              dstImage[width * height + y * width + x] = green[0] * 0.25;
-		//              dstImage[width * height + y * width + x + 1] = green[1] * 0.25;
-
-		//              dstImage[width * height * 2 + y * width + x] = red[0] * 0.25;
-		//              dstImage[width * height * 2 + y * width + x + 1] = red[1] * 0.25;
-		return;
-
-	}
-
-		// CUDA kernel for outputing the final RGB output from NV12;
-	extern "C"
-		__global__ void CUDAToBGR_drvapi(uint32 *dataY, uint32 *dataUV, size_t pitchY, size_t pitchUV, unsigned char *dstImage, int width, int height)
-	{
-
-		int32 x, y;
-
-		// Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
-		x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
-		y = blockIdx.y *  blockDim.y + threadIdx.y;
-
-		if (x >= width)
-		{
-			return; 
-		}
-
-		if (y >= height)
-		{
-			return; 
-		}
-
-		uint32 yuv101010Pel[2];
-		uint8 *srcImageU8_Y = (uint8 *)dataY;
-		uint8 *srcImageU8_UV = (uint8 *)dataUV;
-
-		// Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
-		// if we move to texture we could read 4 luminance values
-		yuv101010Pel[0] = (srcImageU8_Y[y * pitchY + x]) << 2;
-		yuv101010Pel[1] = (srcImageU8_Y[y * pitchY + x + 1]) << 2;
-
-		int32 y_chroma = y >> 1;
-
-		if (y & 1)  // odd scanline ?
-		{
-			uint32 chromaCb;
-			uint32 chromaCr;
-
-			chromaCb = srcImageU8_UV[y_chroma * pitchUV + x];
-			chromaCr = srcImageU8_UV[y_chroma * pitchUV + x + 1];
-
-			if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
-			{
-				chromaCb = (chromaCb + srcImageU8_UV[(y_chroma + 1) * pitchUV + x] + 1) >> 1;
-				chromaCr = (chromaCr + srcImageU8_UV[(y_chroma + 1) * pitchUV + x + 1] + 1) >> 1;
-			}
-
-			yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2));
-			yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-
-			yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2));
-			yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-		}
-		else
-		{
-			yuv101010Pel[0] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x] << (COLOR_COMPONENT_BIT_SIZE + 2));
-			yuv101010Pel[0] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-
-			yuv101010Pel[1] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x] << (COLOR_COMPONENT_BIT_SIZE + 2));
-			yuv101010Pel[1] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
-		}
-
-		// this steps performs the color conversion
-		uint32 yuvi[6];
-		float red[2], green[2], blue[2];
-
-		yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK);
-		yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
-		yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
-
-		yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK);
-		yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
-		yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
-
-		// YUV to RGB Transformation conversion
-		YUV2RGB2(&yuvi[0], &red[0], &green[0], &blue[0]);
-		YUV2RGB2(&yuvi[3], &red[1], &green[1], &blue[1]);
-
-
-		dstImage[y * width * 3 + x * 3] = clip_v(blue[0] * 0.25,0 ,255);
-		dstImage[y * width * 3 + x * 3 + 3] = clip_v(blue[1] * 0.25,0, 255);
-
-		dstImage[width * y * 3 + x * 3 + 1] = clip_v(green[0] * 0.25,0 ,255);
-		dstImage[width * y * 3 + x * 3 + 4] = clip_v(green[1] * 0.25,0, 255);
-
-		dstImage[width * y * 3 + x * 3 + 2] = clip_v(red[0] * 0.25, 0, 255);
-		dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255);
-	}
-
-	cudaError_t setColorSpace(FF_ColorSpace CSC, float hue)
-	{
-		float hueSin = sin(hue);
-		float hueCos = cos(hue);
-
-		float hueCSC[9];
-		if (CSC == ITU_601)
-		{
-			//CCIR 601
-			hueCSC[0] = 1.1644f;
-			hueCSC[1] = hueSin * 1.5960f;
-			hueCSC[2] = hueCos * 1.5960f;
-			hueCSC[3] = 1.1644f;
-			hueCSC[4] = (hueCos * -0.3918f) - (hueSin * 0.8130f);
-			hueCSC[5] = (hueSin *  0.3918f) - (hueCos * 0.8130f);
-			hueCSC[6] = 1.1644f;
-			hueCSC[7] = hueCos *  2.0172f;
-			hueCSC[8] = hueSin * -2.0172f;
-		}
-		else if (CSC == ITU_709)
-		{
-			//CCIR 709
-			hueCSC[0] = 1.0f;
-			hueCSC[1] = hueSin * 1.57480f;
-			hueCSC[2] = hueCos * 1.57480f;
-			hueCSC[3] = 1.0;
-			hueCSC[4] = (hueCos * -0.18732f) - (hueSin * 0.46812f);
-			hueCSC[5] = (hueSin *  0.18732f) - (hueCos * 0.46812f);
-			hueCSC[6] = 1.0f;
-			hueCSC[7] = hueCos *  1.85560f;
-			hueCSC[8] = hueSin * -1.85560f;
-		}
-
-		cudaError_t cudaStatus = cudaMemcpyToSymbol(constHueColorSpaceMat2, hueCSC, 9 * sizeof(float), 0, cudaMemcpyHostToDevice);
-		float tmpf[9];
-		memset(tmpf, 0, 9 * sizeof(float));
-		cudaMemcpyFromSymbol(tmpf, constHueColorSpaceMat2, 9 * sizeof(float), 0, ::cudaMemcpyDefault);
-		cudaDeviceSynchronize();
-
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaMemcpyToSymbol failed: %s\n", cudaGetErrorString(cudaStatus));
-		}
-
-		return cudaStatus;
-	}
-
-	cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height)
-	{
-		dim3 block(32, 16, 1);
-		dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1);
-		NV12ToRGB_drvapi2 << < grid, block >> >((uint32 *)d_srcNV12, nSourcePitch, d_dstRGB, width, height);
-		cudaError_t cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			return cudaStatus;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching NV12ToRGB_drvapi !\n", cudaStatus);
-			return cudaStatus;
-		}
-
-		return cudaStatus;
-	}
-
-	cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height)
-	{
-		dim3 block(32, 16, 1);
-		dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1);
-		CUDAToBGR_drvapi << < grid, block >> >((uint32 *)dataY, (uint32 *)dataUV, pitchY, pitchUV, d_dstRGB, width, height);
-		cudaError_t cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			return cudaStatus;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching NV12ToRGB_drvapi !\n", cudaStatus);
-			return cudaStatus;
-		}
-
-		return cudaStatus;
-	}
-}
\ No newline at end of file
diff --git a/src/NvJpegEncoder.cpp b/src/NvJpegEncoder.cpp
deleted file mode 100644
index 7ee0727..0000000
--- a/src/NvJpegEncoder.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-#include "NvJpegEncoder.h"
-
-#include <fstream>
-#include <vector>
-#include <iostream>
-
-
-#define CHECK_NVJPEG(S) do {nvjpegStatus_t  status; \
-        status = S; \
-        if (status != NVJPEG_STATUS_SUCCESS ) std::cout << __LINE__ <<" CHECK_NVJPEG - status = " << status << std::endl; \
-        } while (false)
-
-
-int saveJpeg(const char * filepath, unsigned char* d_srcBGR, int width, int height, cudaStream_t stream)
-{
-    nvjpegHandle_t nvjpeg_handle;
-    nvjpegEncoderState_t encoder_state;
-    nvjpegEncoderParams_t encoder_params;
-
-    cudaEvent_t ev_start, ev_end;
-    cudaEventCreate(&ev_start);
-    cudaEventCreate(&ev_end);
-
-    nvjpegImage_t input;
-    nvjpegInputFormat_t input_format = NVJPEG_INPUT_BGRI;
-    int image_width = width;
-    int image_height = height;
-
-    // int channel_size = image_width * image_height;
-    // for (int i = 0; i < 3; i++)
-    // {
-    //     input.pitch[i] = image_width;
-    //     (cudaMalloc((void**)&(input.channel[i]), channel_size));
-    //     (cudaMemset(input.channel[i], 50 * 40 * i, channel_size));
-    // }
-
-    input.channel[0] = d_srcBGR;
-    input.pitch[0] = image_width * 3;
-
-    nvjpegBackend_t backend = NVJPEG_BACKEND_DEFAULT;
-
-    CHECK_NVJPEG(nvjpegCreate(backend, nullptr, &nvjpeg_handle));
-    
-    CHECK_NVJPEG(nvjpegEncoderParamsCreate(nvjpeg_handle, &encoder_params, stream));
-    CHECK_NVJPEG(nvjpegEncoderStateCreate(nvjpeg_handle, &encoder_state, stream));
-
-    // set params
-    CHECK_NVJPEG(nvjpegEncoderParamsSetEncoding(encoder_params, nvjpegJpegEncoding_t::NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN, stream));
-    CHECK_NVJPEG(nvjpegEncoderParamsSetOptimizedHuffman(encoder_params, 1, stream));
-    CHECK_NVJPEG(nvjpegEncoderParamsSetQuality(encoder_params, 70, stream));
-    CHECK_NVJPEG(nvjpegEncoderParamsSetSamplingFactors(encoder_params, nvjpegChromaSubsampling_t::NVJPEG_CSS_420, stream));
-
-    cudaEventRecord(ev_start);
-    CHECK_NVJPEG(nvjpegEncodeImage(nvjpeg_handle, encoder_state, encoder_params, &input, input_format, image_width, image_height, stream));
-    cudaEventRecord(ev_end);
-
-    std::vector<unsigned char> obuffer;
-    size_t length;
-    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream(
-        nvjpeg_handle,
-        encoder_state,
-        NULL,
-        &length,
-        stream));
-
-    obuffer.resize(length);
-    CHECK_NVJPEG(nvjpegEncodeRetrieveBitstream(
-        nvjpeg_handle,
-        encoder_state,
-        obuffer.data(),
-        &length,
-        stream));
-
-    cudaEventSynchronize(ev_end);
-
-    // 用完销毁，避免显存泄露
-    nvjpegEncoderParamsDestroy(encoder_params);
-    nvjpegEncoderStateDestroy(encoder_state);
-    nvjpegDestroy(nvjpeg_handle);
-
-    float ms;
-    cudaEventElapsedTime(&ms, ev_start, ev_end);
-    // std::cout << "time spend " << ms << " ms" << std::endl;
-
-    std::ofstream outputFile(filepath, std::ios::out | std::ios::binary);
-    outputFile.write(reinterpret_cast<const char *>(obuffer.data()), static_cast<int>(length));
-    outputFile.close();
-    
-    return 0;
-}
\ No newline at end of file
diff --git a/src/NvJpegEncoder.h b/src/NvJpegEncoder.h
deleted file mode 100644
index 3c27ba8..0000000
--- a/src/NvJpegEncoder.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <nvjpeg.h>
-
-int saveJpeg(const char * filepath, unsigned char* d_srcBGR, int width, int height, cudaStream_t stream);
\ No newline at end of file
diff --git a/src/PartMemCopy.cu b/src/PartMemCopy.cu
deleted file mode 100644
index 396765b..0000000
--- a/src/PartMemCopy.cu
+++ /dev/null
@@ -1,289 +0,0 @@
-#include "cuda_kernels.h"
-#include <algorithm>
-typedef unsigned char   uchar;
-typedef unsigned int    uint32;
-typedef int             int32;
-
-#define MAX_SNAPSHOT_WIDTH 320
-#define MAX_SNAPSHOT_HEIGHT 320
-
-namespace cuda_common
-{
-	__global__ void kernel_memcopy(unsigned char* d_srcRGB, int src_width, int src_height,
-		unsigned char* d_dstRGB, int left, int top, int right, int bottom)
-	{
-		const int dst_x = blockIdx.x * blockDim.x + threadIdx.x;
-		const int dst_y = blockIdx.y * blockDim.y + threadIdx.y;
-		const int dst_width = right - left;
-		const int dst_height = bottom - top;
-		if (dst_x < dst_width && dst_y < dst_height)
-		{
-			int src_x = left + dst_x;
-			int src_y = top + dst_y;
-
-			//bgr...bgr...bgr...
-			d_dstRGB[(dst_y*dst_width + dst_x) * 3] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3];
-			d_dstRGB[(dst_y*dst_width + dst_x)
-				* 3 + 1] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3 + 1];
-			d_dstRGB[(dst_y*dst_width + dst_x) * 3 + 2] = (unsigned char)d_srcRGB[(src_y*src_width + src_x) * 3 + 2];
-
-			//bbb...ggg...rrr...
-			//d_dstRGB[(dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(src_y*src_width) + src_x];
-			//d_dstRGB[(dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(src_width*src_height) + (src_y*src_width) + src_x];
-			//d_dstRGB[(2 * dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (unsigned char)d_srcRGB[(2 * src_width*src_height) + (src_y*src_width) + src_x];
-
-			/*	memcpy(d_dstRGB + (dst_y*src_width) + dst_x, d_srcRGB + (src_y*src_width) + src_x, sizeof(float));
-			memcpy(d_dstRGB + (src_width*src_height) + (dst_y*src_width) + dst_x, d_srcRGB + (src_width*src_height) + (src_y*src_width) + src_x, sizeof(float));
-			memcpy(d_dstRGB + (2 * src_width*src_height) + (dst_y*src_width) + dst_x, d_srcRGB + (2 * src_width*src_height) + (src_y*src_width) + src_x, sizeof(float));*/
-		}
-	}
-
-	cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom)
-	{
-		dim3 block(32, 16, 1);
-		dim3 grid(((right - left) + (block.x - 1)) / block.x, ((bottom - top) + (block.y - 1)) / block.y, 1);
-
-		kernel_memcopy << < grid, block >> > (d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom);
-
-		cudaError_t cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "Part 50 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			return cudaStatus;
-		}
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus);
-			return cudaStatus;
-		}
-		return cudaStatus;
-	}
-
-
-	//    __global__ void kernel_memcopy_mean_variance(float* d_srcRGB, int src_width, int src_height, 
-	//            unsigned char* vd_dstRGB, int count, int * vleft, int* vtop, int* vright, int * vbottom, float submeanb,float submeang, float submeanr, float varianceb,float varianceg, float variancer)
-	//    {
-	//        const int dst_x = blockIdx.x * blockDim.x + threadIdx.x;
-	//        const int dst_y = blockIdx.y * blockDim.y + threadIdx.y;
-	//        for (int i=0;i<count;i++)
-	//        {
-	//                const int left = vleft[i];
-	//                const int right = vright[i];
-	//                const int top = vtop[i];
-	//                const int bottom = vbottom[i];
-	//        
-	//                const int dst_width = right - left;
-	//                const int dst_height = bottom - top;
-	//
-	//
-	//                unsigned char * d_dstRGB = vd_dstRGB + i *   ;
-	//
-	//                if (dst_x < dst_width && dst_y < dst_height)
-	//                {
-	//                    int src_x = left + dst_x;
-	//                    int src_y = top + dst_y;
-	//        
-	//                    d_dstRGB[(dst_y*dst_width) + dst_x] = (d_srcRGB[(src_y*src_width) + src_x] - submeanb)*varianceb;
-	//                    d_dstRGB[(dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (d_srcRGB[(src_width*src_height) + (src_y*src_width) + src_x] -submeang)*varianceg;
-	//                    d_dstRGB[(2 * dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (d_srcRGB[(2 * src_width*src_height) + (src_y*src_width) + src_x] - submeanr) * variancer;
-	//        
-	//                }
-	//        }
-	//    }
-	__global__ void PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel(
-		unsigned char * d_srcRGB, int srcimg_width, int srcimg_height,
-		int* vleft, int* vtop, int* vright, int * vbottom,
-		unsigned char** vd_dstRGB, int count, int *dst_width, int *dst_height,
-		float submeanb, float submeang, float submeanr,
-		float varianceb, float varianceg, float variancer)
-	{
-		int i = blockIdx.z;
-
-		//for (int i = 0; i<count; i++)
-		{
-			const int left = vleft[i];
-			const int right = vright[i];
-			const int top = vtop[i];
-			const int bottom = vbottom[i];
-			const int cur_dst_width = dst_width[i];
-			const int cur_dst_height = dst_height[i];
-
-			unsigned char* d_dstRGB =  vd_dstRGB[i];
-
-			const int src_width = right - left;
-			const int src_height = bottom - top;
-			const int x = blockIdx.x * blockDim.x + threadIdx.x;// + left;
-			const int y = blockIdx.y * blockDim.y + threadIdx.y;//+ top;
-			const int dst_x = blockIdx.x * blockDim.x + threadIdx.x;
-			const int dst_y = blockIdx.y * blockDim.y + threadIdx.y;
-
-			/*if (dst_x == 0 && dst_y == 0)
-				printf("%d %d %d %d %d\n", i, vleft[i], vright[i], cur_dst_width, cur_dst_height);*/
-
-			unsigned char * src_img = d_srcRGB;
-			unsigned char * dst_img = d_dstRGB;
-			if (dst_x < cur_dst_width && dst_y < cur_dst_height)
-			{
-				float fx = (x + 0.5)*src_width / (float)cur_dst_width - 0.5 + left;
-				float fy = (y + 0.5)*src_height / (float)cur_dst_height - 0.5 + top;
-				int ax = floor(fx);
-				int ay = floor(fy);
-				if (ax < 0)
-				{
-					ax = 0;
-				}
-				if (ax > srcimg_width - 2)
-				{
-					ax = srcimg_width - 2;
-				}
-				if (ay < 0) {
-					ay = 0;
-				}
-				if (ay > srcimg_height - 2)
-				{
-					ay = srcimg_height - 2;
-				}
-
-				int A = ax + ay*srcimg_width;
-				int B = ax + ay*srcimg_width + 1;
-				int C = ax + ay*srcimg_width + srcimg_width;
-				int D = ax + ay*srcimg_width + srcimg_width + 1;
-
-				float w1, w2, w3, w4;
-				w1 = fx - ax;
-				w2 = 1 - w1;
-				w3 = fy - ay;
-				w4 = 1 - w3;
-				float blue = src_img[A * 3] * w2*w4 + src_img[B * 3] * w1*w4 + src_img[C * 3] * w2*w3 + src_img[D * 3] * w1*w3;
-				float green = src_img[A * 3 + 1] * w2*w4 + src_img[B * 3 + 1] * w1*w4
-					+ src_img[C * 3 + 1] * w2*w3 + src_img[D * 3 + 1] * w1*w3;
-				float red = src_img[A * 3 + 2] * w2*w4 + src_img[B * 3 + 2] * w1*w4
-					+ src_img[C * 3 + 2] * w2*w3 + src_img[D * 3 + 2] * w1*w3;
-
-				/*dst_img[(dst_y * dst_width + dst_x) * 3] = (unsigned char)(blue - submeanb)*varianceb;
-				dst_img[(dst_y * dst_width + dst_x) * 3 + 1] =(unsigned char) (green - submeang)*varianceg;
-				dst_img[(dst_y * dst_width + dst_x) * 3 + 2] = (unsigned char) (red - submeanr)*variancer;*/
-
-				if (blue < 0)
-					blue = 0;
-				else if (blue > 255)
-					blue = 255;
-
-				if (green < 0)
-					green = 0;
-				else if (green > 255)
-					green = 255;
-
-				if (red < 0)
-					red = 0;
-				else if (red > 255)
-					red = 255;
-
-				dst_img[(dst_y * cur_dst_width + dst_x) * 3] = (unsigned char)blue;
-				dst_img[(dst_y * cur_dst_width + dst_x) * 3 + 1] = (unsigned char)green;
-				dst_img[(dst_y * cur_dst_width + dst_x) * 3 + 2] = (unsigned char)red;
-
-
-				/*if (src_img[(dst_y * dst_width + dst_x) * 3] < 0)
-					src_img[(dst_y * dst_width + dst_x) * 3] = 0;
-				else if (src_img[(dst_y * dst_width + dst_x) * 3] > 255)
-					src_img[(dst_y * dst_width + dst_x) * 3] = 255;
-
-				if (src_img[(dst_y * dst_width + dst_x) * 3 + 1] < 0)
-					src_img[(dst_y * dst_width + dst_x) * 3 + 1] = 0;
-				else if (src_img[(dst_y * dst_width + dst_x) * 3 + 1] > 255)
-					src_img[(dst_y * dst_width + dst_x) * 3 + 1] = 255;
-
-				if (src_img[(dst_y * dst_width + dst_x) * 3 + 2] < 0)
-					src_img[(dst_y * dst_width + dst_x) * 3 + 2] = 0;
-				else if (src_img[(dst_y * dst_width + dst_x) * 3 + 2] > 255)
-					src_img[(dst_y * dst_width + dst_x) * 3 + 2] = 255;
-
-
-				dst_img[(dst_y * dst_width + dst_x) * 3] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3];
-				dst_img[(dst_y * dst_width + dst_x) * 3 + 1] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3 + 1];
-				dst_img[(dst_y * dst_width + dst_x) * 3 + 2] = (unsigned char)src_img[(dst_y * dst_width + dst_x) * 3 + 2];*/
-			}
-		}
-	}
-
-	cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, int count, int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h, float submeanb, float submeang, float submeanr,
-		float varianceb, float varianceg, float variancer)
-	{
-	/*	cudaEvent_t start, stop;
-		float time;
-		cudaEventCreate(&start);
-		cudaEventCreate(&stop);
-		cudaEventRecord(start, 0);*/
-
-		dim3 block(32, 16, 1);
-		dim3 grid((*std::max_element(dst_w, dst_w+ count) + (block.x - 1)) / block.x, (*std::max_element(dst_h, dst_h + count) + (block.y - 1)) / block.y, count);
-
-		int * gpu_left;
-		cudaMalloc(&gpu_left, 1000 * sizeof(int));
-		cudaMemcpy(gpu_left, left, count * sizeof(int), cudaMemcpyHostToDevice);
-
-		int * gpu_right;
-		cudaMalloc(&gpu_right, 1000 * sizeof(int));
-		cudaMemcpy(gpu_right, right, count * sizeof(int), cudaMemcpyHostToDevice);
-
-		int * gpu_top;
-		cudaMalloc(&gpu_top, 1000 * sizeof(int));
-		cudaMemcpy(gpu_top, top, count * sizeof(int), cudaMemcpyHostToDevice);
-
-		int * gpu_bottom;
-		cudaMalloc(&gpu_bottom, 1000 * sizeof(int));
-		cudaMemcpy(gpu_bottom, bottom, count * sizeof(int), cudaMemcpyHostToDevice);
-
-		int * gpu_dst_w;
-		cudaMalloc(&gpu_dst_w, 1000 * sizeof(int));
-		cudaMemcpy(gpu_dst_w, dst_w, count * sizeof(int), cudaMemcpyHostToDevice);
-
-		int * gpu_dst_h;
-		cudaMalloc(&gpu_dst_h, 1000 * sizeof(int));
-		cudaMemcpy(gpu_dst_h, dst_h, count * sizeof(int), cudaMemcpyHostToDevice);
-
-		unsigned char** gpu_dst_rgb;
-		cudaMalloc(&gpu_dst_rgb, 1000 * sizeof(unsigned char*));
-		cudaMemcpy(gpu_dst_rgb, d_dstRGB, count * sizeof(unsigned char*), cudaMemcpyHostToDevice);
-
-		//cudaMemcpy(cpu_personfloat, d_srcRGB, 112*224*2*sizeof(float), cudaMemcpyDeviceToHost);
-		//            for(int i=0;i<100;i++)
-		//            {
-		//                  printf("the score is %f\t",cpu_personfloat[i]);
-		//            }
-		PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel << < grid, block >> > (
-			d_srcRGB, src_width, src_height,
-			gpu_left, gpu_top, gpu_right, gpu_bottom,
-			gpu_dst_rgb, count, gpu_dst_w, gpu_dst_h,
-			submeanb, submeang, submeanr,
-			varianceb, varianceg, variancer);
-		cudaFree(gpu_top);
-		cudaFree(gpu_bottom);
-		cudaFree(gpu_left);
-		cudaFree(gpu_right);
-		cudaFree(gpu_dst_w);
-		cudaFree(gpu_dst_h);
-		cudaFree(gpu_dst_rgb);
-	
-		cudaError_t cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "Part 270 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			return cudaStatus;
-		}
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus);
-			return cudaStatus;
-		}
-
-		/*cudaEventRecord(stop, 0);
-		cudaEventSynchronize(stop);
-		cudaEventElapsedTime(&time, start, stop);
-		cudaEventDestroy(start);
-		cudaEventDestroy(stop);
-		printf("�˺�������ʱ��:%f\n", time);*/
-
-		return cudaStatus;
-	}
-
-}
\ No newline at end of file
diff --git a/src/RGB2YUV.cu b/src/RGB2YUV.cu
deleted file mode 100644
index 7202c3a..0000000
--- a/src/RGB2YUV.cu
+++ /dev/null
@@ -1,263 +0,0 @@
-
-
-#include "cuda_kernels.h"
-
-typedef unsigned char   uint8;
-typedef unsigned int    uint32;
-typedef int             int32;
-
-namespace cuda_common
-{
-	__device__ unsigned char clip_value(unsigned char x, unsigned char min_val, unsigned char  max_val){
-		if (x>max_val){
-			return max_val;
-		}
-		else if (x<min_val){
-			return min_val;
-		}
-		else{
-			return x;
-		}
-	}
-
-	__global__ void kernel_rgb2yuv(unsigned char *src_img, unsigned char* Y, unsigned char* u, unsigned char* v,
-		int src_width, int src_height, size_t yPitch)
-	{
-		const int x = blockIdx.x * blockDim.x + threadIdx.x;
-		const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-		if (x >= src_width)
-			return; //x = width - 1;
-
-		if (y >= src_height)
-			return; // y = height - 1;
-		
-		int B = src_img[y * src_width * 3 + x * 3];
-		int G = src_img[y * src_width * 3 + x * 3 + 1];
-		int R = src_img[y * src_width * 3 + x * 3 + 2];
-
-		/*int B = src_img[y * src_width + x];
-		int G = src_img[src_width * src_height + y * src_width + x];
-		int R = src_img[src_width * src_height * 2 + y * src_width + x];*/
-
-		Y[y * yPitch + x] = clip_value((unsigned char)(0.299 * R + 0.587 * G + 0.114 * B), 0, 255);
-		u[y * src_width + x] = clip_value((unsigned char)(-0.147 * R - 0.289 * G + 0.436 * B + 128), 0, 255);
-		v[y * src_width + x] = clip_value((unsigned char)(0.615 * R - 0.515 * G - 0.100 * B + 128), 0, 255);
-
-		//Y[y * yPitch + x] = clip_value((unsigned char)(0.257 * R + 0.504 * G + 0.098 * B + 16), 0, 255);
-		//u[y * src_width + x] = clip_value((unsigned char)(-0.148 * R - 0.291 * G + 0.439 * B + 128), 0, 255);
-		//v[y * src_width + x] = clip_value((unsigned char)(0.439 * R - 0.368 * G - 0.071 * B + 128), 0, 255);
-	}
-
-	__global__ void kernel_rgb2yuv(float *src_img, unsigned char* Y, unsigned char* u, unsigned char* v,
-		int src_width, int src_height, size_t yPitch)
-	{
-		const int x = blockIdx.x * blockDim.x + threadIdx.x;
-		const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-		if (x >= src_width)
-			return; //x = width - 1;
-
-		if (y >= src_height)
-			return; // y = height - 1;
-
-		float B = src_img[y * src_width + x];
-		float G = src_img[src_width * src_height + y * src_width + x];
-		float R = src_img[src_width * src_height * 2 + y * src_width + x];
-
-		Y[y * yPitch + x] = clip_value((unsigned char)(0.299 * R + 0.587 * G + 0.114 * B), 0, 255);
-		u[y * src_width + x] = clip_value((unsigned char)(-0.147 * R - 0.289 * G + 0.436 * B + 128), 0, 255);
-		v[y * src_width + x] = clip_value((unsigned char)(0.615 * R - 0.515 * G - 0.100 * B + 128), 0, 255);
-
-		//Y[y * yPitch + x] = clip_value((unsigned char)(0.257 * R + 0.504 * G + 0.098 * B + 16), 0, 255);
-		//u[y * src_width + x] = clip_value((unsigned char)(-0.148 * R - 0.291 * G + 0.439 * B + 128), 0, 255);
-		//v[y * src_width + x] = clip_value((unsigned char)(0.439 * R - 0.368 * G - 0.071 * B + 128), 0, 255);
-	}
-
-	extern "C"
-	__global__ void kernel_resize_UV(unsigned char* src_img, unsigned char *dst_img,
-		int src_width, int src_height, int dst_width, int dst_height, int nPitch)
-	{
-		const int x = blockIdx.x * blockDim.x + threadIdx.x;
-		const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-		if (x >= dst_width)
-			return; //x = width - 1;
-
-		if (y >= dst_height)
-			return; // y = height - 1;
-
-		float fx = (x + 0.5)*src_width / (float)dst_width - 0.5;
-		float fy = (y + 0.5)*src_height / (float)dst_height - 0.5;
-		int ax = floor(fx);
-		int ay = floor(fy);
-		if (ax < 0)
-		{
-			ax = 0;
-		}
-		else if (ax > src_width - 2)
-		{
-			ax = src_width - 2;
-		}
-
-		if (ay < 0){
-			ay = 0;
-		}
-		else if (ay > src_height - 2)
-		{
-			ay = src_height - 2;
-		}
-
-		int A = ax + ay*src_width;
-		int B = ax + ay*src_width + 1;
-		int C = ax + ay*src_width + src_width;
-		int D = ax + ay*src_width + src_width + 1;
-
-		float w1, w2, w3, w4;
-		w1 = fx - ax;
-		w2 = 1 - w1;
-		w3 = fy - ay;
-		w4 = 1 - w3;
-
-		unsigned char val = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3;
-
-		dst_img[y * nPitch + x] = clip_value(val,0,255);
-	}
-
-	cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height,
-						unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
-						unsigned char* U, size_t uPitch, int uWidth, int uHeight,
-						unsigned char* V, size_t vPitch, int vWidth, int vHeight)
-	{
-		unsigned char * u ;
-		unsigned char * v ;
-
-		cudaError_t cudaStatus;
-
-		cudaStatus = cudaMalloc((void**)&u, src_width * src_height * sizeof(unsigned char));
-		cudaStatus = cudaMalloc((void**)&v, src_width * src_height * sizeof(unsigned char));
-
-		dim3 block(32, 16, 1);
-		dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
-		dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1);
-		dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1);
-
-		kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch);
-
-		cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			goto Error;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus);
-			goto Error;
-		}
-
-		kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch);
-
-		cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			goto Error;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
-			goto Error;
-		}
-
-		kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch);
-
-		cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			goto Error;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
-			goto Error;
-		}
-
-Error :
-		cudaFree(u);
-		cudaFree(v);
-
-		return cudaStatus;
-	}
-
-
-
-	cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height,
-		unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
-		unsigned char* U, size_t uPitch, int uWidth, int uHeight,
-		unsigned char* V, size_t vPitch, int vWidth, int vHeight)
-	{
-		unsigned char * u;
-		unsigned char * v;
-
-		cudaError_t cudaStatus;
-
-		cudaStatus = cudaMalloc((void**)&u, src_width * src_height * sizeof(unsigned char));
-		cudaStatus = cudaMalloc((void**)&v, src_width * src_height * sizeof(unsigned char));
-
-		dim3 block(32, 16, 1);
-		dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
-		dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1);
-		dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1);
-
-		kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch);
-
-		cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			goto Error;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus);
-			goto Error;
-		}
-
-		kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch);
-
-		cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			goto Error;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
-			goto Error;
-		}
-
-		kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch);
-
-		cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			goto Error;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
-			goto Error;
-		}
-
-	Error:
-		cudaFree(u);
-		cudaFree(v);
-
-		return cudaStatus;
-	}
-}
-
diff --git a/src/ResizeImage.cu b/src/ResizeImage.cu
deleted file mode 100644
index fdc6961..0000000
--- a/src/ResizeImage.cu
+++ /dev/null
@@ -1,84 +0,0 @@
-#include "cuda_kernels.h"
-
-typedef unsigned char   uchar;
-typedef unsigned int    uint32;
-typedef int             int32;
-
-namespace cuda_common
-{
-	__global__ void kernel_bilinear(float *src_img, float *dst_img,
-		int src_width, int src_height, int dst_width, int dst_height)
-	{
-		const int x = blockIdx.x * blockDim.x + threadIdx.x;
-		const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
-		if (x < dst_width && y < dst_height)
-		{
-			float fx = (x + 0.5)*src_width / (float)dst_width - 0.5;
-			float fy = (y + 0.5)*src_height / (float)dst_height - 0.5;
-			int ax = floor(fx);
-			int ay = floor(fy);
-			if (ax < 0)
-			{
-				ax = 0;
-			}
-			else if (ax > src_width - 2)
-			{
-				ax = src_width - 2;
-			}
-
-			if (ay < 0){
-				ay = 0;
-			}
-			else if (ay > src_height - 2)
-			{
-				ay = src_height - 2;
-			}
-
-			int A = ax + ay*src_width;
-			int B = ax + ay*src_width + 1;
-			int C = ax + ay*src_width + src_width;
-			int D = ax + ay*src_width + src_width + 1;
-
-			float w1, w2, w3, w4;
-			w1 = fx - ax;
-			w2 = 1 - w1;
-			w3 = fy - ay;
-			w4 = 1 - w3;
-
-			float blue = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3;
-
-			float green = src_img[src_width * src_height + A] * w2*w4 + src_img[src_width * src_height + B] * w1*w4 
-				+ src_img[src_width * src_height + C] * w2*w3 + src_img[src_width * src_height + D] * w1*w3;
-
-			float red = src_img[src_width * src_height * 2 + A] * w2*w4 + src_img[src_width * src_height * 2 + B] * w1*w4 
-				+ src_img[src_width * src_height * 2 + C] * w2*w3 + src_img[src_width * src_height * 2 + D] * w1*w3;
-
-			dst_img[y * dst_width + x] = blue;
-			dst_img[dst_width * dst_height + y * dst_width + x] = green;
-			dst_img[dst_width * dst_height * 2 + y * dst_width + x] = red;
-		}
-	}
-
-	cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height)
-	{
-		dim3 block(32, 16, 1);
-		dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1);
-
-		kernel_bilinear << < grid, block >> >(d_srcRGB, d_dstRGB, src_width, src_height, dst_width, dst_height);
-
-		cudaError_t cudaStatus = cudaGetLastError();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "kernel_bilinear launch failed: %s\n", cudaGetErrorString(cudaStatus));
-			return cudaStatus;
-		}
-
-		cudaStatus = cudaDeviceSynchronize();
-		if (cudaStatus != cudaSuccess) {
-			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", cudaStatus);
-			return cudaStatus;
-		}
-
-		return cudaStatus;
-	}
-}
\ No newline at end of file
diff --git a/src/cuda_kernels.h b/src/cuda_kernels.h
deleted file mode 100644
index cd1eb00..0000000
--- a/src/cuda_kernels.h
+++ /dev/null
@@ -1,63 +0,0 @@
-#pragma once
-#include "cuda_runtime.h"
-#include "device_launch_parameters.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <string.h>
-#include <math.h>
-
-#include <cuda.h>
-
-typedef enum
-{
-	ITU_601 = 1,
-	ITU_709 = 2
-} FF_ColorSpace;
-
-namespace cuda_common
-{
-	cudaError_t setColorSpace(FF_ColorSpace CSC, float hue);
-
-	cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height);
-	cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height);
-
-	
-	cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height);
-
-	cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height,
-		unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
-		unsigned char* U, size_t uPitch, int uWidth, int uHeight,
-		unsigned char* V, size_t vPitch, int vWidth, int vHeight);
-
-	cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height,
-		unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
-		unsigned char* U, size_t uPitch, int uWidth, int uHeight,
-		unsigned char* V, size_t vPitch, int vWidth, int vHeight);
-
-	cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom);
-	//	cudaError_t PartMemResize(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int left, int top, int right, int bottom);
-
-	cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int srcimg_width, int srcimg_height, unsigned char** d_dstRGB, int count,
-		int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h,
-		float submeanb, float submeang, float submeanr,
-		float varianceb, float varianceg, float variancer);
-
-	cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);
-	cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);
-
-	cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y);
-}
-
-
-int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height);
-int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height);
-
-int jpegNPP(const char *szOutputFile, float* d_srcRGB);
-int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB);
-
-int initTable();
-int initTable(int flag, int width, int height);
-int releaseJpegNPP();
-
diff --git a/src/define.hpp b/src/define.hpp
deleted file mode 100644
index 26fcc61..0000000
--- a/src/define.hpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#include <string>
-
-#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
-
-
-#define CHECK_CUDA(call) \
-{\
-    const cudaError_t error_code = call;\
-    if (cudaSuccess != error_code)\
-        LOG_ERROR("CUDA error, code: {} reason: {}", error_code, cudaGetErrorString(error_code));\
-}
\ No newline at end of file
diff --git a/src/demo/Makefile b/src/demo/Makefile
new file mode 100644
index 0000000..25c49e5
--- /dev/null
+++ b/src/demo/Makefile
@@ -0,0 +1,61 @@
+XX = g++
+
+
+PROJECT_ROOT= /home/huchunming/FFNvDecoder
+
+DEPEND_DIR = $(PROJECT_ROOT)/bin
+SRC_ROOT = $(PROJECT_ROOT)/src
+THIRDPARTY_ROOT = $(PROJECT_ROOT)/3rdparty
+
+
+TARGET= /home/huchunming/FFNvDecoder/src/build/bin/demo
+
+
+SPDLOG_ROOT = $(THIRDPARTY_ROOT)/spdlog-1.9.2/release
+JRTP_ROOT = $(THIRDPARTY_ROOT)/jrtp_export
+
+
+include_dir=-I/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/runtime/include
+lib_dir=-L/usr/lib \
+		-L/usr/local/lib \
+		-L/usr/local/Ascend/driver/lib64 \
+		-L/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/atc/lib64\
+		-L/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/runtime/lib64 \
+		-L/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/runtime/lib64/stub \
+		-L/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/lib64 \
+		-L/usr/local/Ascend/driver/lib64/driver
+		
+lib=-lacl_dvpp -lascendcl -lmmpa -lglog -lgflags -lpthread -lz -lacl_dvpp_mpi -lruntime -lascendalog -lc_sec -lmsprofiler -lgert -lge_executor -lge_common \
+	-lgraph -lascend_protobuf -lprofapi -lerror_manager -lexe_graph -lregister -lplatform
+
+DEFS = -DENABLE_DVPP_INTERFACE
+
+INCLUDE= -I $(SRC_ROOT)/interface \
+		-I $(SRC_ROOT)/dvpp \
+
+LIBSPATH=
+
+LIBS= -lavformat -lavcodec -lswscale -lavutil -lavfilter -lswresample -lavdevice
+
+CXXFLAGS= -g -O0 -fPIC $(INCLUDE) $(include_dir) $(DEFS) -lpthread -lrt -lz -fexceptions -std=c++11 -fvisibility=hidden -Wl,-Bsymbolic -ldl
+
+SRCS:=$(wildcard $(SRC_ROOT)/demo/*.cpp)
+OBJS = $(patsubst %.cpp, %.o, $(notdir $(SRCS)))
+
+OBJ_ROOT = /home/huchunming/FFNvDecoder/src/build
+DVPP_SRCS:=$(wildcard $(OBJ_ROOT)/dvpp/obj/*.o)
+INTEFACE_SRCS:=$(wildcard $(OBJ_ROOT)/interface/obj/*.o)
+
+
+$(TARGET):$(OBJS) $(INTEFACE_SRCS) $(DVPP_SRCS) 
+	rm -f $(TARGET)
+	@echo -e "\e[33m""Building object $@""\e[0m"
+	$(XX) -o $@ $^ $(CXXFLAGS) $(LIBS) $(lib_dir) $(lib) -Wwrite-strings
+	rm -f *.o
+
+%.o:$(SRC_ROOT)/demo/%.cpp  # compile each demo source with CXXFLAGS (the flag set this Makefile actually defines)
+	$(XX) $(CXXFLAGS) -c $<
+
+
+clean:
+	rm -f *.o $(TARGET)
\ No newline at end of file
diff --git a/src/demo/Makefile.BK0308 b/src/demo/Makefile.BK0308
new file mode 100644
index 0000000..e096cc9
--- /dev/null
+++ b/src/demo/Makefile.BK0308
@@ -0,0 +1,43 @@
+XX = g++
+
+
+PROJECT_ROOT= /home/huchunming/FFNvDecoder
+
+DEPEND_DIR = $(PROJECT_ROOT)/bin
+SRC_ROOT = $(PROJECT_ROOT)/src
+THIRDPARTY_ROOT = $(PROJECT_ROOT)/3rdparty
+
+
+TARGET= /home/huchunming/FFNvDecoder/src/build/bin/test
+
+
+SPDLOG_ROOT = $(THIRDPARTY_ROOT)/spdlog-1.9.2/release
+JRTP_ROOT = $(THIRDPARTY_ROOT)/jrtp_export
+
+
+INCLUDE= -I $(SRC_ROOT)/interface \
+
+LIBSPATH= -L /home/huchunming/FFNvDecoder/src/build/interface/lib -l:interface.a \
+		-L /home/huchunming/FFNvDecoder/src/build/dvpp/lib -l:libdvpp.a \
+			
+
+LIBS= -lavformat -lavcodec -lswscale -lavutil -lavfilter -lswresample -lavdevice
+
+CFLAGS= -g -fPIC -O0 $(INCLUDE) -pthread -lrt -lz -std=c++11 -fvisibility=hidden -Wl,-Bsymbolic -ldl
+	# -DUNICODE -D_UNICODE
+
+SRCS:=$(wildcard $(SRC_ROOT)/demo/*.cpp)
+OBJS = $(patsubst %.cpp, %.o, $(notdir $(SRCS)))
+
+
+$(TARGET):$(OBJS) $(CU_OBJS)
+	rm -f $(TARGET)
+	$(XX) -o $@ $^ $(CFLAGS)  $(LIBSPATH) $(LIBS) -Wwrite-strings
+	rm -f *.o
+
+%.o:$(SRC_ROOT)/demo/%.cpp
+	$(XX) $(CFLAGS) -c $<
+
+
+clean:
+	rm -f *.o $(TARGET)
\ No newline at end of file
diff --git a/src/demo/main_dvpp.cpp b/src/demo/main_dvpp.cpp
new file mode 100644
index 0000000..6a9e8e3
--- /dev/null
+++ b/src/demo/main_dvpp.cpp
@@ -0,0 +1,349 @@
+#include <iostream>
+#include <pthread.h>
+#include <thread>
+#include <chrono>
+#include <unistd.h>
+
+
+#ifdef _WIN32
+#include "Winsock2.h"
+#pragma comment(lib, "ws2_32.lib")
+#endif
+
+#ifdef __linux__
+#include "arpa/inet.h"
+#endif
+
+#include "../interface/FFNvDecoderManager.h"
+#include "../interface/utiltools.hpp"
+
+#define MIN_RTP_PORT		10000
+#define MAX_RTP_PORT		60000
+
+// Returns an even UDP port in [MIN_RTP_PORT, MAX_RTP_PORT) for which that port and
+// the following odd port could both be bound at probe time (an RTP/RTCP-style pair).
+// Probe sockets are closed again, so nothing stays reserved. Loops until a pair is
+// found. Relies on static state and rand(), so it is not safe to call concurrently.
+int allocRtpPort() {
+	static int s_rtpPort = MIN_RTP_PORT;
+	if (MIN_RTP_PORT == s_rtpPort)
+	{
+		// First call, or the previous call returned MIN_RTP_PORT: pick a random start.
+		srand((unsigned int)time(NULL));
+		s_rtpPort = MIN_RTP_PORT + (rand() % MIN_RTP_PORT);
+	}
+
+	if (s_rtpPort % 2)
+		++s_rtpPort;
+
+	while (true)
+	{
+		s_rtpPort += 2;
+		s_rtpPort = s_rtpPort >= MAX_RTP_PORT ? MIN_RTP_PORT : s_rtpPort;
+
+		int i = 0;
+		for (; i < 2; i++)
+		{
+			int s = socket(AF_INET, SOCK_DGRAM, 0);
+			if (s < 0)
+			{
+				printf("socket failed:%d\n", s);
+				break;
+			}
+			sockaddr_in sRecvAddr = {};	// also zeroes the sin_zero padding
+			sRecvAddr.sin_family = AF_INET;
+			sRecvAddr.sin_addr.s_addr = htonl(INADDR_ANY);
+			sRecvAddr.sin_port = htons(s_rtpPort + i);
+			// Close the probe socket on every path; it used to leak when bind() failed.
+			int nResult = bind(s, (sockaddr *)&sRecvAddr, sizeof(sRecvAddr));
+			int nClose = close(s);
+			if (nClose != 0)
+				printf("closesocket failed:%d\n", nClose);
+			if (nResult != 0 || nClose != 0)
+				break;
+		}
+
+		if (i == 2)
+			break;
+	}
+	return s_rtpPort;
+}
+
+
+
+
+
+unsigned char *pHwRgb[2] = {nullptr, nullptr};
+
+int sum1 = 0;
+int sum2 = 0;
+
+
+string data_home = "/mnt/data/cmhu/tmp/";
+
+
+
+
+/**
+ * Post-decode callback: counts frames per decoder in sum1 ("dec0") / sum2 ("dec2").
+ * userPtr is cast to AbstractDecoder* (the create* helpers register the decoder via
+ * setPostDecArg); devFrame is not inspected here.
+ * NOTE: devFrame lives on the device the decoder was configured with; keep that in
+ * mind for any follow-up processing, especially with multiple threads.
+ * NOTE(review): sum1/sum2 are plain ints - confirm callbacks never run concurrently.
+ * */
+void postDecoded(const void * userPtr, DeviceRgbMemory* devFrame){
+    AbstractDecoder* decoder = (AbstractDecoder*)userPtr;
+    if (decoder == nullptr)
+    {
+        return;
+    }
+
+    // Only the two decoder names below are counted; the unused local copy is gone.
+    const auto name = decoder->getName();
+    if (name == "dec0")
+    {
+        sum1++;
+    }
+    else if (name == "dec2")
+    {
+        sum2++;
+    }
+}
+
+long start_time = 0;
+long end_time = 0;
+bool count_flag = false;
+int count = 0;
+int count_std = 100;
+
+
+static int sum = 0;
+unsigned char *pHwData = nullptr;
+
+// Post-decode callback variant for a decoder named "dec": counts frames in
+// `count` / `sum` and, each time count_std frames have been seen, prints the
+// key-frame flag and frame size, then re-arms the counting window.
+// NOTE(review): no decoder in the visible part of this file registers it.
+void postDecoded0(const void * userPtr, DeviceRgbMemory* devFrame){
+    AbstractDecoder* dec = (AbstractDecoder*)userPtr;
+    if (dec == nullptr || !(dec->getName() == "dec"))
+    {
+        return;
+    }
+
+    // Start of a new window: reset the counter and stamp both time markers.
+    if (!count_flag)
+    {
+        count_flag = true;
+        count = 0;
+        start_time = UtilTools::get_cur_time_ms();
+        end_time = start_time;
+    }
+
+    // Every frame counts toward both the window and the running total.
+    ++count;
+    ++sum;
+    if (count < count_std)
+    {
+        return;
+    }
+
+    // Window complete: report this frame and let the next call open a new window.
+    cout << dec->getName() << " keyframe: " << devFrame->isKeyFrame() << " width: " << devFrame->getWidth() << " height: "<< devFrame->getHeight() << endl;
+    count_flag = false;
+}
+
+void decode_finished_cbk(const void* userPtr){ // decode-finished callback: prints UtilTools::get_cur_time_ms(); userPtr is ignored
+    cout << "当前时间戳: " << UtilTools::get_cur_time_ms() << endl;
+}
+
+bool decode_request_stream_cbk(const char* deviceId){ // stream-request callback stub: logs a reminder and returns true; deviceId is ignored
+    cout << "需在此请求流" << endl;
+    return true;
+}
+
+// string test_uri = "rtmp://192.168.10.56:1935/objecteye/1";
+// string test_uri = "/home/cmhu/data/output_800x480.mp4";
+// string test_uri = "/home/cmhu/data/output_1920x1080.mp4";
+// string test_uri = "rtsp://176.10.0.2:8554/stream";
+// string test_uri = "/mnt/f/fiss/test_data/h265.mp4";
+// string test_uri = "rtsp://176.10.0.4:8554/stream";
+string test_uri = "rtsp://admin:admin@123456@192.168.60.176:554/cam/realmonitor?channel=1&subtype=0";
+
+// Registers and starts an FFmpeg-type decoder named "dec<index>" that reads the
+// global test_uri (TCP forced) on the device identified by gpu_id.
+// Returns without starting anything if the manager cannot create the decoder.
+void createDecode(int index, const char* gpu_id){
+    FFNvDecoderManager* manager = FFNvDecoderManager::getInstance();
+    MgrDecConfig config;
+    config.name = "dec" + to_string(index);
+    config.dec_type = DECODER_TYPE_FFMPEG;
+
+    // Stream source, transport and target device.
+    config.cfg.uri = test_uri;
+    config.cfg.force_tcp = true;
+    config.cfg.gpuid = gpu_id;
+
+    // Per-frame and end-of-stream notifications.
+    config.cfg.post_decoded_cbk = postDecoded;
+    config.cfg.decode_finished_cbk = decode_finished_cbk;
+
+    AbstractDecoder* decoder = manager->createDecoder(config);
+    if (decoder == nullptr)
+    {
+        return;
+    }
+
+    // The decoder itself is registered as the argument for both callbacks.
+    manager->setPostDecArg(config.name, decoder);
+    manager->setFinishedDecArg(config.name, decoder);
+    manager->startDecodeByName(config.name);
+}
+
+// Registers and starts a GB28181-type decoder named "dec<index>" configured with
+// `port`; the decoder name doubles as its uri. Returns early if creation fails.
+void createGB28181Decode(int index, char* gpu_id, int port){
+    FFNvDecoderManager* manager = FFNvDecoderManager::getInstance();
+    MgrDecConfig config;
+    config.name = "dec" + to_string(index);
+    config.dec_type = DECODER_TYPE_GB28181;
+    config.cfg.uri = config.name;
+    config.cfg.port = port;  // caller-supplied; allocRtpPort() is not used here
+    config.cfg.gpuid = gpu_id;
+    config.cfg.force_tcp = true;
+    config.cfg.post_decoded_cbk = postDecoded;
+    config.cfg.decode_finished_cbk = decode_finished_cbk;
+    config.cfg.request_stream_cbk = decode_request_stream_cbk;
+
+    AbstractDecoder* decoder = manager->createDecoder(config);
+    if (decoder == nullptr)
+    {
+        return;
+    }
+    manager->setPostDecArg(config.name, decoder);
+    manager->setFinishedDecArg(config.name, decoder);
+    manager->startDecodeByName(config.name);
+}
+
+// Registers and starts a DVPP-type decoder named "dec<index>" reading test_uri on
+// device devId. channelId is currently unused. Returns early if creation fails.
+void createDvppDecoder(int index, char* devId, int channelId){
+    FFNvDecoderManager* manager = FFNvDecoderManager::getInstance();
+    MgrDecConfig config;
+    config.name = "dec" + to_string(index);
+    config.dec_type = DECODER_TYPE_DVPP;
+    config.cfg.uri = test_uri;
+    config.cfg.gpuid = devId;
+    config.cfg.force_tcp = true;
+    config.cfg.post_decoded_cbk = postDecoded;
+    config.cfg.decode_finished_cbk = decode_finished_cbk;
+
+    AbstractDecoder* decoder = manager->createDecoder(config);
+    if (decoder == nullptr)
+        return;
+
+    manager->setPostDecArg(config.name, decoder);
+    manager->setFinishedDecArg(config.name, decoder);
+    manager->startDecodeByName(config.name);
+}
+
+void logFF(void *, int level, const char *fmt, va_list ap) // log callback (only referenced by the commented-out av_log_set_callback in main); level is ignored
+{
+    vfprintf(stdout, fmt, ap);
+}
+
+
+// Demo entry point. Usage: <prog> <uri> <device_id> <gb28181_port>
+// Starts one DVPP decoder, then executes one-character commands read from stdin:
+//   f / g / d   add an FFmpeg / GB28181 / DVPP decoder named "dec<i>"
+//   r / p       resume / pause "dec0"
+//   c           close the most recently added decoder
+//   i           print the resolution reported for "dec0"
+//   q or EOF    close all decoders and exit
+int main(int argc, char* argv[]){
+    // argv[1..3] are all dereferenced below; report usage instead of crashing.
+    if (argc < 4)
+    {
+        cout << "usage: " << (argc > 0 ? argv[0] : "demo") << " <uri> <device_id> <port>" << endl;
+        return 1;
+    }
+    test_uri = argv[1];
+    char* gpuid = argv[2];
+    int port = atoi(argv[3]);
+    cout << test_uri << "   gpu_id:" << gpuid << "   port:" << port << endl;
+
+    // Monitor thread: once a minute print the current time and the decoder count.
+    pthread_t m_decode_thread;
+    int rc = pthread_create(&m_decode_thread,0,
+        [](void* arg)
+        {
+            while (true)
+            {
+                std::this_thread::sleep_for(std::chrono::minutes(1));
+                FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
+                cout << "当前时间：" << UtilTools::get_cur_time_ms() << "  当前运行路数： " << pDecManager->count() << endl;
+            }
+            return (void*)0;   // unreachable; kept so the lambda's return type is void*
+        }
+    ,nullptr);
+    if (rc == 0)
+        pthread_detach(m_decode_thread);   // never joined, so let it clean up after itself
+
+    FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
+    // i is always the index the NEXT decoder will receive.
+    int i = 0;
+    createDvppDecoder(i, gpuid, 0);
+    i++;   // previously missing: the next decoder reused the name "dec0"
+
+    while (true)
+    {
+        int ch = getchar();
+        if (ch == 'q' || ch == EOF)   // EOF: stdin is closed, do not spin on it forever
+        {
+            break;
+        }
+        switch (ch)
+        {
+        case 'f': case 'F':
+            createDecode(i, gpuid);
+            i++;
+            break;
+        case 'g': case 'G':
+            createGB28181Decode(i, gpuid, port);
+            i++;
+            break;
+        case 'd': case 'D':
+            createDvppDecoder(i, gpuid, 0);
+            i++;
+            break;
+        case 'r': case 'R':
+            pDecManager->resumeDecoder("dec0");
+            break;
+        case 'p': case 'P':
+            pDecManager->pauseDecoder("dec0");
+            break;
+        case 'c': case 'C':
+            if (i > 0)   // nothing left to close; also avoids asking for "dec-1"
+            {
+                i--;
+                pDecManager->closeDecoderByName("dec" + to_string(i));
+            }
+            break;
+        case 'i': case 'I':
+        {
+            int w = 0, h = 0;
+            pDecManager->getResolution("dec0", w,h);
+            printf( "%s : %dx%d\n", "dec0" , w,h );
+            break;
+        }
+        default:
+            break;
+        }
+    }
+
+    // NOTE(review): `sum` is only incremented by postDecoded0, which nothing here registers - confirm the intended counter.
+    cout << "总共帧数：" << sum << endl;
+    pDecManager->closeAllDecoder();
+    return 0;
+}
\ No newline at end of file
diff --git a/src/demo/main_nvdec.cpp1 b/src/demo/main_nvdec.cpp1
new file mode 100644
index 0000000..be0094d
--- /dev/null
+++ b/src/demo/main_nvdec.cpp1
@@ -0,0 +1,452 @@
+// #include "FFNvDecoderManager.h"
+// #include <iostream>
+
+// #include "cuda_kernels.h"
+
+// #include "NvJpegEncoder.h"
+
+// #include <pthread.h>
+// #include <thread>
+
+// #include <chrono>
+
+// #include <unistd.h>
+
+
+// #ifdef _WIN32
+// #include "Winsock2.h"
+// #pragma comment(lib, "ws2_32.lib")
+// #endif
+
+// #ifdef __linux__
+// #include "arpa/inet.h"
+// #endif
+
+// #include "utiltools.hpp"
+
+// #define MIN_RTP_PORT		10000
+// #define MAX_RTP_PORT		60000
+
+// // ȡ MIN_RTP_PORT(10000)~MAX_RTP_PORT(60000)֮�������˿�(ż���������������˿ڿ���)
+// int allocRtpPort() {
+
+// 	static int s_rtpPort = MIN_RTP_PORT;
+// 	if (MIN_RTP_PORT == s_rtpPort)
+// 	{
+// 		srand((unsigned int)time(NULL));
+// 		s_rtpPort = MIN_RTP_PORT + (rand() % MIN_RTP_PORT);
+// 	}
+
+// 	if (s_rtpPort % 2)
+// 		++s_rtpPort;
+
+// 	while (true)
+// 	{
+// 		s_rtpPort += 2;
+// 		s_rtpPort = s_rtpPort >= MAX_RTP_PORT ? MIN_RTP_PORT : s_rtpPort;
+
+// 		int i = 0;
+// 		for (; i < 2; i++)
+// 		{
+// 			sockaddr_in sRecvAddr;
+// 			int s = socket(AF_INET, SOCK_DGRAM, 0);
+
+// 			sRecvAddr.sin_family = AF_INET;        
+// 			sRecvAddr.sin_addr.s_addr = htonl(INADDR_ANY);    
+// 			sRecvAddr.sin_port = htons(s_rtpPort + i); 
+
+// 			int nResult = bind(s, (sockaddr *)&sRecvAddr, sizeof(sRecvAddr));
+// 			if (nResult != 0)
+// 			{
+// 				break;
+// 			}
+
+// 			nResult = close(s);
+// 			if (nResult != 0)
+// 			{
+// 				printf("closesocket failed:%d\n", nResult);
+// 				break;
+// 			}
+// 		}
+
+// 		if (i == 2)
+// 			break;
+// 	}
+
+// 	return s_rtpPort;
+// }
+
+
+
+
+
+// unsigned char *pHwRgb[2] = {nullptr, nullptr};
+
+// int sum1 = 0;
+// int sum2 = 0;
+
+// cudaStream_t stream[2];
+
+// string data_home = "/mnt/data/cmhu/tmp/";
+
+
+// #define checkCudaErrors(S) do {CUresult  status; \
+//         status = S; \
+//         if (status != CUDA_SUCCESS ) std::cout << __LINE__ <<" checkCudaErrors - status = " << status << std::endl; \
+//         } while (false)
+
+
+// static void gpu_helper(int gpuid)
+// {
+//     cudaSetDevice(gpuid);
+
+//     // int *dn;
+//     // cudaMalloc((void **)&dn, 1 * sizeof(int));
+
+// 	size_t free_byte;
+// 	size_t total_byte;
+
+// 	CUresult cuda_status = cuMemGetInfo(&free_byte, &total_byte);
+
+// 	const char *pStr = nullptr;
+// 	if (CUDA_SUCCESS != cuda_status) {
+// 		cuGetErrorString(cuda_status, &pStr);
+// 		printf("Error: cudaMemGetInfo fails, %s \n", pStr);
+// 		return;
+// 	}
+
+// 	double free_db = (double)free_byte;
+// 	double total_db = (double)total_byte;
+// 	double used_db_1 = (total_db - free_db) / 1024.0 / 1024.0;
+
+// 	std::cout <<"显存已使用 " << used_db_1 << " MB\n";
+
+//     // cudaFree(dn);
+// }
+
+// int CheckCUDAProperty( int devId )
+// {
+//     cuInit(0);
+
+// 	CUdevice dev = devId;
+// 	size_t memSize = 0;
+// 	char devName[256] = {0};
+// 	int major = 0, minor = 0;
+// 	CUresult rlt = CUDA_SUCCESS;
+
+//     rlt = cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
+//     checkCudaErrors( rlt );
+
+//     rlt = cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
+// 	checkCudaErrors( rlt );
+
+// 	rlt = cuDeviceGetName( devName, sizeof( devName ), dev );
+// 	checkCudaErrors( rlt );
+
+// 	printf( "Using GPU Device %d: %s has SM %d.%d compute capability\n",
+// 		    dev, devName, major, minor );
+
+// 	rlt = cuDeviceTotalMem( &memSize, dev );
+// 	checkCudaErrors( rlt );
+
+// 	printf( "Total amount of global memory:   %4.4f MB\n",
+// 		   (float)memSize / ( 1024 * 1024 ) );
+
+// 	return 0;
+// }
+
+// /**
+//  * 注意： gpuFrame 在解码器设置的显卡上，后续操作要十分注意这一点，尤其是多线程情况
+//  * */
+// void postDecoded(const void * userPtr, AVFrame * gpuFrame){
+//     AbstractDecoder* decoder = (AbstractDecoder*)userPtr;
+//     if (decoder!= nullptr)
+//     {
+//         // cout << "decode name: " << decoder->getName() << endl;
+
+//             // const char* gpu_pixfmt = av_get_pix_fmt_name((AVPixelFormat)gpuFrame->format);
+//             // cout << "pixfmt: " << gpu_pixfmt << endl;
+//             // cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
+//             // cout << "decode successed ✿✿ヽ(°▽°)ノ✿ " << endl;
+
+//             int sum = sum1;
+//             if (decoder->getName() == "dec0")
+//             {
+//                 sum1 ++ ;
+//                 sum = sum1;
+
+//                 if (gpuFrame->format == AV_PIX_FMT_CUDA)
+//                 {   
+//                     // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl;
+//                     cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
+//                     cudaError_t cudaStatus;
+//                     if(pHwRgb[0] == nullptr){
+//                         // cudaStreamCreate(&stream[0]);
+//                         cuda_common::setColorSpace( ITU_709, 0 );
+//                         cudaStatus = cudaMalloc((void **)&pHwRgb[0], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
+//                     }
+//                     cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[0], gpuFrame->width, gpuFrame->height);
+//                     cudaDeviceSynchronize();
+//                     if (cudaStatus != cudaSuccess) {
+//                         cout << "CUDAToBGR failed !!!" << endl;
+//                         return;
+//                     }
+
+//                     string path = data_home + decoder->getName() + ".jpg";
+//                     saveJpeg(path.c_str(), pHwRgb[0], gpuFrame->width, gpuFrame->height, stream[0]);  // 验证 CUDAToRGB 
+//                 }
+//             } else if (decoder->getName() == "dec2") 
+//             {
+//                 sum2 ++ ;
+//                 sum = sum2;
+
+//                 if (gpuFrame->format == AV_PIX_FMT_CUDA)
+//                 {   
+//                     // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl;
+//                     cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
+//                     cudaError_t cudaStatus;
+//                     if(pHwRgb[1] == nullptr){
+//                         // cudaStreamCreate(&stream[1]);
+//                         cuda_common::setColorSpace( ITU_709, 0 );
+//                         cudaStatus = cudaMalloc((void **)&pHwRgb[1], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
+//                     }
+//                     cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[1], gpuFrame->width, gpuFrame->height);
+//                     cudaDeviceSynchronize();
+//                     if (cudaStatus != cudaSuccess) {
+//                         cout << "CUDAToBGR failed !!!" << endl;
+//                         return;
+//                     }
+
+//                     string path = data_home + decoder->getName() + ".jpg";
+//                     saveJpeg(path.c_str(), pHwRgb[1], gpuFrame->width, gpuFrame->height, stream[1]);  // 验证 CUDAToRGB 
+//                 }
+//             }
+//     }
+// }
+
+// long start_time = 0;
+// long end_time = 0;
+// bool count_flag = false;
+// int count = 0;
+// int count_std = 100;
+
+
+// static int sum = 0;
+// unsigned char *pHwData = nullptr;
+
+// void postDecoded0(const void * userPtr, AVFrame * gpuFrame){
+//     // std::this_thread::sleep_for(std::chrono::milliseconds(30000));
+
+//     AbstractDecoder* decoder = (AbstractDecoder*)userPtr;
+//     if (decoder!= nullptr)
+//     {
+//         // cout << "decode name: " << decoder->getName() << endl;
+//         if (decoder->getName() == "dec")
+//         {
+//             if (! count_flag)
+//             {
+//                 count_flag = true;
+//                 count = 0;
+//                 end_time = start_time = UtilTools::get_cur_time_ms();
+//             }
+//             count++;
+//             sum ++ ;
+//             if (count >= count_std)
+//             {
+//                 // end_time = UtilTools::get_cur_time_ms();
+//                 // long time_using = end_time - start_time;
+//                 // double time_per_frame = double(time_using)/count_std ;
+//                 // cout << count_std << "帧用时:" << time_using << "ms 每帧用时：" << time_per_frame << "ms" << endl;
+//                 cout << decoder->getName() << " keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
+//                 // cout << gpuFrame->pts << endl;
+
+//                 count_flag = false;
+//             }
+//             // cout << "帧数：" << sum << endl;
+
+//             if (gpuFrame->format == AV_PIX_FMT_CUDA)
+//             {   
+//                 cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
+//                 // cout << "gpu id : " << decoder->m_cfg.gpuid.c_str() << endl;
+//                 cudaError_t cudaStatus;
+//                 if(pHwData == nullptr){
+//                     cuda_common::setColorSpace( ITU_709, 0 );
+//                     cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
+//                 }
+//                 cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height);
+//                 cudaDeviceSynchronize();
+//                 if (cudaStatus != cudaSuccess) {
+//                     cout << "CUDAToBGR failed !!!" << endl;
+//                     return;
+//                 }
+
+//                 string path = data_home + decoder->getName() + ".jpg";
+//                 saveJpeg(path.c_str(), pHwData, gpuFrame->width, gpuFrame->height, nullptr);  // 验证 CUDAToRGB 
+//             }
+//         }
+//     }
+// }
+
+// void decode_finished_cbk(const void* userPtr){
+//     cout << "当前时间戳: " << UtilTools::get_cur_time_ms() << endl;
+// }
+
+// bool decode_request_stream_cbk(const char* deviceId){
+//     cout << "需在此请求流" << endl;
+//     return true;
+// }
+
+// // string test_uri = "rtmp://192.168.10.56:1935/objecteye/1";
+// // string test_uri = "/home/cmhu/data/output_800x480.mp4";
+// // string test_uri = "/home/cmhu/data/output_1920x1080.mp4";
+// // string test_uri = "rtsp://176.10.0.2:8554/stream";
+// // string test_uri = "/mnt/f/fiss/test_data/h265.mp4";
+// string test_uri = "rtsp://176.10.0.4:8554/stream";
+
+// void createDecode(int index, const char* gpu_id){
+//     FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
+//     MgrDecConfig config;
+//     config.name = "dec" + to_string(index);
+//     config.cfg.uri = test_uri;
+//     config.cfg.post_decoded_cbk = postDecoded;
+//     config.cfg.decode_finished_cbk = decode_finished_cbk;
+//     config.cfg.force_tcp = true;
+//     config.dec_type = DECODER_TYPE_FFMPEG;
+
+//     config.cfg.gpuid = gpu_id;
+//     // if (index % 2 == 0)
+//     // {
+//     //     config.cfg.gpuid = "0";
+//     // }
+//     // else
+//     // {
+//     //     config.cfg.gpuid = "0";
+//     // }
+    
+//     AbstractDecoder* decoder = pDecManager->createDecoder(config);
+//     if (!decoder)
+//     {
+//         return ;
+//     }
+//     pDecManager->setPostDecArg(config.name, decoder);
+//     pDecManager->setFinishedDecArg(config.name, decoder);
+//     pDecManager->startDecodeByName(config.name);
+// }
+
+// void createGB28181Decode(int index, char* gpu_id, int port){
+//     FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
+//     MgrDecConfig config;
+//     config.name = "dec" + to_string(index);
+//     config.cfg.uri = config.name;
+//     config.cfg.post_decoded_cbk = postDecoded;
+//     config.cfg.decode_finished_cbk = decode_finished_cbk;
+//     config.cfg.request_stream_cbk = decode_request_stream_cbk;
+//     config.cfg.force_tcp = true;
+
+//     config.dec_type = DECODER_TYPE_GB28181;
+//     config.cfg.port = port;//allocRtpPort();
+
+//     config.cfg.gpuid = gpu_id;
+    
+//     AbstractDecoder* decoder = pDecManager->createDecoder(config);
+//     if (!decoder)
+//     {
+//         return ;
+//     }
+//     pDecManager->setPostDecArg(config.name, decoder);
+//     pDecManager->setFinishedDecArg(config.name, decoder);
+//     pDecManager->startDecodeByName(config.name);
+// }
+
+// void logFF(void *, int level, const char *fmt, va_list ap)
+// {
+//     vfprintf(stdout, fmt, ap);
+// }
+
+
+// int main(int argc, char* argv[]){
+
+//     test_uri = "rtsp://admin:admin@123456@192.168.60.176:554/cam/realmonitor?channel=1&subtype=0";//argv[1];
+//     char* gpuid = argv[2];
+//     int port = atoi(argv[3]);
+//     cout << test_uri << "   gpu_id:" << gpuid << "   port:" << port << endl;
+
+//     // av_log_set_callback(&logFF);
+
+//     CheckCUDAProperty(atoi(gpuid));
+
+//     pthread_t m_decode_thread;
+//     pthread_create(&m_decode_thread,0,
+//         [](void* arg)
+//         {
+//             // cudaSetDevice(atoi(gpuid));
+//             while (true)
+//             {
+//                 std::this_thread::sleep_for(std::chrono::minutes(1));
+//                 FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
+//                 int count = pDecManager->count();
+//                 cout << "当前时间：" << UtilTools::get_cur_time_ms() << "  当前运行路数： " << pDecManager->count() << endl;
+//             }  
+
+//             return (void*)0;
+//         }
+//     ,nullptr);
+
+
+//     FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
+//     int i = 0;
+
+//     while (true)
+//     {
+//         int ch = getchar();
+//         if (ch == 'q')
+//         {
+//             break;
+//         }
+
+//         switch (ch)
+//         {
+//         case 'f':
+//         case 'F':
+//             createDecode(i, gpuid);
+//             i++;
+//             break;
+//         case 'g':
+//         case 'G':
+//             createGB28181Decode(i, gpuid, port);
+//             i++;
+//             break;
+//         case 'r':
+//         case 'R':
+//             pDecManager->resumeDecoder("dec0");
+//             break;
+//         case 'p':
+//         case 'P':
+//             pDecManager->pauseDecoder("dec0");
+//             break;
+
+//         case 'c':
+//         case 'C':
+//             i--;
+//             pDecManager->closeDecoderByName("dec" + to_string(i));
+//             break;
+
+//         case 'i':
+//         case 'I':
+//         {
+//             int w,h;
+//             pDecManager->getResolution("dec0", w,h);
+//             printf( "%s : %dx%d\n", "dec0" , w,h );
+//         }
+//             break;
+
+//         default:
+//             break;
+//         }
+
+//         /* code */
+//     }
+
+//     cout << "总共帧数：" << sum << endl;
+//     pDecManager->closeAllDecoder();
+// }
\ No newline at end of file
diff --git a/src/dvpp/CircularQueue.hpp b/src/dvpp/CircularQueue.hpp
new file mode 100644
index 0000000..368291c
--- /dev/null
+++ b/src/dvpp/CircularQueue.hpp
@@ -0,0 +1,138 @@
+#ifndef __CIRCULAR_QUEUE_HPP__
+#define __CIRCULAR_QUEUE_HPP__
+
+#include <iostream>
+#include <atomic>
+#include <vector>
+#include <mutex>
+
+using namespace std;
+
+
+// Fixed-capacity ring over caller-supplied slots (see init()). The queue never
+// creates elements: it only moves a head index (front) and a tail index (rear).
+// One slot always stays free, so N slots hold at most N-1 queued elements.
+// Producer: p = getTail(); fill *p; addTail().  Consumer: p = getHead(); use *p;
+// addHead() - or deQueue(). Full / empty / not-initialised is reported by returning
+// a value-initialised T (nullptr for the pointer-like T the old code required).
+template <typename T>
+class CircularQueue
+{
+public:
+    CircularQueue();
+    ~CircularQueue();
+
+    bool init(vector<T> data);
+    T getTail();
+    void addTail();
+    T deQueue();
+    T getHead();
+    void addHead();
+    void clearQueue();
+
+    int length();
+    bool isEmpty();
+private:
+    vector<T> base;      // slot storage
+    atomic<int> front;   // index of the oldest queued element
+    atomic<int> rear;    // index of the next free slot
+    mutex m_mutex;       // held by every member function below
+    int max_size;        // number of slots; 0 until init() succeeds
+};
+
+template <typename T>
+CircularQueue<T>::CircularQueue() : front(0), rear(0), max_size(0) {}   // max_size used to be left uninitialised
+
+template <typename T>
+CircularQueue<T>::~CircularQueue() {}
+
+// Installs the slots and empties the queue. Returns false (state unchanged) for
+// an empty vector, which previously led to a modulo-by-zero on first use.
+template <typename T>
+bool CircularQueue<T>::init(vector<T> data){
+    std::lock_guard<std::mutex> l(m_mutex);
+    if (data.empty())
+        return false;
+    base.swap(data);
+    max_size = (int)base.size();
+    front = rear = 0;
+    return true;
+}
+
+// Returns the next free slot without publishing it, or T() when full / not initialised.
+template <typename T>
+T CircularQueue<T>::getTail()
+{
+    std::lock_guard<std::mutex> l(m_mutex);
+    if (max_size <= 0 || (rear + 1) % max_size == front)
+        return T();
+    return base[rear];
+}
+
+// Publishes the slot last returned by getTail(). Ignored when the queue is full
+// or not initialised, so rear can never catch up with front and fake "empty".
+template <typename T>
+void CircularQueue<T>::addTail()
+{
+    std::lock_guard<std::mutex> l(m_mutex);
+    if (max_size <= 0 || (rear + 1) % max_size == front)
+        return;
+    rear = (rear + 1) % max_size;
+}
+
+// Removes and returns the oldest element, or T() when the queue is empty.
+template <typename T>
+T CircularQueue<T>::deQueue()
+{
+    std::lock_guard<std::mutex> l(m_mutex);
+    if (front == rear)
+        return T();
+    T e = base[front];
+    front = (front + 1) % max_size;
+    return e;
+}
+
+// Returns the oldest element without removing it, or T() when the queue is empty.
+template <typename T>
+T CircularQueue<T>::getHead()
+{
+    std::lock_guard<std::mutex> l(m_mutex);
+    if (front == rear)
+        return T();
+    return base[front];
+}
+
+// Drops the element last returned by getHead(). Ignored when the queue is empty.
+template <typename T>
+void CircularQueue<T>::addHead()
+{
+    std::lock_guard<std::mutex> l(m_mutex);
+    if (front == rear)
+        return;
+    front = (front + 1) % max_size;
+}
+
+// Number of queued elements (0 before init()).
+template <typename T>
+int CircularQueue<T>::length()
+{
+    std::lock_guard<std::mutex> l(m_mutex);
+    return max_size > 0 ? (rear - front + max_size) % max_size : 0;
+}
+
+template <typename T>
+bool CircularQueue<T>::isEmpty()
+{
+    std::lock_guard<std::mutex> l(m_mutex);
+    return front == rear;
+}
+
+// Forgets all queued elements; the slots themselves are kept.
+template <typename T>
+void CircularQueue<T>::clearQueue()
+{
+    std::lock_guard<std::mutex> l(m_mutex);
+    rear = front = 0;
+}
+
+#endif
\ No newline at end of file
diff --git a/src/dvpp/DvppDec.cpp b/src/dvpp/DvppDec.cpp
new file mode 100644
index 0000000..dfea4be
--- /dev/null
+++ b/src/dvpp/DvppDec.cpp
@@ -0,0 +1,421 @@
+#include "DvppDec.h"
+#include "DvppSourceManager.h"
+
+#define CHECK_AND_RETURN(ret, message)    \
+            if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message; return ret;}
+#define CHECK_NOT_RETURN(ret, message)    \
+            if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message;}
+#define CHECK_AND_RETURN_NOVALUE(ret, message)    \
+            if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message; return;}
+
+namespace { // internal linkage: DvppDecoder.cpp defines a different struct under this same name
+// Per-frame context passed to aclvdecSendFrame as user data and deleted again in VdecCallback.
+struct Vdec_CallBack_UserData {
+    uint64_t frameId{0};
+    long startTime{0};
+    long sendTime{0};
+	DvppDec* self{nullptr};   // decoder that submitted the frame
+	shared_ptr<MemNode> inBufNode;
+    // every member is initialised in place, so the implicit constructor leaves nothing indeterminate
+};
+} // namespace
+
+#ifdef TEST_DECODER
+static void *vdecHostAddr = nullptr;
+#endif
+
+static const int g_pkt_size = 1024 * 1024;
+
+ DvppDec::DvppDec(){ // no thread, context, packet queue or callback argument until init_vdpp()/setPostDecArg()
+    m_decode_thread = 0; m_context = nullptr; m_pktQueueptr = nullptr; m_postDecArg = nullptr;
+ }
+
+ DvppDec::~DvppDec(){
+    releaseResource(); // frees the device input buffers and returns the channel; call close() first so the decode thread has stopped
+ }
+
+ bool DvppDec::init_vdpp(DvppDecConfig cfg){
+    // Binds the decoder to its device context and a VDEC channel, allocates the device input buffers and
+    // initialises the colour converter. Returns false on any failure, after releasing what was acquired.
+    cout << "Init device....\n";
+    m_dvpp_deviceId = atoi(cfg.dev_id.c_str());
+
+    if(cfg.codec_id == 0){
+        // H.264 profile: 66 = Baseline, 77 = Main, >= 100 = High
+        if(cfg.profile == 77){
+            enType = H264_MAIN_LEVEL;
+        }else if(cfg.profile < 77){
+            enType = H264_BASELINE_LEVEL;
+        }else{
+            enType = H264_HIGH_LEVEL;
+        }
+    }else if(cfg.codec_id == 1){
+        // only Main is handled for h265
+        enType = H265_MAIN_LEVEL;
+    }else {
+        cout << "codec_id is not supported!" << endl;
+        return false;
+    }
+    if(cfg.pktQueueptr == nullptr || cfg.width <= 0 || cfg.height <= 0){
+        cout << "invalid decoder config: packet queue or frame size missing" << endl;
+        return false;
+    }
+
+    post_decoded_cbk = cfg.post_decoded_cbk;
+    m_pktQueueptr = cfg.pktQueueptr;
+
+	// DvppSourceManager performs aclInit when created and aclFinalize when destroyed
+	DvppSourceManager* pSrcMgr = DvppSourceManager::getInstance();
+	m_context = pSrcMgr->getContext(m_dvpp_deviceId);
+	m_dvpp_channel = pSrcMgr->getChannel(m_dvpp_deviceId);
+	if(m_dvpp_channel < 0){
+		cout << "该设备channel已经用完了" << endl;
+		return false;
+	}
+	// From here on a failure must hand back the channel and any buffers already allocated.
+	auto fail = [this](const char* msg){ cout << msg << endl; releaseResource(); m_dvpp_channel = -1; return false; };
+
+    cout << "devProgram start, device: " << m_dvpp_deviceId << endl;
+    if (aclrtSetCurrentContext(m_context) != ACL_ERROR_NONE) {
+		return fail("aclrtSetCurrentContext failed");
+	}
+
+	// The queue needs more than 16 slots, otherwise widely spaced key frames can stall the callback against the ring
+	for (size_t i = 0; i < 20; i++){
+		void *vdecInputbuf = nullptr;
+		if(acldvppMalloc((void **)&vdecInputbuf, g_pkt_size) != ACL_ERROR_NONE){
+			return fail("acldvppMalloc failed");
+		}
+		m_vec_vdec.push_back(vdecInputbuf);
+    }
+	if(!m_vdecQueue.init(m_vec_vdec)){
+		return fail("init input buffer queue failed");
+	}
+	// A converter that failed to initialise cannot serve the decode callback, so the failure is reported.
+	if(!picConverter.init(m_context)){
+		picConverter.release();
+		return fail("picConverter init failed");
+	}
+    m_vdec_out_size = cfg.width * cfg.height * 3 / 2;
+    m_dec_name = cfg.dec_name;
+	cout << "init vdpp success!" << endl;
+	return true;
+}
+
+bool DvppDec::start(){
+	// Launches decode_thread() on m_decode_thread; returns false if the thread could not be created.
+	m_bRunning = true;
+	m_bExitReportThd = false; // lets the report thread run again after an earlier decode run
+	int ret = pthread_create(&m_decode_thread, 0,
+        [](void* arg) -> void*
+        {
+            static_cast<DvppDec*>(arg)->decode_thread();
+            return nullptr;
+        }
+    , this);
+	if(ret != 0){ cout << "pthread_create failed, ret: " << ret << endl; m_bRunning = false; m_decode_thread = 0; }
+	return ret == 0;
+}
+
+// pthread entry point of the report thread: forwards to DvppDec::doProcessReport() on the decoder passed as arg.
+static void *ReportThd(void *arg)
+{
+    DvppDec *decoder = static_cast<DvppDec *>(arg);
+    if(decoder == nullptr) return nullptr; // nothing to run without a decoder
+    decoder->doProcessReport();
+    return nullptr;
+}
+
+void DvppDec::doProcessReport(){
+	// Body of the report thread: binds the decoder's context, then keeps calling
+	// aclrtProcessReport(1000) until m_bExitReportThd is set; failures are only logged.
+
+	CHECK_AND_RETURN_NOVALUE(aclrtSetCurrentContext(m_context), "aclrtSetCurrentContext failed");
+
+	for (;;) {
+		if (m_bExitReportThd) {
+			break;
+		}
+		const int report_ret = aclrtProcessReport(1000);
+		if (report_ret == ACL_ERROR_NONE) {
+			continue;
+		}
+		cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", aclrtProcessReport failed, ret: " << report_ret << endl;
+	}
+}
+
+static int count_frame = 0;
+static long lastts = 0;
+static void VdecCallback(acldvppStreamDesc *input, acldvppPicDesc *output, void *pUserData)
+{
+	// Decode callback registered on the channel. pUserData is the Vdec_CallBack_UserData allocated in
+	// sentFrame(); it is owned, and deleted, here.
+	cout << "VdecCallback: " << UtilTools::get_cur_time_ms() - lastts << endl;
+	lastts = UtilTools::get_cur_time_ms();
+	Vdec_CallBack_UserData *userData = static_cast<Vdec_CallBack_UserData *>(pUserData);
+	if(userData == nullptr){
+		return; // sendVdecEos() submits its frame without user data, so there may be no decoder to dispatch to
+	}
+	if(userData->self != nullptr){
+		userData->self->doVdppVdecCallBack(input, output);
+	}
+	delete userData;
+}
+
+void DvppDec::doVdppVdecCallBack(acldvppStreamDesc *input, acldvppPicDesc *output){
+	// Handles one decoded frame: converts it to BGR and hands it to post_decoded_cbk unless paused.
+	// The output buffer, both descriptors and one input-queue slot are released on EVERY path; returning
+	// early here would leak device memory and eventually leave sentFrame() without a free input buffer.
+	const int ctxRet = aclrtSetCurrentContext(m_context);
+	CHECK_NOT_RETURN(ctxRet, "aclrtSetCurrentContext failed");
+
+	void *outputDataDev = acldvppGetPicDescData(output);
+	uint32_t outputSize = acldvppGetPicDescSize(output);
+	uint32_t width = acldvppGetPicDescWidth(output);
+	uint32_t height = acldvppGetPicDescHeight(output);
+	cout << "width = " << width << "  height = " << height << "  data_size:" << outputSize << endl;
+	if (ctxRet == 0 && !m_bPause)
+	{
+		DvppRgbMemory* rgbMem = picConverter.convert2bgr(output, width, height, false);
+#ifdef TEST_DECODER
+		// Debug dump while the file-wide frame counter is 46..49. It runs before the callback because who owns
+		// rgbMem afterwards is not visible here, and uses its own host buffer instead of one shared by all decoders.
+		if(rgbMem != nullptr){
+			if(count_frame > 45 && count_frame < 50){
+				uint32_t data_size = rgbMem->getSize();
+				void *hostBuf = nullptr;
+				int dumpRet = aclrtMallocHost(&hostBuf, data_size);
+				CHECK_NOT_RETURN(dumpRet, "aclrtMallocHost failed");
+				if(dumpRet == 0){
+					dumpRet = aclrtMemcpy(hostBuf, data_size, rgbMem->getMem(), data_size, ACL_MEMCPY_DEVICE_TO_HOST);
+					CHECK_NOT_RETURN(dumpRet, "D2H aclrtMemcpy failed");
+				}
+				string file_name = "./yuv_pic/vdec_out"+ m_dec_name +".rgb" ;
+				FILE *outputFile = (dumpRet == 0) ? fopen(file_name.c_str(), "a") : nullptr;
+				if(outputFile){
+					fwrite(hostBuf, data_size, sizeof(char), outputFile);
+					fclose(outputFile);
+				}
+				if(hostBuf != nullptr){
+					dumpRet = aclrtFreeHost(hostBuf);
+					CHECK_NOT_RETURN(dumpRet, "aclrtFreeHost failed");
+				}
+			}
+			count_frame++;
+		}
+#endif
+		if(post_decoded_cbk){
+			post_decoded_cbk(m_postDecArg, rgbMem);
+		}
+	}else if(m_bPause){
+		std::this_thread::sleep_for(std::chrono::milliseconds(3));
+	}
+
+    acldvppFree((uint8_t*)outputDataDev);
+	m_vdecQueue.addHead(); // give the input buffer of this frame back to sentFrame()
+	int descRet = acldvppDestroyStreamDesc(input);
+	CHECK_NOT_RETURN(descRet, "acldvppDestroyStreamDesc failed");
+	descRet = acldvppDestroyPicDesc(output);
+	CHECK_NOT_RETURN(descRet, "acldvppDestroyPicDesc failed");
+	cout << "callback exit." << endl;
+}
+
+void DvppDec::close(){ // Asks the decode thread to stop and waits for it to exit; may be called more than once.
+	m_bRunning=false;
+	if(m_decode_thread != 0){
+		pthread_join(m_decode_thread,0);
+		m_decode_thread = 0; // never join the same pthread_t twice (undefined behaviour)
+	}
+}
+
+bool DvppDec::sendVdecEos(aclvdecChannelDesc *vdecChannelDesc){
+    // Submits an end-of-stream frame to the channel. Returns true only if the stream descriptor was created,
+    // flagged as EOS and accepted by aclvdecSendFrame; once created, the descriptor is destroyed either way.
+    acldvppStreamDesc *eosDesc = acldvppCreateStreamDesc();
+    if (nullptr == eosDesc) {
+        cout << "fail to create input stream desc" << endl;
+        return false;
+    }
+
+    bool sent = false;
+    aclError ret = acldvppSetStreamDescEos(eosDesc, 1);
+    if (ret == ACL_SUCCESS) {
+        // send vdec eos frame. when all vdec callback are completed, aclvdecSendFrame can be returned.
+        cout << "send eos" << endl;
+        ret = aclvdecSendFrame(vdecChannelDesc, eosDesc, nullptr, nullptr, nullptr);
+        if (ret == ACL_SUCCESS) {
+            sent = true;
+        } else {
+            cout << "fail to send eos frame, ret=" << ret << endl;
+        }
+    } else {
+        cout << "fail to set eos for stream desc, errorCode = " << static_cast<int32_t>(ret) << endl;
+    }
+    (void)acldvppDestroyStreamDesc(eosDesc);
+    return sent;
+}
+
+void DvppDec::releaseResource(){
+	// Frees the device input buffers and returns the VDEC channel. Idempotent: a second call finds no
+	// buffers and no channel, so the same channel id is never handed back twice.
+	for(void* buf : m_vec_vdec){
+		if(buf != nullptr){
+			acldvppFree((uint8_t*)buf);
+		}
+	}
+	m_vec_vdec.clear();
+	if(m_dvpp_channel < 0) return; // no channel was obtained, or it has already been released
+	DvppSourceManager::getInstance()->releaseChannel(m_dvpp_deviceId, m_dvpp_channel);
+	m_dvpp_channel = -1;
+}
+
+void DvppDec::decode_thread(){
+	// Decode thread body: binds the context, starts the report thread, creates the VDEC channel, feeds it
+	// through sentFrame() while m_bRunning, then sends EOS and tears down the channel and the report thread.
+	int ret = aclrtSetCurrentContext(m_context);
+	CHECK_AND_RETURN_NOVALUE(ret, "aclrtSetCurrentContext failed");
+	pthread_t report_thread;
+	ret = pthread_create(&report_thread, nullptr, ReportThd, (void *)this);
+	if(ret != 0){
+		cout << "pthread_create failed" << endl;
+		return;
+	}
+
+    // Create and configure the channel description (channel ids are 0~31 at the dvpp level).
+    // err names the first step that failed; the chain stops there.
+    const char* err = nullptr;
+    aclvdecChannelDesc *vdecChannelDesc = aclvdecCreateChannelDesc();
+    if (vdecChannelDesc == nullptr) { ret = -1; err = "aclvdecCreateChannelDesc failed"; }
+    else if ((ret = aclvdecSetChannelDescChannelId(vdecChannelDesc, m_dvpp_channel)) != 0) err = "aclvdecSetChannelDescChannelId failed";
+    else if ((ret = aclvdecSetChannelDescThreadId(vdecChannelDesc, report_thread)) != 0) err = "aclvdecSetChannelDescThreadId failed";
+    else if ((ret = aclvdecSetChannelDescCallback(vdecChannelDesc, VdecCallback)) != 0) err = "aclvdecSetChannelDescCallback failed";
+    else if ((ret = aclvdecSetChannelDescEnType(vdecChannelDesc, enType)) != 0) err = "aclvdecSetChannelDescEnType failed";
+    else if ((ret = aclvdecSetChannelDescOutPicFormat(vdecChannelDesc, PIXEL_FORMAT_YUV_SEMIPLANAR_420)) != 0) err = "aclvdecSetChannelDescOutPicFormat failed";
+    else if ((ret = aclvdecCreateChannel(vdecChannelDesc)) != 0) err = "aclvdecCreateChannel failed";
+    if (err != nullptr) {
+        // The report thread is already running: stop and join it instead of leaving it spinning forever.
+        CHECK_NOT_RETURN(ret, err);
+        if (vdecChannelDesc != nullptr) { aclvdecDestroyChannelDesc(vdecChannelDesc); }
+        m_bRunning = false; m_bExitReportThd = true;
+        pthread_join(report_thread, nullptr);
+        return;
+    }
+
+    uint64_t frame_count = 0;
+    bool bBreak = false;
+	while (m_bRunning)
+	{
+        int sendRet = sentFrame(vdecChannelDesc, frame_count); // 2 ends the loop, 1 means nothing was submitted
+        if(sendRet == 2){
+            bBreak = true; // recorded before leaving the loop (it used to sit after the break, unreachable)
+            break;
+        }
+        if(sendRet != 1){
+            frame_count++;
+        }
+	}
+
+    // Try to make sure all queued data gets decoded (currently disabled)
+    // int sum = 0;
+    // if(!bBreak){
+    //     while(!m_pktQueueptr->isEmpty()){
+    //         int ret = sentFrame(vdecChannelDesc, frame_count);
+    //         if(ret == 2){
+    //             break;
+    //         }
+    //         sum++;
+    //         if(sum > 10){
+    //             // avoid getting stuck
+    //             break;
+    //         }
+    //     }
+    // }
+
+	sendVdecEos(vdecChannelDesc);
+	ret = aclvdecDestroyChannel(vdecChannelDesc);
+	CHECK_NOT_RETURN(ret, "aclvdecDestroyChannel failed");
+	ret = aclvdecDestroyChannelDesc(vdecChannelDesc);
+    CHECK_NOT_RETURN(ret, "aclvdecDestroyChannelDesc failed");
+
+	// the report thread must exit only after the channel has been destroyed
+	m_bRunning = false;
+    m_bExitReportThd = true;
+	ret = pthread_join(report_thread, nullptr);
+	CHECK_NOT_RETURN(ret, "pthread_join failed");
+	cout << "decode thread exit." << endl;
+}
+
+int DvppDec::sentFrame(aclvdecChannelDesc *vdecChannelDesc, uint64_t frame_count){
+    // Submits the packet at the head of m_pktQueueptr to the decoder.
+    // Returns 0 = submitted, 1 = nothing submitted (no packet, no free input buffer, or packet dropped), 2 = fatal error.
+    AVPacket * pkt = m_pktQueueptr->getHead();
+    if(pkt == nullptr){
+        std::this_thread::sleep_for(std::chrono::milliseconds(10));
+        return 1;
+    }
+    void *vdecInputbuf = m_vdecQueue.getTail();
+    if(vdecInputbuf == nullptr){
+        std::this_thread::sleep_for(std::chrono::milliseconds(3));
+        return 1;
+    }
+
+    int ret = 0;
+    bool fatal = false;        // set for the failures the caller must stop on (return 2)
+    const char* err = nullptr; // names the first step that failed; the chain below stops there
+    void *vdecOutputBuf = nullptr;
+    acldvppStreamDesc *input_stream_desc = nullptr;
+    acldvppPicDesc *output_pic_desc = nullptr;
+    Vdec_CallBack_UserData *user_data = nullptr;
+    // Each device input buffer was allocated with g_pkt_size bytes; a larger packet would overrun it.
+    if (pkt->size <= 0 || pkt->size > g_pkt_size) { ret = -1; err = "pkt size out of range, packet dropped"; }
+    else if ((ret = aclrtMemcpy(vdecInputbuf, g_pkt_size, pkt->data, pkt->size, ACL_MEMCPY_HOST_TO_DEVICE)) != 0) { err = "aclrtMemcpy failed"; fatal = true; }
+    else if ((ret = acldvppMalloc(&vdecOutputBuf, m_vdec_out_size)) != 0) { err = "acldvppMalloc failed"; fatal = true; }
+    else if ((input_stream_desc = acldvppCreateStreamDesc()) == nullptr) { ret = -1; err = "acldvppCreateStreamDesc error"; }
+    else if ((output_pic_desc = acldvppCreatePicDesc()) == nullptr) { ret = -1; err = "acldvppCreatePicDesc error"; }
+    else if ((ret = acldvppSetStreamDescData(input_stream_desc, vdecInputbuf)) != 0) err = "acldvppSetStreamDescData failed";
+    else if ((ret = acldvppSetStreamDescSize(input_stream_desc, pkt->size)) != 0) err = "acldvppSetStreamDescSize failed";
+    else if ((ret = acldvppSetPicDescData(output_pic_desc, vdecOutputBuf)) != 0) err = "acldvppSetPicDescData failed";
+    else if ((ret = acldvppSetPicDescSize(output_pic_desc, m_vdec_out_size)) != 0) err = "acldvppSetPicDescSize failed";
+    else {
+        user_data = new Vdec_CallBack_UserData;
+        user_data->frameId = frame_count;
+        user_data->sendTime = UtilTools::get_cur_time_ms();
+        user_data->self = this;
+        cout << "send frame" << endl;
+        ret = aclvdecSendFrame(vdecChannelDesc, input_stream_desc, output_pic_desc, nullptr, reinterpret_cast<void *>(user_data));
+        if (ret != 0) err = "aclvdecSendFrame failed";
+    }
+
+    if (err != nullptr) {
+        // The frame was not accepted, so everything the callback would have released is released here,
+        // and the input slot is NOT published with addTail(): no callback would ever give it back.
+        CHECK_NOT_RETURN(ret, err);
+        if (input_stream_desc != nullptr) { acldvppDestroyStreamDesc(input_stream_desc); }
+        if (output_pic_desc != nullptr) { acldvppDestroyPicDesc(output_pic_desc); }
+        if (vdecOutputBuf != nullptr) { acldvppFree(vdecOutputBuf); }
+        delete user_data;
+        if (fatal) return 2;
+        av_packet_unref(pkt);
+        m_pktQueueptr->addHead();
+        return 1;
+    }
+
+    m_vdecQueue.addTail();
+    // unref before addHead(): once the slot is released, whoever fills the packet queue may reuse the packet
+    av_packet_unref(pkt);
+    m_pktQueueptr->addHead();
+    return 0;
+}
+
+
+void DvppDec::setPostDecArg(const void* postDecArg){
+	m_postDecArg = postDecArg; // passed back as the first argument of post_decoded_cbk for each frame
+}
+
+void DvppDec::pause(){
+    m_bPause = true; // while set, doVdppVdecCallBack skips conversion and the post-decode callback
+}
+
+void DvppDec::resume(){
+    m_bPause = false; // decoded frames are converted and delivered again
+}
\ No newline at end of file
diff --git a/src/dvpp/DvppDec.h b/src/dvpp/DvppDec.h
new file mode 100644
index 0000000..08bde3a
--- /dev/null
+++ b/src/dvpp/DvppDec.h
@@ -0,0 +1,80 @@
+#include<string>
+#include <pthread.h>
+
+#include "dvpp_headers.h"
+#include "depend_headers.h"
+#include "user_mem.h"
+#include "CircularQueue.hpp"
+#include "VpcPicConverter.h"
+#include "FFReceiver.h"
+
+#include <queue>
+
+using namespace std;
+
+#define TEST_DECODER
+
+
+struct DvppDecConfig{
+    string dec_name;                                    // decoder name; used in the debug dump file name
+    POST_DECODE_CALLBACK post_decoded_cbk{nullptr};     // callback that receives the decoded data
+    string dev_id;                                      // device id as a decimal string (parsed with atoi)
+    bool force_tcp{true};                               // whether to force a tcp connection
+    int skip_frame{1};                                  // number of frames to skip
+    int codec_id{-1};                                   // 0 : h264   1:h265; any other value is rejected by init_vdpp
+    int profile{0};                                     // h264 profile: 66 Baseline, 77 Main, >=100 High
+    CircularQueue<AVPacket*> *pktQueueptr{nullptr};     // packet queue the decoder consumes from
+    // Frame size; init_vdpp sizes the decoder output buffer as width * height * 3 / 2.
+    int width{0};
+    int height{0};
+};
+
+
+class DvppDec {
+public:
+    DvppDec();
+    ~DvppDec();
+    bool init_vdpp(DvppDecConfig cfg);          // acquires context, channel, input buffers and the converter
+    void setPostDecArg(const void* postDecArg); // first argument later passed to post_decoded_cbk
+    bool start();                               // creates the decode thread
+    void close();                               // clears m_bRunning and joins the decode thread
+    void pause();
+    void resume();
+// Entry points called from the static thread/callback functions in DvppDec.cpp.
+public:
+    void doProcessReport();
+    void doVdppVdecCallBack(acldvppStreamDesc *input, acldvppPicDesc *output);
+
+private:
+    void decode_thread();
+    void releaseResource();
+    bool sendVdecEos(aclvdecChannelDesc *vdecChannelDesc);
+    int sentFrame(aclvdecChannelDesc *vdecChannelDesc, uint64_t frame_count); // 0 sent, 1 retry, 2 fatal
+
+private:
+    // Flags written by one thread and read by another (API caller, decode thread, report thread).
+    bool m_bRunning{false};
+    bool m_bPause{false};
+    bool m_bExitReportThd{false};
+
+    int m_dvpp_deviceId {-1};
+    int m_dvpp_channel {-1};
+    aclrtContext m_context;
+    acldvppStreamFormat enType;
+
+    pthread_t m_decode_thread;
+    // NOTE(review): m_cfg is not referenced anywhere in DvppDec.cpp — confirm whether it is still needed.
+    DvppDecConfig m_cfg;
+    string m_dec_name;
+    // m_vec_vdec holds the device input buffers; m_vdecQueue cycles through the same pointers.
+    vector<void*> m_vec_vdec;
+    CircularQueue<void *> m_vdecQueue;
+    CircularQueue<AVPacket *> *m_pktQueueptr;
+
+    const void * m_postDecArg;
+    POST_DECODE_CALLBACK post_decoded_cbk;
+
+    VpcPicConverter picConverter;
+    // size passed to acldvppMalloc for each decoder output buffer; set in init_vdpp
+    int m_vdec_out_size {-1};
+};
\ No newline at end of file
diff --git a/src/dvpp/DvppDecoder.cpp b/src/dvpp/DvppDecoder.cpp
new file mode 100644
index 0000000..efa52ea
--- /dev/null
+++ b/src/dvpp/DvppDecoder.cpp
@@ -0,0 +1,640 @@
+#include "DvppDecoder.h"
+#include "DvppSourceManager.h"
+
+#define CHECK_AND_RETURN(ret, message)    \
+            if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message; return ret;}
+#define CHECK_NOT_RETURN(ret, message)    \
+            if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message;}
+#define CHECK_AND_RETURN_NOVALUE(ret, message)    \
+            if(ret != 0) {cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", ret: " << ret << ", [ERROR] " << message; return;}
+
+
+
+namespace { // internal linkage: DvppDec.cpp defines a different struct under this same name
+// Per-frame context passed to aclvdecSendFrame as user data and deleted again in VdecCallback.
+struct Vdec_CallBack_UserData {
+    uint64_t frameId{0};
+    long startTime{0};
+    long sendTime{0};
+	DvppDecoder* self{nullptr};   // decoder that submitted the frame
+	shared_ptr<MemNode> inBufNode;
+    // every member is initialised in place, so the implicit constructor leaves nothing indeterminate
+};
+} // namespace
+
+
+const int g_pkt_que_size = 10;
+const int g_pkt_size = 1024 * 1024;
+
+#ifdef TEST_DECODER
+void *vdecHostAddr;
+#endif
+
+// Wall-clock time since the epoch. NOTE: despite the "Us" in the name, the value is in milliseconds.
+static long GetCurTimeUs(){
+    const auto since_epoch = chrono::system_clock::now().time_since_epoch();
+    const auto as_millis = chrono::duration_cast<chrono::milliseconds>(since_epoch);
+    return as_millis.count();
+}
+
+DvppDecoder::DvppDecoder()
+{
+	// FFmpeg side: nothing opened yet
+	fmt_ctx = nullptr;
+	stream = nullptr;
+    stream_index = -1;
+    pix_fmt = AV_PIX_FMT_NONE;
+	m_fps = 0.0;
+
+	// worker threads: none started
+	m_decode_thread = 0;
+	m_post_decode_thread = 0;
+
+	// state flags
+	m_bRunning = false;
+	m_bFinished = false;
+	m_bPause = false;
+	m_bReal = true;
+	m_dec_keyframe = false;
+    m_dec_name = "";
+}
+
+DvppDecoder::~DvppDecoder()
+{	// Releases the FFmpeg objects, the device input buffers and the channel via releaseResource().
+	m_dec_keyframe = false;
+	releaseResource();
+}
+
+bool DvppDecoder::init_FFmpeg(const char* uri, bool force_tcp){
+	// Opens `uri` with FFmpeg, picks the best video stream, maps its codec/profile to the DVPP stream
+	// format and prepares the mp4-to-Annex-B bitstream filter. Returns false on any failure; an input
+	// that was opened is left for releaseFFmpeg() to close.
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 9, 100)
+    av_register_all();
+#endif
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 10, 100)
+    avcodec_register_all();
+#endif
+    avformat_network_init();
+
+	// open the input video
+	AVDictionary *options = nullptr;
+	av_dict_set( &options, "bufsize", "655360", 0 );
+	av_dict_set( &options, "rtsp_transport", force_tcp ? "tcp" : "udp", 0 );
+	// av_dict_set( &options, "listen_timeout", "30", 0 ); // unit: seconds
+	av_dict_set( &options, "stimeout", "30000000", 0 ); // unit: microseconds
+	fmt_ctx = avformat_alloc_context();
+	const char* input_file = uri;
+	int open_ret = avformat_open_input(&fmt_ctx, input_file, nullptr, &options);
+	av_dict_free(&options); // options that were not consumed are handed back in `options` and must be freed
+	if (open_ret != 0) {
+		cout << "Cannot open input file:" << input_file << endl;
+		return false;
+	}
+    av_dump_format(fmt_ctx, 0, input_file, 0);
+
+	// look up the stream information
+	if (avformat_find_stream_info(fmt_ctx, nullptr) < 0) {
+		cout << "Cannot find input stream information" << endl;
+		return false;
+	}
+
+	// look up the video stream
+	AVCodec *decoder = nullptr;
+	stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0);
+	if (stream_index < 0) {
+		cout << "Cannot find a video stream in the input file" << endl;
+		return false;
+	}
+	AVCodec *vcodec = avcodec_find_decoder(decoder->id);
+	AVCodecContext *avctx = avcodec_alloc_context3(vcodec);
+	if(avctx == nullptr){
+		cout << "alloc AVCodecContext failed." << endl;
+		return false;
+	}
+
+	do{
+		// the video stream object (stored in the member, no longer in a shadowing local)
+		stream = fmt_ctx->streams[stream_index];
+		AVCodecParameters *codecpar = stream->codecpar;
+		if (avcodec_parameters_to_context(avctx, codecpar) < 0)
+			break;
+
+		const AVBitStreamFilter * filter = nullptr;
+		if(codecpar->codec_id == AV_CODEC_ID_H264){
+			// 66: Baseline, 77: Main, >=100: High
+			if(codecpar->profile == 77){
+				enType = H264_MAIN_LEVEL;
+			}else if(codecpar->profile < 77){
+				enType = H264_BASELINE_LEVEL;
+			}else{
+				enType = H264_HIGH_LEVEL;
+			}
+			filter = av_bsf_get_by_name("h264_mp4toannexb");
+		}else if(codecpar->codec_id == AV_CODEC_ID_HEVC){
+			// only Main is handled for h265
+			enType = H265_MAIN_LEVEL;
+			filter = av_bsf_get_by_name("hevc_mp4toannexb");
+		}else {
+			cout << "codec_id is not supported!" << endl;
+			break;
+		}
+		// av_bsf_get_by_name returns NULL when the filter is not built into this FFmpeg
+		if(filter == nullptr || av_bsf_alloc(filter, &h264bsfc) < 0){
+			cout << "bitstream filter is not available" << endl;
+			break;
+		}
+		if(avcodec_parameters_copy(h264bsfc->par_in, codecpar) < 0 || av_bsf_init(h264bsfc) < 0){
+			cout << "init bitstream filter failed" << endl;
+			av_bsf_free(&h264bsfc);
+			break;
+		}
+
+		frame_width = codecpar->width;
+		frame_height = codecpar->height;
+		pix_fmt = (AVPixelFormat)codecpar->format;
+		m_fps = av_q2d(stream ->avg_frame_rate);
+		m_vdec_out_size = frame_width * frame_height * 3 /2;
+
+		cout << "frame_width = " << frame_width << "  frame_height = " << frame_height << "  fps = " << m_fps << "  m_vdec_out_size:" << m_vdec_out_size << endl;
+		cout << "init ffmpeg success!" << endl;
+
+		avcodec_free_context(&avctx); // only used to validate the parameters above; it used to leak on success
+		return true;
+	}while(0);
+
+	avcodec_free_context(&avctx);
+    return false;
+}
+
+// pthread entry point of the report thread: forwards to DvppDecoder::doProcessReport() on the decoder passed as arg.
+static void *ReportThd(void *arg)
+{
+    DvppDecoder *decoder = static_cast<DvppDecoder *>(arg);
+    if(decoder == nullptr) return nullptr; // nothing to run without a decoder
+    decoder->doProcessReport();
+    return nullptr;
+}
+
+void DvppDecoder::doProcessReport(){
+	// Body of the report thread: binds the decoder's context, then keeps calling
+	// aclrtProcessReport(1000) for as long as m_bRunning stays true; failures are only logged.
+	// NOTE(review): close() clears m_bRunning before the decode thread has sent EOS — confirm that callbacks still pending at that point are not needed.
+	CHECK_AND_RETURN_NOVALUE(aclrtSetCurrentContext(m_context), "aclrtSetCurrentContext failed");
+
+	for (;;) {
+		if (!m_bRunning) {
+			break;
+		}
+		const int report_ret = aclrtProcessReport(1000);
+		if (report_ret == ACL_ERROR_NONE) {
+			continue;
+		}
+		cout << "device: " << m_dvpp_deviceId << ", chn: " << m_dvpp_channel << ", aclrtProcessReport failed, ret: " << report_ret << endl;
+	}
+}
+
+int count_frame = 0;
+long lastts = 0;
+static void VdecCallback(acldvppStreamDesc *input, acldvppPicDesc *output, void *pUserData)
+{
+	// Decode callback registered on the channel. pUserData is the Vdec_CallBack_UserData allocated in
+	// decode_thread(); it is owned, and deleted, here.
+	cout << "VdecCallback: " << GetCurTimeUs() - lastts << endl;
+	lastts = GetCurTimeUs();
+	Vdec_CallBack_UserData *userData = static_cast<Vdec_CallBack_UserData *>(pUserData);
+	if(userData == nullptr){
+		return; // sendVdecEos() submits its frame without user data, so there may be no decoder to dispatch to
+	}
+	if(userData->self != nullptr){
+		userData->self->doVdppVdecCallBack(input, output, userData->self);
+	}
+	delete userData;
+}
+
+void DvppDecoder::doVdppVdecCallBack(acldvppStreamDesc *input, acldvppPicDesc *output, DvppDecoder *self){
+	// Handles one decoded frame: converts it to BGR unless paused (`self` is not used). The output buffer,
+	// both descriptors and one input-queue slot are released on EVERY path; returning early here would
+	// leak device memory and eventually leave decode_thread() without a free input buffer.
+	const int ctxRet = aclrtSetCurrentContext(m_context);
+	CHECK_NOT_RETURN(ctxRet, "aclrtSetCurrentContext failed");
+	void *outputDataDev = acldvppGetPicDescData(output);
+	uint32_t outputSize = acldvppGetPicDescSize(output);
+	uint32_t width = acldvppGetPicDescWidth(output);
+	uint32_t height = acldvppGetPicDescHeight(output);
+	cout << "width = " << width << "  height = " << height << "  data_size:" << outputSize << endl;
+	if (ctxRet == 0 && !m_bPause)
+	{
+		// NOTE(review): rgbMem is neither passed to post_decoded_cbk nor released in this function — confirm who owns it.
+		DeviceRgbMemory* rgbMem = picConverter.convert2bgr(output, width, height, false);
+#ifdef TEST_DECODER
+		// Debug dump while the file-wide frame counter is 46..49, through a host buffer sized for this frame.
+		if(rgbMem != nullptr){
+			if(count_frame > 45 && count_frame < 50){
+				uint32_t data_size = rgbMem->getSize();
+				void *hostBuf = nullptr;
+				int dumpRet = aclrtMallocHost(&hostBuf, data_size);
+				CHECK_NOT_RETURN(dumpRet, "aclrtMallocHost failed");
+				if(dumpRet == 0){
+					dumpRet = aclrtMemcpy(hostBuf, data_size, rgbMem->getMem(), data_size, ACL_MEMCPY_DEVICE_TO_HOST);
+					CHECK_NOT_RETURN(dumpRet, "D2H aclrtMemcpy failed");
+				}
+				string file_name = "./yuv_pic/vdec_out"+ getName() +".rgb" ;
+				FILE *outputFile = (dumpRet == 0) ? fopen(file_name.c_str(), "a") : nullptr;
+				if(outputFile){
+					fwrite(hostBuf, data_size, sizeof(char), outputFile);
+					fclose(outputFile);
+				}
+				if(hostBuf != nullptr){ dumpRet = aclrtFreeHost(hostBuf); CHECK_NOT_RETURN(dumpRet, "aclrtFreeHost failed"); }
+			}
+			count_frame++;
+		}
+#endif
+	}else if(m_bPause){
+		std::this_thread::sleep_for(std::chrono::milliseconds(3));
+	}
+
+	cout << "callback acldvppFree." << endl;
+    acldvppFree((uint8_t*)outputDataDev);
+	m_vdecQueue.addHead(); // give the input buffer of this frame back to decode_thread()
+	int descRet = acldvppDestroyStreamDesc(input);
+	CHECK_NOT_RETURN(descRet, "acldvppDestroyStreamDesc failed");
+	descRet = acldvppDestroyPicDesc(output);
+	CHECK_NOT_RETURN(descRet, "acldvppDestroyPicDesc failed");
+	cout << "callback exit." << endl;
+}
+
+bool DvppDecoder::init_vdpp(int devId){
+    // Binds the decoder to device `devId`: fetches its context and a VDEC channel and allocates the device input buffers.
+    cout << "Init device....\n";
+	m_dvpp_deviceId = devId; // the parameter used to be ignored in favour of the member
+	// DvppSourceManager performs aclInit when created and aclFinalize when destroyed
+	DvppSourceManager* pSrcMgr = DvppSourceManager::getInstance();
+	m_context = pSrcMgr->getContext(m_dvpp_deviceId);
+	m_dvpp_channel = pSrcMgr->getChannel(m_dvpp_deviceId);
+	if(m_dvpp_channel < 0){
+		cout << "该设备channel已经用完了" << endl;
+		return false;
+	}
+
+    cout << "devProgram start, device: " << m_dvpp_deviceId << endl;
+    if (aclrtSetCurrentContext(m_context) != ACL_ERROR_NONE) {
+		cout << "aclrtSetCurrentContext failed" << endl;
+		return false;
+	}
+
+	// The queue needs more than 16 slots, otherwise widely spaced key frames can stall the callback against the ring
+	for (size_t i = 0; i < 20; i++){
+		void *vdecInputbuf = nullptr;
+		if(acldvppMalloc((void **)&vdecInputbuf, g_pkt_size) != ACL_ERROR_NONE){
+		    cout << "acldvppMalloc failed" << endl;
+			return false;
+		}
+		m_vec_vdec.push_back(vdecInputbuf);
+    }
+
+	if(!m_vdecQueue.init(m_vec_vdec)){
+		return false;
+	}
+
+#ifdef TEST_DECODER
+	int hostRet = aclrtMallocHost(&vdecHostAddr, frame_width * frame_height * 3); // stored first so the check cannot repeat the allocation
+    CHECK_NOT_RETURN(hostRet, "aclrtMallocHost failed");
+#endif
+	cout << "init vdpp success!" << endl;
+	return true;
+}
+
+bool DvppDecoder::init(FFDecConfig& cfg){
+	// Full initialisation: FFmpeg demuxer, DVPP device resources, then the colour converter.
+	// Returns true only when all three succeeded; it stops at the first stage that fails.
+	m_cfg = cfg;
+
+	// A uri that can be opened for reading as a local file is a file source; anything else counts as a live stream.
+	// ifstream rather than fstream: the default in|out mode fails on read-only files.
+	ifstream infile(cfg.uri);
+	m_bReal = !infile.is_open();
+
+	post_decoded_cbk = cfg.post_decoded_cbk;
+    decode_finished_cbk = cfg.decode_finished_cbk;
+
+	if(!init_FFmpeg(cfg.uri.c_str(), cfg.force_tcp)){
+		return false;
+	}
+
+    m_dvpp_deviceId = atoi(cfg.gpuid.c_str());
+	if (!init_vdpp(m_dvpp_deviceId))
+	{
+		releaseFFmpeg();
+		return false; // do not carry on to the converter with a device that failed to initialise
+	}
+
+	// a converter that failed to initialise is released again and reported as a failure
+	if(!picConverter.init(m_context)){
+		picConverter.release();
+		return false;
+	}
+
+	return true;
+}
+
+
+bool DvppDecoder::start(){
+	// Launches decode_thread() on m_decode_thread; returns false if the thread could not be created.
+	m_bRunning = true;
+
+	int ret = pthread_create(&m_decode_thread, 0,
+        [](void* arg) -> void*
+        {
+            static_cast<DvppDecoder*>(arg)->decode_thread();
+            return nullptr;
+        }
+    , this);
+	if(ret != 0){ cout << "pthread_create failed, ret: " << ret << endl; m_bRunning = false; m_decode_thread = 0; }
+	return ret == 0;
+}
+
+void DvppDecoder::close(){ // Stops the decode thread, waits for it, then frees the debug host buffer; may be called more than once.
+	m_bRunning=false;
+	if(m_decode_thread != 0){
+		pthread_join(m_decode_thread,0); m_decode_thread = 0; // never join the same pthread_t twice
+	}
+#ifdef TEST_DECODER
+	if(vdecHostAddr != nullptr){
+		int freeRet = aclrtFreeHost(vdecHostAddr);
+		vdecHostAddr = nullptr; // file-level global: cleared so that no later close() frees it again
+		CHECK_NOT_RETURN(freeRet, "aclrtFreeHost failed");
+	}
+#endif
+}
+
+bool DvppDecoder::sendVdecEos(aclvdecChannelDesc *vdecChannelDesc){
+    // Submits an end-of-stream frame to the channel. Returns true only if the stream descriptor was created,
+    // flagged as EOS and accepted by aclvdecSendFrame; once created, the descriptor is destroyed either way.
+    acldvppStreamDesc *eosDesc = acldvppCreateStreamDesc();
+    if (nullptr == eosDesc) {
+        cout << "fail to create input stream desc" << endl;
+        return false;
+    }
+
+    bool sent = false;
+    aclError ret = acldvppSetStreamDescEos(eosDesc, 1);
+    if (ret == ACL_SUCCESS) {
+        // send vdec eos frame. when all vdec callback are completed, aclvdecSendFrame can be returned.
+        ret = aclvdecSendFrame(vdecChannelDesc, eosDesc, nullptr, nullptr, nullptr);
+        if (ret == ACL_SUCCESS) {
+            sent = true;
+        } else {
+            cout << "fail to send eos frame, ret=" << ret << endl;
+        }
+    } else {
+        cout << "fail to set eos for stream desc, errorCode = " << static_cast<int32_t>(ret) << endl;
+    }
+    (void)acldvppDestroyStreamDesc(eosDesc);
+    return sent;
+}
+
+void DvppDecoder::releaseFFmpeg(){
+	// Frees the bitstream filter and closes the input; both pointers end up null.
+	m_dec_keyframe = false;
+	if(h264bsfc != nullptr){
+		av_bsf_free(&h264bsfc);
+		h264bsfc = nullptr;
+	}
+	if (fmt_ctx != nullptr) {
+		avformat_close_input(&fmt_ctx);
+		fmt_ctx = nullptr;
+	}
+}
+
+void DvppDecoder::releaseResource(){
+	// Releases the FFmpeg objects, frees the device input buffers and returns the VDEC channel.
+	// Idempotent: a second call finds no buffers and no channel, so a channel id is never handed back twice.
+	releaseFFmpeg();
+	for(void* buf : m_vec_vdec){
+		if(buf != nullptr){
+			acldvppFree((uint8_t*)buf);
+		}
+	}
+	m_vec_vdec.clear();
+	if(m_dvpp_channel < 0) return; // no channel was obtained, or it has already been released
+	DvppSourceManager::getInstance()->releaseChannel(m_dvpp_deviceId, m_dvpp_channel);
+	m_dvpp_channel = -1;
+}
+
+void DvppDecoder::decode_thread(){
+	// Decode thread body: binds the context, starts the report thread, creates the VDEC channel, then reads
+	// packets with FFmpeg, runs them through the bitstream filter and submits them until stopped or the input ends.
+	// NOTE(review): doProcessReport() returns as soon as m_bRunning is false, so after close() the report
+	// thread may already be gone when the EOS below is sent — confirm no pending callback is lost.
+    int frame_count = 0;
+    long startTime = GetCurTimeUs();
+
+	int ret = aclrtSetCurrentContext(m_context);
+	CHECK_AND_RETURN_NOVALUE(ret, "aclrtSetCurrentContext failed");
+
+	pthread_t report_thread;
+	ret = pthread_create(&report_thread, nullptr, ReportThd, (void *)this);
+	if(ret != 0){
+		cout << "pthread_create failed" << endl;
+		return;
+	}
+
+    // Create and configure the channel description (channel ids are 0~31 at the dvpp level) and the
+    // reusable packet. err names the first step that failed; the chain stops there.
+    const char* err = nullptr;
+    aclvdecChannelDesc *vdecChannelDesc = aclvdecCreateChannelDesc();
+	AVPacket* pkt = av_packet_alloc();
+    if (vdecChannelDesc == nullptr) { ret = -1; err = "aclvdecCreateChannelDesc failed"; }
+    else if (pkt == nullptr) { ret = -1; err = "av_packet_alloc failed"; }
+    else if ((ret = aclvdecSetChannelDescChannelId(vdecChannelDesc, m_dvpp_channel)) != 0) err = "aclvdecSetChannelDescChannelId failed";
+    else if ((ret = aclvdecSetChannelDescThreadId(vdecChannelDesc, report_thread)) != 0) err = "aclvdecSetChannelDescThreadId failed";
+    else if ((ret = aclvdecSetChannelDescCallback(vdecChannelDesc, VdecCallback)) != 0) err = "aclvdecSetChannelDescCallback failed";
+    else if ((ret = aclvdecSetChannelDescEnType(vdecChannelDesc, enType)) != 0) err = "aclvdecSetChannelDescEnType failed";
+    else if ((ret = aclvdecSetChannelDescOutPicFormat(vdecChannelDesc, PIXEL_FORMAT_YUV_SEMIPLANAR_420)) != 0) err = "aclvdecSetChannelDescOutPicFormat failed";
+    else if ((ret = aclvdecCreateChannel(vdecChannelDesc)) != 0) err = "aclvdecCreateChannel failed";
+    if (err != nullptr) {
+        // The report thread is already running: stop and join it instead of leaving it spinning forever.
+        CHECK_NOT_RETURN(ret, err);
+        av_packet_free(&pkt);
+        if (vdecChannelDesc != nullptr) { aclvdecDestroyChannelDesc(vdecChannelDesc); }
+        m_bRunning = false; // doProcessReport() loops on this flag
+        pthread_join(report_thread, nullptr);
+        m_bFinished = true;
+        return;
+    }
+	av_init_packet( pkt );
+
+	acldvppStreamDesc *input_stream_desc = nullptr;
+	acldvppPicDesc *output_pic_desc = nullptr;
+	Vdec_CallBack_UserData *user_data = nullptr;
+    void *vdecInputbuf = nullptr;
+	void *vdecOutputBuf = nullptr;
+	while (m_bRunning)
+	{
+		// pausing only stops reading for file sources; a live stream keeps being read
+		if (!m_bReal && m_bPause)
+		{
+			std::this_thread::sleep_for(std::chrono::milliseconds(3));
+			continue;
+		}
+
+		int result = av_read_frame(fmt_ctx, pkt);
+		if (result == AVERROR_EOF || result < 0)
+		{
+			cout << "Failed to read frame!" << endl;
+			break;
+		}
+
+		if (m_dec_keyframe && !(pkt->flags & AV_PKT_FLAG_KEY)) {
+			av_packet_unref(pkt);
+			continue;
+		}
+
+		if (stream_index == pkt->stream_index){
+            ret = av_bsf_send_packet(h264bsfc, pkt);
+            if(ret < 0) {
+                cout << "av_bsf_send_packet error" << endl;
+            }
+
+            while ((ret = av_bsf_receive_packet(h264bsfc, pkt)) == 0) {
+                if(pkt->size > g_pkt_size){
+                    cout << "pkt size 大于 预设" << endl;
+					break;
+                }
+				// Wait here for a free input buffer. Going back to av_bsf_receive_packet() instead, as the
+				// old `continue` did, would overwrite this packet and silently drop it.
+				while(m_bRunning && (vdecInputbuf = m_vdecQueue.getTail()) == nullptr){
+					std::this_thread::sleep_for(std::chrono::milliseconds(3));
+				}
+				if(!m_bRunning || vdecInputbuf == nullptr){
+					break;
+				}
+				ret = aclrtMemcpy(vdecInputbuf, g_pkt_size, pkt->data, pkt->size, ACL_MEMCPY_HOST_TO_DEVICE);
+				if(ACL_ERROR_NONE != ret){
+					cout << "aclrtMemcpy failed" << endl;
+					goto end_flag;
+				}
+                ret = acldvppMalloc((void **)&vdecOutputBuf, m_vdec_out_size);
+                if(ret != ACL_ERROR_NONE){
+                    cout << "acldvppMalloc failed" << endl;
+					goto end_flag;
+                }
+                /************ decode *************/
+                err = nullptr;
+                input_stream_desc = acldvppCreateStreamDesc();
+                output_pic_desc = acldvppCreatePicDesc();
+                user_data = new Vdec_CallBack_UserData;
+                user_data->frameId = frame_count;
+                user_data->startTime = startTime;
+                user_data->sendTime = GetCurTimeUs();
+				user_data->self = this;
+                if (input_stream_desc == nullptr) { ret = -1; err = "acldvppCreateStreamDesc error"; }
+                else if (output_pic_desc == nullptr) { ret = -1; err = "acldvppCreatePicDesc error"; }
+                else if ((ret = acldvppSetStreamDescData(input_stream_desc, vdecInputbuf)) != 0) err = "acldvppSetStreamDescData failed";
+                else if ((ret = acldvppSetStreamDescSize(input_stream_desc, pkt->size)) != 0) err = "acldvppSetStreamDescSize failed";
+                else if ((ret = acldvppSetPicDescData(output_pic_desc, vdecOutputBuf)) != 0) err = "acldvppSetPicDescData failed";
+                else if ((ret = acldvppSetPicDescSize(output_pic_desc, m_vdec_out_size)) != 0) err = "acldvppSetPicDescSize failed";
+                else {
+                    cout << "send frame" << endl;
+                    ret = aclvdecSendFrame(vdecChannelDesc, input_stream_desc, output_pic_desc, nullptr, reinterpret_cast<void *>(user_data));
+                    if (ret != 0) err = "aclvdecSendFrame failed";
+                }
+                if (err != nullptr) {
+                    // Frame not accepted: release what the callback would have released, and do not publish the input slot.
+                    CHECK_NOT_RETURN(ret, err);
+                    if (input_stream_desc != nullptr) { acldvppDestroyStreamDesc(input_stream_desc); }
+                    if (output_pic_desc != nullptr) { acldvppDestroyPicDesc(output_pic_desc); }
+                    acldvppFree(vdecOutputBuf);
+                    delete user_data;
+                } else {
+                    frame_count++;
+                    m_vdecQueue.addTail();
+                }
+				vdecInputbuf = nullptr;
+				vdecOutputBuf = nullptr;
+            }
+            /****************************/
+		}
+		av_packet_unref(pkt);
+	}
+
+end_flag:
+	av_packet_free(&pkt);
+	sendVdecEos(vdecChannelDesc);
+
+	ret = aclvdecDestroyChannel(vdecChannelDesc);
+	CHECK_NOT_RETURN(ret, "aclvdecDestroyChannel failed");
+	ret = aclvdecDestroyChannelDesc(vdecChannelDesc);
+    CHECK_NOT_RETURN(ret, "aclvdecDestroyChannelDesc failed");
+	// the report thread must exit only after the channel has been destroyed
+	m_bRunning = false;
+	ret = pthread_join(report_thread, nullptr);
+	CHECK_NOT_RETURN(ret, "pthread_join failed");
+	if(m_vdecQueue.length() > 0){
+		cout << m_vdecQueue.length() << endl;
+	}
+	if(vdecOutputBuf != nullptr){
+		acldvppFree((uint8_t*)vdecOutputBuf);
+    	vdecOutputBuf = nullptr;
+	}
+	m_bFinished = true; // nothing else sets it, so isFinished() could never report completion
+	cout << "read thread exit." << endl;
+}
+
+float DvppDecoder::fps(){
+	return m_fps; // set by init_FFmpeg from the stream's avg_frame_rate; 0.0 before that
+}
+
+bool DvppDecoder::isSurport(FFDecConfig& cfg){
+	bool bRet = init(cfg); // runs the full init(): on success the decoder is left initialised, not merely probed
+	return bRet;
+}
+
+bool DvppDecoder::getResolution( int &width, int &height ){
+	width = frame_width;   // both values are filled in by init_FFmpeg and are 0 before that
+	height = frame_height;
+	return true;           // always true, even when nothing has been initialised
+}
+
+void DvppDecoder::pause(){
+	m_bPause = true; // the callback skips conversion; decode_thread stops reading only when the source is a file
+}
+
+void DvppDecoder::resume(){
+	m_bPause = false; // clears the flag set by pause()
+}
+
+void DvppDecoder::setDecKeyframe(bool bKeyframe)
+{	// When true, decode_thread drops every packet that is not flagged AV_PKT_FLAG_KEY.
+	m_dec_keyframe = bKeyframe;
+}
+
+bool DvppDecoder::isRunning(){
+	return m_bRunning; // set by start(); cleared by close() and at the end of decode_thread
+}
+
+bool DvppDecoder::isFinished(){
+	return m_bFinished; // initialised to false in the constructor
+}
+
+bool DvppDecoder::isPausing(){
+	return m_bPause; // true between pause() and resume()
+}
+
+int DvppDecoder::getCachedQueueLength(){
+	// TODO: not implemented yet; always reports 0
+	return 0;
+}
+
+FFImgInfo* DvppDecoder::snapshot(){
+	// TODO: not implemented yet; always returns nullptr
+	return nullptr;
+}
+
+void DvppDecoder::setPostDecArg(const void* postDecArg){
+	m_postDecArg = postDecArg; // stored only; nothing else in DvppDecoder.cpp reads it
+}
+
+void DvppDecoder::setFinishedDecArg(const void* finishedDecArg){
+	m_finishedDecArg = finishedDecArg; // stored only; nothing else in DvppDecoder.cpp reads it
+}
\ No newline at end of file
diff --git a/src/dvpp/DvppDecoder.h b/src/dvpp/DvppDecoder.h
new file mode 100644
index 0000000..db5064b
--- /dev/null
+++ b/src/dvpp/DvppDecoder.h
@@ -0,0 +1,111 @@
+#include<string>
+#include <pthread.h>
+
+#include "dvpp_headers.h"
+#include "depend_headers.h"
+#include "user_mem.h"
+#include "CircularQueue.hpp"
+#include "VpcPicConverter.h"
+
+#include <queue>
+
+using namespace std;
+
+#define TEST_DECODER
+
+
+class DvppDecoder{ // decoder whose getDecoderType() reports DECODER_TYPE_DVPP
+public:
+    DvppDecoder();
+    ~DvppDecoder();
+    bool init(FFDecConfig& cfg);
+    void close();
+    bool start();
+    void pause();
+    void resume();
+
+    void setDecKeyframe(bool bKeyframe);
+
+    bool isRunning();
+    bool isFinished();
+    bool isPausing();
+    bool getResolution( int &width, int &height );
+
+    bool isSurport(FFDecConfig& cfg);
+
+    int getCachedQueueLength();
+
+    float fps();
+
+    DECODER_TYPE getDecoderType(){ return DECODER_TYPE_DVPP; }
+
+    void setName(string nm){
+        m_dec_name = nm;
+    }
+
+    string getName(){
+        return m_dec_name;
+    }
+
+    FFImgInfo* snapshot();
+
+    void setPostDecArg(const void* postDecArg);
+    void setFinishedDecArg(const void* finishedDecArg);
+
+public: // presumably public only so C-style thread/callback trampolines can reach them — confirm
+    void doProcessReport();
+    void doVdppVdecCallBack(acldvppStreamDesc *input, acldvppPicDesc *output, DvppDecoder *self);
+
+private:
+    void decode_thread();
+    void post_decode_thread();
+    void releaseFFmpeg();
+    void releaseResource();
+    bool init_FFmpeg(const char* uri, bool force_tcp);
+    bool init_vdpp(int _deviceId);
+
+    bool sendVdecEos(aclvdecChannelDesc *vdecChannelDesc);
+
+private:
+    AVStream* stream;
+    int stream_index;
+    AVFormatContext *fmt_ctx;
+    AVPixelFormat pix_fmt;
+    uint32_t m_vdec_out_size{0};
+    int frame_width{0};
+	int frame_height{0};
+
+    int m_dvpp_deviceId {-1};
+    int m_dvpp_channel {-1};
+
+    pthread_t m_decode_thread;
+    pthread_t m_post_decode_thread;
+    
+    bool m_bRunning;
+    bool m_bFinished;
+
+    bool m_bPause;
+
+    bool m_bReal; // whether the source is a live (real-time) stream
+
+    float m_fps;
+
+    FFDecConfig m_cfg;
+    string m_dec_name;
+    bool m_dec_keyframe;
+
+    AVBSFContext * h264bsfc{nullptr};
+
+    aclrtContext m_context;
+    acldvppStreamFormat enType;
+
+    vector<void*> m_vec_vdec;
+    CircularQueue<void *> m_vdecQueue;
+
+    const void * m_postDecArg;      // opaque pointer stored by setPostDecArg()
+    POST_DECODE_CALLBACK post_decoded_cbk;
+    const void * m_finishedDecArg;  // opaque pointer stored by setFinishedDecArg()
+    DECODE_FINISHED_CALLBACK decode_finished_cbk;
+
+    VpcPicConverter picConverter;
+};
\ No newline at end of file
diff --git a/src/dvpp/DvppDecoder2.h b/src/dvpp/DvppDecoder2.h
new file mode 100644
index 0000000..d044f9b
--- /dev/null
+++ b/src/dvpp/DvppDecoder2.h
@@ -0,0 +1,192 @@
+#include<string>
+
+#include "depend_headers.h"
+#include "CircularQueue.hpp"
+#include "FFReceiver.h"
+#include "DvppDec.h"
+
+using namespace std;
+
+class DvppDecoder2{ // pairs an FFReceiver with a DvppDec that share one packet queue
+public:
+    DvppDecoder2();
+    ~DvppDecoder2();
+    bool init(FFDecConfig cfg);
+    void close();
+    bool start();
+    void pause();
+    void resume();
+
+    void setDecKeyframe(bool bKeyframe);
+
+    bool isRunning();
+    bool isFinished();
+    bool isPausing();
+    bool getResolution( int &width, int &height );
+
+    bool isSurport(FFDecConfig& cfg);
+
+    float fps();
+
+    void setName(string nm){
+        m_dec_name = nm;
+    }
+
+    string getName(){
+        return m_dec_name;
+    }
+
+    FFImgInfo* snapshot();
+
+    void setPostDecArg(const void* postDecArg);
+    void setFinishedDecArg(const void* finishedDecArg);
+
+    int getCachedQueueLength();
+
+public:
+    void taskFinishing(); // invoked through receiver_finish_cbk when the receiver's read thread ends
+
+private:
+    // Pointer members carry default initialisers so they hold a defined value before init() runs.
+    FFDecConfig m_cfg;
+    string m_dec_name;
+
+    CircularQueue<AVPacket *> *m_pktQueueptr{nullptr};
+    FFReceiver m_receiver;
+    DvppDec m_decoder;
+
+    const void * m_finishedDecArg{nullptr};
+    DECODE_FINISHED_CALLBACK decode_finished_cbk{nullptr};
+
+};
+
+void receiver_finish_cbk(const void* userPtr){
+    // Trampoline: userPtr is treated as the owning DvppDecoder2; a null pointer is ignored.
+    DvppDecoder2* owner = (DvppDecoder2*)userPtr;
+    if(owner == nullptr) return;
+    owner->taskFinishing();
+}
+
+DvppDecoder2::DvppDecoder2(){
+    m_pktQueueptr = new CircularQueue<AVPacket *>(); // queue later handed to both the receiver and the decoder in init()
+}
+
+DvppDecoder2::~DvppDecoder2(){
+    delete m_pktQueueptr; // NOTE(review): nothing here stops m_receiver/m_decoder first — confirm their threads no longer use the queue
+    m_pktQueueptr = nullptr;
+}
+
+bool DvppDecoder2::init(FFDecConfig cfg){
+    // Opens the input through the receiver, then configures the DVPP decoder from the stream it found.
+    ReceiverConfig receiver_config;
+    receiver_config.uri = cfg.uri.c_str();
+    receiver_config.dec_name = cfg.dec_name;
+    receiver_config.force_tcp = cfg.force_tcp;
+    receiver_config.pktQueueptr = m_pktQueueptr;
+    receiver_config.receiver_finished_cbk = receiver_finish_cbk;
+    AVCodecContext* avctx = m_receiver.init_FFmpeg(receiver_config);
+    if(avctx == nullptr){
+        return false;
+    }
+    m_receiver.setFinishCbkArg(this);
+    // Only H.264 (codec_id 0) and HEVC (codec_id 1) are mapped; any other codec is rejected.
+    DvppDecConfig dec_cfg;
+    if(avctx->codec_id == AV_CODEC_ID_H264){
+        dec_cfg.codec_id = 0;
+    }else if(avctx->codec_id == AV_CODEC_ID_HEVC){
+        dec_cfg.codec_id = 1;
+    }else {
+        return false;
+    }
+    dec_cfg.dec_name = cfg.dec_name;
+    dec_cfg.post_decoded_cbk = cfg.post_decoded_cbk;
+    dec_cfg.dev_id = cfg.gpuid;
+    dec_cfg.force_tcp = cfg.force_tcp;
+    dec_cfg.skip_frame = cfg.skip_frame;
+    dec_cfg.profile = avctx->profile;
+    dec_cfg.pktQueueptr = m_pktQueueptr;
+    dec_cfg.width = avctx->width;
+    dec_cfg.height = avctx->height;
+    bool bRet = m_decoder.init_vdpp(dec_cfg);
+    if(!bRet){
+        return false;
+    }
+
+    m_cfg = cfg;
+    // Adopt the configured name unless setName() already supplied one, so logs are not unnamed.
+    if(m_dec_name.empty()) m_dec_name = cfg.dec_name;
+    decode_finished_cbk = cfg.decode_finished_cbk;
+    return true;
+}
+
+bool DvppDecoder2::isSurport(FFDecConfig& cfg){
+    return true; // cfg is not inspected; every configuration is reported as supported
+}
+
+bool DvppDecoder2::start(){
+    m_receiver.start(); // NOTE(review): the bool result of FFReceiver::start() is discarded
+    m_decoder.start();
+    return true; // always reports success
+}
+
+void DvppDecoder2::close(){
+    m_receiver.close(); // only the receiver is closed here; m_decoder.close() is called from taskFinishing()
+}
+
+void DvppDecoder2::setPostDecArg(const void* postDecArg){
+    m_decoder.setPostDecArg(postDecArg); // forwarded to the decoder; not stored in this class
+}
+
+void DvppDecoder2::setFinishedDecArg(const void* finishedDecArg){
+    m_finishedDecArg = finishedDecArg; // later passed to decode_finished_cbk by taskFinishing()
+}
+
+void DvppDecoder2::pause(){
+    m_receiver.pause(); // pausing is delegated entirely to the receiver
+}
+
+void DvppDecoder2::resume(){
+    m_receiver.resume(); // resuming is delegated entirely to the receiver
+}
+
+void DvppDecoder2::setDecKeyframe(bool bKeyframe){
+    m_receiver.setDecKeyframe(bKeyframe); // the receiver applies the key-frame filter while reading packets
+}
+
+bool DvppDecoder2::isRunning(){
+    return m_receiver.isRunning(); // reflects the receiver's state only
+}
+
+bool DvppDecoder2::isFinished(){
+    return m_receiver.isFinished(); // reflects the receiver's state only
+}
+
+bool DvppDecoder2::isPausing(){
+    return m_receiver.isPausing(); // reflects the receiver's state only
+}
+
+bool DvppDecoder2::getResolution(int &width, int &height){
+    return m_receiver.getResolution(width, height); // size comes from the receiver's stream information
+}
+
+float DvppDecoder2::fps(){
+    return m_receiver.fps(); // frame rate as reported by the receiver
+}
+
+FFImgInfo* DvppDecoder2::snapshot(){
+	// TODO: not implemented yet; no image is captured
+	return nullptr; // callers must handle the null result
+}
+
+int DvppDecoder2::getCachedQueueLength(){
+    return 0; // placeholder: the packet queue is not consulted
+}
+
+void DvppDecoder2::taskFinishing(){
+    // Runs when the receiver's read thread ends: stop the decoder, then notify the owner.
+    m_decoder.close();
+    if(decode_finished_cbk != nullptr){ // the configuration may not have supplied a callback
+        decode_finished_cbk(m_finishedDecArg);
+    }
+    LOG_INFO("[{}]- task finished.", m_dec_name);
+}
\ No newline at end of file
diff --git a/src/dvpp/DvppDecoderApi.cpp b/src/dvpp/DvppDecoderApi.cpp
new file mode 100644
index 0000000..fd2f54e
--- /dev/null
+++ b/src/dvpp/DvppDecoderApi.cpp
@@ -0,0 +1,133 @@
+#include "DvppDecoderApi.h"
+#include "DvppDecoder2.h"
+
+// Starts without a backing decoder; init() creates it.
+DvppDecoderApi::DvppDecoderApi() : m_pDecoder(nullptr){
+}
+
+DvppDecoderApi::~DvppDecoderApi(){
+    // Destroys the wrapped decoder if init() ever created one.
+    // `delete` on a null pointer is a no-op, so no explicit check is needed.
+    delete m_pDecoder;
+    m_pDecoder = nullptr;
+}
+
+bool DvppDecoderApi::init(FFDecConfig& cfg){
+    // Creates the wrapped decoder and initialises it with cfg.
+    // NOTE(review): a decoder left from an earlier init() is overwritten without being freed.
+    m_pDecoder = new DvppDecoder2();
+    if(m_pDecoder == nullptr) return false;
+    return m_pDecoder->init(cfg);
+}
+
+void DvppDecoderApi::close(){
+    // No-op until init() has created the decoder.
+    if(m_pDecoder == nullptr) return;
+    m_pDecoder->close();
+}
+
+bool DvppDecoderApi::start(){
+    // Forwards to the wrapped decoder.
+    // Returns false when init() has not created one yet.
+    if(m_pDecoder == nullptr) return false;
+    return m_pDecoder->start();
+}
+
+void DvppDecoderApi::pause(){
+    // No-op until init() has created the decoder.
+    if(m_pDecoder == nullptr) return;
+    m_pDecoder->pause();
+}
+
+void DvppDecoderApi::resume(){
+    // No-op until init() has created the decoder.
+    if(m_pDecoder == nullptr) return;
+    m_pDecoder->resume();
+}
+
+void DvppDecoderApi::setDecKeyframe(bool bKeyframe){
+    // The setting is dropped when no decoder exists yet.
+    if(m_pDecoder == nullptr) return;
+    m_pDecoder->setDecKeyframe(bKeyframe);
+}
+
+bool DvppDecoderApi::isRunning(){
+    // Forwards to the wrapped decoder.
+    // Returns false when init() has not created one yet.
+    if(m_pDecoder == nullptr) return false;
+    return m_pDecoder->isRunning();
+}
+
+bool DvppDecoderApi::isFinished(){
+    // Forwards to the wrapped decoder.
+    // Returns false when init() has not created one yet.
+    if(m_pDecoder == nullptr) return false;
+    return m_pDecoder->isFinished();
+}
+
+bool DvppDecoderApi::isPausing(){
+    // Forwards to the wrapped decoder.
+    // Returns false when init() has not created one yet.
+    if(m_pDecoder == nullptr) return false;
+    return m_pDecoder->isPausing();
+}
+
+bool DvppDecoderApi::getResolution(int &width, int &height){
+    // Forwards to the wrapped decoder.
+    // Without a decoder, width/height are left untouched and false is returned.
+    if(m_pDecoder == nullptr) return false;
+    return m_pDecoder->getResolution(width, height);
+}
+
+bool DvppDecoderApi::isSurport(FFDecConfig& cfg){
+    // Forwards to the wrapped decoder.
+    // Returns false when init() has not created one yet.
+    if(m_pDecoder == nullptr) return false;
+    return m_pDecoder->isSurport(cfg);
+}
+
+float DvppDecoderApi::fps(){
+    // Forwards to the wrapped decoder.
+    // Returns 0.0 when init() has not created one yet.
+    if(m_pDecoder == nullptr) return 0.0;
+    return m_pDecoder->fps();
+}
+
+int DvppDecoderApi::getCachedQueueLength(){
+    // Forwards to the wrapped decoder.
+    // Returns 0 when init() has not created one yet.
+    if(m_pDecoder == nullptr) return 0;
+    return m_pDecoder->getCachedQueueLength();
+}
+
+void DvppDecoderApi::setName(string nm){
+    // The name is dropped when no decoder exists yet.
+    if(m_pDecoder == nullptr) return;
+    m_pDecoder->setName(nm);
+}
+
+string DvppDecoderApi::getName(){
+    // Returns the wrapped decoder's name, or an empty string before init().
+    // (`return nullptr;` would build a std::string from a null char pointer: undefined behaviour.)
+    if(m_pDecoder == nullptr) return string();
+    return m_pDecoder->getName();
+}
+
+FFImgInfo* DvppDecoderApi::snapshot(){
+    // Forwards to the wrapped decoder.
+    // Returns nullptr when init() has not created one yet.
+    if(m_pDecoder == nullptr) return nullptr;
+    return m_pDecoder->snapshot();
+}
+
+void DvppDecoderApi::setPostDecArg(const void* postDecArg){
+    // The argument is dropped when no decoder exists yet.
+    if(m_pDecoder == nullptr) return;
+    m_pDecoder->setPostDecArg(postDecArg);
+}
+
+void DvppDecoderApi::setFinishedDecArg(const void* finishedDecArg){
+    // The argument is dropped when no decoder exists yet.
+    if(m_pDecoder == nullptr) return;
+    m_pDecoder->setFinishedDecArg(finishedDecArg);
+}
\ No newline at end of file
diff --git a/src/dvpp/DvppDecoderApi.h b/src/dvpp/DvppDecoderApi.h
new file mode 100644
index 0000000..c465e03
--- /dev/null
+++ b/src/dvpp/DvppDecoderApi.h
@@ -0,0 +1,44 @@
+#include<string>
+#include <pthread.h>
+
+#include "depend_headers.h"
+#include "../interface/AbstractDecoder.h"
+
+using namespace std;
+
+class DvppDecoder2;
+
+class DvppDecoderApi : public AbstractDecoder{ // AbstractDecoder facade that forwards to a heap-allocated DvppDecoder2
+public:
+    DvppDecoderApi();
+    ~DvppDecoderApi();
+    bool init(FFDecConfig& cfg);
+    void close();
+    bool start();
+    void pause();
+    void resume();
+
+    void setDecKeyframe(bool bKeyframe);
+
+    bool isRunning();
+    bool isFinished();
+    bool isPausing();
+    bool getResolution( int &width, int &height );
+
+    bool isSurport(FFDecConfig& cfg);
+
+    int getCachedQueueLength();
+
+    float fps();
+
+    FFImgInfo* snapshot();
+
+    DECODER_TYPE getDecoderType(){ return DECODER_TYPE_DVPP; }
+    void setName(string nm);
+    string getName();
+
+    void setPostDecArg(const void* postDecArg);
+    void setFinishedDecArg(const void* finishedDecArg);
+private:
+    DvppDecoder2* m_pDecoder; // owned; only forward-declared in this header
+};
\ No newline at end of file
diff --git a/src/dvpp/DvppRgbMemory.hpp b/src/dvpp/DvppRgbMemory.hpp
new file mode 100644
index 0000000..b6bc750
--- /dev/null
+++ b/src/dvpp/DvppRgbMemory.hpp
@@ -0,0 +1,25 @@
+#include<string>
+
+#include "dvpp_headers.h"
+
+using namespace std;
+
+class DvppRgbMemory : public DeviceRgbMemory{ // DeviceRgbMemory whose buffer comes from acldvppMalloc
+public:
+     DvppRgbMemory(int _channel, int _width, int _height, int _size, string _id, string _dev_id, bool _key_frame)
+     :DeviceRgbMemory(_channel, _width, _height, _id, _dev_id, _key_frame, false){
+        data_size = _size;
+        int ret = acldvppMalloc((void **)&pHwRgb, data_size);
+        if(ret != ACL_ERROR_NONE){
+            cout << "acldvppMalloc failed" << endl;
+            pHwRgb = nullptr; // leave a well-defined "no buffer" state so callers and the destructor can test it
+        }
+    }
+
+    ~DvppRgbMemory(){
+        if (pHwRgb) {
+            acldvppFree((uint8_t*)pHwRgb);
+            pHwRgb = nullptr; // cleared after the free so the pointer is not left dangling
+        }
+    }
+};
\ No newline at end of file
diff --git a/src/dvpp/DvppSourceManager.cpp b/src/dvpp/DvppSourceManager.cpp
new file mode 100644
index 0000000..a3a0f35
--- /dev/null
+++ b/src/dvpp/DvppSourceManager.cpp
@@ -0,0 +1,63 @@
+#include "DvppSourceManager.h"
+
+#include "dvpp_headers.h"
+#include "depend_headers.h"
+
+using namespace std;
+
+DvppSourceManager::~DvppSourceManager()
+{
+    // Destroy every context this manager created; a failure is logged
+    // and the remaining contexts are still processed.
+    for(auto& entry : ctxMap){
+        if(aclrtDestroyContext(entry.second) != ACL_ERROR_NONE){
+            LOG_ERROR("aclrtDestroyContext failed !");
+        }
+    }
+    ctxMap.clear();
+    channelMap.clear();
+    // Matches the aclInit() performed in getInstance().
+    aclFinalize();
+}
+
+aclrtContext DvppSourceManager::getContext(int devId)
+{
+    // Look the device up with find(): operator[] would insert a null entry on a miss,
+    // and that entry would survive a failed creation and later be handed to
+    // aclrtDestroyContext() by the destructor.
+    auto it = ctxMap.find(devId);
+    if (it != ctxMap.end() && it->second != nullptr){
+        return it->second;
+    }
+    // First request for this device: select it and create its context.
+    if(aclrtSetDevice(devId) != ACL_ERROR_NONE){
+        LOG_ERROR("aclrtSetDevice failed !");
+        return nullptr;
+    }
+
+    aclrtContext ctx = nullptr;
+    if (aclrtCreateContext(&ctx, devId) != ACL_ERROR_NONE) {
+        LOG_ERROR("aclrtCreateContext failed !");
+        return nullptr;
+    }
+    ctxMap[devId] = ctx; // cached so later calls for the same device reuse it
+    return ctx;
+}
+
+int DvppSourceManager::getChannel(int devId){
+    // Channel limit is provisionally 32: Huawei has no API to query it, only documentation.
+    const string prefix = "channel_" + to_string(devId) + "_";
+    for(int candidate = 0; candidate < 32; ++candidate){
+        const string key = prefix + to_string(candidate);
+        if(channelMap.count(key) == 0){
+            channelMap[key] = candidate; // reserve the first free channel for this device
+            return candidate;
+        }
+    }
+    return -1; // every channel of this device is already taken
+}
+
+void DvppSourceManager::releaseChannel(int devId, int iChannel){
+    string channelKey = "channel_" + to_string(devId) + "_" + to_string(iChannel) ; // same key format as getChannel()
+    channelMap.erase(channelKey); // erasing a key that is not present is a no-op
+}
\ No newline at end of file
diff --git a/src/dvpp/DvppSourceManager.h b/src/dvpp/DvppSourceManager.h
new file mode 100644
index 0000000..36a4b07
--- /dev/null
+++ b/src/dvpp/DvppSourceManager.h
@@ -0,0 +1,36 @@
+
+#include<map>
+#include<string>
+
+#include "dvpp_headers.h"
+
+using namespace std;
+
+class DvppSourceManager{ // process-wide registry of per-device ACL contexts and DVPP channel reservations
+public:
+    static DvppSourceManager* getInstance(){
+		static DvppSourceManager* singleton = nullptr;
+		if (singleton == nullptr){
+			int ret = aclInit(nullptr); // initialise ACL first: a failure must not leave a usable-looking instance behind
+			if (ret != ACL_ERROR_NONE) { 
+				cout << "aclInit failed" << endl; 
+				return nullptr;
+			}
+			singleton = new DvppSourceManager();
+		}
+		return singleton; // NOTE(review): creation is not synchronised; confirm the first call is single-threaded
+	}
+
+    aclrtContext getContext(int devId);
+
+	int getChannel(int devId);
+	void releaseChannel(int devId, int channel);
+
+private:
+    DvppSourceManager(){}
+	~DvppSourceManager();
+
+private:
+    map<int, aclrtContext> ctxMap;     // device id -> context created for that device
+	map<string, int> channelMap;       // "channel_<dev>_<n>" -> reserved channel number
+};
\ No newline at end of file
diff --git a/src/dvpp/FFReceiver.cpp b/src/dvpp/FFReceiver.cpp
new file mode 100644
index 0000000..84ae526
--- /dev/null
+++ b/src/dvpp/FFReceiver.cpp
@@ -0,0 +1,281 @@
+#include "FFReceiver.h"
+#include <fstream>
+
+const int g_pkt_size = 1024 * 1024;  // 单个AVPacket大小的最大值
+
+FFReceiver::FFReceiver(/* args */)
+{
+    fmt_ctx = nullptr;
+	m_bRunning = false;
+	m_pktQueueptr = nullptr;          // assigned by init_FFmpeg()
+	stream = nullptr;
+    stream_index = -1;
+    pix_fmt = AV_PIX_FMT_NONE;
+    m_dec_name = "";
+	receiver_finished_cbk = nullptr;  // assigned by init_FFmpeg()
+	m_bPause = false;
+	m_bReal = true;
+	m_finishedReceiveArg = nullptr;   // assigned by setFinishCbkArg()
+	m_bFinished = false;
+	m_dec_keyframe = false;
+	m_fps = 0.0;
+
+    m_read_thread = 0; // 0 is used by close() as "no thread to join"
+}
+
+FFReceiver::~FFReceiver()
+{
+    releaseFFmpeg(); // NOTE(review): the read thread is not joined here; confirm close() always runs first
+}
+
+AVCodecContext* FFReceiver::init_FFmpeg(ReceiverConfig config){
+	// Opens config.uri, prepares the codec context and Annex-B filter, and fills the packet queue. Returns nullptr on failure.
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 9, 100)
+    av_register_all();
+#endif
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 10, 100)
+    avcodec_register_all();
+#endif
+
+    avformat_network_init();
+
+	const char* uri = config.uri;
+    fstream infile(uri);
+	if (infile.is_open()){
+		m_bReal = false;
+		infile.close();
+	}else {
+		m_bReal = true;
+	}
+
+    m_dec_name = config.dec_name;
+	m_pktQueueptr = config.pktQueueptr;
+	receiver_finished_cbk = config.receiver_finished_cbk;
+
+	// Open the input video
+	AVDictionary *options = nullptr;
+	av_dict_set( &options, "bufsize", "655360", 0 );
+	av_dict_set( &options, "rtsp_transport", config.force_tcp ? "tcp" : "udp", 0 );
+	av_dict_set( &options, "stimeout", "30000000", 0 ); // unit: microseconds
+	fmt_ctx = avformat_alloc_context();
+	const char* input_file = uri;
+	int open_ret = avformat_open_input(&fmt_ctx, input_file, nullptr, &options);
+	av_dict_free(&options); // options the demuxer did not consume stay in the dictionary and must be freed
+	if (open_ret != 0) {
+        LOG_ERROR("[{}]- Cannot open input file: {}", m_dec_name, input_file);
+		return nullptr;
+	}
+    av_dump_format(fmt_ctx, 0, input_file, 0);
+	// Probe the stream information
+	if (avformat_find_stream_info(fmt_ctx, nullptr) < 0) {
+        LOG_ERROR("[{}]- Cannot find input stream information!", m_dec_name);
+		return nullptr;
+	}
+
+	// Locate the video stream
+	AVCodec *decoder = nullptr;
+	stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0);
+	if (stream_index < 0) {
+        LOG_ERROR("[{}]- Cannot find a video stream in the input file!", m_dec_name);
+		return nullptr;
+	}
+	AVCodec *vcodec = avcodec_find_decoder(decoder->id);
+
+	avctx = avcodec_alloc_context3(vcodec);
+	if(avctx == nullptr){
+        LOG_ERROR("[{}]- alloc AVCodecContext failed!", m_dec_name);
+		return nullptr;
+	}
+
+	do{
+		// Fetch the video stream (local name chosen so it does not shadow the `stream` member)
+		AVStream* video_stream = fmt_ctx->streams[stream_index];
+		AVCodecParameters *codecpar = video_stream->codecpar;
+		if (avcodec_parameters_to_context(avctx, codecpar) < 0)
+			break;
+
+		const AVBitStreamFilter * filter = nullptr;
+		if(codecpar->codec_id == AV_CODEC_ID_H264){
+			filter = av_bsf_get_by_name("h264_mp4toannexb");
+		}else if(codecpar->codec_id == AV_CODEC_ID_HEVC){
+			filter = av_bsf_get_by_name("hevc_mp4toannexb");
+		}else {
+            LOG_ERROR("[{}]- codec_id is not supported!", m_dec_name);
+			break;
+		}
+		// av_bsf_get_by_name() returns NULL when the filter is not available in this FFmpeg build.
+		if (filter == nullptr || av_bsf_alloc(filter, &h264bsfc) < 0){
+			LOG_ERROR("[{}]- alloc bitstream filter failed!", m_dec_name);
+			break;
+		}
+		if (avcodec_parameters_copy(h264bsfc->par_in, codecpar) < 0 || av_bsf_init(h264bsfc) < 0){
+			LOG_ERROR("[{}]- init bitstream filter failed!", m_dec_name);
+			break;
+		}
+		frame_width = codecpar->width;
+		frame_height = codecpar->height;
+		pix_fmt = (AVPixelFormat)codecpar->format;
+		m_fps = av_q2d(video_stream->avg_frame_rate);
+
+        LOG_INFO("[{}]- init ffmpeg success! input:{} frame_width:{} frame_height:{} fps:{} ", m_dec_name, input_file, frame_width, frame_height, m_fps);
+
+		for(int i = 0; i<5; i++){
+			AVPacket* pkt = av_packet_alloc();
+			av_init_packet( pkt );
+			m_vec_pkt.push_back(pkt);
+		}
+		m_pktQueueptr->init(m_vec_pkt);
+
+		return avctx;
+	}while(0);
+
+    LOG_ERROR("[{}]- init ffmpeg failed ! input:{} ", m_dec_name, input_file);
+
+	releaseFFmpeg();
+
+    return nullptr;
+}
+
+void FFReceiver::releaseFFmpeg(){
+	m_dec_keyframe = false;
+	if(h264bsfc){
+		av_bsf_free(&h264bsfc);
+		h264bsfc = nullptr;
+	}
+	if (fmt_ctx){
+		avformat_close_input(&fmt_ctx);
+		fmt_ctx = nullptr;
+	}
+    if(avctx){
+        avcodec_free_context(&avctx);
+        avctx = nullptr;
+    }
+	for(size_t i = 0; i < m_vec_pkt.size(); i++){ // size_t matches vector::size() and avoids a signed/unsigned compare
+		av_packet_free(&m_vec_pkt[i]);
+	}
+	m_vec_pkt.clear(); // drop the freed entries so a later init_FFmpeg() does not hand them to the queue again
+}
+
+void FFReceiver::read_thread(){
+	// Reader loop: fills queue slots with Annex-B video packets until stopped or the input ends, then notifies the owner.
+    int frame_count = 0;
+	int ret = -1;
+	while (m_bRunning)
+	{
+		if (!m_bReal)
+		{
+			if (m_bPause)
+			{
+				std::this_thread::sleep_for(std::chrono::milliseconds(3));
+				continue;
+			}
+		}
+		AVPacket* pkt = m_pktQueueptr->getTail();
+		if(pkt == nullptr){
+			std::this_thread::sleep_for(std::chrono::milliseconds(3));
+			continue;
+		}
+		int result = av_read_frame(fmt_ctx, pkt);
+		if (result == AVERROR_EOF || result < 0)
+		{
+            LOG_ERROR("[{}]- Failed to read frame!", m_dec_name);
+			break;
+		}
+
+		if (m_dec_keyframe && !(pkt->flags & AV_PKT_FLAG_KEY)) {
+			av_packet_unref(pkt);
+			continue;
+		}
+
+		if (stream_index == pkt->stream_index){
+            ret = av_bsf_send_packet(h264bsfc, pkt);
+            if(ret < 0) {
+                LOG_ERROR("[{}]- av_bsf_send_packet error!", m_dec_name);
+                av_packet_unref(pkt); // on failure the filter did not take the packet, so release it here
+                continue;
+            }
+            while ((ret = av_bsf_receive_packet(h264bsfc, pkt)) == 0) {
+                if(pkt->size > g_pkt_size){
+                    LOG_ERROR("[{}]- pkt size 大于最大预设值!", m_dec_name);
+                    av_packet_unref(pkt); // the slot is not published; free its data before it is reused
+					break;
+                }
+				if(!m_bRunning){
+					break;
+				}
+                m_pktQueueptr->addTail(); // NOTE(review): the loop reuses pkt after publishing it; fine only if the filter emits one packet per input
+                frame_count++;
+            }
+		}else {
+			av_packet_unref(pkt); // packets of other streams are dropped; without this their buffers leak
+		}
+	}
+    LOG_INFO("[{}]- read thread exit.", m_dec_name);
+	m_bRunning = false;  // the thread is gone, so isRunning() must stop reporting true
+	m_bFinished = true;  // set before the callback: no member is touched after the callback returns
+	if(receiver_finished_cbk != nullptr){
+		receiver_finished_cbk(m_finishedReceiveArg);
+	}
+}
+
+bool FFReceiver::start(){
+	m_bRunning = true; // must be set before the thread starts: read_thread() loops on it
+	int ret = pthread_create(&m_read_thread, 0, [](void* arg) -> void* {
+            ((FFReceiver*)arg)->read_thread();
+            return (void*)0;
+        }, this);
+	if(ret != 0){
+		m_bRunning = false;
+		m_read_thread = 0; // keep close() from joining a thread that was never created
+		LOG_ERROR("[{}]- pthread_create failed, ret:{}", m_dec_name, ret);
+		return false;
+	}
+	return true;
+}
+
+void FFReceiver::close(){
+	m_bRunning=false; // asks read_thread() to leave its loop
+	if(m_read_thread != 0){
+		pthread_join(m_read_thread,0);
+		m_read_thread = 0; // joining the same thread twice is undefined; makes a repeated close() a no-op
+	}
+}
+
+float FFReceiver::fps(){
+	return m_fps; // set in init_FFmpeg() from the stream's avg_frame_rate
+}
+
+bool FFReceiver::getResolution( int &width, int &height ){
+	width = frame_width; // values taken from the codec parameters in init_FFmpeg(); 0 before that
+	height = frame_height;
+	return true; // always reports success
+}
+
+void FFReceiver::pause(){
+	m_bPause = true; // read_thread() honours this flag only when m_bReal is false
+}
+
+void FFReceiver::resume(){
+	m_bPause = false; // clears the flag raised by pause()
+}
+
+void FFReceiver::setDecKeyframe(bool bKeyframe)
+{
+	m_dec_keyframe = bKeyframe; // when true, read_thread() discards packets lacking AV_PKT_FLAG_KEY
+}
+
+bool FFReceiver::isRunning(){
+	return m_bRunning; // returns the m_bRunning flag unchanged
+}
+
+bool FFReceiver::isFinished(){
+	return m_bFinished; // returns the m_bFinished flag unchanged
+}
+
+bool FFReceiver::isPausing(){
+	return m_bPause; // true between pause() and resume()
+}
+
+void FFReceiver::setFinishCbkArg(const void* userPtr){
+	m_finishedReceiveArg = userPtr; // passed back to receiver_finished_cbk when read_thread() ends
+}
\ No newline at end of file
diff --git a/src/dvpp/FFReceiver.h b/src/dvpp/FFReceiver.h
new file mode 100644
index 0000000..a380628
--- /dev/null
+++ b/src/dvpp/FFReceiver.h
@@ -0,0 +1,81 @@
+#ifndef __FFRECEIVER_H__
+#define __FFRECEIVER_H__
+
+#include "depend_headers.h"
+#include "CircularQueue.hpp"
+
+typedef void(*RECEIVER_FINISHED_CALLBACK)(const void* userPtr);
+
+struct ReceiverConfig{ // parameters consumed by FFReceiver::init_FFmpeg()
+    const char* uri;   // input to open; not copied, so it must stay valid during init_FFmpeg()
+    string dec_name;   // name used as the "[{}]-" prefix in log lines
+    bool force_tcp;    // selects "tcp" instead of "udp" for the rtsp_transport option
+    CircularQueue<AVPacket*> *pktQueueptr; // queue that the read thread fills
+    RECEIVER_FINISHED_CALLBACK receiver_finished_cbk; // callback invoked after the read thread has finished
+};
+
+class FFReceiver // reads packets from an FFmpeg input on its own thread and places them in a shared queue
+{
+public:
+    FFReceiver(/* args */);
+    ~FFReceiver();
+
+    AVCodecContext* init_FFmpeg(ReceiverConfig config);
+    void releaseFFmpeg();
+    void close();
+    bool start();
+
+    void pause();
+    void resume();
+    void setDecKeyframe(bool bKeyframe);
+    bool isRunning();
+    bool isFinished();
+    bool isPausing();
+    bool getResolution( int &width, int &height );
+    float fps();
+
+    void setName(string nm){
+        m_dec_name = nm;
+    }
+
+    void setFinishCbkArg(const void* userPtr);
+
+private:
+    void read_thread(); // body of the thread created by start()
+
+private:
+    string m_dec_name;
+
+    AVStream* stream;
+    int stream_index;
+    AVFormatContext *fmt_ctx;
+    AVPixelFormat pix_fmt;
+    int frame_width{0};
+	int frame_height{0};
+
+    pthread_t m_read_thread;
+    
+    bool m_bRunning;
+    bool m_bFinished;
+
+    bool m_bPause;
+
+    bool m_bReal; // whether the source is a live (real-time) stream
+
+    float m_fps;
+
+    FFDecConfig m_cfg;
+    bool m_dec_keyframe;
+
+    AVCodecContext *avctx{nullptr};
+    AVBSFContext * h264bsfc{nullptr};
+
+    vector<AVPacket*> m_vec_pkt;             // packets allocated in init_FFmpeg() and freed in releaseFFmpeg()
+    CircularQueue<AVPacket *> *m_pktQueueptr; // supplied through ReceiverConfig; not owned by this class
+
+    const void * m_finishedReceiveArg;
+    RECEIVER_FINISHED_CALLBACK receiver_finished_cbk;
+};
+
+
+#endif
\ No newline at end of file
diff --git a/src/dvpp/Makefile b/src/dvpp/Makefile
new file mode 100644
index 0000000..1f044f5
--- /dev/null
+++ b/src/dvpp/Makefile
@@ -0,0 +1,66 @@
+# Directories
+LIB_DIR:=$(BUILD_DIR)/$(MODULE)/lib
+DEP_DIR:=$(BUILD_DIR)/$(MODULE)/.dep
+OBJ_DIR:=$(BUILD_DIR)/$(MODULE)/obj
+SRC_DIR:=$(TOP_DIR)/$(MODULE)
+
+# Source files, intermediate object files and dependency files
+SRCS:=$(notdir $(wildcard $(SRC_DIR)/*.cpp))
+OBJS:=$(addprefix $(OBJ_DIR)/, $(patsubst %.cpp, %.o, $(SRCS)))
+DEPS:=$(addprefix $(DEP_DIR)/, $(patsubst %.cpp, %.d, $(SRCS)))
+
+# Options that generate header dependencies automatically
+DEPFLAGS=-MT $@ -MMD -MP -MF $(DEP_DIR)/$*.d
+
+DEFS = -DENABLE_DVPP_INTERFACE
+
+# Final target
+TARGET:=$(LIB_DIR)/lib$(MODULE).a
+
+export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/runtime/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/6.3.RC1.alpha001/lib64:$(LD_LIBRARY_PATH)
+export LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/driver:$(LD_LIBRARY_PATH)
+
+include_dir=-I/usr/local/Ascend/ascend-toolkit/latest/acllib/include
+lib_dir=-L/usr/lib -L/usr/local/lib -L/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64 -L/usr/local/Ascend/driver/lib64 -L/usr/local/Ascend/ascend-toolkit/latest/atc/lib64
+lib=-lacl_dvpp -lascendcl -lmmpa -lglog -lgflags -lpthread -lz
+
+CXXFLAGS= -g -O0 -fPIC $(include_dir) $(DEFS) -lpthread -lrt -lz -fexceptions -std=c++11 -fvisibility=hidden -Wl,-Bsymbolic -ldl
+	# -DUNICODE -D_UNICODE
+
+# Default goal
+.PHONY:all
+all:$(TARGET)
+
+# Build the final target
+$(TARGET):$(OBJS) | $(LIB_DIR)
+	@echo -e "\e[32m""Linking static library $(TARGET)""\e[0m"
+	@ar -rc $@ $^
+
+# Create the lib directory when it does not exist
+$(LIB_DIR):
+	@mkdir -p $@
+
+# Build the intermediate object files
+$(OBJ_DIR)/%.o:$(SRC_DIR)/%.cpp $(DEP_DIR)/%.d | $(OBJ_DIR) $(DEP_DIR)
+	@echo -e "\e[33m""Building object $@""\e[0m"
+	@$(CXX) -c $(DEPFLAGS) $(CXXFLAGS) $(INCS) $(LDFLAGS) $(lib_dir) $(lib) $(MACROS) -o $@ $<
+
+# Create the obj directory when it does not exist
+$(OBJ_DIR):
+	@mkdir -p $@
+
+# Create the .dep directory when it does not exist
+$(DEP_DIR):
+	@mkdir -p $@
+
+# Dependency files are produced while compiling; this empty rule only prevents "no rule" errors
+$(DEPS):
+
+# Pull in the header dependencies of the intermediate object files
+include $(wildcard $(DEPS))
+
+# Remove this component's build directory
+.PHONY:clean
+clean:
+	@rm -rf $(BUILD_DIR)/$(MODULE)
diff --git a/src/dvpp/VpcPicConverter.cpp b/src/dvpp/VpcPicConverter.cpp
new file mode 100644
index 0000000..7af3508
--- /dev/null
+++ b/src/dvpp/VpcPicConverter.cpp
@@ -0,0 +1,83 @@
+#include "VpcPicConverter.h"
+#include "depend_headers.h"
+
+#define ALIGN_UP(val, align) (((val) % (align) == 0) ? (val) : (((val) / (align) + 1) * (align))) // rounds val up to a multiple of align; both arguments are evaluated more than once
+
+bool VpcPicConverter::init(aclrtContext context){
+    // Binds the converter to `context`, then creates its stream and VPC channel. Returns false on any ACL error.
+    if(aclrtSetCurrentContext(context) != ACL_ERROR_NONE){
+        LOG_ERROR("aclrtSetCurrentContext failed !");
+        return false;
+    }
+    context_ = context;
+    // convert2bgr() reads m_devId, so query the device that belongs to the context just made current.
+    if(aclrtGetDevice(&m_devId) != ACL_ERROR_NONE || aclrtCreateStream(&stream_) != ACL_ERROR_NONE){
+        LOG_ERROR("aclrtGetDevice or aclrtCreateStream failed !");
+        return false;
+    }
+    dvppChannelDesc_ = acldvppCreateChannelDesc(); // NOTE(review): confirm whether the mode must be set before the channel is created
+    if(dvppChannelDesc_ == nullptr || acldvppCreateChannel(dvppChannelDesc_) != ACL_ERROR_NONE){
+        LOG_ERROR("acldvppCreateChannel failed !");
+        return false;
+    }
+    if(acldvppSetChannelDescMode(dvppChannelDesc_, DVPP_CHNMODE_VPC) != ACL_ERROR_NONE){
+        LOG_ERROR("acldvppSetChannelDescMode failed !");
+        return false;
+    }
+    return true;
+}
+
+DvppRgbMemory* VpcPicConverter::convert2bgr(acldvppPicDesc *inputDesc_, int out_width, int out_height, bool key_frame){
+    // Converts inputDesc_ to BGR888 in a new DvppRgbMemory; returns nullptr on failure. inputDesc_ is not destroyed here.
+    // 8. Describe the output picture; its width and height must equal the input's.
+    // If the converted picture feeds model inference, its size must match what the model expects.
+    int out_buf_width = ALIGN_UP(out_width, 16) * 3;
+    int out_buf_height = ALIGN_UP(out_height, 2);
+    int out_buf_size = out_buf_width * out_buf_height;
+
+    DvppRgbMemory* rgbMem = new DvppRgbMemory(3, out_buf_width, out_buf_height, out_buf_size, "", to_string(m_devId), key_frame);
+    void *outBufferDev_ = (void*)rgbMem->getMem();
+    acldvppPicDesc *outputDesc_= acldvppCreatePicDesc();
+    if(outBufferDev_ == nullptr || outputDesc_ == nullptr){
+        LOG_ERROR("convert2bgr: failed to allocate the output buffer or picture descriptor");
+        if(outputDesc_ != nullptr) acldvppDestroyPicDesc(outputDesc_);
+        delete rgbMem;
+        return nullptr;
+    }
+
+    acldvppSetPicDescData(outputDesc_, outBufferDev_);
+    acldvppSetPicDescFormat(outputDesc_, PIXEL_FORMAT_BGR_888); 
+    acldvppSetPicDescWidth(outputDesc_, out_width);
+    acldvppSetPicDescHeight(outputDesc_, out_height);
+    acldvppSetPicDescWidthStride(outputDesc_, out_buf_width);
+    acldvppSetPicDescHeightStride(outputDesc_, out_buf_height);
+    acldvppSetPicDescSize(outputDesc_, out_buf_size);
+
+    aclError ret = ACL_ERROR_NONE;
+    do{
+        // 9. Queue the asynchronous conversion, then block in aclrtSynchronizeStream until the stream's tasks are done
+        ret = acldvppVpcConvertColorAsync(dvppChannelDesc_, inputDesc_, outputDesc_, stream_);
+        if(ret != ACL_ERROR_NONE){
+            LOG_ERROR("acldvppVpcConvertColorAsync failed - out_width:{} out_height:{} out_buf_width:{}  out_buf_height:{} out_buf_size:{}", out_width, out_height, out_buf_width,  out_buf_height, out_buf_size);
+            break;
+        }
+        ret = aclrtSynchronizeStream(stream_);
+        if(ret != ACL_ERROR_NONE){
+            LOG_ERROR("aclrtSynchronizeStream failed - out_width:{} out_height:{} out_buf_width:{}  out_buf_height:{} out_buf_size:{}", out_width, out_height, out_buf_width,  out_buf_height, out_buf_size);
+            break;
+        }
+    }while(0);
+
+    // 10. Release the output descriptor; the output memory is kept only when the conversion succeeded.
+    acldvppDestroyPicDesc(outputDesc_);
+    if(ret != ACL_ERROR_NONE){
+        delete rgbMem;
+        rgbMem = nullptr;
+    }
+    return rgbMem;
+}
+
+void VpcPicConverter::release(){
+    acldvppDestroyChannel(dvppChannelDesc_); acldvppDestroyChannelDesc(dvppChannelDesc_); // both were created in init() and previously leaked
+    aclrtDestroyStream(stream_); // the context is not destroyed here
+}
\ No newline at end of file
diff --git a/src/dvpp/VpcPicConverter.h b/src/dvpp/VpcPicConverter.h
new file mode 100644
index 0000000..6d0d859
--- /dev/null
+++ b/src/dvpp/VpcPicConverter.h
@@ -0,0 +1,19 @@
+#include "dvpp_headers.h"
+#include "depend_headers.h"
+#include "DvppRgbMemory.hpp"
+
+
+class VpcPicConverter{ // converts decoded DVPP pictures to BGR through a VPC channel
+public:
+    bool init(aclrtContext context);
+
+    DvppRgbMemory* convert2bgr(acldvppPicDesc *input, int out_width, int out_height, bool key_frame);
+
+    void release();
+
+private: // default initialisers give every member a defined value before init() runs
+    aclrtContext context_{nullptr};
+    aclrtStream stream_{nullptr};
+    int m_devId{0};
+    acldvppChannelDesc *dvppChannelDesc_{nullptr};
+};
\ No newline at end of file
diff --git a/src/dvpp/depend_headers.h b/src/dvpp/depend_headers.h
new file mode 100644
index 0000000..84788e3
--- /dev/null
+++ b/src/dvpp/depend_headers.h
@@ -0,0 +1,38 @@
+#ifndef __DEPEND_HEADERS_H__
+#define __DEPEND_HEADERS_H__
+
+#include <iostream>
+#include <utility>
+#include <chrono>
+#include <thread>
+#include <functional>
+#include <atomic>
+#include <fstream>
+#include <signal.h>
+#include <time.h>
+#include <unistd.h>
+#include <set>
+#include <mutex>
+#include <vector>
+#include <condition_variable>
+
+/*
+* 依赖模块外部的代码或库
+* 不要在此处添加模块内部的头文件
+*/
+
+// ffmpeg 是c库 所以编译的时候要加入从 extern导入的C 来声明否则连接失败
+extern "C" {
+#include "libavutil/imgutils.h"
+#include "libavutil/samplefmt.h"
+#include "libavformat/avformat.h"
+#include "libavcodec/avcodec.h"
+}
+
+
+#include "../interface/logger.hpp"
+#include "../interface/DeviceRgbMemory.hpp"
+#include "../interface/interface_headers.h"
+#include "../interface/utiltools.hpp"
+
+#endif
\ No newline at end of file
diff --git a/src/dvpp/dvpp_headers.h b/src/dvpp/dvpp_headers.h
new file mode 100644
index 0000000..49e0e61
--- /dev/null
+++ b/src/dvpp/dvpp_headers.h
@@ -0,0 +1,31 @@
+/*
+* 模块内部的头文件请在此处添加
+*/
+
+#ifndef __DVPP_HEADERS_H__
+#define __DVPP_HEADERS_H__
+
+#include <iostream>
+#include <utility>
+#include <chrono>
+#include <thread>
+#include <functional>
+#include <atomic>
+#include <fstream>
+#include <signal.h>
+#include <time.h>
+#include <unistd.h>
+#include <set>
+#include <mutex>
+#include <vector>
+#include <condition_variable>
+
+#include "acl/acl_mdl.h"
+#include "acl/acl_base.h"
+#include "acl/acl_rt.h"
+#include "acl/acl.h"
+#include "acl/ops/acl_dvpp.h"
+
+
+#endif
+
diff --git a/src/dvpp/threadsafe_queue.h b/src/dvpp/threadsafe_queue.h
new file mode 100644
index 0000000..5a5b0f9
--- /dev/null
+++ b/src/dvpp/threadsafe_queue.h
@@ -0,0 +1,128 @@
+
+#ifndef __THREADSAFE_QUEUE_H__
+#define __THREADSAFE_QUEUE_H__
+
+#include <queue>
+#include <mutex>
+#include <condition_variable>
+#include <initializer_list>
+
+#include <pthread.h>
+using std::queue;
+using namespace std;
+
+template <typename T>
+class ThreadedQueue : public queue<T> {  // std::queue plus blocking Put/Take/Get helpers
+public:
+    ThreadedQueue();
+    ~ThreadedQueue();
+    bool empty() const;
+    size_t size() const;
+    void push(const T& val) { queue<T>::push(val); }  // fix: was declared but never defined
+    void push(T& val);
+    bool pop();
+    T& front();
+    const T& front() const { return queue<T>::front(); }  // fix: was declared but never defined
+    T& back() { return queue<T>::back(); }  // fix: was declared but never defined
+    const T& back() const { return queue<T>::back(); }  // fix: was declared but never defined
+    // The accessors above do NOT take `lock`; only Put/Take/Get/GetEmpty synchronise.
+    void Put(T &data);    // locks, pushes a copy of data, wakes one waiter
+    // Take/Get/GetEmpty block until the queue is non-empty.
+    T Take();             // removes the front element and returns it by value
+    void Get(T &data);    // copies the front element into data and removes it
+    bool GetEmpty();      // waits for an element without removing it; returns true
+    // Allocated in the constructor, deleted in the destructor; do not copy the queue (double delete).
+    condition_variable *condition;
+    mutex *lock;
+};
+
+template <typename T>
+ThreadedQueue<T>::ThreadedQueue()
+    : condition(new condition_variable),  // signalled by Put, waited on by the blocking getters
+      lock(new mutex) {                   // mutex used by Put/Get/GetEmpty
+}
+
+template <typename T>
+ThreadedQueue<T>::~ThreadedQueue() {
+    // Release the synchronisation primitives allocated by the constructor.
+    // `delete` on a null pointer is a no-op, so no explicit null checks are
+    // required; the pointers are still reset to nullptr afterwards, exactly
+    // as before.
+    delete condition;
+    condition = nullptr;
+    delete lock;
+    lock = nullptr;
+}
+
+template <typename T>
+T ThreadedQueue<T>:: Take()  // blocks until an element exists, then removes and returns it
+{
+    std::unique_lock<std::mutex> lk(*this->lock);  // fix: lock is a mutex*, it must be dereferenced
+    this->condition->wait(lk, [this]{return !this->empty();});  // sleep until non-empty
+    T val = this->front();  // copy the head out before dropping it
+    this->pop();
+    return val;
+}
+
+template <typename T>
+void ThreadedQueue<T>:: Put(T &data)
+{
+    // Append under the lock and, still holding it, wake one waiting consumer.
+    std::lock_guard<std::mutex> guard(*this->lock);
+    this->push(data);
+    this->condition->notify_one();
+}
+
+template <typename T>
+void ThreadedQueue<T>:: Get(T &data)  // blocking pop: writes the head element into data
+{
+    std::unique_lock<std::mutex> guard(*this->lock);
+    while (this->empty()) { this->condition->wait(guard); }  // same as the predicate form of wait
+    data = this->front();
+    this->pop();
+}
+
+template <typename T>
+bool ThreadedQueue<T>::GetEmpty()  // waits until at least one element is queued
+{
+    std::unique_lock<std::mutex> guard(*this->lock);
+    while (this->empty()) { this->condition->wait(guard); }  // the element is left in place
+    return true;  // the only value this function ever returns
+}
+
+
+template <typename T>
+bool ThreadedQueue<T>::empty() const {
+    // Thin forwarder to std::queue::empty(); takes no lock itself.
+    return queue<T>::empty();
+}
+
+template <typename T>
+size_t ThreadedQueue<T>::size() const {
+    // Thin forwarder to std::queue::size(); takes no lock itself.
+    return queue<T>::size();
+}
+
+template <typename T>
+void ThreadedQueue<T>::push(T& val) {  // unlocked append; Put() is the locking variant
+    this->queue<T>::push(val);
+}
+
+
+template <typename T>
+T& ThreadedQueue<T>::front() {
+    // Unlocked; forwards to std::queue::front(), so the queue must not be empty.
+    return queue<T>::front();
+}
+
+template <typename T>
+bool ThreadedQueue<T>::pop() {
+    // Safe on an empty queue: returns false instead of popping.
+    if(queue<T>::empty()) {
+        return false;
+    }
+    queue<T>::pop();
+    return true;
+}
+
+#endif
diff --git a/src/dvpp/user_mem.h b/src/dvpp/user_mem.h
new file mode 100644
index 0000000..e6a7d11
--- /dev/null
+++ b/src/dvpp/user_mem.h
@@ -0,0 +1,33 @@
+#ifndef __USER_MEM_H__
+#define __USER_MEM_H__
+
+#include <mutex>
+#include <memory>
+#include <iostream>
+#include <condition_variable>
+#include "threadsafe_queue.h"
+
+#define ALIGN_MEM(val, align) (((val) % (align) == 0) ? (val) : (((val) / (align) + 1) * (align)))
+
+using namespace std;
+
+typedef enum {
+    RTSP_MEM,   // NOTE(review): presumably a buffer holding RTSP input data - confirm
+    VDEC_MEM,   // NOTE(review): presumably a buffer used by the video decoder - confirm
+} MemType;     // tag stored in MemNode::memType
+
+class MemNode{  // one buffer address tagged with its MemType
+public:
+    uint8_t *bufAddr{nullptr};   // fix: was left indeterminate by the constructor
+    MemType memType{RTSP_MEM};   // fix: was left indeterminate; defaults to the first enumerator
+    // Neither the constructor nor the destructor allocates or releases bufAddr.
+    MemNode(){
+        std::cout << "构造" << endl;  // trace output, kept unchanged
+    }
+    // Only prints a trace message.
+    ~MemNode(){
+        std::cout << "析构" << std::endl;
+    }
+} ;
+
+#endif
\ No newline at end of file
diff --git a/src/gb28181/FFGB28181Decoder.cpp b/src/gb28181/FFGB28181Decoder.cpp
index 2207c98..68d4b8a 100644
--- a/src/gb28181/FFGB28181Decoder.cpp
+++ b/src/gb28181/FFGB28181Decoder.cpp
@@ -17,6 +17,9 @@ extern "C" {
 
 #include "common_header.h"
 
+#include "../nvdecoder/GpuRgbMemory.hpp"
+#include "../nvdecoder/cuda_kernels.h"
+
 #define ECLOSED 0
 #define ECLOSING 1
 #define ERUNNING 2
@@ -365,4 +368,90 @@ bool FFGB28181Decoder::isSurport(FFDecConfig& cfg){
 
 int FFGB28181Decoder::getCachedQueueLength(){
     return m_rtpPtr->GetPsFrameListSize();
+}
+
+FFImgInfo* FFGB28181Decoder::snapshot(){
+	// Converts the frame at the head of mFrameQueue to BGR, copies it to host
+	// memory and returns it as a new FFImgInfo. Returns nullptr when no frame
+	// arrives within ~100 ms or when any allocation/CUDA step fails.
+	// Per the original note, holding this mutex stops the queue from being consumed.
+	std::lock_guard<std::mutex> l(m_snapshot_mutex);
+	AVFrame * gpuFrame = nullptr;
+	bool bFirst = true;
+	while(true){
+		m_queue_mutex.lock();
+		if(mFrameQueue.size() <= 0){
+			m_queue_mutex.unlock();
+			if(bFirst){
+				std::this_thread::sleep_for(std::chrono::milliseconds(100));
+				bFirst = false;
+				continue;
+			}else{
+				// Second pass: already waited 100 ms without a decoded frame, give up.
+				return nullptr;
+			}
+		}
+		// At least one frame is queued; peek at it without popping.
+		gpuFrame = mFrameQueue.front();
+		m_queue_mutex.unlock();
+		break;
+	}
+
+	if (gpuFrame == nullptr || gpuFrame->format != AV_PIX_FMT_CUDA ){
+		return nullptr;
+	}
+	LOG_DEBUG("decode task: gpuid: {}  width: {} height: {}", m_cfg.gpuid, gpuFrame->width, gpuFrame->height);
+	GpuRgbMemory* gpuMem = new GpuRgbMemory(3, gpuFrame->width, gpuFrame->height, getName(), m_cfg.gpuid , true);
+	if (gpuMem->getMem() == nullptr){
+		LOG_ERROR("new GpuRgbMemory failed !!!");
+		delete gpuMem;	// fix: the wrapper object was leaked on this path
+		return nullptr;
+	}
+
+	cudaSetDevice(atoi(m_cfg.gpuid.c_str()));
+	cuda_common::setColorSpace( ITU_709, 0 );
+	cudaError_t cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], gpuMem->getMem(), gpuFrame->width, gpuFrame->height);
+	cudaDeviceSynchronize();
+	if (cudaStatus != cudaSuccess) {
+		LOG_ERROR("CUDAToBGR failed failed !!!");
+		delete gpuMem;	// fix: the device buffer was leaked on this path
+		return nullptr;
+	}
+	unsigned char * pHwRgb = gpuMem->getMem();
+	int channel = gpuMem->getChannel();
+	int width = gpuMem->getWidth();
+	int height = gpuMem->getHeight();
+	if (pHwRgb == nullptr || channel <= 0 || width <= 0 || height <= 0){
+		delete gpuMem;
+		return nullptr;
+	}
+
+	int nSize = channel * height * width;
+	LOG_INFO("channel:{} height:{} width:{}", channel, height, width);
+	// NOTE(review): FFNvDecoderManager::releaseFFImgInfo() releases pData with free(); FFmpeg documents av_free() for av_malloc() memory - confirm.
+	unsigned char* cpu_data = (unsigned char *)av_malloc(nSize * sizeof(unsigned char));
+	if (cpu_data == nullptr){
+		LOG_ERROR("av_malloc failed !!!");
+		delete gpuMem;
+		return nullptr;
+	}
+
+	cudaStatus = cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost);
+	cudaDeviceSynchronize();
+	delete gpuMem;	// the device copy is no longer needed once the pixels are on the host
+	if (cudaStatus != cudaSuccess) {
+		LOG_ERROR("cudaMemcpy failed !!!");
+		av_free(cpu_data);
+		return nullptr;
+	}
+
+	FFImgInfo* imgInfo = new FFImgInfo();
+	imgInfo->dec_name = m_dec_name;
+	imgInfo->pData = cpu_data;
+	imgInfo->height = height;
+	imgInfo->width = width;
+	imgInfo->timestamp = UtilTools::get_cur_time_ms();
+	imgInfo->index = m_index;
+	m_index++;
+	return imgInfo;
 }
\ No newline at end of file
diff --git a/src/gb28181/FFGB28181Decoder.h b/src/gb28181/FFGB28181Decoder.h
index 1f31a5b..9fee58f 100644
--- a/src/gb28181/FFGB28181Decoder.h
+++ b/src/gb28181/FFGB28181Decoder.h
@@ -44,6 +44,8 @@ public:
 
     DECODER_TYPE getDecoderType(){ return DECODER_TYPE_GB28181; }
 
+    FFImgInfo* snapshot();
+
 public:
     void stream_callback(int videoType, char* data, int len, int isKey, uint64_t pts, uint64_t localPts);
 	void stream_end_callback();
@@ -74,6 +76,10 @@ private:
     AVDictionary *gpu_options = nullptr;
 
     pthread_t m_post_decode_thread;
+
+    queue<AVFrame*> mFrameQueue;
+    mutex m_queue_mutex;
+    mutex m_snapshot_mutex;
 };
 
 #endif // _GB28181_DECODER_H_
diff --git a/src/gb28181/Makefile b/src/gb28181/Makefile
new file mode 100644
index 0000000..46094f5
--- /dev/null
+++ b/src/gb28181/Makefile
@@ -0,0 +1,53 @@
+# 各项目录
+LIB_DIR:=$(BUILD_DIR)/$(MODULE)/lib
+DEP_DIR:=$(BUILD_DIR)/$(MODULE)/.dep
+OBJ_DIR:=$(BUILD_DIR)/$(MODULE)/obj
+SRC_DIR:=$(TOP_DIR)/$(MODULE)
+
+# Source files, plus the object and dependency files derived from them
+SRCS:=$(notdir $(wildcard $(SRC_DIR)/*.cpp))
+OBJS:=$(addprefix $(OBJ_DIR)/, $(patsubst %.cpp, %.o, $(SRCS)))
+DEPS:=$(addprefix $(DEP_DIR)/, $(patsubst %.cpp, %.d, $(SRCS)))
+
+# 自动生成头文件依赖选项
+DEPFLAGS=-MT $@ -MMD -MP -MF $(DEP_DIR)/$*.d
+
+# 最终目标文件
+TARGET:=$(LIB_DIR)/$(MODULE).a
+
+# 默认最终目标
+.PHONY:all
+all:$(TARGET)
+
+# 生成最终目标
+$(TARGET):$(OBJS) | $(LIB_DIR)
+	@echo -e "\e[32m""Linking static library $(TARGET)""\e[0m"
+	@ar -rc $@ $^
+
+# 若没有lib目录则自动生成
+$(LIB_DIR):
+	@mkdir -p $@
+
+# 生成中间目标文件
+$(OBJ_DIR)/%.o:$(SRC_DIR)/%.cpp $(DEP_DIR)/%.d | $(OBJ_DIR) $(DEP_DIR)
+	@echo -e "\e[33m""Building object $@""\e[0m"
+	@$(CXX) -c $(DEPFLAGS) $(CXXFLAGS) $(INCS) $(MACROS) -o $@ $<
+
+# 若没有obj目录则自动生成
+$(OBJ_DIR):
+	@mkdir -p $@
+
+# 若没有.dep目录则自动生成
+$(DEP_DIR):
+	@mkdir -p $@
+
+# 依赖文件会在生成中间文件的时候自动生成，这里只是为了防止报错
+$(DEPS):
+
+# 引入中间目标文件头文件依赖关系
+include $(wildcard $(DEPS))
+
+# 直接删除组件build目录
+.PHONY:clean
+clean:
+	@rm -rf $(BUILD_DIR)/$(MODULE)
diff --git a/src/gb28181/common_header.h b/src/gb28181/common_header.h
index 2f0c324..d5feed8 100644
--- a/src/gb28181/common_header.h
+++ b/src/gb28181/common_header.h
@@ -2,7 +2,7 @@
 #define _COMMON_HEADER_H_
 
 
-#include "../logger.hpp"
-#include "../utiltools.hpp"
+#include "../interface/logger.hpp"
+#include "../interface/utiltools.hpp"
 
 #endif
\ No newline at end of file
diff --git a/src/interface/AbstractDecoder.cpp b/src/interface/AbstractDecoder.cpp
new file mode 100644
index 0000000..244dd45
--- /dev/null
+++ b/src/interface/AbstractDecoder.cpp
@@ -0,0 +1,25 @@
+#include "AbstractDecoder.h"
+
+#include "logger.hpp"
+#include "utiltools.hpp"
+
+
+bool AbstractDecoder::isSnapTime(){
+	// Timed snapshots are disabled while the interval is zero or negative.
+	const bool enabled = m_snap_time_interval > 0;
+	if(!enabled){
+		return false;
+	}
+	// Due once strictly more than one interval has passed since the last snapshot.
+	const long now = UtilTools::get_cur_time_ms();
+	return (now - m_last_snap_time) > m_snap_time_interval;
+}
+
+void AbstractDecoder::updateLastSnapTime(){  // restart the snapshot interval from the current time
+	this->m_last_snap_time = UtilTools::get_cur_time_ms();
+}
+
+void AbstractDecoder::setSnapTimeInterval(long interval){  // interval <= 0 makes isSnapTime() return false
+	this->m_last_snap_time = UtilTools::get_cur_time_ms();  // the interval is measured from now
+	this->m_snap_time_interval = interval;
+}
\ No newline at end of file
diff --git a/src/interface/AbstractDecoder.h b/src/interface/AbstractDecoder.h
new file mode 100644
index 0000000..9f4cb3f
--- /dev/null
+++ b/src/interface/AbstractDecoder.h
@@ -0,0 +1,54 @@
+#ifndef _ABSTRACT_DECODER_H_
+#define _ABSTRACT_DECODER_H_
+
+#include "interface_headers.h"
+
+using namespace std;
+
+class AbstractDecoder{  // interface through which FFNvDecoderManager drives every decoder
+public:
+    virtual ~AbstractDecoder(){};
+    virtual bool init(FFDecConfig& cfg) = 0;
+    virtual void close() = 0;
+    virtual bool start() = 0;
+    virtual void pause() = 0;
+    virtual void resume() = 0;
+
+    virtual void setDecKeyframe(bool bKeyframe) = 0;
+
+    virtual bool isRunning() = 0;
+    virtual bool isFinished() = 0;
+    virtual bool isPausing() = 0;
+    virtual bool getResolution( int &width, int &height ) = 0;
+
+    virtual bool isSurport(FFDecConfig& cfg) = 0;
+
+    virtual int getCachedQueueLength() = 0;
+
+    virtual float fps() = 0;
+
+    virtual DECODER_TYPE getDecoderType() = 0;
+    // Returns a heap-allocated image or nullptr; release it with FFNvDecoderManager::releaseFFImgInfo().
+    virtual FFImgInfo* snapshot() = 0;
+
+    virtual void setName(string nm) = 0;
+
+    virtual string getName() = 0;
+
+    virtual void setPostDecArg(const void* postDecArg) = 0;
+    virtual void setFinishedDecArg(const void* finishedDecArg) = 0;
+    // Timed-snapshot bookkeeping, implemented in AbstractDecoder.cpp.
+public:
+    bool isSnapTime();
+
+    void updateLastSnapTime();
+
+    void setSnapTimeInterval(long interval);
+
+public:
+    long m_snap_time_interval{-1};   // <= 0 disables timed snapshots (see isSnapTime)
+    long m_last_snap_time{0};        // fix: was uninitialised until setSnapTimeInterval()/updateLastSnapTime() ran
+    long m_index{0};
+};
+
+#endif // _ABSTRACT_DECODER_H_
\ No newline at end of file
diff --git a/src/interface/DeviceRgbMemory.hpp b/src/interface/DeviceRgbMemory.hpp
new file mode 100644
index 0000000..b5a3125
--- /dev/null
+++ b/src/interface/DeviceRgbMemory.hpp
@@ -0,0 +1,86 @@
+#ifndef __DEVICE_RGB_MEMORY_H__
+#define __DEVICE_RGB_MEMORY_H__
+
+#include<string>
+
+#include "utiltools.hpp"
+
+using namespace std;
+
+class DeviceRgbMemory{
+    // Describes one frame buffer (pHwRgb) with its geometry, owner ids and a timestamp.
+public:
+     DeviceRgbMemory(int _channel, int _width, int _height, string _id, string _dev_id, bool _key_frame, bool _isused){
+        channel = _channel;
+        width = _width;
+        height = _height;
+        data_size = channel * width * height;  // element count of the frame
+        isused = _isused;
+        id = _id;
+        device_id = _dev_id;
+        key_frame = _key_frame;
+        timestamp = UtilTools::get_cur_time_ms();  // creation time
+    }
+    // pHwRgb is not allocated above and not released below.
+    virtual ~DeviceRgbMemory(){}
+    
+    int getSize() {
+        return data_size;
+    }
+    
+    bool isIsused() {
+        return isused;
+    }
+
+    void setIsused(bool _isused) {
+        isused = _isused;
+        // Refresh the timestamp whenever the in-use flag changes.
+        timestamp = UtilTools::get_cur_time_ms();
+    }
+
+    string getId() {
+        return id;
+    }
+
+    string getDeviceId() {
+        return device_id;
+    }
+
+    unsigned char* getMem(){
+        return pHwRgb;
+    }
+
+    long long getTimesstamp(){
+        return timestamp;
+    }
+
+    int getWidth(){
+        return width;
+    }
+
+    int getHeight(){
+        return height;
+    }
+
+    int getChannel(){
+        return channel;
+    }
+
+    bool isKeyFrame(){
+        return key_frame;
+    }
+
+public:
+    int data_size;          // channel * width * height, computed in the constructor
+    bool isused;
+    string id;
+    string device_id;
+    unsigned char * pHwRgb{nullptr};   // frame buffer; stays nullptr unless set elsewhere
+    long long timestamp;    // set at construction and on every setIsused() call
+    int width{0};
+    int height{0};
+    int channel{3};
+    bool key_frame;
+};
+
+#endif
\ No newline at end of file
diff --git a/src/interface/FFNvDecoderManager.cpp b/src/interface/FFNvDecoderManager.cpp
new file mode 100644
index 0000000..a32c4bb
--- /dev/null
+++ b/src/interface/FFNvDecoderManager.cpp
@@ -0,0 +1,494 @@
+#include "FFNvDecoderManager.h"
+
+#ifdef USE_NVDEC
+#include "../nvdecoder/FFNvDecoder.h"
+#include "../gb28181/FFGB28181Decoder.h"
+#endif
+
+#ifdef USE_DVPP
+#include "./dvpp/DvppDecoderApi.h"
+#endif
+
+#include "logger.hpp"
+
+using namespace std;
+
+
+AbstractDecoder* FFNvDecoderManager::createDecoder(MgrDecConfig config){
+    // Creates, initialises and registers a decoder under config.name.
+    // Returns nullptr when a callback is missing, the name is already taken,
+    // no decoder matches dec_type in this build, or init() fails.
+
+    // Purge finished decoders first; the purge takes m_mutex itself, so it
+    // has to run before the lock below is acquired.
+    closeAllFinishedDecoder();
+
+    const bool missing_callback = config.cfg.post_decoded_cbk == nullptr
+                               || config.cfg.decode_finished_cbk == nullptr;
+    if (missing_callback){
+        return nullptr;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    if (decoderMap.count(config.name) != 0){
+        LOG_ERROR("已存在name为{}的解码器", config.name);
+        return nullptr;
+    }
+
+    // Instantiate the backend selected by dec_type (only compiled-in ones).
+    AbstractDecoder* decoder = nullptr;
+#ifdef USE_NVDEC
+    if(DECODER_TYPE_FFMPEG == config.dec_type){
+        decoder = new FFNvDecoder();
+    }
+    if(DECODER_TYPE_GB28181 == config.dec_type){
+        decoder = new FFGB28181Decoder();
+    }
+#endif
+#ifdef USE_DVPP
+    if(DECODER_TYPE_DVPP == config.dec_type){
+        decoder = new DvppDecoderApi();
+    }
+#endif
+    if (decoder == nullptr){
+        LOG_ERROR("没有指定解码器类型");
+        return nullptr;
+    }
+
+    if (!decoder->init(config.cfg)){
+        // Initialisation failed: shut the decoder down and discard it.
+        decoder->close();
+        delete decoder;
+        LOG_ERROR("[{}][{}]- 解码器初始化失败！",config.name, config.cfg.uri);
+        return nullptr;
+    }
+
+    decoder->setName(config.name) ;
+    decoderMap[config.name] = decoder;
+    LOG_INFO("[{}][{}]- 解码器初始化成功",config.name, config.cfg.uri);
+    return decoder;
+}
+
+bool FFNvDecoderManager::setPostDecArg(const string name, const void * userPtr)
+{
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return false;
+    }
+
+    // Hand the caller's opaque pointer to the decoder registered under name.
+    entry->second->setPostDecArg(userPtr);
+    return true;
+}
+
+bool FFNvDecoderManager::setFinishedDecArg(const string name, const void * userPtr)
+{
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return false;
+    }
+
+    // Hand the caller's opaque pointer to the decoder registered under name.
+    entry->second->setFinishedDecArg(userPtr);
+    return true;
+}
+
+AbstractDecoder* FFNvDecoderManager::getDecoderByName(const string name)
+{
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return nullptr;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return nullptr;
+    }
+
+    // The pointer stays registered here; the manager's close* methods delete it.
+    return entry->second;
+}
+
+bool FFNvDecoderManager::startDecode(AbstractDecoder* dec){
+    // A null decoder or one that is already running is reported as false.
+    if (dec == nullptr || dec->isRunning()){
+        return false;
+    }
+    return dec->start();
+}
+
+bool FFNvDecoderManager::startDecodeByName(const string name){
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return false;
+    }
+
+    // Unlike startDecode(), no isRunning() check is made before start().
+    return entry->second->start();
+}
+
+void FFNvDecoderManager::startAllDecode(){
+    // Start every registered decoder that is not already running.
+    std::lock_guard<std::mutex> guard(m_mutex);
+    for(auto& entry : decoderMap){
+        AbstractDecoder* decoder = entry.second;
+        if (decoder->isRunning()){
+            continue;
+        }
+        decoder->start();
+    }
+}
+
+bool FFNvDecoderManager::closeDecoderByName(const string name){
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return false;
+    }
+
+    // Shut the decoder down, destroy it, then drop its map entry.
+    AbstractDecoder* decoder = entry->second;
+    decoder->close();
+    delete decoder;
+    entry->second = nullptr;
+    decoderMap.erase(entry);
+    return true;
+}
+
+void FFNvDecoderManager::closeAllDecoder()
+{
+    // Close and destroy every decoder, then empty the registry.
+    std::lock_guard<std::mutex> guard(m_mutex);
+    for(auto& entry : decoderMap){
+        entry.second->close();
+        delete entry.second;
+        entry.second = nullptr;
+    }
+    decoderMap.clear();
+}
+
+void FFNvDecoderManager::closeAllFinishedDecoder()
+{
+    // Destroy and unregister every decoder that reports isFinished().
+    // Note that close() is not called here; only the destructor runs.
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    auto cursor = decoderMap.begin();
+    while (cursor != decoderMap.end()){
+        if (!cursor->second->isFinished()){
+            ++cursor;
+            continue;
+        }
+        delete cursor->second;
+        cursor->second = nullptr;
+        cursor = decoderMap.erase(cursor);   // erase() yields the next valid iterator
+    }
+}
+
+int FFNvDecoderManager::count()
+{
+    // Finished decoders are purged first, before locking, because the purge takes m_mutex itself.
+    closeAllFinishedDecoder();
+    std::lock_guard<std::mutex> guard(m_mutex);
+    return decoderMap.size();
+}
+
+bool FFNvDecoderManager::pauseDecoder(const string name)
+{
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return false;
+    }
+
+    // true only means the decoder was found and pause() was invoked.
+    entry->second->pause();
+    return true;
+}
+
+bool FFNvDecoderManager::resumeDecoder(const string name)
+{
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return false;
+    }
+
+    // true only means the decoder was found and resume() was invoked.
+    entry->second->resume();
+    return true;
+}
+
+bool FFNvDecoderManager::isSurport(MgrDecConfig& config)
+{
+    // Answers whether a decoder of config.dec_type could handle config.cfg,
+    // using a throw-away decoder instance. A name that is already
+    // registered is reported as unsupported.
+    {
+        // The lock only covers the name lookup.
+        std::lock_guard<std::mutex> guard(m_mutex);
+        if (decoderMap.count(config.name) != 0){
+            LOG_ERROR("已存在name所标记的解码器");
+            return false;
+        }
+    }
+
+    AbstractDecoder* probe = nullptr;
+#ifdef USE_NVDEC
+    if(DECODER_TYPE_FFMPEG == config.dec_type){
+        probe = new FFNvDecoder();
+    }
+    if(DECODER_TYPE_GB28181 == config.dec_type){
+        probe = new FFGB28181Decoder();
+    }
+#endif
+
+#ifdef USE_DVPP
+    if(DECODER_TYPE_DVPP == config.dec_type){
+        probe = new DvppDecoderApi();
+    }
+#endif
+
+    if (probe == nullptr){
+        LOG_ERROR("没有指定解码器类型");
+        return false;
+    }
+
+    // Ask the temporary decoder, then dispose of it.
+    const bool supported = probe->isSurport(config.cfg);
+    delete probe;
+    return supported;
+}
+
+bool FFNvDecoderManager::isRunning(const string name){
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return false;
+    }
+
+    // An empty or unknown name was already reported as false above.
+    return entry->second->isRunning();
+}
+
+bool FFNvDecoderManager::isFinished(const string name){
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return false;
+    }
+
+    // An empty or unknown name was already reported as false above.
+    return entry->second->isFinished();
+}
+
+bool FFNvDecoderManager::isPausing(const string name){
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return false;
+    }
+
+    // An empty or unknown name was already reported as false above.
+    return entry->second->isPausing();
+}
+
+bool FFNvDecoderManager::setDecKeyframe(const string name, bool bKeyframe)
+{
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end())
+    {
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return false;
+    }
+
+    // Forward the flag to the decoder registered under name.
+    entry->second->setDecKeyframe(bKeyframe);
+    return true;
+}
+
+bool FFNvDecoderManager::getResolution(const string name, int &width, int &height)
+{
+    if (name.empty())
+    {
+        LOG_ERROR("name 为空!");
+        return false;
+    }
+    // Looks up the decoder by name and reports its resolution through width/height.
+    std::lock_guard<std::mutex> l(m_mutex);
+
+    auto dec = decoderMap.find(name);
+    if (dec != decoderMap.end())
+    {
+        // Fix: return the decoder's own result instead of an unconditional true.
+        return dec->second->getResolution(width, height);
+    }
+    
+    LOG_ERROR("没有找到name为{}的解码器",name);
+    return false;
+}
+
+vector<string> FFNvDecoderManager::getAllDecodeName(){
+    // Purge finished decoders, then list the names still registered.
+    closeAllFinishedDecoder();
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    vector<string> names;
+    for(const auto& entry : decoderMap){
+        names.push_back(entry.first);
+    }
+    return names;
+}
+
+int FFNvDecoderManager::getCachedQueueLength(const string name){
+    // -1 signals an empty or unknown name.
+    if (name.empty()){
+        LOG_ERROR("name 为空!");
+        return -1;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end()){
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return -1;
+    }
+
+    return entry->second->getCachedQueueLength();
+}
+
+void FFNvDecoderManager::releaseFFImgInfo(FFImgInfo* info){
+	// Frees the pixel buffer with free() and then the FFImgInfo itself.
+	// A null info is ignored.
+	if(info == nullptr){
+		return;
+	}
+	free(info->pData);	// free(nullptr) is a no-op, so no separate null check is needed
+	info->pData = nullptr;
+	delete info;
+}
+
+FFImgInfo* FFNvDecoderManager::snapshot_in_task(const string name){
+    // nullptr signals an empty or unknown name, or a decoder that produced no image.
+    if (name.empty()){
+        LOG_ERROR("name 为空!");
+        return nullptr;
+    }
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+    const auto entry = decoderMap.find(name);
+    if (entry == decoderMap.end()){
+        LOG_ERROR("没有找到name为{}的解码器",name);
+        return nullptr;
+    }
+
+    return entry->second->snapshot();
+}
+
+vector<FFImgInfo*> FFNvDecoderManager::timing_snapshot_all(){
+    // Collects one snapshot from every decoder whose snapshot interval has
+    // elapsed; decoders that yield no image are skipped but still re-armed.
+    closeAllFinishedDecoder();
+
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    vector<FFImgInfo*> shots;
+    for(auto& entry : decoderMap){
+        AbstractDecoder* decoder = entry.second;
+        if(!decoder->isSnapTime()){
+            continue;
+        }
+        FFImgInfo* shot = decoder->snapshot();
+        if(shot != nullptr){ shots.push_back(shot); }
+        decoder->updateLastSnapTime();
+    }
+    return shots;
+}
\ No newline at end of file
diff --git a/src/interface/FFNvDecoderManager.h b/src/interface/FFNvDecoderManager.h
new file mode 100644
index 0000000..bb1c0de
--- /dev/null
+++ b/src/interface/FFNvDecoderManager.h
@@ -0,0 +1,261 @@
+#pragma once
+#include "AbstractDecoder.h"
+#include<iostream>
+#include<vector>
+#include<map>
+#include <mutex>
+
+using namespace std;
+
+struct MgrDecConfig
+{
+	DECODER_TYPE dec_type;	// decoder type
+    FFDecConfig cfg;    // decoder configuration
+    string name{""};    // decoder name
+};
+
+// #define USE_NVDEC
+#define USE_DVPP
+/**
+ * 解码器管理类，单例类
+ * 谨防死锁
+ **/
+class FFNvDecoderManager {
+public:
+    /**************************************************
+	* 接口：getInstance
+	* 功能：获取解码器管理者实例
+	* 参数：无
+	* 返回：成功返回 解码器管理者实例， 失败返回 nullptr
+	* 备注：调用其他接口前，需要先调用该接口获取管理者实例
+	**************************************************/
+    static FFNvDecoderManager* getInstance(){
+		// Fix: a function-local static is initialised exactly once and, since
+		// C++11, thread-safely; the old "if nullptr then new" sequence could
+		// create two managers when first called from two threads at once.
+		static FFNvDecoderManager* singleton = new FFNvDecoderManager();
+		return singleton;
+	}
+
+    ~FFNvDecoderManager()
+    {
+        closeAllDecoder();
+    }
+
+    /**************************************************
+	* 接口：createDecoder
+	* 功能：根据配置信息创建解码器
+	* 参数：MgrDecConfig& config 解码器配置信息
+	* 返回：成功返回解码器， 失败返回 nullptr
+	* 备注：
+	**************************************************/
+    AbstractDecoder* createDecoder(MgrDecConfig config);
+
+    /**************************************************
+	* 接口：setPostDecArg
+	* 功能：设置解码数据回调接口的用户自定义参数
+	* 参数：string name 解码器名称
+    *       const void * userPtr  用户自定义的要传到解码数据回调接口的数据
+	* 返回：设置成功返回true，失败返回false
+	* 备注：
+	**************************************************/
+    bool setPostDecArg(const string name, const void * userPtr);
+
+	/**************************************************
+	* 接口：setFinishedDecArg
+	* 功能：设置解码结束回调接口的用户自定义参数
+	* 参数：string name 解码器名称
+    *       const void * userPtr  用户自定义的要传到解码数据回调接口的数据
+	* 返回：设置成功返回true，失败返回false
+	* 备注：
+	**************************************************/
+    bool setFinishedDecArg(const string name, const void * userPtr);
+
+    /**************************************************
+	* 接口：getDecoderByName
+	* 功能：根据解码器名称返回解码器对象指针
+	* 参数：const string name 解码器名称
+	* 返回：成功返回对应的解码器对象的指针，失败返回nullptr
+	* 备注：
+	**************************************************/
+    AbstractDecoder* getDecoderByName(const string name);
+
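+    /**************************************************
+	* Interface: startDecode
+	* Purpose:   start decoding on the given decoder
+	* Params:    AbstractDecoder* the decoder to start
+	* Returns:   bool, the result of the decoder's start(); false if the pointer is null or the decoder is already running
+	* Notes:
+	**************************************************/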
+    bool startDecode(AbstractDecoder*);
+
+    /**************************************************
+	* 接口：startAllDecode
+	* 功能：启动全部解码
+	* 参数：void
+	* 返回：void
+	* 备注：
+	**************************************************/
+    void startAllDecode();
+
+    /**************************************************
+	* 接口：startDecodeByName
+	* 功能：启动名称对应的解码器
+	* 参数：string name 解码器名称
+	* 返回：成功返回true，失败返回false
+	* 备注：
+	**************************************************/
+    bool startDecodeByName(const string name);
+
+    /**************************************************
+	* 接口：closeDecoderByName
+	* 功能：关闭解码器名称对应的解码
+	* 参数：const string name 解码器名称
+	* 返回：成功返回true，失败返回false
+	* 备注：
+	**************************************************/
+    bool closeDecoderByName(const string name);
+
+    /**************************************************
+	* 接口：closeAllDecoder
+	* 功能：关闭全部解码器
+	* 参数：void
+	* 返回：void
+	* 备注：
+	**************************************************/
+    void closeAllDecoder();
+
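+	/**************************************************
+	* Interface: closeAllDecoderByGpuid
+	* Purpose:   close all decoders on the given GPU
+	* Params:    const string gpuid  id of the GPU
+	* Returns:   void
+	* Notes:     NOTE(review): FFNvDecoderManager.cpp contains no definition of this method
+	**************************************************/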
+    void closeAllDecoderByGpuid(const string gpuid);
+
+    /**************************************************
+	* 接口：pauseDecoder
+	* 功能：暂停指定名称的解码器
+	* 参数：const string name 解码器名称
+	* 返回：成功返回true，失败返回false
+	* 备注：
+	**************************************************/
+    bool pauseDecoder(const string name);
+
+    /**************************************************
+	* Interface: resumeDecoder
+	* Purpose:   resume the decoder with the given name
+	* Params:    const string name  decoder name
+	* Returns:   true on success, false on failure
+	* Notes:
+	**************************************************/
+    bool resumeDecoder(const string name);
+
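+    /**************************************************
+	* Interface: isSurport
+	* Purpose:   check whether decoding with the given configuration is supported
+	* Params:    MgrDecConfig& config  manager-level decoder configuration (type, cfg, name)
+	* Returns:   true if supported; false if unsupported or the name is already registered
+	* Notes:
+	**************************************************/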
+    bool isSurport(MgrDecConfig& config);
+
+    /**************************************************
+	* 接口：isRunning
+	* 功能：根据解码器名称判断解码器是否正在运行
+	* 参数：const string name 解码器名称
+	* 返回：正在运行返回true，否则返回false
+	* 备注：
+	**************************************************/
+    bool isRunning(const string name);
+
+	/**************************************************
+	* Interface: isFinished
+	* Purpose:   check by name whether a decoder has finished
+	* Params:    const string name  decoder name
+	* Returns:   true if the decoder has finished, false otherwise
+	* Notes:
+	**************************************************/
+    bool isFinished(const string name);
+
+	/**************************************************
+	* Interface: isPausing
+	* Purpose:   check by name whether a decoder is paused
+	* Params:    const string name  decoder name
+	* Returns:   true if the decoder is paused, false otherwise
+	* Notes:
+	**************************************************/
+    bool isPausing(const string name);
+
+    /**************************************************
+	* 接口：count
+	* 功能：获取正在运行的解码器数量
+	* 参数：void
+	* 返回：正在运行的解码器数量
+	* 备注：
+	**************************************************/
+    int count();
+
+	/**************************************************
+	* 接口：setDecKeyframe
+	* 功能：设置是否只解码关键帧。默认全解
+	* 参数：const string name 解码器名称
+	*		bool bKeyframe   是否只解码关键帧。true，只解码关键帧；false,普通的全解码
+	* 返回：bool 成功返回true,失败返回false
+	* 备注：
+	**************************************************/
+	bool setDecKeyframe(const string name, bool bKeyframe);
+
+	/**************************************************
+	* 接口：getResolution
+	* 功能：获取视频分辨率
+	* 参数：const string name 解码器名称
+	*		int &width   从 width 返回视频宽度
+	*		int &height	 从 height 返回视频高度
+	* 返回：bool 成功获取返回true,失败返回false
+	* 备注：
+	**************************************************/
+	bool getResolution(const string name, int &width, int &height);
+
+	/**************************************************
+	* 接口：getAllDecodeName
+	* 功能：获取全部解码器名称
+	* 参数：void
+	* 返回：vector<string> 返回全部解码器名称
+	* 备注：
+	**************************************************/
+	vector<string> getAllDecodeName();
+
+	/**************************************************
+	* 接口：getCachedQueueLength
+	* 功能：获取解码缓冲队列当前长度
+	* 参数：const string name 解码器名称
+	* 返回：int 解码缓冲队列当前长度
+	* 备注：
+	**************************************************/
+	int getCachedQueueLength(const string name);
+
+	/**************************************************
+	* 接口：releaseFFImgInfo
+	* 功能：释放视频快照信息
+	* 参数：FFImgInfo* info 视频快照信息
+	* 返回：void
+	* 备注：
+	**************************************************/
+	void releaseFFImgInfo(FFImgInfo* info);
+
+	FFImgInfo* snapshot_in_task(const string name);
+
+	vector<FFImgInfo*> timing_snapshot_all();
+
+private:
+    FFNvDecoderManager(){}
+    
+    void closeAllFinishedDecoder();
+
+private:
+    map<string, AbstractDecoder*> decoderMap;
+
+    mutex m_mutex;
+};
\ No newline at end of file
diff --git a/src/interface/Makefile b/src/interface/Makefile
new file mode 100644
index 0000000..60c3103
--- /dev/null
+++ b/src/interface/Makefile
@@ -0,0 +1,55 @@
+# 各项目录
+LIB_DIR:=$(BUILD_DIR)/$(MODULE)/lib
+DEP_DIR:=$(BUILD_DIR)/$(MODULE)/.dep
+OBJ_DIR:=$(BUILD_DIR)/$(MODULE)/obj
+SRC_DIR:=$(TOP_DIR)/$(MODULE)
+
+# Source files, plus the intermediate object files and dependency files derived from them
+SRCS:=$(notdir $(wildcard $(SRC_DIR)/*.cpp))
+OBJS:=$(addprefix $(OBJ_DIR)/, $(patsubst %.cpp, %.o, $(SRCS)))
+DEPS:=$(addprefix $(DEP_DIR)/, $(patsubst %.cpp, %.d, $(SRCS)))
+
+# 自动生成头文件依赖选项
+DEPFLAGS=-MT $@ -MMD -MP -MF $(DEP_DIR)/$*.d
+
+# 最终目标文件
+TARGET:=$(LIB_DIR)/$(MODULE).a
+
+MODULE_LIBS:=$(BUILD_DIR)/dvpp/lib/libdvpp.a\
+
+# 默认最终目标
+.PHONY:all
+all:$(TARGET)
+
+# 生成最终目标
+$(TARGET):$(OBJS) | $(LIB_DIR)
+	@echo -e "\e[32m""Linking static library $(TARGET)""\e[0m"
+	@ar -rc $@ $^
+
+# 若没有lib目录则自动生成
+$(LIB_DIR):
+	@mkdir -p $@
+
+# 生成中间目标文件
+$(OBJ_DIR)/%.o:$(SRC_DIR)/%.cpp $(DEP_DIR)/%.d | $(OBJ_DIR) $(DEP_DIR)
+	@echo -e "\e[33m""Building object $@""\e[0m"
+	@$(CXX) -c $(DEPFLAGS) $(CXXFLAGS) $(INCS) $(MACROS) -o $@  $(MODULE_LIBS) $<
+
+# 若没有obj目录则自动生成
+$(OBJ_DIR):
+	@mkdir -p $@
+
+# 若没有.dep目录则自动生成
+$(DEP_DIR):
+	@mkdir -p $@
+
+# 依赖文件会在生成中间文件的时候自动生成，这里只是为了防止报错
+$(DEPS):
+
+# 引入中间目标文件头文件依赖关系
+include $(wildcard $(DEPS))
+
+# 直接删除组件build目录
+.PHONY:clean
+clean:
+	@rm -rf $(BUILD_DIR)/$(MODULE)
diff --git a/src/interface/interface_headers.h b/src/interface/interface_headers.h
new file mode 100644
index 0000000..43edcc9
--- /dev/null
+++ b/src/interface/interface_headers.h
@@ -0,0 +1,59 @@
+#ifndef _INTERFACE_HEADERS_H_
+#define _INTERFACE_HEADERS_H_
+
+
+#include<string>
+#include <queue>
+#include <mutex>
+
+#include "DeviceRgbMemory.hpp"
+
+using namespace std;
+
+/**************************************************
+* Interface: POST_DECODE_CALLBACK
+* Purpose:   callback that receives decoded frame data
+* Params:    const void * userPtr  user-defined data
+*       DeviceRgbMemory* devFrame  decoded frame; per the original note it lives on the configured device, which matters especially with multiple threads
+* Returns:   none
+* Notes:     when the decoder source is a live stream (RTSP/GB28181),
+*       this callback must not block or do time-consuming work. When
+*       the source is not live (local/network file), blocking or
+*       time-consuming work inside this callback is allowed
+**************************************************/
+typedef void(*POST_DECODE_CALLBACK)(const void * userPtr, DeviceRgbMemory* devFrame);
+
+typedef void(*DECODE_FINISHED_CALLBACK)(const void* userPtr);
+
+typedef bool(*DECODE_REQUEST_STREAM_CALLBACK)(const char* deviceId);
+
+struct FFDecConfig{
+    string uri;                             // video address
+    POST_DECODE_CALLBACK post_decoded_cbk{nullptr};  // callback receiving decoded data; null until set
+    DECODE_FINISHED_CALLBACK decode_finished_cbk{nullptr}; // callback invoked after the decode thread ends; null until set
+    string gpuid;                           // gpu id
+    bool force_tcp{true};                   // whether to force a tcp connection
+    int skip_frame{1};                      // frame-skip count
+    string dec_name;
+
+    int port{0};                            // port on which gb28181 data is received; 0 until set
+    DECODE_REQUEST_STREAM_CALLBACK request_stream_cbk{nullptr};  // gb28181 stream request; null until set
+};
+
+enum DECODER_TYPE{ 
+    DECODER_TYPE_GB28181, 
+    DECODER_TYPE_FFMPEG,
+    DECODER_TYPE_DVPP
+};
+
+struct FFImgInfo{
+    string dec_name;
+    int width;
+    int height;
+    unsigned char * pData;
+    int data_type;  // 0 = rgb (default), 1 = nv12
+    long timestamp;
+    long index;
+};
+
+#endif
\ No newline at end of file
diff --git a/src/interface/logger.hpp b/src/interface/logger.hpp
new file mode 100644
index 0000000..1d67fea
--- /dev/null
+++ b/src/interface/logger.hpp
@@ -0,0 +1,344 @@
+/*
+ * @Author: yangzilong
+ * @Date: 2021-12-21 11:07:11
+ * @Last Modified by: yangzilong
+ * @Email: yangzilong@objecteye.com
+ * @Description:
+ */
+
+#pragma once
+
+// #include "define.hpp"
+#include <spdlog/spdlog.h>
+#include <spdlog/common.h>
+#include <spdlog/details/file_helper.h>
+#include <spdlog/details/null_mutex.h>
+#include <spdlog/fmt/fmt.h>
+#include <spdlog/sinks/base_sink.h>
+#include <spdlog/details/os.h>
+#include <spdlog/details/circular_q.h>
+#include <spdlog/details/synchronous_factory.h>
+
+#include <set>
+#include <chrono>
+#include <cstdio>
+#include <ctime>
+#include <mutex>
+#include <string>
+#include <memory>
+#include <vector>
+
+#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
+
+#define LOG_TRACE_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_TRACE(logger, __VA_ARGS__);}
+#define LOG_DEBUG_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_DEBUG(logger, __VA_ARGS__);}
+#define LOG_WARN_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_WARN(logger, __VA_ARGS__);}
+#define LOG_ERROR_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_ERROR(logger, __VA_ARGS__);}
+#define LOG_INFO_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_INFO(logger, __VA_ARGS__);}
+#define LOG_CRITICAL_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_CRITICAL(logger, __VA_ARGS__);}
+
+
+// use fmt lib (positional indices are 0-based), e.g. LOG_WARN("warn log, {0}, {0}, {1}", 1, 2);
+#define LOG_TRACE(msg, ...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::trace, msg, ##__VA_ARGS__)
+#define LOG_DEBUG(msg, ...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::debug, msg, ##__VA_ARGS__)
+#define LOG_INFO(msg,...)   spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::info, msg, ##__VA_ARGS__)
+#define LOG_WARN(msg,...)   spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::warn, msg, ##__VA_ARGS__)
+#define LOG_ERROR(msg,...)  spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::err, msg, ##__VA_ARGS__)
+#define LOG_FATAL(msg,...)  spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::critical, msg, ##__VA_ARGS__)
+
+
+
+namespace spdlog
+{
+    namespace sinks
+    {
+        template<typename Mutex>
+        class easy_file_sink final : public base_sink<Mutex>
+        {
+        public:
+            easy_file_sink(filename_t base_filename, size_t max_size, size_t max_keep_days = 0)
+                : base_filename_(std::move(base_filename))
+                , max_size_(max_size)
+                , max_keep_days_(max_keep_days)
+            {
+                auto now = log_clock::now();
+                auto filename = gen_filename_by_daliy(base_filename_, now_tm(now));
+
+                file_helper_.open(filename, false);
+                current_size_ = file_helper_.size();
+                rotation_tp_ = next_rotation_tp_();
+
+                // Always record the first day's file, even when max_keep_days_ is 0:
+                // size rotation reads the last entry of filespath_q_, so the list must
+                // not be empty. Old files are still only deleted when max_keep_days_ > 0.
+                filespath_q_.emplace_back();
+                filespath_q_.back().insert(filename);
+            }
+
+            filename_t filename()
+            {
+                std::lock_guard<Mutex> lock(base_sink<Mutex>::mutex_);
+                return file_helper_.filename();
+            }
+
+        protected:
+            void sink_it_(const details::log_msg &msg) override
+            {
+                memory_buf_t formatted;
+                base_sink<Mutex>::formatter_->format(msg, formatted);
+                current_size_ += formatted.size();
+
+                auto time = msg.time;
+                if (time >= rotation_tp_)
+                {
+                    file_helper_.close();
+                    auto filename = gen_filename_by_daliy(base_filename_, now_tm(time));
+                    file_helper_.open(filename, false);
+                    current_size_ = file_helper_.size();
+                    rotation_tp_ = next_rotation_tp_();
+
+                    filespath_q_.emplace_back();
+                    filespath_q_.back().emplace(filename);
+
+                    // Do the cleaning only at the end because it might throw on failure.
+                    if (max_keep_days_ > 0 && filespath_q_.size() > max_keep_days_)
+                        delete_old_();
+                }
+                else if (current_size_ >= max_size_)
+                {
+                    // Never index an empty list: make sure an entry for the current day exists.
+                    if (filespath_q_.empty())
+                        filespath_q_.emplace_back();
+                    auto &todays_files = filespath_q_.back();
+                    file_helper_.close();
+                    auto src_name = gen_filename_by_daliy(base_filename_, now_tm(time));
+                    auto target_name = gen_filename_by_filesize(base_filename_, now_tm(time), todays_files.size());
+
+                    // Rename the full file; if that fails twice, keep writing under `target_name`.
+                    if (!rename_file_(src_name, target_name))
+                    {
+                        details::os::sleep_for_millis(200);
+                        if (!rename_file_(src_name, target_name))
+                        {
+                            fprintf(stderr, "%s:%d rename %s to %s failed\n", __FILENAME__, __LINE__, src_name.c_str(), target_name.c_str());
+                            src_name = target_name;
+                        }
+                    }
+
+                    todays_files.emplace(src_name);
+                    if (src_name != target_name)
+                        todays_files.emplace(target_name);
+
+                    file_helper_.open(src_name, false);
+                    current_size_ = file_helper_.size();
+                    rotation_tp_ = next_rotation_tp_();
+                }
+
+                file_helper_.write(formatted);
+            }
+
+            void flush_() override
+            {
+                file_helper_.flush();
+            }
+
+        private:
+
+            tm now_tm(log_clock::time_point tp)
+            {
+                time_t tnow = log_clock::to_time_t(tp);
+                return spdlog::details::os::localtime(tnow);
+            }
+
+            /**
+             * @brief Compute the next rotation point: local midnight of the current
+             *        day, or that midnight plus 24 hours when it is not after now().
+             * @return log_clock::time_point
+             */
+            log_clock::time_point next_rotation_tp_()
+            {
+                auto now = log_clock::now();
+                tm date = now_tm(now);
+                date.tm_hour = 0;
+                date.tm_min = 0;
+                date.tm_sec = 0;
+                auto rotation_time = log_clock::from_time_t(std::mktime(&date));
+                if (rotation_time > now)
+                    return rotation_time;
+                return {rotation_time + std::chrono::hours(24)};
+            }
+
+            // Delete the files recorded for the oldest days until at most
+            // max_keep_days_ day entries remain.
+            // Throw spdlog_ex on failure to delete the old file.
+            void delete_old_()
+            {
+                while (filespath_q_.size() > max_keep_days_)
+                {
+                    // Entries are appended per day, so the front one is the oldest.
+                    for (const auto &path : filespath_q_.front())
+                    {
+                        bool ok = details::os::remove_if_exists(path) == 0;
+                        if (!ok)
+                            throw_spdlog_ex("Failed removing daily file " + details::os::filename_to_str(path), errno);
+                    }
+                    // Always erase through a fresh begin(): vector::erase invalidates prior iterators.
+                    filespath_q_.erase(filespath_q_.begin());
+                }
+            }
+
+            /* Build "<basename>_YYYY_MM_DD<ext>" from the base file name and the given date. */
+            static filename_t gen_filename_by_daliy(const filename_t &filename, const tm &now_tm)
+            {
+                filename_t basename, ext;
+                std::tie(basename, ext) = details::file_helper::split_by_extension(filename);
+                return fmt::format(SPDLOG_FILENAME_T("{}_{:04d}_{:02d}_{:02d}{}"),
+                                basename,
+                                now_tm.tm_year + 1900,
+                                now_tm.tm_mon + 1,
+                                now_tm.tm_mday,
+                                ext);
+            }
+
+            // Build "<basename>_YYYY_MM_DD_HHMMSS.<idx><ext>" from the base file name, the given time and idx.
+            static filename_t gen_filename_by_filesize(const filename_t &filename, const tm &now_tm, const int &idx)
+            {
+                filename_t basename, ext;
+                std::tie(basename, ext) = details::file_helper::split_by_extension(filename);
+                return fmt::format(SPDLOG_FILENAME_T("{}_{:04d}_{:02d}_{:02d}_{:02d}{:02d}{:02d}.{:d}{}"),
+                                basename,
+                                now_tm.tm_year + 1900,
+                                now_tm.tm_mon + 1,
+                                now_tm.tm_mday,
+                                now_tm.tm_hour,
+                                now_tm.tm_min,
+                                now_tm.tm_sec,
+                                idx,
+                                ext);
+            }
+
+            static bool rename_file_(const filename_t &src_filename, const filename_t &target_filename)
+            {
+                (void)details::os::remove(target_filename);
+                return details::os::rename(src_filename, target_filename) == 0;
+            }
+
+            filename_t base_filename_;
+            log_clock::time_point rotation_tp_;
+            details::file_helper file_helper_;
+            std::size_t max_size_;
+            std::size_t max_keep_days_;
+            std::size_t current_size_;
+            // One std::set of file names per day, appended in rotation order (oldest first).
+            std::vector<std::set<filename_t>> filespath_q_;
+        };
+
+        using easy_file_sink_mt = easy_file_sink<std::mutex>;
+        using easy_file_sink_st = easy_file_sink<details::null_mutex>;
+
+    }  // namespace sinks
+
+    template<typename Factory = spdlog::synchronous_factory>
+    inline std::shared_ptr<logger> easy_logger_mt(
+            const std::string &logger_name, const filename_t &filename, size_t max_size, size_t max_keep_days = -1)
+    {
+        return Factory::template create<sinks::easy_file_sink_mt>(logger_name, filename, max_size, max_keep_days);
+    }
+
+    template<typename Factory = spdlog::synchronous_factory>
+    inline std::shared_ptr<logger> easy_logger_st(
+            const std::string &logger_name, const filename_t &filename, size_t max_size, size_t max_keep_days = -1)
+    {
+        return Factory::template create<sinks::easy_file_sink_st>(logger_name, filename, max_size, max_keep_days);
+    }
+
+}  // namespace spdlog
+
+
+enum class LogLevel
+{
+    CLOSE = -1,
+    TRACE = 0,
+    DEBUG = 1,
+    INFO = 2,
+    WARN = 3,
+    ERROR = 4,
+    FATAL = 5,
+};
+
+
+class LoggerGenerator
+{
+public:
+    // Returns the shared generator, a function-local static instance.
+    static LoggerGenerator* get_instance()
+    {
+        static LoggerGenerator logger;
+        return &logger;
+    }
+
+    // Deletes a heap-allocated generator. The get_instance() object has static
+    // storage and must not be passed to delete, so it is skipped here.
+    void destory(LoggerGenerator *ptr)
+    {
+        if (ptr != nullptr && ptr != get_instance())
+            delete ptr;
+    }
+
+    // Creates a logger named logger_name that writes to file_path through an
+    // easy_file_sink_mt, using the requested level and the pattern set below.
+    std::shared_ptr<spdlog::logger> gen_logger(const LogLevel &level, const std::string &logger_name,
+                                               const std::string &file_path, size_t max_file_size, size_t max_keep_days)
+    {
+        // Values outside LogLevel fall back to info instead of leaving the level uninitialized.
+        spdlog::level::level_enum spd_level = spdlog::level::info;
+        switch (level)
+        {
+        case LogLevel::TRACE: spd_level = spdlog::level::trace;    break;
+        case LogLevel::DEBUG: spd_level = spdlog::level::debug;    break;
+        case LogLevel::INFO:  spd_level = spdlog::level::info;     break;
+        case LogLevel::WARN:  spd_level = spdlog::level::warn;     break;
+        case LogLevel::ERROR: spd_level = spdlog::level::err;      break;
+        case LogLevel::FATAL: spd_level = spdlog::level::critical; break;
+        case LogLevel::CLOSE: spd_level = spdlog::level::off;      break;
+        default: break;
+        }
+
+        auto sink_ptr = std::make_shared<spdlog::sinks::easy_file_sink_mt>(file_path, max_file_size, max_keep_days);
+        auto logger = std::make_shared<spdlog::logger>(logger_name, sink_ptr);
+        logger->set_level(spd_level);
+        logger->set_pattern("%s(%#): [%L %D %T.%e %P %t %!] %v");
+
+        return logger;
+    }
+
+    // Builds a logger with gen_logger() and installs it as spdlog's default logger.
+    void set_default_logger(const LogLevel &level, const std::string &logger_name,
+                            const std::string &file_name, size_t max_file_size, size_t max_keep_days)
+    {
+        auto logger = gen_logger(level, logger_name, file_name, max_file_size, max_keep_days);
+        spdlog::set_default_logger(logger);
+        spdlog::set_level(logger->level());
+        spdlog::set_pattern("%s(%#): [%L %D %T.%e %P %t %!] %v");
+
+        spdlog::flush_on(spdlog::level::trace);
+        spdlog::flush_every(std::chrono::seconds(1));
+    }
+
+};
+
+
+static void set_default_logger(const LogLevel &level, const std::string &logger_name,
+                               const std::string &file_path, size_t max_file_size, size_t max_keep_days)
+{
+    // Convenience wrapper: forwards to the shared LoggerGenerator instance.
+    LoggerGenerator::get_instance()->set_default_logger(level, logger_name, file_path, max_file_size, max_keep_days);
+}
+
+
+static std::shared_ptr<spdlog::logger> get_simple_logger(const LogLevel &level, const std::string &logger_name,
+                                                         const std::string &file_path, size_t max_file_size, size_t max_keep_days)
+{
+    // Convenience wrapper: forwards to the shared LoggerGenerator instance.
+    return LoggerGenerator::get_instance()->gen_logger(level, logger_name, file_path, max_file_size, max_keep_days);
+}
diff --git a/src/interface/utiltools.hpp b/src/interface/utiltools.hpp
new file mode 100644
index 0000000..8caff91
--- /dev/null
+++ b/src/interface/utiltools.hpp
@@ -0,0 +1,18 @@
+#ifndef _UTIL_TOOLS_HPP_
+#define _UTIL_TOOLS_HPP_
+
+#include<chrono>
+
+using namespace std;
+
+namespace UtilTools{
+
+    static long get_cur_time_ms() {
+        // Time elapsed since the system_clock epoch, truncated to whole milliseconds.
+        const auto since_epoch = chrono::system_clock::now().time_since_epoch();
+        return chrono::duration_cast<chrono::milliseconds>(since_epoch).count();
+    }
+
+}
+
+#endif
\ No newline at end of file
diff --git a/src/jpegNPP.cpp-1 b/src/jpegNPP.cpp-1
deleted file mode 100644
index f0bf2e6..0000000
--- a/src/jpegNPP.cpp-1
+++ /dev/null
@@ -1,1193 +0,0 @@
-/*
-* Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
-*
-* NOTICE TO USER:
-*
-* This source code is subject to NVIDIA ownership rights under U.S. and
-* international Copyright laws.
-*
-* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
-* CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
-* IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
-* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
-* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
-* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
-* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
-* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
-* OR PERFORMANCE OF THIS SOURCE CODE.
-*
-* U.S. Government End Users.  This source code is a "commercial item" as
-* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting  of
-* "commercial computer software" and "commercial computer software
-* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
-* and is provided to the U.S. Government only as a commercial end item.
-* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
-* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
-* source code with only those rights set forth herein.
-*/
-
-// This sample needs at least CUDA 5.5 and a GPU that has at least Compute Capability 2.0
-
-// This sample demonstrates a simple image processing pipeline.
-// First, a JPEG file is huffman decoded and inverse DCT transformed and dequantized.
-// Then the different planes are resized. Finally, the resized image is quantized, forward
-// DCT transformed and huffman encoded.
-
-#include "cuda_kernels.h"
-
-#include <npp.h>
-#include <cuda_runtime.h>
-#include "common/UtilNPP/Exceptions.h"
-
-#include "Endianess.h"
-#include <math.h>
-
-#include <string.h>
-#include <fstream>
-#include <iostream>
-
-#include "common/inc/helper_string.h"
-#include "common/inc/helper_cuda.h"
-//#include "MacroDef.h"
-#include "cuda.h"
-
-using namespace std;
-
-struct FrameHeader
-{
-	unsigned char nSamplePrecision;
-	unsigned short nHeight;
-	unsigned short nWidth;
-	unsigned char nComponents;
-	unsigned char aComponentIdentifier[3];
-	unsigned char aSamplingFactors[3];
-	unsigned char aQuantizationTableSelector[3];
-};
-
-struct ScanHeader
-{
-	unsigned char nComponents;
-	unsigned char aComponentSelector[3];
-	unsigned char aHuffmanTablesSelector[3];
-	unsigned char nSs;
-	unsigned char nSe;
-	unsigned char nA;
-};
-
-struct QuantizationTable
-{
-	unsigned char nPrecisionAndIdentifier;
-	unsigned char aTable[64];
-};
-
-struct HuffmanTable
-{
-	unsigned char nClassAndIdentifier;
-	unsigned char aCodes[16];
-	unsigned char aTable[256];
-};
-
-//??准?炼??藕?量??模??
-//unsigned char std_Y_QT[64] =
-//{
-//	16, 11, 10, 16, 24, 40, 51, 61,
-//	12, 12, 14, 19, 26, 58, 60, 55,
-//	14, 13, 16, 24, 40, 57, 69, 56,
-//	14, 17, 22, 29, 51, 87, 80, 62,
-//	18, 22, 37, 56, 68, 109, 103, 77,
-//	24, 35, 55, 64, 81, 104, 113, 92,
-//	49, 64, 78, 87, 103, 121, 120, 101,
-//	72, 92, 95, 98, 112, 100, 103, 99
-//};
-//
-////??准色???藕?量??模??
-//unsigned char std_UV_QT[64] =
-//{
-//	17, 18, 24, 47, 99, 99, 99, 99,
-//	18, 21, 26, 66, 99, 99, 99, 99,
-//	24, 26, 56, 99, 99, 99, 99, 99,
-//	47, 66, 99, 99, 99, 99, 99, 99,
-//	99, 99, 99, 99, 99, 99, 99, 99,
-//	99, 99, 99, 99, 99, 99, 99, 99,
-//	99, 99, 99, 99, 99, 99, 99, 99,
-//	99, 99, 99, 99, 99, 99, 99, 99
-//};
-
-////?炼??藕?量??模??
-//unsigned char std_Y_QT[64] =
-//{
-//	6, 4, 5, 6, 5, 4, 6, 6,
-//	5, 6, 7, 7, 6, 8, 10, 16,
-//	10, 10, 9, 9, 10, 20, 14, 15,
-//	12, 16, 23, 20, 24, 24, 23, 20,
-//	22, 22, 26, 29, 37, 31, 26, 27,
-//	35, 28, 22, 22, 32, 44, 32, 35,
-//	38, 39, 41, 42, 41, 25, 31, 45,
-//	48, 45, 40, 48, 37, 40, 41, 40
-//};
-//
-////色???藕?量??模??
-//unsigned char std_UV_QT[64] =
-//{
-//	7, 7, 7, 10, 8, 10, 19, 10,
-//	10, 19, 40, 26, 22, 26, 40, 40,
-//	40, 40, 40, 40, 40, 40, 40, 40,
-//	40, 40, 40, 40, 40, 40, 40, 40,
-//	40, 40, 40, 40, 40, 40, 40, 40,
-//	40, 40, 40, 40, 40, 40, 40, 40,
-//	40, 40, 40, 40, 40, 40, 40, 40,
-//	40, 40, 40, 40, 40, 40, 40, 40
-//};
-
-//?炼??藕?量??模??
-unsigned char std_Y_QT[64] =
-{
-	0.75 * 6, 0.75 * 4, 0.75 * 5, 0.75 * 6, 0.75 * 5, 0.75 * 4, 0.75 * 6, 0.75 * 6,
-	0.75 * 5, 0.75 * 6, 0.75 * 7, 0.75 * 7, 0.75 * 6, 0.75 * 8, 0.75 * 10, 0.75 * 16,
-	0.75 * 10, 0.75 * 10, 0.75 * 9, 0.75 * 9, 0.75 * 10, 0.75 * 20, 0.75 * 14, 0.75 * 15,
-	0.75 * 12, 0.75 * 16, 0.75 * 23, 0.75 * 20, 0.75 * 24, 0.75 * 24, 0.75 * 23, 0.75 * 20,
-	0.75 * 22, 0.75 * 22, 0.75 * 26, 0.75 * 29, 0.75 * 37, 0.75 * 31, 0.75 * 26, 0.75 * 27,
-	0.75 * 35, 0.75 * 28, 0.75 * 22, 0.75 * 22, 0.75 * 32, 0.75 * 44, 0.75 * 32, 0.75 * 35,
-	0.75 * 38, 0.75 * 39, 0.75 * 41, 0.75 * 42, 0.75 * 41, 0.75 * 25, 0.75 * 31, 0.75 * 45,
-	0.75 * 48, 0.75 * 45, 0.75 * 40, 0.75 * 48, 0.75 * 37, 0.75 * 40, 0.75 * 41, 0.75 * 40
-};
-
-//色???藕?量??模??
-unsigned char std_UV_QT[64] =
-{
-	0.75 * 7, 0.75 * 7, 0.75 * 7, 0.75 * 10, 0.75 * 8, 0.75 * 10, 0.75 * 19, 0.75 * 10,
-	0.75 * 10, 0.75 * 19, 0.75 * 40, 0.75 * 26, 0.75 * 22, 0.75 * 26, 0.75 * 40, 0.75 * 40,
-	30, 30, 30, 30, 30, 30, 30, 30,
-	30, 30, 30, 30, 30, 30, 30, 30,
-	30, 30, 30, 30, 30, 30, 30, 30,
-	30, 30, 30, 30, 30, 30, 30, 30,
-	30, 30, 30, 30, 30, 30, 30, 30,
-	30, 30, 30, 30, 30, 30, 30, 30
-};
-
-unsigned char STD_DC_Y_NRCODES[16] = { 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
-unsigned char STD_DC_Y_VALUES[12] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-
-unsigned char STD_DC_UV_NRCODES[16] = { 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
-unsigned char STD_DC_UV_VALUES[12] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-
-unsigned char STD_AC_Y_NRCODES[16] = { 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0X7D };
-unsigned char STD_AC_Y_VALUES[162] =
-{
-	0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
-	0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
-	0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
-	0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
-	0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
-	0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
-	0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
-	0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
-	0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
-	0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
-	0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
-	0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
-	0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
-	0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-	0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
-	0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
-	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
-	0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
-	0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
-	0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-	0xf9, 0xfa
-};
-
-unsigned char STD_AC_UV_NRCODES[16] = { 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0X77 };
-unsigned char STD_AC_UV_VALUES[162] =
-{
-	0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
-	0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
-	0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
-	0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
-	0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
-	0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
-	0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
-	0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
-	0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
-	0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
-	0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
-	0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-	0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
-	0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
-	0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
-	0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
-	0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
-	0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
-	0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
-	0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-	0xf9, 0xfa
-};
-
-int DivUp(int x, int d)
-{
-	return (x + d - 1) / d;
-}
-
-template<typename T>
-void writeAndAdvance(unsigned char *&pData, T nElement)
-{
-	writeBigEndian<T>(pData, nElement);
-	pData += sizeof(T);
-}
-
-void writeMarker(unsigned char nMarker, unsigned char *&pData)
-{
-	*pData++ = 0x0ff;
-	*pData++ = nMarker;
-}
-
-void writeJFIFTag(unsigned char *&pData)
-{
-	const char JFIF_TAG[] =
-	{
-		0x4a, 0x46, 0x49, 0x46, 0x00,
-		0x01, 0x02,
-		0x00,
-		0x00, 0x01, 0x00, 0x01,
-		0x00, 0x00
-	};
-
-	writeMarker(0x0e0, pData);
-	writeAndAdvance<unsigned short>(pData, sizeof(JFIF_TAG) + sizeof(unsigned short));
-	memcpy(pData, JFIF_TAG, sizeof(JFIF_TAG));
-	pData += sizeof(JFIF_TAG);
-}
-
-void writeFrameHeader(const FrameHeader &header, unsigned char *&pData)
-{
-	unsigned char aTemp[128];
-	unsigned char *pTemp = aTemp;
-
-	writeAndAdvance<unsigned char>(pTemp, header.nSamplePrecision);
-	writeAndAdvance<unsigned short>(pTemp, header.nHeight);
-	writeAndAdvance<unsigned short>(pTemp, header.nWidth);
-	writeAndAdvance<unsigned char>(pTemp, header.nComponents);
-
-	for (int c = 0; c<header.nComponents; ++c)
-	{
-		writeAndAdvance<unsigned char>(pTemp, header.aComponentIdentifier[c]);
-		writeAndAdvance<unsigned char>(pTemp, header.aSamplingFactors[c]);
-		writeAndAdvance<unsigned char>(pTemp, header.aQuantizationTableSelector[c]);
-	}
-
-	unsigned short nLength = (unsigned short)(pTemp - aTemp);
-
-	writeMarker(0x0C0, pData);
-	writeAndAdvance<unsigned short>(pData, nLength + 2);
-	memcpy(pData, aTemp, nLength);
-	pData += nLength;
-}
-
-void writeScanHeader(const ScanHeader &header, unsigned char *&pData)
-{
-	unsigned char aTemp[128];
-	unsigned char *pTemp = aTemp;
-
-	writeAndAdvance<unsigned char>(pTemp, header.nComponents);
-
-	for (int c = 0; c<header.nComponents; ++c)
-	{
-		writeAndAdvance<unsigned char>(pTemp, header.aComponentSelector[c]);
-		writeAndAdvance<unsigned char>(pTemp, header.aHuffmanTablesSelector[c]);
-	}
-
-	writeAndAdvance<unsigned char>(pTemp, header.nSs);
-	writeAndAdvance<unsigned char>(pTemp, header.nSe);
-	writeAndAdvance<unsigned char>(pTemp, header.nA);
-
-	unsigned short nLength = (unsigned short)(pTemp - aTemp);
-
-	writeMarker(0x0DA, pData);
-	writeAndAdvance<unsigned short>(pData, nLength + 2);
-	memcpy(pData, aTemp, nLength);
-	pData += nLength;
-}
-
-void writeQuantizationTable(const QuantizationTable &table, unsigned char *&pData)
-{
-	writeMarker(0x0DB, pData);
-	writeAndAdvance<unsigned short>(pData, sizeof(QuantizationTable) + 2);
-	memcpy(pData, &table, sizeof(QuantizationTable));
-	pData += sizeof(QuantizationTable);
-}
-
-void writeHuffmanTable(const HuffmanTable &table, unsigned char *&pData)
-{
-	writeMarker(0x0C4, pData);
-
-	// Number of Codes for Bit Lengths [1..16]
-	int nCodeCount = 0;
-
-	for (int i = 0; i < 16; ++i)
-	{
-		nCodeCount += table.aCodes[i];
-	}
-
-	writeAndAdvance<unsigned short>(pData, 17 + nCodeCount + 2);
-	memcpy(pData, &table, 17 + nCodeCount);
-	pData += 17 + nCodeCount;
-}
-
-bool printfNPPinfo(int cudaVerMajor, int cudaVerMinor)
-{
-	const NppLibraryVersion *libVer = nppGetLibVersion();
-
-	printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
-
-	int driverVersion, runtimeVersion;
-	cudaDriverGetVersion(&driverVersion);
-	cudaRuntimeGetVersion(&runtimeVersion);
-
-	printf("  CUDA Driver  Version: %d.%d\n", driverVersion / 1000, (driverVersion % 100) / 10);
-	printf("  CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, (runtimeVersion % 100) / 10);
-
-	bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor);
-	return bVal;
-}
-
-NppiDCTState *pDCTState;
-FrameHeader oFrameHeader;
-FrameHeader oFrameHeaderFixedSize;
-ScanHeader oScanHeader;
-QuantizationTable aQuantizationTables[4];
-Npp8u *pdQuantizationTables;
-HuffmanTable aHuffmanTables[4];
-HuffmanTable *pHuffmanDCTables;
-HuffmanTable *pHuffmanACTables;
-int nMCUBlocksH;
-int nMCUBlocksV;
-int nMCUBlocksHFixedSize;
-int nMCUBlocksVFixedSize;
-Npp8u *pdScan;
-NppiEncodeHuffmanSpec *apHuffmanDCTable[3];
-NppiEncodeHuffmanSpec *apHuffmanACTable[3];
-unsigned char *pDstJpeg;
-unsigned char *pDstOutput;
-int nRestartInterval;
-
-int initTable()
-{
-	NPP_CHECK_NPP(nppiDCTInitAlloc(&pDCTState));
-
-	nRestartInterval = -1;
-
-	cudaMalloc(&pdQuantizationTables, 64 * 4);
-	pHuffmanDCTables = aHuffmanTables;
-	pHuffmanACTables = &aHuffmanTables[2];
-	memset(aQuantizationTables, 0, 4 * sizeof(QuantizationTable));
-	memset(aHuffmanTables, 0, 4 * sizeof(HuffmanTable));
-	memset(&oFrameHeader, 0, sizeof(FrameHeader));
-
-
-	//????Huffman??
-	aHuffmanTables[0].nClassAndIdentifier = 0;
-	memcpy(aHuffmanTables[0].aCodes, STD_DC_Y_NRCODES, 16);
-	memcpy(aHuffmanTables[0].aTable, STD_DC_Y_VALUES, 12);
-
-	aHuffmanTables[1].nClassAndIdentifier = 1;
-	memcpy(aHuffmanTables[1].aCodes, STD_DC_UV_NRCODES, 16);
-	memcpy(aHuffmanTables[1].aTable, STD_DC_UV_VALUES, 12);
-
-	aHuffmanTables[2].nClassAndIdentifier = 16;
-	memcpy(aHuffmanTables[2].aCodes, STD_AC_Y_NRCODES, 16);
-	memcpy(aHuffmanTables[2].aTable, STD_AC_Y_VALUES, 162);
-
-	aHuffmanTables[3].nClassAndIdentifier = 17;
-	memcpy(aHuffmanTables[3].aCodes, STD_AC_UV_NRCODES, 16);
-	memcpy(aHuffmanTables[3].aTable, STD_AC_UV_VALUES, 162);
-
-
-	//????量????
-	aQuantizationTables[0].nPrecisionAndIdentifier = 0;
-	memcpy(aQuantizationTables[0].aTable, std_Y_QT, 64);
-	aQuantizationTables[1].nPrecisionAndIdentifier = 1;
-	memcpy(aQuantizationTables[1].aTable, std_UV_QT, 64);
-
-	NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables, aQuantizationTables[0].aTable, 64, cudaMemcpyHostToDevice));
-	NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables + 64, aQuantizationTables[1].aTable, 64, cudaMemcpyHostToDevice));
-
-	oFrameHeader.nSamplePrecision = 8;
-	oFrameHeader.nComponents = 3;
-	oFrameHeader.aComponentIdentifier[0] = 1;
-	oFrameHeader.aComponentIdentifier[1] = 2;
-	oFrameHeader.aComponentIdentifier[2] = 3;
-	oFrameHeader.aSamplingFactors[0] = 34;
-	oFrameHeader.aSamplingFactors[1] = 17;
-	oFrameHeader.aSamplingFactors[2] = 17;
-	oFrameHeader.aQuantizationTableSelector[0] = 0;
-	oFrameHeader.aQuantizationTableSelector[1] = 1;
-	oFrameHeader.aQuantizationTableSelector[2] = 1;
-
-	for (int i = 0; i < oFrameHeader.nComponents; ++i)
-	{
-		nMCUBlocksV = max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] & 0x0f);
-		nMCUBlocksH = max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] >> 4);
-	}
-	NPP_CHECK_CUDA(cudaMalloc(&pdScan, 4 << 20));
-
-
-
-	oScanHeader.nComponents = 3;
-	oScanHeader.aComponentSelector[0] = 1;
-	oScanHeader.aComponentSelector[1] = 2;
-	oScanHeader.aComponentSelector[2] = 3;
-	oScanHeader.aHuffmanTablesSelector[0] = 0;
-	oScanHeader.aHuffmanTablesSelector[1] = 17;
-	oScanHeader.aHuffmanTablesSelector[2] = 17;
-	oScanHeader.nSs = 0;
-	oScanHeader.nSe = 63;
-	oScanHeader.nA = 0;
-
-
-	return 0;
-}
-
-NppiSize aSrcSize[3];
-Npp16s *apdDCT[3];// = { 0, 0, 0 };
-Npp32s aDCTStep[3];
-
-Npp8u *apSrcImage[3];// = { 0, 0, 0 };
-Npp32s aSrcImageStep[3];
-size_t aSrcPitch[3];
-
-
-int releaseJpegNPP()
-{
-	nppiDCTFree(pDCTState);
-	cudaFree(pdQuantizationTables);
-	cudaFree(pdScan);
-	for (int i = 0; i < 3; ++i)
-	{
-		cudaFree(apdDCT[i]);
-		cudaFree(apSrcImage[i]);
-	}
-	return 0;
-}
-
-
-int initTable(int flag, int width, int height)
-{
-	//????帧头
-	oFrameHeaderFixedSize.nSamplePrecision = 8;
-	oFrameHeaderFixedSize.nComponents = 3;
-	oFrameHeaderFixedSize.aComponentIdentifier[0] = 1;
-	oFrameHeaderFixedSize.aComponentIdentifier[1] = 2;
-	oFrameHeaderFixedSize.aComponentIdentifier[2] = 3;
-	oFrameHeaderFixedSize.aSamplingFactors[0] = 34;
-	oFrameHeaderFixedSize.aSamplingFactors[1] = 17;
-	oFrameHeaderFixedSize.aSamplingFactors[2] = 17;
-	oFrameHeaderFixedSize.aQuantizationTableSelector[0] = 0;
-	oFrameHeaderFixedSize.aQuantizationTableSelector[1] = 1;
-	oFrameHeaderFixedSize.aQuantizationTableSelector[2] = 1;
-	oFrameHeaderFixedSize.nWidth = width;
-	oFrameHeaderFixedSize.nHeight = height;
-
-	for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i)
-	{
-		nMCUBlocksVFixedSize = max(nMCUBlocksVFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f);
-		nMCUBlocksHFixedSize = max(nMCUBlocksHFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] >> 4);
-	}
-
-	for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i)
-	{
-		NppiSize oBlocks;
-		NppiSize oBlocksPerMCU = { oFrameHeaderFixedSize.aSamplingFactors[i] >> 4, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f };
-
-		oBlocks.width = (int)ceil((oFrameHeaderFixedSize.nWidth + 7) / 8 *
-			static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksHFixedSize);
-		oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
-
-		oBlocks.height = (int)ceil((oFrameHeaderFixedSize.nHeight + 7) / 8 *
-			static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksVFixedSize);
-		oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
-
-		aSrcSize[i].width = oBlocks.width * 8;
-		aSrcSize[i].height = oBlocks.height * 8;
-
-		// Allocate Memory
-		size_t nPitch;
-		NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height));
-		aDCTStep[i] = static_cast<Npp32s>(nPitch);
-
-		NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height));
-
-		aSrcPitch[i] = nPitch;
-		aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
-	}
-
-	return 0;
-}
-
-int jpegNPP(const char *szOutputFile, float* d_srcRGB)
-{
-	//RGB2YUV
-	cudaError_t cudaStatus;
-	cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight,
-		apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
-		apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
-		apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
-
-	/**
-	* Forward DCT, quantization and level shift part of the JPEG encoding.
-	* Input is expected in 8x8 macro blocks and output is expected to be in 64x1
-	* macro blocks. The new version of the primitive takes the ROI in image pixel size and
-	* works with DCT coefficients that are in zig-zag order.
-	*/
-	int k = 0;
-	//LOG_INFO("NPP_CHECK_NPP:%d", 1);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
-		apdDCT[0], aDCTStep[0],
-		pdQuantizationTables + k * 64,
-		aSrcSize[0],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	k = 1;
-	//LOG_INFO("NPP_CHECK_NPP:%d", 2);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
-		apdDCT[1], aDCTStep[1],
-		pdQuantizationTables + k * 64,
-		aSrcSize[1],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	//LOG_INFO("NPP_CHECK_NPP:%d", 3);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
-		apdDCT[2], aDCTStep[2],
-		pdQuantizationTables + k * 64,
-		aSrcSize[2],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	// Huffman Encoding
-
-	Npp32s nScanLength;
-	Npp8u *pJpegEncoderTemp;
-
-#if (CUDA_VERSION == 8000)
-		Npp32s nTempSize; //when using CUDA8
-#else
-		size_t nTempSize; //when using CUDA9
-#endif
-	//modified by Junlin 190221
-
-	//LOG_INFO("NPP_CHECK_NPP:%d",4);
-	if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
-	{
-		printf("nppiEncodeHuffmanGetSize Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",5);
-	NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
-
-	/**
-	* Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
-	*/
-	NppStatus t_status;
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
-
-	/**
-	* Huffman Encoding of the JPEG Encoding.
-	* Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
-	*/
-	Npp32s nSs = 0;
-	Npp32s nSe = 63;
-	Npp32s nH = 0;
-	Npp32s nL = 0;
-	//LOG_INFO("NPP_CHECK_NPP:%d",6);
-	if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
-		0, nSs, nSe, nH, nL,
-		pdScan, &nScanLength,
-		apHuffmanDCTable,
-		apHuffmanACTable,
-		aSrcSize,
-		pJpegEncoderTemp)))
-	{
-		printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	for (int i = 0; i < 3; ++i)
-	{
-		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
-		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
-	}
-	// Write JPEG
-	pDstJpeg = new unsigned char[4 << 20]{};
-	pDstOutput = pDstJpeg;
-
-	writeMarker(0x0D8, pDstOutput);
-	writeJFIFTag(pDstOutput);
-	writeQuantizationTable(aQuantizationTables[0], pDstOutput);
-	writeQuantizationTable(aQuantizationTables[1], pDstOutput);
-	writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
-	writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
-	writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
-	writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
-	writeFrameHeader(oFrameHeaderFixedSize, pDstOutput);
-	writeScanHeader(oScanHeader, pDstOutput);
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",7);
-	NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
-
-	pDstOutput += nScanLength;
-	writeMarker(0x0D9, pDstOutput);
-	{
-		// Write result to file.
-		std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
-		outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
-	}
-
-	// Cleanup
-	cudaFree(pJpegEncoderTemp);
-	delete[] pDstJpeg;
-
-
-	return EXIT_SUCCESS;
-}
-
-int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB)
-{
-	//RGB2YUV
-	cudaError_t cudaStatus;
-	cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight,
-		apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
-		apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
-		apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
-
-	/**
-	* Forward DCT, quantization and level shift part of the JPEG encoding.
-	* Input is expected in 8x8 macro blocks and output is expected to be in 64x1
-	* macro blocks. The new version of the primitive takes the ROI in image pixel size and
-	* works with DCT coefficients that are in zig-zag order.
-	*/
-	int k = 0;
-	//LOG_INFO("NPP_CHECK_NPP:%d", 1);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
-		apdDCT[0], aDCTStep[0],
-		pdQuantizationTables + k * 64,
-		aSrcSize[0],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	k = 1;
-	//LOG_INFO("NPP_CHECK_NPP:%d", 2);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
-		apdDCT[1], aDCTStep[1],
-		pdQuantizationTables + k * 64,
-		aSrcSize[1],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	//LOG_INFO("NPP_CHECK_NPP:%d", 3);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
-		apdDCT[2], aDCTStep[2],
-		pdQuantizationTables + k * 64,
-		aSrcSize[2],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	// Huffman Encoding
-
-	Npp32s nScanLength;
-	Npp8u *pJpegEncoderTemp;
-
-#if (CUDA_VERSION == 8000)
-	Npp32s nTempSize; //when using CUDA8
-#else
-	size_t nTempSize; //when using CUDA9
-#endif
-					  //modified by Junlin 190221
-
-					  //LOG_INFO("NPP_CHECK_NPP:%d",4);
-	if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
-	{
-		printf("nppiEncodeHuffmanGetSize Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",5);
-	NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
-
-	/**
-	* Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
-	*/
-	NppStatus t_status;
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
-
-	/**
-	* Huffman Encoding of the JPEG Encoding.
-	* Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
-	*/
-	Npp32s nSs = 0;
-	Npp32s nSe = 63;
-	Npp32s nH = 0;
-	Npp32s nL = 0;
-	//LOG_INFO("NPP_CHECK_NPP:%d",6);
-	if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
-		0, nSs, nSe, nH, nL,
-		pdScan, &nScanLength,
-		apHuffmanDCTable,
-		apHuffmanACTable,
-		aSrcSize,
-		pJpegEncoderTemp)))
-	{
-		printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	for (int i = 0; i < 3; ++i)
-	{
-		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
-		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
-	}
-	// Write JPEG
-	pDstJpeg = new unsigned char[4 << 20]{};
-	pDstOutput = pDstJpeg;
-
-	writeMarker(0x0D8, pDstOutput);
-	writeJFIFTag(pDstOutput);
-	writeQuantizationTable(aQuantizationTables[0], pDstOutput);
-	writeQuantizationTable(aQuantizationTables[1], pDstOutput);
-	writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
-	writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
-	writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
-	writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
-	writeFrameHeader(oFrameHeaderFixedSize, pDstOutput);
-	writeScanHeader(oScanHeader, pDstOutput);
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",7);
-	NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
-
-	pDstOutput += nScanLength;
-	writeMarker(0x0D9, pDstOutput);
-	{
-		// Write result to file.
-		std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
-		outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
-	}
-
-	// Cleanup
-	cudaFree(pJpegEncoderTemp);
-	delete[] pDstJpeg;
-
-
-	return EXIT_SUCCESS;
-}
-
-
-int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height)
-{
-	NppiSize aSrcSize[3];
-	Npp16s *apdDCT[3] = { 0, 0, 0 };
-	Npp32s aDCTStep[3];
-
-	Npp8u *apSrcImage[3] = { 0, 0, 0 };
-	Npp32s aSrcImageStep[3];
-	size_t aSrcPitch[3];
-
-
-	//????帧头
-	oFrameHeader.nWidth = img_width;
-	oFrameHeader.nHeight = img_height;
-
-	for (int i = 0; i < oFrameHeader.nComponents; ++i)
-	{
-		NppiSize oBlocks;
-		NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f };
-
-		oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 *
-			static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH);
-		oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
-
-		oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 *
-			static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV);
-		oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
-
-		aSrcSize[i].width = oBlocks.width * 8;
-		aSrcSize[i].height = oBlocks.height * 8;
-
-		// Allocate Memory
-		size_t nPitch;
-		//LOG_INFO("NPP_CHECK_CUDA:%d",1);
-		NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height));
-		aDCTStep[i] = static_cast<Npp32s>(nPitch);
-
-		//LOG_INFO("NPP_CHECK_CUDA:%d",2);
-		NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height));
-
-		aSrcPitch[i] = nPitch;
-		aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
-	}
-
-	//RGB2YUV
-	cudaError_t cudaStatus;
-	cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height,
-		apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
-		apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
-		apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
-
-	/**
-	* Forward DCT, quantization and level shift part of the JPEG encoding.
-	* Input is expected in 8x8 macro blocks and output is expected to be in 64x1
-	* macro blocks. The new version of the primitive takes the ROI in image pixel size and
-	* works with DCT coefficients that are in zig-zag order.
-	*/
-	int k = 0;
-	//LOG_INFO("NPP_CHECK_CUDA:%d",3);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
-		apdDCT[0], aDCTStep[0],
-		pdQuantizationTables + k * 64,
-		aSrcSize[0],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-	k = 1;
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",4);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
-		apdDCT[1], aDCTStep[1],
-		pdQuantizationTables + k * 64,
-		aSrcSize[1],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",5);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
-		apdDCT[2], aDCTStep[2],
-		pdQuantizationTables + k * 64,
-		aSrcSize[2],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	// Huffman Encoding
-
-	Npp32s nScanLength;
-	Npp8u *pJpegEncoderTemp;
-
-#if (CUDA_VERSION == 8000)
-	Npp32s nTempSize; //when using CUDA8
-#else
-	size_t nTempSize; //when using CUDA9
-#endif
-					  //modified by Junlin 190221
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",6);
-	if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
-	{
-		printf("nppiEncodeHuffmanGetSize Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",7);
-	NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
-
-	/**
-	* Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
-	*/
-	NppStatus t_status;
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
-
-	/**
-	* Huffman Encoding of the JPEG Encoding.
-	* Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
-	*/
-	Npp32s nSs = 0;
-	Npp32s nSe = 63;
-	Npp32s nH = 0;
-	Npp32s nL = 0;
-	//LOG_INFO("NPP_CHECK_CUDA:%d",8);
-	if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
-		0, nSs, nSe, nH, nL,
-		pdScan, &nScanLength,
-		apHuffmanDCTable,
-		apHuffmanACTable,
-		aSrcSize,
-		pJpegEncoderTemp)))
-	{
-		printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	for (int i = 0; i < 3; ++i)
-	{
-		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
-		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
-	}
-	// Write JPEG
-	pDstJpeg = new unsigned char[4 << 20]{};
-	pDstOutput = pDstJpeg;
-
-	writeMarker(0x0D8, pDstOutput);
-	writeJFIFTag(pDstOutput);
-	writeQuantizationTable(aQuantizationTables[0], pDstOutput);
-	writeQuantizationTable(aQuantizationTables[1], pDstOutput);
-	writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
-	writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
-	writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
-	writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
-	writeFrameHeader(oFrameHeader, pDstOutput);
-	writeScanHeader(oScanHeader, pDstOutput);
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",9);
-	NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
-
-	pDstOutput += nScanLength;
-	writeMarker(0x0D9, pDstOutput);
-
-	{
-		// Write result to file.
-		std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
-		outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
-	}
-
-	// Cleanup
-	cudaFree(pJpegEncoderTemp);
-	delete[] pDstJpeg;
-	for (int i = 0; i < 3; ++i)
-	{
-		cudaFree(apdDCT[i]);
-		cudaFree(apSrcImage[i]);
-	}
-
-	return EXIT_SUCCESS;
-}
-
-
-int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height)
-{
-	NppiSize aSrcSize[3];
-	Npp16s *apdDCT[3] = { 0, 0, 0 };
-	Npp32s aDCTStep[3];
-
-	Npp8u *apSrcImage[3] = { 0, 0, 0 };
-	Npp32s aSrcImageStep[3];
-	size_t aSrcPitch[3];
-
-
-	//????帧头
-	oFrameHeader.nWidth = img_width;
-	oFrameHeader.nHeight = img_height;
-
-	for (int i = 0; i < oFrameHeader.nComponents; ++i)
-	{
-		NppiSize oBlocks;
-		NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f };
-
-		oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 *
-			static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH);
-		oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
-
-		oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 *
-			static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV);
-		oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
-
-		aSrcSize[i].width = oBlocks.width * 8;
-		aSrcSize[i].height = oBlocks.height * 8;
-
-		// Allocate Memory
-		size_t nPitch;
-		//LOG_INFO("NPP_CHECK_CUDA:%d",1);
-		NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height));
-		aDCTStep[i] = static_cast<Npp32s>(nPitch);
-
-		//LOG_INFO("NPP_CHECK_CUDA:%d",2);
-		NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height));
-
-		aSrcPitch[i] = nPitch;
-		aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
-	}
-
-	//RGB2YUV
-	cudaError_t cudaStatus;
-	cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height,
-		apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
-		apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
-		apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
-
-	/**
-	* Forward DCT, quantization and level shift part of the JPEG encoding.
-	* Input is expected in 8x8 macro blocks and output is expected to be in 64x1
-	* macro blocks. The new version of the primitive takes the ROI in image pixel size and
-	* works with DCT coefficients that are in zig-zag order.
-	*/
-	int k = 0;
-	//LOG_INFO("NPP_CHECK_CUDA:%d",3);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
-		apdDCT[0], aDCTStep[0],
-		pdQuantizationTables + k * 64,
-		aSrcSize[0],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-	k = 1;
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",4);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
-		apdDCT[1], aDCTStep[1],
-		pdQuantizationTables + k * 64,
-		aSrcSize[1],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",5);
-	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
-		apdDCT[2], aDCTStep[2],
-		pdQuantizationTables + k * 64,
-		aSrcSize[2],
-		pDCTState)))
-	{
-		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	// Huffman Encoding
-
-	Npp32s nScanLength;
-	Npp8u *pJpegEncoderTemp;
-
-#if (CUDA_VERSION == 8000)
-	Npp32s nTempSize; //when using CUDA8
-#else
-	size_t nTempSize; //when using CUDA9
-#endif
-					  //modified by Junlin 190221
-
-					  //LOG_INFO("NPP_CHECK_CUDA:%d",6);
-	if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
-	{
-		printf("nppiEncodeHuffmanGetSize Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",7);
-	NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
-
-	/**
-	* Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
-	*/
-	NppStatus t_status;
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
-	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
-
-	/**
-	* Huffman Encoding of the JPEG Encoding.
-	* Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
-	*/
-	Npp32s nSs = 0;
-	Npp32s nSe = 63;
-	Npp32s nH = 0;
-	Npp32s nL = 0;
-	//LOG_INFO("NPP_CHECK_CUDA:%d",8);
-	if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
-		0, nSs, nSe, nH, nL,
-		pdScan, &nScanLength,
-		apHuffmanDCTable,
-		apHuffmanACTable,
-		aSrcSize,
-		pJpegEncoderTemp)))
-	{
-		printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n");
-		return EXIT_FAILURE;
-	}
-
-	for (int i = 0; i < 3; ++i)
-	{
-		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
-		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
-	}
-	// Write JPEG
-	pDstJpeg = new unsigned char[4 << 20]{};
-	pDstOutput = pDstJpeg;
-
-	writeMarker(0x0D8, pDstOutput);
-	writeJFIFTag(pDstOutput);
-	writeQuantizationTable(aQuantizationTables[0], pDstOutput);
-	writeQuantizationTable(aQuantizationTables[1], pDstOutput);
-	writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
-	writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
-	writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
-	writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
-	writeFrameHeader(oFrameHeader, pDstOutput);
-	writeScanHeader(oScanHeader, pDstOutput);
-
-	//LOG_INFO("NPP_CHECK_CUDA:%d",9);
-	NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
-
-	pDstOutput += nScanLength;
-	writeMarker(0x0D9, pDstOutput);
-
-	{
-		// Write result to file.
-		std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
-		outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
-	}
-
-	// Cleanup
-	cudaFree(pJpegEncoderTemp);
-	delete[] pDstJpeg;
-	for (int i = 0; i < 3; ++i)
-	{
-		cudaFree(apdDCT[i]);
-		cudaFree(apSrcImage[i]);
-	}
-
-	return EXIT_SUCCESS;
-}
diff --git a/src/logger.hpp b/src/logger.hpp
deleted file mode 100644
index d249c3c..0000000
--- a/src/logger.hpp
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * @Author: yangzilong
- * @Date: 2021-12-21 11:07:11
- * @Last Modified by: yangzilong
- * @Email: yangzilong@objecteye.com
- * @Description:
- */
-
-#pragma once
-
-#include "define.hpp"
-#include <spdlog/spdlog.h>
-#include <spdlog/common.h>
-#include <spdlog/details/file_helper.h>
-#include <spdlog/details/null_mutex.h>
-#include <spdlog/fmt/fmt.h>
-#include <spdlog/sinks/base_sink.h>
-#include <spdlog/details/os.h>
-#include <spdlog/details/circular_q.h>
-#include <spdlog/details/synchronous_factory.h>
-
-#include <set>
-#include <chrono>
-#include <cstdio>
-#include <ctime>
-#include <mutex>
-#include <string>
-#include <memory>
-#include <vector>
-
-#define LOG_TRACE_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_TRACE(logger, __VA_ARGS__);}
-#define LOG_DEBUG_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_DEBUG(logger, __VA_ARGS__);}
-#define LOG_WARN_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_WARN(logger, __VA_ARGS__);}
-#define LOG_ERROR_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_ERROR(logger, __VA_ARGS__);}
-#define LOG_INFO_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_INFO(logger, __VA_ARGS__);}
-#define LOG_CRITICAL_WITH_LOGGER(logger, ...) {SPDLOG_LOGGER_CRITICAL(logger, __VA_ARGS__);}
-
-
-// use fmt lib, e.g. LOG_WARN("warn log, {1}, {1}, {2}", 1, 2);
-#define LOG_TRACE(msg, ...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::trace, msg, ##__VA_ARGS__)
-#define LOG_DEBUG(msg, ...) spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::debug, msg, ##__VA_ARGS__)
-#define LOG_INFO(msg,...)   spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::info, msg, ##__VA_ARGS__)
-#define LOG_WARN(msg,...)   spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::warn, msg, ##__VA_ARGS__)
-#define LOG_ERROR(msg,...)  spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::err, msg, ##__VA_ARGS__)
-#define LOG_FATAL(msg,...)  spdlog::log({__FILENAME__, __LINE__, __FUNCTION__}, spdlog::level::critical, msg, ##__VA_ARGS__)
-
-
-
-namespace spdlog
-{
-    namespace sinks
-    {
-        template<typename Mutex>
-        class easy_file_sink final : public base_sink<Mutex>
-        {
-        public:
-            easy_file_sink(filename_t base_filename, size_t max_size, size_t max_keep_days = 0)
-                : base_filename_(std::move(base_filename))
-                , max_size_(max_size)
-                , max_keep_days_(max_keep_days)
-            {
-                auto now = log_clock::now();
-                auto filename = gen_filename_by_daliy(base_filename_, now_tm(now));
-
-                file_helper_.open(filename, false);
-                current_size_ = file_helper_.size();
-                rotation_tp_ = next_rotation_tp_();
-
-                if (max_keep_days_ > 0)
-                {
-                    filespath_q_.push_back(std::move(std::set<filename_t>()));
-                    filespath_q_[filespath_q_.size() - 1].insert(filename);
-                }
-            }
-
-            filename_t filename()
-            {
-                std::lock_guard<Mutex> lock(base_sink<Mutex>::mutex_);
-                return file_helper_.filename();
-            }
-
-        protected:
-            void sink_it_(const details::log_msg &msg) override
-            {
-                memory_buf_t formatted;
-                base_sink<Mutex>::formatter_->format(msg, formatted);
-                current_size_ += formatted.size();
-
-                auto time = msg.time;
-                if (time >= rotation_tp_)
-                {
-                    file_helper_.close();
-                    auto filename = gen_filename_by_daliy(base_filename_, now_tm(time));
-                    file_helper_.open(filename, false);
-                    current_size_ = file_helper_.size();
-                    rotation_tp_ = next_rotation_tp_();
-
-                    {
-                        filespath_q_.push_back(std::move(std::set<filename_t>()));
-                        filespath_q_[filespath_q_.size() - 1].emplace(filename);
-                    }
-
-                    // Do the cleaning only at the end because it might throw on failure.
-                    if (max_keep_days_ > 0 && filespath_q_.size() > max_keep_days_)
-                        delete_old_();
-                }
-                else if (current_size_ >= max_size_)
-                {
-                    file_helper_.close();
-                    auto src_name = gen_filename_by_daliy(base_filename_, now_tm(time));
-                    auto target_name = gen_filename_by_filesize(base_filename_, now_tm(time), filespath_q_[filespath_q_.size() - 1].size());
-
-                    // rename file if failed then us `target_name` as src_name.
-                    if (!rename_file_(src_name, target_name))
-                    {
-                        details::os::sleep_for_millis(200);
-                        if (!rename_file_(src_name, target_name))
-                        {
-                            fprintf(stderr, "%s:%d rename %s to %s failed\n", __FILENAME__, __LINE__, src_name.c_str(), target_name.c_str());
-                            src_name = target_name;
-                        }
-                    }
-
-                    filespath_q_[filespath_q_.size() - 1].emplace(src_name);
-                    if (src_name != target_name)
-                        filespath_q_[filespath_q_.size() - 1].emplace(target_name);
-
-                    file_helper_.open(src_name, false);
-                    current_size_ = file_helper_.size();
-                    rotation_tp_ = next_rotation_tp_();
-                }
-
-                file_helper_.write(formatted);
-
-
-            }
-
-            void flush_() override
-            {
-                file_helper_.flush();
-            }
-
-        private:
-
-            tm now_tm(log_clock::time_point tp)
-            {
-                time_t tnow = log_clock::to_time_t(tp);
-                return spdlog::details::os::localtime(tnow);
-            }
-
-            /**
-             * @brief Get next day tm.
-             *
-             * @return log_clock::time_point
-             */
-            log_clock::time_point next_rotation_tp_()
-            {
-                auto now = log_clock::now();
-                tm date = now_tm(now);
-                date.tm_hour = 0;
-                date.tm_min = 0;
-                date.tm_sec = 0;
-                auto rotation_time = log_clock::from_time_t(std::mktime(&date));
-                if (rotation_time > now)
-                    return rotation_time;
-                return {rotation_time + std::chrono::hours(24)};
-            }
-
-            // Delete the file N rotations ago.
-            // Throw spdlog_ex on failure to delete the old file.
-            void delete_old_()
-            {
-                for (auto iter = filespath_q_.begin(); iter != filespath_q_.end();)
-                {
-                    if (filespath_q_.size() <= max_keep_days_)
-                        break;
-
-                    for (auto it = iter->begin(); it != iter->end(); ++it)
-                    {
-                        bool ok = details::os::remove_if_exists(*it) == 0;
-                        if (!ok)
-                            throw_spdlog_ex("Failed removing daily file " + details::os::filename_to_str(*it), errno);
-                    }
-                    filespath_q_.erase(iter);
-                }
-            }
-
-            /*  */
-            static filename_t gen_filename_by_daliy(const filename_t &filename, const tm &now_tm)
-            {
-                filename_t basename, ext;
-                std::tie(basename, ext) = details::file_helper::split_by_extension(filename);
-                return fmt::format(SPDLOG_FILENAME_T("{}_{:04d}_{:02d}_{:02d}{}"),
-                                basename,
-                                now_tm.tm_year + 1900,
-                                now_tm.tm_mon + 1,
-                                now_tm.tm_mday,
-                                ext);
-            }
-
-            //
-            static filename_t gen_filename_by_filesize(const filename_t &filename, const tm &now_tm, const int &idx)
-            {
-                filename_t basename, ext;
-                std::tie(basename, ext) = details::file_helper::split_by_extension(filename);
-                return fmt::format(SPDLOG_FILENAME_T("{}_{:04d}_{:02d}_{:02d}_{:02d}{:02d}{:02d}.{:d}{}"),
-                                basename,
-                                now_tm.tm_year + 1900,
-                                now_tm.tm_mon + 1,
-                                now_tm.tm_mday,
-                                now_tm.tm_hour,
-                                now_tm.tm_min,
-                                now_tm.tm_sec,
-                                idx,
-                                ext);
-            }
-
-            static bool rename_file_(const filename_t &src_filename, const filename_t &target_filename)
-            {
-                (void)details::os::remove(target_filename);
-                return details::os::rename(src_filename, target_filename) == 0;
-            }
-
-            filename_t base_filename_;
-            log_clock::time_point rotation_tp_;
-            details::file_helper file_helper_;
-            std::size_t max_size_;
-            std::size_t max_keep_days_;
-            std::size_t current_size_;
-            // std::vector<<std::set<filename_t>> filespath_q_;
-            std::vector<std::set<filename_t>> filespath_q_;
-        };
-
-        using easy_file_sink_mt = easy_file_sink<std::mutex>;
-        using easy_file_sink_st = easy_file_sink<details::null_mutex>;
-
-    }  // namespace sinks
-
-    template<typename Factory = spdlog::synchronous_factory>
-    inline std::shared_ptr<logger> easy_logger_mt(
-            const std::string &logger_name, const filename_t &filename, size_t max_size, size_t max_keep_days = -1)
-    {
-        return Factory::template create<sinks::easy_file_sink_mt>(logger_name, filename, max_size, max_keep_days);
-    }
-
-    template<typename Factory = spdlog::synchronous_factory>
-    inline std::shared_ptr<logger> easy_logger_st(
-            const std::string &logger_name, const filename_t &filename, size_t max_size, size_t max_keep_days = -1)
-    {
-        return Factory::template create<sinks::easy_file_sink_st>(logger_name, filename, max_size, max_keep_days);
-    }
-
-}  // namespace spdlog
-
-
-enum class LogLevel
-{
-    CLOSE = -1,
-    TRACE = 0,
-    DEBUG = 1,
-    INFO = 2,
-    WARN = 3,
-    ERROR = 4,
-    FATAL = 5,
-};
-
-
-class LoggerGenerator
-{
-public:
-    static LoggerGenerator* get_instance()
-    {
-        static LoggerGenerator logger;
-        return &logger;
-    }
-
-    void destory(LoggerGenerator *ptr)
-    {
-        if (ptr != nullptr)
-        {
-            delete ptr;
-            ptr = nullptr;
-        }
-    }
-
-    std::shared_ptr<spdlog::logger> gen_logger(const LogLevel &level, const std::string &logger_name,
-                                               const std::string &file_path, size_t max_file_size, size_t max_keep_days)
-    {
-        spdlog::level::level_enum spd_level;
-        if (LogLevel::TRACE == level)
-            spd_level = spdlog::level::trace;
-        else if (LogLevel::DEBUG == level)
-            spd_level = spdlog::level::debug;
-        else if (LogLevel::INFO == level)
-            spd_level = spdlog::level::info;
-        else if (LogLevel::WARN == level)
-            spd_level = spdlog::level::warn;
-        else if (LogLevel::ERROR == level)
-            spd_level = spdlog::level::err;
-        else if (LogLevel::FATAL == level)
-            spd_level = spdlog::level::critical;
-        else if (LogLevel::CLOSE == level)
-            spd_level = spdlog::level::off;
-
-        auto sink_ptr = std::make_shared<spdlog::sinks::easy_file_sink_mt>(file_path, max_file_size, max_keep_days);
-        auto logger = std::make_shared<spdlog::logger>(logger_name, sink_ptr);
-        logger->set_level(spd_level);
-		logger->set_pattern("%s(%#): [%L %D %T.%e %P %t %!] %v");
-
-        return logger;
-    }
-
-    void set_default_logger(const LogLevel &level, const std::string &logger_name,
-                            const std::string &file_name, size_t max_file_size, size_t max_keep_days)
-    {
-
-        auto logger = gen_logger(level, logger_name, file_name, max_file_size, max_keep_days);
-        spdlog::set_default_logger(logger);
-        spdlog::set_level(logger->level());
-		spdlog::set_pattern("%s(%#): [%L %D %T.%e %P %t %!] %v");
-
-        spdlog::flush_on(spdlog::level::trace);
-        spdlog::flush_every(std::chrono::seconds(1));
-    }
-
-};
-
-
-static void set_default_logger(const LogLevel &level, const std::string &logger_name,
-                               const std::string &file_path, size_t max_file_size, size_t max_keep_days)
-{
-    static LoggerGenerator loggerGenerator;
-    loggerGenerator.set_default_logger(level, logger_name, file_path, max_file_size, max_keep_days);
-}
-
-
-static std::shared_ptr<spdlog::logger> get_simple_logger(const LogLevel &level, const std::string &logger_name,
-                                                         const std::string &file_path, size_t max_file_size, size_t max_keep_days)
-{
-    static LoggerGenerator loggerGenerator;
-    return loggerGenerator.gen_logger(level, logger_name, file_path, max_file_size, max_keep_days);
-}
diff --git a/src/main.cpp b/src/main.cpp
deleted file mode 100644
index d24e8f4..0000000
--- a/src/main.cpp
+++ /dev/null
@@ -1,452 +0,0 @@
-#include "FFNvDecoderManager.h"
-#include <iostream>
-
-#include "cuda_kernels.h"
-
-#include "NvJpegEncoder.h"
-
-#include <pthread.h>
-#include <thread>
-
-#include <chrono>
-
-#include <unistd.h>
-
-
-#ifdef _WIN32
-#include "Winsock2.h"
-#pragma comment(lib, "ws2_32.lib")
-#endif
-
-#ifdef __linux__
-#include "arpa/inet.h"
-#endif
-
-#include "utiltools.hpp"
-
-#define MIN_RTP_PORT		10000
-#define MAX_RTP_PORT		60000
-
-// ȡ MIN_RTP_PORT(10000)~MAX_RTP_PORT(60000)֮�������˿�(ż���������������˿ڿ���)
-int allocRtpPort() {
-
-	static int s_rtpPort = MIN_RTP_PORT;
-	if (MIN_RTP_PORT == s_rtpPort)
-	{
-		srand((unsigned int)time(NULL));
-		s_rtpPort = MIN_RTP_PORT + (rand() % MIN_RTP_PORT);
-	}
-
-	if (s_rtpPort % 2)
-		++s_rtpPort;
-
-	while (true)
-	{
-		s_rtpPort += 2;
-		s_rtpPort = s_rtpPort >= MAX_RTP_PORT ? MIN_RTP_PORT : s_rtpPort;
-
-		int i = 0;
-		for (; i < 2; i++)
-		{
-			sockaddr_in sRecvAddr;
-			int s = socket(AF_INET, SOCK_DGRAM, 0);
-
-			sRecvAddr.sin_family = AF_INET;        
-			sRecvAddr.sin_addr.s_addr = htonl(INADDR_ANY);    
-			sRecvAddr.sin_port = htons(s_rtpPort + i); 
-
-			int nResult = bind(s, (sockaddr *)&sRecvAddr, sizeof(sRecvAddr));
-			if (nResult != 0)
-			{
-				break;
-			}
-
-			nResult = close(s);
-			if (nResult != 0)
-			{
-				printf("closesocket failed:%d\n", nResult);
-				break;
-			}
-		}
-
-		if (i == 2)
-			break;
-	}
-
-	return s_rtpPort;
-}
-
-
-
-
-
-unsigned char *pHwRgb[2] = {nullptr, nullptr};
-
-int sum1 = 0;
-int sum2 = 0;
-
-cudaStream_t stream[2];
-
-string data_home = "/mnt/data/cmhu/tmp/";
-
-
-#define checkCudaErrors(S) do {CUresult  status; \
-        status = S; \
-        if (status != CUDA_SUCCESS ) std::cout << __LINE__ <<" checkCudaErrors - status = " << status << std::endl; \
-        } while (false)
-
-
-static void gpu_helper(int gpuid)
-{
-    cudaSetDevice(gpuid);
-
-    // int *dn;
-    // cudaMalloc((void **)&dn, 1 * sizeof(int));
-
-	size_t free_byte;
-	size_t total_byte;
-
-	CUresult cuda_status = cuMemGetInfo(&free_byte, &total_byte);
-
-	const char *pStr = nullptr;
-	if (CUDA_SUCCESS != cuda_status) {
-		cuGetErrorString(cuda_status, &pStr);
-		printf("Error: cudaMemGetInfo fails, %s \n", pStr);
-		return;
-	}
-
-	double free_db = (double)free_byte;
-	double total_db = (double)total_byte;
-	double used_db_1 = (total_db - free_db) / 1024.0 / 1024.0;
-
-	std::cout <<"显存已使用 " << used_db_1 << " MB\n";
-
-    // cudaFree(dn);
-}
-
-int CheckCUDAProperty( int devId )
-{
-    cuInit(0);
-
-	CUdevice dev = devId;
-	size_t memSize = 0;
-	char devName[256] = {0};
-	int major = 0, minor = 0;
-	CUresult rlt = CUDA_SUCCESS;
-
-    rlt = cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
-    checkCudaErrors( rlt );
-
-    rlt = cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
-	checkCudaErrors( rlt );
-
-	rlt = cuDeviceGetName( devName, sizeof( devName ), dev );
-	checkCudaErrors( rlt );
-
-	printf( "Using GPU Device %d: %s has SM %d.%d compute capability\n",
-		    dev, devName, major, minor );
-
-	rlt = cuDeviceTotalMem( &memSize, dev );
-	checkCudaErrors( rlt );
-
-	printf( "Total amount of global memory:   %4.4f MB\n",
-		   (float)memSize / ( 1024 * 1024 ) );
-
-	return 0;
-}
-
-/**
- * 注意： gpuFrame 在解码器设置的显卡上，后续操作要十分注意这一点，尤其是多线程情况
- * */
-void postDecoded(const void * userPtr, AVFrame * gpuFrame){
-    AbstractDecoder* decoder = (AbstractDecoder*)userPtr;
-    if (decoder!= nullptr)
-    {
-        // cout << "decode name: " << decoder->getName() << endl;
-
-            // const char* gpu_pixfmt = av_get_pix_fmt_name((AVPixelFormat)gpuFrame->format);
-            // cout << "pixfmt: " << gpu_pixfmt << endl;
-            // cout << "keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
-            // cout << "decode successed ✿✿ヽ(°▽°)ノ✿ " << endl;
-
-            int sum = sum1;
-            if (decoder->getName() == "dec0")
-            {
-                sum1 ++ ;
-                sum = sum1;
-
-                if (gpuFrame->format == AV_PIX_FMT_CUDA)
-                {   
-                    // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl;
-                    cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
-                    cudaError_t cudaStatus;
-                    if(pHwRgb[0] == nullptr){
-                        // cudaStreamCreate(&stream[0]);
-                        cuda_common::setColorSpace( ITU_709, 0 );
-                        cudaStatus = cudaMalloc((void **)&pHwRgb[0], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
-                    }
-                    cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[0], gpuFrame->width, gpuFrame->height);
-                    cudaDeviceSynchronize();
-                    if (cudaStatus != cudaSuccess) {
-                        cout << "CUDAToBGR failed !!!" << endl;
-                        return;
-                    }
-
-                    string path = data_home + decoder->getName() + ".jpg";
-                    saveJpeg(path.c_str(), pHwRgb[0], gpuFrame->width, gpuFrame->height, stream[0]);  // 验证 CUDAToRGB 
-                }
-            } else if (decoder->getName() == "dec2") 
-            {
-                sum2 ++ ;
-                sum = sum2;
-
-                if (gpuFrame->format == AV_PIX_FMT_CUDA)
-                {   
-                    // cout << "gpuid = " << atoi(decoder->m_cfg.gpuid.c_str()) << endl;
-                    cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
-                    cudaError_t cudaStatus;
-                    if(pHwRgb[1] == nullptr){
-                        // cudaStreamCreate(&stream[1]);
-                        cuda_common::setColorSpace( ITU_709, 0 );
-                        cudaStatus = cudaMalloc((void **)&pHwRgb[1], 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
-                    }
-                    cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwRgb[1], gpuFrame->width, gpuFrame->height);
-                    cudaDeviceSynchronize();
-                    if (cudaStatus != cudaSuccess) {
-                        cout << "CUDAToBGR failed !!!" << endl;
-                        return;
-                    }
-
-                    string path = data_home + decoder->getName() + ".jpg";
-                    saveJpeg(path.c_str(), pHwRgb[1], gpuFrame->width, gpuFrame->height, stream[1]);  // 验证 CUDAToRGB 
-                }
-            }
-    }
-}
-
-long start_time = 0;
-long end_time = 0;
-bool count_flag = false;
-int count = 0;
-int count_std = 100;
-
-
-static int sum = 0;
-unsigned char *pHwData = nullptr;
-
-void postDecoded0(const void * userPtr, AVFrame * gpuFrame){
-    // std::this_thread::sleep_for(std::chrono::milliseconds(30000));
-
-    AbstractDecoder* decoder = (AbstractDecoder*)userPtr;
-    if (decoder!= nullptr)
-    {
-        // cout << "decode name: " << decoder->getName() << endl;
-        if (decoder->getName() == "dec")
-        {
-            if (! count_flag)
-            {
-                count_flag = true;
-                count = 0;
-                end_time = start_time = UtilTools::get_cur_time_ms();
-            }
-            count++;
-            sum ++ ;
-            if (count >= count_std)
-            {
-                // end_time = UtilTools::get_cur_time_ms();
-                // long time_using = end_time - start_time;
-                // double time_per_frame = double(time_using)/count_std ;
-                // cout << count_std << "帧用时:" << time_using << "ms 每帧用时：" << time_per_frame << "ms" << endl;
-                cout << decoder->getName() << " keyframe: " << gpuFrame->key_frame << " width: " << gpuFrame->width << " height: "<< gpuFrame->height << endl;
-                // cout << gpuFrame->pts << endl;
-
-                count_flag = false;
-            }
-            // cout << "帧数：" << sum << endl;
-
-            if (gpuFrame->format == AV_PIX_FMT_CUDA)
-            {   
-                cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
-                // cout << "gpu id : " << decoder->m_cfg.gpuid.c_str() << endl;
-                cudaError_t cudaStatus;
-                if(pHwData == nullptr){
-                    cuda_common::setColorSpace( ITU_709, 0 );
-                    cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char));
-                }
-                cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height);
-                cudaDeviceSynchronize();
-                if (cudaStatus != cudaSuccess) {
-                    cout << "CUDAToBGR failed !!!" << endl;
-                    return;
-                }
-
-                string path = data_home + decoder->getName() + ".jpg";
-                saveJpeg(path.c_str(), pHwData, gpuFrame->width, gpuFrame->height, nullptr);  // 验证 CUDAToRGB 
-            }
-        }
-    }
-}
-
-void decode_finished_cbk(const void* userPtr){
-    cout << "当前时间戳: " << UtilTools::get_cur_time_ms() << endl;
-}
-
-bool decode_request_stream_cbk(const char* deviceId){
-    cout << "需在此请求流" << endl;
-    return true;
-}
-
-// string test_uri = "rtmp://192.168.10.56:1935/objecteye/1";
-// string test_uri = "/home/cmhu/data/output_800x480.mp4";
-// string test_uri = "/home/cmhu/data/output_1920x1080.mp4";
-// string test_uri = "rtsp://176.10.0.2:8554/stream";
-// string test_uri = "/mnt/f/fiss/test_data/h265.mp4";
-string test_uri = "rtsp://176.10.0.4:8554/stream";
-
-void createDecode(int index, const char* gpu_id){
-    FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
-    MgrDecConfig config;
-    config.name = "dec" + to_string(index);
-    config.cfg.uri = test_uri;
-    config.cfg.post_decoded_cbk = postDecoded;
-    config.cfg.decode_finished_cbk = decode_finished_cbk;
-    config.cfg.force_tcp = true;
-    config.dec_type = DECODER_TYPE_FFMPEG;
-
-    config.cfg.gpuid = gpu_id;
-    // if (index % 2 == 0)
-    // {
-    //     config.cfg.gpuid = "0";
-    // }
-    // else
-    // {
-    //     config.cfg.gpuid = "0";
-    // }
-    
-    AbstractDecoder* decoder = pDecManager->createDecoder(config);
-    if (!decoder)
-    {
-        return ;
-    }
-    pDecManager->setPostDecArg(config.name, decoder);
-    pDecManager->setFinishedDecArg(config.name, decoder);
-    pDecManager->startDecodeByName(config.name);
-}
-
-void createGB28181Decode(int index, char* gpu_id, int port){
-    FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
-    MgrDecConfig config;
-    config.name = "dec" + to_string(index);
-    config.cfg.uri = config.name;
-    config.cfg.post_decoded_cbk = postDecoded;
-    config.cfg.decode_finished_cbk = decode_finished_cbk;
-    config.cfg.request_stream_cbk = decode_request_stream_cbk;
-    config.cfg.force_tcp = true;
-
-    config.dec_type = DECODER_TYPE_GB28181;
-    config.cfg.port = port;//allocRtpPort();
-
-    config.cfg.gpuid = gpu_id;
-    
-    AbstractDecoder* decoder = pDecManager->createDecoder(config);
-    if (!decoder)
-    {
-        return ;
-    }
-    pDecManager->setPostDecArg(config.name, decoder);
-    pDecManager->setFinishedDecArg(config.name, decoder);
-    pDecManager->startDecodeByName(config.name);
-}
-
-void logFF(void *, int level, const char *fmt, va_list ap)
-{
-    vfprintf(stdout, fmt, ap);
-}
-
-
-int main(int argc, char* argv[]){
-
-    test_uri = "rtsp://admin:admin@123456@192.168.60.176:554/cam/realmonitor?channel=1&subtype=0";//argv[1];
-    char* gpuid = argv[2];
-    int port = atoi(argv[3]);
-    cout << test_uri << "   gpu_id:" << gpuid << "   port:" << port << endl;
-
-    // av_log_set_callback(&logFF);
-
-    CheckCUDAProperty(atoi(gpuid));
-
-    pthread_t m_decode_thread;
-    pthread_create(&m_decode_thread,0,
-        [](void* arg)
-        {
-            // cudaSetDevice(atoi(gpuid));
-            while (true)
-            {
-                std::this_thread::sleep_for(std::chrono::minutes(1));
-                FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
-                int count = pDecManager->count();
-                cout << "当前时间：" << UtilTools::get_cur_time_ms() << "  当前运行路数： " << pDecManager->count() << endl;
-            }  
-
-            return (void*)0;
-        }
-    ,nullptr);
-
-
-    FFNvDecoderManager* pDecManager = FFNvDecoderManager::getInstance();
-    int i = 0;
-
-    while (true)
-    {
-        int ch = getchar();
-        if (ch == 'q')
-        {
-            break;
-        }
-
-        switch (ch)
-        {
-        case 'f':
-        case 'F':
-            createDecode(i, gpuid);
-            i++;
-            break;
-        case 'g':
-        case 'G':
-            createGB28181Decode(i, gpuid, port);
-            i++;
-            break;
-        case 'r':
-        case 'R':
-            pDecManager->resumeDecoder("dec0");
-            break;
-        case 'p':
-        case 'P':
-            pDecManager->pauseDecoder("dec0");
-            break;
-
-        case 'c':
-        case 'C':
-            i--;
-            pDecManager->closeDecoderByName("dec" + to_string(i));
-            break;
-
-        case 'i':
-        case 'I':
-        {
-            int w,h;
-            pDecManager->getResolution("dec0", w,h);
-            printf( "%s : %dx%d\n", "dec0" , w,h );
-        }
-            break;
-
-        default:
-            break;
-        }
-
-        /* code */
-    }
-
-    cout << "总共帧数：" << sum << endl;
-    pDecManager->closeAllDecoder();
-}
\ No newline at end of file
diff --git a/src/nvdecoder/DrawImageOnGPU.cu b/src/nvdecoder/DrawImageOnGPU.cu
new file mode 100644
index 0000000..8770cea
--- /dev/null
+++ b/src/nvdecoder/DrawImageOnGPU.cu
@@ -0,0 +1,126 @@
+#include "cuda_kernels.h"
+
+#include "logger.hpp"
+
+typedef unsigned char   uchar;
+typedef unsigned int    uint32;
+typedef int             int32;
+
+namespace cuda_common
+{
+	// Every kernel here runs one thread per pixel of a planar 3-channel image: three consecutive
+	// planes of src_width * src_height elements. The host wrappers launch a 2D grid rounded up to
+	// whole 32x16 blocks, so kernels must skip threads that fall outside the image.
+
+	// Writes the marker colour (plane 0 = 0, plane 1 = 255, plane 2 = 0) at pixel (x, y).
+	template <typename T>
+	__device__ __forceinline__ void put_marker_pixel(T* d_srcRGB, int src_width, int src_height, int x, int y)
+	{
+		const size_t plane = (size_t)src_width * src_height;
+		const size_t pixel = (size_t)y * src_width + x;
+		d_srcRGB[pixel] = 0;
+		d_srcRGB[plane + pixel] = 255;
+		d_srcRGB[2 * plane + pixel] = 0;
+	}
+
+	// Shared tail of the host wrappers: logs and returns a launch error, otherwise waits for the
+	// kernel and logs/returns any execution error. kernelName is only used in the log text.
+	static cudaError_t finish_draw_launch(const char* kernelName)
+	{
+		cudaError_t cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			LOG_ERROR("{} launch failed: {}", kernelName, cudaGetErrorString(cudaStatus));
+			return cudaStatus;
+		}
+
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			LOG_ERROR("cudaDeviceSynchronize returned error code {} after launching {}!", static_cast<int>(cudaStatus), kernelName);
+		}
+		return cudaStatus;
+	}
+
+	// Draws the 1-pixel outline of the box [left, right] x [top, bottom] on a float image.
+	// Parts of the box that lie outside the image are clipped instead of written out of bounds.
+	__global__ void kernel_drawPixel(float* d_srcRGB, int src_width, int src_height,
+		int left, int top, int right, int bottom)
+	{
+		const int x = blockIdx.x * blockDim.x + threadIdx.x;
+		const int y = blockIdx.y * blockDim.y + threadIdx.y;
+		if (x >= src_width || y >= src_height)
+			return;	// grid padding or a box overhanging the image: nothing valid to write
+
+		if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right))
+			put_marker_pixel(d_srcRGB, src_width, src_height, x, y);
+	}
+
+	// Outlines the box on a float image and blocks until the kernel has finished.
+	// Returns cudaSuccess, or the launch/execution error, which is also logged.
+	cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
+	{
+		dim3 block(32, 16, 1);
+		dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
+
+		kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom);
+		return finish_draw_launch("kernel_drawPixel(float)");
+	}
+
+	// Draws the 1-pixel outline of the box [left, right] x [top, bottom] on an unsigned char image.
+	__global__ void kernel_drawPixel(unsigned char* d_srcRGB, int src_width, int src_height,
+		int left, int top, int right, int bottom)
+	{
+		const int x = blockIdx.x * blockDim.x + threadIdx.x;
+		const int y = blockIdx.y * blockDim.y + threadIdx.y;
+		if (x >= src_width || y >= src_height)
+			return;	// grid padding or a box overhanging the image: nothing valid to write
+
+		if (((x == left || x == right) && y >= top && y <= bottom) || ((y == top || y == bottom) && x >= left && x <= right))
+			put_marker_pixel(d_srcRGB, src_width, src_height, x, y);
+	}
+
+	// Outlines the box on an unsigned char image and blocks until the kernel has finished.
+	// Returns cudaSuccess, or the launch/execution error, which is also logged.
+	cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
+	{
+		dim3 block(32, 16, 1);
+		dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
+
+		kernel_drawPixel << < grid, block >> >(d_srcRGB, src_width, src_height, left, top, right, bottom);
+		return finish_draw_launch("kernel_drawPixel(unsigned char)");
+	}
+
+	// Marks the pixels lying exactly on the segment (begin_x, begin_y)-(end_x, end_y): an integer
+	// collinearity test restricted to the segment's bounding box and to the image.
+	// NOTE(review): only exactly-collinear pixels match, so shallow/steep slopes come out dotted.
+	__global__ void kernel_drawLine(float* d_srcRGB, int src_width, int src_height,
+		int begin_x, int begin_y, int end_x, int end_y)
+	{
+		const int x = blockIdx.x * blockDim.x + threadIdx.x;
+		const int y = blockIdx.y * blockDim.y + threadIdx.y;
+		if (x >= src_width || y >= src_height)
+			return;	// grid padding or an end point outside the image: nothing valid to write
+
+		const int min_x = end_x < begin_x ? end_x : begin_x;
+		const int max_x = end_x < begin_x ? begin_x : end_x;
+		const int min_y = end_y < begin_y ? end_y : begin_y;
+		const int max_y = end_y < begin_y ? begin_y : end_y;
+
+		if ((x - begin_x) * (end_y - begin_y) == (end_x - begin_x) * (y - begin_y)
+			&& min_x <= x && x <= max_x
+			&& min_y <= y && y <= max_y)
+		{
+			put_marker_pixel(d_srcRGB, src_width, src_height, x, y);
+		}
+	}
+
+	// Draws the segment on a float image and blocks until the kernel has finished.
+	// Returns cudaSuccess, or the launch/execution error, which is also logged.
+	cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y)
+	{
+		dim3 block(32, 16, 1);
+		dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
+
+		kernel_drawLine << < grid, block >> >(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y);
+		return finish_draw_launch("kernel_drawLine");
+	}
+}
\ No newline at end of file
diff --git a/src/nvdecoder/FFCuContextManager.cpp b/src/nvdecoder/FFCuContextManager.cpp
new file mode 100644
index 0000000..9ae930c
--- /dev/null
+++ b/src/nvdecoder/FFCuContextManager.cpp
@@ -0,0 +1,29 @@
+#include "FFCuContextManager.h"
+
+#include "common_header.h"
+
+using namespace std;
+
+FFCuContextManager::~FFCuContextManager()
+{
+    // Drop the manager's reference on every cached device context, then empty the cache.
+    for (auto &entry : ctxMap)
+        av_buffer_unref(&entry.second);
+    ctxMap.clear();
+}
+
+AVBufferRef *FFCuContextManager::getCuCtx(string gpuid)
+{
+    AVBufferRef *&slot = ctxMap[gpuid]; // operator[] creates a null slot on first use of this GPU id
+    if (slot != nullptr)
+        return slot;
+    // Nothing cached for this GPU yet: initialise the hardware decoding device
+    AVBufferRef *created = nullptr;
+    if (av_hwdevice_ctx_create(&created, AV_HWDEVICE_TYPE_CUDA, gpuid.c_str(), nullptr, 0) < 0)
+    {
+        LOG_ERROR("Failed to create specified HW device.");
+        return nullptr;
+    }
+    slot = created;
+    return created;
+}
\ No newline at end of file
diff --git a/src/nvdecoder/FFCuContextManager.h b/src/nvdecoder/FFCuContextManager.h
new file mode 100644
index 0000000..3050641
--- /dev/null
+++ b/src/nvdecoder/FFCuContextManager.h
@@ -0,0 +1,37 @@
+
+#include<map>
+#include<string>
+
+extern "C"
+{
+	#include <libavcodec/avcodec.h> 
+	#include <libavdevice/avdevice.h> 
+	#include <libavformat/avformat.h> 
+	#include <libavfilter/avfilter.h> 
+	#include <libavutil/avutil.h> 
+    #include <libavutil/pixdesc.h> 
+	#include <libswscale/swscale.h>
+}
+
+using namespace std;
+
+class FFCuContextManager{
+public:
+    static FFCuContextManager* getInstance(){
+		static FFCuContextManager* singleton = nullptr;
+		if (singleton == nullptr){
+			singleton = new FFCuContextManager();
+		}
+		return singleton;
+	}
+
+    AVBufferRef *getCuCtx(string gpuid);
+
+private:
+    FFCuContextManager(){}
+	~FFCuContextManager();
+
+private:
+    map<string,AVBufferRef *> ctxMap;
+
+};
\ No newline at end of file
diff --git a/src/nvdecoder/FFNvDecoder.cpp b/src/nvdecoder/FFNvDecoder.cpp
new file mode 100644
index 0000000..3ebcd6c
--- /dev/null
+++ b/src/nvdecoder/FFNvDecoder.cpp
@@ -0,0 +1,474 @@
+#include "FFNvDecoder.h"
+
+#include <chrono>
+#include <thread>
+#include <fstream>
+
+#include <chrono>
+
+#include "FFCuContextManager.h"
+
+#include "common_header.h"
+
+#include "GpuRgbMemory.hpp"
+#include "cuda_kernels.h"
+
+using namespace std;
+
+// Reference blog post: https://blog.csdn.net/qq_40116098/article/details/120704340
+
+static AVPixelFormat get_hw_format(AVCodecContext *avctx, const AVPixelFormat *pix_fmts)
+{
+	// get_format callback: avctx->opaque carries the owning FFNvDecoder.
+	FFNvDecoder* owner = (FFNvDecoder*)avctx->opaque;
+	const AVPixelFormat wanted = owner->getHwPixFmt();
+	// Walk the -1 terminated list of offered formats looking for the decoder's hardware format.
+	for (const AVPixelFormat *candidate = pix_fmts; *candidate != -1; ++candidate) {
+		if (*candidate == wanted)
+			return *candidate;
+	}
+
+	LOG_ERROR("Failed to get HW surface format");
+	return AV_PIX_FMT_NONE;
+}
+
+FFNvDecoder::FFNvDecoder()
+{
+	// 初始化解码对象
+	fmt_ctx = nullptr;
+	avctx = nullptr;
+	m_bRunning = false;
+
+	stream = nullptr;
+    stream_index = -1;
+    hw_pix_fmt = AV_PIX_FMT_NONE;
+    m_dec_name = "";
+
+	m_bPause = false;
+	m_bReal = true;
+
+	m_decode_thread = 0;
+	m_post_decode_thread = 0;
+
+	m_bFinished = false;
+	m_dec_keyframe = false;
+	m_fps = 0.0;
+}
+
+FFNvDecoder::~FFNvDecoder()
+{
+	m_dec_keyframe = false;
+}
+
+bool FFNvDecoder::init(FFDecConfig& cfg)
+{
+	m_cfg = cfg;
+
+	fstream infile(cfg.uri);
+	if (infile.is_open()){
+		m_bReal = false;
+		infile.close();
+	}else {
+		m_bReal = true;
+	}
+
+	post_decoded_cbk = cfg.post_decoded_cbk;
+    decode_finished_cbk = cfg.decode_finished_cbk;
+
+	return init(cfg.uri.c_str(), cfg.gpuid.c_str(),cfg.force_tcp);
+}
+
+bool FFNvDecoder::init(const char* uri, const char* gpuid, bool force_tcp)
+{
+	// Opens `uri`, picks its best video stream and opens the matching "<codec>_cuvid" decoder for GPU `gpuid`; false on failure.
+	// av_log_set_level(AV_LOG_DEBUG);
+	avformat_network_init();
+
+	// Open the input video
+	AVDictionary *options = nullptr;
+	av_dict_set( &options, "bufsize", "655360", 0 );
+	av_dict_set( &options, "rtsp_transport", force_tcp ? "tcp" : "udp", 0 );
+	// av_dict_set( &options, "listen_timeout", "30", 0 ); // unit: seconds
+	av_dict_set( &options, "stimeout", "30000000", 0 ); // unit: microseconds
+	fmt_ctx = avformat_alloc_context();
+	const int open_ret = avformat_open_input(&fmt_ctx, uri, nullptr, &options);
+	av_dict_free(&options); // entries the demuxer did not consume stay in the dictionary and belong to the caller
+	if (open_ret != 0) {
+		LOG_ERROR("Cannot open input file:{}",uri);
+		return false;
+	}
+
+	// Probe the stream information
+	if (avformat_find_stream_info(fmt_ctx, nullptr) < 0) {
+		LOG_ERROR("Cannot find input stream information");
+		return false;
+	}
+
+	// Locate the video stream
+	AVCodec *decoder = nullptr;
+	stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0);
+	if (stream_index < 0) {
+		LOG_ERROR("Cannot find a video stream in the input file");
+		return false;
+	}
+
+	string cuvid_dec_name = string(decoder->name) + "_cuvid";
+	AVCodec *vcodec = avcodec_find_decoder_by_name(cuvid_dec_name.c_str());
+	if (!(avctx = avcodec_alloc_context3(vcodec)))
+		return false; // was `(bool)AVERROR(ENOMEM)`: a non-zero error code converts to true, i.e. "success"
+
+	// Get the video stream object
+	stream = fmt_ctx->streams[stream_index];
+	if (avcodec_parameters_to_context(avctx, stream->codecpar) < 0)
+		return false;
+
+	m_fps = av_q2d(stream ->avg_frame_rate);
+
+	avctx->opaque = this;
+	// Pixel-format negotiation callback; it reaches this object back through avctx->opaque
+	avctx->get_format = get_hw_format;
+
+	hw_pix_fmt = AV_PIX_FMT_CUDA;
+
+	FFCuContextManager* pCtxMgr = FFCuContextManager::getInstance();
+
+	AVBufferRef *hw_device_ctx = pCtxMgr->getCuCtx(gpuid);
+	if(nullptr == hw_device_ctx){
+		av_log(nullptr, AV_LOG_ERROR, "create CUDA context failed ! \n");
+		return false;
+	}
+	avctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
+	if (nullptr == avctx->hw_device_ctx)
+	{
+		return false;
+	}
+
+	// Open the decoder
+	AVDictionary *op = nullptr;
+	av_dict_set( &op, "gpu", gpuid, 0 );
+	const int codec_ret = avcodec_open2(avctx, vcodec, &op);
+	av_dict_free(&op); // release whatever the codec left in the dictionary
+	if (codec_ret < 0) {
+		LOG_ERROR("Failed to open codec for stream");
+		return false;
+	}
+	return true;
+}
+
+bool FFNvDecoder::isSurport(FFDecConfig& cfg)
+{
+	bool bRet = init(cfg);
+    decode_finished();
+    return bRet;
+}
+
+bool FFNvDecoder::start(){
+
+	m_bRunning = true;
+
+	pthread_create(&m_decode_thread,0,
+        [](void* arg)
+        {
+            FFNvDecoder* a=(FFNvDecoder*)arg;
+            a->decode_thread();
+            return (void*)0;
+        }
+    ,this);
+
+	return true;
+}
+
+void FFNvDecoder::decode_thread()
+{
+	AVPacket* pkt ;
+	pkt = av_packet_alloc();
+	av_init_packet( pkt );
+
+	pthread_create(&m_post_decode_thread,0,
+        [](void* arg)
+        {
+            FFNvDecoder* a=(FFNvDecoder*)arg;
+            a->post_decode_thread();
+            return (void*)0;
+        }
+    ,this);
+
+	// long start_time = UtilTools::get_cur_time_ms();
+
+	while (m_bRunning)
+	{
+		if (!m_bReal)
+		{
+			if (m_bPause)
+			{
+				std::this_thread::sleep_for(std::chrono::milliseconds(3));
+				continue;
+			}
+		}
+		
+		int result = av_read_frame(fmt_ctx, pkt);
+		if (result == AVERROR_EOF || result < 0)
+		{
+			LOG_ERROR("Failed to read frame!");
+			break;
+		}
+
+		if (m_dec_keyframe && !(pkt->flags & AV_PKT_FLAG_KEY)) {
+			av_packet_unref(pkt);
+			continue;
+		}
+
+		if (stream_index == pkt->stream_index){
+			result = avcodec_send_packet(avctx, pkt);
+			if (result < 0){
+				av_packet_unref(pkt);
+				LOG_ERROR("{} - Failed to send pkt: {}", m_dec_name, result);
+				continue;
+			}
+
+			AVFrame* gpuFrame = av_frame_alloc();
+			result = avcodec_receive_frame(avctx, gpuFrame);
+			if ((result == AVERROR(EAGAIN) || result == AVERROR_EOF) || result < 0){
+				LOG_ERROR("{} - Failed to receive frame: {}", m_dec_name, result);
+				av_frame_free(&gpuFrame); 
+				av_packet_unref(pkt);
+				continue;
+			}
+			av_packet_unref(pkt);
+
+			if (m_bReal){
+				if (m_bPause){
+					av_frame_free(&gpuFrame); 
+					std::this_thread::sleep_for(std::chrono::milliseconds(3));
+					continue;
+				}
+			}
+
+			if(gpuFrame != nullptr){
+				m_queue_mutex.lock();
+				if(mFrameQueue.size() <= 10){
+					mFrameQueue.push(gpuFrame);
+				}else{
+					av_frame_free(&gpuFrame); 
+				}
+				m_queue_mutex.unlock();
+			}
+		}
+		av_packet_unref(pkt);
+	}
+
+	m_bRunning = false;
+	av_packet_free(&pkt);
+
+	// long end_time = UtilTools::get_cur_time_ms();
+	// cout << "解码用时：" << end_time - start_time << endl;
+
+	if (m_post_decode_thread != 0)
+	{
+		pthread_join(m_post_decode_thread,0);
+	}
+
+	decode_finished_cbk(m_finishedDecArg);
+
+	decode_finished();
+
+	// 清空队列
+	while(mFrameQueue.size() > 0){
+		AVFrame * gpuFrame = mFrameQueue.front();
+		av_frame_free(&gpuFrame); 
+		mFrameQueue.pop();
+	}
+
+	LOG_INFO("{} - decode thread exited.", m_dec_name);
+}
+
+void FFNvDecoder::decode_finished(){
+	if (avctx)
+	{
+		avcodec_free_context(&avctx);
+	}
+	
+	if (fmt_ctx)
+	{
+		avformat_close_input(&fmt_ctx);
+	}
+
+	m_bFinished = true;
+	m_dec_keyframe = false;
+}
+
+void FFNvDecoder::post_decode_thread(){
+	// Consumer loop: pops decoded frames and passes one out of every skip_frame to post_decoded_cbk.
+	const int skip_frame = m_cfg.skip_frame > 0 ? m_cfg.skip_frame : 1;
+	int index = 0;
+	while (m_bRunning)
+	{
+		// Inspect the queue under its mutex: decode_thread() pushes to it concurrently.
+		m_queue_mutex.lock();
+		const bool has_frame = !mFrameQueue.empty();
+		m_queue_mutex.unlock();
+		if (!has_frame){
+			// Nothing to consume yet: back off briefly instead of spinning at full CPU load.
+			std::this_thread::sleep_for(std::chrono::milliseconds(1));
+			continue;
+		}
+		// snapshot() takes this lock too, so the head frame cannot be popped and freed while it is being read.
+		std::lock_guard<std::mutex> l(m_snapshot_mutex);
+		m_queue_mutex.lock();
+		AVFrame * gpuFrame = mFrameQueue.front();
+		mFrameQueue.pop();
+		m_queue_mutex.unlock();
+		if (skip_frame == 1 || index % skip_frame == 0){ // frame skipping
+			post_decoded_cbk(m_postDecArg, gpuFrame);
+			index = 0;
+		}
+		av_frame_free(&gpuFrame);
+		index++;
+	}
+	LOG_INFO("post decode thread exited.");
+}
+
+void FFNvDecoder::close(){
+	m_bRunning=false;
+	if(m_decode_thread != 0){
+		pthread_join(m_decode_thread,0);
+	}
+	m_dec_keyframe = false;
+}
+
+AVPixelFormat FFNvDecoder::getHwPixFmt(){
+	return hw_pix_fmt;
+}
+
+bool FFNvDecoder::isRunning(){
+	return m_bRunning;
+}
+
+bool FFNvDecoder::isFinished(){
+	return m_bFinished;
+}
+
+bool FFNvDecoder::isPausing(){
+	return m_bPause;
+}
+
+bool FFNvDecoder::getResolution( int &width, int &height ){
+	if (avctx != nullptr)
+	{
+		width = avctx->width;
+		height = avctx->height;
+		return true;
+	}
+	
+	return false;
+}
+
+void FFNvDecoder::pause(){
+	m_bPause = true;
+}
+
+void FFNvDecoder::resume(){
+	m_bPause = false;
+}
+
+void FFNvDecoder::setDecKeyframe(bool bKeyframe)
+{
+	m_dec_keyframe = bKeyframe;
+}
+
+int FFNvDecoder::getCachedQueueLength(){
+	// Returns the number of decoded frames currently waiting in mFrameQueue.
+	// A scoped guard releases the mutex on return; the old code called lock() twice and never unlocked.
+	std::lock_guard<std::mutex> guard(m_queue_mutex);
+	return (int)mFrameQueue.size();
+}
+
+float FFNvDecoder::fps(){
+	return m_fps;
+}
+
+FFImgInfo* FFNvDecoder::snapshot(){
+	// Converts the frame at the head of the queue to BGR and copies it to host memory (av_malloc); nullptr if no frame or on failure.
+	// Hold the snapshot lock so that post_decode_thread() stops consuming the queue meanwhile
+	std::lock_guard<std::mutex> l(m_snapshot_mutex);
+
+	AVFrame * gpuFrame = nullptr;
+
+	bool bFirst = true;
+	while(true){
+		m_queue_mutex.lock();
+		if(mFrameQueue.size() <= 0){
+			m_queue_mutex.unlock();
+			if(bFirst){
+				std::this_thread::sleep_for(std::chrono::milliseconds(100));
+				bFirst = false;
+				continue;
+			}else{
+				// Second pass: we have already waited 100 ms
+				// and still no decoded frame arrived, so give up
+				return nullptr;
+			}
+		}
+
+		// The queue holds at least one frame
+		gpuFrame = mFrameQueue.front();
+		m_queue_mutex.unlock();
+		break;
+	}
+
+	if (gpuFrame != nullptr && gpuFrame->format == AV_PIX_FMT_CUDA ){
+		LOG_DEBUG("decode task: gpuid: {}  width: {} height: {}", m_cfg.gpuid, gpuFrame->width, gpuFrame->height);
+		GpuRgbMemory* gpuMem = new GpuRgbMemory(3, gpuFrame->width, gpuFrame->height, getName(), m_cfg.gpuid , true);
+
+		if (gpuMem->getMem() == nullptr){
+			LOG_ERROR("new GpuRgbMemory failed !!!");
+			delete gpuMem; // every early return must release the device buffer wrapper
+			return nullptr;
+		}
+		cudaSetDevice(atoi(m_cfg.gpuid.c_str()));
+		cuda_common::setColorSpace( ITU_709, 0 );
+		cudaError_t cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0],(CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], gpuMem->getMem(), gpuFrame->width, gpuFrame->height);
+		cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			LOG_ERROR("CUDAToBGR failed failed !!!");
+			delete gpuMem;
+			return nullptr;
+		}
+		unsigned char * pHwRgb = gpuMem->getMem();
+		int channel = gpuMem->getChannel();
+		int width = gpuMem->getWidth();
+		int height = gpuMem->getHeight();
+
+		if (pHwRgb != nullptr && channel > 0 && width > 0 && height > 0){
+			int nSize = channel * height * width;
+
+			LOG_INFO("channel:{} height:{} width:{}", channel, height, width);
+			// unsigned char* cpu_data = new unsigned char[nSize];
+
+            unsigned char* cpu_data = (unsigned char *)av_malloc(nSize * sizeof(unsigned char));
+			const cudaError_t copyStatus = cpu_data ? cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost) : cudaErrorMemoryAllocation;
+			delete gpuMem; // the device copy is no longer needed once the blocking cudaMemcpy has returned
+			if (copyStatus != cudaSuccess){
+				LOG_ERROR("snapshot: copying the frame to host memory failed: {}", cudaGetErrorString(copyStatus));
+				av_free(cpu_data);
+				return nullptr;
+			}
+			FFImgInfo* imgInfo = new FFImgInfo();
+			imgInfo->dec_name = m_dec_name;
+			imgInfo->pData = cpu_data;
+			imgInfo->height = height;
+			imgInfo->width = width;
+			imgInfo->timestamp = UtilTools::get_cur_time_ms();
+			imgInfo->index = m_index;
+
+			m_index++;
+
+			return imgInfo;
+		}
+
+		delete gpuMem;
+		gpuMem = nullptr;
+	}
+
+	return nullptr;
+}
\ No newline at end of file
diff --git a/src/nvdecoder/FFNvDecoder.h b/src/nvdecoder/FFNvDecoder.h
new file mode 100644
index 0000000..4bc12e9
--- /dev/null
+++ b/src/nvdecoder/FFNvDecoder.h
@@ -0,0 +1,68 @@
+#include<string>
+#include <pthread.h>
+
+#include "../AbstractDecoder.h"
+
+#include <mutex>
+
+using namespace std;
+
+class FFNvDecoder : public AbstractDecoder{
+public:
+    FFNvDecoder();
+    ~FFNvDecoder();
+    bool init(FFDecConfig& cfg);
+    void close();
+    bool start();
+    void pause();
+    void resume();
+
+    void setDecKeyframe(bool bKeyframe);
+
+    bool isRunning();
+    bool isFinished();
+    bool isPausing();
+    bool getResolution( int &width, int &height );
+
+    bool isSurport(FFDecConfig& cfg);
+
+    int getCachedQueueLength();
+
+    float fps();
+
+    DECODER_TYPE getDecoderType(){ return DECODER_TYPE_FFMPEG; }
+
+    FFImgInfo* snapshot();
+
+public:
+    AVPixelFormat getHwPixFmt();
+
+private:
+    void decode_thread();
+    void post_decode_thread();
+    bool init(const char* uri, const char* gpuid, bool force_tcp);
+    void decode_finished();
+
+private:
+    AVStream* stream;
+    AVCodecContext *avctx;
+    int stream_index;
+    AVFormatContext *fmt_ctx;
+    AVPixelFormat hw_pix_fmt;
+
+    pthread_t m_decode_thread;
+    pthread_t m_post_decode_thread;
+    
+    bool m_bRunning;
+    bool m_bFinished;
+
+    bool m_bPause;
+
+    bool m_bReal; // 是否实时流
+
+    float m_fps;
+
+    queue<AVFrame*> mFrameQueue;
+    mutex m_queue_mutex;
+    mutex m_snapshot_mutex;
+};
\ No newline at end of file
diff --git a/src/nvdecoder/GpuRgbMemory.hpp b/src/nvdecoder/GpuRgbMemory.hpp
new file mode 100644
index 0000000..31be476
--- /dev/null
+++ b/src/nvdecoder/GpuRgbMemory.hpp
@@ -0,0 +1,34 @@
+#include<string>
+
+#include "../DeviceRgbMemory.hpp"
+#include "cuda_kernels.h"
+#include "define.hpp"
+#include "utiltools.hpp"
+
+using namespace std;
+
+class GpuRgbMemory : public DeviceRgbMemory{
+
+public:
+     GpuRgbMemory(int _channel, int _width, int _height, string _id, string _gpuid, bool _isused)
+     :DeviceRgbMemory(_channel, _width, _height, _id, _gpuid, _isused){
+        gpuid = _gpuid;
+        cudaSetDevice(atoi(gpuid.c_str()));
+        CHECK_CUDA(cudaMalloc((void **)&pHwRgb, data_size * sizeof(unsigned char)));
+    }
+
+    ~GpuRgbMemory(){
+        if (pHwRgb) {
+            cudaSetDevice(atoi(gpuid.c_str()));
+            CHECK_CUDA(cudaFree(pHwRgb));
+            pHwRgb = nullptr;
+        }
+    }
+
+    string getGpuId() {
+        return gpuid;
+    }
+
+private:
+    string gpuid;
+};
\ No newline at end of file
diff --git a/src/nvdecoder/ImageSaveGPU.cpp b/src/nvdecoder/ImageSaveGPU.cpp
new file mode 100644
index 0000000..dde9b64
--- /dev/null
+++ b/src/nvdecoder/ImageSaveGPU.cpp
@@ -0,0 +1,123 @@
+#include "cuda_kernels.h"
+
+#include "common_header.h"
+
+
+//int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height)
+//{
+//	return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height);
+//	//return 0;
+//}
+//
+//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height)
+//{
+//	return jpegNPP(szOutputFile, d_srcRGB, img_width, img_height);
+//	//return 0;
+//}
+//
+//int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB)
+//{
+//	return jpegNPP(szOutputFile, d_srcRGB);
+//}
+//
+//int saveJPEG(const char *szOutputFile, float* d_srcRGB)
+//{
+//	return jpegNPP(szOutputFile, d_srcRGB);
+//}
+
+int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height)
+{
+	cudaError_t cudaStatus = cuda_common::ResizeImage(d_srcRGB, src_width, src_height, d_dstRGB, dst_width, dst_height);
+	if (cudaStatus != cudaSuccess) {
+		LOG_ERROR("cuda_common::ResizeImage failed: {}",cudaGetErrorString(cudaStatus));
+		return -1;
+	}
+
+	return 0;
+}
+
+//int initTables()
+//{
+//	initTable();
+//	return 0;
+//}
+//
+//int initTables(int flag, int width, int height)
+//{
+//	initTable(0, width, height);
+//	return 0;
+//}
+
+int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
+{
+	cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom); // NOTE(review): any status DrawImage returns is discarded here - confirm whether failures should be reported
+	return 0; // always reports success
+}
+
+int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
+{
+	cuda_common::DrawImage(d_srcRGB, src_width, src_height, left, top, right, bottom); // NOTE(review): any status DrawImage returns is discarded here - confirm whether failures should be reported
+	return 0; // always reports success
+}
+
+int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y)
+{
+	// Report a failed draw as -1 (as resizeFrame/partMemCopy do) instead of unconditionally returning 0.
+	return cuda_common::DrawLine(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y) == cudaSuccess ? 0 : -1;
+}
+
+//int releaseJpegSaver()
+//{
+//	releaseJpegNPP();
+//	return 0;
+//}
+
+int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom)
+{
+	cudaError_t cudaStatus = cuda_common::PartMemCopy(d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom);
+	if (cudaStatus != cudaSuccess) {
+		LOG_ERROR("cuda_common::77 PartMemCopy failed: {} {} {} {} {} {} {}",cudaGetErrorString(cudaStatus), left, top, right, bottom, src_height, d_dstRGB);
+		return -1;
+	}
+
+	return 0;
+}
+//#include <fstream>
+//extern std::ofstream g_os;
+int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB,
+	int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h,
+	float submeanb, float submeang, float submeanr,
+	float varianceb, float varianceg, float variancer)
+{
+	//g_os << "cudaMemcpyHostToDevice begin 9" << std::endl;
+	cudaError_t cudaStatus = cuda_common::PartMemResizeBatch(
+		d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h,
+		submeanb, submeang, submeanr,
+		varianceb, varianceg, variancer);
+	//g_os << "cudaMemcpyHostToDevice end 9" << std::endl;
+	if (cudaStatus != cudaSuccess) {
+		LOG_ERROR("cuda_common::PartMemResizeBatch failed: {}",cudaGetErrorString(cudaStatus));
+		return -1;
+	}
+
+	return 0;
+}
+
+
+//int PartMemResizeBatch(float * d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, 
+//	int count, int* vleft, int * vtop, int* vright, int* vbottom, int dst_w, int dst_h,
+//	float submeanb, float submeang, float submeanr,
+//	float varianceb, float varianceg, float variancer)
+//
+//{
+//	cudaError_t cudaStatus = cuda_common::PartMemResizeBatch(
+//		d_srcRGB, src_width, src_height, d_dstRGB, count, vleft, vtop, vright, vbottom, dst_w, dst_h,
+//		submeanb, submeang, submeanr,
+//		varianceb, varianceg, variancer);
+//	if (cudaStatus != cudaSuccess) {
+//		fprintf(stderr, "cuda_common::PartMemCopy failed: %s\n", cudaGetErrorString(cudaStatus));
+//		return -1;
+//	}
+//
+//	return 0;
+//}
\ No newline at end of file
diff --git a/src/nvdecoder/ImageSaveGPU.h b/src/nvdecoder/ImageSaveGPU.h
new file mode 100644
index 0000000..272a6d2
--- /dev/null
+++ b/src/nvdecoder/ImageSaveGPU.h
@@ -0,0 +1,65 @@
+/*******************************************************************************************
+* Version: VPT_x64_V2.0.0_20170904
+* CopyRight: 中科院自动化研究所模式识别实验室图像视频组
+* UpdateDate: 20170904
+* Content: 人车物监测跟踪
+********************************************************************************************/
+
+#ifndef IMAGESAVEGPU_H_
+#define IMAGESAVEGPU_H_
+
+#ifdef _MSC_VER
+	#ifdef IMAGESAVEGPU_EXPORTS
+		#define IMAGESAVEGPU_API __declspec(dllexport)
+	#else
+		#define IMAGESAVEGPU_API __declspec(dllimport)
+	#endif
+#else
+#define IMAGESAVEGPU_API __attribute__((visibility ("default")))
+#endif
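+// Purpose: save the image as a JPEG file
+// szOutputFile		output image path, e.g. D:\\out.jpg
+// d_srcRGB			input RGB data in device memory allocated with cudaMalloc, laid out as: BBBBBB......GGGGGG......RRRRRRRR......
+// img_width		width of the RGB image
+// img_height		height of the RGB image
+//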
+//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height);
+//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, float* d_srcRGB);
+//
+//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height);
+//IMAGESAVEGPU_API int saveJPEG(const char *szOutputFile, unsigned char* d_srcRGB);
+
+// Purpose: resize (scale) an image
+IMAGESAVEGPU_API int resizeFrame(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height);
+
+// Purpose: copy a sub-region of the image data
+IMAGESAVEGPU_API int partMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom);
+
+//IMAGESAVEGPU_API int partMemResizeImage(float * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB,
+//	int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h,
+//	float submeanb, float submeang, float submeanr,
+//	float varianceb, float varianceg, float variancer);
+
+
+IMAGESAVEGPU_API int PartMemResizeBatch(unsigned char * d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB,
+	int count, int* vleft, int * vtop, int* vright, int* vbottom, int *dst_w, int *dst_h,
+	float submeanb, float submeang, float submeanr,
+	float varianceb, float varianceg, float variancer);
+
+
+//// 功能：初始化GPU保存图像的各种量化表
+//IMAGESAVEGPU_API int initTables();
+//IMAGESAVEGPU_API int initTables(int falg, int width, int height);
+//
+//// 功能：释放资源
+//IMAGESAVEGPU_API int releaseJpegSaver();
+
+// Purpose: draw the snapshot bounding box on the GPU
+IMAGESAVEGPU_API int drawImageOnGPU(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);
+
+IMAGESAVEGPU_API int drawImageOnGPU(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);
+
+// Purpose: draw a straight line on the GPU
+IMAGESAVEGPU_API int drawLineOnGPU(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y);
+
+#endif
diff --git a/src/nvdecoder/NV12ToRGB.cu b/src/nvdecoder/NV12ToRGB.cu
new file mode 100644
index 0000000..58e1dff
--- /dev/null
+++ b/src/nvdecoder/NV12ToRGB.cu
@@ -0,0 +1,345 @@
+
+#include "cuda_kernels.h"
+
+#include <builtin_types.h>
+#include "common/inc/helper_cuda_drvapi.h"
+
+typedef unsigned char   uint8;
+typedef unsigned int    uint32;
+typedef int             int32;
+
+#define COLOR_COMPONENT_MASK            0x3FF
+#define COLOR_COMPONENT_BIT_SIZE        10
+
+namespace cuda_common
+{
+
+#define MUL(x,y)    ((x)*(y))
+
+	__constant__ float  constHueColorSpaceMat2[9];  //默认分配到0卡上，未找到分配到指定卡上设置方法，当前也未用到，先注释掉
+
+	__device__ void YUV2RGB2(uint32 *yuvi, float *red, float *green, float *blue)
+	{
+		float luma, chromaCb, chromaCr;
+
+		// Prepare for hue adjustment
+		luma = (float)yuvi[0];
+		chromaCb = (float)((int32)yuvi[1] - 512.0f);
+		chromaCr = (float)((int32)yuvi[2] - 512.0f);
+
+
+		// Convert YUV To RGB with hue adjustment
+		*red = MUL(luma, constHueColorSpaceMat2[0]) +
+			MUL(chromaCb, constHueColorSpaceMat2[1]) +
+			MUL(chromaCr, constHueColorSpaceMat2[2]);
+		*green = MUL(luma, constHueColorSpaceMat2[3]) +
+			MUL(chromaCb, constHueColorSpaceMat2[4]) +
+			MUL(chromaCr, constHueColorSpaceMat2[5]);
+		*blue = MUL(luma, constHueColorSpaceMat2[6]) +
+			MUL(chromaCb, constHueColorSpaceMat2[7]) +
+			MUL(chromaCr, constHueColorSpaceMat2[8]);
+
+	}
+
+	__device__ unsigned char clip_v(int x, int min_val, int  max_val) {
+		if (x>max_val) {
+			return max_val;
+		}
+		else if (x<min_val) {
+			return min_val;
+		}
+		else {
+			return x;
+		}
+	}
+	// CUDA kernel for outputing the final RGB output from NV12;
+	extern "C"
+		__global__ void NV12ToRGB_drvapi2(uint32 *srcImage, size_t nSourcePitch, unsigned char *dstImage, int width, int height)
+	{
+
+		int32 x, y;
+		uint32 yuv101010Pel[2];
+		uint32 processingPitch = ((width)+63) & ~63;
+		uint8 *srcImageU8 = (uint8 *)srcImage;
+
+		processingPitch = nSourcePitch;
+
+		// Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
+		x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
+		y = blockIdx.y *  blockDim.y + threadIdx.y;
+
+		if (x >= width)
+		{
+			//printf("x >= width\n");
+			//*flag = -1;
+			return; //x = width - 1;
+		}
+			//return; //x = width - 1;
+
+		if (y >= height)
+		{
+			//printf("y >= height\n");
+			//*flag = -1;
+			return; // y = height - 1;
+		}
+
+		// Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
+		// if we move to texture we could read 4 luminance values
+		yuv101010Pel[0] = (srcImageU8[y * processingPitch + x]) << 2;
+		yuv101010Pel[1] = (srcImageU8[y * processingPitch + x + 1]) << 2;
+
+		uint32 chromaOffset = processingPitch * height;
+		int32 y_chroma = y >> 1;
+
+		if (y & 1)  // odd scanline ?
+		{
+			uint32 chromaCb;
+			uint32 chromaCr;
+
+			chromaCb = srcImageU8[chromaOffset + y_chroma * processingPitch + x];
+			chromaCr = srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1];
+
+			if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
+			{
+				chromaCb = (chromaCb + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x] + 1) >> 1;
+				chromaCr = (chromaCr + srcImageU8[chromaOffset + (y_chroma + 1) * processingPitch + x + 1] + 1) >> 1;
+			}
+
+			yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2));
+			yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+
+			yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2));
+			yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+		}
+		else
+		{
+			yuv101010Pel[0] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x] << (COLOR_COMPONENT_BIT_SIZE + 2));
+			yuv101010Pel[0] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+
+			yuv101010Pel[1] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x] << (COLOR_COMPONENT_BIT_SIZE + 2));
+			yuv101010Pel[1] |= ((uint32)srcImageU8[chromaOffset + y_chroma * processingPitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+		}
+
+		// this steps performs the color conversion
+		uint32 yuvi[6];
+		float red[2], green[2], blue[2];
+
+		yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK);
+		yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
+		yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
+
+		yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK);
+		yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
+		yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
+
+		// YUV to RGB Transformation conversion
+		YUV2RGB2(&yuvi[0], &red[0], &green[0], &blue[0]);
+		YUV2RGB2(&yuvi[3], &red[1], &green[1], &blue[1]);
+
+
+		dstImage[y * width * 3 + x * 3] = clip_v(blue[0] * 0.25,0 ,255);
+		dstImage[y * width * 3 + x * 3 + 3] = clip_v(blue[1] * 0.25,0, 255);
+
+		dstImage[width * y * 3 + x * 3 + 1] = clip_v(green[0] * 0.25,0 ,255);
+		dstImage[width * y * 3 + x * 3 + 4] = clip_v(green[1] * 0.25,0, 255);
+
+		dstImage[width * y * 3 + x * 3 + 2] = clip_v(red[0] * 0.25, 0, 255);
+		dstImage[width * y * 3 + x * 3 + 5] = clip_v(red[1] * 0.25,0 ,255);
+
+
+		//dstImage[y * width * 3 + x * 3] = blue[0] * 0.25;
+		//dstImage[y * width * 3 + x * 3 + 3] = blue[1] * 0.25;
+
+		//dstImage[width * y * 3 + x * 3 + 1] =green[0] * 0.25;
+		//dstImage[width * y * 3 + x * 3 + 4] = green[1] * 0.25;
+
+		//dstImage[width * y * 3 + x * 3 + 2] = red[0] * 0.25;
+		//dstImage[width * y * 3 + x * 3 + 5] = red[1] * 0.25;
+
+		// Clamp the results to BBBBBB....GGGGGGG.......RRRRRRR....
+		//              dstImage[y * width + x] = blue[0] * 0.25;
+		//              dstImage[y * width + x + 1] = blue[1] * 0.25;
+
+		//              dstImage[width * height + y * width + x] = green[0] * 0.25;
+		//              dstImage[width * height + y * width + x + 1] = green[1] * 0.25;
+
+		//              dstImage[width * height * 2 + y * width + x] = red[0] * 0.25;
+		//              dstImage[width * height * 2 + y * width + x + 1] = red[1] * 0.25;
+		return;
+
+	}
+
+	// Converts a frame held in two device planes (luma, interleaved Cb/Cr) to packed 8-bit B,G,R.
+	// Launch layout: 2D grid, one thread per pair of horizontally adjacent pixels (see CUDAToBGR below).
+	extern "C"
+		__global__ void CUDAToBGR_drvapi(uint32 *dataY, uint32 *dataUV, size_t pitchY, size_t pitchUV, unsigned char *dstImage, int width, int height)
+	{
+		// We multiply by 2 because we process 2 pixels per thread
+		const int32 x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
+		const int32 y = blockIdx.y *  blockDim.y + threadIdx.y;
+
+		if (x >= width || y >= height)
+		{
+			return;
+		}
+
+		// With an odd width the last thread of a row owns a single pixel: x + 1 is outside the
+		// image, so it must neither be read from the luma plane nor written to dstImage.
+		const bool hasSecondPixel = (x + 1) < width;
+
+		uint32 yuv101010Pel[2];
+		uint8 *srcImageU8_Y = (uint8 *)dataY;
+		uint8 *srcImageU8_UV = (uint8 *)dataUV;
+
+		// Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
+		// The 8-bit samples are shifted left by 2 into the 10-bit fields that are unpacked below.
+		yuv101010Pel[0] = (srcImageU8_Y[y * pitchY + x]) << 2;
+		yuv101010Pel[1] = (srcImageU8_Y[y * pitchY + (hasSecondPixel ? x + 1 : x)]) << 2;
+
+		int32 y_chroma = y >> 1;
+
+		if (y & 1)  // odd scanline ?
+		{
+			uint32 chromaCb;
+			uint32 chromaCr;
+
+			chromaCb = srcImageU8_UV[y_chroma * pitchUV + x];
+			chromaCr = srcImageU8_UV[y_chroma * pitchUV + x + 1];
+
+			if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
+			{
+				chromaCb = (chromaCb + srcImageU8_UV[(y_chroma + 1) * pitchUV + x] + 1) >> 1;
+				chromaCr = (chromaCr + srcImageU8_UV[(y_chroma + 1) * pitchUV + x + 1] + 1) >> 1;
+			}
+
+			yuv101010Pel[0] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2));
+			yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+
+			yuv101010Pel[1] |= (chromaCb << (COLOR_COMPONENT_BIT_SIZE + 2));
+			yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+		}
+		else
+		{
+			yuv101010Pel[0] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x] << (COLOR_COMPONENT_BIT_SIZE + 2));
+			yuv101010Pel[0] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+
+			yuv101010Pel[1] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x] << (COLOR_COMPONENT_BIT_SIZE + 2));
+			yuv101010Pel[1] |= ((uint32)srcImageU8_UV[y_chroma * pitchUV + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
+		}
+
+		// this steps performs the color conversion
+		uint32 yuvi[6];
+		float red[2], green[2], blue[2];
+
+		yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK);
+		yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
+		yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
+
+		yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK);
+		yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE)       & COLOR_COMPONENT_MASK);
+		yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
+
+		// YUV to RGB Transformation conversion
+		YUV2RGB2(&yuvi[0], &red[0], &green[0], &blue[0]);
+		YUV2RGB2(&yuvi[3], &red[1], &green[1], &blue[1]);
+
+		// Scale the 10-bit results back to 8 bits (x 0.25), clamp, and store as packed B,G,R.
+		unsigned char *dst = dstImage + (y * width + x) * 3;
+		dst[0] = clip_v(blue[0] * 0.25f, 0, 255);
+		dst[1] = clip_v(green[0] * 0.25f, 0, 255);
+		dst[2] = clip_v(red[0] * 0.25f, 0, 255);
+
+		if (hasSecondPixel)
+		{
+			dst[3] = clip_v(blue[1] * 0.25f, 0, 255);
+			dst[4] = clip_v(green[1] * 0.25f, 0, 255);
+			dst[5] = clip_v(red[1] * 0.25f, 0, 255);
+		}
+	}
+
+	cudaError_t setColorSpace(FF_ColorSpace CSC, float hue)
+	{
+		// Builds the 3x3 YUV->RGB matrix (rows R, G, B; columns Y, Cb, Cr, as consumed by YUV2RGB2),
+		// rotated by `hue`, and uploads it to the __constant__ array constHueColorSpaceMat2.
+		float hueSin = sin(hue);
+		float hueCos = cos(hue);
+		float hueCSC[9];
+		if (CSC == ITU_601)
+		{
+			//CCIR 601
+			hueCSC[0] = 1.1644f;
+			hueCSC[1] = hueSin * 1.5960f;
+			hueCSC[2] = hueCos * 1.5960f;
+			hueCSC[3] = 1.1644f;
+			hueCSC[4] = (hueCos * -0.3918f) - (hueSin * 0.8130f);
+			hueCSC[5] = (hueSin *  0.3918f) - (hueCos * 0.8130f);
+			hueCSC[6] = 1.1644f;
+			hueCSC[7] = hueCos *  2.0172f;
+			hueCSC[8] = hueSin * -2.0172f;
+		}
+		else if (CSC == ITU_709)
+		{
+			//CCIR 709
+			hueCSC[0] = 1.0f;
+			hueCSC[1] = hueSin * 1.57480f;
+			hueCSC[2] = hueCos * 1.57480f;
+			hueCSC[3] = 1.0;
+			hueCSC[4] = (hueCos * -0.18732f) - (hueSin * 0.46812f);
+			hueCSC[5] = (hueSin *  0.18732f) - (hueCos * 0.46812f);
+			hueCSC[6] = 1.0f;
+			hueCSC[7] = hueCos *  1.85560f;
+			hueCSC[8] = hueSin * -1.85560f;
+		}
+		else
+		{
+			// Neither branch filled hueCSC: refuse instead of uploading uninitialised values.
+			return cudaErrorInvalidValue;
+		}
+		cudaError_t cudaStatus = cudaMemcpyToSymbol(constHueColorSpaceMat2, hueCSC, 9 * sizeof(float), 0, cudaMemcpyHostToDevice);
+		cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "cudaMemcpyToSymbol failed: %s\n", cudaGetErrorString(cudaStatus));
+		}
+		return cudaStatus;
+	}
+
+	cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height)
+	{
+		dim3 block(32, 16, 1);
+		dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1);
+		NV12ToRGB_drvapi2 << < grid, block >> >((uint32 *)d_srcNV12, nSourcePitch, d_dstRGB, width, height);
+		cudaError_t cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			return cudaStatus;
+		}
+
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching NV12ToRGB_drvapi !\n", cudaStatus);
+			return cudaStatus;
+		}
+
+		return cudaStatus;
+	}
+
+	cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height)
+	{
+		// Launches CUDAToBGR_drvapi over the frame and waits for it; each thread converts 2 pixels, hence 2 * block.x columns per block.
+		dim3 block(32, 16, 1);
+		dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1);
+		CUDAToBGR_drvapi << < grid, block >> >((uint32 *)dataY, (uint32 *)dataUV, pitchY, pitchUV, d_dstRGB, width, height);
+		cudaError_t cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "CUDAToBGR_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			return cudaStatus;
+		}
+
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching CUDAToBGR_drvapi !\n", cudaStatus);
+			return cudaStatus;
+		}
+		return cudaStatus;
+	}
+}
\ No newline at end of file
diff --git a/src/nvdecoder/NvJpegEncoder.cpp b/src/nvdecoder/NvJpegEncoder.cpp
new file mode 100644
index 0000000..7ee0727
--- /dev/null
+++ b/src/nvdecoder/NvJpegEncoder.cpp
@@ -0,0 +1,90 @@
+#include "NvJpegEncoder.h"
+
+#include <fstream>
+#include <vector>
+#include <iostream>
+
+// Evaluates an nvJPEG call once; on any status other than NVJPEG_STATUS_SUCCESS prints the line number and status to stdout, then continues.
+#define CHECK_NVJPEG(S) do { \
+        const nvjpegStatus_t status = (S); \
+        if (status != NVJPEG_STATUS_SUCCESS ) std::cout << __LINE__ <<" CHECK_NVJPEG - status = " << status << std::endl; \
+        } while (false)
+
+
+// Encodes an interleaved BGR image (NVJPEG_INPUT_BGRI, row pitch = width * 3 bytes) with nvJPEG
+// and writes it to `filepath` as a progressive, 4:2:0 subsampled JPEG at quality 70.
+//
+//   filepath  destination file; opened in binary mode and overwritten
+//   d_srcBGR  pixel buffer handed to nvjpegEncodeImage (nvJPEG expects device memory here)
+//   width     image width in pixels
+//   height    image height in pixels
+//   stream    CUDA stream on which the nvJPEG work is enqueued
+//
+// The call blocks until the work it enqueued on `stream` has completed.
+// Returns 0 on success and -1 when an nvJPEG call, the stream synchronization or the file
+// write fails. Failed nvJPEG calls are printed to stdout as "<line> CHECK_NVJPEG - status = <code>".
+int saveJpeg(const char * filepath, unsigned char* d_srcBGR, int width, int height, cudaStream_t stream)
+{
+    nvjpegHandle_t nvjpeg_handle = nullptr;
+    nvjpegEncoderState_t encoder_state = nullptr;
+    nvjpegEncoderParams_t encoder_params = nullptr;
+
+    // Zero-initialise so the unused channel/pitch slots do not hold indeterminate values.
+    nvjpegImage_t input = {};
+    input.channel[0] = d_srcBGR;
+    input.pitch[0] = width * 3;
+
+    // Reports a failed nvJPEG call and yields false so the && chains below stop at the first error.
+    auto succeeded = [](nvjpegStatus_t status, int line) {
+        if (status != NVJPEG_STATUS_SUCCESS) std::cout << line << " CHECK_NVJPEG - status = " << status << std::endl;
+        return status == NVJPEG_STATUS_SUCCESS;
+    };
+
+    // Create the library handle and the encoder objects, then configure the encoder.
+    bool ok = succeeded(nvjpegCreate(NVJPEG_BACKEND_DEFAULT, nullptr, &nvjpeg_handle), __LINE__)
+        && succeeded(nvjpegEncoderParamsCreate(nvjpeg_handle, &encoder_params, stream), __LINE__)
+        && succeeded(nvjpegEncoderStateCreate(nvjpeg_handle, &encoder_state, stream), __LINE__)
+        && succeeded(nvjpegEncoderParamsSetEncoding(encoder_params, nvjpegJpegEncoding_t::NVJPEG_ENCODING_PROGRESSIVE_DCT_HUFFMAN, stream), __LINE__)
+        && succeeded(nvjpegEncoderParamsSetOptimizedHuffman(encoder_params, 1, stream), __LINE__)
+        && succeeded(nvjpegEncoderParamsSetQuality(encoder_params, 70, stream), __LINE__)
+        && succeeded(nvjpegEncoderParamsSetSamplingFactors(encoder_params, nvjpegChromaSubsampling_t::NVJPEG_CSS_420, stream), __LINE__);
+
+    // Encode, then ask for the compressed size (data == NULL) before fetching the bytes.
+    std::vector<unsigned char> obuffer;
+    size_t length = 0;
+    ok = ok
+        && succeeded(nvjpegEncodeImage(nvjpeg_handle, encoder_state, encoder_params, &input, NVJPEG_INPUT_BGRI, width, height, stream), __LINE__)
+        && succeeded(nvjpegEncodeRetrieveBitstream(nvjpeg_handle, encoder_state, NULL, &length, stream), __LINE__);
+
+    // Wait for the encode to finish before the bitstream is copied out, as NVIDIA's encode example does.
+    if (ok && cudaStreamSynchronize(stream) != cudaSuccess)
+        ok = false;
+
+    if (ok)
+    {
+        obuffer.resize(length);
+        ok = succeeded(nvjpegEncodeRetrieveBitstream(nvjpeg_handle, encoder_state, obuffer.data(), &length, stream), __LINE__)
+            && cudaStreamSynchronize(stream) == cudaSuccess;
+    }
+
+    // Destroy whatever was created, on success and failure alike, so no device memory is leaked.
+    if (encoder_params != nullptr)
+        nvjpegEncoderParamsDestroy(encoder_params);
+    if (encoder_state != nullptr)
+        nvjpegEncoderStateDestroy(encoder_state);
+    if (nvjpeg_handle != nullptr)
+        nvjpegDestroy(nvjpeg_handle);
+
+    // Nothing usable was produced; do not create the output file.
+    if (!ok)
+        return -1;
+
+    // Write the encoded bytes; report failure if the file cannot be opened or written.
+    std::ofstream outputFile(filepath, std::ios::out | std::ios::binary);
+    if (!outputFile.is_open())
+        return -1;
+    outputFile.write(reinterpret_cast<const char *>(obuffer.data()), static_cast<std::streamsize>(length));
+    outputFile.close();
+
+    return outputFile.fail() ? -1 : 0;
+}
\ No newline at end of file
diff --git a/src/nvdecoder/NvJpegEncoder.h b/src/nvdecoder/NvJpegEncoder.h
new file mode 100644
index 0000000..3c27ba8
--- /dev/null
+++ b/src/nvdecoder/NvJpegEncoder.h
@@ -0,0 +1,3 @@
+#include <nvjpeg.h>
+// Encodes the BGR image at d_srcBGR with nvJPEG on the given stream and writes the JPEG to filepath (defined in NvJpegEncoder.cpp).
+int saveJpeg(const char * filepath, unsigned char* d_srcBGR, int width, int height, cudaStream_t stream);
\ No newline at end of file
diff --git a/src/nvdecoder/PartMemCopy.cu b/src/nvdecoder/PartMemCopy.cu
new file mode 100644
index 0000000..396765b
--- /dev/null
+++ b/src/nvdecoder/PartMemCopy.cu
@@ -0,0 +1,289 @@
+#include "cuda_kernels.h"
+#include <algorithm>
+typedef unsigned char   uchar;
+typedef unsigned int    uint32;
+typedef int             int32;
+
+#define MAX_SNAPSHOT_WIDTH 320
+#define MAX_SNAPSHOT_HEIGHT 320
+
+namespace cuda_common
+{
+	// Copies the rectangle [left, right) x [top, bottom) of an interleaved 3-byte-per-pixel image into a
+	// tightly packed destination of size (right - left) x (bottom - top).
+	//
+	// Launch layout: 2D grid, one thread per destination pixel; surplus threads exit via the bounds check.
+	//   d_srcRGB               source image, src_width * src_height pixels, no row padding
+	//   d_dstRGB               destination buffer, (right - left) * (bottom - top) * 3 bytes
+	//   left/top/right/bottom  crop rectangle in source pixel coordinates (right/bottom exclusive)
+	// Destination pixels whose source position lies outside the image are written as 0 instead of
+	// reading out of bounds.
+	__global__ void kernel_memcopy(unsigned char* d_srcRGB, int src_width, int src_height,
+		unsigned char* d_dstRGB, int left, int top, int right, int bottom)
+	{
+		const int dst_x = blockIdx.x * blockDim.x + threadIdx.x;
+		const int dst_y = blockIdx.y * blockDim.y + threadIdx.y;
+		const int dst_width = right - left;
+		const int dst_height = bottom - top;
+		if (dst_x >= dst_width || dst_y >= dst_height)
+			return;
+
+		const int src_x = left + dst_x;
+		const int src_y = top + dst_y;
+		const bool inside = src_x >= 0 && src_x < src_width && src_y >= 0 && src_y < src_height;
+
+		// bgr...bgr...bgr... (interleaved); size_t keeps the byte offset from overflowing int on large images
+		unsigned char* dst = d_dstRGB + ((size_t)dst_y * dst_width + dst_x) * 3;
+		for (int c = 0; c < 3; ++c)
+			dst[c] = inside ? d_srcRGB[((size_t)src_y * src_width + src_x) * 3 + c] : 0;
+	}
+
+	// Copies the [left, right) x [top, bottom) rectangle of a device image into d_dstRGB with kernel_memcopy and waits for completion.
+	cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom)
+	{
+		dim3 block(32, 16, 1);
+		// One thread per destination pixel; the grid is rounded up to whole blocks.
+		dim3 grid(((right - left) + (block.x - 1)) / block.x, ((bottom - top) + (block.y - 1)) / block.y, 1);
+		kernel_memcopy << < grid, block >> > (d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom);
+		cudaError_t cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "Part 50 kernel_memcopy launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			return cudaStatus;
+		}
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_memcopy!\n", cudaStatus);
+			return cudaStatus;
+		}
+		return cudaStatus;
+	}
+
+
+	//    __global__ void kernel_memcopy_mean_variance(float* d_srcRGB, int src_width, int src_height, 
+	//            unsigned char* vd_dstRGB, int count, int * vleft, int* vtop, int* vright, int * vbottom, float submeanb,float submeang, float submeanr, float varianceb,float varianceg, float variancer)
+	//    {
+	//        const int dst_x = blockIdx.x * blockDim.x + threadIdx.x;
+	//        const int dst_y = blockIdx.y * blockDim.y + threadIdx.y;
+	//        for (int i=0;i<count;i++)
+	//        {
+	//                const int left = vleft[i];
+	//                const int right = vright[i];
+	//                const int top = vtop[i];
+	//                const int bottom = vbottom[i];
+	//        
+	//                const int dst_width = right - left;
+	//                const int dst_height = bottom - top;
+	//
+	//
+	//                unsigned char * d_dstRGB = vd_dstRGB + i *   ;
+	//
+	//                if (dst_x < dst_width && dst_y < dst_height)
+	//                {
+	//                    int src_x = left + dst_x;
+	//                    int src_y = top + dst_y;
+	//        
+	//                    d_dstRGB[(dst_y*dst_width) + dst_x] = (d_srcRGB[(src_y*src_width) + src_x] - submeanb)*varianceb;
+	//                    d_dstRGB[(dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (d_srcRGB[(src_width*src_height) + (src_y*src_width) + src_x] -submeang)*varianceg;
+	//                    d_dstRGB[(2 * dst_width*dst_height) + (dst_y*dst_width) + dst_x] = (d_srcRGB[(2 * src_width*src_height) + (src_y*src_width) + src_x] - submeanr) * variancer;
+	//        
+	//                }
+	//        }
+	//    }
+	// Batched crop + bilinear resize of an interleaved 3-byte-per-pixel image (B, G, R order according
+	// to the kernel name and local variable names).
+	//
+	// For crop i the rectangle [vleft[i], vright[i]) x [vtop[i], vbottom[i]) of the source image is
+	// resampled to dst_width[i] x dst_height[i] pixels and stored, tightly packed, in vd_dstRGB[i].
+	//
+	// Launch layout:
+	//   grid.z            selects the crop (i = blockIdx.z)
+	//   grid.x / grid.y   with the block dimensions, cover the destination pixels of a crop;
+	//                     threads beyond a crop's own destination size do nothing
+	//
+	// Parameters:
+	//   d_srcRGB                    source image, srcimg_width x srcimg_height pixels, no row padding
+	//   srcimg_width/srcimg_height  source size, used for indexing and for clamping the sample taps
+	//   vleft/vtop/vright/vbottom   per-crop rectangle bounds, dereferenced on the device
+	//   vd_dstRGB                   per-crop destination pointers, dereferenced on the device
+	//   count                       not read by the kernel
+	//   dst_width/dst_height        per-crop output size, dereferenced on the device
+	//   submean*/variance*          not read by the kernel; no normalisation is applied
+	//
+	// Results are clamped to [0, 255] and truncated to unsigned char.
+	//
+	// NOTE(review): a source narrower or shorter than 2 pixels makes the clamped tap index
+	// negative; callers presumably never pass such images - confirm.
+	__global__ void PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel(
+		unsigned char * d_srcRGB, int srcimg_width, int srcimg_height,
+		int* vleft, int* vtop, int* vright, int * vbottom,
+		unsigned char** vd_dstRGB, int count, int *dst_width, int *dst_height,
+		float submeanb, float submeang, float submeanr,
+		float varianceb, float varianceg, float variancer)
+	{
+		// Each z-slice of the grid handles one crop.
+		const int crop = blockIdx.z;
+
+		// Output size requested for this crop and the pixel this thread is responsible for.
+		const int out_w = dst_width[crop];
+		const int out_h = dst_height[crop];
+		const int out_x = blockIdx.x * blockDim.x + threadIdx.x;
+		const int out_y = blockIdx.y * blockDim.y + threadIdx.y;
+
+		// Threads outside this crop's output have nothing to do.
+		if (out_x >= out_w || out_y >= out_h)
+		{
+			return;
+		}
+
+		// Crop rectangle in source pixel coordinates and this crop's destination buffer.
+		const int roi_left = vleft[crop];
+		const int roi_top = vtop[crop];
+		const int roi_w = vright[crop] - roi_left;
+		const int roi_h = vbottom[crop] - roi_top;
+		unsigned char* const out = vd_dstRGB[crop];
+
+		// Map the destination pixel centre back into the crop (pixel-centre alignment), then
+		// shift by the crop origin to obtain source image coordinates.
+		const float fx = (out_x + 0.5)*roi_w / (float)out_w - 0.5 + roi_left;
+		const float fy = (out_y + 0.5)*roi_h / (float)out_h - 0.5 + roi_top;
+
+		// Top-left tap, limited so its right/lower neighbours exist in a source of at least 2x2 pixels.
+		int ax = floor(fx);
+		if (ax < 0)
+		{
+			ax = 0;
+		}
+		if (ax > srcimg_width - 2)
+		{
+			ax = srcimg_width - 2;
+		}
+
+		int ay = floor(fy);
+		if (ay < 0)
+		{
+			ay = 0;
+		}
+		if (ay > srcimg_height - 2)
+		{
+			ay = srcimg_height - 2;
+		}
+
+		// Pixel indices of the four taps: p00 p01 on the upper row, p10 p11 on the row below.
+		const int p00 = ax + ay*srcimg_width;
+		const int p01 = p00 + 1;
+		const int p10 = p00 + srcimg_width;
+		const int p11 = p10 + 1;
+
+		// Interpolation weights: wx/wy are the offsets from the top-left tap, *_inv their complements.
+		const float wx = fx - ax;
+		const float wx_inv = 1 - wx;
+		const float wy = fy - ay;
+		const float wy_inv = 1 - wy;
+
+		// Source bytes and the byte offset of this thread's destination pixel (3 bytes per pixel, packed rows).
+		const unsigned char* const src = d_srcRGB;
+		const int out_index = (out_y * out_w + out_x) * 3;
+
+		// The three interleaved channels are interpolated independently with the same weights.
+		for (int ch = 0; ch < 3; ++ch)
+		{
+			float value = src[p00 * 3 + ch] * wx_inv*wy_inv
+				+ src[p01 * 3 + ch] * wx*wy_inv
+				+ src[p10 * 3 + ch] * wx_inv*wy
+				+ src[p11 * 3 + ch] * wx*wy;
+
+			// Clamped taps can push the weights outside [0, 1], so saturate before narrowing.
+			if (value < 0)
+			{
+				value = 0;
+			}
+			else if (value > 255)
+			{
+				value = 255;
+			}
+
+			out[out_index + ch] = (unsigned char)value;
+		}
+	}
+
+	// Crops `count` rectangles out of one device image and bilinearly resizes each of them into its
+	// own device buffer with one launch of PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel.
+	//
+	//   d_srcRGB               device image of interleaved 3-byte pixels
+	//   src_width/src_height   size of that image in pixels
+	//   d_dstRGB               host array of `count` device pointers, one output buffer per crop
+	//   count                  number of crops; 0 is a no-op, a negative value is rejected
+	//   left/top/right/bottom  host arrays of `count` crop bounds in source pixel coordinates
+	//   dst_w/dst_h            host arrays of `count` output sizes
+	//   submean*/variance*     forwarded to the kernel, which does not read them
+	//
+	// The per-crop parameters are staged in device buffers sized from `count`, so a batch of any
+	// size fits. The call blocks until the kernel has finished.
+	// NOTE(review): `count` becomes gridDim.z, which CUDA limits to 65535; larger batches would
+	// fail at launch - confirm callers stay below that.
+	//
+	// Returns cudaSuccess, cudaErrorInvalidValue for a negative count, or the first CUDA error
+	// (also printed to stderr).
+	cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char** d_dstRGB, int count, int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h, float submeanb, float submeang, float submeanr,
+		float varianceb, float varianceg, float variancer)
+	{
+		// std::max_element must not be dereferenced for an empty range, so handle these cases first.
+		if (count <= 0)
+			return (count == 0) ? cudaSuccess : cudaErrorInvalidValue;
+
+		const size_t n = (size_t)count;
+		dim3 block(32, 16, 1);
+		// x/y cover the largest requested output size; z indexes the crop.
+		dim3 grid((*std::max_element(dst_w, dst_w + count) + (block.x - 1)) / block.x,
+			(*std::max_element(dst_h, dst_h + count) + (block.y - 1)) / block.y, count);
+
+		// All six int arrays share one allocation: [left | top | right | bottom | dst_w | dst_h].
+		int* gpu_params = NULL;
+		unsigned char** gpu_dst_rgb = NULL;
+		const int* host_params[6] = { left, top, right, bottom, dst_w, dst_h };
+
+		// Stop at the first failing step; later steps are skipped once cudaStatus holds an error.
+		cudaError_t cudaStatus = cudaMalloc(&gpu_params, 6 * n * sizeof(int));
+		if (cudaStatus == cudaSuccess)
+			cudaStatus = cudaMalloc(&gpu_dst_rgb, n * sizeof(unsigned char*));
+		for (int k = 0; k < 6 && cudaStatus == cudaSuccess; ++k)
+			cudaStatus = cudaMemcpy(gpu_params + k * n, host_params[k], n * sizeof(int), cudaMemcpyHostToDevice);
+		if (cudaStatus == cudaSuccess)
+			cudaStatus = cudaMemcpy(gpu_dst_rgb, d_dstRGB, n * sizeof(unsigned char*), cudaMemcpyHostToDevice);
+
+		if (cudaStatus != cudaSuccess)
+		{
+			fprintf(stderr, "PartMemResizeBatch: staging crop parameters failed: %s\n", cudaGetErrorString(cudaStatus));
+		}
+		else
+		{
+			// The offsets into gpu_params follow the packing order above.
+			PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel << < grid, block >> > (
+				d_srcRGB, src_width, src_height,
+				gpu_params, gpu_params + n, gpu_params + 2 * n, gpu_params + 3 * n,
+				gpu_dst_rgb, count, gpu_params + 4 * n, gpu_params + 5 * n,
+				submeanb, submeang, submeanr,
+				varianceb, varianceg, variancer);
+
+			// Launch errors are reported immediately, execution errors at the synchronization.
+			cudaStatus = cudaGetLastError();
+			if (cudaStatus != cudaSuccess)
+			{
+				fprintf(stderr, "PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			}
+			else
+			{
+				cudaStatus = cudaDeviceSynchronize();
+				if (cudaStatus != cudaSuccess)
+					fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching PartCopy_ResizeImgBilinearBGR_Mean_Variance_CUDAKernel!\n", cudaStatus);
+			}
+		}
+
+		// Release the staging buffers on every path; cudaFree(NULL) is a no-op.
+		cudaFree(gpu_params);
+		cudaFree(gpu_dst_rgb);
+
+		return cudaStatus;
+	}
+
+}
\ No newline at end of file
diff --git a/src/nvdecoder/RGB2YUV.cu b/src/nvdecoder/RGB2YUV.cu
new file mode 100644
index 0000000..7202c3a
--- /dev/null
+++ b/src/nvdecoder/RGB2YUV.cu
@@ -0,0 +1,263 @@
+
+
+#include "cuda_kernels.h"
+
+typedef unsigned char   uint8;
+typedef unsigned int    uint32;
+typedef int             int32;
+
+namespace cuda_common
+{
+	// Clamps x to the range [min_val, max_val]; the upper bound is tested first.
+	// NOTE(review): x is already an unsigned char on entry, so with bounds 0 and 255 the value
+	// is returned unchanged - saturation has to happen before a value is narrowed to
+	// unsigned char for this to have any effect.
+	__device__ unsigned char clip_value(unsigned char x, unsigned char min_val, unsigned char  max_val)
+	{
+		if (x > max_val)
+			return max_val;
+
+		return (x < min_val) ? min_val : x;
+	}
+
+	// Converts an interleaved 8-bit BGR image into full-resolution Y, U and V planes.
+	//
+	// Launch layout: 2D grid, one thread per source pixel; threads outside the image return early.
+	//   src_img  interleaved B,G,R bytes, src_width * src_height pixels, no row padding
+	//   Y        luma plane, rows yPitch bytes apart
+	//   u, v     chroma planes at source resolution, rows src_width bytes apart
+	//
+	// Every result is saturated to [0, 255] while still in floating point and only then narrowed:
+	// V can leave that range (pure red gives 0.615 * 255 + 128 = 284.825), and narrowing an
+	// out-of-range value to unsigned char does not saturate it.
+	__global__ void kernel_rgb2yuv(unsigned char *src_img, unsigned char* Y, unsigned char* u, unsigned char* v,
+		int src_width, int src_height, size_t yPitch)
+	{
+		const int x = blockIdx.x * blockDim.x + threadIdx.x;
+		const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+		if (x >= src_width || y >= src_height)
+			return;
+
+		const int B = src_img[y * src_width * 3 + x * 3];
+		const int G = src_img[y * src_width * 3 + x * 3 + 1];
+		const int R = src_img[y * src_width * 3 + x * 3 + 2];
+
+		// Arithmetic stays in double precision (double literals); saturate, then truncate toward zero.
+		Y[y * yPitch + x] = (unsigned char)fmin(fmax(0.299 * R + 0.587 * G + 0.114 * B, 0.0), 255.0);
+		u[y * src_width + x] = (unsigned char)fmin(fmax(-0.147 * R - 0.289 * G + 0.436 * B + 128, 0.0), 255.0);
+		v[y * src_width + x] = (unsigned char)fmin(fmax(0.615 * R - 0.515 * G - 0.100 * B + 128, 0.0), 255.0);
+	}
+
+	// Converts a planar float BGR image (B plane, then G, then R) into full-resolution Y, U and V planes.
+	//
+	// Launch layout: 2D grid, one thread per source pixel; threads outside the image return early.
+	//   src_img  three consecutive src_width * src_height float planes in B, G, R order
+	//   Y        luma plane, rows yPitch bytes apart
+	//   u, v     chroma planes at source resolution, rows src_width bytes apart
+	// Results are saturated to [0, 255] in floating point before narrowing, because float input and
+	// the V formula can both produce values that do not fit in an unsigned char.
+	__global__ void kernel_rgb2yuv(float *src_img, unsigned char* Y, unsigned char* u, unsigned char* v,
+		int src_width, int src_height, size_t yPitch)
+	{
+		const int x = blockIdx.x * blockDim.x + threadIdx.x;
+		const int y = blockIdx.y * blockDim.y + threadIdx.y;
+		if (x >= src_width || y >= src_height)
+			return;
+
+		const int plane = src_width * src_height;
+		const float B = src_img[y * src_width + x];
+		const float G = src_img[plane + y * src_width + x];
+		const float R = src_img[plane * 2 + y * src_width + x];
+		Y[y * yPitch + x] = (unsigned char)fmin(fmax(0.299 * R + 0.587 * G + 0.114 * B, 0.0), 255.0);
+		u[y * src_width + x] = (unsigned char)fmin(fmax(-0.147 * R - 0.289 * G + 0.436 * B + 128, 0.0), 255.0);
+		v[y * src_width + x] = (unsigned char)fmin(fmax(0.615 * R - 0.515 * G - 0.100 * B + 128, 0.0), 255.0);
+	}
+
+	// Bilinearly resamples one 8-bit plane from src_width x src_height to dst_width x dst_height.
+	//
+	// Launch layout: 2D grid, one thread per destination pixel; threads outside the destination return early.
+	//   src_img  tightly packed source plane (rows src_width bytes apart)
+	//   dst_img  destination plane, rows nPitch bytes apart
+	//
+	// The interpolated value is saturated to [0, 255] before it is narrowed: when a tap index is
+	// clamped at the image border the weights leave [0, 1], so the result can overshoot.
+	extern "C"
+	__global__ void kernel_resize_UV(unsigned char* src_img, unsigned char *dst_img,
+		int src_width, int src_height, int dst_width, int dst_height, int nPitch)
+	{
+		const int x = blockIdx.x * blockDim.x + threadIdx.x;
+		const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+		if (x >= dst_width || y >= dst_height)
+			return;
+
+		// Source position of this destination pixel's centre.
+		float fx = (x + 0.5)*src_width / (float)dst_width - 0.5;
+		float fy = (y + 0.5)*src_height / (float)dst_height - 0.5;
+
+		// Top-left tap, clamped so the 2x2 neighbourhood stays inside a source of at least 2x2 pixels.
+		int ax = floor(fx);
+		int ay = floor(fy);
+		if (ax < 0)
+			ax = 0;
+		else if (ax > src_width - 2)
+			ax = src_width - 2;
+		if (ay < 0)
+			ay = 0;
+		else if (ay > src_height - 2)
+			ay = src_height - 2;
+
+		int A = ax + ay*src_width;
+		int B = A + 1;
+		int C = A + src_width;
+		int D = C + 1;
+
+		// w1/w3 are the horizontal/vertical offsets from the top-left tap; w2/w4 their complements.
+		float w1 = fx - ax;
+		float w2 = 1 - w1;
+		float w3 = fy - ay;
+		float w4 = 1 - w3;
+
+		float val = src_img[A] * w2*w4 + src_img[B] * w1*w4 + src_img[C] * w2*w3 + src_img[D] * w1*w3;
+
+		dst_img[y * nPitch + x] = (unsigned char)fminf(fmaxf(val, 0.0f), 255.0f);
+	}
+
+	// Converts a planar float BGR device image into Y, U and V planes.
+	// kernel_rgb2yuv writes Y directly (row pitch yPitch) and source-resolution U/V into temporary
+	// device buffers; kernel_resize_UV then resamples them to uWidth x uHeight into U (pitch uPitch)
+	// and to vWidth x vHeight into V (pitch vPitch). yWidth and yHeight are not used.
+	// The device is synchronized after every kernel, so the call is blocking.
+	// Returns cudaSuccess or the first CUDA error, which is also printed to stderr.
+	cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height,
+						unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
+						unsigned char* U, size_t uPitch, int uWidth, int uHeight,
+						unsigned char* V, size_t vPitch, int vWidth, int vHeight)
+	{
+		// NULL-initialised so the cleanup at Error is safe when an allocation fails.
+		unsigned char * u = NULL;
+		unsigned char * v = NULL;
+		const size_t planeBytes = (size_t)src_width * src_height * sizeof(unsigned char);
+		// Everything with an initialiser is declared before the first goto, which must not skip it.
+		dim3 block(32, 16, 1);
+		dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
+		dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1);
+		dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1);
+		cudaError_t cudaStatus = cudaMalloc((void**)&u, planeBytes);
+		if (cudaStatus == cudaSuccess)
+			cudaStatus = cudaMalloc((void**)&v, planeBytes);
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "RGB2YUV: cudaMalloc failed: %s\n", cudaGetErrorString(cudaStatus));
+			goto Error;
+		}
+
+		kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch);
+		cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			goto Error;
+		}
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus);
+			goto Error;
+		}
+
+		kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch);
+		cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			goto Error;
+		}
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
+			goto Error;
+		}
+
+		kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch);
+		cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			goto Error;
+		}
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess)
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
+Error :
+		cudaFree(u);
+		cudaFree(v);
+		return cudaStatus;
+	}
+
+
+
+	// Converts an interleaved 8-bit BGR device image into Y, U and V planes.
+	// kernel_rgb2yuv writes Y directly (row pitch yPitch) and source-resolution U/V into temporary
+	// device buffers; kernel_resize_UV then resamples them to uWidth x uHeight into U (pitch uPitch)
+	// and to vWidth x vHeight into V (pitch vPitch). yWidth and yHeight are not used.
+	// The device is synchronized after every kernel, so the call is blocking.
+	// Returns cudaSuccess or the first CUDA error, which is also printed to stderr.
+	cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height,
+		unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
+		unsigned char* U, size_t uPitch, int uWidth, int uHeight,
+		unsigned char* V, size_t vPitch, int vWidth, int vHeight)
+	{
+		// NULL-initialised so the cleanup at Error is safe when an allocation fails.
+		unsigned char * u = NULL;
+		unsigned char * v = NULL;
+		const size_t planeBytes = (size_t)src_width * src_height * sizeof(unsigned char);
+		// Everything with an initialiser is declared before the first goto, which must not skip it.
+		dim3 block(32, 16, 1);
+		dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);
+		dim3 grid1((uWidth + (block.x - 1)) / block.x, (uHeight + (block.y - 1)) / block.y, 1);
+		dim3 grid2((vWidth + (block.x - 1)) / block.x, (vHeight + (block.y - 1)) / block.y, 1);
+		cudaError_t cudaStatus = cudaMalloc((void**)&u, planeBytes);
+		if (cudaStatus == cudaSuccess)
+			cudaStatus = cudaMalloc((void**)&v, planeBytes);
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "RGB2YUV: cudaMalloc failed: %s\n", cudaGetErrorString(cudaStatus));
+			goto Error;
+		}
+
+		kernel_rgb2yuv << < grid, block >> >(d_srcRGB, Y, u, v, src_width, src_height, yPitch);
+		cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "kernel_rgb2yuv launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			goto Error;
+		}
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_rgb2yuv!\n", cudaStatus);
+			goto Error;
+		}
+
+		kernel_resize_UV << < grid1, block >> >(u, U, src_width, src_height, uWidth, uHeight, uPitch);
+		cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			goto Error;
+		}
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
+			goto Error;
+		}
+
+		kernel_resize_UV << < grid2, block >> >(v, V, src_width, src_height, vWidth, vHeight, vPitch);
+		cudaStatus = cudaGetLastError();
+		if (cudaStatus != cudaSuccess) {
+			fprintf(stderr, "kernel_resize_UV launch failed: %s\n", cudaGetErrorString(cudaStatus));
+			goto Error;
+		}
+		cudaStatus = cudaDeviceSynchronize();
+		if (cudaStatus != cudaSuccess)
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_resize_UV!\n", cudaStatus);
+	Error:
+		cudaFree(u);
+		cudaFree(v);
+		return cudaStatus;
+	}
+}
+
diff --git a/src/nvdecoder/ResizeImage.cu b/src/nvdecoder/ResizeImage.cu
new file mode 100644
index 0000000..fdc6961
--- /dev/null
+++ b/src/nvdecoder/ResizeImage.cu
@@ -0,0 +1,84 @@
+#include "cuda_kernels.h"
+
+typedef unsigned char   uchar;
+typedef unsigned int    uint32;
+typedef int             int32;
+
+namespace cuda_common
+{
+	// Bilinear resize of a planar float image made of three consecutive planes of
+	// src_width * src_height values each; dst_img receives three planes of dst_width * dst_height values.
+	//
+	// Launch layout: 2D grid, one thread per destination pixel; surplus threads return immediately.
+	// No clamping is applied to the interpolated values.
+	__global__ void kernel_bilinear(float *src_img, float *dst_img,
+		int src_width, int src_height, int dst_width, int dst_height)
+	{
+		const int col = blockIdx.x * blockDim.x + threadIdx.x;
+		const int row = blockIdx.y * blockDim.y + threadIdx.y;
+
+		if (col >= dst_width || row >= dst_height)
+		{
+			return;
+		}
+
+		// Source position of this destination pixel's centre.
+		const float fx = (col + 0.5)*src_width / (float)dst_width - 0.5;
+		const float fy = (row + 0.5)*src_height / (float)dst_height - 0.5;
+
+		// Top-left tap, limited to [0, size - 2] so its right/lower neighbours exist
+		// (for sources of at least 2x2 values per plane).
+		int ax = floor(fx);
+		int ay = floor(fy);
+		ax = (ax < 0) ? 0 : ((ax > src_width - 2) ? src_width - 2 : ax);
+		ay = (ay < 0) ? 0 : ((ay > src_height - 2) ? src_height - 2 : ay);
+
+		// Offsets of the four taps inside one plane.
+		const int topLeft = ax + ay*src_width;
+		const int topRight = topLeft + 1;
+		const int bottomLeft = topLeft + src_width;
+		const int bottomRight = bottomLeft + 1;
+
+		// Fractional distances from the top-left tap and their complements.
+		const float wRight = fx - ax;
+		const float wLeft = 1 - wRight;
+		const float wBottom = fy - ay;
+		const float wTop = 1 - wBottom;
+
+		const int srcPlane = src_width * src_height;
+		const int dstPlane = dst_width * dst_height;
+		const int dstOffset = row * dst_width + col;
+
+		// The same interpolation is applied to each of the three planes.
+		for (int plane = 0; plane < 3; ++plane)
+		{
+			const float* in = src_img + srcPlane * plane;
+			const float value = in[topLeft] * wLeft*wTop + in[topRight] * wRight*wTop
+				+ in[bottomLeft] * wLeft*wBottom + in[bottomRight] * wRight*wBottom;
+
+			dst_img[dstPlane * plane + dstOffset] = value;
+		}
+	}
+
+	// Resizes a planar 3-plane float device image with kernel_bilinear and blocks until the kernel has finished.
+	// Returns cudaSuccess or the launch/execution error, which is also printed to stderr.
+	cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height)
+	{
+		const dim3 threads(32, 16, 1);
+		// One thread per destination pixel, rounded up to whole blocks.
+		const dim3 blocks((dst_width + (threads.x - 1)) / threads.x, (dst_height + (threads.y - 1)) / threads.y, 1);
+
+		kernel_bilinear << < blocks, threads >> >(d_srcRGB, d_dstRGB, src_width, src_height, dst_width, dst_height);
+
+		cudaError_t status = cudaGetLastError();
+		if (status != cudaSuccess) {
+			fprintf(stderr, "kernel_bilinear launch failed: %s\n", cudaGetErrorString(status));
+			return status;
+		}
+
+		status = cudaDeviceSynchronize();
+		if (status != cudaSuccess)
+			fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching kernel_bilinear!\n", status);
+		return status;
+	}
+}
\ No newline at end of file
diff --git a/src/nvdecoder/common_header.h b/src/nvdecoder/common_header.h
new file mode 100644
index 0000000..d5feed8
--- /dev/null
+++ b/src/nvdecoder/common_header.h
@@ -0,0 +1,8 @@
+#ifndef _COMMON_HEADER_H_
+#define _COMMON_HEADER_H_
+
+
+#include "../interface/logger.hpp"
+#include "../interface/utiltools.hpp"
+
+#endif
\ No newline at end of file
diff --git a/src/nvdecoder/cuda_kernels.h b/src/nvdecoder/cuda_kernels.h
new file mode 100644
index 0000000..cd1eb00
--- /dev/null
+++ b/src/nvdecoder/cuda_kernels.h
@@ -0,0 +1,63 @@
+#pragma once
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string.h>
+#include <math.h>
+
+#include <cuda.h>
+// Colour-space selector taken by cuda_common::setColorSpace. NOTE(review): the names suggest ITU-R BT.601 / BT.709 - confirm.
+typedef enum
+{
+	ITU_601 = 1,
+	ITU_709 = 2
+} FF_ColorSpace;
+
+namespace cuda_common
+{
+	cudaError_t setColorSpace(FF_ColorSpace CSC, float hue);
+	// Colour conversions into d_dstRGB from NV12 data (one pointer, or separate Y and UV planes); both block until the kernel has finished.
+	cudaError_t NV12ToRGBnot(CUdeviceptr d_srcNV12, size_t nSourcePitch, unsigned char* d_dstRGB, int width, int height);
+	cudaError_t CUDAToBGR(CUdeviceptr dataY, CUdeviceptr dataUV, size_t pitchY, size_t pitchUV, unsigned char* d_dstRGB, int width, int height);
+
+	// Bilinear resize of a planar float device image made of three consecutive planes.
+	cudaError_t ResizeImage(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height);
+	// Planar float BGR -> Y/U/V planes; U and V are resampled to the given sizes.
+	cudaError_t RGB2YUV(float* d_srcRGB, int src_width, int src_height,
+		unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
+		unsigned char* U, size_t uPitch, int uWidth, int uHeight,
+		unsigned char* V, size_t vPitch, int vWidth, int vHeight);
+	// Interleaved 8-bit BGR -> Y/U/V planes; U and V are resampled to the given sizes.
+	cudaError_t RGB2YUV(unsigned char* d_srcRGB, int src_width, int src_height,
+		unsigned char* Y, size_t yPitch, int yWidth, int yHeight,
+		unsigned char* U, size_t uPitch, int uWidth, int uHeight,
+		unsigned char* V, size_t vPitch, int vWidth, int vHeight);
+	// Copies the [left, right) x [top, bottom) rectangle of an interleaved 3-byte-per-pixel device image into d_dstRGB.
+	cudaError_t PartMemCopy(unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int left, int top, int right, int bottom);
+	//	cudaError_t PartMemResize(float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int left, int top, int right, int bottom);
+	// Crops and bilinearly resizes `count` rectangles in one launch; d_dstRGB and the bound/size arrays are host arrays of `count` entries.
+	cudaError_t PartMemResizeBatch(unsigned char* d_srcRGB, int srcimg_width, int srcimg_height, unsigned char** d_dstRGB, int count,
+		int* left, int* top, int* right, int* bottom, int *dst_w, int *dst_h,
+		float submeanb, float submeang, float submeanr,
+		float varianceb, float varianceg, float variancer);
+	// NOTE(review): implementations of DrawImage/DrawLine are not in this part of the patch; presumably they draw a rectangle / a line into the image - confirm.
+	cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);
+	cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom);
+
+	cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y);
+}
+
+
+int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height);
+int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height);
+
+int jpegNPP(const char *szOutputFile, float* d_srcRGB);
+int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB);
+
+int initTable();
+int initTable(int flag, int width, int height);
+int releaseJpegNPP();
+
diff --git a/src/nvdecoder/define.hpp b/src/nvdecoder/define.hpp
new file mode 100644
index 0000000..2eaafe0
--- /dev/null
+++ b/src/nvdecoder/define.hpp
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <string>
+// Evaluates a CUDA runtime call once and, on failure, logs the error code and cudaGetErrorString() text through LOG_ERROR; execution continues.
+// NOTE(review): expands to a bare { } block, not do { } while (0), so `if (c) CHECK_CUDA(x); else ...` will not compile - check call sites before changing.
+#define CHECK_CUDA(call) \
+{\
+    const cudaError_t error_code = call;\
+    if (cudaSuccess != error_code)\
+        LOG_ERROR("CUDA error, code: {} reason: {}", error_code, cudaGetErrorString(error_code));\
+}
\ No newline at end of file
diff --git a/src/nvdecoder/jpegNPP.cpp-1 b/src/nvdecoder/jpegNPP.cpp-1
new file mode 100644
index 0000000..f0bf2e6
--- /dev/null
+++ b/src/nvdecoder/jpegNPP.cpp-1
@@ -0,0 +1,1193 @@
+/*
+* Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
+*
+* NOTICE TO USER:
+*
+* This source code is subject to NVIDIA ownership rights under U.S. and
+* international Copyright laws.
+*
+* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
+* CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
+* IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
+* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
+* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
+* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+* OR PERFORMANCE OF THIS SOURCE CODE.
+*
+* U.S. Government End Users.  This source code is a "commercial item" as
+* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting  of
+* "commercial computer software" and "commercial computer software
+* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
+* and is provided to the U.S. Government only as a commercial end item.
+* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
+* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
+* source code with only those rights set forth herein.
+*/
+
+// This sample needs at least CUDA 5.5 and a GPU that has at least Compute Capability 2.0
+
+// This sample demonstrates a simple image processing pipeline.
+// First, a JPEG file is huffman decoded and inverse DCT transformed and dequantized.
+// Then the different planes are resized. Finally, the resized image is quantized, forward
+// DCT transformed and huffman encoded.
+
+#include "cuda_kernels.h"
+
+#include <npp.h>
+#include <cuda_runtime.h>
+#include "common/UtilNPP/Exceptions.h"
+
+#include "Endianess.h"
+#include <math.h>
+
+#include <string.h>
+#include <fstream>
+#include <iostream>
+
+#include "common/inc/helper_string.h"
+#include "common/inc/helper_cuda.h"
+//#include "MacroDef.h"
+#include "cuda.h"
+
+using namespace std;
+
+struct FrameHeader	// Fields of the frame-header segment that writeFrameHeader() emits after marker 0xC0.
+{
+	unsigned char nSamplePrecision;	// sample precision; both initTable() overloads set it to 8
+	unsigned short nHeight;	// image height, written as a 16-bit value via writeBigEndian
+	unsigned short nWidth;	// image width, written as a 16-bit value via writeBigEndian
+	unsigned char nComponents;	// number of entries used in the three arrays below (set to 3 in this file)
+	unsigned char aComponentIdentifier[3];	// per-component identifier byte
+	unsigned char aSamplingFactors[3];	// per-component byte: high nibble is read as horizontal, low nibble as vertical blocks per MCU
+	unsigned char aQuantizationTableSelector[3];	// per-component quantization table selector byte
+};
+
+struct ScanHeader	// Fields of the scan-header segment that writeScanHeader() emits after marker 0xDA.
+{
+	unsigned char nComponents;	// number of entries used in the two arrays below (set to 3 in initTable())
+	unsigned char aComponentSelector[3];	// per-component selector byte; initTable() uses the same ids as the frame header (1, 2, 3)
+	unsigned char aHuffmanTablesSelector[3];	// per-component Huffman table selector byte (0 and 17 in initTable())
+	unsigned char nSs;	// written verbatim after the component list; initTable() sets 0
+	unsigned char nSe;	// written verbatim; initTable() sets 63
+	unsigned char nA;	// written verbatim; initTable() sets 0
+};
+
+struct QuantizationTable	// Copied byte-for-byte (sizeof bytes) into the stream by writeQuantizationTable(), so the member order is the on-disk order.
+{
+	unsigned char nPrecisionAndIdentifier;	// leading byte of the segment payload; initTable() stores 0 and 1 for the two tables
+	unsigned char aTable[64];	// the 64 quantization values; also uploaded to the device in initTable()
+};
+
+struct HuffmanTable	// writeHuffmanTable() copies the first 17 + sum(aCodes) bytes of this struct into the stream.
+{
+	unsigned char nClassAndIdentifier;	// leading byte of the segment payload; initTable() stores 0, 1, 16 and 17
+	unsigned char aCodes[16];	// number of codes for each bit length 1..16
+	unsigned char aTable[256];	// symbol values; only the first sum(aCodes) entries are written out
+};
+
+//Standard luminance quantization table
+//unsigned char std_Y_QT[64] =
+//{
+//	16, 11, 10, 16, 24, 40, 51, 61,
+//	12, 12, 14, 19, 26, 58, 60, 55,
+//	14, 13, 16, 24, 40, 57, 69, 56,
+//	14, 17, 22, 29, 51, 87, 80, 62,
+//	18, 22, 37, 56, 68, 109, 103, 77,
+//	24, 35, 55, 64, 81, 104, 113, 92,
+//	49, 64, 78, 87, 103, 121, 120, 101,
+//	72, 92, 95, 98, 112, 100, 103, 99
+//};
+//
+////Standard chrominance quantization table
+//unsigned char std_UV_QT[64] =
+//{
+//	17, 18, 24, 47, 99, 99, 99, 99,
+//	18, 21, 26, 66, 99, 99, 99, 99,
+//	24, 26, 56, 99, 99, 99, 99, 99,
+//	47, 66, 99, 99, 99, 99, 99, 99,
+//	99, 99, 99, 99, 99, 99, 99, 99,
+//	99, 99, 99, 99, 99, 99, 99, 99,
+//	99, 99, 99, 99, 99, 99, 99, 99,
+//	99, 99, 99, 99, 99, 99, 99, 99
+//};
+
+////Luminance quantization table
+//unsigned char std_Y_QT[64] =
+//{
+//	6, 4, 5, 6, 5, 4, 6, 6,
+//	5, 6, 7, 7, 6, 8, 10, 16,
+//	10, 10, 9, 9, 10, 20, 14, 15,
+//	12, 16, 23, 20, 24, 24, 23, 20,
+//	22, 22, 26, 29, 37, 31, 26, 27,
+//	35, 28, 22, 22, 32, 44, 32, 35,
+//	38, 39, 41, 42, 41, 25, 31, 45,
+//	48, 45, 40, 48, 37, 40, 41, 40
+//};
+//
+////Chrominance quantization table
+//unsigned char std_UV_QT[64] =
+//{
+//	7, 7, 7, 10, 8, 10, 19, 10,
+//	10, 19, 40, 26, 22, 26, 40, 40,
+//	40, 40, 40, 40, 40, 40, 40, 40,
+//	40, 40, 40, 40, 40, 40, 40, 40,
+//	40, 40, 40, 40, 40, 40, 40, 40,
+//	40, 40, 40, 40, 40, 40, 40, 40,
+//	40, 40, 40, 40, 40, 40, 40, 40,
+//	40, 40, 40, 40, 40, 40, 40, 40
+//};
+
+// Luminance quantization table: the commented-out table above ({6, 4, 5, 6, ...}) scaled by 0.75 and truncated toward zero. Spelled as integer literals because a double -> unsigned char conversion inside a braced initializer is a narrowing conversion; the stored values are unchanged.
+unsigned char std_Y_QT[64] =
+{
+	4, 3, 3, 4, 3, 3, 4, 4,
+	3, 4, 5, 5, 4, 6, 7, 12,
+	7, 7, 6, 6, 7, 15, 10, 11,
+	9, 12, 17, 15, 18, 18, 17, 15,
+	16, 16, 19, 21, 27, 23, 19, 20,
+	26, 21, 16, 16, 24, 33, 24, 26,
+	28, 29, 30, 31, 30, 18, 23, 33,
+	36, 33, 30, 36, 27, 30, 30, 30
+};
+
+// Chrominance quantization table: first two rows are the commented-out table above ({7, 7, 7, 10, ...}) scaled by 0.75 and truncated toward zero; the remaining rows were already the literal 30. Integer literals avoid the double -> unsigned char narrowing conversion; the stored values are unchanged.
+unsigned char std_UV_QT[64] =
+{
+	5, 5, 5, 7, 6, 7, 14, 7,
+	7, 14, 30, 19, 16, 19, 30, 30,
+	30, 30, 30, 30, 30, 30, 30, 30,
+	30, 30, 30, 30, 30, 30, 30, 30,
+	30, 30, 30, 30, 30, 30, 30, 30,
+	30, 30, 30, 30, 30, 30, 30, 30,
+	30, 30, 30, 30, 30, 30, 30, 30,
+	30, 30, 30, 30, 30, 30, 30, 30
+};
+
+unsigned char STD_DC_Y_NRCODES[16] = { 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };	// DC table for Y: code counts per bit length, copied into aHuffmanTables[0].aCodes (the counts sum to 12)
+unsigned char STD_DC_Y_VALUES[12] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };	// DC table for Y: the 12 symbol values, copied into aHuffmanTables[0].aTable
+
+unsigned char STD_DC_UV_NRCODES[16] = { 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };	// DC table for U/V: code counts per bit length, copied into aHuffmanTables[1].aCodes (the counts sum to 12)
+unsigned char STD_DC_UV_VALUES[12] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };	// DC table for U/V: the 12 symbol values, copied into aHuffmanTables[1].aTable
+
+unsigned char STD_AC_Y_NRCODES[16] = { 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0X7D };	// AC table for Y: code counts per bit length, copied into aHuffmanTables[2].aCodes (the counts sum to 162)
+unsigned char STD_AC_Y_VALUES[162] =	// AC table for Y: the 162 symbol values, copied into aHuffmanTables[2].aTable
+{
+	0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+	0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+	0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+	0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+	0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+	0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+	0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+	0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+	0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+	0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+	0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+	0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+	0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+	0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+	0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+	0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+	0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+	0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+	0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+	0xf9, 0xfa
+};
+
+unsigned char STD_AC_UV_NRCODES[16] = { 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0X77 };	// AC table for U/V: code counts per bit length, copied into aHuffmanTables[3].aCodes (the counts sum to 162)
+unsigned char STD_AC_UV_VALUES[162] =	// AC table for U/V: the 162 symbol values, copied into aHuffmanTables[3].aTable
+{
+	0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+	0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+	0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+	0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+	0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+	0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+	0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+	0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+	0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+	0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+	0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+	0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+	0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+	0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+	0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+	0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+	0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+	0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+	0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+	0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+	0xf9, 0xfa
+};
+
+int DivUp(int x, int d)	// Returns (x + d - 1) / d, i.e. x / d rounded up for non-negative x and positive d.
+{
+	return (x + d - 1) / d;
+}
+
+template<typename T>	// Stores nElement at pData through writeBigEndian<T>, then advances pData (taken by reference) by sizeof(T).
+void writeAndAdvance(unsigned char *&pData, T nElement)
+{
+	writeBigEndian<T>(pData, nElement);
+	pData += sizeof(T);
+}
+
+void writeMarker(unsigned char nMarker, unsigned char *&pData)	// Emits the two bytes 0xFF, nMarker and leaves pData just past them.
+{
+	*pData++ = 0x0ff;
+	*pData++ = nMarker;
+}
+
+void writeJFIFTag(unsigned char *&pData)	// Emits marker 0xE0, a 2-byte length and the fixed 14-byte JFIF_TAG payload; pData ends just past the payload.
+{
+	const char JFIF_TAG[] =
+	{
+		0x4a, 0x46, 0x49, 0x46, 0x00,	// ASCII "JFIF" followed by a zero byte
+		0x01, 0x02,	// presumably the JFIF version bytes (1.02) -- confirm against the JFIF specification
+		0x00,
+		0x00, 0x01, 0x00, 0x01,
+		0x00, 0x00
+	};
+
+	writeMarker(0x0e0, pData);
+	writeAndAdvance<unsigned short>(pData, sizeof(JFIF_TAG) + sizeof(unsigned short));	// the length value includes the two bytes of the length field itself
+	memcpy(pData, JFIF_TAG, sizeof(JFIF_TAG));
+	pData += sizeof(JFIF_TAG);
+}
+
+void writeFrameHeader(const FrameHeader &header, unsigned char *&pData)	// Serializes `header` as: marker 0xC0, 2-byte length, payload; pData ends just past the payload.
+{
+	unsigned char aTemp[128];	// scratch buffer: the payload is built here first so its length is known before the length field is written
+	unsigned char *pTemp = aTemp;
+
+	writeAndAdvance<unsigned char>(pTemp, header.nSamplePrecision);
+	writeAndAdvance<unsigned short>(pTemp, header.nHeight);
+	writeAndAdvance<unsigned short>(pTemp, header.nWidth);
+	writeAndAdvance<unsigned char>(pTemp, header.nComponents);
+
+	for (int c = 0; c<header.nComponents; ++c)	// three bytes per component: identifier, sampling factors, quantization table selector
+	{
+		writeAndAdvance<unsigned char>(pTemp, header.aComponentIdentifier[c]);
+		writeAndAdvance<unsigned char>(pTemp, header.aSamplingFactors[c]);
+		writeAndAdvance<unsigned char>(pTemp, header.aQuantizationTableSelector[c]);
+	}
+
+	unsigned short nLength = (unsigned short)(pTemp - aTemp);	// payload size in bytes
+
+	writeMarker(0x0C0, pData);
+	writeAndAdvance<unsigned short>(pData, nLength + 2);	// the length value includes the two bytes of the length field itself
+	memcpy(pData, aTemp, nLength);
+	pData += nLength;
+}
+
+void writeScanHeader(const ScanHeader &header, unsigned char *&pData)	// Serializes `header` as: marker 0xDA, 2-byte length, payload; pData ends just past the payload.
+{
+	unsigned char aTemp[128];	// scratch buffer: the payload is built here first so its length is known before the length field is written
+	unsigned char *pTemp = aTemp;
+
+	writeAndAdvance<unsigned char>(pTemp, header.nComponents);
+
+	for (int c = 0; c<header.nComponents; ++c)	// two bytes per component: selector, Huffman table selector
+	{
+		writeAndAdvance<unsigned char>(pTemp, header.aComponentSelector[c]);
+		writeAndAdvance<unsigned char>(pTemp, header.aHuffmanTablesSelector[c]);
+	}
+
+	writeAndAdvance<unsigned char>(pTemp, header.nSs);
+	writeAndAdvance<unsigned char>(pTemp, header.nSe);
+	writeAndAdvance<unsigned char>(pTemp, header.nA);
+
+	unsigned short nLength = (unsigned short)(pTemp - aTemp);	// payload size in bytes
+
+	writeMarker(0x0DA, pData);
+	writeAndAdvance<unsigned short>(pData, nLength + 2);	// the length value includes the two bytes of the length field itself
+	memcpy(pData, aTemp, nLength);
+	pData += nLength;
+}
+
+void writeQuantizationTable(const QuantizationTable &table, unsigned char *&pData)	// Emits marker 0xDB, a 2-byte length and the raw bytes of `table`; pData ends just past them.
+{
+	writeMarker(0x0DB, pData);
+	writeAndAdvance<unsigned short>(pData, sizeof(QuantizationTable) + 2);	// the length value includes the two bytes of the length field itself
+	memcpy(pData, &table, sizeof(QuantizationTable));	// struct is copied as-is, so its in-memory layout is the stream layout
+	pData += sizeof(QuantizationTable);
+}
+
+void writeHuffmanTable(const HuffmanTable &table, unsigned char *&pData)	// Emits marker 0xC4, a 2-byte length and the used prefix of `table`; pData ends just past it.
+{
+	writeMarker(0x0C4, pData);
+
+	// Number of Codes for Bit Lengths [1..16]
+	int nCodeCount = 0;
+
+	for (int i = 0; i < 16; ++i)
+	{
+		nCodeCount += table.aCodes[i];
+	}
+
+	writeAndAdvance<unsigned short>(pData, 17 + nCodeCount + 2);	// 17 = class/id byte + 16 count bytes; +2 for the length field itself
+	memcpy(pData, &table, 17 + nCodeCount);	// copies the class/id byte, the 16 counts and the first nCodeCount symbol values straight from the struct
+	pData += 17 + nCodeCount;
+}
+
+bool printfNPPinfo(int cudaVerMajor, int cudaVerMinor)	// Prints the NPP, CUDA driver and CUDA runtime versions, then returns the result of checkCudaCapabilities(cudaVerMajor, cudaVerMinor).
+{
+	const NppLibraryVersion *libVer = nppGetLibVersion();
+
+	printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
+
+	int driverVersion, runtimeVersion;	// NOTE(review): the return codes of the two version queries below are not checked
+	cudaDriverGetVersion(&driverVersion);
+	cudaRuntimeGetVersion(&runtimeVersion);
+
+	printf("  CUDA Driver  Version: %d.%d\n", driverVersion / 1000, (driverVersion % 100) / 10);	// the integer is split as major = v / 1000, minor = (v % 100) / 10
+	printf("  CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, (runtimeVersion % 100) / 10);
+
+	bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor);
+	return bVal;
+}
+
+NppiDCTState *pDCTState;	// allocated by nppiDCTInitAlloc in initTable(), freed in releaseJpegNPP()
+FrameHeader oFrameHeader;	// header used by the jpegNPP overloads that take img_width/img_height (size set per call)
+FrameHeader oFrameHeaderFixedSize;	// header filled by initTable(flag, width, height) for the two-argument jpegNPP overloads
+ScanHeader oScanHeader;	// filled once in initTable()
+QuantizationTable aQuantizationTables[4];	// host copies; initTable() fills entries 0 and 1
+Npp8u *pdQuantizationTables;	// device buffer of 64 * 4 bytes; tables 0 and 1 are uploaded at offsets 0 and 64
+HuffmanTable aHuffmanTables[4];	// initTable() fills [0], [1] from the DC tables and [2], [3] from the AC tables
+HuffmanTable *pHuffmanDCTables;	// points at aHuffmanTables[0]
+HuffmanTable *pHuffmanACTables;	// points at aHuffmanTables[2]
+int nMCUBlocksH;	// largest high-nibble sampling factor of oFrameHeader
+int nMCUBlocksV;	// largest low-nibble sampling factor of oFrameHeader
+int nMCUBlocksHFixedSize;	// same, for oFrameHeaderFixedSize
+int nMCUBlocksVFixedSize;	// same, for oFrameHeaderFixedSize
+Npp8u *pdScan;	// device buffer of 4 << 20 bytes that receives the Huffman-encoded scan
+NppiEncodeHuffmanSpec *apHuffmanDCTable[3];	// per-component encoder specs, allocated and freed inside every jpegNPP call
+NppiEncodeHuffmanSpec *apHuffmanACTable[3];	// per-component encoder specs, allocated and freed inside every jpegNPP call
+unsigned char *pDstJpeg;	// host output buffer, allocated and deleted inside every jpegNPP call
+unsigned char *pDstOutput;	// write cursor into pDstJpeg
+int nRestartInterval;	// set to -1 in initTable(); not read anywhere in the code shown here
+
+int initTable()
+{	// One-time setup of the encoder globals: DCT state, Huffman and quantization tables (host copies plus device upload), frame/scan header defaults and the 4 << 20 byte device scan buffer. Returns 0; failures are handled by the NPP_CHECK_* macros.
+	NPP_CHECK_NPP(nppiDCTInitAlloc(&pDCTState));
+
+	nRestartInterval = -1;
+
+	NPP_CHECK_CUDA(cudaMalloc(&pdQuantizationTables, 64 * 4));	// checked like the other allocations so a failed malloc is not written to below
+	pHuffmanDCTables = aHuffmanTables;
+	pHuffmanACTables = &aHuffmanTables[2];
+	memset(aQuantizationTables, 0, 4 * sizeof(QuantizationTable));
+	memset(aHuffmanTables, 0, 4 * sizeof(HuffmanTable));
+	memset(&oFrameHeader, 0, sizeof(FrameHeader));
+
+
+	// Fill the Huffman tables
+	aHuffmanTables[0].nClassAndIdentifier = 0;
+	memcpy(aHuffmanTables[0].aCodes, STD_DC_Y_NRCODES, 16);
+	memcpy(aHuffmanTables[0].aTable, STD_DC_Y_VALUES, 12);
+
+	aHuffmanTables[1].nClassAndIdentifier = 1;
+	memcpy(aHuffmanTables[1].aCodes, STD_DC_UV_NRCODES, 16);
+	memcpy(aHuffmanTables[1].aTable, STD_DC_UV_VALUES, 12);
+
+	aHuffmanTables[2].nClassAndIdentifier = 16;
+	memcpy(aHuffmanTables[2].aCodes, STD_AC_Y_NRCODES, 16);
+	memcpy(aHuffmanTables[2].aTable, STD_AC_Y_VALUES, 162);
+
+	aHuffmanTables[3].nClassAndIdentifier = 17;
+	memcpy(aHuffmanTables[3].aCodes, STD_AC_UV_NRCODES, 16);
+	memcpy(aHuffmanTables[3].aTable, STD_AC_UV_VALUES, 162);
+
+
+	// Fill the quantization tables
+	aQuantizationTables[0].nPrecisionAndIdentifier = 0;
+	memcpy(aQuantizationTables[0].aTable, std_Y_QT, 64);
+	aQuantizationTables[1].nPrecisionAndIdentifier = 1;
+	memcpy(aQuantizationTables[1].aTable, std_UV_QT, 64);
+
+	NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables, aQuantizationTables[0].aTable, 64, cudaMemcpyHostToDevice));
+	NPP_CHECK_CUDA(cudaMemcpyAsync(pdQuantizationTables + 64, aQuantizationTables[1].aTable, 64, cudaMemcpyHostToDevice));
+
+	oFrameHeader.nSamplePrecision = 8;
+	oFrameHeader.nComponents = 3;
+	oFrameHeader.aComponentIdentifier[0] = 1;
+	oFrameHeader.aComponentIdentifier[1] = 2;
+	oFrameHeader.aComponentIdentifier[2] = 3;
+	oFrameHeader.aSamplingFactors[0] = 34;	// 0x22: 2 x 2 blocks per MCU for component 0
+	oFrameHeader.aSamplingFactors[1] = 17;	// 0x11: 1 x 1 block per MCU
+	oFrameHeader.aSamplingFactors[2] = 17;	// 0x11: 1 x 1 block per MCU
+	oFrameHeader.aQuantizationTableSelector[0] = 0;
+	oFrameHeader.aQuantizationTableSelector[1] = 1;
+	oFrameHeader.aQuantizationTableSelector[2] = 1;
+
+	for (int i = 0; i < oFrameHeader.nComponents; ++i)	// track the largest vertical / horizontal sampling factor
+	{
+		nMCUBlocksV = max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] & 0x0f);
+		nMCUBlocksH = max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] >> 4);
+	}
+	NPP_CHECK_CUDA(cudaMalloc(&pdScan, 4 << 20));	// fixed 4 << 20 byte device buffer for the encoded scan
+
+
+
+	oScanHeader.nComponents = 3;
+	oScanHeader.aComponentSelector[0] = 1;
+	oScanHeader.aComponentSelector[1] = 2;
+	oScanHeader.aComponentSelector[2] = 3;
+	oScanHeader.aHuffmanTablesSelector[0] = 0;
+	oScanHeader.aHuffmanTablesSelector[1] = 17;	// 0x11
+	oScanHeader.aHuffmanTablesSelector[2] = 17;	// 0x11
+	oScanHeader.nSs = 0;
+	oScanHeader.nSe = 63;
+	oScanHeader.nA = 0;
+
+
+	return 0;
+}
+
+NppiSize aSrcSize[3];	// per-component plane size; set by initTable(flag, width, height)
+Npp16s *apdDCT[3];// = { 0, 0, 0 };	// per-component device DCT coefficient buffers, allocated by initTable(flag, width, height)
+Npp32s aDCTStep[3];	// pitch of each apdDCT buffer
+
+Npp8u *apSrcImage[3];// = { 0, 0, 0 };	// per-component device 8-bit planes, allocated by initTable(flag, width, height)
+Npp32s aSrcImageStep[3];	// pitch of each apSrcImage plane, as Npp32s
+size_t aSrcPitch[3];	// the same pitch as size_t, as passed to cuda_common::RGB2YUV
+
+
+int releaseJpegNPP()	// Frees the DCT state and the global device buffers created by the initTable() overloads. Pointers are reset after release so a repeated call does not free them twice. Always returns 0.
+{
+	if (pDCTState) { nppiDCTFree(pDCTState); pDCTState = nullptr; }
+	cudaFree(pdQuantizationTables); pdQuantizationTables = nullptr;
+	cudaFree(pdScan); pdScan = nullptr;
+	for (int i = 0; i < 3; ++i)
+	{
+		cudaFree(apdDCT[i]); apdDCT[i] = nullptr;
+		cudaFree(apSrcImage[i]); apSrcImage[i] = nullptr;
+	}
+	return 0;
+}
+
+
+int initTable(int flag, int width, int height)	// Fills oFrameHeaderFixedSize for a width x height image and allocates the global DCT / plane buffers that the two-argument jpegNPP overloads use. `flag` is not read. Always returns 0.
+{
+	// Fill in the frame header
+	oFrameHeaderFixedSize.nSamplePrecision = 8;
+	oFrameHeaderFixedSize.nComponents = 3;
+	oFrameHeaderFixedSize.aComponentIdentifier[0] = 1;
+	oFrameHeaderFixedSize.aComponentIdentifier[1] = 2;
+	oFrameHeaderFixedSize.aComponentIdentifier[2] = 3;
+	oFrameHeaderFixedSize.aSamplingFactors[0] = 34;	// 0x22: 2 x 2 blocks per MCU for component 0
+	oFrameHeaderFixedSize.aSamplingFactors[1] = 17;	// 0x11: 1 x 1 block per MCU
+	oFrameHeaderFixedSize.aSamplingFactors[2] = 17;	// 0x11: 1 x 1 block per MCU
+	oFrameHeaderFixedSize.aQuantizationTableSelector[0] = 0;
+	oFrameHeaderFixedSize.aQuantizationTableSelector[1] = 1;
+	oFrameHeaderFixedSize.aQuantizationTableSelector[2] = 1;
+	oFrameHeaderFixedSize.nWidth = width;	// stored in an unsigned short field
+	oFrameHeaderFixedSize.nHeight = height;	// stored in an unsigned short field
+
+	for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i)	// track the largest vertical / horizontal sampling factor
+	{
+		nMCUBlocksVFixedSize = max(nMCUBlocksVFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f);
+		nMCUBlocksHFixedSize = max(nMCUBlocksHFixedSize, oFrameHeaderFixedSize.aSamplingFactors[i] >> 4);
+	}
+
+	for (int i = 0; i < oFrameHeaderFixedSize.nComponents; ++i)	// size and allocate one DCT buffer and one 8-bit plane per component
+	{
+		NppiSize oBlocks;
+		NppiSize oBlocksPerMCU = { oFrameHeaderFixedSize.aSamplingFactors[i] >> 4, oFrameHeaderFixedSize.aSamplingFactors[i] & 0x0f };
+
+		oBlocks.width = (int)ceil((oFrameHeaderFixedSize.nWidth + 7) / 8 *	// (nWidth + 7) / 8 is integer division: number of 8-pixel blocks across
+			static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksHFixedSize);
+		oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;	// round up to a multiple of the per-MCU block count
+
+		oBlocks.height = (int)ceil((oFrameHeaderFixedSize.nHeight + 7) / 8 *
+			static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksVFixedSize);
+		oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
+
+		aSrcSize[i].width = oBlocks.width * 8;	// plane size in pixels: 8 per block in each direction
+		aSrcSize[i].height = oBlocks.height * 8;
+
+		// Allocate Memory (NOTE(review): buffers left over from an earlier call are not freed here)
+		size_t nPitch;
+		NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height));	// one row per block row, 64 coefficients per block
+		aDCTStep[i] = static_cast<Npp32s>(nPitch);
+
+		NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height));
+
+		aSrcPitch[i] = nPitch;
+		aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
+	}
+
+	return 0;
+}
+
+int jpegNPP(const char *szOutputFile, float* d_srcRGB)
+{
+	// Encodes the device image d_srcRGB to a JPEG file at szOutputFile using the frame size and global buffers prepared by initTable(flag, width, height). Returns EXIT_SUCCESS or EXIT_FAILURE. Step 1: RGB -> three planes via cuda_common::RGB2YUV.
+	cudaError_t cudaStatus;
+	cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight,
+		apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
+		apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
+		apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
+	if (cudaSuccess != cudaStatus) { printf("RGB2YUV Failed!\n"); return EXIT_FAILURE; }	// do not encode planes that were never filled
+	/**
+	* Forward DCT, quantization and level shift part of the JPEG encoding.
+	* Input is expected in 8x8 macro blocks and output is expected to be in 64x1
+	* macro blocks. The new version of the primitive takes the ROI in image pixel size and
+	* works with DCT coefficients that are in zig-zag order.
+	*/
+	int k = 0;
+	//LOG_INFO("NPP_CHECK_NPP:%d", 1);
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
+		apdDCT[0], aDCTStep[0],
+		pdQuantizationTables + k * 64,
+		aSrcSize[0],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	k = 1;	// components 1 and 2 both use quantization table 1
+	//LOG_INFO("NPP_CHECK_NPP:%d", 2);
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
+		apdDCT[1], aDCTStep[1],
+		pdQuantizationTables + k * 64,
+		aSrcSize[1],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	//LOG_INFO("NPP_CHECK_NPP:%d", 3);
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
+		apdDCT[2], aDCTStep[2],
+		pdQuantizationTables + k * 64,
+		aSrcSize[2],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	// Huffman Encoding
+
+	Npp32s nScanLength;
+	Npp8u *pJpegEncoderTemp;
+
+#if (CUDA_VERSION == 8000)
+		Npp32s nTempSize; //when using CUDA8
+#else
+		size_t nTempSize; //when using CUDA9
+#endif
+	//modified by Junlin 190221
+
+	//LOG_INFO("NPP_CHECK_NPP:%d",4);
+	if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
+	{
+		printf("nppiEncodeHuffmanGetSize Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	//LOG_INFO("NPP_CHECK_CUDA:%d",5);
+	NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
+
+	/**
+	* Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
+	*/
+	NppStatus t_status;
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
+	// NOTE(review): t_status is overwritten six times and never inspected.
+	/**
+	* Huffman Encoding of the JPEG Encoding.
+	* Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
+	*/
+	Npp32s nSs = 0;
+	Npp32s nSe = 63;
+	Npp32s nH = 0;
+	Npp32s nL = 0;
+	//LOG_INFO("NPP_CHECK_NPP:%d",6);
+	const NppStatus scanStatus = nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
+		0, nSs, nSe, nH, nL,
+		pdScan, &nScanLength,
+		apHuffmanDCTable,
+		apHuffmanACTable,
+		aSrcSize,
+		pJpegEncoderTemp);
+	// Free the per-call Huffman specs before looking at the status so the failure path does not leak them.
+	for (int i = 0; i < 3; ++i)
+	{
+		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
+		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
+	}
+	if (NPP_SUCCESS != scanStatus)
+	{
+		printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); cudaFree(pJpegEncoderTemp);
+		return EXIT_FAILURE;
+	}
+	pDstJpeg = new unsigned char[(4 << 20) + 1024]{};	// Write JPEG: pdScan holds at most 4 << 20 bytes; the extra 1024 bytes cover the markers and tables (well under 1 KB) written around the scan
+	pDstOutput = pDstJpeg;
+
+	writeMarker(0x0D8, pDstOutput);
+	writeJFIFTag(pDstOutput);
+	writeQuantizationTable(aQuantizationTables[0], pDstOutput);
+	writeQuantizationTable(aQuantizationTables[1], pDstOutput);
+	writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
+	writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
+	writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
+	writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
+	writeFrameHeader(oFrameHeaderFixedSize, pDstOutput);
+	writeScanHeader(oScanHeader, pDstOutput);
+
+	//LOG_INFO("NPP_CHECK_CUDA:%d",7);
+	NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
+
+	pDstOutput += nScanLength;
+	writeMarker(0x0D9, pDstOutput);
+	{
+		// Write result to file.
+		std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
+		outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
+	}
+
+	// Cleanup
+	cudaFree(pJpegEncoderTemp);
+	delete[] pDstJpeg;
+
+
+	return EXIT_SUCCESS;
+}
+
+int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB)
+{
+	// Encodes the device image d_srcRGB to a JPEG file at szOutputFile using the frame size and global buffers prepared by initTable(flag, width, height). Returns EXIT_SUCCESS or EXIT_FAILURE. Step 1: RGB -> three planes via cuda_common::RGB2YUV.
+	cudaError_t cudaStatus;
+	cudaStatus = cuda_common::RGB2YUV(d_srcRGB, oFrameHeaderFixedSize.nWidth, oFrameHeaderFixedSize.nHeight,
+		apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
+		apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
+		apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
+	if (cudaSuccess != cudaStatus) { printf("RGB2YUV Failed!\n"); return EXIT_FAILURE; }	// do not encode planes that were never filled
+	/**
+	* Forward DCT, quantization and level shift part of the JPEG encoding.
+	* Input is expected in 8x8 macro blocks and output is expected to be in 64x1
+	* macro blocks. The new version of the primitive takes the ROI in image pixel size and
+	* works with DCT coefficients that are in zig-zag order.
+	*/
+	int k = 0;
+	//LOG_INFO("NPP_CHECK_NPP:%d", 1);
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
+		apdDCT[0], aDCTStep[0],
+		pdQuantizationTables + k * 64,
+		aSrcSize[0],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	k = 1;	// components 1 and 2 both use quantization table 1
+	//LOG_INFO("NPP_CHECK_NPP:%d", 2);
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
+		apdDCT[1], aDCTStep[1],
+		pdQuantizationTables + k * 64,
+		aSrcSize[1],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	//LOG_INFO("NPP_CHECK_NPP:%d", 3);
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
+		apdDCT[2], aDCTStep[2],
+		pdQuantizationTables + k * 64,
+		aSrcSize[2],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	// Huffman Encoding
+
+	Npp32s nScanLength;
+	Npp8u *pJpegEncoderTemp;
+
+#if (CUDA_VERSION == 8000)
+	Npp32s nTempSize; //when using CUDA8
+#else
+	size_t nTempSize; //when using CUDA9
+#endif
+	//modified by Junlin 190221
+
+	//LOG_INFO("NPP_CHECK_NPP:%d",4);
+	if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
+	{
+		printf("nppiEncodeHuffmanGetSize Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	//LOG_INFO("NPP_CHECK_CUDA:%d",5);
+	NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
+
+	/**
+	* Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
+	*/
+	NppStatus t_status;
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
+	// NOTE(review): t_status is overwritten six times and never inspected.
+	/**
+	* Huffman Encoding of the JPEG Encoding.
+	* Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
+	*/
+	Npp32s nSs = 0;
+	Npp32s nSe = 63;
+	Npp32s nH = 0;
+	Npp32s nL = 0;
+	//LOG_INFO("NPP_CHECK_NPP:%d",6);
+	const NppStatus scanStatus = nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
+		0, nSs, nSe, nH, nL,
+		pdScan, &nScanLength,
+		apHuffmanDCTable,
+		apHuffmanACTable,
+		aSrcSize,
+		pJpegEncoderTemp);
+	// Free the per-call Huffman specs before looking at the status so the failure path does not leak them.
+	for (int i = 0; i < 3; ++i)
+	{
+		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
+		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
+	}
+	if (NPP_SUCCESS != scanStatus)
+	{
+		printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); cudaFree(pJpegEncoderTemp);
+		return EXIT_FAILURE;
+	}
+	pDstJpeg = new unsigned char[(4 << 20) + 1024]{};	// Write JPEG: pdScan holds at most 4 << 20 bytes; the extra 1024 bytes cover the markers and tables (well under 1 KB) written around the scan
+	pDstOutput = pDstJpeg;
+
+	writeMarker(0x0D8, pDstOutput);
+	writeJFIFTag(pDstOutput);
+	writeQuantizationTable(aQuantizationTables[0], pDstOutput);
+	writeQuantizationTable(aQuantizationTables[1], pDstOutput);
+	writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
+	writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
+	writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
+	writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
+	writeFrameHeader(oFrameHeaderFixedSize, pDstOutput);
+	writeScanHeader(oScanHeader, pDstOutput);
+
+	//LOG_INFO("NPP_CHECK_CUDA:%d",7);
+	NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
+
+	pDstOutput += nScanLength;
+	writeMarker(0x0D9, pDstOutput);
+	{
+		// Write result to file.
+		std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
+		outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
+	}
+
+	// Cleanup
+	cudaFree(pJpegEncoderTemp);
+	delete[] pDstJpeg;
+
+
+	return EXIT_SUCCESS;
+}
+
+
+int jpegNPP(const char *szOutputFile, float* d_srcRGB, int img_width, int img_height)
+{
+	NppiSize aSrcSize[3];	// these locals shadow the file-level arrays of the same names
+	Npp16s *apdDCT[3] = { 0, 0, 0 };
+	Npp32s aDCTStep[3];
+
+	Npp8u *apSrcImage[3] = { 0, 0, 0 };
+	Npp32s aSrcImageStep[3];
+	size_t aSrcPitch[3];
+	// Encodes the img_width x img_height device image d_srcRGB to a JPEG file at szOutputFile, allocating its plane/DCT buffers per call. Requires initTable() to have run. Returns EXIT_SUCCESS or EXIT_FAILURE.
+	auto freePlanes = [&]() { for (int i = 0; i < 3; ++i) { cudaFree(apdDCT[i]); cudaFree(apSrcImage[i]); } };	// releases the per-call device buffers; used on every failure path
+	// Fill in the frame header size
+	oFrameHeader.nWidth = img_width;
+	oFrameHeader.nHeight = img_height;
+
+	for (int i = 0; i < oFrameHeader.nComponents; ++i)
+	{
+		NppiSize oBlocks;
+		NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f };
+
+		oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 *
+			static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH);
+		oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
+
+		oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 *
+			static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV);
+		oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
+
+		aSrcSize[i].width = oBlocks.width * 8;
+		aSrcSize[i].height = oBlocks.height * 8;
+
+		// Allocate Memory
+		size_t nPitch;
+		//LOG_INFO("NPP_CHECK_CUDA:%d",1);
+		NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height));
+		aDCTStep[i] = static_cast<Npp32s>(nPitch);
+
+		//LOG_INFO("NPP_CHECK_CUDA:%d",2);
+		NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height));
+
+		aSrcPitch[i] = nPitch;
+		aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
+	}
+
+	//RGB2YUV
+	cudaError_t cudaStatus;
+	cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height,
+		apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
+		apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
+		apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
+	if (cudaSuccess != cudaStatus) { printf("RGB2YUV Failed!\n"); freePlanes(); return EXIT_FAILURE; }	// do not encode planes that were never filled
+	/**
+	* Forward DCT, quantization and level shift part of the JPEG encoding.
+	* Input is expected in 8x8 macro blocks and output is expected to be in 64x1
+	* macro blocks. The new version of the primitive takes the ROI in image pixel size and
+	* works with DCT coefficients that are in zig-zag order.
+	*/
+	int k = 0;
+	//LOG_INFO("NPP_CHECK_CUDA:%d",3);
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
+		apdDCT[0], aDCTStep[0],
+		pdQuantizationTables + k * 64,
+		aSrcSize[0],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		freePlanes(); return EXIT_FAILURE;
+	}
+	k = 1;	// components 1 and 2 both use quantization table 1
+
+	//LOG_INFO("NPP_CHECK_CUDA:%d",4);
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
+		apdDCT[1], aDCTStep[1],
+		pdQuantizationTables + k * 64,
+		aSrcSize[1],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		freePlanes(); return EXIT_FAILURE;
+	}
+
+	//LOG_INFO("NPP_CHECK_CUDA:%d",5);
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
+		apdDCT[2], aDCTStep[2],
+		pdQuantizationTables + k * 64,
+		aSrcSize[2],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		freePlanes(); return EXIT_FAILURE;
+	}
+
+	// Huffman Encoding
+
+	Npp32s nScanLength;
+	Npp8u *pJpegEncoderTemp;
+
+#if (CUDA_VERSION == 8000)
+	Npp32s nTempSize; //when using CUDA8
+#else
+	size_t nTempSize; //when using CUDA9
+#endif
+	//modified by Junlin 190221
+
+	//LOG_INFO("NPP_CHECK_CUDA:%d",6);
+	if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
+	{
+		printf("nppiEncodeHuffmanGetSize Failed!\n");
+		freePlanes(); return EXIT_FAILURE;
+	}
+
+	//LOG_INFO("NPP_CHECK_CUDA:%d",7);
+	NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
+
+	/**
+	* Allocates memory and creates a Huffman table in a format that is suitable for the encoder.
+	*/
+	NppStatus t_status;
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
+	// NOTE(review): t_status is overwritten six times and never inspected.
+	/**
+	* Huffman Encoding of the JPEG Encoding.
+	* Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
+	*/
+	Npp32s nSs = 0;
+	Npp32s nSe = 63;
+	Npp32s nH = 0;
+	Npp32s nL = 0;
+	//LOG_INFO("NPP_CHECK_CUDA:%d",8);
+	const NppStatus scanStatus = nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
+		0, nSs, nSe, nH, nL,
+		pdScan, &nScanLength,
+		apHuffmanDCTable,
+		apHuffmanACTable,
+		aSrcSize,
+		pJpegEncoderTemp);
+	// Free the per-call Huffman specs before looking at the status so the failure path does not leak them.
+	for (int i = 0; i < 3; ++i)
+	{
+		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
+		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
+	}
+	if (NPP_SUCCESS != scanStatus)
+	{
+		printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n"); cudaFree(pJpegEncoderTemp); freePlanes();
+		return EXIT_FAILURE;
+	}
+	pDstJpeg = new unsigned char[(4 << 20) + 1024]{};	// Write JPEG: pdScan holds at most 4 << 20 bytes; the extra 1024 bytes cover the markers and tables (well under 1 KB) written around the scan
+	pDstOutput = pDstJpeg;
+
+	writeMarker(0x0D8, pDstOutput);
+	writeJFIFTag(pDstOutput);
+	writeQuantizationTable(aQuantizationTables[0], pDstOutput);
+	writeQuantizationTable(aQuantizationTables[1], pDstOutput);
+	writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
+	writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
+	writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
+	writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
+	writeFrameHeader(oFrameHeader, pDstOutput);
+	writeScanHeader(oScanHeader, pDstOutput);
+
+	//LOG_INFO("NPP_CHECK_CUDA:%d",9);
+	NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
+
+	pDstOutput += nScanLength;
+	writeMarker(0x0D9, pDstOutput);
+
+	{
+		// Write result to file.
+		std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
+		outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
+	}
+
+	// Cleanup
+	cudaFree(pJpegEncoderTemp);
+	delete[] pDstJpeg;
+	for (int i = 0; i < 3; ++i)
+	{
+		cudaFree(apdDCT[i]);
+		cudaFree(apSrcImage[i]);
+	}
+
+	return EXIT_SUCCESS;
+}
+
+// Encodes the RGB image at d_srcRGB (img_width x img_height; presumably a device pointer - TODO confirm) to a JPEG via NPP and writes it to szOutputFile. Returns EXIT_FAILURE when a checked NPP call fails, EXIT_SUCCESS at the end.
+int jpegNPP(const char *szOutputFile, unsigned char* d_srcRGB, int img_width, int img_height)
+{
+	NppiSize aSrcSize[3];
+	Npp16s *apdDCT[3] = { 0, 0, 0 };
+	Npp32s aDCTStep[3];
+
+	Npp8u *apSrcImage[3] = { 0, 0, 0 };
+	Npp32s aSrcImageStep[3];
+	size_t aSrcPitch[3];
+	// oFrameHeader, oScanHeader, pdScan, pDCTState, the quantization/Huffman tables and pDstJpeg/pDstOutput
+	// are not declared in this function; presumably file-scope state shared between calls (verify).
+	// Fill in the frame header dimensions (the original non-English comment was partly mis-encoded).
+	oFrameHeader.nWidth = img_width;
+	oFrameHeader.nHeight = img_height;
+
+	for (int i = 0; i < oFrameHeader.nComponents; ++i)
+	{
+		NppiSize oBlocks;
+		NppiSize oBlocksPerMCU = { oFrameHeader.aSamplingFactors[i] >> 4, oFrameHeader.aSamplingFactors[i] & 0x0f };
+
+		oBlocks.width = (int)ceil((oFrameHeader.nWidth + 7) / 8 *
+			static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH);
+		oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
+
+		oBlocks.height = (int)ceil((oFrameHeader.nHeight + 7) / 8 *
+			static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV);
+		oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
+
+		aSrcSize[i].width = oBlocks.width * 8;
+		aSrcSize[i].height = oBlocks.height * 8;
+
+		// Allocate pitched buffers for this component; the pitch returned by each call is kept as the step.
+		size_t nPitch;
+		// DCT output: oBlocks.height rows, each holding oBlocks.width blocks of 64 Npp16s coefficients.
+		NPP_CHECK_CUDA(cudaMallocPitch(&apdDCT[i], &nPitch, oBlocks.width * 64 * sizeof(Npp16s), oBlocks.height));
+		aDCTStep[i] = static_cast<Npp32s>(nPitch);
+
+		// Source plane, sized in whole 8x8 blocks (aSrcSize is oBlocks * 8 in each dimension).
+		NPP_CHECK_CUDA(cudaMallocPitch(&apSrcImage[i], &nPitch, aSrcSize[i].width, aSrcSize[i].height));
+
+		aSrcPitch[i] = nPitch;
+		aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
+	}
+
+	// Convert the RGB input into the three component planes. NOTE(review): cudaStatus is assigned but never checked.
+	cudaError_t cudaStatus;
+	cudaStatus = cuda_common::RGB2YUV(d_srcRGB, img_width, img_height,
+		apSrcImage[0], aSrcPitch[0], aSrcSize[0].width, aSrcSize[0].height,
+		apSrcImage[1], aSrcPitch[1], aSrcSize[1].width, aSrcSize[1].height,
+		apSrcImage[2], aSrcPitch[2], aSrcSize[2].width, aSrcSize[2].height);
+
+	/**
+	* Forward DCT, quantization and level shift part of the JPEG encoding.
+	* Input is expected in 8x8 macro blocks and output is expected to be in 64x1
+	* macro blocks. The new version of the primitive takes the ROI in image pixel size and
+	* works with DCT coefficients that are in zig-zag order.
+	*/
+	int k = 0;
+	// Component 0 is quantized with the table at offset 0 (k == 0).
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[0], aSrcImageStep[0],
+		apdDCT[0], aDCTStep[0],
+		pdQuantizationTables + k * 64,
+		aSrcSize[0],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		return EXIT_FAILURE;
+	}
+	k = 1;
+
+	// Component 1 is quantized with the table at offset 64 (k == 1).
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[1], aSrcImageStep[1],
+		apdDCT[1], aDCTStep[1],
+		pdQuantizationTables + k * 64,
+		aSrcSize[1],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	// Component 2 reuses the same table as component 1 (k is still 1).
+	if (NPP_SUCCESS != (nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW(apSrcImage[2], aSrcImageStep[2],
+		apdDCT[2], aDCTStep[2],
+		pdQuantizationTables + k * 64,
+		aSrcSize[2],
+		pDCTState)))
+	{
+		printf("nppiDCTQuantFwd8x8LS_JPEG_8u16s_C1R_NEW Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	// Huffman Encoding
+
+	Npp32s nScanLength;
+	Npp8u *pJpegEncoderTemp;
+
+#if (CUDA_VERSION == 8000)
+	Npp32s nTempSize; //when using CUDA8
+#else
+	size_t nTempSize; //when using CUDA9
+#endif
+					  //modified by Junlin 190221
+
+	// Ask NPP for the size of the temporary buffer used by the Huffman scan encoder below.
+	if (NPP_SUCCESS != (nppiEncodeHuffmanGetSize(aSrcSize[0], 3, &nTempSize)))
+	{
+		printf("nppiEncodeHuffmanGetSize Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	// Allocate that temporary buffer with cudaMalloc; it is released in the cleanup at the end.
+	NPP_CHECK_CUDA(cudaMalloc(&pJpegEncoderTemp, nTempSize));
+
+	/**
+	* Builds encoder-format Huffman tables; components 1 and 2 both use table set 1. NOTE(review): t_status is overwritten by every call and never checked.
+	*/
+	NppStatus t_status;
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[0].aCodes, nppiDCTable, &apHuffmanDCTable[0]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[0].aCodes, nppiACTable, &apHuffmanACTable[0]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[1]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[1]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanDCTables[1].aCodes, nppiDCTable, &apHuffmanDCTable[2]);
+	t_status = nppiEncodeHuffmanSpecInitAlloc_JPEG(pHuffmanACTables[1].aCodes, nppiACTable, &apHuffmanACTable[2]);
+
+	/**
+	* Huffman Encoding of the JPEG Encoding.
+	* Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
+	*/
+	Npp32s nSs = 0;
+	Npp32s nSe = 63;
+	Npp32s nH = 0;
+	Npp32s nL = 0;
+	// NOTE(review): nScanLength is never compared against the 4 MiB host buffer it is later copied into.
+	if (NPP_SUCCESS != (nppiEncodeHuffmanScan_JPEG_8u16s_P3R(apdDCT, aDCTStep,
+		0, nSs, nSe, nH, nL,
+		pdScan, &nScanLength,
+		apHuffmanDCTable,
+		apHuffmanACTable,
+		aSrcSize,
+		pJpegEncoderTemp)))
+	{
+		printf("nppiEncodeHuffmanScan_JPEG_8u16s_P3R Failed!\n");
+		return EXIT_FAILURE;
+	}
+
+	for (int i = 0; i < 3; ++i)
+	{
+		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanDCTable[i]);
+		nppiEncodeHuffmanSpecFree_JPEG(apHuffmanACTable[i]);
+	}
+	// Assemble the JPEG stream in a zero-initialized 4 MiB host buffer; pDstOutput is the write cursor.
+	pDstJpeg = new unsigned char[4 << 20]{};
+	pDstOutput = pDstJpeg;
+
+	writeMarker(0x0D8, pDstOutput);
+	writeJFIFTag(pDstOutput);
+	writeQuantizationTable(aQuantizationTables[0], pDstOutput);
+	writeQuantizationTable(aQuantizationTables[1], pDstOutput);
+	writeHuffmanTable(pHuffmanDCTables[0], pDstOutput);
+	writeHuffmanTable(pHuffmanACTables[0], pDstOutput);
+	writeHuffmanTable(pHuffmanDCTables[1], pDstOutput);
+	writeHuffmanTable(pHuffmanACTables[1], pDstOutput);
+	writeFrameHeader(oFrameHeader, pDstOutput);
+	writeScanHeader(oScanHeader, pDstOutput);
+
+	// Copy the encoded scan (nScanLength bytes) from pdScan to the host buffer, right after the headers.
+	NPP_CHECK_CUDA(cudaMemcpy(pDstOutput, pdScan, nScanLength, cudaMemcpyDeviceToHost));
+
+	pDstOutput += nScanLength;
+	writeMarker(0x0D9, pDstOutput);
+
+	{
+		// Write the assembled bytes to szOutputFile; open/write errors are not checked.
+		std::ofstream outputFile(szOutputFile, ios::out | ios::binary);
+		outputFile.write(reinterpret_cast<const char *>(pDstJpeg), static_cast<int>(pDstOutput - pDstJpeg));
+	}
+
+	// Cleanup. NOTE(review): the early 'return EXIT_FAILURE' paths above skip this, so those buffers are not freed.
+	cudaFree(pJpegEncoderTemp);
+	delete[] pDstJpeg;
+	for (int i = 0; i < 3; ++i)
+	{
+		cudaFree(apdDCT[i]);
+		cudaFree(apSrcImage[i]);
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/src/utiltools.hpp b/src/utiltools.hpp
deleted file mode 100644
index 8caff91..0000000
--- a/src/utiltools.hpp
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _UTIL_TOOLS_HPP_
-#define _UTIL_TOOLS_HPP_
-
-#include<chrono>
-
-using namespace std;
-
-namespace UtilTools{
-
-    static long get_cur_time_ms() {
-        chrono::time_point<chrono::system_clock, chrono::milliseconds> tpMicro
-            = chrono::time_point_cast<chrono::milliseconds>(chrono::system_clock::now());
-        return tpMicro.time_since_epoch().count();
-    }
-
-}
-
-#endif
\ No newline at end of file
--
libgit2 0.21.4