diff --git a/FFNvDecoder/FFNvDecoder.vcxproj b/FFNvDecoder/FFNvDecoder.vcxproj
index c5832ac..cc58141 100644
--- a/FFNvDecoder/FFNvDecoder.vcxproj
+++ b/FFNvDecoder/FFNvDecoder.vcxproj
@@ -48,13 +48,13 @@
Disabled
WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
/utf-8
- ..\3rdparty\ffmpeg-5.0.1-win64-dev\include;./;./common/inc;./common/UtilNPP;%(AdditionalIncludeDirectories)
+ ..\3rdparty\ffmpeg-5.0.1-win64-dev\include;./;./common/inc;./common/UtilNPP;D:\win_dev\opencv\build\include;%(AdditionalIncludeDirectories)
true
Console
- avcodec.lib;avdevice.lib;avfilter.lib;avformat.lib;avutil.lib;postproc.lib;swresample.lib;swscale.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;nvjpeg.lib;%(AdditionalDependencies)
- ..\3rdparty\ffmpeg-5.0.1-win64-dev\lib;%(AdditionalLibraryDirectories)
+ avcodec.lib;avdevice.lib;avfilter.lib;avformat.lib;avutil.lib;postproc.lib;swresample.lib;swscale.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;nvjpeg.lib;opencv_world455d.lib;freeglut.lib;glew64.lib;%(AdditionalDependencies)
+ ..\3rdparty\ffmpeg-5.0.1-win64-dev\lib;D:\win_dev\opencv\build\x64\vc14\lib;../3rdparty/gl;%(AdditionalLibraryDirectories)
64
@@ -80,7 +80,6 @@
-
@@ -94,7 +93,6 @@
-
diff --git a/FFNvDecoder/FFNvDecoder.vcxproj.filters b/FFNvDecoder/FFNvDecoder.vcxproj.filters
index ef6ce5e..8f99e54 100644
--- a/FFNvDecoder/FFNvDecoder.vcxproj.filters
+++ b/FFNvDecoder/FFNvDecoder.vcxproj.filters
@@ -1,9 +1,6 @@
-
- cu_src
-
cu_src
@@ -58,9 +55,6 @@
include
-
- cu_src
-
cu_src
diff --git a/FFNvDecoder/NV12ToRGB.cu b/FFNvDecoder/NV12ToRGB.cu
index 77ac403..0ec5f00 100644
--- a/FFNvDecoder/NV12ToRGB.cu
+++ b/FFNvDecoder/NV12ToRGB.cu
@@ -52,9 +52,9 @@ namespace cuda_common
return x;
}
}
-
// CUDA kernel for outputing the final RGB output from NV12;
- __global__ void NV12ToRGB_drvapi2(uint32 *srcImage, size_t nSourcePitch, unsigned char *dstImage, int width, int height)
+ extern "C"
+ __global__ void NV12ToRGB_drvapi2(uint32 *srcImage, size_t nSourcePitch, unsigned char *dstImage, int width, int height)
{
int32 x, y;
@@ -169,8 +169,9 @@ namespace cuda_common
}
- // CUDA kernel for outputing the final RGB output from NV12;
- __global__ void CUDAToBGR_drvapi(uint32 *dataY, uint32 *dataUV, size_t pitchY, size_t pitchUV, unsigned char *dstImage, int width, int height)
+ // CUDA kernel for outputting the final RGB output from NV12;
+ extern "C"
+ __global__ void CUDAToBGR_drvapi(uint32 *dataY, uint32 *dataUV, size_t pitchY, size_t pitchUV, unsigned char *dstImage, int width, int height)
{
int32 x, y;
@@ -307,7 +308,7 @@ namespace cuda_common
{
dim3 block(32, 16, 1);
dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1);
- NV12ToRGB_drvapi2 <<< grid, block >>>((uint32 *)d_srcNV12, nSourcePitch, d_dstRGB, width, height);
+ NV12ToRGB_drvapi2 << < grid, block >> >((uint32 *)d_srcNV12, nSourcePitch, d_dstRGB, width, height);
cudaError_t cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus));
@@ -327,7 +328,7 @@ namespace cuda_common
{
dim3 block(32, 16, 1);
dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1);
- CUDAToBGR_drvapi <<< grid, block >>>((uint32 *)dataY, (uint32 *)dataUV, pitchY, pitchUV, d_dstRGB, width, height);
+ CUDAToBGR_drvapi << < grid, block >> >((uint32 *)dataY, (uint32 *)dataUV, pitchY, pitchUV, d_dstRGB, width, height);
cudaError_t cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus));
diff --git a/FFNvDecoder/cudaHeader.cu b/FFNvDecoder/cudaHeader.cu
deleted file mode 100644
index 894f47a..0000000
--- a/FFNvDecoder/cudaHeader.cu
+++ /dev/null
@@ -1,30 +0,0 @@
-#include"cudaHeader.h"
-#include
-
-#include "cuda_runtime.h"
-#include
-
-//核函数,计算a+b
-__global__ void add(int a,int b,int *c)
-{
- //保存a+b的计算结果
- *c=a+b;
-}
-
-
-//cuda测试函数的实现
-void cudaTest()
-{
- int c = 0;
- //在gpu上开辟一个相同的内存
- int *deviceC;
- cudaMalloc((void**)&deviceC,sizeof(int));
- //调用核函数
- add<<<1,1>>>(3,7,deviceC);
- //把计算结果复制到cpu上
- cudaMemcpy(&c,deviceC,sizeof(int),cudaMemcpyDeviceToHost);
- //展示计算结果
- std::cout< l(m_mutex_show);
+
+    // Debug helper body: converts the decoded GPU frame to packed 3-channel
+    // 8-bit data on the device, copies it to the host, writes it to
+    // "dec0.jpg" and displays it in an OpenCV window named "show".
+    //
+    // gpuFrame: decoded frame; data[0]/data[1] and linesize[0]/linesize[1]
+    //           are handed to cuda_common::CUDAToBGR as device pointers and
+    //           pitches (presumably the NV12 Y and UV planes - confirm
+    //           against the decoder configuration).
+    //
+    // Every exit path below releases the device buffer; the original leaked
+    // it when the colour conversion failed.
+
+    // Validate the geometry first so that bad dimensions can neither size
+    // the allocation nor reach the conversion kernel.
+    const int channel = 3;
+    const int width = gpuFrame->width;
+    const int height = gpuFrame->height;
+    if (width <= 0 || height <= 0) {
+        cout << "showFrame: invalid frame size" << endl;
+        return;
+    }
+
+    // Compute the byte count in size_t to avoid int overflow on large frames.
+    const size_t nSize = (size_t)channel * (size_t)width * (size_t)height;
+
+    unsigned char *pHwData = nullptr;
+    cudaError_t cudaStatus = cudaMalloc((void **)&pHwData, nSize * sizeof(unsigned char));
+    if (cudaStatus != cudaSuccess || pHwData == nullptr) {
+        cout << "cudaMalloc failed: " << cudaGetErrorString(cudaStatus) << endl;
+        return;
+    }
+
+    cuda_common::setColorSpace(ITU709, 0);
+    cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0], (CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, width, height);
+    if (cudaStatus == cudaSuccess) {
+        // Errors raised while the kernel executes only surface at a
+        // synchronising call, so the result of the sync is checked as well.
+        cudaStatus = cudaDeviceSynchronize();
+    }
+    if (cudaStatus != cudaSuccess) {
+        cout << "CUDAToBGR failed !!!" << endl;
+        cudaFree(pHwData);
+        return;
+    }
+
+    unsigned char *cpu_data = new unsigned char[nSize];
+
+    // cudaMemcpy with cudaMemcpyDeviceToHost blocks until the copy is done,
+    // so no extra device synchronisation is needed before reading cpu_data.
+    cudaStatus = cudaMemcpy(cpu_data, pHwData, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost);
+    if (cudaStatus == cudaSuccess) {
+        // The Mat wraps cpu_data without copying; it must not outlive the
+        // buffer, hence the inner scope.
+        cv::Mat img_(height, width, CV_8UC3, cpu_data);
+        if (!cv::imwrite("dec0.jpg", img_)) {
+            cout << "imwrite dec0.jpg failed" << endl;
+        }
+
+        imshow("show", img_);
+        // NOTE(review): waitKey(0) blocks until a key is pressed, which
+        // stalls the caller for every frame - acceptable for debugging only.
+        waitKey(0);
+    } else {
+        cout << "cudaMemcpy failed: " << cudaGetErrorString(cudaStatus) << endl;
+    }
+
+    delete[] cpu_data;
+    cpu_data = nullptr;
+
+    cudaFree(pHwData);
+    pHwData = nullptr;
+}
+
/**
* 注意: gpuFrame 在解码器设置的显卡上,后续操作要十分注意这一点,尤其是多线程情况
* */
@@ -64,6 +109,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){
cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str()));
saveFrame(gpuFrame, decoder->getName());
+ showFrame(gpuFrame);
}
}
}
@@ -82,7 +128,7 @@ static long long get_cur_time(){
return tpMicro.time_since_epoch().count();
}
-static int sum = 0;
+static int suming = 0;
unsigned char *pHwData = nullptr;
void postDecoded0(const void * userPtr, AVFrame * gpuFrame){
@@ -101,7 +147,7 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){
end_time = start_time = get_cur_time();
}
count_num++;
- sum ++ ;
+ suming ++ ;
if (count_num >= count_std)
{
// end_time = get_cur_time();
@@ -112,7 +158,7 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){
count_flag = false;
}
- cout << "帧数:" << sum << endl;
+ cout << "帧数:" << suming << endl;
}
}
}
@@ -163,8 +209,6 @@ void logFF(void *, int level, const char *fmt, va_list ap)
int main(int argc, char* argv[]) {
- cudaTest();
-
printf("start \n");
if (argc != 3) {
fprintf(stderr, "./xxx uri gpu_id\n");