diff --git a/FFNvDecoder/FFNvDecoder.vcxproj b/FFNvDecoder/FFNvDecoder.vcxproj index c5832ac..cc58141 100644 --- a/FFNvDecoder/FFNvDecoder.vcxproj +++ b/FFNvDecoder/FFNvDecoder.vcxproj @@ -48,13 +48,13 @@ Disabled WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) /utf-8 - ..\3rdparty\ffmpeg-5.0.1-win64-dev\include;./;./common/inc;./common/UtilNPP;%(AdditionalIncludeDirectories) + ..\3rdparty\ffmpeg-5.0.1-win64-dev\include;./;./common/inc;./common/UtilNPP;D:\win_dev\opencv\build\include;%(AdditionalIncludeDirectories) true Console - avcodec.lib;avdevice.lib;avfilter.lib;avformat.lib;avutil.lib;postproc.lib;swresample.lib;swscale.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;nvjpeg.lib;%(AdditionalDependencies) - ..\3rdparty\ffmpeg-5.0.1-win64-dev\lib;%(AdditionalLibraryDirectories) + avcodec.lib;avdevice.lib;avfilter.lib;avformat.lib;avutil.lib;postproc.lib;swresample.lib;swscale.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;nvjpeg.lib;opencv_world455d.lib;freeglut.lib;glew64.lib;%(AdditionalDependencies) + ..\3rdparty\ffmpeg-5.0.1-win64-dev\lib;D:\win_dev\opencv\build\x64\vc14\lib;../3rdparty/gl;%(AdditionalLibraryDirectories) 64 @@ -80,7 +80,6 @@ - @@ -94,7 +93,6 @@ - diff --git a/FFNvDecoder/FFNvDecoder.vcxproj.filters b/FFNvDecoder/FFNvDecoder.vcxproj.filters index ef6ce5e..8f99e54 100644 --- a/FFNvDecoder/FFNvDecoder.vcxproj.filters +++ b/FFNvDecoder/FFNvDecoder.vcxproj.filters @@ -1,9 +1,6 @@  - - cu_src - cu_src @@ -58,9 +55,6 @@ include - - cu_src - cu_src diff --git a/FFNvDecoder/NV12ToRGB.cu b/FFNvDecoder/NV12ToRGB.cu index 77ac403..0ec5f00 100644 --- a/FFNvDecoder/NV12ToRGB.cu +++ b/FFNvDecoder/NV12ToRGB.cu @@ -52,9 +52,9 @@ namespace cuda_common return x; } } - // CUDA kernel for outputing the final RGB output from NV12; - __global__ void NV12ToRGB_drvapi2(uint32 *srcImage, size_t nSourcePitch, unsigned char *dstImage, int width, int height) + extern "C" + __global__ void NV12ToRGB_drvapi2(uint32 *srcImage, size_t nSourcePitch, unsigned char *dstImage, int width, int height) { int32 x, y; @@ -169,8 +169,9 @@ namespace cuda_common } - // CUDA kernel for outputing the final RGB output from NV12; - __global__ void CUDAToBGR_drvapi(uint32 *dataY, uint32 *dataUV, size_t pitchY, size_t pitchUV, unsigned char *dstImage, int width, int height) + // CUDA kernel for outputing the final RGB output from NV12; + extern "C" + __global__ void CUDAToBGR_drvapi(uint32 *dataY, uint32 *dataUV, size_t pitchY, size_t pitchUV, unsigned char *dstImage, int width, int height) { int32 x, y; @@ -307,7 +308,7 @@ namespace cuda_common { dim3 block(32, 16, 1); dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1); - NV12ToRGB_drvapi2 <<< grid, block >>>((uint32 *)d_srcNV12, nSourcePitch, d_dstRGB, width, height); + NV12ToRGB_drvapi2 << < grid, block >> >((uint32 *)d_srcNV12, nSourcePitch, d_dstRGB, width, height); cudaError_t cudaStatus = cudaGetLastError(); if (cudaStatus != cudaSuccess) { fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus)); @@ -327,7 +328,7 @@ namespace cuda_common { dim3 block(32, 16, 1); dim3 grid((width + (2 * block.x - 1)) / (2 * block.x), (height + (block.y - 1)) / block.y, 1); - CUDAToBGR_drvapi <<< grid, block >>>((uint32 *)dataY, (uint32 *)dataUV, pitchY, pitchUV, d_dstRGB, width, height); + CUDAToBGR_drvapi << < grid, block >> >((uint32 *)dataY, (uint32 *)dataUV, pitchY, pitchUV, d_dstRGB, width, height); cudaError_t cudaStatus = cudaGetLastError(); if (cudaStatus != cudaSuccess) { fprintf(stderr, "NV12ToRGB_drvapi launch failed: %s\n", cudaGetErrorString(cudaStatus)); diff --git a/FFNvDecoder/cudaHeader.cu b/FFNvDecoder/cudaHeader.cu deleted file mode 100644 index 894f47a..0000000 --- a/FFNvDecoder/cudaHeader.cu +++ /dev/null @@ -1,30 +0,0 @@ -#include"cudaHeader.h" -#include - -#include "cuda_runtime.h" -#include - -//核函数,计算a+b -__global__ void add(int a,int b,int *c) -{ - //保存a+b的计算结果 - *c=a+b; -} - - -//cuda测试函数的实现 -void cudaTest() -{ - int c = 0; - //在gpu上开辟一个相同的内存 - int *deviceC; - cudaMalloc((void**)&deviceC,sizeof(int)); - //调用核函数 - add<<<1,1>>>(3,7,deviceC); - //把计算结果复制到cpu上 - cudaMemcpy(&c,deviceC,sizeof(int),cudaMemcpyDeviceToHost); - //展示计算结果 - std::cout< l(m_mutex_show); + + unsigned char *pHwData = nullptr; + cudaError_t cudaStatus = cudaMalloc((void **)&pHwData, 3 * gpuFrame->width * gpuFrame->height * sizeof(unsigned char)); + + cuda_common::setColorSpace(ITU709, 0); + cudaStatus = cuda_common::CUDAToBGR((CUdeviceptr)gpuFrame->data[0], (CUdeviceptr)gpuFrame->data[1], gpuFrame->linesize[0], gpuFrame->linesize[1], pHwData, gpuFrame->width, gpuFrame->height); + cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + cout << "CUDAToBGR failed !!!" << endl; + return; + } + + + + unsigned char * pHwRgb = pHwData; + int channel = 3; + int width = gpuFrame->width; + int height = gpuFrame->height; + + if (pHwRgb != nullptr && channel > 0 && width > 0 && height > 0) { + int nSize = channel * height * width; + unsigned char* cpu_data = new unsigned char[nSize]; + + cudaMemcpy(cpu_data, pHwRgb, nSize * sizeof(unsigned char), cudaMemcpyDeviceToHost); + cudaDeviceSynchronize(); + + cv::Mat img_(height, width, CV_8UC3, cpu_data); + bool bWrite = cv::imwrite("dec0.jpg", img_); + + imshow("show", img_); + waitKey(0); + + delete[] cpu_data; + cpu_data = nullptr; + + } + + cudaFree(pHwData); + pHwData = nullptr; +} + /** * 注意: gpuFrame 在解码器设置的显卡上,后续操作要十分注意这一点,尤其是多线程情况 * */ @@ -64,6 +109,7 @@ void postDecoded(const void * userPtr, AVFrame * gpuFrame){ cudaSetDevice(atoi(decoder->m_cfg.gpuid.c_str())); saveFrame(gpuFrame, decoder->getName()); + showFrame(gpuFrame); } } } @@ -82,7 +128,7 @@ static long long get_cur_time(){ return tpMicro.time_since_epoch().count(); } -static int sum = 0; +static int suming = 0; unsigned char *pHwData = nullptr; void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ @@ -101,7 +147,7 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ end_time = start_time = get_cur_time(); } count_num++; - sum ++ ; + suming ++ ; if (count_num >= count_std) { // end_time = get_cur_time(); @@ -112,7 +158,7 @@ void postDecoded0(const void * userPtr, AVFrame * gpuFrame){ count_flag = false; } - cout << "帧数:" << sum << endl; + cout << "帧数:" << suming << endl; } } } @@ -163,8 +209,6 @@ void logFF(void *, int level, const char *fmt, va_list ap) int main(int argc, char* argv[]) { - cudaTest(); - printf("start \n"); if (argc != 3) { fprintf(stderr, "./xxx uri gpu_id\n");