#include "DxDecoder.h" #include "H264ParseSPS.h" #include "H265ParseSPS.h" //#include "DxCDecoder.h" int DxDecoderInterface::InitDecoderModule() { DxDecoder::InitDecoder(); return 0; } void DxDecoderInterface::UninitDecoderModule() { DxDecoder::UninitDecoder(); return; } DxDecoderInterface * DxDecoderInterface::AllocDecoder( const DxDecoderConfig * cfg ) { DxDecoderInterface * dec = NULL; static BOOL bInit = FALSE; if ( bInit ) { bInit = TRUE; } if ( NULL == cfg ) { return dec; } if ( DX_DECODER_TYPE_CUDA == cfg->cfg.type ) { dec = new DxDecoder( cfg ); } else if ( DX_DECODER_TYPE_CPU == cfg->cfg.type ) { //dec = new DxCDecoder( cfg ); //2019-01-30 } return dec; } void DxDecoderInterface::FreeDecoder( DxDecoderInterface * dec ) { if ( dec ) { delete dec; dec = NULL; } return; } /************************************************** * 接口:IsSupport * 功能:判断输入源该解码库是否支持 * 参数:const char * uri 待判断的资源路径 * 返回:解码库支持该文件则返回 真, 否则返回 假 * 备注:调用本接口之前可无须调用CloseDecoder **************************************************/ int DxDecoderInterface::IsSupport( const char * uri ) { int ret = 0; int spt = -1; AVStream * vStream = NULL; AVDictionary * dic = NULL; AVFormatContext * fmtCxt = NULL; cudaVideoCodec codecid = CUDA_UNKNOW_CODEC; ret = av_dict_set( &dic, "bufsize", "655360", 0 ); av_dict_set( &dic, "rtsp_transport", "tcp", 0 ); av_dict_set( &dic, "stimeout", "2000000", 0 ); av_register_all(); avformat_network_init(); fmtCxt = avformat_alloc_context(); if ( 0 != avformat_open_input( &fmtCxt, uri, NULL, &dic ) ) { spt = -1; goto _EXIT; } if ( avformat_find_stream_info( fmtCxt, NULL ) < 0 ) { spt = -2; goto _EXIT; } for ( unsigned int i = 0; i< fmtCxt->nb_streams; i++ ) { if (fmtCxt->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) { vStream = fmtCxt->streams[i]; break; } } if ( NULL == vStream ) { spt = -3; goto _EXIT; } if ( vStream->codec->coded_height <= 0 || vStream->codec->coded_width <= 0 ) { spt = -4; goto _EXIT; } codecid = getCodecId( vStream->codec->codec_id ); // printf("codec type: %d %d\n", vStream->codec->codec_id, codecid); if ( CUDA_UNKNOW_CODEC == codecid ) { spt = 0 == vStream->codec->codec_id ? -5 : vStream->codec->codec_id; goto _EXIT; } else if (cudaVideoCodec_HEVC == codecid) { spt = 0 == vStream->codec->codec_id ? -5 : vStream->codec->codec_id; goto _EXIT; } /*if (cudaVideoCodec_MPEG1 == m_cudaCxt.fmt.codec || cudaVideoCodec_MPEG2 == m_cudaCxt.fmt.codec || cudaVideoCodec_MPEG4 == m_cudaCxt.fmt.codec || cudaVideoCodec_VC1 == m_cudaCxt.fmt.codec || cudaVideoCodec_H264 == m_cudaCxt.fmt.codec || cudaVideoCodec_JPEG == m_cudaCxt.fmt.codec || cudaVideoCodec_YUV420 == m_cudaCxt.fmt.codec || cudaVideoCodec_YV12 == m_cudaCxt.fmt.codec || cudaVideoCodec_NV12 == m_cudaCxt.fmt.codec || cudaVideoCodec_YUYV == m_cudaCxt.fmt.codec || cudaVideoCodec_UYVY == m_cudaCxt.fmt.codec)*/ //pThis->InitCUDA(); spt = 0; _EXIT: if ( fmtCxt ) { avformat_close_input( &fmtCxt ); fmtCxt = NULL; } avformat_network_deinit(); return spt; } DxDecoder::DxDecoder( const DxDecoderConfig * cfg ) { m_bRun = FALSE; m_parseFrame = 0; m_decodeFrame = 0; m_hThread = INVALID_HANDLE_VALUE; memset( &m_cfg, 0, sizeof( DxDecoderConfig ) ); memset( &m_cudaCxt, 0, sizeof( DxCUDAContext ) ); memcpy( &m_cfg, cfg, sizeof( DxDecoderConfig ) ); InitializeCriticalSection( &m_criTms ); return; } DxDecoder::~DxDecoder() { CloseDecoder(); DeleteCriticalSection( &m_criTms ); return; } int DxDecoder::OpenDecoder( const char * uri ) { DWORD dwRet = 0; if ( strlen( uri ) + sizeof( char ) > sizeof( m_uri ) ) { return -1; } memcpy( m_uri, uri, strlen( uri ) + sizeof( char ) ); while ( !m_tms.empty() ) { m_tms.pop(); } av_register_all(); //avcodec_register_all(); m_bRun = TRUE; m_hThread = CreateThread( NULL, NULL, StreamParseThread, this, NULL, &dwRet ); if ( INVALID_HANDLE_VALUE == m_hThread ) { return -1; } return 0; } int DxDecoder::CloseDecoder() { if ( m_cudaCxt.frames ) { m_cudaCxt.frames->endDecode(); } m_bRun = FALSE; if ( INVALID_HANDLE_VALUE != m_hThread ) { WaitForSingleObject( m_hThread, INFINITE ); m_hThread = INVALID_HANDLE_VALUE; } return 0; } int DxDecoder::CheckCUDAProperty( int devId ) { CUdevice dev = 0; size_t memSize = 0; char devName[256] = {0}; int major = 0, minor = 0; CUresult rlt = CUDA_SUCCESS; dev = devId; rlt = cuDeviceComputeCapability( &major, &minor, dev ); checkCudaErrors( rlt ); rlt = cuDeviceGetName( devName, sizeof( devName ), dev ); checkCudaErrors( rlt ); printf( "Using GPU Device %d: %s has SM %d.%d compute capability\n", dev, devName, major, minor ); rlt = cuDeviceTotalMem( &memSize, dev ); checkCudaErrors( rlt ); printf( "Total amount of global memory: %4.4f MB\n", (float)memSize / ( 1024 * 1024 ) ); return 0; } #define DX_MEM_DEBUG #ifdef DX_WINDOWS DWORD DxDecoder::StreamParseThread(void * params) { #endif #ifdef DX_LINUX void * DxDecoder::StreamParseThread(void * params) { #endif int ret = 0; DWORD dwRet = 0; AVPacket pktd = { 0 }; AVPacket * pkt = NULL; DxDecoder * pThis = NULL; AVStream * vStream = NULL; AVDictionary * dic = NULL; unsigned int frameNum = 0; AVFormatContext * fmtCxt = NULL; CUVIDSOURCEDATAPACKET cudaPkt = { 0 }; HANDLE hThread = INVALID_HANDLE_VALUE; AVBitStreamFilterContext* h264bsfc = NULL; sps_info_struct h264_info = {}; vc_params_t h265_info = {}; pThis = ( DxDecoder * )params; ret = av_dict_set( &dic, "bufsize", "655360", 0 ); av_dict_set( &dic, "rtsp_transport", pThis->m_cfg.cfg.forceTcp ? "tcp" : "udp", 0 ); av_dict_set( &dic, "stimeout", "2000000", 0 ); avformat_network_init(); fmtCxt = avformat_alloc_context(); if ( 0 != avformat_open_input( &fmtCxt, pThis->m_uri, NULL, &dic ) ) { dwRet = -1; goto _EXIT; } fmtCxt->probesize = 4000000 / 8 * 5; //后加 if ( avformat_find_stream_info( fmtCxt, NULL ) < 0 ) { dwRet = -1; goto _EXIT; } /******************************后加**********************/ { int i = av_find_best_stream(fmtCxt, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); if (i >= 0) { vStream = fmtCxt->streams[i]; } } /****************************************************/ //for ( unsigned int i = 0; i< fmtCxt->nb_streams; i++ ) //{ // if (fmtCxt->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) // { // vStream = fmtCxt->streams[i]; // break; // } //} if ( NULL == vStream ) { dwRet = -1; goto _EXIT; } switch ( vStream->codec->field_order ) { case AV_FIELD_PROGRESSIVE: case AV_FIELD_UNKNOWN: pThis->m_cudaCxt.fmt.progressive_sequence = true; break; default: pThis->m_cudaCxt.fmt.progressive_sequence = false; break; } if (AV_CODEC_ID_H264 == vStream->codec->codec_id) { int startCodeSpsIndex = 0; int spsLength = 0; //unsigned char * extradata = new unsigned char[64]{ 0x1,0x64,0x0,0x20,0xff,0xe1,0x0,0x18,0x67,0x64,0x0,0x20,0xac,0xb2,0x0,0xa0,0xb,0x76,0x2,0x20,0x0,0x0,0x3,0x0,0x20,0x0,0x0,0xc,0x81,0xe3,0x6,0x49,0x1,0x0,0x6,0x68,0xeb,0xc3,0xcb,0x22,0xc0 }; unsigned char * extradata = vStream->codec->extradata; for (int i = 0; i < vStream->codec->extradata_size; ++i) { if (extradata[i] == 0x67 && extradata[i - 2] == 0) { startCodeSpsIndex = i; spsLength = extradata[i - 1]; break; } } extradata += startCodeSpsIndex; if(spsLength > 1) h264_parse_sps(extradata, spsLength, &h264_info); } else if (AV_CODEC_ID_HEVC == vStream->codec->codec_id) { int startCodeSpsIndex = 0; int spsLength = 0; //unsigned char * extradata = new unsigned char[64]{ 0x1,0x64,0x0,0x20,0xff,0xe1,0x0,0x18,0x67,0x64,0x0,0x20,0xac,0xb2,0x0,0xa0,0xb,0x76,0x2,0x20,0x0,0x0,0x3,0x0,0x20,0x0,0x0,0xc,0x81,0xe3,0x6,0x49,0x1,0x0,0x6,0x68,0xeb,0xc3,0xcb,0x22,0xc0 }; unsigned char * extradata = vStream->codec->extradata; for (int i = 0; i < vStream->codec->extradata_size; ++i) { if (extradata[i] == 0x21 && extradata[i + 2] == 1 && extradata[i + 4] != 0) { startCodeSpsIndex = i+5; spsLength = extradata[i + 4]; break; } } extradata += startCodeSpsIndex; if (spsLength > 1) { ParseSequenceParameterSet((int8 *)extradata, spsLength, h265_info); } } vStream->codec->thread_safe_callbacks = 1; pThis->m_cudaCxt.width = vStream->codec->width; pThis->m_cudaCxt.height = vStream->codec->height; // printf("width=%d height=%d\n", pThis->m_cudaCxt.width, pThis->m_cudaCxt.height); if (vStream->codec->coded_width > 0) { pThis->m_cudaCxt.fmt.coded_width = vStream->codec->coded_width; pThis->m_cudaCxt.fmt.coded_height = vStream->codec->coded_height; } else if (h265_info.width > 0 || h264_info.width > 0) { if (AV_CODEC_ID_H264 == vStream->codec->codec_id) { pThis->m_cudaCxt.fmt.coded_width = h264_info.width; pThis->m_cudaCxt.fmt.coded_height = h264_info.height; } else if (AV_CODEC_ID_HEVC == vStream->codec->codec_id) { pThis->m_cudaCxt.fmt.coded_width = h265_info.width; pThis->m_cudaCxt.fmt.coded_height = h265_info.height; } } else { pThis->m_cudaCxt.fmt.coded_width = 1920; pThis->m_cudaCxt.fmt.coded_height = 1088; } pThis->m_cudaCxt.fmt.display_area.left = 0; pThis->m_cudaCxt.fmt.display_area.top = 0; pThis->m_cudaCxt.fmt.display_area.right = pThis->m_cudaCxt.width; pThis->m_cudaCxt.fmt.display_area.bottom = pThis->m_cudaCxt.height; pThis->m_cudaCxt.fmt.chroma_format = getColorFmt(vStream->codec->sw_pix_fmt); pThis->m_cudaCxt.fmt.codec = getCodecId(vStream->codec->codec_id); if (CUDA_UNKNOW_CODEC == pThis->m_cudaCxt.fmt.codec) { dwRet = -1; goto _EXIT; } printf("InitCUDA()\n"); ret = pThis->InitCUDA(); if (-1 == ret) { dwRet = -1; goto _EXIT; } hThread = CreateThread( NULL, NULL, CUDAWorkThread, pThis, NULL, &dwRet ); if ( INVALID_HANDLE_VALUE == hThread ) { dwRet = -1; goto _EXIT; } pThis->m_cudaCxt.cfg.width = pThis->m_cudaCxt.width; pThis->m_cudaCxt.cfg.height = pThis->m_cudaCxt.height; if ( pThis->m_cfg.escbk ) { pThis->m_cfg.escbk( pThis->m_cfg.userPtr, &pThis->m_cudaCxt.cfg, 0, 0 ); } if ( AV_CODEC_ID_H264 == vStream->codec->codec_id ) { h264bsfc = av_bitstream_filter_init("h264_mp4toannexb"); } else if ( AV_CODEC_ID_HEVC == vStream->codec->codec_id ) { h264bsfc = av_bitstream_filter_init("hevc_mp4toannexb"); } else if ( AV_CODEC_ID_MPEG4 == vStream->codec->codec_id ) { if ( NULL != vStream->codec->extradata && vStream->codec->extradata_size > 0 ) { cudaPkt.flags = CUVID_PKT_TIMESTAMP; cudaPkt.timestamp = 0; cudaPkt.payload_size = ( unsigned long )vStream->codec->extradata_size; cudaPkt.payload = ( const unsigned char * )vStream->codec->extradata; pThis->m_cudaCxt.parser->ParseVideoData( &cudaPkt ); } } pkt = av_packet_alloc(); av_init_packet( pkt ); while ( pThis->m_bRun ) { Sleep(1); int ret = 0; if (av_read_frame( fmtCxt, pkt ) < 0 ) { // printf("\nBREAK BREAK BREAK BREAK: %d\n", ret); break; } if ( pkt->stream_index != vStream->index ) { goto _CONTINUE; } if ( h264bsfc ) { ret = av_bitstream_filter_filter( h264bsfc, vStream->codec, NULL, &pktd.data, &pktd.size, pkt->data, pkt->size, pkt->flags & AV_PKT_FLAG_KEY ); if ( ret < 0 ) { goto _CONTINUE; } } else { pktd.data = pkt->data; pktd.size = pkt->size; } frameNum++; //printf( "%d->%d\n", frameNum, pkt.size ); if ( pkt->pts != AV_NOPTS_VALUE ) { cudaPkt.flags = CUVID_PKT_TIMESTAMP; if ( vStream->codec->pkt_timebase.num && vStream->codec->pkt_timebase.den) { AVRational tb; tb.num = 1; tb.den = AV_TIME_BASE; cudaPkt.timestamp = av_rescale_q(pkt->pts, vStream->codec->pkt_timebase, tb); } else { cudaPkt.timestamp = pkt->pts; } if ( cudaPkt.timestamp > 0 ) { cudaPkt.timestamp = cudaPkt.timestamp / 1000; } } EnterCriticalSection(&pThis->m_criTms); pThis->m_tms.push(cudaPkt.timestamp); LeaveCriticalSection(&pThis->m_criTms); cudaPkt.payload_size = ( unsigned long )pktd.size; cudaPkt.payload = ( const unsigned char * )pktd.data; //printf( "frames:%d %d %d\n", cudaPkt.flags, cudaPkt.payload_size, cudaPkt.timestamp ); //Sleep( 35 ); pThis->m_cudaCxt.parser->ParseVideoData( &cudaPkt ); pThis->m_parseFrame++; if ( h264bsfc ) { av_free( pktd.data ); pktd.data = NULL; } _CONTINUE: av_packet_unref( pkt ); } _EXIT: if ( NULL != pkt ) { av_packet_free( &pkt ); pkt = NULL; } pThis->m_bRun = FALSE; if ( INVALID_HANDLE_VALUE != hThread ) { WaitForSingleObject( hThread, INFINITE ); hThread = INVALID_HANDLE_VALUE; } //cudaPkt.payload = NULL; //cudaPkt.payload_size = 0; //cudaPkt.flags = CUVID_PKT_ENDOFSTREAM; //pThis->m_cudaCxt.parser->ParseVideoData( &cudaPkt ); // 2018-09-25 //_EXIT: pThis->UninitCUDA(); if ( NULL != h264bsfc ) { av_bitstream_filter_close( h264bsfc ); h264bsfc = NULL; } if ( dic ) { av_dict_free( &dic ); dic = NULL; } if ( fmtCxt ) { avformat_close_input( &fmtCxt ); fmtCxt = NULL; } avformat_network_deinit(); if ( pThis->m_cfg.logcbk ) { pThis->m_cfg.logcbk( pThis->m_cfg.userPtr, DX_LOG_LEVEL_CLEANUP, "DxDecoder[CUDA] module exited..", 0 ); } return 0; } #define DX_Y /* DWORD DxDecoder::StreamParseThread( void * params ) { int ret = 0; DWORD dwRet = 0; AVPacket pkt = {0}; AVPacket pktd = {0}; DxDecoder * pThis = NULL; AVStream * vStream = NULL; AVDictionary * dic = NULL; unsigned int frameNum = 0; AVFormatContext * fmtCxt = NULL; CUVIDSOURCEDATAPACKET cudaPkt = {0}; HANDLE hThread = INVALID_HANDLE_VALUE; AVBitStreamFilterContext* h264bsfc = NULL; pThis = ( DxDecoder * )params; av_register_all(); avcodec_register_all(); avformat_network_init(); ret = av_dict_set( &dic, "bufsize", "655360", 0 ); av_dict_set( &dic, "rtsp_transport", pThis->m_cfg.forceTcp ? "tcp" : "udp", 0 ); av_dict_set( &dic, "stimeout", "2000000", 0 ); if ( 0 != avformat_open_input( &fmtCxt, pThis->m_uri, NULL, &dic ) ) { dwRet = -1; goto _EXIT; } if ( avformat_find_stream_info( fmtCxt, NULL ) < 0 ) { dwRet = -1; goto _EXIT; } for ( unsigned int i = 0; i< fmtCxt->nb_streams; i++ ) { if ( fmtCxt->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) { vStream = fmtCxt->streams[i]; break; } } if ( NULL == vStream ) { dwRet = -1; goto _EXIT; } switch ( vStream->codec->field_order) { case AV_FIELD_PROGRESSIVE: case AV_FIELD_UNKNOWN: pThis->m_cudaCxt.fmt.progressive_sequence = true; break; default: pThis->m_cudaCxt.fmt.progressive_sequence = false; break; } vStream->codec->thread_safe_callbacks = 1; pThis->m_cudaCxt.width = vStream->codec->width; pThis->m_cudaCxt.height = vStream->codec->height; pThis->m_cudaCxt.fmt.coded_width = vStream->codec->coded_width; pThis->m_cudaCxt.fmt.coded_height = vStream->codec->coded_height; pThis->m_cudaCxt.fmt.display_area.left = 0; pThis->m_cudaCxt.fmt.display_area.top = 0; pThis->m_cudaCxt.fmt.display_area.right = pThis->m_cudaCxt.width; pThis->m_cudaCxt.fmt.display_area.bottom = pThis->m_cudaCxt.height; pThis->m_cudaCxt.fmt.chroma_format = getColorFmt( vStream->codec->sw_pix_fmt ); pThis->m_cudaCxt.fmt.codec = getCodecId( vStream->codec->codec_id ); if ( CUDA_UNKNOW_CODEC == pThis->m_cudaCxt.fmt.codec ) { dwRet = -1; goto _EXIT; } if ( vStream->codec->codec_id == AV_CODEC_ID_H264 ) { h264bsfc = av_bitstream_filter_init("h264_mp4toannexb"); } if ( vStream->codec->codec_id == AV_CODEC_ID_HEVC ) { h264bsfc = av_bitstream_filter_init("hevc_mp4toannexb"); } pThis->InitCUDA(); hThread = CreateThread( NULL, NULL, CUDAWorkThread, pThis, NULL, &dwRet ); if ( INVALID_HANDLE_VALUE == hThread ) { dwRet = -1; goto _EXIT; } av_init_packet( &pkt ); while ( pThis->m_bRun ) { if ( av_read_frame( fmtCxt, &pkt ) < 0 ) { break; } if ( pkt.stream_index != vStream->index ) { av_free_packet( &pkt ); continue; } if ( h264bsfc ) { pktd = pkt; ret = av_bitstream_filter_filter( h264bsfc, vStream->codec, NULL, &pktd.data, &pktd.size, pkt.data, pkt.size, pkt.flags & AV_PKT_FLAG_KEY ); av_free_packet( &pkt ); if ( ret < 0 ) { continue; } pkt = pktd; av_init_packet( &pktd ); } frameNum++; //printf( "%d->%d\n", frameNum, pkt.size ); if ( pkt.pts != AV_NOPTS_VALUE) { cudaPkt.flags = CUVID_PKT_TIMESTAMP; if ( vStream->codec->pkt_timebase.num && vStream->codec->pkt_timebase.den) { AVRational tb; tb.num = 1; tb.den = AV_TIME_BASE; cudaPkt.timestamp = av_rescale_q( pkt.pts, vStream->codec->pkt_timebase, tb ); } else { cudaPkt.timestamp = pkt.pts; } if ( cudaPkt.timestamp > 0 ) { cudaPkt.timestamp = cudaPkt.timestamp / 1000; } } EnterCriticalSection( &pThis->m_criTms ); pThis->m_tms.push( cudaPkt.timestamp ); LeaveCriticalSection( &pThis->m_criTms ); cudaPkt.payload_size = ( unsigned long )pkt.size; cudaPkt.payload = ( const unsigned char * )pkt.data; pThis->m_cudaCxt.parser->ParseVideoData( &cudaPkt ); if ( h264bsfc ) { av_free( pkt.data ); } else { av_free_packet( &pkt ); } av_init_packet( &pkt ); } _EXIT: cudaPkt.payload = NULL; cudaPkt.payload_size = 0; cudaPkt.flags = CUVID_PKT_ENDOFSTREAM; pThis->m_cudaCxt.parser->ParseVideoData(&cudaPkt); pThis->m_bRun = FALSE; if ( INVALID_HANDLE_VALUE != hThread ) { WaitForSingleObject( hThread, INFINITE ); hThread = INVALID_HANDLE_VALUE; } pThis->UninitCUDA(); if ( NULL != h264bsfc ) { av_bitstream_filter_close( h264bsfc ); h264bsfc = NULL; } avformat_close_input( &fmtCxt ); avformat_network_deinit(); if ( pThis->m_cfg.logcbk ) { pThis->m_cfg.logcbk( pThis->m_cfg.userPtr, DX_LOG_LEVEL_CLEANUP, "DxDecoder[CUDA] module exited..", 0 ); } return 0; } */ #ifdef DX_WINDOWS DWORD DxDecoder::CUDAWorkThread( void * params ) { #endif #ifdef DX_LINUX void * DxDecoder::CUDAWorkThread(void * params) { #endif int ret = 0; float * vf = NULL; char tmp[256] = {0}; unsigned int size = 0; DxDecoder * pThis = NULL; CUVIDPROCPARAMS cuidPms = {0}; CUVIDPARSERDISPINFO cudadi = {0}; cudaError_t cudaStatus = cudaSuccess; unsigned long long timestamp = 0; pThis = ( DxDecoder * )params; #ifdef DX_Y //CCtxAutoLock lck( pThis->m_cudaCxt.lock ); cuCtxPushCurrent(pThis->m_cudaCxt.cxt); #endif //CCtxAutoLock lck( pThis->m_cudaCxt.lock ); //cuCtxPushCurrent( pThis->m_cudaCxt.cxt ); //#ifndef DX_DXDECODER_OUTPUT_NV12 size = 3 * pThis->m_cudaCxt.width * pThis->m_cudaCxt.height * sizeof( float ); cudaStatus = cudaMalloc( ( void** )&vf, size ); //#endif // printf("RUN: %d\n", pThis->m_bRun); while ( pThis->m_bRun ) { if ( !pThis->m_cudaCxt.frames->dequeue( &cudadi ) ) { Sleep( 2 ); continue; } CUdeviceptr pFrames[2] = { 0, 0 }; int num_fields = ( cudadi.progressive_frame ? (1) : (2+ cudadi.repeat_first_field)); for (int active_field=0; active_fieldm_cudaCxt.decoder->mapFrame( cudadi.picture_index, &pFrames[active_field], &nPitch, &cuidPms ); /* #ifndef DX_DXDECODER_OUTPUT_NV12 cudaStatus = cuda_common::NV12ToRGB( pFrames[active_field], nPitch, vf, pThis->m_cudaCxt.width, pThis->m_cudaCxt.height ); if (cudaStatus != cudaSuccess) { fprintf(stderr, "cuda_common::NV12ToRGB failed: %s\n", cudaGetErrorString(cudaStatus)); } #endif */ EnterCriticalSection( &pThis->m_criTms ); if ( !pThis->m_tms.empty() ) { timestamp = pThis->m_tms.front(); pThis->m_tms.pop(); } LeaveCriticalSection( &pThis->m_criTms ); if ( pThis->m_cfg.escbk ) { #ifndef DX_DXDECODER_OUTPUT_NV12 pThis->m_cfg.escbk( pThis->m_cfg.userPtr, vf, size, timestamp ); #else pThis->m_cfg.escbk( pThis->m_cfg.userPtr, ( void * )pFrames[active_field], nPitch, timestamp ); #endif } ret++; pThis->m_cudaCxt.decoder->unmapFrame( pFrames[active_field] ); pThis->m_cudaCxt.frames->releaseFrame( &cudadi ); pThis->m_decodeFrame++; } } //#ifndef DX_DXDECODER_OUTPUT_NV12 if ( vf ) { cudaFree( vf ); vf = NULL; } //#endif Sleep(100); while (pThis->m_cudaCxt.frames->dequeue(&cudadi)) { CUdeviceptr pFrames[2] = { 0, 0 }; int num_fields = (cudadi.progressive_frame ? (1) : (2 + cudadi.repeat_first_field)); for (int active_field = 0; active_fieldm_cudaCxt.decoder->mapFrame( cudadi.picture_index, &pFrames[active_field], &nPitch, &cuidPms ); EnterCriticalSection(&pThis->m_criTms); if (!pThis->m_tms.empty()) { timestamp = pThis->m_tms.front(); pThis->m_tms.pop(); } LeaveCriticalSection(&pThis->m_criTms); pThis->m_cudaCxt.decoder->unmapFrame(pFrames[active_field]); pThis->m_cudaCxt.frames->releaseFrame(&cudadi); pThis->m_decodeFrame++; } } #ifdef DX_Y cuCtxPopCurrent(NULL); #endif if ( pThis->m_cfg.logcbk ) { pThis->m_cfg.logcbk( pThis->m_cfg.userPtr, DX_LOG_LEVEL_CLEANUP, "CUDAWorkThread exited..", 0 ); } return 0; } DxCUDACoreContext * DxDecoder::s_coreCxt = NULL; int DxDecoder::InitDecoder() { int ret = -1; CUresult rlt = CUDA_SUCCESS; if ( NULL != DxDecoder::s_coreCxt ) { return 0; } DxDecoder::s_coreCxt = ( DxCUDACoreContext * )malloc( sizeof( DxCUDACoreContext ) ); if ( NULL == DxDecoder::s_coreCxt ) { goto _EXIT; } memset( DxDecoder::s_coreCxt, 0, sizeof( DxCUDACoreContext ) ); cuInit( 0 ); rlt = cuDeviceGetCount( &s_coreCxt->devCount ); if ( 0 != rlt ) { goto _EXIT; } s_coreCxt->cxt = ( CUcontext * )malloc( sizeof( CUcontext ) * s_coreCxt->devCount ); s_coreCxt->refs = ( unsigned int * )malloc( sizeof( unsigned int ) * s_coreCxt->devCount ); if ( NULL == s_coreCxt->cxt || NULL == s_coreCxt->refs ) { goto _EXIT; } memset( s_coreCxt->cxt, 0, sizeof( CUcontext ) * s_coreCxt->devCount ); memset( s_coreCxt->refs, 0, sizeof( unsigned int ) * s_coreCxt->devCount ); InitializeCriticalSection( &s_coreCxt->criCxt ); ret = 0; _EXIT: if ( 0 != ret ) { UninitDecoder(); } return ret; } void DxDecoder::UninitDecoder() { if ( NULL == s_coreCxt ) { return; } for ( int i = 0; i < s_coreCxt->devCount; i++ ) { if ( s_coreCxt->cxt && s_coreCxt->cxt[i] ) { cuCtxDestroy( s_coreCxt->cxt[i] ); s_coreCxt->cxt[i] = NULL; } } DeleteCriticalSection( &s_coreCxt->criCxt ); if ( NULL != s_coreCxt->cxt ) { free( s_coreCxt->cxt ); s_coreCxt->cxt = NULL; } if ( NULL != s_coreCxt->refs ) { free( s_coreCxt->refs ); s_coreCxt->refs = NULL; } if ( NULL != s_coreCxt ) { free( s_coreCxt ); s_coreCxt = NULL; } return; } int DxDecoder::InitCUDA() { //int devId = 0; //int devCount = 0; CUresult rlt = CUDA_SUCCESS; cuInit( 0 ); /* rlt = cuDeviceGetCount( &devCount ); if ( 0 != rlt ) { return -1; }*/ //if ( m_cfg.cfg.devId >= devCount ) if ( m_cfg.cfg.devId >= s_coreCxt->devCount ) { return -1; } CheckCUDAProperty( m_cfg.cfg.devId ); rlt = cuDeviceGet( &m_cudaCxt.devId, m_cfg.cfg.devId ); if ( 0 != rlt ) { return -1; } #ifdef DX_USE_SHARE_CONTEXT assert(m_cudaCxt.devId == m_cfg.cfg.devId); EnterCriticalSection(&s_coreCxt->criCxt); if (NULL == s_coreCxt->cxt[m_cfg.cfg.devId]) { cuCtxCreate(&s_coreCxt->cxt[m_cfg.cfg.devId], CU_CTX_SCHED_AUTO, m_cudaCxt.devId); } m_cudaCxt.cxt = s_coreCxt->cxt[m_cfg.cfg.devId]; LeaveCriticalSection(&s_coreCxt->criCxt); if (NULL == m_cudaCxt.cxt) { return -1; } #else rlt = cuCtxCreate(&m_cudaCxt.cxt, CU_CTX_SCHED_AUTO, m_cudaCxt.devId); if (0 != rlt) //checkCudaErrors( rlt ); { return -1; } #endif rlt = cuvidCtxLockCreate( &m_cudaCxt.lock, m_cudaCxt.cxt ); m_cudaCxt.flags = getDecodeMode( m_cfg.cfg.decMode ); //cuda_common::setColorSpace( 0 == m_cfg.cfg.colorFmt ? ITU709 : ITU601, 0 ); //cuda_common::setColorSpace2(0 == m_cfg.cfg.colorFmt ? ITU709 : ITU601, 0); m_cudaCxt.frames = new FrameQueue(); m_cudaCxt.decoder = new VideoDecoder( /*m_cudaCxt.fmt,*/ m_cudaCxt.cxt, m_cudaCxt.flags, m_cudaCxt.lock ); if (!m_cudaCxt.decoder->InitDecoder(m_cudaCxt.fmt)) { return -1; } m_cudaCxt.parser = new VideoParser( m_cudaCxt.decoder, m_cudaCxt.frames ); return 0; } int DxDecoder::UninitCUDA() { CUVIDPARSERDISPINFO cudadi = {0}; if (m_cudaCxt.decoder) { delete m_cudaCxt.decoder; m_cudaCxt.decoder = NULL; } if (m_cudaCxt.parser) { delete m_cudaCxt.parser; m_cudaCxt.parser = NULL; } if ( m_cudaCxt.frames ) { while ( m_cudaCxt.frames->dequeue( &cudadi ) ) { m_cudaCxt.frames->releaseFrame( &cudadi ); } m_cudaCxt.frames->endDecode(); delete m_cudaCxt.frames; m_cudaCxt.frames = NULL; } if ( NULL != m_cudaCxt.lock ) { cuvidCtxLockDestroy( m_cudaCxt.lock ); m_cudaCxt.lock = NULL; } if ( NULL != m_cudaCxt.cxt ) { #ifndef DX_USE_SHARE_CONTEXT cuCtxDestroy(m_cudaCxt.cxt); #endif //cuCtxDestroy( m_cudaCxt.cxt ); m_cudaCxt.cxt = NULL; } return 0; } cudaVideoCreateFlags getDecodeMode( int iMode ) { cudaVideoCreateFlags ret = cudaVideoCreate_Default;//cudaVideoCreate_PreferCUVID; return cudaVideoCreate_PreferCUVID; switch ( iMode ) { case 0: ret = cudaVideoCreate_PreferCUDA; break; case 1: ret = cudaVideoCreate_PreferDXVA; break; case 2: ret = cudaVideoCreate_PreferCUVID; break; default: //ret = cudaVideoCreate_PreferCUVID; cudaVideoCreate_Default break; } return ret; } cudaVideoCodec getCodecId( unsigned int id ) { cudaVideoCodec cudaId = CUDA_UNKNOW_CODEC; switch ( id ) { case AV_CODEC_ID_H263: cudaId = cudaVideoCodec_MPEG4; break; case AV_CODEC_ID_H264: cudaId = cudaVideoCodec_H264; break; case AV_CODEC_ID_HEVC: cudaId = cudaVideoCodec_HEVC; break; case AV_CODEC_ID_MJPEG: cudaId = cudaVideoCodec_JPEG; break; case AV_CODEC_ID_MPEG1VIDEO: cudaId = cudaVideoCodec_MPEG1; break; case AV_CODEC_ID_MPEG2VIDEO: cudaId = cudaVideoCodec_MPEG2; break; //case 17: case AV_CODEC_ID_MPEG4: cudaId = cudaVideoCodec_MPEG4; break; case AV_CODEC_ID_VC1: cudaId = cudaVideoCodec_VC1; break; default: break; } return cudaId; } cudaVideoChromaFormat getColorFmt( unsigned int fmt ) { cudaVideoChromaFormat colorFmt = cudaVideoChromaFormat_420; switch ( fmt ) { case AV_PIX_FMT_YUV420P: colorFmt = cudaVideoChromaFormat_420; break; case AV_PIX_FMT_YUV422P: colorFmt = cudaVideoChromaFormat_422; break; case AV_PIX_FMT_YUV444P: colorFmt = cudaVideoChromaFormat_444; break; default: break; } return colorFmt; }