// This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. #ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_POOLING_HPP #define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_POOLING_HPP #include "cudnn.hpp" #include "../pointer.hpp" #include #include #include #include #include #include #include #include namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn { class PoolingDescriptor { public: enum class PoolingType { MAX, MAX_DETERMINISTIC, AVERAGE_EXCLUDE_PADDING, AVERAGE_INCLUDE_PADDING }; PoolingDescriptor() noexcept : descriptor{ nullptr } { } PoolingDescriptor(const PoolingDescriptor&) = delete; PoolingDescriptor(PoolingDescriptor&& other) noexcept : descriptor{ other.descriptor } { other.descriptor = nullptr; } /** constructs a pooling descriptor * * Pre-conditions: * - \p window_size, \p padding and \p stride must have the same size * * The length of the containers is interpreted as the order of the pooling operation. * * Exception Guarantee: Basic */ template ()))> PoolingDescriptor( const SequenceContainer& window_size, const SequenceContainer& padding, const SequenceContainer& stride, PoolingType type) { constructor(window_size, padding, stride, type); } ~PoolingDescriptor() noexcept { if (descriptor != nullptr) { /* cudnnDestroyPoolingDescriptor will not fail for a valid descriptor */ CUDA4DNN_CHECK_CUDNN(cudnnDestroyPoolingDescriptor(descriptor)); } } PoolingDescriptor& operator=(const PoolingDescriptor&) = delete; PoolingDescriptor& operator=(PoolingDescriptor&& other) noexcept { descriptor = other.descriptor; other.descriptor = nullptr; return *this; }; cudnnPoolingDescriptor_t get() const noexcept { return descriptor; } private: template void constructor( const SequenceContainer& window_size, const SequenceContainer& padding, const SequenceContainer& stride, PoolingType type) { CV_Assert(window_size.size() == padding.size()); CV_Assert(window_size.size() == stride.size()); auto get_pooling_type = [] (PoolingType type) { switch (type) { case PoolingType::MAX: return CUDNN_POOLING_MAX; case PoolingType::MAX_DETERMINISTIC: return CUDNN_POOLING_MAX_DETERMINISTIC; case PoolingType::AVERAGE_EXCLUDE_PADDING: return CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; case PoolingType::AVERAGE_INCLUDE_PADDING: return CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; } CV_Error(Error::StsBadArg, "unknown pooling type"); }; CUDA4DNN_CHECK_CUDNN(cudnnCreatePoolingDescriptor(&descriptor)); try { const auto rank = window_size.size(); if (rank == 2) { CUDA4DNN_CHECK_CUDNN( cudnnSetPooling2dDescriptor( descriptor, get_pooling_type(type), CUDNN_PROPAGATE_NAN, window_size[0], window_size[1], padding[0], padding[1], stride[0], stride[1] ) ); } else { std::vector iwindow_size(std::begin(window_size), std::end(window_size)); std::vector ipadding(std::begin(padding), std::end(padding)); std::vector istride(std::begin(stride), std::end(stride)); CUDA4DNN_CHECK_CUDNN( cudnnSetPoolingNdDescriptor( descriptor, get_pooling_type(type), CUDNN_PROPAGATE_NAN, rank, iwindow_size.data(), ipadding.data(), istride.data() ) ); } } catch (...) { /* cudnnDestroyPoolingDescriptor will not fail for a valid descriptor */ CUDA4DNN_CHECK_CUDNN(cudnnDestroyPoolingDescriptor(descriptor)); throw; } } cudnnPoolingDescriptor_t descriptor; }; /** gives the shape of the output tensor after pooling * * @note it's not required to enforce the this shape in the output tensor; slightly different shapes will work * * Exception Guarantee: Basic */ template inline void getPoolingForwardOutputDim( const PoolingDescriptor& poolingDesc, const TensorDescriptor& inputDesc, std::vector& output_dim) { output_dim.clear(); output_dim.resize(CUDNN_DIM_MAX); /* we use `output_dim` to hold temporaries */ std::vector temp(CUDNN_DIM_MAX); cudnnDataType_t tempDataType; CUDA4DNN_CHECK_CUDNN( cudnnGetTensorNdDescriptor( inputDesc.get(), CUDNN_DIM_MAX + 1, /* according to docs, this is what we do to get the rank */ &tempDataType, output_dim.data(), temp.data(), temp.data() ) ); const auto rank = output_dim[0]; output_dim.resize(rank); CUDA4DNN_CHECK_CUDNN( cudnnGetPoolingNdForwardOutputDim(poolingDesc.get(), inputDesc.get(), rank, output_dim.data()) ); } /** @brief performs pooling operation * * dstValue = alpha * result + beta * priorDstValue * * @tparam T pooling element type (must be `half` or `float`) * * @param handle valid cuDNN Handle * @param poolingDesc pooling description * @param inputDesc tensor descriptor describing the input * @param[in] inputPtr pointer to input tensor in device memory * @param alpha result scale factor * @param beta previous value scale factor * @param outputDesc tensor descriptor describing the output * @param[out] outputPtr pointer to output tensor in device memory * * Exception Guarantee: Basic */ template void pool( const Handle& handle, const PoolingDescriptor& poolingDesc, const TensorDescriptor& inputDesc, const DevicePtr inputPtr, T alpha, T beta, const TensorDescriptor& outputDesc, DevicePtr outputPtr) { CV_Assert(handle); CUDA4DNN_CHECK_CUDNN( cudnnPoolingForward( handle.get(), poolingDesc.get(), &alpha, inputDesc.get(), inputPtr.get(), &beta, outputDesc.get(), outputPtr.get() ) ); } template <> inline void pool( const Handle& handle, const PoolingDescriptor& poolingDesc, const TensorDescriptor& inputDesc, const DevicePtr inputPtr, half alpha, half beta, const TensorDescriptor& outputDesc, DevicePtr outputPtr) { CV_Assert(handle); /* we specalize for fp16 as the scaling factors must be provided as `float` */ float alpha_ = alpha, beta_ = beta; CUDA4DNN_CHECK_CUDNN( cudnnPoolingForward( handle.get(), poolingDesc.get(), &alpha_, inputDesc.get(), inputPtr.get(), &beta_, outputDesc.get(), outputPtr.get() ) ); } }}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */ #endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_POOLING_HPP */