// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP

#include "../../op_cuda.hpp"

#include "../csl/cudnn.hpp"
#include "../csl/tensor_ops.hpp"

#include <cstddef>
#include <utility>

namespace cv { namespace dnn { namespace cuda4dnn {

    enum class LRNType {
        ACROSS_CHANNELS,
        WITHIN_CHANNEL
    };

    template <class T>
    class LRNOp final : public CUDABackendNode {
    public:
        using wrapper_type = GetCUDABackendWrapperType<T>;

        LRNOp(csl::cudnn::Handle handle, LRNType type_, std::size_t local_size, T alpha, T beta, T bias, std::size_t largestInputSize)
            : scratch_mem_in_bytes { 0 }
        {
            typename csl::LRN<T>::LRNType type{};
            switch (type_) {
            case LRNType::ACROSS_CHANNELS: type = csl::LRN<T>::LRNType::ACROSS_CHANNELS; break;
            case LRNType::WITHIN_CHANNEL: type = csl::LRN<T>::LRNType::WITHIN_CHANNEL; break;
            }
            lrn = csl::LRN<T>(std::move(handle), local_size, alpha, beta, bias, type);

            csl::WorkspaceBuilder builder;
            if (type_ == LRNType::WITHIN_CHANNEL) {
                /* this is not a bug; we require two of these */
                builder.require<T>(largestInputSize);
                builder.require<T>(largestInputSize);
            }
            scratch_mem_in_bytes = builder.required_workspace_size();
        }

        void forward(
            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
            csl::Workspace& workspace) override
        {
            for (int i = 0; i < inputs.size(); i++)
            {
                auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
                auto input = input_wrapper->getView();

                auto output_wrapper = outputs[i].dynamicCast<wrapper_type>();
                auto output = output_wrapper->getSpan();

                csl::WorkspaceAllocator allocator(workspace);
                lrn.normalize(input, output, allocator.get_instance<T>());
            }
        }

        std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }

    private:
        csl::LRN<T> lrn;
        std::size_t scratch_mem_in_bytes;
    };

}}} /* namespace cv::dnn::cuda4dnn */

#endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_LRN_HPP */
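// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, kept in a comment so it is not compiled):
// a CUDA-enabled layer would typically construct this node from its cuDNN
// handle and LRN parameters. `make_cuda_node`, `context->cudnn_handle`, and
// the variable names below follow the pattern used by other cuda4dnn
// primitives, but their exact signatures are assumptions here, not part of
// this header.
//
//     // largestInputSize: element count of the biggest input tensor; it is
//     // only used to size the WITHIN_CHANNEL scratch workspace.
//     auto node = make_cuda_node<cuda4dnn::LRNOp>(
//         preferableTarget,
//         std::move(context->cudnn_handle),    // csl::cudnn::Handle
//         cuda4dnn::LRNType::ACROSS_CHANNELS,  // or LRNType::WITHIN_CHANNEL
//         size,                                // local_size, e.g. 5
//         alpha, beta, bias,                   // LRN coefficients
//         largestInputSize);
//
// Note that ACROSS_CHANNELS reports a workspace requirement of zero bytes;
// only WITHIN_CHANNEL reserves scratch memory (two buffers, as the comment
// in the constructor stresses).
// ---------------------------------------------------------------------------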