// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP

#include "../../op_cuda.hpp"

#include "../csl/stream.hpp"
#include "../csl/tensor.hpp"

#include "../kernels/fill_copy.hpp"
#include "../kernels/concat.hpp"
#include "../kernels/padding.hpp"

#include <opencv2/core.hpp>

#include <cstddef>
#include <vector>
#include <algorithm>
#include <utility>

namespace cv { namespace dnn { namespace cuda4dnn {

    enum class PaddingType {
        CONSTANT,
        REFLECTION101
    };

    template <class T>
    class PaddingOp final : public CUDABackendNode {
    public:
        using wrapper_type = GetCUDABackendWrapperType<T>;

        /* `ranges` is indexed by axis and contains the range in the output where the input is copied to */
        PaddingOp(csl::Stream stream_, PaddingType type_, T value_, std::vector<cv::Range> ranges)
            : stream(std::move(stream_)), type{ type_ }, value{ value_ }, dstRanges(std::move(ranges))
        {
        }

        void forward(
            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
            csl::Workspace& workspace) override
        {
            CV_Assert(inputs.size() == 1 && outputs.size() == 1);

            auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
            auto input = input_wrapper->getView();

            auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
            auto output = output_wrapper->getSpan();

            /* suppose we require padding for the first spatial axis (H in NCHW or D in NCDHW)
             *
             * there could be a case where the batch axis, channel axis, and the first spatial axis are all one
             * this would result in the effective rank being less than the number of axes requiring padding
             */

            /* the effective rank of the input may be smaller than the effective rank of the output but the converse is never true
             * input: [1, 1, 1, 3]; effective rank = 1
             * output: [1, 1, 3, 3]; effective rank = 2
             *
             * hence, we use the effective rank of the output tensor for the padding operation
             */
            auto effective_rank = get_effective_rank(output);
            CV_Assert(get_effective_rank(input) <= effective_rank);

            effective_rank = std::max(effective_rank, dstRanges.size());
            for (int i = effective_rank - dstRanges.size(); i < effective_rank; i++)
            {
                if (dstRanges[i] == Range::all())
                    CV_Assert(input.get_axis_size(i) == output.get_axis_size(i));
                else
                    CV_Assert(input.get_axis_size(i) == dstRanges[i].size());
            }

            if (type == PaddingType::CONSTANT)
            {
                /* fill the entire output with the padding value, then copy the input into its destination region */
                kernels::fill<T>(stream, output, value);

                std::vector<std::size_t> offsets(effective_rank, 0);
                for (int i = 0; i < dstRanges.size(); i++)
                {
                    const auto delta = effective_rank - dstRanges.size();
                    if (dstRanges[i] != Range::all())
                        offsets[delta + i] = dstRanges[i].start;
                }

                kernels::concat_with_offsets<T>(stream, output, input, offsets);
            }
            else if (type == PaddingType::REFLECTION101)
            {
                /* for each output axis, determine the half-open interval occupied by the input */
                std::vector<std::pair<std::size_t, std::size_t>> ranges(effective_rank);
                for (int i = 0; i < effective_rank; i++)
                {
                    const auto delta = effective_rank - dstRanges.size();
                    if (i < delta || dstRanges[i - delta] == Range::all())
                        ranges[i] = { 0, input.get_axis_size(i) };
                    else
                        ranges[i] = { dstRanges[i].start, dstRanges[i].end };
                }

                kernels::copy_with_reflection101<T>(stream, output, input, ranges);
            }
        }

    private:
        csl::Stream stream;

        PaddingType type;
        T value;
        std::vector<cv::Range> dstRanges;
    };

}}} /* namespace cv::dnn::cuda4dnn */

#endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP */
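/* Usage sketch (illustrative only, not part of the original file or the build):
 * constructing a constant-padding node for an NCHW tensor. The `stream` object and
 * the concrete dimensions below are assumptions made for illustration; `ranges` is
 * indexed by output axis and gives the region of the output into which the input is
 * copied, with `Range::all()` marking axes that are not padded.
 *
 *     csl::Stream stream;                 // hypothetical CUDA stream
 *     std::vector<cv::Range> ranges = {
 *         cv::Range::all(),               // batch axis: not padded
 *         cv::Range::all(),               // channel axis: not padded
 *         cv::Range(1, 1 + 32),           // H: 32 input rows land at [1, 33), i.e. one row of padding on each side
 *         cv::Range(1, 1 + 32)            // W: 32 input cols land at [1, 33), i.e. one column of padding on each side
 *     };
 *     cv::Ptr<CUDABackendNode> node(
 *         new PaddingOp<float>(std::move(stream), PaddingType::CONSTANT, 0.f, std::move(ranges)));
 */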