// This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_SCALE_SHIFT_HPP #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_SCALE_SHIFT_HPP #include "../../op_cuda.hpp" #include "../csl/stream.hpp" #include "../csl/tensor.hpp" #include "../kernels/scale_shift.hpp" #include #include #include namespace cv { namespace dnn { namespace cuda4dnn { struct ScaleShiftConfiguration { enum class OpMode { NONE, TRAINABLE, /* use a pretrained blob */ UNTRAINABLE /* use another input */ }; OpMode scaleMode; OpMode shiftMode; std::size_t axis; }; template class ScaleShiftOp final : public CUDABackendNode { public: using wrapper_type = GetCUDABackendWrapperType; ScaleShiftOp(csl::Stream stream_, const ScaleShiftConfiguration& config, const cv::Mat& weights, const cv::Mat& bias) : stream(std::move(stream_)), axis{ config.axis } { scaleMode = config.scaleMode; if (scaleMode == ScaleShiftConfiguration::OpMode::TRAINABLE) { CV_Assert(!weights.empty()); weightsTensor = csl::makeTensorHeader(weights); csl::copyMatToTensor(weights, weightsTensor, stream); } shiftMode = config.shiftMode; if (shiftMode == ScaleShiftConfiguration::OpMode::TRAINABLE) { CV_Assert(!bias.empty()); biasTensor = csl::makeTensorHeader(bias); csl::copyMatToTensor(bias, biasTensor, stream); } CV_Assert(scaleMode != ScaleShiftConfiguration::OpMode::NONE || shiftMode != ScaleShiftConfiguration::OpMode::NONE); if (scaleMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE && shiftMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE) { CV_Error(cv::Error::StsNotImplemented, "scale and shift both in untrainable mode is not supported"); } } void forward( const std::vector>& inputs, const std::vector>& outputs, csl::Workspace& workspace) override { CV_Assert(outputs.size() == 1); auto input_wrapper = inputs[0].dynamicCast(); auto input = input_wrapper->getView(); auto output_wrapper = outputs[0].dynamicCast(); auto output = output_wrapper->getSpan(); /* number of batches in the weights/bias * trainable mode: same for all batches * untrainable mode: could be different for different batch samples */ std::size_t parameter_batch_size = 1; csl::TensorView weights; if (scaleMode == ScaleShiftConfiguration::OpMode::TRAINABLE) { CV_Assert(!weightsTensor.empty()); weights = csl::TensorView(weightsTensor); } else if (scaleMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE) { CV_Assert(inputs.size() == 2); auto wrapper = inputs[1].dynamicCast(); weights = wrapper->getView(); parameter_batch_size = weights.get_axis_size(0); CV_Assert(parameter_batch_size == input.get_axis_size(0)); } csl::TensorView bias; if (shiftMode == ScaleShiftConfiguration::OpMode::TRAINABLE) { CV_Assert(!biasTensor.empty()); bias = csl::TensorView(biasTensor); } else if (shiftMode == ScaleShiftConfiguration::OpMode::UNTRAINABLE) { CV_Assert(inputs.size() == 2); auto wrapper = inputs[1].dynamicCast(); bias = wrapper->getView(); parameter_batch_size = bias.get_axis_size(0); CV_Assert(parameter_batch_size == input.get_axis_size(0)); } CV_Assert(!weights.empty() || !bias.empty()); if (!weights.empty() && !bias.empty()) { CV_CheckEQ(weights.size(), bias.size(), "different broadcasting options for weights and bias is not supported"); } const auto num_parameters = !weights.empty() ? weights.size() : bias.size(); const auto mid_size = num_parameters / parameter_batch_size; /* the scale shift operation might require broadcasting */ const int end_axis = [&] { for (int endAxis = axis + 1; endAxis <= input.rank(); endAxis++) { if (input.size_range(axis, endAxis) == mid_size) return endAxis; } CV_Assert(0 /* failed to find a broadcast config */); }(); std::size_t inner_size = input.size_range(end_axis, input.rank()); if (!weights.empty() && !bias.empty()) kernels::scaleN_with_biasN(stream, output, input, inner_size, weights, bias); else if (!weights.empty()) kernels::scaleN(stream, output, input, inner_size, weights); else kernels::biasN(stream, output, input, inner_size, bias); } private: csl::Stream stream; csl::Tensor weightsTensor, biasTensor; std::size_t axis; ScaleShiftConfiguration::OpMode scaleMode, shiftMode; }; }}} /* namespace cv::dnn::cuda4dnn */ #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_SCALE_SHIFT_HPP */