3rdparty/opencv-4.5.4/modules/dnn/src/cuda4dnn/csl/cudnn/softmax.hpp
  // This file is part of OpenCV project.
  // It is subject to the license terms in the LICENSE file found in the top-level directory
  // of this distribution and at http://opencv.org/license.html.
  
  #ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_SOFTMAX_HPP
  #define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_SOFTMAX_HPP
  
  #include "cudnn.hpp"
  
  #include "../pointer.hpp"
  
  #include <cudnn.h>
  
  namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
  
      /** @brief computes softmax (or log softmax)
       *
       * @tparam          T           element type (must be `half` or `float`)
       *
       * @param           handle      valid cuDNN handle
       * @param           outputDesc  tensor descriptor for the output tensor
       * @param[out]      output      pointer to output tensor in device memory
       * @param           inputDesc   tensor descriptor for the input tensor
       * @param[in]       input       pointer to input tensor in device memory
       * @param           log         whether to apply log to the computed probabilities
       *
       * Exception Guarantee: Basic
       */
      template <class T>
      void softmax(const cudnn::Handle& handle,
          const TensorDescriptor<T>& outputDesc, DevicePtr<T> output,
          const TensorDescriptor<T>& inputDesc, DevicePtr<const T> input,
          bool log)
      {
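          /* alpha and beta are cuDNN blending factors: the result is computed as
             output = alpha * softmax(input) + beta * output; with beta = 0 the previous
             contents of `output` are ignored */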
          T alpha = 1.0, beta = 0.0;
          cudnnSoftmaxAlgorithm_t algo = log ? CUDNN_SOFTMAX_LOG : CUDNN_SOFTMAX_ACCURATE;
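          /* CUDNN_SOFTMAX_MODE_CHANNEL computes the softmax across the channel (C) dimension
             independently for each (N, H, W) position */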
          CUDA4DNN_CHECK_CUDNN(
              cudnnSoftmaxForward(
                  handle.get(),
                  algo, CUDNN_SOFTMAX_MODE_CHANNEL,
                  &alpha, inputDesc.get(), input.get(),
                  &beta, outputDesc.get(), output.get()
              )
          );
      }
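      /* Illustrative usage sketch: the handle, descriptors and device pointers below are
       * placeholders assumed to be set up by the caller; they are not defined in this header.
       *
       *   csl::cudnn::softmax<float>(handle, outputDesc, output, inputDesc, input, false);
       */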
  
      template <> inline
      void softmax(const cudnn::Handle& handle,
          const TensorDescriptor<half>& outputDesc, DevicePtr<half> output,
          const TensorDescriptor<half>& inputDesc, DevicePtr<const half> input,
          bool log)
      {
          /* we specialize for fp16 as the scaling factors must be provided as `float` */
          float alpha = 1.0, beta = 0.0;
          cudnnSoftmaxAlgorithm_t algo = log ? CUDNN_SOFTMAX_LOG : CUDNN_SOFTMAX_ACCURATE;
          CUDA4DNN_CHECK_CUDNN(
              cudnnSoftmaxForward(
                  handle.get(),
                  algo, CUDNN_SOFTMAX_MODE_CHANNEL,
                  &alpha, inputDesc.get(), input.get(),
                  &beta, outputDesc.get(), output.get()
              )
          );
      }
  
  }}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */
  
  #endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_SOFTMAX_HPP */