Blame view

3rdparty/opencv-4.5.4/modules/dnn/src/opencl/lrn.cl 3.32 KB
f4334277   Hu Chunming   提交3rdparty
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
  /*************************************************************************************
   * Copyright (c) 2015, Advanced Micro Devices, Inc.
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without modification,
   * are permitted provided that the following conditions are met:
   *
   * 1. Redistributions of source code must retain the above copyright notice, this
   * list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright notice,
   * this list of conditions and the following disclaimer in the documentation and/or
   *  other materials provided with the distribution.
   *
   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   * POSSIBILITY OF SUCH DAMAGE.
   **************************************************************************************/
  
  // Element-wise LRN output: out[i] = in[i] * scale[i] ^ negative_beta.
  //
  //   nthreads       number of elements to process
  //   in             input values
  //   scale          per-element scale term (read only here)
  //   negative_beta  exponent applied to each scale value
  //   out            destination buffer, written at the same indices as `in`
  //
  // Each work-item starts at its global id and advances by the global size
  // until it reaches nthreads, so any launch size covers all elements.
  __kernel void LRNComputeOutput(const int nthreads, __global T* in, __global T* scale, const T negative_beta, __global T* out) {
    const int stride = get_global_size(0);
    int i = get_global_id(0);
    while (i < nthreads) {
      out[i] = in[i] * pow(scale[i], negative_beta);
      i += stride;
    }
  }
  
  // Fills the across-channel LRN scale term for every (n, h, w) position:
  //
  //   scale[n, c, h, w] = k + alpha_over_size * sum(in[n, c', h, w]^2)
  //
  // where c' ranges over the `size`-wide channel window around c
  // ([c - pre_pad, c + post_pad]) clipped to [0, channels).
  //
  //   nthreads         number of (n, h, w) positions to process
  //   in               input buffer, indexed as ((n * channels + c) * height + h) * width + w
  //   num              unused by this kernel
  //   channels, height, width   dimensions used for the indexing above
  //   size             width of the channel window
  //   alpha_over_size  multiplier applied to the windowed sum of squares
  //   k                additive bias of the scale term
  //   scale            output buffer, same indexing as `in`
  //
  // Each work-item starts at its global id and advances by the global size.
  // The per-position base pointers are kept in locals: shifting the `in` /
  // `scale` arguments themselves would accumulate offsets across iterations
  // and corrupt addressing whenever the global size is smaller than nthreads.
  __kernel void LRNFillScale(const int nthreads, __global T* in, const int num, const int channels, const int height, const int width, const int size, const T alpha_over_size, const T k, __global T* scale) {
    const int stride = get_global_size(0);
    // Loop-invariant geometry.
    const int step = height * width;  // distance between consecutive channels
    const int pre_pad = (size - 1) / 2;
    const int post_pad = size - pre_pad - 1;

    for (int index = get_global_id(0); index < nthreads; index += stride) {
      // find out the local offset
      const int w = index % width;
      const int h = (index / width) % height;
      const int n = index / width / height;
      const int offset = (n * channels * height + h) * width + w;

      // Base of channel 0 at (n, h, w); recomputed from the untouched
      // kernel arguments on every iteration.
      __global const T* in_off = in + offset;
      __global T* scale_off = scale + offset;

      int head = 0;
      T accum_scale = 0;

      // fill the scale at [n, :, h, w]
      // accumulate values: prime the window with the first post_pad channels
      while (head < post_pad && head < channels) {
        const T v = in_off[head * step];
        accum_scale += v * v;
        ++head;
      }
      // both add and subtract: slide the window while new channels enter it
      while (head < channels) {
        const T v = in_off[head * step];
        accum_scale += v * v;
        if (head - size >= 0) {
          const T old = in_off[(head - size) * step];
          accum_scale -= old * old;
        }
        // head >= post_pad is guaranteed here by the priming loop above.
        scale_off[(head - post_pad) * step] = k + accum_scale * alpha_over_size;
        ++head;
      }
      // subtract only: the window runs past the last channel
      while (head < channels + post_pad) {
        if (head - size >= 0) {
          const T old = in_off[(head - size) * step];
          accum_scale -= old * old;
        }
        // When channels < post_pad the target channel can still be negative;
        // skip those steps instead of writing before the start of the row.
        const int c = head - post_pad;
        if (c >= 0) {
          scale_off[c * step] = k + accum_scale * alpha_over_size;
        }
        ++head;
      }
    }
  }