Blame view

3rdparty/opencv-4.5.4/modules/dnn/src/opencl/pooling.cl 4.29 KB
f4334277   Hu Chunming   提交3rdparty
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
  /*************************************************************************************
   * Copyright (c) 2015, Advanced Micro Devices, Inc.
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without modification,
   * are permitted provided that the following conditions are met:
   *
   * 1. Redistributions of source code must retain the above copyright notice, this
   * list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright notice,
   * this list of conditions and the following disclaimer in the documentation and/or
   *  other materials provided with the distribution.
   *
   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   * POSSIBILITY OF SUCH DAMAGE.
   **************************************************************************************/
  
  /*
   * Max pooling, forward pass.
   *
   * Each work-item starts at its global id and advances by the global size
   * until all `nthreads` output elements have been produced. A flat output
   * index is decomposed as ((n * channels + c) * pooled_height + ph) *
   * pooled_width + pw, and the input plane for (n, c) starts at
   * (n * channels + c) * height * width.
   *
   * nthreads       number of output elements to compute
   * bottom_data    input buffer (only read by this kernel)
   * num            not used by this kernel
   * channels       channel count used to decompose the flat index
   * height, width  size of one input plane
   * pooled_height, pooled_width  size of one output plane
   * kernel_h, kernel_w           pooling window size
   * stride_h, stride_w           pooling window step
   * pad_t, pad_l   top/left padding subtracted from the window origin
   * pad_b, pad_r   not used by this kernel
   * top_data       output buffer; receives the window maximum
   * mask           (only when MASK is defined) receives, per output element,
   *                the offset h * width + w of the maximum inside its input
   *                plane, or -1 if no element compared greater than -FLT_MAX
   *                (e.g. the clipped window is empty)
   */
  __kernel void MaxPoolForward(const int nthreads,
      __global T* bottom_data, const int num, const int channels, const int height, const int width,
      const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w,
      const int stride_h, const int stride_w, const int pad_t, const int pad_l, const int pad_b, const int pad_r,
      __global T* top_data
  #ifdef MASK
      , __global float* mask
  #endif
      )
  {
    const int step = get_global_size(0);
    for (int index = get_global_id(0); index < nthreads; index += step) {
      // Recover the output coordinates from the flat index.
      const int pw = index % pooled_width;
      const int ph = (index / pooled_width) % pooled_height;
      const int c = (index / pooled_width / pooled_height) % channels;
      const int n = index / pooled_width / pooled_height / channels;

      // Window in input coordinates, clipped to the input plane.
      int hstart = ph * stride_h - pad_t;
      int wstart = pw * stride_w - pad_l;
      const int hend = min(hstart + kernel_h, height);
      const int wend = min(wstart + kernel_w, width);
      hstart = max(hstart, 0);
      wstart = max(wstart, 0);

      // Use a fresh slice pointer on every iteration. Advancing the
      // bottom_data parameter itself would make the offsets accumulate when a
      // work-item handles more than one output element, so later iterations
      // would read from the wrong plane.
      __global const T* bottom_slice =
          bottom_data + (n * channels + c) * height * width;

      T maxval = -FLT_MAX;
      int maxidx = -1;
      for (int h = hstart; h < hend; ++h) {
        for (int w = wstart; w < wend; ++w) {
          const int idx = h * width + w;
          const T val = bottom_slice[idx];
          if (val > maxval) {
            maxidx = idx;
            maxval = val;
          }
        }
      }

      top_data[index] = maxval;

  #ifdef MASK
      mask[index] = maxidx;
  #endif
    }
  }
  
  /*
   * Average pooling, forward pass.
   *
   * Each work-item starts at its global id and advances by the global size
   * until all `nthreads` output elements have been produced. A flat output
   * index is decomposed as ((n * channels + c) * pooled_height + ph) *
   * pooled_width + pw, and the input plane for (n, c) starts at
   * (n * channels + c) * height * width.
   *
   * The divisor is the window area measured against the padded extent
   * (height + pad_b, width + pad_r), computed before the window is clipped
   * to the real input. Padded positions therefore count in the divisor but
   * add nothing to the sum.
   *
   * nthreads       number of output elements to compute
   * bottom_data    input buffer (only read by this kernel)
   * num            not used by this kernel
   * channels       channel count used to decompose the flat index
   * height, width  size of one input plane
   * pooled_height, pooled_width  size of one output plane
   * kernel_h, kernel_w           pooling window size
   * stride_h, stride_w           pooling window step
   * pad_t, pad_l   top/left padding subtracted from the window origin
   * pad_b, pad_r   bottom/right padding that bounds the window for the divisor
   * top_data       output buffer; receives sum / pool_size
   * mask           (only when MASK is defined) not used
   */
  __kernel void AvePoolForward(const int nthreads,
      __global T* bottom_data, const int num, const int channels, const int height, const int width,
      const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w,
      const int stride_h, const int stride_w, const int pad_t, const int pad_l, const int pad_b, const int pad_r,
      __global T* top_data
  #ifdef MASK
      , __global float* mask // NOT USED
  #endif
      )
  {
    const int step = get_global_size(0);
    for (int index = get_global_id(0); index < nthreads; index += step) {
      // Recover the output coordinates from the flat index.
      const int pw = index % pooled_width;
      const int ph = (index / pooled_width) % pooled_height;
      const int c = (index / pooled_width / pooled_height) % channels;
      const int n = index / pooled_width / pooled_height / channels;

      // Window bounded by the padded extent; its area is the divisor.
      int hstart = ph * stride_h - pad_t;
      int wstart = pw * stride_w - pad_l;
      int hend = min(hstart + kernel_h, height + pad_b);
      int wend = min(wstart + kernel_w, width + pad_r);
      const int pool_size = (hend - hstart) * (wend - wstart);

      // Clip to the real input plane before reading any element.
      hstart = max(hstart, 0);
      wstart = max(wstart, 0);
      hend = min(hend, height);
      wend = min(wend, width);

      // Use a fresh slice pointer on every iteration. Advancing the
      // bottom_data parameter itself would make the offsets accumulate when a
      // work-item handles more than one output element, so later iterations
      // would read from the wrong plane.
      __global const T* bottom_slice =
          bottom_data + (n * channels + c) * height * width;

      T aveval = 0;
      for (int h = hstart; h < hend; ++h) {
        for (int w = wstart; w < wend; ++w) {
          aveval += bottom_slice[h * width + w];
        }
      }
      top_data[index] = aveval / pool_size;
    }
  }