Blame view

3rdparty/opencv-4.5.4/modules/core/src/stat.simd.hpp 3.28 KB
f4334277   Hu Chunming   提交3rdparty
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
  // This file is part of OpenCV project.
  // It is subject to the license terms in the LICENSE file found in the top-level directory
  // of this distribution and at http://opencv.org/license.html.
  
  #include "opencv2/core/hal/intrin.hpp"
  #include <cstring>  // std::memcpy
  
  namespace cv { namespace hal {
  
  // 256-entry lookup table indexed by a byte value; the definition lives outside this file.
  // The scalar fallback loops in normHamming() below add popCountTable[byte] to the result,
  // i.e. it is used as the per-byte contribution to the Hamming norm.
  extern const uchar popCountTable[256];
  
  CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
  
  // forward declarations
  // Single-buffer overload: accumulates the per-byte bit counts of a[0..n).
  int normHamming(const uchar* a, int n);
  // Two-buffer overload: accumulates the per-byte bit counts of (a[k] ^ b[k]) for k in [0, n).
  int normHamming(const uchar* a, const uchar* b, int n);
  
  #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
  
  #if CV_AVX2
  // Returns the i-th 32-bit element of a 256-bit register.
  // The register is written to a 32-byte-aligned local array of eight ints and
  // the requested element is read back from it; `i` must lie in [0, 8)
  // (checked through CV_DbgAssert).
  static inline int _mm256_extract_epi32_(__m256i reg, const int i)
  {
      CV_DbgAssert(0 <= i && i < 8);
      CV_DECL_ALIGNED(32) int lanes[8];
      _mm256_store_si256(reinterpret_cast<__m256i*>(lanes), reg);
      return lanes[i];
  }
  #endif
  
  // Hamming norm of a single byte buffer: the sum of per-byte bit counts over a[0..n).
  //
  // a - pointer to the first byte of the buffer (no alignment is required)
  // n - number of bytes to process
  //
  // Returns the accumulated count as int.
  //
  // The work is done by a cascade of loops that all share the index `i`: each
  // enabled path consumes as many whole chunks as fit and the next path picks up
  // where the previous one stopped, ending with a byte-by-byte table lookup.
  int normHamming(const uchar* a, int n)
  {
      CV_AVX_GUARD;

      int i = 0;
      int result = 0;

  #if CV_SIMD && CV_SIMD_WIDTH > 16
      {
          // Universal-intrinsic path, enabled only when CV_SIMD_WIDTH exceeds 16.
          v_uint64 t = vx_setzero_u64();
          for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
              t += v_popcount(v_reinterpret_as_u64(vx_load(a + i)));
          result += (int)v_reduce_sum(t);
          vx_cleanup();
      }
  #endif

  #if CV_POPCNT
      {
          // Scalar popcount path. The bytes are copied into a local word with
          // memcpy instead of dereferencing a casted pointer: `a + i` carries no
          // alignment guarantee and reading uchar storage through a uint64/uint
          // lvalue is undefined behaviour.
  #  if defined CV_POPCNT_U64
          for(; i <= n - 8; i += 8)
          {
              uint64 word;
              std::memcpy(&word, a + i, sizeof(word));
              result += (int)CV_POPCNT_U64(word);
          }
  #  endif
          for(; i <= n - 4; i += 4)
          {
              uint word;
              std::memcpy(&word, a + i, sizeof(word));
              result += CV_POPCNT_U32(word);
          }
      }
  #elif CV_SIMD
      {
          // 128-bit vector path, 16 bytes per iteration.
          v_uint64x2 t = v_setzero_u64();
          for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes)
              t += v_popcount(v_reinterpret_as_u64(v_load(a + i)));
          result += (int)v_reduce_sum(t);
      }
  #endif
  #if CV_ENABLE_UNROLLED
      // Table lookup, four bytes per iteration.
      for(; i <= n - 4; i += 4)
      {
          result += popCountTable[a[i]] + popCountTable[a[i+1]] +
          popCountTable[a[i+2]] + popCountTable[a[i+3]];
      }
  #endif
      // Remaining tail bytes, one at a time.
      for(; i < n; i++)
      {
          result += popCountTable[a[i]];
      }
      return result;
  }
  
  // Hamming distance between two byte buffers: the sum of per-byte bit counts of
  // (a[k] ^ b[k]) for k in [0, n).
  //
  // a, b - pointers to the first byte of each buffer (no alignment is required)
  // n    - number of bytes to compare
  //
  // Returns the accumulated count as int.
  //
  // The work is done by a cascade of loops that all share the index `i`: each
  // enabled path consumes as many whole chunks as fit and the next path picks up
  // where the previous one stopped, ending with a byte-by-byte table lookup.
  int normHamming(const uchar* a, const uchar* b, int n)
  {
      CV_AVX_GUARD;

      int i = 0;
      int result = 0;

  #if CV_SIMD && CV_SIMD_WIDTH > 16
      {
          // Universal-intrinsic path, enabled only when CV_SIMD_WIDTH exceeds 16.
          v_uint64 t = vx_setzero_u64();
          for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
              t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i)));
          result += (int)v_reduce_sum(t);
          // Same clean-up call the single-buffer overload issues after its wide loop.
          vx_cleanup();
      }
  #endif

  #if CV_POPCNT
      {
          // Scalar popcount path. The bytes are copied into local words with
          // memcpy instead of dereferencing casted pointers: `a + i` / `b + i`
          // carry no alignment guarantee and reading uchar storage through a
          // uint64/uint lvalue is undefined behaviour.
  #  if defined CV_POPCNT_U64
          for(; i <= n - 8; i += 8)
          {
              uint64 wa, wb;
              std::memcpy(&wa, a + i, sizeof(wa));
              std::memcpy(&wb, b + i, sizeof(wb));
              result += (int)CV_POPCNT_U64(wa ^ wb);
          }
  #  endif
          for(; i <= n - 4; i += 4)
          {
              uint wa, wb;
              std::memcpy(&wa, a + i, sizeof(wa));
              std::memcpy(&wb, b + i, sizeof(wb));
              result += CV_POPCNT_U32(wa ^ wb);
          }
      }
  #elif CV_SIMD
      {
          // 128-bit vector path, 16 bytes per iteration.
          v_uint64x2 t = v_setzero_u64();
          for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes)
              t += v_popcount(v_reinterpret_as_u64(v_load(a + i) ^ v_load(b + i)));
          result += (int)v_reduce_sum(t);
      }
  #endif
  #if CV_ENABLE_UNROLLED
      // Table lookup, four byte pairs per iteration.
      for(; i <= n - 4; i += 4)
      {
          result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
                  popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
      }
  #endif
      // Remaining tail bytes, one pair at a time.
      for(; i < n; i++)
      {
          result += popCountTable[a[i] ^ b[i]];
      }
      return result;
  }
  
  #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
  
  CV_CPU_OPTIMIZATION_NAMESPACE_END
  }} //cv::hal