Blame view

3rdparty/opencv-4.5.4/samples/gpu/multi.cpp 2.19 KB
f4334277   Hu Chunming   提交3rdparty
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
  /* This sample demonstrates the way you can perform independent tasks
     on the different GPUs */
  
  // Disable some warnings that are caused by CUDA headers
  #if defined(_MSC_VER)
  #pragma warning(disable: 4201 4408 4100)
  #endif
  
  #include <iostream>
  #include "opencv2/core.hpp"
  #include "opencv2/cudaarithm.hpp"
  
  #if !defined(HAVE_CUDA)
  
  // Fallback entry point used when HAVE_CUDA is not defined: prints a notice
  // saying that CUDA support is required and exits with status 0.
  int main()
  {
      static const char notice[] =
          "CUDA support is required (OpenCV CMake parameter 'WITH_CUDA' must be true).";

      std::cout << notice << std::endl;
      return 0;
  }
  
  #else
  
  using namespace std;
  using namespace cv;
  using namespace cv::cuda;
  
  struct Worker : public cv::ParallelLoopBody
  {
      void operator()(const Range& r) const CV_OVERRIDE
      {
          for (int i = r.start; i < r.end; ++i) { this->operator()(i); }
      }
      void operator()(int device_id) const;
  };
  
  /*
   * Entry point for CUDA-enabled builds.
   *
   * Requires at least two CUDA devices, prints a short description of every
   * device, verifies that each one is compatible with this build, and then runs
   * Worker on devices 0 and 1 through cv::parallel_for_.
   *
   * Returns 0 on success, -1 when fewer than two devices are available or when
   * any device reports that it is not compatible.
   */
  int main()
  {
      // The sample drives devices 0 and 1, so at least two GPUs must be present.
      const int required_devices = 2;

      int num_devices = getCudaEnabledDeviceCount();
      if (num_devices < required_devices)
      {
          std::cout << "Two or more GPUs are required\n";
          return -1;
      }

      for (int i = 0; i < num_devices; ++i)
      {
          cv::cuda::printShortCudaDeviceInfo(i);

          DeviceInfo dev_info(i);
          if (!dev_info.isCompatible())
          {
              // Compute capability is printed as "major.minor" and the
              // parenthesis opened before the device name is closed.
              std::cout << "CUDA module isn't built for GPU #" << i << " ("
                   << dev_info.name() << ", CC " << dev_info.majorVersion()
                   << '.' << dev_info.minorVersion() << ")\n";
              return -1;
          }
      }

      // Execute calculation in two threads using two GPUs: one range index per
      // device, with the stripe count set to the number of devices.
      cv::Range devices(0, required_devices);
      cv::parallel_for_(devices, Worker(), devices.size());

      return 0;
  }
  
  
  /*
   * Per-device job.
   *
   * Selects the CUDA device `device_id`, transposes the same pseudo-random
   * 1000x1000 CV_32F matrix once on the CPU and once on that GPU, compares the
   * two results with an infinity-norm tolerance of 1e-3, and prints a one-line
   * "passed"/"FAILED" verdict to std::cout.
   */
  void Worker::operator()(int device_id) const
  {
      // Select the GPU first so the GpuMat objects below are created on it.
      setDevice(device_id);

      Mat src(1000, 1000, CV_32F);
      Mat dst;

      // Fixed seed: every worker fills its input with the same values.
      RNG rng(0);
      rng.fill(src, RNG::UNIFORM, 0, 1);

      // CPU works
      cv::transpose(src, dst);

      // GPU works
      GpuMat d_src(src);
      GpuMat d_dst;
      cuda::transpose(d_src, d_dst);

      // Check results: download the GPU output and compare with the CPU one.
      const bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3;

      // Several workers print concurrently. Format the whole line first and
      // write it with a single insertion, so that pieces of one worker's line
      // are not mixed with pieces of another's (a chain of separate <<
      // insertions can interleave between threads). The emitted text is the
      // same as before, and the stream is still flushed after the newline.
      const cv::String report = cv::format("GPU #%d (%s): %s\n",
                                           device_id,
                                           DeviceInfo().name(),
                                           passed ? "passed" : "FAILED");
      std::cout << report << std::flush;

      // Deallocate data here, otherwise deallocation will be performed
      // after context is extracted from the stack
      d_src.release();
      d_dst.release();
  }
  
  #endif