Blame view

3rdparty/opencv-4.5.4/samples/dnn/human_parsing.cpp 4.63 KB
f4334277   Hu Chunming   提交3rdparty
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
  //
  // this sample demonstrates parsing (segmenting) human body parts from an image using opencv's dnn,
  // based on https://github.com/Engineering-Course/LIP_JPPNet
  //
  // get the pretrained model from: https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
  //
  
  #include <iostream>
  #include <string>
  #include <vector>

  #include <opencv2/dnn.hpp>
  #include <opencv2/highgui.hpp>
  #include <opencv2/imgproc.hpp>
  using namespace cv;
  
  
  // Runs the human-parsing network on `image` and returns a color-coded
  // segmentation with the same size as `image`.
  //
  // image   - picture to segment; must not be empty.
  // model   - path of the network file, handed to dnn::readNet().
  // backend - value forwarded to Net::setPreferableBackend().
  // target  - value forwarded to Net::setPreferableTarget().
  //
  // Returns a CV_8UC3 image in which every pixel carries the color of the
  // best-scoring class (see the palette below), in BGR channel order.
  // Throws cv::Exception (via CV_Assert) if the image is empty or the network
  // output does not have the layout this function indexes into.
  static Mat parse_human(const Mat &image, const std::string &model, int backend=dnn::DNN_BACKEND_DEFAULT, int target=dnn::DNN_TARGET_CPU) {
      // LIP classes:
      // 0 Background, 1 Hat, 2 Hair, 3 Glove, 4 Sunglasses, 5 UpperClothes, 6 Dress, 7 Coat, 8 Socks, 9 Pants
      // 10 Jumpsuits, 11 Scarf, 12 Skirt, 13 Face, 14 LeftArm, 15 RightArm, 16 LeftLeg, 17 RightLeg, 18 LeftShoe, 19 RightShoe
      // The palette is written in RGB order; the result is converted to BGR at the end.
      static const Vec3b colors[] = {
          Vec3b(0, 0, 0), Vec3b(128, 0, 0), Vec3b(255, 0, 0), Vec3b(0, 85, 0), Vec3b(170, 0, 51), Vec3b(255, 85, 0),
          Vec3b(0, 0, 85), Vec3b(0, 119, 221), Vec3b(85, 85, 0), Vec3b(0, 85, 85), Vec3b(85, 51, 0), Vec3b(52, 86, 128),
          Vec3b(0, 128, 0), Vec3b(0, 0, 255), Vec3b(51, 170, 221), Vec3b(0, 255, 255), Vec3b(85, 255, 170),
          Vec3b(170, 255, 85), Vec3b(255, 255, 0), Vec3b(255, 170, 0)
      };
      // channel of the flipped result that corresponds to class i of the original:
      // the last 3 left/right pairs are swapped because the picture was mirrored
      static const int tail_order[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,15,14,17,16,19,18};
      const int numClasses = static_cast<int>(sizeof(colors) / sizeof(colors[0]));

      // flip() and blobFromImages() cannot work on an empty image; fail with a clear assertion instead
      CV_Assert(!image.empty());

      // this network expects an image and a flipped copy as input
      Mat flipped;
      flip(image, flipped, 1);
      std::vector<Mat> batch;
      batch.push_back(image);
      batch.push_back(flipped);
      Mat blob = dnn::blobFromImages(batch, 1.0, Size(), Scalar(104.00698793, 116.66876762, 122.67891434));

      dnn::Net net = dnn::readNet(model);
      net.setPreferableBackend(backend);
      net.setPreferableTarget(target);
      net.setInput(blob);
      Mat out = net.forward();
      // expected output: [2, 20, 384, 384], (2 lists(orig, flipped) of 20 body part heatmaps 384x384)

      // The code below reads out.size[1..3] and out.ptr<float>(1, ...), so the blob has to be
      // a 4-D float tensor with a batch of 2, and must not hold more channels than the
      // lookup tables above have entries (otherwise colors[i] / tail_order[i] would overrun).
      CV_Assert(out.dims == 4);
      CV_Assert(out.depth() == CV_32F);
      CV_Assert(out.size[0] == 2);
      const int numParts = out.size[1];
      CV_Assert(numParts > 0 && numParts <= numClasses);

      Mat segm(image.size(), CV_8UC3, Scalar(0,0,0));
      // NOTE: the running maximum starts at 0, so a pixel whose averaged scores are all <= 0
      // is never painted and keeps the initial black (background) color.
      Mat maxval(image.size(), CV_32F, Scalar(0));

      // iterate over body part heatmaps (LIP classes)
      for (int i=0; i<numParts; i++) {
          // the swapped channel must exist in the blob as well
          const int tailIdx = tail_order[i];
          CV_Assert(tailIdx < numParts);

          // resize heatmaps to original image size
          // "head" is  the original image result, "tail" the flipped copy
          // (h and t only wrap the blob's memory, no data is copied)
          Mat head, h(out.size[2], out.size[3], CV_32F, out.ptr<float>(0,i));
          resize(h, head, image.size());

          Mat tail, t(out.size[2], out.size[3], CV_32F, out.ptr<float>(1,tailIdx));
          resize(t, tail, image.size());
          // mirror the flipped result back so it lines up with the original
          flip(tail, tail, 1);

          // mix original and flipped result
          Mat avg = (head + tail) * 0.5;

          // write color if prob value > maxval
          Mat cmask;
          compare(avg, maxval, cmask, CMP_GT);
          segm.setTo(colors[i], cmask);

          // keep largest values for next iteration
          max(avg, maxval, maxval);
      }
      cvtColor(segm, segm, COLOR_RGB2BGR);
      return segm;
  }
  
  int main(int argc, char**argv)
  {
      CommandLineParser parser(argc,argv,
          "{help    h |                 | show help screen / args}"
          "{image   i |                 | person image to process }"
          "{model   m |lip_jppnet_384.pb| network model}"
          "{backend b | 0               | Choose one of computation backends: "
                                           "0: automatically (by default), "
                                           "1: Halide language (http://halide-lang.org/), "
                                           "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
                                           "3: OpenCV implementation, "
                                           "4: VKCOM, "
                                           "5: CUDA }"
          "{target  t | 0               | Choose one of target computation devices: "
                                           "0: CPU target (by default), "
                                           "1: OpenCL, "
                                           "2: OpenCL fp16 (half-float precision), "
                                           "3: VPU, "
                                           "4: Vulkan, "
                                           "6: CUDA, "
                                           "7: CUDA fp16 (half-float preprocess) }"
      );
      if (argc == 1 || parser.has("help"))
      {
          parser.printMessage();
          return 0;
      }
      std::string model = parser.get<std::string>("model");
      std::string image = parser.get<std::string>("image");
      int backend = parser.get<int>("backend");
      int target = parser.get<int>("target");
  
      Mat input = imread(image);
      Mat segm = parse_human(input, model, backend, target);
  
      imshow("human parsing", segm);
      waitKey();
      return 0;
  }