Blame view

src/common/inc/nvrtc_helper.h 3.27 KB
e41a52bb   Hu Chunming   1.优化数据读取线程;2. 添加A...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
  #if !defined(__NVRTC_HELPER__)
  
  #define __NVRTC_HELPER__ 1
  
  #include <cuda.h>
  #include <nvrtc.h>
  #include <sstream>
  #include <iostream>
  #include <fstream>
  #include <helper_cuda_drvapi.h>
  
  // Evaluates the NVRTC call `x` exactly once. If it does not return
  // NVRTC_SUCCESS, prints `Name` together with nvrtcGetErrorString() for the
  // returned code to std::cerr and terminates the process with exit(1).
  //
  //   Name - text identifying the call in the error message (streamed as-is)
  //   x    - expression yielding an nvrtcResult
  //
  // The argument is parenthesized and the temporary uses a name unlikely to
  // clash with caller variables, so a wrapped expression that itself refers to
  // a variable called `result` is no longer shadowed by the macro's local.
  #define NVRTC_SAFE_CALL(Name, x)                                             \
    do {                                                                       \
      nvrtcResult nvrtcSafeCallResult_ = (x);                                  \
      if (nvrtcSafeCallResult_ != NVRTC_SUCCESS) {                             \
        std::cerr << "\nerror: " << (Name) << " failed with error " <<         \
                     nvrtcGetErrorString(nvrtcSafeCallResult_);                \
        exit(1);                                                               \
      }                                                                        \
    } while(0)
  
  void compileFileToPTX(char *filename, int argc, const char **argv,
                        char **ptxResult, size_t *ptxResultSize)
  {
      std::ifstream inputFile(filename, std::ios::in | std::ios::binary |
                                  std::ios::ate);
  
      if (!inputFile.is_open()) 
      {
          std::cerr << "\nerror: unable to open " << filename << " for reading!\n";
          exit(1);
      }
  
      std::streampos pos = inputFile.tellg();
      size_t inputSize = (size_t)pos;
      char * memBlock = new char [inputSize + 1];
  
      inputFile.seekg (0, std::ios::beg);
      inputFile.read (memBlock, inputSize);
      inputFile.close();
      memBlock[inputSize] = '\x0';
  
      // compile
      nvrtcProgram prog;
      NVRTC_SAFE_CALL("nvrtcCreateProgram", nvrtcCreateProgram(&prog, memBlock,
                                                       filename, 0, NULL, NULL));
      nvrtcResult res = nvrtcCompileProgram(prog, argc, argv);
  
      // dump log
      size_t logSize;
      NVRTC_SAFE_CALL("nvrtcGetProgramLogSize", nvrtcGetProgramLogSize(prog, &logSize));
      char *log = (char *) malloc(sizeof(char) * logSize + 1);
      NVRTC_SAFE_CALL("nvrtcGetProgramLog", nvrtcGetProgramLog(prog, log));
      log[logSize] = '\x0';
  
      /*
      std::cerr << "\n compilation log ---\n";
      std::cerr << log;
      std::cerr << "\n end log ---\n";
      */
      free(log);
  
      NVRTC_SAFE_CALL("nvrtcCompileProgram", res);
      // fetch PTX
      size_t ptxSize;
      NVRTC_SAFE_CALL("nvrtcGetPTXSize", nvrtcGetPTXSize(prog, &ptxSize));
      char *ptx = (char *) malloc(sizeof(char) * ptxSize);
      NVRTC_SAFE_CALL("nvrtcGetPTX", nvrtcGetPTX(prog, ptx));
      NVRTC_SAFE_CALL("nvrtcDestroyProgram", nvrtcDestroyProgram(&prog));
      *ptxResult = ptx;
      *ptxResultSize = ptxSize;
  }
  
  /**
   * Creates a CUDA context on the device chosen by findCudaDeviceDRV() and
   * loads a PTX image into it.
   *
   * @param ptx   NUL-terminated PTX text, passed to cuModuleLoadDataEx; not
   *              freed here
   * @param argc  forwarded to findCudaDeviceDRV
   * @param argv  forwarded to findCudaDeviceDRV
   * @return      handle of the loaded module
   *
   * Every driver call is wrapped in checkCudaErrors. The context created here
   * is neither destroyed nor returned by this function.
   */
  CUmodule loadPTX(char *ptx, int argc, char **argv)
  {
      CUmodule module;
      CUcontext context;
      int major = 0, minor = 0;
      char deviceName[256];

      // The driver API must be initialised before any other driver call;
      // cuInit may be called repeatedly, so this is safe even if
      // findCudaDeviceDRV initialises the driver as well.
      checkCudaErrors(cuInit(0));

      // Picks the CUDA device to use (selection logic lives in
      // helper_cuda_drvapi.h).
      CUdevice cuDevice = findCudaDeviceDRV(argc, (const char **)argv);

      // get compute capabilities and the devicename
      // NOTE(review): cuDeviceComputeCapability is deprecated in the driver
      // API; cuDeviceGetAttribute is the documented replacement.
      checkCudaErrors(cuDeviceComputeCapability(&major, &minor, cuDevice));
      checkCudaErrors(cuDeviceGetName(deviceName, sizeof(deviceName), cuDevice));
      printf("> GPU Device has SM %d.%d compute capability\n", major, minor);

      // Create the context on the device selected above. The previous code
      // re-fetched device ordinal 0 at this point, silently discarding the
      // selection and possibly reporting one GPU while running on another.
      checkCudaErrors(cuCtxCreate(&context, 0, cuDevice));

      checkCudaErrors(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));

      return module;
  }
  
  #endif