Blame view

tsl_aiplatform/reprocessing_module/CropImg.cu 3.95 KB
85cc8cb9   Hu Chunming   原版代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
  #include "CropImg.h"
  #include <cstdio>
  #include <iostream>
  
  namespace cudacommon {
  
  	// Copies the rectangle [left, right) x [top, bottom) of a tightly packed image with
  	// 3 interleaved bytes per pixel into a tightly packed destination image of size
  	// (right - left) x (bottom - top).
  	//
  	// Launch layout: 2D grid, one thread per destination pixel; threads that fall
  	// outside the destination rectangle do nothing.
  	//
  	//   d_srcRGB   : source buffer, src_width * src_height * 3 bytes
  	//   src_width  : source width in pixels (row stride is src_width * 3 bytes)
  	//   src_height : source height in pixels
  	//   d_dstRGB   : destination buffer, (right - left) * (bottom - top) * 3 bytes
  	//   left, top  : inclusive top-left corner of the rectangle in source pixels
  	//   right, bottom : exclusive bottom-right corner of the rectangle in source pixels
  	//
  	// Destination pixels whose source coordinate lies outside the source image are
  	// left unwritten, so the kernel never reads out of bounds.
  	__global__ void CropImg_CUDAKernel(const unsigned char* d_srcRGB, int src_width, int src_height,
  		unsigned char* d_dstRGB, int left, int top, int right, int bottom)
  	{
  		const int dst_x = blockIdx.x * blockDim.x + threadIdx.x;
  		const int dst_y = blockIdx.y * blockDim.y + threadIdx.y;

  		const int dst_width = right - left;
  		const int dst_height = bottom - top;

  		// The grid is rounded up to whole blocks, so trailing threads have no pixel.
  		if (dst_x >= dst_width || dst_y >= dst_height)
  			return;

  		const int src_x = left + dst_x;
  		const int src_y = top + dst_y;

  		// Guard against rectangles that extend past the source image.
  		if (src_x < 0 || src_x >= src_width || src_y < 0 || src_y >= src_height)
  			return;

  		// Offsets are computed in size_t so very large images cannot overflow int.
  		const size_t src_off = ((size_t)src_y * src_width + src_x) * 3;
  		const size_t dst_off = ((size_t)dst_y * dst_width + dst_x) * 3;

  		d_dstRGB[dst_off] = d_srcRGB[src_off];
  		d_dstRGB[dst_off + 1] = d_srcRGB[src_off + 1];
  		d_dstRGB[dst_off + 2] = d_srcRGB[src_off + 2];
  	}
  
  	// Bilinear resize of a tightly packed image with 3 interleaved float channels per pixel.
  	//
  	// Launch layout: 2D grid, one thread per destination pixel (x, y); threads outside
  	// dst_width x dst_height do nothing.
  	//
  	//   src_img    : source buffer, src_width * src_height * 3 floats
  	//   dst_img    : destination buffer, dst_width * dst_height * 3 floats
  	//   src_width, src_height : source size in pixels
  	//   dst_width, dst_height : destination size in pixels
  	//
  	// Destination pixel centres are mapped into the source with a half-pixel offset:
  	// f = (d + 0.5) * src / dst - 0.5. Sample positions beyond the first/last source
  	// pixel centre take the value of the border pixel: the interpolation weights are
  	// clamped to [0, 1], so the result is never extrapolated outside the range of the
  	// four contributing source pixels. A source that is only one pixel wide or high is
  	// handled by letting both neighbours refer to the same row/column.
  	__global__ void ResizeImgBilinearBGR_CUDAKernel(const float * src_img, float * dst_img,
  		int src_width, int src_height, int dst_width, int dst_height)
  	{
  		const int x = blockIdx.x * blockDim.x + threadIdx.x;
  		const int y = blockIdx.y * blockDim.y + threadIdx.y;

  		// Trailing threads of the rounded-up grid, or an empty source: nothing to do.
  		if (x >= dst_width || y >= dst_height || src_width <= 0 || src_height <= 0)
  			return;

  		// Float literals keep the whole computation in single precision.
  		const float fx = (x + 0.5f) * src_width / (float)dst_width - 0.5f;
  		const float fy = (y + 0.5f) * src_height / (float)dst_height - 0.5f;

  		// Top-left neighbour, clamped so that it stays inside the image; max() is applied
  		// last so a 1-pixel dimension (src - 2 == -1) still yields index 0.
  		const int x0 = max(0, min((int)floorf(fx), src_width - 2));
  		const int y0 = max(0, min((int)floorf(fy), src_height - 2));
  		// Right/bottom neighbour, collapsed onto x0/y0 when the source has a single column/row.
  		const int x1 = min(x0 + 1, src_width - 1);
  		const int y1 = min(y0 + 1, src_height - 1);

  		// Weights of the right/bottom neighbours. Clamping is what prevents extrapolation
  		// when the neighbour index above had to be clamped at an image border.
  		const float wx1 = fminf(fmaxf(fx - x0, 0.0f), 1.0f);
  		const float wy1 = fminf(fmaxf(fy - y0, 0.0f), 1.0f);
  		const float wx0 = 1.0f - wx1;
  		const float wy0 = 1.0f - wy1;

  		// Element offsets of the four neighbours and of the output pixel.
  		const size_t A = ((size_t)y0 * src_width + x0) * 3;
  		const size_t B = ((size_t)y0 * src_width + x1) * 3;
  		const size_t C = ((size_t)y1 * src_width + x0) * 3;
  		const size_t D = ((size_t)y1 * src_width + x1) * 3;
  		const size_t out = ((size_t)y * dst_width + x) * 3;

  		// All three channels are interpolated with the same weights.
  		for (int c = 0; c < 3; ++c)
  		{
  			dst_img[out + c] = src_img[A + c] * wx0 * wy0 + src_img[B + c] * wx1 * wy0
  				+ src_img[C + c] * wx0 * wy1 + src_img[D + c] * wx1 * wy1;
  		}
  	}
  
  	// Crops a width x height rectangle whose top-left corner is (left, top) out of a
  	// tightly packed 3-byte-per-pixel device image and writes it, tightly packed, to
  	// d_dstRGB. The call blocks until the kernel has finished (cudaDeviceSynchronize).
  	//
  	//   d_srcRGB   : device pointer to the source image (src_width * src_height * 3 bytes)
  	//   d_dstRGB   : device pointer to the destination (width * height * 3 bytes)
  	//   left, top  : top-left corner of the rectangle in source pixels
  	//   width, height : size of the rectangle in pixels
  	//
  	// Returns:
  	//   cudaErrorInvalidDevicePointer  if either pointer is null
  	//   cudaErrorInvalidValue          if any dimension is non-positive, left/top is
  	//                                  negative, or the rectangle does not lie entirely
  	//                                  inside the source image
  	//   the CUDA error reported by the launch or the synchronisation, if any
  	//   cudaSuccess                    otherwise
  	cudaError_t CropImgGpu(const unsigned char * d_srcRGB, const int src_width, const int src_height, unsigned char* d_dstRGB, int left, int top, int width, int height)
  	{
  		if (!d_srcRGB || !d_dstRGB)
  			return cudaErrorInvalidDevicePointer;

  		if (src_width <= 0 || src_height <= 0 || left < 0 || top < 0 || width <= 0 || height <= 0)
  			return cudaErrorInvalidValue;

  		// The rectangle must fit inside the source, otherwise the kernel would read past
  		// the source buffer. Written as a subtraction so left + width cannot overflow.
  		if (width > src_width - left || height > src_height - top)
  			return cudaErrorInvalidValue;

  		const int right = left + width;
  		const int bottom = top + height;

  		// One thread per destination pixel; the grid is rounded up to whole blocks.
  		const dim3 block(32, 16, 1);
  		const dim3 grid((width + (block.x - 1)) / block.x, (height + (block.y - 1)) / block.y, 1);

  		CropImg_CUDAKernel << < grid, block >> >(d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom);

  		// Launch-configuration errors are reported here, not by the launch itself.
  		cudaError_t cudaStatus = cudaGetLastError();
  		if (cudaStatus != cudaSuccess)
  		{
  			printf("CropImg_CUDAKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
  			return cudaStatus;
  		}

  		// Errors raised while the kernel executes only surface at a synchronising call.
  		cudaStatus = cudaDeviceSynchronize();
  		if (cudaStatus != cudaSuccess)
  		{
  			printf("cudaDeviceSynchronize returned error code %d error is %s after launching CropImgGpu!\n", cudaStatus, cudaGetErrorString(cudaStatus));
  			return cudaStatus;
  		}

  		return cudaSuccess;
  	}
  
  	// Launches the bilinear resize kernel to scale a tightly packed image with 3
  	// interleaved float channels per pixel from src_width x src_height to
  	// dst_width x dst_height.
  	//
  	//   d_srcRGB : device pointer to the source image (src_width * src_height * 3 floats)
  	//   d_dstRGB : device pointer to the destination (dst_width * dst_height * 3 floats)
  	//
  	// The launch is asynchronous on the default stream: this function does not wait for
  	// the kernel to finish. Because the function returns void, problems are reported
  	// with printf only; the CUDA error state is left untouched so a caller that checks
  	// cudaGetLastError() afterwards still sees it.
  	//
  	// NOTE(review): when source and destination sizes are equal nothing is launched and
  	// d_dstRGB is NOT written - presumably callers use the source buffer directly in
  	// that case; confirm against the call sites.
  	void ResizeImgGpu(const float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height)
  	{
  		if (src_width == dst_width && src_height == dst_height)
  			return;

  		// A zero/negative size would produce an invalid launch configuration or
  		// out-of-bounds reads, and null pointers would fault inside the kernel.
  		if (!d_srcRGB || !d_dstRGB || src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0)
  		{
  			printf("ResizeImgGpu: invalid argument (src %dx%d, dst %dx%d)\n", src_width, src_height, dst_width, dst_height);
  			return;
  		}

  		// One thread per destination pixel; the grid is rounded up to whole blocks.
  		const dim3 block(32, 16, 1);
  		const dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1);

  		ResizeImgBilinearBGR_CUDAKernel << < grid, block >> > (d_srcRGB, d_dstRGB, src_width, src_height, dst_width, dst_height);

  		// Peek (rather than get) so the error is logged without being cleared.
  		const cudaError_t cudaStatus = cudaPeekAtLastError();
  		if (cudaStatus != cudaSuccess)
  		{
  			printf("ResizeImgBilinearBGR_CUDAKernel launch reported error: %s\n", cudaGetErrorString(cudaStatus));
  		}
  	}
  
  }