// CropImg.cu
#include "CropImg.h"
#include <iostream>

namespace cudacommon {

	// Copies the rectangle [left, right) x [top, bottom) of the source image into
	// a tightly packed destination image of size (right - left) x (bottom - top).
	//
	// Both buffers are addressed as interleaved 3-byte pixels, i.e. pixel (x, y)
	// of an image of width w starts at byte offset (y * w + x) * 3.
	//
	// Launch layout: 2D grid of 2D blocks, one thread per destination pixel.
	// Threads that fall outside the destination rectangle do nothing.
	//
	// src_height is accepted but not read; the rectangle is not checked against
	// the source extent here, so the caller must pass a rectangle that lies
	// inside the source image.
	__global__ void CropImg_CUDAKernel(const unsigned char* d_srcRGB, int src_width, int src_height,
		unsigned char* d_dstRGB, int left, int top, int right, int bottom)
	{
		const int col = blockIdx.x * blockDim.x + threadIdx.x;
		const int row = blockIdx.y * blockDim.y + threadIdx.y;

		const int crop_w = right - left;
		const int crop_h = bottom - top;

		// Guard the grid tail: the grid is rounded up to whole blocks.
		if (col >= crop_w || row >= crop_h)
		{
			return;
		}

		const int in_col = left + col;
		const int in_row = top + row;

		// First byte of the source pixel and of the destination pixel.
		const unsigned char* in_px = d_srcRGB + ((in_row*src_width * 3) + in_col * 3);
		unsigned char* out_px = d_dstRGB + ((row*crop_w * 3) + col * 3);

		// Copy the three interleaved channel bytes.
		for (int c = 0; c < 3; ++c)
		{
			out_px[c] = in_px[c];
		}
	}

	// Bilinear resize of an interleaved 3-channel float image.
	//
	// src_img : src_width x src_height pixels, 3 floats per pixel, row-major.
	// dst_img : dst_width x dst_height pixels, same layout; every pixel is written.
	//
	// Launch layout: 2D grid of 2D blocks, one thread per destination pixel.
	// Threads outside the destination image do nothing.
	//
	// Destination pixel centres are mapped into the source with the half-pixel
	// convention (x + 0.5) * scale - 0.5. Sample positions that fall outside the
	// source are clamped to the border, so border pixels are replicated rather
	// than extrapolated, and a source that is only one pixel wide or high is
	// handled without reading outside the buffer.
	__global__ void ResizeImgBilinearBGR_CUDAKernel(const float * src_img, float * dst_img,
		int src_width, int src_height, int dst_width, int dst_height)
	{
		const int x = blockIdx.x * blockDim.x + threadIdx.x;
		const int y = blockIdx.y * blockDim.y + threadIdx.y;

		if (x >= dst_width || y >= dst_height)
		{
			return;
		}
		// Nothing to sample from; also keeps the clamps below well defined.
		if (src_width <= 0 || src_height <= 0)
		{
			return;
		}

		// Source-space sample position. Float literals keep the arithmetic in
		// single precision instead of silently promoting to double.
		float fx = (x + 0.5f)*src_width / (float)dst_width - 0.5f;
		float fy = (y + 0.5f)*src_height / (float)dst_height - 0.5f;

		// Clamp the position itself (not just the index) so the interpolation
		// weights always stay inside [0, 1].
		fx = fminf(fmaxf(fx, 0.0f), (float)(src_width - 1));
		fy = fminf(fmaxf(fy, 0.0f), (float)(src_height - 1));

		// fx, fy are non-negative here, so truncation equals floor.
		const int x0 = (int)fx;
		const int y0 = (int)fy;
		// Right / lower neighbour, replicated at the last column / row.
		const int x1 = (x0 + 1 < src_width) ? x0 + 1 : src_width - 1;
		const int y1 = (y0 + 1 < src_height) ? y0 + 1 : src_height - 1;

		// w1 / w3 weight the right / lower neighbour, w2 / w4 the left / upper one.
		const float w1 = fx - x0;
		const float w2 = 1 - w1;
		const float w3 = fy - y0;
		const float w4 = 1 - w3;

		// First channel of the four neighbouring source pixels.
		const float* A = src_img + (x0 + y0*src_width) * 3;
		const float* B = src_img + (x1 + y0*src_width) * 3;
		const float* C = src_img + (x0 + y1*src_width) * 3;
		const float* D = src_img + (x1 + y1*src_width) * 3;

		float* out = dst_img + (y * dst_width + x) * 3;

		// All three channels use the same weights.
#pragma unroll
		for (int c = 0; c < 3; ++c)
		{
			out[c] = A[c] * w2*w4 + B[c] * w1*w4 + C[c] * w2*w3 + D[c] * w1*w3;
		}
	}

	// Bilinear resize of an interleaved 3-channel 8-bit image.
	//
	// src_img : src_width x src_height pixels, 3 bytes per pixel, row-major.
	// dst_img : dst_width x dst_height pixels, same layout; every pixel is written.
	//
	// Launch layout: 2D grid of 2D blocks, one thread per destination pixel.
	// Threads outside the destination image do nothing.
	//
	// Destination pixel centres are mapped into the source with the half-pixel
	// convention (x + 0.5) * scale - 0.5. Sample positions that fall outside the
	// source are clamped to the border, so border pixels are replicated rather
	// than extrapolated, and a source that is only one pixel wide or high is
	// handled without reading outside the buffer.
	//
	// The interpolation is done in float; the result is rounded to the nearest
	// integer and saturated to [0, 255] before being stored, so the
	// float -> unsigned char conversion is always in range.
	__global__ void ResizeImgBilinearBGR_uint8(const unsigned char * src_img, unsigned char * dst_img,
		int src_width, int src_height, int dst_width, int dst_height)
	{
		const int x = blockIdx.x * blockDim.x + threadIdx.x;
		const int y = blockIdx.y * blockDim.y + threadIdx.y;

		if (x >= dst_width || y >= dst_height)
		{
			return;
		}
		// Nothing to sample from; also keeps the clamps below well defined.
		if (src_width <= 0 || src_height <= 0)
		{
			return;
		}

		// Source-space sample position. Float literals keep the arithmetic in
		// single precision instead of silently promoting to double.
		float fx = (x + 0.5f)*src_width / (float)dst_width - 0.5f;
		float fy = (y + 0.5f)*src_height / (float)dst_height - 0.5f;

		// Clamp the position itself (not just the index) so the interpolation
		// weights always stay inside [0, 1] and the result stays inside the
		// range spanned by the four neighbours.
		fx = fminf(fmaxf(fx, 0.0f), (float)(src_width - 1));
		fy = fminf(fmaxf(fy, 0.0f), (float)(src_height - 1));

		// fx, fy are non-negative here, so truncation equals floor.
		const int x0 = (int)fx;
		const int y0 = (int)fy;
		// Right / lower neighbour, replicated at the last column / row.
		const int x1 = (x0 + 1 < src_width) ? x0 + 1 : src_width - 1;
		const int y1 = (y0 + 1 < src_height) ? y0 + 1 : src_height - 1;

		// w1 / w3 weight the right / lower neighbour, w2 / w4 the left / upper one.
		const float w1 = fx - x0;
		const float w2 = 1 - w1;
		const float w3 = fy - y0;
		const float w4 = 1 - w3;

		// First channel of the four neighbouring source pixels.
		const unsigned char* A = src_img + (x0 + y0*src_width) * 3;
		const unsigned char* B = src_img + (x1 + y0*src_width) * 3;
		const unsigned char* C = src_img + (x0 + y1*src_width) * 3;
		const unsigned char* D = src_img + (x1 + y1*src_width) * 3;

		unsigned char* out = dst_img + (y * dst_width + x) * 3;

		// All three channels use the same weights.
#pragma unroll
		for (int c = 0; c < 3; ++c)
		{
			const float v = A[c] * w2*w4 + B[c] * w1*w4 + C[c] * w2*w3 + D[c] * w1*w3;
			// Round to nearest and saturate; plain truncation would turn e.g.
			// 199.99998 (a rounding artefact on a flat area) into 199.
			out[c] = static_cast<unsigned char>(fminf(fmaxf(v + 0.5f, 0.0f), 255.0f));
		}
	}

	// Crops a width x height rectangle whose top-left corner is (left, top) out
	// of a device image and writes it, tightly packed, to d_dstRGB.
	//
	// d_srcRGB   : device pointer, src_width x src_height interleaved 3-byte pixels.
	// d_dstRGB   : device pointer, receives width * height * 3 bytes.
	// left, top  : top-left corner of the crop rectangle in source pixels.
	// width, height : size of the crop rectangle in pixels.
	//
	// Returns cudaSuccess when the crop completed (an empty rectangle is a
	// successful no-op), cudaErrorInvalidValue when the rectangle does not lie
	// inside the source image or a pointer is null, otherwise the CUDA error
	// reported by the launch or by the subsequent device synchronisation.
	// The call blocks until the kernel has finished.
	cudaError CropImgGpu(const unsigned char * d_srcRGB, const int src_width, const int src_height, unsigned char* d_dstRGB, int left, int top, int width, int height)
	{
		// Reject rectangles that would make the kernel read outside the source.
		// The comparisons are ordered so that src_width - width and
		// src_height - height cannot overflow.
		const bool bad_rect = left < 0 || top < 0 || width < 0 || height < 0
			|| width > src_width || height > src_height
			|| left > src_width - width || top > src_height - height;
		if (bad_rect) {
			printf("CropImgGpu: crop rectangle (left=%d, top=%d, width=%d, height=%d) is outside the %dx%d source image\n",
				left, top, width, height, src_width, src_height);
			return cudaErrorInvalidValue;
		}

		// Nothing to copy; a zero-sized grid would be an invalid launch.
		if (width == 0 || height == 0) {
			return cudaSuccess;
		}

		if (!d_srcRGB || !d_dstRGB) {
			printf("CropImgGpu: null device pointer\n");
			return cudaErrorInvalidValue;
		}

		int right = left + width;
		int bottom = top + height;

		// One thread per destination pixel, grid rounded up to whole blocks.
		dim3 block(32, 16, 1);
		dim3 grid((width + (block.x - 1)) / block.x, (height + (block.y - 1)) / block.y, 1);

		CropImg_CUDAKernel << < grid, block >> >(d_srcRGB, src_width, src_height, d_dstRGB, left, top, right, bottom);

		// cudaGetLastError() clears the launch error, so it has to be returned
		// here; a later cudaDeviceSynchronize() would report success.
		cudaError_t cudaStatus = cudaGetLastError();
		if (cudaStatus != cudaSuccess) {
			printf("CropImg_CUDAKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
			return cudaStatus;
		}

		// Wait for completion so that execution errors surface here.
		cudaStatus = cudaDeviceSynchronize();
		if (cudaStatus != cudaSuccess) {
			printf("cudaDeviceSynchronize returned error code %d after launching CropImgGpu!\n", cudaStatus);
		}

		return cudaStatus;
	}

	// Resizes an interleaved 3-channel float device image with bilinear
	// interpolation by launching ResizeImgBilinearBGR_CUDAKernel.
	//
	// d_srcRGB : device pointer, src_width x src_height pixels, 3 floats each.
	// d_dstRGB : device pointer, receives dst_width x dst_height pixels.
	//
	// When source and destination sizes are equal the function returns
	// immediately and d_dstRGB is NOT written.
	// NOTE(review): callers presumably keep using the source buffer in that
	// case - confirm, otherwise a device-to-device copy is needed here.
	//
	// Errors are reported on stdout only; the function has no return value.
	// The call blocks until the kernel has finished.
	void ResizeImgGpu(const float* d_srcRGB, int src_width, int src_height, float* d_dstRGB, int dst_width, int dst_height)
	{
		if (src_width == dst_width && src_height == dst_height)
			return;

		// A non-positive size would give an empty or wrapped-around grid.
		if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0) {
			printf("ResizeImgGpu: invalid image size (src %dx%d, dst %dx%d)\n",
				src_width, src_height, dst_width, dst_height);
			return;
		}
		if (!d_srcRGB || !d_dstRGB) {
			printf("ResizeImgGpu: null device pointer\n");
			return;
		}

		// One thread per destination pixel, grid rounded up to whole blocks.
		dim3 block(32, 16, 1);
		dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1);
		ResizeImgBilinearBGR_CUDAKernel << < grid, block >> > (d_srcRGB, d_dstRGB, src_width, src_height, dst_width, dst_height);

		cudaError_t cudaStatus = cudaGetLastError();
		if (cudaStatus != cudaSuccess) {
			printf("ResizeImgBilinearBGR_CUDAKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
			// Nothing was launched, so there is nothing to wait for.
			return;
		}

		// Wait for completion so that execution errors surface here.
		cudaStatus = cudaDeviceSynchronize();
		if (cudaStatus != cudaSuccess) {
			printf("cudaDeviceSynchronize returned error code %d after launching ResizeImgGpu!\n", static_cast<int>(cudaStatus));
		}
	}

	// Resizes an interleaved 3-channel 8-bit device image with bilinear
	// interpolation by launching ResizeImgBilinearBGR_uint8.
	//
	// d_srcRGB : device pointer, src_width x src_height pixels, 3 bytes each.
	// d_dstRGB : device pointer, receives dst_width x dst_height pixels.
	//
	// When source and destination sizes are equal the function returns
	// immediately and d_dstRGB is NOT written.
	// NOTE(review): callers presumably keep using the source buffer in that
	// case - confirm, otherwise a device-to-device copy is needed here.
	//
	// Errors are reported on stdout only; the function has no return value.
	// The call blocks until the kernel has finished.
	void ResizeImgGpu_int8(const unsigned char* d_srcRGB, int src_width, int src_height, unsigned char* d_dstRGB, int dst_width, int dst_height)
	{
		if (src_width == dst_width && src_height == dst_height)
			return;

		// A non-positive size would give an empty or wrapped-around grid.
		if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0) {
			printf("ResizeImgGpu_int8: invalid image size (src %dx%d, dst %dx%d)\n",
				src_width, src_height, dst_width, dst_height);
			return;
		}
		if (!d_srcRGB || !d_dstRGB) {
			printf("ResizeImgGpu_int8: null device pointer\n");
			return;
		}

		// One thread per destination pixel, grid rounded up to whole blocks.
		dim3 block(32, 16, 1);
		dim3 grid((dst_width + (block.x - 1)) / block.x, (dst_height + (block.y - 1)) / block.y, 1);
		ResizeImgBilinearBGR_uint8 << < grid, block >> > (d_srcRGB, d_dstRGB, src_width, src_height, dst_width, dst_height);

		cudaError_t cudaStatus = cudaGetLastError();
		if (cudaStatus != cudaSuccess) {
			printf("ResizeImgBilinearBGR_uint8 launch failed: %s\n", cudaGetErrorString(cudaStatus));
			// Nothing was launched, so there is nothing to wait for.
			return;
		}

		// Wait for completion so that execution errors surface here.
		cudaStatus = cudaDeviceSynchronize();
		if (cudaStatus != cudaSuccess) {
			printf("cudaDeviceSynchronize returned error code %d after launching ResizeImgGpu_int8!\n", static_cast<int>(cudaStatus));
		}
	}

}