#include "CropImg.h"
#include <stdio.h>

// GPU crop and bilinear-resize routines for 3-channel interleaved images.
//
// Every kernel in this file indexes pixels as (y * width + x) * 3 + channel,
// i.e. tightly packed rows with no pitch/padding.  All pointers handed to the
// host wrappers are passed straight to kernels, so they must be device
// pointers.  Kernels are launched on the default stream and each wrapper
// blocks on cudaDeviceSynchronize() before returning.
namespace cudacommon
{
    namespace
    {
        // Channels per pixel assumed by the indexing in every kernel below.
        enum { kChannels = 3 };

        // Maps one destination coordinate onto the source axis for bilinear
        // sampling (pixel-centre alignment: src = (dst + 0.5) * scale - 0.5).
        //
        // The sample position is clamped to [0, src_len - 1] so that the
        // resulting weight always lies in [0, 1]; without this the border
        // pixels would be extrapolated.  `lo`/`hi` are the two taps
        // (hi == lo when src_len == 1) and `w_hi` is the weight of `hi`.
        __device__ __forceinline__ void BilinearAxis(int dst_idx, int src_len, int dst_len,
                                                     int& lo, int& hi, float& w_hi)
        {
            float pos = (dst_idx + 0.5f) * src_len / (float)dst_len - 0.5f;
            pos = fminf(fmaxf(pos, 0.0f), (float)(src_len - 1));

            int base = (int)floorf(pos);
            if (base > src_len - 2) { base = src_len - 2; }
            // Must come last: for src_len == 1 the clamp above yields -1.
            if (base < 0) { base = 0; }

            lo = base;
            hi = (base + 1 < src_len) ? base + 1 : base;
            w_hi = pos - (float)base;
        }

        // Computes one bilinearly interpolated destination pixel (all channels).
        // For T = unsigned char the float result is converted by truncation;
        // because the weights are a convex combination it stays within the
        // range of the four source taps.
        template <typename T>
        __device__ __forceinline__ void ResizeBilinearPixel(const T* src_img, T* dst_img,
                                                            int src_width, int src_height,
                                                            int dst_width, int dst_height,
                                                            int x, int y)
        {
            int x0, x1, y0, y1;
            float w1, w3;
            BilinearAxis(x, src_width, dst_width, x0, x1, w1);
            BilinearAxis(y, src_height, dst_height, y0, y1, w3);
            const float w2 = 1.0f - w1;
            const float w4 = 1.0f - w3;

            // size_t offsets so large images cannot overflow 32-bit int.
            const size_t row0 = (size_t)y0 * src_width;
            const size_t row1 = (size_t)y1 * src_width;
            const size_t A = (row0 + x0) * kChannels;   // top-left
            const size_t B = (row0 + x1) * kChannels;   // top-right
            const size_t C = (row1 + x0) * kChannels;   // bottom-left
            const size_t D = (row1 + x1) * kChannels;   // bottom-right
            const size_t out = ((size_t)y * dst_width + x) * kChannels;

#pragma unroll
            for (int c = 0; c < kChannels; ++c)
            {
                const float value = src_img[A + c] * w2 * w4
                                  + src_img[B + c] * w1 * w4
                                  + src_img[C + c] * w2 * w3
                                  + src_img[D + c] * w1 * w3;
                dst_img[out + c] = (T)value;
            }
        }

        // Ceil-divides the image extent by the block shape to get a 2D grid.
        dim3 GridFor(int width, int height, const dim3& block)
        {
            return dim3((width + (block.x - 1)) / block.x,
                        (height + (block.y - 1)) / block.y,
                        1);
        }

        // Reports launch and execution errors for the kernel that was just
        // launched.  A launch error is returned immediately so that it is not
        // masked by a subsequent successful synchronize.
        cudaError_t SyncAndReport(const char* kernel_name)
        {
            cudaError_t status = cudaGetLastError();
            if (status != cudaSuccess)
            {
                printf("%s launch failed: %s\n", kernel_name, cudaGetErrorString(status));
                return status;
            }

            status = cudaDeviceSynchronize();
            if (status != cudaSuccess)
            {
                printf("cudaDeviceSynchronize returned error code %d after launching %s!\n",
                       (int)status, kernel_name);
            }
            return status;
        }
    } // anonymous namespace

    // Copies the rectangle [left, right) x [top, bottom) of a 3-channel
    // interleaved source image into a tightly packed destination image of
    // size (right - left) x (bottom - top).
    //
    // Launch layout: 2D grid, one thread per destination pixel; threads that
    // fall outside the destination are idle.  The rectangle is NOT checked
    // against the source bounds here (src_height is unused) - the caller must
    // guarantee it lies inside the source image.
    __global__ void CropImg_CUDAKernel(const unsigned char* d_srcRGB, int src_width, int src_height,
                                       unsigned char* d_dstRGB, int left, int top, int right, int bottom)
    {
        const int dst_x = blockIdx.x * blockDim.x + threadIdx.x;
        const int dst_y = blockIdx.y * blockDim.y + threadIdx.y;
        const int dst_width = right - left;
        const int dst_height = bottom - top;

        if (dst_x >= dst_width || dst_y >= dst_height)
        {
            return;
        }

        const size_t src_off = ((size_t)(top + dst_y) * src_width + (left + dst_x)) * kChannels;
        const size_t dst_off = ((size_t)dst_y * dst_width + dst_x) * kChannels;

#pragma unroll
        for (int c = 0; c < kChannels; ++c)
        {
            d_dstRGB[dst_off + c] = d_srcRGB[src_off + c];
        }
    }

    // Bilinear resize of a 3-channel interleaved float image.
    //
    // Launch layout: 2D grid, one thread per destination pixel.  Sample
    // positions are clamped to the source image, so border pixels replicate
    // the edge instead of extrapolating.  Does nothing if any dimension is
    // non-positive.
    __global__ void ResizeImgBilinearBGR_CUDAKernel(const float* src_img, float* dst_img,
                                                    int src_width, int src_height,
                                                    int dst_width, int dst_height)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < dst_width && y < dst_height && src_width > 0 && src_height > 0)
        {
            ResizeBilinearPixel<float>(src_img, dst_img, src_width, src_height,
                                       dst_width, dst_height, x, y);
        }
    }

    // Bilinear resize of a 3-channel interleaved 8-bit image.
    //
    // Same layout and border handling as ResizeImgBilinearBGR_CUDAKernel.
    // The interpolated value is truncated (not rounded) when stored.
    __global__ void ResizeImgBilinearBGR_uint8(const unsigned char* src_img, unsigned char* dst_img,
                                               int src_width, int src_height,
                                               int dst_width, int dst_height)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < dst_width && y < dst_height && src_width > 0 && src_height > 0)
        {
            ResizeBilinearPixel<unsigned char>(src_img, dst_img, src_width, src_height,
                                               dst_width, dst_height, x, y);
        }
    }

    // Crops a width x height window whose top-left corner is (left, top) out
    // of a src_width x src_height image and writes it to d_dstRGB.
    //
    // Returns cudaErrorInvalidValue (without touching the GPU) when a pointer
    // is null, any dimension is non-positive, or the window does not lie
    // entirely inside the source image.  Otherwise returns the launch error
    // if the launch failed, else the result of cudaDeviceSynchronize().
    cudaError CropImgGpu(const unsigned char * d_srcRGB, const int src_width, const int src_height,
                         unsigned char* d_dstRGB, int left, int top, int width, int height)
    {
        const bool bad_args =
            !d_srcRGB || !d_dstRGB ||
            src_width <= 0 || src_height <= 0 ||
            width <= 0 || height <= 0 ||
            left < 0 || top < 0 ||
            left > src_width - width ||     // overflow-safe form of left + width > src_width
            top > src_height - height;
        if (bad_args)
        {
            printf("CropImgGpu: invalid crop rectangle (left=%d, top=%d, width=%d, height=%d) "
                   "for %dx%d source\n", left, top, width, height, src_width, src_height);
            return cudaErrorInvalidValue;
        }

        const int right = left + width;
        const int bottom = top + height;

        const dim3 block(32, 16, 1);
        const dim3 grid = GridFor(width, height, block);

        CropImg_CUDAKernel<<<grid, block>>>(d_srcRGB, src_width, src_height,
                                            d_dstRGB, left, top, right, bottom);
        return SyncAndReport("CropImg_CUDAKernel");
    }

    // Bilinearly resizes a float image from src_width x src_height to
    // dst_width x dst_height.
    //
    // NOTE: when source and destination sizes are equal this function returns
    // immediately and d_dstRGB is NOT written - the caller must keep using
    // d_srcRGB in that case.  Errors are only printed; nothing is returned.
    void ResizeImgGpu(const float* d_srcRGB, int src_width, int src_height,
                      float* d_dstRGB, int dst_width, int dst_height)
    {
        if (src_width == dst_width && src_height == dst_height)
        {
            return;
        }
        if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0)
        {
            printf("ResizeImgGpu: invalid dimensions %dx%d -> %dx%d\n",
                   src_width, src_height, dst_width, dst_height);
            return;
        }

        const dim3 block(32, 16, 1);
        const dim3 grid = GridFor(dst_width, dst_height, block);

        ResizeImgBilinearBGR_CUDAKernel<<<grid, block>>>(d_srcRGB, d_dstRGB,
                                                         src_width, src_height,
                                                         dst_width, dst_height);
        SyncAndReport("ResizeImgBilinearBGR_CUDAKernel");
    }

    // Bilinearly resizes an 8-bit image from src_width x src_height to
    // dst_width x dst_height.
    //
    // NOTE: when source and destination sizes are equal this function returns
    // immediately and d_dstRGB is NOT written - the caller must keep using
    // d_srcRGB in that case.  Errors are only printed; nothing is returned.
    void ResizeImgGpu_int8(const unsigned char* d_srcRGB, int src_width, int src_height,
                           unsigned char* d_dstRGB, int dst_width, int dst_height)
    {
        if (src_width == dst_width && src_height == dst_height)
        {
            return;
        }
        if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0)
        {
            printf("ResizeImgGpu_int8: invalid dimensions %dx%d -> %dx%d\n",
                   src_width, src_height, dst_width, dst_height);
            return;
        }

        const dim3 block(32, 16, 1);
        const dim3 grid = GridFor(dst_width, dst_height, block);

        ResizeImgBilinearBGR_uint8<<<grid, block>>>(d_srcRGB, d_dstRGB,
                                                    src_width, src_height,
                                                    dst_width, dst_height);
        SyncAndReport("ResizeImgBilinearBGR_uint8");
    }
}