#include "cuda_kernels.h"

#include "../interface/logger.hpp"

typedef unsigned char   uchar;
typedef unsigned int    uint32;
typedef int             int32;

namespace cuda_common
{
	// Draws the one-pixel-wide outline of the axis-aligned rectangle
	// [left, right] x [top, bottom] (inclusive) into a planar 3-channel float image.
	//
	// Layout read from the indexing below: three consecutive planes of
	// src_width * src_height elements each, row-major inside a plane.
	// Outline pixels get 0 in plane 0, 255 in plane 1 and 0 in plane 2
	// (presumably R, G, B given the parameter name, i.e. a green outline -- confirm with caller).
	//
	// Launch: 2D grid, one thread per pixel; the grid may be larger than the image.
	// Parts of the rectangle that fall outside the image are clipped.
	__global__ void kernel_drawPixel(float* d_srcRGB, int src_width, int src_height,
		int left, int top, int right, int bottom)
	{
		const int x = blockIdx.x * blockDim.x + threadIdx.x;
		const int y = blockIdx.y * blockDim.y + threadIdx.y;

		// The grid is rounded up to whole blocks, so some threads lie outside the
		// image. Without this guard a rectangle with right >= src_width or
		// bottom >= src_height would be written past the row / past the buffer.
		if (x >= src_width || y >= src_height)
		{
			return;
		}

		const bool onVerticalEdge = (x == left || x == right) && y >= top && y <= bottom;
		const bool onHorizontalEdge = (y == top || y == bottom) && x >= left && x <= right;
		if (!onVerticalEdge && !onHorizontalEdge)
		{
			return;
		}

		// size_t arithmetic: 2 * width * height can exceed INT_MAX for large images.
		const size_t planeSize = static_cast<size_t>(src_width) * static_cast<size_t>(src_height);
		const size_t pixel = static_cast<size_t>(y) * static_cast<size_t>(src_width) + static_cast<size_t>(x);

		d_srcRGB[pixel] = 0.0f;
		d_srcRGB[planeSize + pixel] = 255.0f;
		d_srcRGB[2 * planeSize + pixel] = 0.0f;
	}

	// Draws a rectangle outline into a planar 3-channel float device image by
	// launching kernel_drawPixel with one thread per pixel, then waits for the
	// device to finish.
	//
	// d_srcRGB             device pointer to the image (3 planes of src_width * src_height floats)
	// src_width/src_height image size in pixels; must be positive
	// left/top/right/bottom inclusive rectangle bounds in pixel coordinates
	//
	// Returns cudaSuccess on success, cudaErrorInvalidValue for a null image or
	// non-positive dimensions, otherwise the error reported by the launch or by
	// cudaDeviceSynchronize.
	cudaError_t DrawImage(float* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
	{
		// Reject bad arguments up front: a negative size would wrap around in the
		// unsigned grid computation below and could launch an enormous grid.
		if (d_srcRGB == nullptr || src_width <= 0 || src_height <= 0) {
			LOG_ERROR("DrawImage: invalid arguments (null image or non-positive size {}x{})", src_width, src_height);
			return cudaErrorInvalidValue;
		}

		const dim3 block(32, 16, 1);
		// Ceil-divide so the grid covers every pixel; the kernel discards the overhang.
		const dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);

		kernel_drawPixel<<<grid, block>>>(d_srcRGB, src_width, src_height, left, top, right, bottom);

		// Launch-configuration errors are only visible through cudaGetLastError().
		cudaError_t cudaStatus = cudaGetLastError();
		if (cudaStatus != cudaSuccess) {
			LOG_ERROR("DrawImage: kernel_drawPixel launch failed: {}", cudaGetErrorString(cudaStatus));
			return cudaStatus;
		}

		// Errors raised while the kernel executes surface at the next synchronizing call.
		cudaStatus = cudaDeviceSynchronize();
		if (cudaStatus != cudaSuccess) {
			LOG_ERROR("cudaDeviceSynchronize returned error code {} ({}) after launching kernel_drawPixel!",
				static_cast<int>(cudaStatus), cudaGetErrorString(cudaStatus));
			return cudaStatus;
		}

		return cudaStatus;
	}

	// Draws the one-pixel-wide outline of the axis-aligned rectangle
	// [left, right] x [top, bottom] (inclusive) into a planar 3-channel 8-bit image.
	//
	// Layout read from the indexing below: three consecutive planes of
	// src_width * src_height bytes each, row-major inside a plane.
	// Outline pixels get 0 in plane 0, 255 in plane 1 and 0 in plane 2
	// (presumably R, G, B given the parameter name, i.e. a green outline -- confirm with caller).
	//
	// Launch: 2D grid, one thread per pixel; the grid may be larger than the image.
	// Parts of the rectangle that fall outside the image are clipped.
	__global__ void kernel_drawPixel(unsigned char* d_srcRGB, int src_width, int src_height,
		int left, int top, int right, int bottom)
	{
		const int x = blockIdx.x * blockDim.x + threadIdx.x;
		const int y = blockIdx.y * blockDim.y + threadIdx.y;

		// The grid is rounded up to whole blocks, so some threads lie outside the
		// image. Without this guard a rectangle with right >= src_width or
		// bottom >= src_height would be written past the row / past the buffer.
		if (x >= src_width || y >= src_height)
		{
			return;
		}

		const bool onVerticalEdge = (x == left || x == right) && y >= top && y <= bottom;
		const bool onHorizontalEdge = (y == top || y == bottom) && x >= left && x <= right;
		if (!onVerticalEdge && !onHorizontalEdge)
		{
			return;
		}

		// size_t arithmetic: 2 * width * height can exceed INT_MAX for large images.
		const size_t planeSize = static_cast<size_t>(src_width) * static_cast<size_t>(src_height);
		const size_t pixel = static_cast<size_t>(y) * static_cast<size_t>(src_width) + static_cast<size_t>(x);

		d_srcRGB[pixel] = 0;
		d_srcRGB[planeSize + pixel] = 255;
		d_srcRGB[2 * planeSize + pixel] = 0;
	}

	// Draws a rectangle outline into a planar 3-channel 8-bit device image by
	// launching kernel_drawPixel with one thread per pixel, then waits for the
	// device to finish.
	//
	// d_srcRGB             device pointer to the image (3 planes of src_width * src_height bytes)
	// src_width/src_height image size in pixels; must be positive
	// left/top/right/bottom inclusive rectangle bounds in pixel coordinates
	//
	// Returns cudaSuccess on success, cudaErrorInvalidValue for a null image or
	// non-positive dimensions, otherwise the error reported by the launch or by
	// cudaDeviceSynchronize.
	cudaError_t DrawImage(unsigned char* d_srcRGB, int src_width, int src_height, int left, int top, int right, int bottom)
	{
		// Reject bad arguments up front: a negative size would wrap around in the
		// unsigned grid computation below and could launch an enormous grid.
		if (d_srcRGB == nullptr || src_width <= 0 || src_height <= 0) {
			LOG_ERROR("DrawImage: invalid arguments (null image or non-positive size {}x{})", src_width, src_height);
			return cudaErrorInvalidValue;
		}

		const dim3 block(32, 16, 1);
		// Ceil-divide so the grid covers every pixel; the kernel discards the overhang.
		const dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);

		kernel_drawPixel<<<grid, block>>>(d_srcRGB, src_width, src_height, left, top, right, bottom);

		// Launch-configuration errors are only visible through cudaGetLastError().
		cudaError_t cudaStatus = cudaGetLastError();
		if (cudaStatus != cudaSuccess) {
			LOG_ERROR("DrawImage: kernel_drawPixel launch failed: {}", cudaGetErrorString(cudaStatus));
			return cudaStatus;
		}

		// Errors raised while the kernel executes surface at the next synchronizing call.
		cudaStatus = cudaDeviceSynchronize();
		if (cudaStatus != cudaSuccess) {
			LOG_ERROR("cudaDeviceSynchronize returned error code {} ({}) after launching kernel_drawPixel!",
				static_cast<int>(cudaStatus), cudaGetErrorString(cudaStatus));
			return cudaStatus;
		}

		return cudaStatus;
	}

	// Draws the line segment (begin_x, begin_y) -> (end_x, end_y), endpoints
	// included, into a planar 3-channel float image.
	//
	// Layout read from the indexing below: three consecutive planes of
	// src_width * src_height elements each, row-major inside a plane.
	// Line pixels get 0 in plane 0, 255 in plane 1 and 0 in plane 2
	// (presumably R, G, B given the parameter name -- confirm with caller).
	//
	// Launch: 2D grid, one thread per pixel; the grid may be larger than the image.
	// Each thread decides independently whether its pixel belongs to the line.
	// Parts of the segment outside the image are clipped.
	__global__ void kernel_drawLine(float* d_srcRGB, int src_width, int src_height,
		int begin_x, int begin_y, int end_x, int end_y)
	{
		const int x = blockIdx.x * blockDim.x + threadIdx.x;
		const int y = blockIdx.y * blockDim.y + threadIdx.y;

		// The grid is rounded up to whole blocks; threads outside the image must
		// not write, even when an endpoint lies beyond the image border.
		if (x >= src_width || y >= src_height)
		{
			return;
		}

		// Restrict to the segment's bounding box so the infinite line is not drawn.
		const int min_x = end_x < begin_x ? end_x : begin_x;
		const int max_x = end_x < begin_x ? begin_x : end_x;
		const int min_y = end_y < begin_y ? end_y : begin_y;
		const int max_y = end_y < begin_y ? begin_y : end_y;
		if (x < min_x || x > max_x || y < min_y || y > max_y)
		{
			return;
		}

		// 64-bit arithmetic so the products below cannot overflow for large coordinates.
		const long long dx = static_cast<long long>(end_x) - begin_x;
		const long long dy = static_cast<long long>(end_y) - begin_y;
		const long long cross = (static_cast<long long>(x) - begin_x) * dy
			- dx * (static_cast<long long>(y) - begin_y);

		const long long absCross = cross < 0 ? -cross : cross;
		const long long absDx = dx < 0 ? -dx : dx;
		const long long absDy = dy < 0 ? -dy : dy;
		const long long majorExtent = absDx > absDy ? absDx : absDy;

		// |cross| / majorExtent is the pixel's offset from the ideal line measured
		// along the minor axis. Accepting offsets up to half a pixel yields a
		// gap-free line for every slope; requiring cross == 0 would only light the
		// few pixels whose integer coordinates lie exactly on the line.
		// An offset of exactly one half lights both neighbouring pixels.
		if (2 * absCross > majorExtent)
		{
			return;
		}

		// size_t arithmetic: 2 * width * height can exceed INT_MAX for large images.
		const size_t planeSize = static_cast<size_t>(src_width) * static_cast<size_t>(src_height);
		const size_t pixel = static_cast<size_t>(y) * static_cast<size_t>(src_width) + static_cast<size_t>(x);

		d_srcRGB[pixel] = 0.0f;
		d_srcRGB[planeSize + pixel] = 255.0f;
		d_srcRGB[2 * planeSize + pixel] = 0.0f;
	}

	// Draws a line segment into a planar 3-channel float device image by
	// launching kernel_drawLine with one thread per pixel, then waits for the
	// device to finish.
	//
	// d_srcRGB             device pointer to the image (3 planes of src_width * src_height floats)
	// src_width/src_height image size in pixels; must be positive
	// begin_x/begin_y      first endpoint in pixel coordinates
	// end_x/end_y          second endpoint in pixel coordinates
	//
	// Returns cudaSuccess on success, cudaErrorInvalidValue for a null image or
	// non-positive dimensions, otherwise the error reported by the launch or by
	// cudaDeviceSynchronize.
	cudaError_t DrawLine(float* d_srcRGB, int src_width, int src_height, int begin_x, int begin_y, int end_x, int end_y)
	{
		// Reject bad arguments up front: a negative size would wrap around in the
		// unsigned grid computation below and could launch an enormous grid.
		if (d_srcRGB == nullptr || src_width <= 0 || src_height <= 0) {
			LOG_ERROR("DrawLine: invalid arguments (null image or non-positive size {}x{})", src_width, src_height);
			return cudaErrorInvalidValue;
		}

		const dim3 block(32, 16, 1);
		// Ceil-divide so the grid covers every pixel; the kernel discards the overhang.
		const dim3 grid((src_width + (block.x - 1)) / block.x, (src_height + (block.y - 1)) / block.y, 1);

		kernel_drawLine<<<grid, block>>>(d_srcRGB, src_width, src_height, begin_x, begin_y, end_x, end_y);

		// Launch-configuration errors are only visible through cudaGetLastError().
		cudaError_t cudaStatus = cudaGetLastError();
		if (cudaStatus != cudaSuccess) {
			LOG_ERROR("DrawLine: kernel_drawLine launch failed: {}", cudaGetErrorString(cudaStatus));
			return cudaStatus;
		}

		// Errors raised while the kernel executes surface at the next synchronizing call.
		cudaStatus = cudaDeviceSynchronize();
		if (cudaStatus != cudaSuccess) {
			LOG_ERROR("cudaDeviceSynchronize returned error code {} ({}) after launching kernel_drawLine!",
				static_cast<int>(cudaStatus), cudaGetErrorString(cudaStatus));
			return cudaStatus;
		}

		return cudaStatus;
	}
}