motor_phone_det.cpp 9.38 KB
#include "motor_phone_det.h"
#include "sy_errorinfo.h"
#include "cnn_extractor.h"
#include "dvpp_processx.h"
#include <time.h>
#include <sys/time.h>
#include "stream_data.h"
#include <algorithm>
#include <map>
#include <cstring>


using namespace atlas_utils;
using namespace std;

struct Resource {
    aclrtContext ctx;
    aclrtStream stream;
};

typedef struct Tools {
    Resource src;
    CNNExtract* phoneDetMotor;
    DvppProcessx* dvpp;
}Tools;


int motor_phone_init(void **handle, motor_phone_param param){
    
    int ret = SY_SUCCESS;
    Tools* tools = new Tools;
    // init resource
    // ACL_CALL(aclInit(nullptr), ACL_SUCCESS, SY_FAILED);
    // ACL_CALL(aclrtSetDevice(param.devId), ACL_SUCCESS, SY_FAILED);
    // ACL_CALL(aclrtCreateContext(&tools->src.ctx, param.devId), ACL_SUCCESS, SY_FAILED);
    ACL_CALL(aclrtCreateStream(&tools->src.stream), ACL_SUCCESS, SY_FAILED);

    // head_shoulder detection init
    tools->phoneDetMotor = new CNNExtract();
    tools->phoneDetMotor->config.confThr = param.thresld;

    ret = tools->phoneDetMotor->Init(param.modelNames);
    if (ret != SY_SUCCESS) {
        delete tools->phoneDetMotor;
        tools->phoneDetMotor = nullptr;
        return SY_FAILED;
    }

    tools->dvpp = new DvppProcessx();
    tools->dvpp->InitResource(tools->src.stream);

    *handle = tools;
    
    return SY_SUCCESS;
}


//======================= yolo v8 postprocess ============================//
float iou(float *lbox, float *rbox) {
	float interBox[] = {
		(std::max)(lbox[0] - lbox[2] / 2.f , rbox[0] - rbox[2] / 2.f), //left
		(std::min)(lbox[0] + lbox[2] / 2.f , rbox[0] + rbox[2] / 2.f), //right
		(std::max)(lbox[1] - lbox[3] / 2.f , rbox[1] - rbox[3] / 2.f), //top
		(std::min)(lbox[1] + lbox[3] / 2.f , rbox[1] + rbox[3] / 2.f), //bottom
	};

	if (interBox[2] > interBox[3] || interBox[0] > interBox[1])
		return 0.0f;

	float interBoxS = (interBox[1] - interBox[0])*(interBox[3] - interBox[2]);
	return interBoxS / (lbox[2] * lbox[3] + rbox[2] * rbox[3] - interBoxS);
}

bool cmp(const vector<float>& a, const vector<float>& b) {
	return a[4] > b[4];
}

//wh20230330
//yolov5的数据结构是4个bbox + 检测置信度+ 多cls
//yolov8的数据结构是4个bbox + 多cls ,选多cls中最大值作为检测置信度
// void nms_yolov8(std::vector<vector<float>>& res, float *output, int outnum, int CLS_NUM, float conf_thresh, float nms_thresh = 0.5)
void nms_yolov8(std::vector<vector<float>>& res, std::vector<float> &output, int outnum, int CLS_NUM, float conf_thresh, float nms_thresh = 0.5)
{
	//printf("nms_yolov8:outnum=%d,CLS_NUM=%d,conf_thresh=%f,nms_thresh=%f \n",outnum,CLS_NUM,conf_thresh,nms_thresh);
	int det_size = 6;// sizeof(Yolo::Detection) / sizeof(float);
	std::map<float, std::vector<vector<float>>> m;
	for (int i = 0; i < outnum; i++)
	{
		//vector<float> cls_prob(CLS_NUM - 5);
		vector<float> cls_prob(CLS_NUM - 4);//wh多cls的值
		//memcpy(cls_prob.data(), &output[CLS_NUM * i + 5], (CLS_NUM - 5) * sizeof(float));
		memcpy(cls_prob.data(), &output[CLS_NUM * i + 4], (CLS_NUM - 4) * sizeof(float));
		auto maxPosition = max_element(cls_prob.begin(), cls_prob.end());//wh多cls的最大值

		//if (output[CLS_NUM * i + 4] <= conf_thresh) continue;
		if (*maxPosition <= conf_thresh  ) continue;
		//if(1)
		//{
		//	printf("nms_yolov8:output:[");
		//	for(int kk=0;kk<CLS_NUM;kk++)
		//	{
		//		printf("%f ",output[CLS_NUM * i + kk]);
		//	}
		//	printf(" ]\n");
		//}

		vector<float> det(det_size);
		//Yolo::Detection det;
		//memcpy(&det[0], &output[CLS_NUM * i], (det_size - 1) * sizeof(float));
		memcpy(&det[0], &output[CLS_NUM * i], (det_size - 2) * sizeof(float));
		//det[4] = det[4]*(*maxPosition);
		det[4] = (*maxPosition);
		det[5] = maxPosition - cls_prob.begin();
		//printf("nms_yolov8 det =%f-%f-%f-%f %f %f \n",det[0],det[1],det[2],det[3],det[4],det[5]);

		if (m.count(det[5]) == 0)
			m.emplace(det[5], std::vector<vector<float>>());
		m[det[5]].push_back(det);

		/*det.conf = det.conf*(*maxPosition);
		det.class_id = maxPosition - cls_prob.begin();
		if (m.count(det.class_id) == 0)
			m.emplace(det.class_id, std::vector<Yolo::Detection>());
		m[det.class_id].push_back(det);*/
	}

	for (auto it = m.begin(); it != m.end(); it++) {
		//std::cout << it->second[0].class_id << " --- " << std::endl;
		auto& dets = it->second;
		std::sort(dets.begin(), dets.end(), cmp);

		for (size_t m = 0; m < dets.size(); ++m) {
			auto& item = dets[m];

			vector<float> det_temp(det_size);
			//Yolo::Detection det_temp;
			/*det_temp.conf = item.conf;
			det_temp.class_id = item.class_id;
			det_temp.bbox[0] = item.bbox[0] - item.bbox[2] / 2;
			det_temp.bbox[1] = item.bbox[1] - item.bbox[3] / 2;
			det_temp.bbox[2] = item.bbox[2];
			det_temp.bbox[3] = item.bbox[3];*/

			det_temp[4] = item[4];
			det_temp[5] = item[5];
			det_temp[0] = item[0] - item[2] / 2;
			det_temp[1] = item[1] - item[3] / 2;
			det_temp[2] = item[2];
			det_temp[3] = item[3];

			res.push_back(det_temp);

			for (size_t n = m + 1; n < dets.size(); ++n) {
				if (iou(&item[0], &dets[n][0]) > nms_thresh) {
					dets.erase(dets.begin() + n);
					--n;
				}
			}
		}
	}
}

int motor_phone_process_batch(void * handle, sy_img *image_data_array, int batchsize, motor_phone_result *result){
    Tools* tools = (Tools*) handle;

    int inputW = tools->phoneDetMotor->GetInputWidth();
    int inputH = tools->phoneDetMotor->GetInputHeight();
    
    //printf("debug inputw:%d,inputh:%d\n",inputW,inputH);

    for (int b = 0; b < batchsize; b++) {
        if (image_data_array[b].data_ == NULL || image_data_array[b].w_ == 0 || image_data_array[b].h_ == 0) {
            ERROR_LOG(" Headshoulder get null input ptr!");
	        return SY_FAILED;
        }
        
        ImageData resizeImg, src;
        // Utils::CopysyImageDataToDvpp(src, image_data_array[b]);
		ACL_CALL(Utils::CopysyImageDataToDvppV2(src, image_data_array[b]), SY_SUCCESS, SY_FAILED);
        ACL_CALL(tools->dvpp->CropAndPadding(resizeImg, src, inputW, inputH), SY_SUCCESS, SY_FAILED);
        // forward
        // double t1, t2;
        // t1 = msecond();
        int ret = tools->phoneDetMotor->Inference(resizeImg);
        if (ret != SY_SUCCESS) {
            return SY_MODEL_FORWARD_ERROR;
        }
        // t2 = msecond();
        // printf("debug infer time: %.2f\n", t2 - t1);

        vector<float> detRes;
        ret = tools->phoneDetMotor->PostProcess(detRes);
        if (ret != SY_SUCCESS) {
            return SY_MODEL_GETRESULT_ERROR;
        }

        int img_w = image_data_array[b].w_;
		int img_h = image_data_array[b].h_;
		int INPUT_W = 224;//模型输入
		int INPUT_H = 224;
		// int max_obj_data_count = 10;//4+多类置信度(这里是6类) 0912/1013
		int max_obj_data_count = 9;//4+多类置信度(这里是5类) 1127模型

        // printf("datacount:%d\n",detRes.size());
        int num_det = detRes.size() / max_obj_data_count;

        int w, h, x, y;
		float r_w = (float)INPUT_W / (img_w*1.0);
		float r_h = (float)INPUT_H / (img_h*1.0);
		float ratio = r_w < r_h ? r_w : r_h;
		if (r_h > r_w) {
			w = INPUT_W;
			h = r_w * img_h;
			x = 0;
			y = (INPUT_H - h) / 2;
		}
		else {
			w = r_h * img_w;
			h = INPUT_H;
			x = (INPUT_W - w) / 2;
			y = 0;
		}


        vector<vector<float>> nms_res;
		//printf("%d %d %d %d %d %d\n", img_w, img_h, w, h, x, y);
		nms_yolov8(nms_res, detRes, num_det, max_obj_data_count, tools->phoneDetMotor->config.confThr, 0.7);

        int obj_count = 0;
		result[b].objcount = 0;
		for (int i = 0; i < nms_res.size(); i++)
		{
			//class_id, score, x1, y1, x2, y2
			int index = nms_res[i][5];

			float detect_score = nms_res[i][4];
			if(detect_score > tools->phoneDetMotor->config.confThr)
			{

				int x1 = (nms_res[i][0] - x) / ratio;
				int y1 = (nms_res[i][1] - y) / ratio;
				int x2 = nms_res[i][2] / ratio + x1;
				int y2 = nms_res[i][3] / ratio + y1;

				//边界判断
				if(x1<0)x1=0;
				if(y1<0)y1=0;
				if(x2>=img_w) x2 = img_w-1;
				if(y2>=img_h) y2 = img_h-1;

				result[b].objinfo[obj_count].left = x1;
				result[b].objinfo[obj_count].top = y1;
				result[b].objinfo[obj_count].right = x2;
				result[b].objinfo[obj_count].bottom = y2;

				result[b].objinfo[obj_count].confidence = detect_score;
				result[b].objinfo[obj_count].index = index;

				obj_count++;
			}
		}
		result[b].objcount =obj_count;

		vector<vector<float>>().swap(nms_res);
        vector<float>().swap(detRes);



        // result[i].objcount = detRes.size() > MAX_OBJ_COUNT ? MAX_OBJ_COUNT : detRes.size();
        // int objIdx = 0;  
        // for (auto& det : detRes) {    
        //     if (objIdx >= MAX_OBJ_COUNT) continue;
        //     result[i].objinfo[objIdx].left = det[2];
        //     result[i].objinfo[objIdx].top = det[3];
        //     result[i].objinfo[objIdx].right = det[4];
        //     result[i].objinfo[objIdx].bottom = det[5];
        //     result[i].objinfo[objIdx].confidence = det[1];
        //     objIdx++;
            
        // }
        
            
    }

    return SY_SUCCESS;
}

void motor_phone_release(void **handle) {
    Tools* tools = (Tools*) handle;
    if (tools) {
        if (tools->phoneDetMotor) {
            //delete tools->phoneDetMotor;
            tools->phoneDetMotor = nullptr;
        }
        if (tools->dvpp) {
            //delete tools->dvpp;
            tools->dvpp = nullptr;
        }
        // aclFinalize();
        //delete tools;
        tools = NULL;
    }
}

const char * motor_phone_getversion() {
    return "motor_phone_vdec_arm_v310p_0.0.2.20231127_without_timelimit";
}