// retinaface.cpp
#include "retinaface.h"
// 生成anchors
vector<vector<float>> RetinaFace::priorBox(vector<float> image_size){
    vector<int> tmp1={16,32};
    vector<int> tmp2={64,128};
    vector<int> tmp3={256,512};
    vector<vector<int>> min_sizes_;
    min_sizes_.push_back(tmp1);
    min_sizes_.push_back(tmp2);
    min_sizes_.push_back(tmp3);
    vector<int> steps={8,16,32};
    vector<vector<int>> feature_maps;
    vector<vector<float>> anchors;
    for(int &step:steps){
        vector<int> tmp(2,0);
        tmp[0]=ceil(image_size[0]/step);
        tmp[1]=ceil(image_size[1]/step);
        feature_maps.push_back(tmp);
    }
    for(int k=0;k<feature_maps.size();k++){
        vector<int> min_sizes=min_sizes_[k];
        
        for(int i=0;i<feature_maps[k][0];i++){
            for(int j=0;j<feature_maps[k][1];j++){
                for(int &min_size:min_sizes){
                    float s_kx=float(min_size)/float(image_size[1]);
                    float s_ky=float(min_size)/float(image_size[0]);
                    float dense_cx=float((float(j)+float(0.5))*steps[k])/float(image_size[1]);
                    float dense_cy=float((float(i)+float(0.5))*steps[k])/float(image_size[1]);
                    vector<float> tmp_anchor={dense_cx,dense_cy,s_kx,s_ky};
                    anchors.push_back(tmp_anchor);
                }
            }
        }
    }
    return anchors;
}

// 解析bounding box 包含置信度
vector<Bbox> RetinaFace::decode(float *loc,float *score,float *pre,vector<vector<float>> priors,vector<float> variances){
    vector<float> input_size={640,640};
    float resize_scale=1.0;
    vector<Bbox> boxes;
    for(int i=0;i<priors.size();++i){
        float b1=priors[i][0]+loc[4*i]*variances[0]*priors[i][2];
        float b2=priors[i][1]+loc[4*i+1]*variances[0]*priors[i][3];
        float b3=priors[i][2]*exp(loc[4*i+2]*variances[1]);
        float b4=priors[i][3]*exp(loc[4*i+3]*variances[1]);
        b1=b1-b3/float(2);
        b2=b2-b4/float(2);
        b3=b3+b1;
        b4=b4+b2;
        float l1=priors[i][0]+pre[10*i]*variances[0]*priors[i][2];
        float l2=priors[i][1]+pre[10*i+1]*variances[0]*priors[i][3];
        float l3=priors[i][0]+pre[10*i+2]*variances[0]*priors[i][2];
        float l4=priors[i][1]+pre[10*i+3]*variances[0]*priors[i][3];
        float l5=priors[i][0]+pre[10*i+4]*variances[0]*priors[i][2];
        float l6=priors[i][1]+pre[10*i+5]*variances[0]*priors[i][3];
        float l7=priors[i][0]+pre[10*i+6]*variances[0]*priors[i][2];
        float l8=priors[i][1]+pre[10*i+7]*variances[0]*priors[i][3];
        float l9=priors[i][0]+pre[10*i+8]*variances[0]*priors[i][2];
        float l10=priors[i][1]+pre[10*i+9]*variances[0]*priors[i][3];
        b1>0?b1:0;
        b2>0?b2:0;
        b3>640?640:b3;
        b4>640?640:b4;
        Bbox tmp_box={.xmin=b1*input_size[0]/resize_scale,.ymin=b2*input_size[1]/resize_scale,.xmax=b3*input_size[0]/resize_scale,.ymax=b4*input_size[1]/resize_scale,
        .score=score[2*i+1],.x1=(l1*input_size[0])/resize_scale,.y1=l2*input_size[1]/resize_scale,.x2=l3*input_size[0]/resize_scale,.y2=l4*input_size[1]/resize_scale,
        .x3=l5*input_size[0]/resize_scale,.y3=l6*input_size[1]/resize_scale,.x4=l7*input_size[0]/resize_scale,.y4=l8*input_size[1]/resize_scale,.x5=l9*input_size[0]/resize_scale,.y5=l10*input_size[1]/resize_scale};
        boxes.push_back(tmp_box);
    }
    return boxes;
}



//NMS
void RetinaFace::nms_cpu(std::vector<Bbox> &bboxes, float threshold){
    if (bboxes.empty()){
        return ;
    }
    // 1.之前需要按照score排序
    std::sort(bboxes.begin(), bboxes.end(), [&](Bbox b1, Bbox b2){return b1.score>b2.score;});
    // 2.先求出所有bbox自己的大小
    std::vector<float> area(bboxes.size());
    for (int i=0; i<bboxes.size(); ++i){
        area[i] = (bboxes[i].xmax - bboxes[i].xmin + 1) * (bboxes[i].ymax - bboxes[i].ymin + 1);
    }
    // 3.循环
    for (int i=0; i<bboxes.size(); ++i){
        for (int j=i+1; j<bboxes.size(); ){
            float left = std::max(bboxes[i].xmin, bboxes[j].xmin);
            float right = std::min(bboxes[i].xmax, bboxes[j].xmax);
            float top = std::max(bboxes[i].ymin, bboxes[j].ymin);
            float bottom = std::min(bboxes[i].ymax, bboxes[j].ymax);
            float width = std::max(right - left + 1, 0.f);
            float height = std::max(bottom - top + 1, 0.f);
            float u_area = height * width;
            float iou = (u_area) / (area[i] + area[j] - u_area);
            if (iou>=threshold){
                bboxes.erase(bboxes.begin()+j);
                area.erase(area.begin()+j);
            }else{
                ++j;
            }
        }
    }
}

// 根据阈值筛选
vector<Bbox> RetinaFace::select_score(vector<Bbox> bboxes,float threshold,float w_r,float h_r){
    vector<Bbox> results;
    for(Bbox &box:bboxes){
        if (float(box.score)>=threshold){
            box.xmin=box.xmin/w_r;
            box.ymin=box.ymin/h_r;
            box.xmax=box.xmax/w_r;
            box.ymax=box.ymax/h_r;
            box.x1=box.x1/w_r;
            box.y1=box.y1/h_r;
            box.x2=box.x2/w_r;
            box.y2=box.y2/h_r;
            box.x3=box.x3/w_r;
            box.y3=box.y3/h_r;
            box.x4=box.x4/w_r;
            box.y4=box.y4/h_r;
            box.x5=box.x5/w_r;
            box.y5=box.y5/h_r;
            results.push_back(box);
        }
    }
    return results;
}

// 数据后处理
vector<Bbox> RetinaFace::bbox_process(vector<Bbox> bboxes,float frame_w,float frame_h){
    vector<Bbox> result_bboxes;
    for(Bbox &bbox:bboxes){
        Bbox new_bbox;
        float face_w=bbox.xmax-bbox.xmin;
        float face_h=bbox.ymax-bbox.ymin;
        new_bbox.xmin=bbox.xmin-face_w*0.15;
        new_bbox.xmax=bbox.xmax+face_w*0.15;
        new_bbox.ymin=bbox.ymin;
        new_bbox.ymax=bbox.ymax+face_h*0.15;
        new_bbox.xmin=new_bbox.xmin>0?new_bbox.xmin:0;
        new_bbox.ymin=new_bbox.ymin>0?new_bbox.ymin:0;
        new_bbox.xmax=new_bbox.xmax>frame_w?frame_w:new_bbox.xmax;
        new_bbox.ymax=new_bbox.ymax>frame_h?frame_h:new_bbox.ymax;
        new_bbox.score=bbox.score;
        new_bbox.x1=bbox.x1>0?bbox.x1:0;
        new_bbox.y1=bbox.y1>0?bbox.y1:0;
        new_bbox.x2=bbox.x2>0?bbox.x2:0;
        new_bbox.y2=bbox.y2>0?bbox.y2:0;
        new_bbox.x3=bbox.x3>0?bbox.x3:0;
        new_bbox.y3=bbox.y3>0?bbox.y3:0;
        new_bbox.x4=bbox.x4>0?bbox.x4:0;
        new_bbox.y4=bbox.y4>0?bbox.y4:0;
        new_bbox.x5=bbox.x5>0?bbox.x5:0;
        new_bbox.y5=bbox.y5>0?bbox.y5:0;
        result_bboxes.push_back(new_bbox);
    
    }
    return result_bboxes;
}


// 推理
vector<Bbox> RetinaFace::detect(string image_path){
    Mat image = cv::imread(image_path);
    float w_r=float(input_size[0])/float(image.cols);
    float h_r=float(input_size[1])/float(image.rows);
    Mat input_data;
    cv::resize(image,input_data,Size(input_size[0],input_size[1]));
    input_data = input_data-mean;
    input_data.convertTo(input_data, CV_32F);
    std::vector<std::vector<cv::Mat>> nChannels;
    std::vector<cv::Mat> rgbChannels(3);
    cv::split(input_data, rgbChannels);
    nChannels.push_back(rgbChannels); //  NHWC  转NCHW
    auto *pvData = malloc(1 * 3 * input_size[1] * input_size[0] *sizeof(float));
    int nPlaneSize = input_size[0] * input_size[1];
    for (int c = 0; c < 3; ++c)
    {
    cv::Mat matPlane = nChannels[0][c];
    memcpy((float *)(pvData) + c * nPlaneSize,\
            matPlane.data, nPlaneSize * sizeof(float));
    }
    auto inTensor = net->getSessionInput(session, NULL);
    net->resizeTensor(inTensor, {1, 3, input_size[1],input_size[0]});
    net->resizeSession(session);
    auto nchwTensor = new Tensor(inTensor, Tensor::CAFFE);
    ::memcpy(nchwTensor->host<float>(), pvData, nPlaneSize * 3 * sizeof(float));
    inTensor->copyFromHostTensor(nchwTensor);
//     //推理
    net->runSession(session);
    auto output0= net->getSessionOutput(session, "output0");
    auto output1= net->getSessionOutput(session, "output1");
    auto output2= net->getSessionOutput(session, "output2");
    MNN::Tensor feat_tensor0(output0, MNN::Tensor::CAFFE);
    MNN::Tensor feat_tensor1(output1, MNN::Tensor::CAFFE);
    MNN::Tensor feat_tensor2(output2, MNN::Tensor::CAFFE);
    output0->copyToHostTensor(&feat_tensor0);
    output1->copyToHostTensor(&feat_tensor1);
    output2->copyToHostTensor(&feat_tensor2);
    auto loc = feat_tensor0.host<float>();
    auto score = feat_tensor1.host<float>();
    auto landm = feat_tensor2.host<float>();

    vector<Bbox> result_boxes = decode(loc,score,landm,anchors,variances);
    vector<Bbox> results=select_score(result_boxes,confidence_threshold,w_r,h_r);
    
    nms_cpu(results,nms_threshold);
    if(is_bbox_process){
        vector<Bbox> res_bboxes=bbox_process(results,image.cols,image.rows);
        return res_bboxes;

    }else{
        return results;
    }
}
vector<Bbox> RetinaFace::detect_image(Mat image){
    float w_r=float(input_size[0])/float(image.cols);
    float h_r=float(input_size[1])/float(image.rows);
    Mat input_data;
    cv::resize(image,input_data,Size(input_size[0],input_size[1]));
    input_data = input_data-mean;
    input_data.convertTo(input_data, CV_32F);
    std::vector<std::vector<cv::Mat>> nChannels;
    std::vector<cv::Mat> rgbChannels(3);
    cv::split(input_data, rgbChannels);
    nChannels.push_back(rgbChannels); //  NHWC  转NCHW
    auto *pvData = malloc(1 * 3 * input_size[1] * input_size[0] *sizeof(float));
    int nPlaneSize = input_size[0] * input_size[1];
    for (int c = 0; c < 3; ++c)
    {
    cv::Mat matPlane = nChannels[0][c];
    memcpy((float *)(pvData) + c * nPlaneSize,\
            matPlane.data, nPlaneSize * sizeof(float));
    }
    auto inTensor = net->getSessionInput(session, NULL);
    net->resizeTensor(inTensor, {1, 3, input_size[1],input_size[0]});
    net->resizeSession(session);
    auto nchwTensor = new Tensor(inTensor, Tensor::CAFFE);
    ::memcpy(nchwTensor->host<float>(), pvData, nPlaneSize * 3 * sizeof(float));
    inTensor->copyFromHostTensor(nchwTensor);
//     //推理
    net->runSession(session);
    auto output0= net->getSessionOutput(session, "output0");
    auto output1= net->getSessionOutput(session, "output1");
    auto output2= net->getSessionOutput(session, "output2");
    MNN::Tensor feat_tensor0(output0, MNN::Tensor::CAFFE);
    MNN::Tensor feat_tensor1(output1, MNN::Tensor::CAFFE);
    MNN::Tensor feat_tensor2(output2, MNN::Tensor::CAFFE);
    output0->copyToHostTensor(&feat_tensor0);
    output1->copyToHostTensor(&feat_tensor1);
    output2->copyToHostTensor(&feat_tensor2);
    auto loc = feat_tensor0.host<float>();
    auto score = feat_tensor1.host<float>();
    auto landm = feat_tensor2.host<float>();

    vector<Bbox> result_boxes = decode(loc,score,landm,anchors,variances);
    vector<Bbox> results=select_score(result_boxes,confidence_threshold,w_r,h_r);
    
    nms_cpu(results,nms_threshold);
    if(is_bbox_process){
        vector<Bbox> res_bboxes=bbox_process(results,image.cols,image.rows);
        return res_bboxes;

    }else{
        return results;
    }
}