# face_detecter.py (8.18 KB)
import os
import numpy as np
import MNN
import cv2
import logging
from retinaface import PriorBox


def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression implemented with NumPy only.

    Args:
        dets: 2-D array whose first five columns are
            ``x1, y1, x2, y2, score``.
        thresh: Overlap limit. A candidate whose intersection-over-union
            with an already selected box is greater than this value is
            discarded.

    Returns:
        list: Row indices into ``dets`` of the surviving boxes, ordered
        from highest to lowest score.
    """
    left, top, right, bottom, confidences = (dets[:, col] for col in range(5))

    # Box extents are treated as inclusive pixel coordinates, hence the +1.
    box_areas = (right - left + 1) * (bottom - top + 1)
    remaining = confidences.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)

        # Intersection rectangle between the best box and every other one.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        intersection = inter_w * inter_h
        iou = intersection / (box_areas[best] + box_areas[rest] - intersection)

        # Only candidates that do not overlap too much stay in the queue.
        remaining = rest[iou <= thresh]

    return selected


def decode_landm(pre, priors, variances):
    """Turn landmark offsets into landmark coordinates using prior boxes.

    Args:
        pre: 2-D array with ten columns per row, read as five consecutive
            ``(x, y)`` offset pairs.
        priors: 2-D array; columns 0-1 are used as the prior centre and the
            remaining columns as the prior size.
        variances: Sequence whose first element scales the offsets.

    Returns:
        numpy.ndarray: Five decoded ``(x, y)`` pairs per row, concatenated
        along axis 1.
    """
    centers = priors[:, :2]
    sizes = priors[:, 2:]
    # Each landmark is the prior centre shifted by a size-scaled offset.
    points = [
        centers + pre[:, start:start + 2] * variances[0] * sizes
        for start in range(0, 10, 2)
    ]
    return np.concatenate(points, 1)


def decode(loc, priors, variances):
    """Turn box regression offsets into corner-form boxes using prior boxes.

    Args:
        loc: 2-D array; columns 0-1 are centre offsets, the remaining
            columns are log-scale size offsets.
        priors: 2-D array; columns 0-1 are used as the prior centre and the
            remaining columns as the prior size.
        variances: Two-element sequence; the first scales the centre
            offsets, the second scales the size offsets.

    Returns:
        numpy.ndarray: One ``[x1, y1, x2, y2]`` row per prior.
    """
    centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])

    # Convert (centre, size) into (top-left, bottom-right) corners.
    top_left = centers - sizes / 2
    bottom_right = sizes + top_left
    return np.concatenate((top_left, bottom_right), 1)


class Face_Detector(object):
    """Face detector that runs a model through an MNN interpreter session.

    The model is expected to expose three outputs named ``output0`` (box
    regression), ``output1`` (two-class confidences) and ``output2``
    (five-point landmark regression), which are decoded against the priors
    produced by ``PriorBox``.
    """

    def __init__(self, model_path):
        """Load the MNN model and create the inference session.

        Args:
            model_path: Path of the ``.mnn`` model file handed to
                ``MNN.Interpreter``.
        """
        logging.info('******** Start Init Face Detector ********')
        self.det_interpreter = MNN.Interpreter(model_path)
        self.det_session = self.det_interpreter.createSession()
        self.det_input_tensor = self.det_interpreter.getSessionInput(self.det_session)
        logging.info('******** Success Init Face Detector  ********')

    @staticmethod
    def _as_rows(values, width, rows=None):
        """Reshape a flat output buffer into a ``(rows, width)`` array.

        When ``rows`` is omitted it is derived from the buffer length and an
        incomplete trailing row is dropped.
        """
        flat = np.asarray(values).reshape(-1)
        if rows is None:
            rows = flat.size // width
        return flat[:rows * width].reshape(rows, width)

    def detect(self, frame, thr):
        """Detect faces in an image.

        Args:
            frame: Three-channel image array (height, width, channels), e.g.
                as returned by ``cv2.imread``.
            thr: Minimum score a detection needs in order to be returned.
                ``None`` falls back to 0.5. (Earlier revisions accepted this
                argument but always used 0.5.)

        Returns:
            tuple: ``(face_bboxes, face_landmarks, max_index)`` where
            ``face_bboxes`` is a list of ``[x1, y1, x2, y2]`` integer boxes
            in ``frame`` coordinates, ``face_landmarks`` is a parallel list
            of five ``(x, y)`` integer tuples, and ``max_index`` is the
            position of the largest-area box in those lists (0 when no face
            is returned).
        """
        logging.info('******** Start Face Detect ********')
        input_size = 320
        confidence_threshold = 0.02
        nms_threshold = 0.4
        keep_top_k = 100
        variance = [0.1, 0.2]
        vis_threshold = 0.5 if thr is None else thr

        img = np.float32(cv2.resize(frame, (input_size, input_size)))
        im_height, im_width = img.shape[:2]
        # Multipliers applied to decoded boxes (2 points) and landmarks
        # (5 points) to express them in network-input pixels.
        scale = np.array([im_width, im_height] * 2)
        scale1 = np.array([im_width, im_height] * 5)

        # Ratios between the network input and the original frame, used to
        # map results back to frame coordinates.
        w_r = input_size / frame.shape[1]
        h_r = input_size / frame.shape[0]

        # Per-channel offsets subtracted from the input; presumably the
        # channel means the model was trained with -- confirm against the
        # training pipeline.
        img -= (104, 117, 123)
        img = np.expand_dims(img.transpose(2, 0, 1), axis=0)

        input_tensor = MNN.Tensor((1, 3, input_size, input_size), MNN.Halide_Type_Float,
                                  img, MNN.Tensor_DimensionType_Caffe)
        self.det_input_tensor.copyFrom(input_tensor)
        self.det_interpreter.runSession(self.det_session)

        bbox_output = self.det_interpreter.getSessionOutput(self.det_session, 'output0').getData()
        conf_output = self.det_interpreter.getSessionOutput(self.det_session, 'output1').getData()
        landmark_output = self.det_interpreter.getSessionOutput(self.det_session, 'output2').getData()

        # The confidence buffer defines how many priors there are; the other
        # two buffers are cut to the same number of rows.
        norm_confs = self._as_rows(conf_output, 2)
        num_priors = norm_confs.shape[0]
        norm_bboxes = self._as_rows(bbox_output, 4, num_priors)
        norm_landmarks = self._as_rows(landmark_output, 10, num_priors)

        priors = PriorBox(image_size=(im_height, im_width)).forward()

        scores = norm_confs[:, 1]
        boxes = decode(norm_bboxes, priors, variance) * scale
        landms = decode_landm(norm_landmarks, priors, variance) * scale1

        # Ignore low scores.
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # Keep only the top-K candidates before NMS.
        order = scores.argsort()[::-1][:keep_top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # NMS output is a subset of the (at most keep_top_k) candidates, so
        # no further truncation is needed afterwards.
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, nms_threshold)
        dets = np.concatenate((dets[keep, :], landms[keep]), axis=1)

        face_bboxes = []
        face_landmarks = []
        max_area = float('-inf')
        max_index = 0

        for det in dets:
            if det[4] < vis_threshold:
                continue

            # Map from network-input pixels back to frame pixels.
            x1 = int(det[0] / w_r)
            y1 = int(det[1] / h_r)
            x2 = int(det[2] / w_r)
            y2 = int(det[3] / h_r)
            points = [(int(det[k] / w_r), int(det[k + 1] / h_r))
                      for k in range(5, 15, 2)]

            area = (x2 - x1) * (y2 - y1)
            if area > max_area:
                max_area = area
                # Index the face is about to receive in the output lists.
                max_index = len(face_bboxes)

            face_bboxes.append([x1, y1, x2, y2])
            face_landmarks.append(points)

        return face_bboxes, face_landmarks, max_index
               

if __name__ == '__main__':
    # Demo: run the detector once on a sample image and report the result.
    det_face_model_path = r'/home/jwq/PycharmProjects/situ/src/face_det/Pytorch_Retinaface/weights/mobilenet_0.25.mnn'
    image_path = r'input/3.jpg'
    thr = 0.5

    face_detector = Face_Detector(det_face_model_path)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning
        # None instead of raising.
        raise SystemExit('Failed to read image: %s' % image_path)

    face_bboxes, face_landmarks, max_index = face_detector.detect(image, thr)
    print('Detected %d face(s); largest face index: %d' % (len(face_bboxes), max_index))