init project

jiangwenqiang
Showing 14 changed files with 827 additions and 0 deletions
__pycache__/face_detecter.cpython-36.pyc
__pycache__/face_id.cpython-36.pyc
__pycache__/retinaface.cpython-36.pyc
face_detecter.py
face_id.py
input/0.jpg
input/1.jpg
input/IMG_2099.jpeg
input/IMG_3370.JPG
mobile_face_id_demo.py
models/cls_face_mnn_1.0.0_v0.0.2.mnn
models/det_face_mnn_1.0.0_v0.0.2.mnn
q
retinaface.py
--- a/__pycache__/face_detecter.cpython-36.pyc 0 → 100644
View file @f6b8874
+++ b/__pycache__/face_detecter.cpython-36.pyc 0 → 100644
View file @f6b8874
--- a/__pycache__/face_id.cpython-36.pyc 0 → 100644
View file @f6b8874
+++ b/__pycache__/face_id.cpython-36.pyc 0 → 100644
View file @f6b8874
--- a/__pycache__/retinaface.cpython-36.pyc 0 → 100644
View file @f6b8874
+++ b/__pycache__/retinaface.cpython-36.pyc 0 → 100644
View file @f6b8874
--- a/face_detecter.py 0 → 100644
View file @f6b8874
+++ b/face_detecter.py 0 → 100644
View file @f6b8874
+import os
+import numpy as np
+import MNN
+import cv2
+import logging
+from retinaface import PriorBox
+
+
+def py_cpu_nms(dets, thresh):
+    """Pure Python NMS baseline."""
+    x1 = dets[:, 0]
+    y1 = dets[:, 1]
+    x2 = dets[:, 2]
+    y2 = dets[:, 3]
+    scores = dets[:, 4]
+
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+        inds = np.where(ovr <= thresh)[0]
+        order = order[inds + 1]
+
+    return keep
+
+
+def decode_landm(pre, priors, variances):
+
+    landms = np.concatenate((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
+                             priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
+                             priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
+                             priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
+                             priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:]), 1)
+    return landms
+
+
+def decode(loc, priors, variances):
+
+    boxes = np.concatenate((
+        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
+        priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])), 1)
+    boxes[:, :2] -= boxes[:, 2:] / 2
+    boxes[:, 2:] += boxes[:, :2]
+    return boxes
+
+
+class Face_Detector(object):
+    def __init__(self, model_path):
+        logging.info('******** Start Init Face Detector ********')
+        self.det_interpreter = MNN.Interpreter(model_path)
+        self.det_session = self.det_interpreter.createSession()
+        self.det_input_tensor = self.det_interpreter.getSessionInput(self.det_session)
+        logging.info('******** Success Init Face Detector  ********')
+
+    def detect(self, frame, thr):
+        logging.info('******** Start Face Detect ********')
+        input_size = 320
+        img = cv2.resize(frame, (input_size, input_size))
+ 
+        img = np.float32(img)
+        im_height, im_width, _ = img.shape
+        scale = np.array([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
+        scale1 = np.array([img.shape[1], img.shape[0], img.shape[1], img.shape[0], 
+                           img.shape[1], img.shape[0], img.shape[1], img.shape[0],
+                           img.shape[1], img.shape[0]])
+  
+        w_r = input_size/frame.shape[1]
+        h_r = input_size/frame.shape[0]
+
+        confidence_threshold = 0.02
+        vis_threshold = 0.5
+        nms_threshold = 0.4
+        keep_top_k = 100
+        variance = [0.1, 0.2]
+        img -= (104, 117, 123)
+        img = img.transpose(2, 0, 1)
+        img = np.expand_dims(img, axis=0)
+      
+        input_tensor = MNN.Tensor((1, 3, input_size, input_size), MNN.Halide_Type_Float, img, MNN.Tensor_DimensionType_Caffe)
+        self.det_input_tensor.copyFrom(input_tensor)
+        self.det_interpreter.runSession(self.det_session)
+
+        bbox_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output0')
+        conf_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output1')
+        landmark_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output2')
+
+        bbox_output = bbox_output_tensor.getData()
+        conf_output = conf_output_tensor.getData()
+        landmark_output = landmark_output_tensor.getData()
+
+        norm_confs = list()
+        for i in range(int(len(conf_output)/2)):
+            norm_confs.append([conf_output[i * 2 + 0], conf_output[i * 2 + 1]])
+
+        norm_bboxes = list()
+        for i in range(int(len(conf_output)/2)):
+            norm_bboxes.append([bbox_output[i * 4 + 0], bbox_output[i * 4 + 1], bbox_output[i * 4 + 2], bbox_output[i * 4 + 3]])
+
+        norm_landmarks = list()
+        for i in range(int(len(conf_output)/2)):
+            norm_landmarks.append([landmark_output[i * 10 + 0], landmark_output[i * 10 + 1], 
+                                   landmark_output[i * 10 + 2], landmark_output[i * 10 + 3], 
+                                   landmark_output[i * 10 + 4], landmark_output[i * 10 + 5], 
+                                   landmark_output[i * 10 + 6], landmark_output[i * 10 + 7], 
+                                   landmark_output[i * 10 + 8], landmark_output[i * 10 + 9]])       
+        
+        norm_confs = np.array(norm_confs)
+        norm_bboxes = np.array(norm_bboxes)
+        norm_landmarks = np.array(norm_landmarks)
+        
+        priorbox = PriorBox(image_size=(im_height, im_width))
+        priors = priorbox.forward()
+     
+        scores = norm_confs[:, 1]
+
+        boxes = decode(norm_bboxes, priors, variance)
+        boxes = boxes * scale 
+    
+        landms = decode_landm(norm_landmarks, priors, variance)
+        landms = landms * scale1 
+        
+        # ignore low scores
+        inds = np.where(scores > confidence_threshold)[0]
+        boxes = boxes[inds]
+        landms = landms[inds]
+        scores = scores[inds]
+
+        # keep top-K before NMS
+        order = scores.argsort()[::-1][:keep_top_k]
+        boxes = boxes[order]
+        landms = landms[order]
+        scores = scores[order]
+
+        # do NMS
+        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
+        keep = py_cpu_nms(dets, nms_threshold)
+        
+        dets = dets[keep, :]
+        landms = landms[keep]
+
+        # keep top-K faster NMS
+        dets = dets[:keep_top_k, :]
+        landms = landms[:keep_top_k, :]
+        
+        dets = np.concatenate((dets, landms), axis=1)
+        face_bboxes = []
+        face_landmarks = []
+        max_area = float('-inf')
+        max_index = 0
+
+        i = 0
+        for b in dets:
+            if b[4] < vis_threshold:
+                continue
+            resize_b = []
+            x1 = int(b[0] / w_r)
+            y1 = int(b[1] / h_r)
+            x2 = int(b[2] / w_r)
+            y2 = int(b[3] / h_r)
+            x3 = int(b[5] / w_r)
+            y3 = int(b[6] / h_r)
+            x4 = int(b[7] / w_r)
+            y4 = int(b[8] / h_r)
+            x5 = int(b[9] / w_r)
+            y5 = int(b[10] / h_r)
+            x6 = int(b[11] / w_r)
+            y6 = int(b[12] / h_r)
+            x7 = int(b[13] / w_r)
+            y7 = int(b[14] / h_r) 
+            resize_b = [x1, y1, x2, y2, 0, x3, y3, x4, y4, x5, y5, x6, y6, x7, y7]
+            
+            # cv2.rectangle(frame, (resize_b[0], resize_b[1]), (resize_b[2], resize_b[3]), (0, 0, 255), 2)
+            # cv2.circle(frame, (resize_b[5], resize_b[6]), 1, (0, 0, 255), 4)
+            # cv2.circle(frame, (resize_b[7], resize_b[8]), 1, (0, 255, 255), 4)
+            # cv2.circle(frame, (resize_b[9], resize_b[10]), 1, (255, 0, 255), 4)
+            # cv2.circle(frame, (resize_b[11], resize_b[12]), 1, (0, 255, 0), 4)
+            # cv2.circle(frame, (resize_b[13], resize_b[14]), 1, (255, 0, 0), 4)
+
+            area = (resize_b[2] - resize_b[0]) * (resize_b[3] - resize_b[1])
+            if area > max_area:
+                max_area = area
+                max_index = i
+            i += 1
+
+            face_bboxes.append([resize_b[0], resize_b[1], resize_b[2], resize_b[3]])
+            face_landmarks.append([(resize_b[5], resize_b[6]), 
+                                   (resize_b[7], resize_b[8]), 
+                                   (resize_b[9], resize_b[10]), 
+                                   (resize_b[11], resize_b[12]), 
+                                   (resize_b[13], resize_b[14])])
+    
+        # import time
+        # cv2.imwrite('results/0.jpg', frame)
+        return face_bboxes, face_landmarks, max_index
+               
+
+if __name__ == '__main__':
+    det_face_model_path = r'/home/jwq/PycharmProjects/situ/src/face_det/Pytorch_Retinaface/weights/mobilenet_0.25.mnn'
+    image_path = r'input/3.jpg'
+    image_save_path = r'results/3.jpg'
+    thr = 0.5
+
+    face_detector = Face_Detector(det_face_model_path)
+    image = cv2.imread(image_path)
+    face_detector.detect(image, thr)
+    # image_ploted = face_detector.plot(image, face_bboxes)
+    # cv2.imwrite(image_save_path, image_ploted)
--- a/face_id.py 0 → 100644
View file @f6b8874
+++ b/face_id.py 0 → 100644
View file @f6b8874
+import MNN
+import cv2
+import numpy as np
+import logging
+
+class Face_Recognizer(object):
+    def __init__(self, model_path):
+        logging.info('******** Start Init Face ID ********')
+        self.reg_interpreter = MNN.Interpreter(model_path)
+        self.reg_session = self.reg_interpreter.createSession()
+        self.reg_input_tensor = self.reg_interpreter.getSessionInput(self.reg_session)
+        logging.info('******** Success Init Face ID  ********')
+
+    def recognize(self, imgs):
+        
+        feats = []
+        for i in range(len(imgs)):
+            img = imgs[i] 
+
+            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+            img = img.astype(np.float)
+            img = (img / 255. - 0.5) / 0.5
+            img = img.transpose(2, 0, 1)
+            img = np.expand_dims(img, axis=0)
+        
+            input_tensor = MNN.Tensor((1, 3, 112, 112), MNN.Halide_Type_Float, img, MNN.Tensor_DimensionType_Caffe)
+            self.reg_input_tensor.copyFrom(input_tensor)
+            self.reg_interpreter.runSession(self.reg_session)
+            output_tensor = self.reg_interpreter.getSessionOutput(self.reg_session, 'output0')
+            output = output_tensor.getData()
+
+            feats.append(output)
+
+        feats_np = np.array(feats)
+        return feats_np
--- a/input/0.jpg 0 → 100644
View file @f6b8874
+++ b/input/0.jpg 0 → 100644
View file @f6b8874
--- a/input/1.jpg 0 → 100644
View file @f6b8874
+++ b/input/1.jpg 0 → 100644
View file @f6b8874
--- a/input/IMG_2099.jpeg 0 → 100644
View file @f6b8874
+++ b/input/IMG_2099.jpeg 0 → 100644
View file @f6b8874
--- a/input/IMG_3370.JPG 0 → 100644
View file @f6b8874
+++ b/input/IMG_3370.JPG 0 → 100644
View file @f6b8874
--- a/mobile_face_id_demo.py 0 → 100644
View file @f6b8874
+++ b/mobile_face_id_demo.py 0 → 100644
View file @f6b8874
+import os
+import numpy as np
+import MNN
+import cv2
+import logging
+from skimage import transform as trans
+
+from face_detecter import Face_Detector
+from face_id import Face_Recognizer
+
+
+def preprocess(image, landmarks):
+    src = np.array([[38.2946, 51.6963],
+                    [73.5318, 51.5014],
+                    [56.0252, 71.7366],
+                    [41.5493, 92.3655],
+                    [70.7299, 92.2041] ], dtype=np.float32)
+
+    landmarks = np.array(landmarks)
+    dst = landmarks.astype(np.float32)
+    tform = trans.SimilarityTransform()
+    tform.estimate(dst, src)
+    M = tform.params[0:2,:]
+    warped = cv2.warpAffine(image, M, (112, 112), borderValue=0.0)
+    return warped
+
+
+def get_norm_face(image, landmarks):
+    norm_image = preprocess(image, landmarks)
+    norm_image = cv2.cvtColor(norm_image, cv2.COLOR_BGR2RGB).astype(np.float32)
+    norm_image = cv2.resize(norm_image, (112, 112))
+    # norm_image = norm_image.transpose((2, 0, 1))
+    # norm_image = norm_image.transpose((1,2,0))
+    # norm_image = cv2.resize(norm_image, (112, 112))[:,:,::-1]
+    return norm_image
+ 
+
+if __name__ == '__main__':
+    det_face_model_path = r'models/det_face_mnn_1.0.0_v0.0.2.mnn'
+    reg_face_id_model_path = r'models/cls_face_mnn_1.0.0_v0.0.2.mnn'
+    
+    id_image_path = r'input/IMG_2099.jpeg'
+    life_image_path = r'input/1.jpg'    
+
+    face_det_thr = 0.5
+    face_recongnize_thr = 0.2
+
+    face_detector = Face_Detector(det_face_model_path)
+    face_recognizer = Face_Recognizer(reg_face_id_model_path)
+    for i in range(10):
+        id_image = cv2.imread(id_image_path)
+        life_image = cv2.imread(life_image_path)
+         
+        id_face_bboxes, id_face_landmarks, id_max_idx = face_detector.detect(id_image, face_det_thr)
+        life_face_bboxes, life_face_landmarks, life_max_idx = face_detector.detect(life_image, face_det_thr)
+        print(id_face_bboxes)
+        print(life_face_bboxes)
+        id_norm_image = get_norm_face(id_image, id_face_landmarks[id_max_idx])
+        id_norm_image = np.transpose(id_norm_image, (2, 0, 1))
+        norm_images = [id_norm_image]
+
+        for j in range(len(life_face_landmarks)):
+            life_norm_image = get_norm_face(life_image, life_face_landmarks[j])
+            life_norm_image = np.transpose(life_norm_image, (2, 0, 1))
+            norm_images.append(life_norm_image)
+
+        embeddings = face_recognizer.recognize(norm_images) 
+        gallery_vector = np.mat(embeddings[0])
+        res = False
+        sim = 0
+        for p in range(1, len(embeddings)):
+            compare_vector = np.mat(embeddings[p])
+
+            dot = np.sum(np.multiply(gallery_vector, compare_vector), axis=1)
+            norm = np.linalg.norm(gallery_vector, axis=1) * np.linalg.norm(compare_vector, axis=1)
+            dist_1 = dot / norm
+
+            sim = dist_1.tolist()
+            sim = sim[0][0]
+
+            if sim > face_recongnize_thr: res = True
+            print('sim {} : {}'.format(j, sim))
+        
+        
--- a/models/cls_face_mnn_1.0.0_v0.0.2.mnn 0 → 100644
View file @f6b8874
+++ b/models/cls_face_mnn_1.0.0_v0.0.2.mnn 0 → 100644
View file @f6b8874
--- a/models/det_face_mnn_1.0.0_v0.0.2.mnn 0 → 100644
View file @f6b8874
+++ b/models/det_face_mnn_1.0.0_v0.0.2.mnn 0 → 100644
View file @f6b8874
--- a/q 0 → 100644
View file @f6b8874
+++ b/q 0 → 100644
View file @f6b8874
+import os
+import numpy as np
+import MNN
+import cv2
+import logging
+from retinaface import PriorBox
+
+
+def py_cpu_nms(dets, thresh):
+    """Pure Python NMS baseline."""
+    x1 = dets[:, 0]
+    y1 = dets[:, 1]
+    x2 = dets[:, 2]
+    y2 = dets[:, 3]
+    scores = dets[:, 4]
+
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+        inds = np.where(ovr <= thresh)[0]
+        order = order[inds + 1]
+
+    return keep
+
+
+def decode_landm(pre, priors, variances):
+
+    landms = np.concatenate((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
+                             priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
+                             priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
+                             priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
+                             priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:]), 1)
+    return landms
+
+
+def decode(loc, priors, variances):
+
+    boxes = np.concatenate((
+        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
+        priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])), 1)
+    boxes[:, :2] -= boxes[:, 2:] / 2
+    boxes[:, 2:] += boxes[:, :2]
+    return boxes
+
+
+class Face_Detector(object):
+    def __init__(self, model_path):
+        logging.info('******** Start Init Face Detector ********')
+        self.det_interpreter = MNN.Interpreter(model_path)
+        self.det_session = self.det_interpreter.createSession()
+        self.det_input_tensor = self.det_interpreter.getSessionInput(self.det_session)
+        logging.info('******** Success Init Face Detector  ********')
+
+    def detect(self, frame, thr):
+        logging.info('******** Start Face Detect ********')
+        input_size = 320
+        img = cv2.resize(frame, (input_size, input_size))
+ 
+        img = np.float32(img)
+        im_height, im_width, _ = img.shape
+        scale = np.array([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
+        scale1 = np.array([img.shape[1], img.shape[0], img.shape[1], img.shape[0], 
+                           img.shape[1], img.shape[0], img.shape[1], img.shape[0],
+                           img.shape[1], img.shape[0]])
+  
+        w_r = input_size/frame.shape[1]
+        h_r = input_size/frame.shape[0]
+
+        confidence_threshold = 0.02
+        vis_threshold = 0.5
+        nms_threshold = 0.4
+        keep_top_k = 100
+        variance = [0.1, 0.2]
+        img -= (104, 117, 123)
+        img = img.transpose(2, 0, 1)
+        img = np.expand_dims(img, axis=0)
+      
+        input_tensor = MNN.Tensor((1, 3, input_size, input_size), MNN.Halide_Type_Float, img, MNN.Tensor_DimensionType_Caffe)
+        self.det_input_tensor.copyFrom(input_tensor)
+        self.det_interpreter.runSession(self.det_session)
+
+        bbox_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output0')
+        conf_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output1')
+        landmark_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output2')
+
+        bbox_output = bbox_output_tensor.getData()
+        conf_output = conf_output_tensor.getData()
+        landmark_output = landmark_output_tensor.getData()
+
+        norm_confs = list()
+        for i in range(int(len(conf_output)/2)):
+            norm_confs.append([conf_output[i * 2 + 0], conf_output[i * 2 + 1]])
+
+        norm_bboxes = list()
+        for i in range(int(len(conf_output)/2)):
+            norm_bboxes.append([bbox_output[i * 4 + 0], bbox_output[i * 4 + 1], bbox_output[i * 4 + 2], bbox_output[i * 4 + 3]])
+
+        norm_landmarks = list()
+        for i in range(int(len(conf_output)/2)):
+            norm_landmarks.append([landmark_output[i * 10 + 0], landmark_output[i * 10 + 1], 
+                                   landmark_output[i * 10 + 2], landmark_output[i * 10 + 3], 
+                                   landmark_output[i * 10 + 4], landmark_output[i * 10 + 5], 
+                                   landmark_output[i * 10 + 6], landmark_output[i * 10 + 7], 
+                                   landmark_output[i * 10 + 8], landmark_output[i * 10 + 9]])       
+        
+        norm_confs = np.array(norm_confs)
+        norm_bboxes = np.array(norm_bboxes)
+        norm_landmarks = np.array(norm_landmarks)
+        
+        priorbox = PriorBox(image_size=(im_height, im_width))
+        priors = priorbox.forward()
+     
+        scores = norm_confs[:, 1]
+
+        boxes = decode(norm_bboxes, priors, variance)
+        boxes = boxes * scale 
+    
+        landms = decode_landm(norm_landmarks, priors, variance)
+        landms = landms * scale1 
+        
+        # ignore low scores
+        inds = np.where(scores > confidence_threshold)[0]
+        boxes = boxes[inds]
+        landms = landms[inds]
+        scores = scores[inds]
+
+        # keep top-K before NMS
+        order = scores.argsort()[::-1][:keep_top_k]
+        boxes = boxes[order]
+        landms = landms[order]
+        scores = scores[order]
+
+        # do NMS
+        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
+        keep = py_cpu_nms(dets, nms_threshold)
+        
+        dets = dets[keep, :]
+        landms = landms[keep]
+
+        # keep top-K faster NMS
+        dets = dets[:keep_top_k, :]
+        landms = landms[:keep_top_k, :]
+        
+        dets = np.concatenate((dets, landms), axis=1)
+        face_bboxes = []
+        face_landmarks = []
+        max_area = float('-inf')
+        max_index = 0
+
+        i = 0
+        for b in dets:
+            if b[4] < vis_threshold:
+                continue
+            resize_b = []
+            x1 = int(b[0] / w_r)
+            y1 = int(b[1] / h_r)
+            x2 = int(b[2] / w_r)
+            y2 = int(b[3] / h_r)
+            x3 = int(b[5] / w_r)
+            y3 = int(b[6] / h_r)
+            x4 = int(b[7] / w_r)
+            y4 = int(b[8] / h_r)
+            x5 = int(b[9] / w_r)
+            y5 = int(b[10] / h_r)
+            x6 = int(b[11] / w_r)
+            y6 = int(b[12] / h_r)
+            x7 = int(b[13] / w_r)
+            y7 = int(b[14] / h_r) 
+            resize_b = [x1, y1, x2, y2, 0, x3, y3, x4, y4, x5, y5, x6, y6, x7, y7]
+            
+            # cv2.rectangle(frame, (resize_b[0], resize_b[1]), (resize_b[2], resize_b[3]), (0, 0, 255), 2)
+            # cv2.circle(frame, (resize_b[5], resize_b[6]), 1, (0, 0, 255), 4)
+            # cv2.circle(frame, (resize_b[7], resize_b[8]), 1, (0, 255, 255), 4)
+            # cv2.circle(frame, (resize_b[9], resize_b[10]), 1, (255, 0, 255), 4)
+            # cv2.circle(frame, (resize_b[11], resize_b[12]), 1, (0, 255, 0), 4)
+            # cv2.circle(frame, (resize_b[13], resize_b[14]), 1, (255, 0, 0), 4)
+
+            area = (resize_b[2] - resize_b[0]) * (resize_b[3] - resize_b[1])
+            if area > max_area:
+                max_area = area
+                max_index = i
+            i += 1
+
+            face_bboxes.append([resize_b[0], resize_b[1], resize_b[2], resize_b[3]])
+            face_landmarks.append([(resize_b[5], resize_b[6]), 
+                                   (resize_b[7], resize_b[8]), 
+                                   (resize_b[9], resize_b[10]), 
+                                   (resize_b[11], resize_b[12]), 
+                                   (resize_b[13], resize_b[14])])
+    
+        # import time
+        # cv2.imwrite('results/0.jpg', frame)
+        return face_bboxes, face_landmarks, max_index
+               
+
+if __name__ == '__main__':
+    det_face_model_path = r'/home/jwq/PycharmProjects/situ/src/face_det/Pytorch_Retinaface/weights/mobilenet_0.25.mnn'
+    image_path = r'input/3.jpg'
+    image_save_path = r'results/3.jpg'
+    thr = 0.5
+
+    face_detector = Face_Detector(det_face_model_path)
+    image = cv2.imread(image_path)
+    face_detector.detect(image, thr)
+    # image_ploted = face_detector.plot(image, face_bboxes)
+    # cv2.imwrite(image_save_path, image_ploted)
--- a/retinaface.py 0 → 100644
View file @f6b8874
+++ b/retinaface.py 0 → 100644
View file @f6b8874
+from itertools import product as product
+from math import ceil
+import numpy as np
+import torch
+import torch.nn as nn
+import torchvision.models.detection.backbone_utils as backbone_utils
+import torchvision.models._utils as _utils
+import torch.nn.functional as F
+from collections import OrderedDict
+
+def conv_bn(inp, oup, stride = 1, leaky = 0):
+    return nn.Sequential(
+        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+        nn.BatchNorm2d(oup),
+        nn.LeakyReLU(negative_slope=leaky, inplace=True)
+    )
+
+def conv_bn_no_relu(inp, oup, stride):
+    return nn.Sequential(
+        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+        nn.BatchNorm2d(oup),
+    )
+
+def conv_bn1X1(inp, oup, stride, leaky=0):
+    return nn.Sequential(
+        nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
+        nn.BatchNorm2d(oup),
+        nn.LeakyReLU(negative_slope=leaky, inplace=True)
+    )
+
+def conv_dw(inp, oup, stride, leaky=0.1):
+    return nn.Sequential(
+        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
+        nn.BatchNorm2d(inp),
+        nn.LeakyReLU(negative_slope= leaky,inplace=True),
+
+        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
+        nn.BatchNorm2d(oup),
+        nn.LeakyReLU(negative_slope= leaky,inplace=True),
+    )
+    
+class ClassHead(nn.Module):
+    def __init__(self,inchannels=512,num_anchors=3):
+        super(ClassHead,self).__init__()
+        self.num_anchors = num_anchors
+        self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1,padding=0)
+
+    def forward(self,x):
+        out = self.conv1x1(x)
+        out = out.permute(0,2,3,1).contiguous()
+        
+        return out.view(out.shape[0], -1, 2)
+        
+        
+class BboxHead(nn.Module):
+    def __init__(self,inchannels=512,num_anchors=3):
+        super(BboxHead,self).__init__()
+        self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1,padding=0)
+
+    def forward(self,x):
+        out = self.conv1x1(x)
+        out = out.permute(0,2,3,1).contiguous()
+
+        return out.view(out.shape[0], -1, 4)
+
+
+class LandmarkHead(nn.Module):
+    def __init__(self,inchannels=512,num_anchors=3):
+        super(LandmarkHead,self).__init__()
+        self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10,kernel_size=(1,1),stride=1,padding=0)
+
+    def forward(self,x):
+        out = self.conv1x1(x)
+        out = out.permute(0,2,3,1).contiguous()
+
+        return out.view(out.shape[0], -1, 10)
+
+
+class SSH(nn.Module):
+    def __init__(self, in_channel, out_channel):
+        super(SSH, self).__init__()
+        assert out_channel % 4 == 0
+        leaky = 0
+        if (out_channel <= 64):
+            leaky = 0.1
+        self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1)
+
+        self.conv5X5_1 = conv_bn(in_channel, out_channel//4, stride=1, leaky = leaky)
+        self.conv5X5_2 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)
+
+        self.conv7X7_2 = conv_bn(out_channel//4, out_channel//4, stride=1, leaky = leaky)
+        self.conv7x7_3 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)
+
+    def forward(self, input):
+        conv3X3 = self.conv3X3(input)
+
+        conv5X5_1 = self.conv5X5_1(input)
+        conv5X5 = self.conv5X5_2(conv5X5_1)
+
+        conv7X7_2 = self.conv7X7_2(conv5X5_1)
+        conv7X7 = self.conv7x7_3(conv7X7_2)
+
+        out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
+        out = F.relu(out)
+        return out
+        
+        
+class FPN(nn.Module):
+    def __init__(self,in_channels_list,out_channels):
+        super(FPN,self).__init__()
+        leaky = 0
+        if (out_channels <= 64):
+            leaky = 0.1
+        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride = 1, leaky = leaky)
+        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride = 1, leaky = leaky)
+        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride = 1, leaky = leaky)
+
+        self.merge1 = conv_bn(out_channels, out_channels, leaky = leaky)
+        self.merge2 = conv_bn(out_channels, out_channels, leaky = leaky)
+
+    def forward(self, input):
+        # names = list(input.keys())
+        input = list(input.values())
+
+        output1 = self.output1(input[0])
+        output2 = self.output2(input[1])
+        output3 = self.output3(input[2])
+
+        up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest")
+        output2 = output2 + up3
+        output2 = self.merge2(output2)
+
+        up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest")
+        output1 = output1 + up2
+        output1 = self.merge1(output1)
+
+        out = [output1, output2, output3]
+        return out
+
+
+class MobileNetV1(nn.Module):
+    def __init__(self):
+        super(MobileNetV1, self).__init__()
+        self.stage1 = nn.Sequential(
+            conv_bn(3, 8, 2, leaky = 0.1),    # 3
+            conv_dw(8, 16, 1),   # 7
+            conv_dw(16, 32, 2),  # 11
+            conv_dw(32, 32, 1),  # 19
+            conv_dw(32, 64, 2),  # 27
+            conv_dw(64, 64, 1),  # 43
+        )
+        self.stage2 = nn.Sequential(
+            conv_dw(64, 128, 2),  # 43 + 16 = 59
+            conv_dw(128, 128, 1), # 59 + 32 = 91
+            conv_dw(128, 128, 1), # 91 + 32 = 123
+            conv_dw(128, 128, 1), # 123 + 32 = 155
+            conv_dw(128, 128, 1), # 155 + 32 = 187
+            conv_dw(128, 128, 1), # 187 + 32 = 219
+        )
+        self.stage3 = nn.Sequential(
+            conv_dw(128, 256, 2), # 219 +3 2 = 241
+            conv_dw(256, 256, 1), # 241 + 64 = 301
+        )
+        self.avg = nn.AdaptiveAvgPool2d((1,1))
+        self.fc = nn.Linear(256, 1000)
+
+    def forward(self, x):
+        x = self.stage1(x)
+        x = self.stage2(x)
+        x = self.stage3(x)
+        x = self.avg(x)
+        # x = self.model(x)
+        x = x.view(-1, 256)
+        x = self.fc(x)
+        return x
+
+
+class RetinaFace(nn.Module):
+    def __init__(self):
+        super(RetinaFace,self).__init__()
+
+        backbone = MobileNetV1()
+        return_layers = {'stage1': 1, 'stage2': 2, 'stage3': 3}
+        self.body = _utils.IntermediateLayerGetter(backbone, return_layers)
+        in_channels_stage2 = 32
+        in_channels_list = [
+            in_channels_stage2 * 2,
+            in_channels_stage2 * 4,
+            in_channels_stage2 * 8,
+        ]
+        out_channels = 64
+        self.fpn = FPN(in_channels_list, out_channels)
+        self.ssh1 = SSH(out_channels, out_channels)
+        self.ssh2 = SSH(out_channels, out_channels)
+        self.ssh3 = SSH(out_channels, out_channels)
+
+        self.ClassHead = self._make_class_head(fpn_num=3, inchannels=out_channels)
+        self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=out_channels)
+        self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=out_channels)
+        
+    def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2):
+        classhead = nn.ModuleList()
+        for i in range(fpn_num):
+            classhead.append(ClassHead(inchannels, anchor_num))
+        return classhead
+    
+    def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2):
+        bboxhead = nn.ModuleList()
+        for i in range(fpn_num):
+            bboxhead.append(BboxHead(inchannels, anchor_num))
+        return bboxhead
+
+    def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2):
+        landmarkhead = nn.ModuleList()
+        for i in range(fpn_num):
+            landmarkhead.append(LandmarkHead(inchannels, anchor_num))
+        return landmarkhead
+        
+    def forward(self,inputs):
+        out = self.body(inputs)
+
+        # FPN
+        fpn = self.fpn(out)
+
+        # SSH
+        feature1 = self.ssh1(fpn[0])
+        feature2 = self.ssh2(fpn[1])
+        feature3 = self.ssh3(fpn[2])
+        features = [feature1, feature2, feature3]
+
+        bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
+        classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)],dim=1)
+        ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1)
+
+        output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
+        return output
+        
+
+class PriorBox(object):
+    def __init__(self, image_size=None):
+        super(PriorBox, self).__init__()
+        self.min_sizes = [[16, 32], [64, 128], [256, 512]]
+        self.steps = [8, 16, 32]
+        self.clip = False
+        self.image_size = image_size
+        self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps]
+        self.name = "s"
+
+    def forward(self):
+        anchors = []
+        for k, f in enumerate(self.feature_maps):
+            min_sizes = self.min_sizes[k]
+            for i, j in product(range(f[0]), range(f[1])):
+                for min_size in min_sizes:
+                    s_kx = min_size / self.image_size[1]
+                    s_ky = min_size / self.image_size[0]
+                    dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
+                    dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
+                    for cy, cx in product(dense_cy, dense_cx):
+                        anchors += [cx, cy, s_kx, s_ky]
+
+        # back to torch land
+        output = np.array(anchors).reshape(-1, 4)
+        if self.clip:
+            output.clamp_(max=1, min=0)
+        return output
+
+