init project
0 parents
Showing 14 changed files with 827 additions and 0 deletions
__pycache__/face_detecter.cpython-36.pyc
0 → 100644
No preview for this file type
__pycache__/face_id.cpython-36.pyc
0 → 100644
No preview for this file type
__pycache__/retinaface.cpython-36.pyc
0 → 100644
No preview for this file type
face_detecter.py
0 → 100644
import os
import numpy as np
import MNN
import cv2
import logging
from retinaface import PriorBox


def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


def decode_landm(pre, priors, variances):
    """Decode landmark regression offsets into five (x, y) points per prior."""
    landms = np.concatenate((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
                             priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
                             priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
                             priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
                             priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:]), 1)
    return landms


def decode(loc, priors, variances):
    """Decode bbox regression offsets into (x1, y1, x2, y2) boxes."""
    boxes = np.concatenate((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2  # (cx, cy, w, h) -> (x1, y1, w, h)
    boxes[:, 2:] += boxes[:, :2]      # (x1, y1, w, h) -> (x1, y1, x2, y2)
    return boxes


class Face_Detector(object):
    def __init__(self, model_path):
        logging.info('******** Start Init Face Detector ********')
        self.det_interpreter = MNN.Interpreter(model_path)
        self.det_session = self.det_interpreter.createSession()
        self.det_input_tensor = self.det_interpreter.getSessionInput(self.det_session)
        logging.info('******** Success Init Face Detector ********')

    def detect(self, frame, thr):
        logging.info('******** Start Face Detect ********')
        input_size = 320
        img = cv2.resize(frame, (input_size, input_size))

        img = np.float32(img)
        im_height, im_width, _ = img.shape
        scale = np.array([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        scale1 = np.array([img.shape[1], img.shape[0], img.shape[1], img.shape[0],
                           img.shape[1], img.shape[0], img.shape[1], img.shape[0],
                           img.shape[1], img.shape[0]])

        # ratios for mapping detections on the 320x320 input back to the original frame
        w_r = input_size / frame.shape[1]
        h_r = input_size / frame.shape[0]

        confidence_threshold = 0.02
        vis_threshold = thr  # use the caller-supplied detection threshold
        nms_threshold = 0.4
        keep_top_k = 100
        variance = [0.1, 0.2]
        img -= (104, 117, 123)  # BGR channel means
        img = img.transpose(2, 0, 1)
        img = np.expand_dims(img, axis=0)

        input_tensor = MNN.Tensor((1, 3, input_size, input_size), MNN.Halide_Type_Float, img, MNN.Tensor_DimensionType_Caffe)
        self.det_input_tensor.copyFrom(input_tensor)
        self.det_interpreter.runSession(self.det_session)

        bbox_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output0')
        conf_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output1')
        landmark_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output2')

        bbox_output = bbox_output_tensor.getData()
        conf_output = conf_output_tensor.getData()
        landmark_output = landmark_output_tensor.getData()

        # regroup the flat output buffers into per-anchor rows
        num_anchors = len(conf_output) // 2
        norm_confs = [[conf_output[i * 2 + 0], conf_output[i * 2 + 1]] for i in range(num_anchors)]
        norm_bboxes = [[bbox_output[i * 4 + 0], bbox_output[i * 4 + 1],
                        bbox_output[i * 4 + 2], bbox_output[i * 4 + 3]] for i in range(num_anchors)]
        norm_landmarks = [[landmark_output[i * 10 + j] for j in range(10)] for i in range(num_anchors)]

        norm_confs = np.array(norm_confs)
        norm_bboxes = np.array(norm_bboxes)
        norm_landmarks = np.array(norm_landmarks)

        priorbox = PriorBox(image_size=(im_height, im_width))
        priors = priorbox.forward()

        scores = norm_confs[:, 1]

        boxes = decode(norm_bboxes, priors, variance)
        boxes = boxes * scale

        landms = decode_landm(norm_landmarks, priors, variance)
        landms = landms * scale1

        # ignore low scores
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:keep_top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, nms_threshold)

        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:keep_top_k, :]
        landms = landms[:keep_top_k, :]

        # each row: [x1, y1, x2, y2, score, l1x, l1y, ..., l5x, l5y]
        dets = np.concatenate((dets, landms), axis=1)
        face_bboxes = []
        face_landmarks = []
        max_area = float('-inf')
        max_index = 0

        i = 0
        for b in dets:
            if b[4] < vis_threshold:
                continue
            # map box and landmark coordinates back to the original frame
            x1 = int(b[0] / w_r)
            y1 = int(b[1] / h_r)
            x2 = int(b[2] / w_r)
            y2 = int(b[3] / h_r)
            x3 = int(b[5] / w_r)
            y3 = int(b[6] / h_r)
            x4 = int(b[7] / w_r)
            y4 = int(b[8] / h_r)
            x5 = int(b[9] / w_r)
            y5 = int(b[10] / h_r)
            x6 = int(b[11] / w_r)
            y6 = int(b[12] / h_r)
            x7 = int(b[13] / w_r)
            y7 = int(b[14] / h_r)
            resize_b = [x1, y1, x2, y2, 0, x3, y3, x4, y4, x5, y5, x6, y6, x7, y7]

            # cv2.rectangle(frame, (resize_b[0], resize_b[1]), (resize_b[2], resize_b[3]), (0, 0, 255), 2)
            # cv2.circle(frame, (resize_b[5], resize_b[6]), 1, (0, 0, 255), 4)
            # cv2.circle(frame, (resize_b[7], resize_b[8]), 1, (0, 255, 255), 4)
            # cv2.circle(frame, (resize_b[9], resize_b[10]), 1, (255, 0, 255), 4)
            # cv2.circle(frame, (resize_b[11], resize_b[12]), 1, (0, 255, 0), 4)
            # cv2.circle(frame, (resize_b[13], resize_b[14]), 1, (255, 0, 0), 4)

            # track the largest face so callers can pick it via max_index
            area = (resize_b[2] - resize_b[0]) * (resize_b[3] - resize_b[1])
            if area > max_area:
                max_area = area
                max_index = i
            i += 1

            face_bboxes.append([resize_b[0], resize_b[1], resize_b[2], resize_b[3]])
            face_landmarks.append([(resize_b[5], resize_b[6]),
                                   (resize_b[7], resize_b[8]),
                                   (resize_b[9], resize_b[10]),
                                   (resize_b[11], resize_b[12]),
                                   (resize_b[13], resize_b[14])])

        # cv2.imwrite('results/0.jpg', frame)
        return face_bboxes, face_landmarks, max_index


if __name__ == '__main__':
    det_face_model_path = r'/home/jwq/PycharmProjects/situ/src/face_det/Pytorch_Retinaface/weights/mobilenet_0.25.mnn'
    image_path = r'input/3.jpg'
    image_save_path = r'results/3.jpg'
    thr = 0.5

    face_detector = Face_Detector(det_face_model_path)
    image = cv2.imread(image_path)
    face_detector.detect(image, thr)
    # image_ploted = face_detector.plot(image, face_bboxes)
    # cv2.imwrite(image_save_path, image_ploted)
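A quick standalone check of py_cpu_nms — a minimal sketch with made-up boxes and the same 0.4 IoU threshold the detector uses (importing from face_detecter assumes its dependencies, MNN and torch, are installed):

import numpy as np
from face_detecter import py_cpu_nms

# two heavily overlapping boxes plus one separate box, each as [x1, y1, x2, y2, score]
dets = np.array([[10, 10, 60, 60, 0.9],
                 [12, 12, 62, 62, 0.8],    # IoU with the first box is ~0.86, well above 0.4
                 [100, 100, 150, 150, 0.7]], dtype=np.float32)

keep = py_cpu_nms(dets, thresh=0.4)
print(keep)  # expected: [0, 2] — the second box is suppressed by the higher-scoring first one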
face_id.py
0 → 100644
import MNN
import cv2
import numpy as np
import logging


class Face_Recognizer(object):
    def __init__(self, model_path):
        logging.info('******** Start Init Face ID ********')
        self.reg_interpreter = MNN.Interpreter(model_path)
        self.reg_session = self.reg_interpreter.createSession()
        self.reg_input_tensor = self.reg_interpreter.getSessionInput(self.reg_session)
        logging.info('******** Success Init Face ID ********')

    def recognize(self, imgs):
        """Extract one embedding per aligned 112x112 face image; inputs are expected in HWC layout."""
        feats = []
        for i in range(len(imgs)):
            img = imgs[i]

            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img.astype(np.float32)     # np.float is deprecated; use an explicit dtype
            img = (img / 255. - 0.5) / 0.5   # normalize to [-1, 1]
            img = img.transpose(2, 0, 1)     # HWC -> CHW
            img = np.expand_dims(img, axis=0)

            input_tensor = MNN.Tensor((1, 3, 112, 112), MNN.Halide_Type_Float, img, MNN.Tensor_DimensionType_Caffe)
            self.reg_input_tensor.copyFrom(input_tensor)
            self.reg_interpreter.runSession(self.reg_session)
            output_tensor = self.reg_interpreter.getSessionOutput(self.reg_session, 'output0')
            output = output_tensor.getData()

            feats.append(output)

        feats_np = np.array(feats)
        return feats_np
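recognize() returns raw embeddings; downstream code (see mobile_face_id_demo.py) compares them with cosine similarity. A minimal standalone sketch of that comparison — the 512-dim size and random vectors are stand-ins for real embeddings:

import numpy as np

def cosine_similarity(a, b):
    # cos(a, b) = a.b / (|a| * |b|); values near 1 suggest the same identity
    a = np.asarray(a, dtype=np.float32).ravel()
    b = np.asarray(b, dtype=np.float32).ravel()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

rng = np.random.default_rng(0)
feat_a = rng.standard_normal(512)  # 512 dims is an assumption; use whatever recognize() returns
feat_b = rng.standard_normal(512)
print(cosine_similarity(feat_a, feat_a))  # 1.0
print(cosine_similarity(feat_a, feat_b))  # near 0 for unrelated vectors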
input/0.jpg
0 → 100644
[image preview: 161 KB]
input/1.jpg
0 → 100644
[image preview: 273 KB]
input/IMG_2099.jpeg
0 → 100644
[image preview: 2 MB]
input/IMG_3370.JPG
0 → 100644
[image preview: 30.5 KB]
mobile_face_id_demo.py
0 → 100644
import os
import numpy as np
import MNN
import cv2
import logging
from skimage import transform as trans

from face_detecter import Face_Detector
from face_id import Face_Recognizer


def preprocess(image, landmarks):
    """Align the face to the canonical 112x112 ArcFace template with a 5-point similarity transform."""
    src = np.array([[38.2946, 51.6963],
                    [73.5318, 51.5014],
                    [56.0252, 71.7366],
                    [41.5493, 92.3655],
                    [70.7299, 92.2041]], dtype=np.float32)

    landmarks = np.array(landmarks)
    dst = landmarks.astype(np.float32)
    tform = trans.SimilarityTransform()
    tform.estimate(dst, src)  # maps the detected points onto the template
    M = tform.params[0:2, :]
    warped = cv2.warpAffine(image, M, (112, 112), borderValue=0.0)
    return warped


def get_norm_face(image, landmarks):
    norm_image = preprocess(image, landmarks)
    norm_image = cv2.cvtColor(norm_image, cv2.COLOR_BGR2RGB).astype(np.float32)
    norm_image = cv2.resize(norm_image, (112, 112))
    return norm_image


if __name__ == '__main__':
    det_face_model_path = r'models/det_face_mnn_1.0.0_v0.0.2.mnn'
    reg_face_id_model_path = r'models/cls_face_mnn_1.0.0_v0.0.2.mnn'

    id_image_path = r'input/IMG_2099.jpeg'
    life_image_path = r'input/1.jpg'

    face_det_thr = 0.5
    face_recognize_thr = 0.2

    face_detector = Face_Detector(det_face_model_path)
    face_recognizer = Face_Recognizer(reg_face_id_model_path)
    for i in range(10):  # run the whole pipeline a few times as a rough timing/stability check
        id_image = cv2.imread(id_image_path)
        life_image = cv2.imread(life_image_path)

        id_face_bboxes, id_face_landmarks, id_max_idx = face_detector.detect(id_image, face_det_thr)
        life_face_bboxes, life_face_landmarks, life_max_idx = face_detector.detect(life_image, face_det_thr)
        print(id_face_bboxes)
        print(life_face_bboxes)

        # Face_Recognizer.recognize() transposes HWC -> CHW itself,
        # so the aligned images are passed in HWC layout here.
        id_norm_image = get_norm_face(id_image, id_face_landmarks[id_max_idx])
        norm_images = [id_norm_image]

        for j in range(len(life_face_landmarks)):
            life_norm_image = get_norm_face(life_image, life_face_landmarks[j])
            norm_images.append(life_norm_image)

        embeddings = face_recognizer.recognize(norm_images)
        gallery_vector = np.asarray(embeddings[0], dtype=np.float32).ravel()
        res = False
        sim = 0
        for p in range(1, len(embeddings)):
            compare_vector = np.asarray(embeddings[p], dtype=np.float32).ravel()

            # cosine similarity between the ID face and each live face
            dot = float(np.dot(gallery_vector, compare_vector))
            norm = np.linalg.norm(gallery_vector) * np.linalg.norm(compare_vector)
            sim = dot / norm

            if sim > face_recognize_thr:
                res = True
            print('sim {} : {}'.format(p, sim))
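The five src points in preprocess() are the canonical 112x112 ArcFace alignment template. A minimal sketch of the estimate/warp step on synthetic landmarks — the shifted points and blank frame are invented for illustration:

import numpy as np
import cv2
from skimage import transform as trans

src = np.array([[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
                [41.5493, 92.3655], [70.7299, 92.2041]], dtype=np.float32)

# hypothetical detected landmarks: the template shifted by (20, 30) pixels
dst = src + np.array([20.0, 30.0], dtype=np.float32)

tform = trans.SimilarityTransform()
tform.estimate(dst, src)           # solve for the transform taking detected points to the template
M = tform.params[0:2, :]

image = np.zeros((200, 200, 3), dtype=np.uint8)  # dummy frame
warped = cv2.warpAffine(image, M, (112, 112), borderValue=0.0)
print(M)             # ~identity rotation/scale with translation (-20, -30)
print(warped.shape)  # (112, 112, 3)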
models/cls_face_mnn_1.0.0_v0.0.2.mnn
0 → 100644
No preview for this file type
models/det_face_mnn_1.0.0_v0.0.2.mnn
0 → 100644
No preview for this file type
q
0 → 100644
[Contents identical to face_detecter.py above; 220 duplicated lines omitted.]
retinaface.py
0 → 100644
from itertools import product
from math import ceil
import numpy as np
import torch
import torch.nn as nn
import torchvision.models._utils as _utils
import torch.nn.functional as F


def conv_bn(inp, oup, stride=1, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True)
    )


def conv_bn_no_relu(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
    )


def conv_bn1X1(inp, oup, stride, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True)
    )


def conv_dw(inp, oup, stride, leaky=0.1):
    return nn.Sequential(
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),

        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    )


class ClassHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(ClassHead, self).__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()
        return out.view(out.shape[0], -1, 2)


class BboxHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(BboxHead, self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()
        return out.view(out.shape[0], -1, 4)


class LandmarkHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(LandmarkHead, self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()
        return out.view(out.shape[0], -1, 10)


class SSH(nn.Module):
    def __init__(self, in_channel, out_channel):
        super(SSH, self).__init__()
        assert out_channel % 4 == 0
        leaky = 0
        if out_channel <= 64:
            leaky = 0.1
        self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1)

        self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky)
        self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)

        self.conv7X7_2 = conv_bn(out_channel // 4, out_channel // 4, stride=1, leaky=leaky)
        self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)

    def forward(self, input):
        conv3X3 = self.conv3X3(input)

        conv5X5_1 = self.conv5X5_1(input)
        conv5X5 = self.conv5X5_2(conv5X5_1)

        conv7X7_2 = self.conv7X7_2(conv5X5_1)
        conv7X7 = self.conv7x7_3(conv7X7_2)

        out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
        out = F.relu(out)
        return out


class FPN(nn.Module):
    def __init__(self, in_channels_list, out_channels):
        super(FPN, self).__init__()
        leaky = 0
        if out_channels <= 64:
            leaky = 0.1
        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky)
        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky)
        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky)

        self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
        self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)

    def forward(self, input):
        input = list(input.values())

        output1 = self.output1(input[0])
        output2 = self.output2(input[1])
        output3 = self.output3(input[2])

        # top-down pathway: upsample coarser levels and merge into finer ones
        up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest")
        output2 = output2 + up3
        output2 = self.merge2(output2)

        up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest")
        output1 = output1 + up2
        output1 = self.merge1(output1)

        out = [output1, output2, output3]
        return out


class MobileNetV1(nn.Module):
    def __init__(self):
        super(MobileNetV1, self).__init__()
        self.stage1 = nn.Sequential(
            conv_bn(3, 8, 2, leaky=0.1),  # 3
            conv_dw(8, 16, 1),            # 7
            conv_dw(16, 32, 2),           # 11
            conv_dw(32, 32, 1),           # 19
            conv_dw(32, 64, 2),           # 27
            conv_dw(64, 64, 1),           # 43
        )
        self.stage2 = nn.Sequential(
            conv_dw(64, 128, 2),   # 43 + 16 = 59
            conv_dw(128, 128, 1),  # 59 + 32 = 91
            conv_dw(128, 128, 1),  # 91 + 32 = 123
            conv_dw(128, 128, 1),  # 123 + 32 = 155
            conv_dw(128, 128, 1),  # 155 + 32 = 187
            conv_dw(128, 128, 1),  # 187 + 32 = 219
        )
        self.stage3 = nn.Sequential(
            conv_dw(128, 256, 2),  # 219 + 32 = 241
            conv_dw(256, 256, 1),  # 241 + 64 = 301
        )
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 1000)

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.avg(x)
        x = x.view(-1, 256)
        x = self.fc(x)
        return x


class RetinaFace(nn.Module):
    def __init__(self):
        super(RetinaFace, self).__init__()

        backbone = MobileNetV1()
        return_layers = {'stage1': 1, 'stage2': 2, 'stage3': 3}
        self.body = _utils.IntermediateLayerGetter(backbone, return_layers)
        in_channels_stage2 = 32
        in_channels_list = [
            in_channels_stage2 * 2,
            in_channels_stage2 * 4,
            in_channels_stage2 * 8,
        ]
        out_channels = 64
        self.fpn = FPN(in_channels_list, out_channels)
        self.ssh1 = SSH(out_channels, out_channels)
        self.ssh2 = SSH(out_channels, out_channels)
        self.ssh3 = SSH(out_channels, out_channels)

        self.ClassHead = self._make_class_head(fpn_num=3, inchannels=out_channels)
        self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=out_channels)
        self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=out_channels)

    def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        classhead = nn.ModuleList()
        for i in range(fpn_num):
            classhead.append(ClassHead(inchannels, anchor_num))
        return classhead

    def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        bboxhead = nn.ModuleList()
        for i in range(fpn_num):
            bboxhead.append(BboxHead(inchannels, anchor_num))
        return bboxhead

    def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        landmarkhead = nn.ModuleList()
        for i in range(fpn_num):
            landmarkhead.append(LandmarkHead(inchannels, anchor_num))
        return landmarkhead

    def forward(self, inputs):
        out = self.body(inputs)

        # FPN
        fpn = self.fpn(out)

        # SSH
        feature1 = self.ssh1(fpn[0])
        feature2 = self.ssh2(fpn[1])
        feature3 = self.ssh3(fpn[2])
        features = [feature1, feature2, feature3]

        bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
        classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1)
        ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1)

        output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
        return output


class PriorBox(object):
    def __init__(self, image_size=None):
        super(PriorBox, self).__init__()
        self.min_sizes = [[16, 32], [64, 128], [256, 512]]
        self.steps = [8, 16, 32]
        self.clip = False
        self.image_size = image_size
        self.feature_maps = [[ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] for step in self.steps]
        self.name = "s"

    def forward(self):
        anchors = []
        for k, f in enumerate(self.feature_maps):
            min_sizes = self.min_sizes[k]
            for i, j in product(range(f[0]), range(f[1])):
                for min_size in min_sizes:
                    s_kx = min_size / self.image_size[1]
                    s_ky = min_size / self.image_size[0]
                    dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
                    dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
                    for cy, cx in product(dense_cy, dense_cx):
                        anchors += [cx, cy, s_kx, s_ky]

        output = np.array(anchors).reshape(-1, 4)
        if self.clip:
            # output is a numpy array, so use np.clip rather than torch's clamp_
            output = np.clip(output, 0, 1)
        return output
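With the detector's 320x320 input, PriorBox tiles two anchors per cell over 40x40, 20x20 and 10x10 feature maps, i.e. (1600 + 400 + 100) x 2 = 4200 priors. A quick sketch to confirm the count (assumes torch/torchvision are installed, since importing retinaface pulls them in):

from retinaface import PriorBox

priors = PriorBox(image_size=(320, 320)).forward()
print(priors.shape)  # (4200, 4)
print(priors[0])     # [cx, cy, w, h], all normalized to [0, 1]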