f6b88746 by jiangwenqiang

init project

# face_detecter.py
import numpy as np
import MNN
import cv2
import logging
from retinaface import PriorBox


def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        # Greedily keep the highest-scoring box, then drop every remaining
        # box whose IoU with it exceeds thresh.
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep
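
# Quick sanity check for py_cpu_nms (illustrative, not part of the original
# file): two heavily overlapping boxes and one separate box; with thresh=0.4
# the lower-scoring duplicate is suppressed.
#
#   dets = np.array([[10, 10, 50, 50, 0.9],
#                    [12, 12, 48, 48, 0.8],
#                    [100, 100, 140, 140, 0.7]], dtype=np.float32)
#   py_cpu_nms(dets, 0.4)   # -> [0, 2]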


def decode_landm(pre, priors, variances):
    """Decode five-point landmark predictions from prior-relative offsets."""
    landms = np.concatenate((
        priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:]), 1)
    return landms


def decode(loc, priors, variances):
    """Decode SSD-style box regressions into (x1, y1, x2, y2) corner form."""
    boxes = np.concatenate((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes
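
# Decoding notes (added for clarity): priors are (cx, cy, w, h) in normalized
# coordinates and loc/pre hold regression offsets, so with variances (v0, v1):
#
#   cx = prior_cx + loc_x * v0 * prior_w      (likewise for cy)
#   w  = prior_w * exp(loc_w * v1)            (likewise for h)
#
# decode() then converts (cx, cy, w, h) to (x1, y1, x2, y2) corner form, and
# decode_landm() applies the same center-offset rule to each of the five
# landmark points.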


class Face_Detector(object):
    def __init__(self, model_path):
        logging.info('******** Start Init Face Detector ********')
        self.det_interpreter = MNN.Interpreter(model_path)
        self.det_session = self.det_interpreter.createSession()
        self.det_input_tensor = self.det_interpreter.getSessionInput(self.det_session)
        logging.info('******** Success Init Face Detector ********')

    def detect(self, frame, thr):
        logging.info('******** Start Face Detect ********')
        input_size = 320
        img = cv2.resize(frame, (input_size, input_size))

        img = np.float32(img)
        im_height, im_width, _ = img.shape
        # (w, h) repeated for the 4 box coordinates and the 10 landmark
        # coordinates, to undo the normalized [0, 1] outputs.
        scale = np.array([im_width, im_height] * 2)
        scale1 = np.array([im_width, im_height] * 5)

        # Ratios mapping 320x320 coordinates back to the original frame.
        w_r = input_size / frame.shape[1]
        h_r = input_size / frame.shape[0]

        confidence_threshold = 0.02
        vis_threshold = thr  # visibility threshold comes from the caller
        nms_threshold = 0.4
        keep_top_k = 100
        variance = [0.1, 0.2]

        # BGR mean subtraction and NCHW layout, as in the RetinaFace reference code.
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = np.expand_dims(img, axis=0)

        input_tensor = MNN.Tensor((1, 3, input_size, input_size), MNN.Halide_Type_Float,
                                  img, MNN.Tensor_DimensionType_Caffe)
        self.det_input_tensor.copyFrom(input_tensor)
        self.det_interpreter.runSession(self.det_session)

        bbox_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output0')
        conf_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output1')
        landmark_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output2')

        bbox_output = bbox_output_tensor.getData()
        conf_output = conf_output_tensor.getData()
        landmark_output = landmark_output_tensor.getData()

        # Regroup the flat output buffers per prior: 2 confidence values,
        # 4 box offsets and 10 landmark offsets each.
        num_priors = len(conf_output) // 2
        norm_confs = np.array([conf_output[i * 2: i * 2 + 2] for i in range(num_priors)])
        norm_bboxes = np.array([bbox_output[i * 4: i * 4 + 4] for i in range(num_priors)])
        norm_landmarks = np.array([landmark_output[i * 10: i * 10 + 10] for i in range(num_priors)])

        priorbox = PriorBox(image_size=(im_height, im_width))
        priors = priorbox.forward()

        scores = norm_confs[:, 1]

        boxes = decode(norm_bboxes, priors, variance)
        boxes = boxes * scale

        landms = decode_landm(norm_landmarks, priors, variance)
        landms = landms * scale1

        # ignore low scores
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:keep_top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, nms_threshold)

        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:keep_top_k, :]
        landms = landms[:keep_top_k, :]

        # dets rows are now [x1, y1, x2, y2, score, l1x, l1y, ..., l5x, l5y].
        dets = np.concatenate((dets, landms), axis=1)
        face_bboxes = []
        face_landmarks = []
        max_area = float('-inf')
        max_index = 0

        i = 0
        for b in dets:
            if b[4] < vis_threshold:
                continue
            # Map box corners and the five landmarks back to the original
            # frame; slot 4 keeps a placeholder where the score used to be.
            x1 = int(b[0] / w_r)
            y1 = int(b[1] / h_r)
            x2 = int(b[2] / w_r)
            y2 = int(b[3] / h_r)
            x3 = int(b[5] / w_r)
            y3 = int(b[6] / h_r)
            x4 = int(b[7] / w_r)
            y4 = int(b[8] / h_r)
            x5 = int(b[9] / w_r)
            y5 = int(b[10] / h_r)
            x6 = int(b[11] / w_r)
            y6 = int(b[12] / h_r)
            x7 = int(b[13] / w_r)
            y7 = int(b[14] / h_r)
            resize_b = [x1, y1, x2, y2, 0, x3, y3, x4, y4, x5, y5, x6, y6, x7, y7]

            # cv2.rectangle(frame, (resize_b[0], resize_b[1]), (resize_b[2], resize_b[3]), (0, 0, 255), 2)
            # cv2.circle(frame, (resize_b[5], resize_b[6]), 1, (0, 0, 255), 4)
            # cv2.circle(frame, (resize_b[7], resize_b[8]), 1, (0, 255, 255), 4)
            # cv2.circle(frame, (resize_b[9], resize_b[10]), 1, (255, 0, 255), 4)
            # cv2.circle(frame, (resize_b[11], resize_b[12]), 1, (0, 255, 0), 4)
            # cv2.circle(frame, (resize_b[13], resize_b[14]), 1, (255, 0, 0), 4)

            # Track the largest face so callers can pick the dominant one.
            area = (resize_b[2] - resize_b[0]) * (resize_b[3] - resize_b[1])
            if area > max_area:
                max_area = area
                max_index = i
            i += 1

            face_bboxes.append([resize_b[0], resize_b[1], resize_b[2], resize_b[3]])
            face_landmarks.append([(resize_b[5], resize_b[6]),
                                   (resize_b[7], resize_b[8]),
                                   (resize_b[9], resize_b[10]),
                                   (resize_b[11], resize_b[12]),
                                   (resize_b[13], resize_b[14])])

        # cv2.imwrite('results/0.jpg', frame)
        return face_bboxes, face_landmarks, max_index


if __name__ == '__main__':
    det_face_model_path = r'/home/jwq/PycharmProjects/situ/src/face_det/Pytorch_Retinaface/weights/mobilenet_0.25.mnn'
    image_path = r'input/3.jpg'
    image_save_path = r'results/3.jpg'
    thr = 0.5

    face_detector = Face_Detector(det_face_model_path)
    image = cv2.imread(image_path)
    face_bboxes, face_landmarks, max_index = face_detector.detect(image, thr)
    # image_ploted = face_detector.plot(image, face_bboxes)
    # cv2.imwrite(image_save_path, image_ploted)
# face_id.py
import MNN
import cv2
import numpy as np
import logging


class Face_Recognizer(object):
    def __init__(self, model_path):
        logging.info('******** Start Init Face ID ********')
        self.reg_interpreter = MNN.Interpreter(model_path)
        self.reg_session = self.reg_interpreter.createSession()
        self.reg_input_tensor = self.reg_interpreter.getSessionInput(self.reg_session)
        logging.info('******** Success Init Face ID ********')

    def recognize(self, imgs):
        feats = []
        for img in imgs:
            # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # float32 matches the Halide_Type_Float tensor below
            # (np.float is removed in recent NumPy releases).
            img = img.astype(np.float32)
            img = (img / 255. - 0.5) / 0.5  # normalize to [-1, 1]
            img = img.transpose(2, 0, 1)    # HWC -> CHW
            img = np.expand_dims(img, axis=0)

            input_tensor = MNN.Tensor((1, 3, 112, 112), MNN.Halide_Type_Float,
                                      img, MNN.Tensor_DimensionType_Caffe)
            self.reg_input_tensor.copyFrom(input_tensor)
            self.reg_interpreter.runSession(self.reg_session)
            output_tensor = self.reg_interpreter.getSessionOutput(self.reg_session, 'output0')
            output = output_tensor.getData()

            feats.append(output)

        feats_np = np.array(feats)
        return feats_np
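
A minimal usage sketch for the recognizer (the model path and image name here are hypothetical placeholders, not files shipped with this commit): recognize() takes a list of 112x112 aligned face crops in HWC layout and returns one embedding row per crop.

import cv2
from face_id import Face_Recognizer

recognizer = Face_Recognizer('models/cls_face.mnn')  # hypothetical path
crop = cv2.imread('aligned_face.jpg')                # a 112x112 aligned crop
feats = recognizer.recognize([crop])                 # shape: (1, embedding_dim)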
# Face comparison demo: detect faces, align them, and compare embeddings.
import numpy as np
import cv2
from skimage import transform as trans

from face_detecter import Face_Detector
from face_id import Face_Recognizer


def preprocess(image, landmarks):
    # Target landmark positions for a 112x112 aligned face crop.
    src = np.array([[38.2946, 51.6963],
                    [73.5318, 51.5014],
                    [56.0252, 71.7366],
                    [41.5493, 92.3655],
                    [70.7299, 92.2041]], dtype=np.float32)

    landmarks = np.array(landmarks)
    dst = landmarks.astype(np.float32)
    tform = trans.SimilarityTransform()
    tform.estimate(dst, src)
    M = tform.params[0:2, :]
    warped = cv2.warpAffine(image, M, (112, 112), borderValue=0.0)
    return warped
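
# Note (added for clarity): the five `src` points above are the standard
# ArcFace 112x112 alignment template (left eye, right eye, nose tip, and the
# two mouth corners). SimilarityTransform.estimate() fits the rotation, scale
# and translation that best map the detected landmarks onto this template.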


def get_norm_face(image, landmarks):
    norm_image = preprocess(image, landmarks)
    norm_image = cv2.cvtColor(norm_image, cv2.COLOR_BGR2RGB).astype(np.float32)
    norm_image = cv2.resize(norm_image, (112, 112))
    return norm_image


if __name__ == '__main__':
    det_face_model_path = r'models/det_face_mnn_1.0.0_v0.0.2.mnn'
    reg_face_id_model_path = r'models/cls_face_mnn_1.0.0_v0.0.2.mnn'

    id_image_path = r'input/IMG_2099.jpeg'
    life_image_path = r'input/1.jpg'

    face_det_thr = 0.5
    face_recognize_thr = 0.2

    face_detector = Face_Detector(det_face_model_path)
    face_recognizer = Face_Recognizer(reg_face_id_model_path)
    for i in range(10):  # repeat the whole pipeline several times
        id_image = cv2.imread(id_image_path)
        life_image = cv2.imread(life_image_path)

        id_face_bboxes, id_face_landmarks, id_max_idx = face_detector.detect(id_image, face_det_thr)
        life_face_bboxes, life_face_landmarks, life_max_idx = face_detector.detect(life_image, face_det_thr)
        print(id_face_bboxes)
        print(life_face_bboxes)

        # Crops stay in HWC layout: recognize() performs the HWC -> CHW
        # transpose itself, so transposing here as well would scramble
        # the tensor layout.
        id_norm_image = get_norm_face(id_image, id_face_landmarks[id_max_idx])
        norm_images = [id_norm_image]

        for j in range(len(life_face_landmarks)):
            life_norm_image = get_norm_face(life_image, life_face_landmarks[j])
            norm_images.append(life_norm_image)

        embeddings = face_recognizer.recognize(norm_images)
        gallery_vector = np.mat(embeddings[0])
        res = False
        sim = 0
        for p in range(1, len(embeddings)):
            compare_vector = np.mat(embeddings[p])

            # Cosine similarity between the ID photo embedding and each
            # live-image face embedding.
            dot = np.sum(np.multiply(gallery_vector, compare_vector), axis=1)
            norm = np.linalg.norm(gallery_vector, axis=1) * np.linalg.norm(compare_vector, axis=1)
            dist_1 = dot / norm

            sim = dist_1.tolist()
            sim = sim[0][0]

            if sim > face_recognize_thr:
                res = True
            print('sim {} : {}'.format(p, sim))
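
For reference, the comparison step above is plain cosine similarity; a minimal equivalent sketch without the legacy np.mat type (not part of the original script):

import numpy as np

def cosine_similarity(a, b):
    # Cosine similarity between two 1-D embedding vectors.
    a = np.asarray(a, dtype=np.float32).ravel()
    b = np.asarray(b, dtype=np.float32).ravel()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# e.g. cosine_similarity(embeddings[0], embeddings[p]) > face_recognize_thr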
# retinaface.py
from itertools import product
from math import ceil

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models._utils as _utils


def conv_bn(inp, oup, stride=1, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True)
    )


def conv_bn_no_relu(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
    )


def conv_bn1X1(inp, oup, stride, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True)
    )


def conv_dw(inp, oup, stride, leaky=0.1):
    # Depthwise separable convolution: 3x3 depthwise followed by 1x1 pointwise.
    return nn.Sequential(
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),

        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    )


class ClassHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(ClassHead, self).__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()
        return out.view(out.shape[0], -1, 2)


class BboxHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(BboxHead, self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()
        return out.view(out.shape[0], -1, 4)


class LandmarkHead(nn.Module):
    def __init__(self, inchannels=512, num_anchors=3):
        super(LandmarkHead, self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()
        return out.view(out.shape[0], -1, 10)


class SSH(nn.Module):
    # SSH context module: a 3x3 branch plus emulated 5x5 (two 3x3) and
    # emulated 7x7 (three 3x3) branches, concatenated channel-wise.
    def __init__(self, in_channel, out_channel):
        super(SSH, self).__init__()
        assert out_channel % 4 == 0
        leaky = 0
        if out_channel <= 64:
            leaky = 0.1
        self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1)

        self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky)
        self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)

        self.conv7X7_2 = conv_bn(out_channel // 4, out_channel // 4, stride=1, leaky=leaky)
        self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)

    def forward(self, input):
        conv3X3 = self.conv3X3(input)

        conv5X5_1 = self.conv5X5_1(input)
        conv5X5 = self.conv5X5_2(conv5X5_1)

        conv7X7_2 = self.conv7X7_2(conv5X5_1)
        conv7X7 = self.conv7x7_3(conv7X7_2)

        out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
        out = F.relu(out)
        return out


class FPN(nn.Module):
    def __init__(self, in_channels_list, out_channels):
        super(FPN, self).__init__()
        leaky = 0
        if out_channels <= 64:
            leaky = 0.1
        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky)
        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky)
        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky)

        self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
        self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)

    def forward(self, input):
        # names = list(input.keys())
        input = list(input.values())

        output1 = self.output1(input[0])
        output2 = self.output2(input[1])
        output3 = self.output3(input[2])

        # Top-down pathway: upsample the coarser map and merge with a 3x3 conv.
        up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest")
        output2 = output2 + up3
        output2 = self.merge2(output2)

        up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest")
        output1 = output1 + up2
        output1 = self.merge1(output1)

        out = [output1, output2, output3]
        return out


class MobileNetV1(nn.Module):
    def __init__(self):
        super(MobileNetV1, self).__init__()
        # Trailing comments track the receptive field after each layer.
        self.stage1 = nn.Sequential(
            conv_bn(3, 8, 2, leaky=0.1),    # 3
            conv_dw(8, 16, 1),              # 7
            conv_dw(16, 32, 2),             # 11
            conv_dw(32, 32, 1),             # 19
            conv_dw(32, 64, 2),             # 27
            conv_dw(64, 64, 1),             # 43
        )
        self.stage2 = nn.Sequential(
            conv_dw(64, 128, 2),            # 43 + 16 = 59
            conv_dw(128, 128, 1),           # 59 + 32 = 91
            conv_dw(128, 128, 1),           # 91 + 32 = 123
            conv_dw(128, 128, 1),           # 123 + 32 = 155
            conv_dw(128, 128, 1),           # 155 + 32 = 187
            conv_dw(128, 128, 1),           # 187 + 32 = 219
        )
        self.stage3 = nn.Sequential(
            conv_dw(128, 256, 2),           # 219 + 32 = 251
            conv_dw(256, 256, 1),           # 251 + 64 = 315
        )
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 1000)

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.avg(x)
        x = x.view(-1, 256)
        x = self.fc(x)
        return x


class RetinaFace(nn.Module):
    def __init__(self):
        super(RetinaFace, self).__init__()

        backbone = MobileNetV1()
        return_layers = {'stage1': 1, 'stage2': 2, 'stage3': 3}
        self.body = _utils.IntermediateLayerGetter(backbone, return_layers)
        in_channels_stage2 = 32
        in_channels_list = [
            in_channels_stage2 * 2,
            in_channels_stage2 * 4,
            in_channels_stage2 * 8,
        ]
        out_channels = 64
        self.fpn = FPN(in_channels_list, out_channels)
        self.ssh1 = SSH(out_channels, out_channels)
        self.ssh2 = SSH(out_channels, out_channels)
        self.ssh3 = SSH(out_channels, out_channels)

        self.ClassHead = self._make_class_head(fpn_num=3, inchannels=out_channels)
        self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=out_channels)
        self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=out_channels)

    def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        classhead = nn.ModuleList()
        for i in range(fpn_num):
            classhead.append(ClassHead(inchannels, anchor_num))
        return classhead

    def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        bboxhead = nn.ModuleList()
        for i in range(fpn_num):
            bboxhead.append(BboxHead(inchannels, anchor_num))
        return bboxhead

    def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        landmarkhead = nn.ModuleList()
        for i in range(fpn_num):
            landmarkhead.append(LandmarkHead(inchannels, anchor_num))
        return landmarkhead

    def forward(self, inputs):
        out = self.body(inputs)

        # FPN
        fpn = self.fpn(out)

        # SSH
        feature1 = self.ssh1(fpn[0])
        feature2 = self.ssh2(fpn[1])
        feature3 = self.ssh3(fpn[2])
        features = [feature1, feature2, feature3]

        bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
        classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1)
        ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1)

        output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
        return output
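
# Shape note (added for clarity): with a 320x320 input the three FPN levels
# are 40x40, 20x20 and 10x10 with 2 anchors per cell, so the concatenated
# outputs are bbox (1, 4200, 4), conf (1, 4200, 2) and landmarks (1, 4200, 10).
# These appear to correspond to the flat 'output0'/'output1'/'output2'
# buffers that face_detecter.py reads back from the converted MNN model.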


class PriorBox(object):
    def __init__(self, image_size=None):
        super(PriorBox, self).__init__()
        self.min_sizes = [[16, 32], [64, 128], [256, 512]]
        self.steps = [8, 16, 32]
        self.clip = False
        self.image_size = image_size
        self.feature_maps = [[ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
                             for step in self.steps]
        self.name = "s"

    def forward(self):
        anchors = []
        for k, f in enumerate(self.feature_maps):
            min_sizes = self.min_sizes[k]
            for i, j in product(range(f[0]), range(f[1])):
                for min_size in min_sizes:
                    # Anchor as (cx, cy, w, h), normalized by the image size.
                    s_kx = min_size / self.image_size[1]
                    s_ky = min_size / self.image_size[0]
                    dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
                    dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
                    for cy, cx in product(dense_cy, dense_cx):
                        anchors += [cx, cy, s_kx, s_ky]

        output = np.array(anchors).reshape(-1, 4)
        if self.clip:
            # clamp_ is a torch method; np.clip is the NumPy equivalent.
            output = np.clip(output, 0, 1)
        return output
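
A quick way to sanity-check the anchor count for the 320x320 input used by Face_Detector.detect (a sketch using the class above):

priorbox = PriorBox(image_size=(320, 320))
priors = priorbox.forward()
print(priors.shape)  # (4200, 4): (40*40 + 20*20 + 10*10) cells * 2 sizes each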