f6b88746 by jiangwenqiang

init project

0 parents
No preview for this file type
No preview for this file type
No preview for this file type
import os
import numpy as np
import MNN
import cv2
import logging
from retinaface import PriorBox
def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression over scored boxes, using NumPy only.

    Args:
        dets: 2-D array; columns 0-3 are read as x1, y1, x2, y2 and
            column 4 as the score.
        thresh: overlap limit. A box is discarded when its IoU with an
            already kept box is strictly greater than this value.

    Returns:
        List of row indices into ``dets`` that survive suppression, ordered
        from the highest score to the lowest.
    """
    left, top, right, bottom, conf = (dets[:, col] for col in range(5))
    # The "+ 1" terms treat the coordinates as inclusive pixel indices.
    box_area = (right - left + 1) * (bottom - top + 1)
    remaining = conf.argsort()[::-1]
    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)
        # Intersection of the best box with every box still in play.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])
        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        inter = inter_w * inter_h
        iou = inter / (box_area[best] + box_area[rest] - inter)
        # Only boxes that do not overlap the kept one too much stay candidates.
        remaining = rest[iou <= thresh]
    return keep
def decode_landm(pre, priors, variances):
    """Decode five landmark offsets against their prior boxes.

    Args:
        pre: 2-D array with 10 columns, read as five (x, y) offset pairs.
        priors: 2-D array with 4 columns; columns 0-1 are used as the prior
            centre and columns 2-3 as the prior size.
        variances: sequence whose first element scales the offsets.

    Returns:
        Array with 10 columns: each pair is
        ``centre + offset * variances[0] * size``.
    """
    centres = priors[:, :2]
    sizes = priors[:, 2:]
    decoded_points = tuple(
        centres + pre[:, start:start + 2] * variances[0] * sizes
        for start in range(0, 10, 2)
    )
    return np.concatenate(decoded_points, 1)
def decode(loc, priors, variances):
    """Decode box regression offsets into corner-form boxes.

    Args:
        loc: 2-D array with 4 columns (centre offsets, then log-size offsets).
        priors: 2-D array with 4 columns (centre x, centre y, width, height).
        variances: two-element sequence; the first scales the centre
            offsets, the second scales the size offsets.

    Returns:
        Array with 4 columns holding (x1, y1, x2, y2) for every prior.
    """
    centres = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
    boxes = np.concatenate((centres, sizes), 1)
    # Centre/size form -> corner form: shift to the top-left corner, then
    # add that corner to the size to obtain the bottom-right corner.
    boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2
    boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
    return boxes
class Face_Detector(object):
    """Face detector that runs a model through an MNN interpreter session.

    The model is expected to expose three outputs named ``output0`` (box
    regressions), ``output1`` (two scores per prior) and ``output2`` (ten
    landmark values per prior); these are decoded against ``PriorBox``
    anchors for a fixed 320x320 input.
    """

    def __init__(self, model_path):
        """Load the MNN model at ``model_path`` and create its session."""
        logging.info('******** Start Init Face Detector ********')
        self.det_interpreter = MNN.Interpreter(model_path)
        self.det_session = self.det_interpreter.createSession()
        self.det_input_tensor = self.det_interpreter.getSessionInput(self.det_session)
        logging.info('******** Success Init Face Detector ********')

    def detect(self, frame, thr):
        """Detect faces in ``frame`` and return them in frame coordinates.

        Args:
            frame: image array of shape (height, width, 3) as accepted by
                ``cv2.resize`` (presumably BGR from ``cv2.imread`` -- confirm).
            thr: minimum score a detection needs to be returned. This
                argument used to be ignored in favour of a hard-coded 0.5.

        Returns:
            Tuple ``(face_bboxes, face_landmarks, max_index)``:
            ``face_bboxes`` is a list of ``[x1, y1, x2, y2]`` ints,
            ``face_landmarks`` a list of five ``(x, y)`` int tuples per face,
            and ``max_index`` the position (in those lists) of the box with
            the largest area, or 0 when nothing was detected.
        """
        logging.info('******** Start Face Detect ********')
        input_size = 320
        confidence_threshold = 0.02
        nms_threshold = 0.4
        keep_top_k = 100
        variance = [0.1, 0.2]

        img = np.float32(cv2.resize(frame, (input_size, input_size)))
        im_height, im_width, _ = img.shape
        # Decoded values are relative to the network input; these factors
        # convert them to network-input pixels (x, y repeated per point).
        scale = np.array([im_width, im_height] * 2)
        scale1 = np.array([im_width, im_height] * 5)
        # Ratios between the network input and the original frame.
        w_r = input_size / frame.shape[1]
        h_r = input_size / frame.shape[0]

        # Per-channel mean subtraction, then HWC -> NCHW.
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = np.expand_dims(img, axis=0)
        input_tensor = MNN.Tensor((1, 3, input_size, input_size), MNN.Halide_Type_Float,
                                  img, MNN.Tensor_DimensionType_Caffe)
        self.det_input_tensor.copyFrom(input_tensor)
        self.det_interpreter.runSession(self.det_session)
        bbox_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output0')
        conf_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output1')
        landmark_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output2')
        bbox_output = bbox_output_tensor.getData()
        conf_output = conf_output_tensor.getData()
        landmark_output = landmark_output_tensor.getData()

        # The flat outputs are regrouped per prior; the number of priors is
        # derived from the score output (two scores per prior).
        conf_flat = np.asarray(conf_output).ravel()
        num_priors = conf_flat.size // 2
        norm_confs = conf_flat[:num_priors * 2].reshape(num_priors, 2)
        norm_bboxes = np.asarray(bbox_output).ravel()[:num_priors * 4].reshape(num_priors, 4)
        norm_landmarks = np.asarray(landmark_output).ravel()[:num_priors * 10].reshape(num_priors, 10)

        priorbox = PriorBox(image_size=(im_height, im_width))
        priors = priorbox.forward()
        scores = norm_confs[:, 1]
        boxes = decode(norm_bboxes, priors, variance) * scale
        landms = decode_landm(norm_landmarks, priors, variance) * scale1

        # ignore low scores
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:keep_top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:keep_top_k, :]
        landms = landms[:keep_top_k, :]
        # Row layout from here on: x1, y1, x2, y2, score, then 5 (x, y) pairs.
        dets = np.concatenate((dets, landms), axis=1)

        face_bboxes = []
        face_landmarks = []
        max_area = float('-inf')
        max_index = 0
        for b in dets:
            if b[4] < thr:
                continue
            # Map from network-input pixels back to the original frame.
            x1, y1 = int(b[0] / w_r), int(b[1] / h_r)
            x2, y2 = int(b[2] / w_r), int(b[3] / h_r)
            points = [(int(b[k] / w_r), int(b[k + 1] / h_r)) for k in range(5, 15, 2)]
            area = (x2 - x1) * (y2 - y1)
            if area > max_area:
                max_area = area
                # Index of the entry about to be appended below.
                max_index = len(face_bboxes)
            face_bboxes.append([x1, y1, x2, y2])
            face_landmarks.append(points)
        return face_bboxes, face_landmarks, max_index
if __name__ == '__main__':
    # Manual smoke test: run the detector once on a sample image.
    det_face_model_path = r'/home/jwq/PycharmProjects/situ/src/face_det/Pytorch_Retinaface/weights/mobilenet_0.25.mnn'
    image_path = r'input/3.jpg'
    image_save_path = r'results/3.jpg'
    thr = 0.5
    face_detector = Face_Detector(det_face_model_path)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising; stop here with a clear message.
        raise SystemExit('cannot read image: {}'.format(image_path))
    face_detector.detect(image, thr)
    # Plotting is disabled: Face_Detector defines no ``plot`` method here.
    # image_ploted = face_detector.plot(image, face_bboxes)
    # cv2.imwrite(image_save_path, image_ploted)
import MNN
import cv2
import numpy as np
import logging
class Face_Recognizer(object):
    """Face embedding extractor that runs a model through an MNN session.

    The model is expected to take a (1, 3, 112, 112) float input and to
    expose its embedding under the output name ``output0``.
    """

    def __init__(self, model_path):
        """Load the MNN model at ``model_path`` and create its session."""
        logging.info('******** Start Init Face ID ********')
        self.reg_interpreter = MNN.Interpreter(model_path)
        self.reg_session = self.reg_interpreter.createSession()
        self.reg_input_tensor = self.reg_interpreter.getSessionInput(self.reg_session)
        logging.info('******** Success Init Face ID ********')

    def recognize(self, imgs):
        """Compute one embedding per image.

        Args:
            imgs: iterable of 3-D image arrays with values in 0..255. Each
                one is normalised to [-1, 1], transposed with axes (2, 0, 1)
                and fed to the network.
                NOTE(review): the demo script in this repository already
                transposes its images to channel-first before calling this
                method, so the transpose here is applied on top of that --
                confirm which layout the model really expects.

        Returns:
            ``np.array`` built from the list of per-image network outputs
            (an empty array when ``imgs`` is empty).
        """
        feats = []
        for img in imgs:
            # np.float was only an alias of the builtin float (float64) and
            # was removed in NumPy 1.24; np.float64 keeps the same values.
            img = img.astype(np.float64)
            img = (img / 255. - 0.5) / 0.5
            img = img.transpose(2, 0, 1)
            img = np.expand_dims(img, axis=0)
            input_tensor = MNN.Tensor((1, 3, 112, 112), MNN.Halide_Type_Float, img,
                                      MNN.Tensor_DimensionType_Caffe)
            self.reg_input_tensor.copyFrom(input_tensor)
            self.reg_interpreter.runSession(self.reg_session)
            output_tensor = self.reg_interpreter.getSessionOutput(self.reg_session, 'output0')
            feats.append(output_tensor.getData())
        return np.array(feats)
input/0.jpg

161 KB

input/1.jpg

273 KB

import os
import numpy as np
import MNN
import cv2
import logging
from skimage import transform as trans
from face_detecter import Face_Detector
from face_id import Face_Recognizer
def preprocess(image, landmarks):
    """Warp ``image`` so its five landmarks match a fixed 112x112 template.

    Args:
        image: source image passed to ``cv2.warpAffine``.
        landmarks: five (x, y) points in ``image``, in the same order as the
            template rows below.

    Returns:
        The 112x112 warped image; pixels outside the source are filled with 0.
    """
    # Target landmark positions inside the 112x112 output.
    template_points = np.array([[38.2946, 51.6963],
                                [73.5318, 51.5014],
                                [56.0252, 71.7366],
                                [41.5493, 92.3655],
                                [70.7299, 92.2041]], dtype=np.float32)
    detected_points = np.array(landmarks).astype(np.float32)
    # Similarity transform estimated from detected points to the template.
    similarity = trans.SimilarityTransform()
    similarity.estimate(detected_points, template_points)
    # cv2.warpAffine takes the top two rows of the 3x3 homogeneous matrix.
    affine_rows = similarity.params[0:2, :]
    return cv2.warpAffine(image, affine_rows, (112, 112), borderValue=0.0)
def get_norm_face(image, landmarks):
    """Return the aligned face crop as a float32 RGB image of size 112x112.

    The face is aligned with ``preprocess``, converted from BGR to RGB, cast
    to float32 and passed through ``cv2.resize`` with target size (112, 112).
    """
    aligned_face = preprocess(image, landmarks)
    rgb_face = cv2.cvtColor(aligned_face, cv2.COLOR_BGR2RGB).astype(np.float32)
    return cv2.resize(rgb_face, (112, 112))
if __name__ == '__main__':
    # Demo: compare the largest face of an ID photo against every face found
    # in a second ("life") photo using cosine similarity of the embeddings.
    det_face_model_path = r'models/det_face_mnn_1.0.0_v0.0.2.mnn'
    reg_face_id_model_path = r'models/cls_face_mnn_1.0.0_v0.0.2.mnn'
    id_image_path = r'input/IMG_2099.jpeg'
    life_image_path = r'input/1.jpg'
    face_det_thr = 0.5
    face_recongnize_thr = 0.2
    face_detector = Face_Detector(det_face_model_path)
    face_recognizer = Face_Recognizer(reg_face_id_model_path)

    # The images do not change between iterations, so read them once.
    id_image = cv2.imread(id_image_path)
    life_image = cv2.imread(life_image_path)
    for checked_path, checked_image in ((id_image_path, id_image),
                                        (life_image_path, life_image)):
        if checked_image is None:
            # cv2.imread returns None instead of raising on a bad path.
            raise SystemExit('cannot read image: {}'.format(checked_path))

    for i in range(10):
        id_face_bboxes, id_face_landmarks, id_max_idx = face_detector.detect(id_image, face_det_thr)
        life_face_bboxes, life_face_landmarks, life_max_idx = face_detector.detect(life_image, face_det_thr)
        print(id_face_bboxes)
        print(life_face_bboxes)
        if not id_face_landmarks:
            # Without a reference face there is nothing to compare against.
            raise SystemExit('no face detected in {}'.format(id_image_path))

        id_norm_image = get_norm_face(id_image, id_face_landmarks[id_max_idx])
        id_norm_image = np.transpose(id_norm_image, (2, 0, 1))
        norm_images = [id_norm_image]
        for j in range(len(life_face_landmarks)):
            life_norm_image = get_norm_face(life_image, life_face_landmarks[j])
            life_norm_image = np.transpose(life_norm_image, (2, 0, 1))
            norm_images.append(life_norm_image)

        # embeddings[0] belongs to the ID face, the rest to the life faces.
        embeddings = face_recognizer.recognize(norm_images)
        # Plain 1-D arrays replace np.mat, which was removed in NumPy 2.0.
        gallery_vector = np.asarray(embeddings[0]).ravel()
        gallery_norm = np.linalg.norm(gallery_vector)
        res = False
        sim = 0
        for p in range(1, len(embeddings)):
            compare_vector = np.asarray(embeddings[p]).ravel()
            # Cosine similarity between the ID face and life face p - 1.
            sim = float(np.dot(gallery_vector, compare_vector)
                        / (gallery_norm * np.linalg.norm(compare_vector)))
            if sim > face_recongnize_thr:
                res = True
            # Report the index of the face being compared, not the stale
            # loop variable left over from the alignment loop above.
            print('sim {} : {}'.format(p - 1, sim))
No preview for this file type
No preview for this file type
import os
import numpy as np
import MNN
import cv2
import logging
from retinaface import PriorBox
def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression over scored boxes, using NumPy only.

    Args:
        dets: 2-D array; columns 0-3 are read as x1, y1, x2, y2 and
            column 4 as the score.
        thresh: overlap limit. A box is discarded when its IoU with an
            already kept box is strictly greater than this value.

    Returns:
        List of row indices into ``dets`` that survive suppression, ordered
        from the highest score to the lowest.
    """
    left, top, right, bottom, conf = (dets[:, col] for col in range(5))
    # The "+ 1" terms treat the coordinates as inclusive pixel indices.
    box_area = (right - left + 1) * (bottom - top + 1)
    remaining = conf.argsort()[::-1]
    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)
        # Intersection of the best box with every box still in play.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])
        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        inter = inter_w * inter_h
        iou = inter / (box_area[best] + box_area[rest] - inter)
        # Only boxes that do not overlap the kept one too much stay candidates.
        remaining = rest[iou <= thresh]
    return keep
def decode_landm(pre, priors, variances):
    """Decode five landmark offsets against their prior boxes.

    Args:
        pre: 2-D array with 10 columns, read as five (x, y) offset pairs.
        priors: 2-D array with 4 columns; columns 0-1 are used as the prior
            centre and columns 2-3 as the prior size.
        variances: sequence whose first element scales the offsets.

    Returns:
        Array with 10 columns: each pair is
        ``centre + offset * variances[0] * size``.
    """
    centres = priors[:, :2]
    sizes = priors[:, 2:]
    decoded_points = tuple(
        centres + pre[:, start:start + 2] * variances[0] * sizes
        for start in range(0, 10, 2)
    )
    return np.concatenate(decoded_points, 1)
def decode(loc, priors, variances):
    """Decode box regression offsets into corner-form boxes.

    Args:
        loc: 2-D array with 4 columns (centre offsets, then log-size offsets).
        priors: 2-D array with 4 columns (centre x, centre y, width, height).
        variances: two-element sequence; the first scales the centre
            offsets, the second scales the size offsets.

    Returns:
        Array with 4 columns holding (x1, y1, x2, y2) for every prior.
    """
    centres = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
    boxes = np.concatenate((centres, sizes), 1)
    # Centre/size form -> corner form: shift to the top-left corner, then
    # add that corner to the size to obtain the bottom-right corner.
    boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2
    boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
    return boxes
class Face_Detector(object):
    """Face detector that runs a model through an MNN interpreter session.

    The model is expected to expose three outputs named ``output0`` (box
    regressions), ``output1`` (two scores per prior) and ``output2`` (ten
    landmark values per prior); these are decoded against ``PriorBox``
    anchors for a fixed 320x320 input.
    """

    def __init__(self, model_path):
        """Load the MNN model at ``model_path`` and create its session."""
        logging.info('******** Start Init Face Detector ********')
        self.det_interpreter = MNN.Interpreter(model_path)
        self.det_session = self.det_interpreter.createSession()
        self.det_input_tensor = self.det_interpreter.getSessionInput(self.det_session)
        logging.info('******** Success Init Face Detector ********')

    def detect(self, frame, thr):
        """Detect faces in ``frame`` and return them in frame coordinates.

        Args:
            frame: image array of shape (height, width, 3) as accepted by
                ``cv2.resize`` (presumably BGR from ``cv2.imread`` -- confirm).
            thr: minimum score a detection needs to be returned. This
                argument used to be ignored in favour of a hard-coded 0.5.

        Returns:
            Tuple ``(face_bboxes, face_landmarks, max_index)``:
            ``face_bboxes`` is a list of ``[x1, y1, x2, y2]`` ints,
            ``face_landmarks`` a list of five ``(x, y)`` int tuples per face,
            and ``max_index`` the position (in those lists) of the box with
            the largest area, or 0 when nothing was detected.
        """
        logging.info('******** Start Face Detect ********')
        input_size = 320
        confidence_threshold = 0.02
        nms_threshold = 0.4
        keep_top_k = 100
        variance = [0.1, 0.2]

        img = np.float32(cv2.resize(frame, (input_size, input_size)))
        im_height, im_width, _ = img.shape
        # Decoded values are relative to the network input; these factors
        # convert them to network-input pixels (x, y repeated per point).
        scale = np.array([im_width, im_height] * 2)
        scale1 = np.array([im_width, im_height] * 5)
        # Ratios between the network input and the original frame.
        w_r = input_size / frame.shape[1]
        h_r = input_size / frame.shape[0]

        # Per-channel mean subtraction, then HWC -> NCHW.
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = np.expand_dims(img, axis=0)
        input_tensor = MNN.Tensor((1, 3, input_size, input_size), MNN.Halide_Type_Float,
                                  img, MNN.Tensor_DimensionType_Caffe)
        self.det_input_tensor.copyFrom(input_tensor)
        self.det_interpreter.runSession(self.det_session)
        bbox_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output0')
        conf_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output1')
        landmark_output_tensor = self.det_interpreter.getSessionOutput(self.det_session, 'output2')
        bbox_output = bbox_output_tensor.getData()
        conf_output = conf_output_tensor.getData()
        landmark_output = landmark_output_tensor.getData()

        # The flat outputs are regrouped per prior; the number of priors is
        # derived from the score output (two scores per prior).
        conf_flat = np.asarray(conf_output).ravel()
        num_priors = conf_flat.size // 2
        norm_confs = conf_flat[:num_priors * 2].reshape(num_priors, 2)
        norm_bboxes = np.asarray(bbox_output).ravel()[:num_priors * 4].reshape(num_priors, 4)
        norm_landmarks = np.asarray(landmark_output).ravel()[:num_priors * 10].reshape(num_priors, 10)

        priorbox = PriorBox(image_size=(im_height, im_width))
        priors = priorbox.forward()
        scores = norm_confs[:, 1]
        boxes = decode(norm_bboxes, priors, variance) * scale
        landms = decode_landm(norm_landmarks, priors, variance) * scale1

        # ignore low scores
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:keep_top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:keep_top_k, :]
        landms = landms[:keep_top_k, :]
        # Row layout from here on: x1, y1, x2, y2, score, then 5 (x, y) pairs.
        dets = np.concatenate((dets, landms), axis=1)

        face_bboxes = []
        face_landmarks = []
        max_area = float('-inf')
        max_index = 0
        for b in dets:
            if b[4] < thr:
                continue
            # Map from network-input pixels back to the original frame.
            x1, y1 = int(b[0] / w_r), int(b[1] / h_r)
            x2, y2 = int(b[2] / w_r), int(b[3] / h_r)
            points = [(int(b[k] / w_r), int(b[k + 1] / h_r)) for k in range(5, 15, 2)]
            area = (x2 - x1) * (y2 - y1)
            if area > max_area:
                max_area = area
                # Index of the entry about to be appended below.
                max_index = len(face_bboxes)
            face_bboxes.append([x1, y1, x2, y2])
            face_landmarks.append(points)
        return face_bboxes, face_landmarks, max_index
if __name__ == '__main__':
    # Manual smoke test: run the detector once on a sample image.
    det_face_model_path = r'/home/jwq/PycharmProjects/situ/src/face_det/Pytorch_Retinaface/weights/mobilenet_0.25.mnn'
    image_path = r'input/3.jpg'
    image_save_path = r'results/3.jpg'
    thr = 0.5
    face_detector = Face_Detector(det_face_model_path)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising; stop here with a clear message.
        raise SystemExit('cannot read image: {}'.format(image_path))
    face_detector.detect(image, thr)
    # Plotting is disabled: Face_Detector defines no ``plot`` method here.
    # image_ploted = face_detector.plot(image, face_bboxes)
    # cv2.imwrite(image_save_path, image_ploted)
from itertools import product as product
from math import ceil
import numpy as np
import torch
import torch.nn as nn
import torchvision.models.detection.backbone_utils as backbone_utils
import torchvision.models._utils as _utils
import torch.nn.functional as F
from collections import OrderedDict
def conv_bn(inp, oup, stride=1, leaky=0):
    """Build a 3x3 convolution (padding 1, no bias) + BatchNorm + LeakyReLU.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: convolution stride.
        leaky: negative slope of the in-place LeakyReLU.
    """
    layers = [
        nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    ]
    return nn.Sequential(*layers)
def conv_bn_no_relu(inp, oup, stride):
    """Build a 3x3 convolution (padding 1, no bias) + BatchNorm, no activation.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: convolution stride.
    """
    convolution = nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False)
    normalisation = nn.BatchNorm2d(oup)
    return nn.Sequential(convolution, normalisation)
def conv_bn1X1(inp, oup, stride, leaky=0):
    """Build a 1x1 convolution (no padding, no bias) + BatchNorm + LeakyReLU.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: convolution stride.
        leaky: negative slope of the in-place LeakyReLU.
    """
    layers = [
        nn.Conv2d(inp, oup, kernel_size=1, stride=stride, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    ]
    return nn.Sequential(*layers)
def conv_dw(inp, oup, stride, leaky=0.1):
    """Build a depthwise-separable convolution block.

    A 3x3 depthwise convolution (``groups=inp``) with the given stride is
    followed by a 1x1 pointwise convolution; each is followed by BatchNorm
    and an in-place LeakyReLU with negative slope ``leaky``.
    """
    depthwise = [
        nn.Conv2d(inp, inp, kernel_size=3, stride=stride, padding=1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    ]
    pointwise = [
        nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    ]
    return nn.Sequential(*depthwise, *pointwise)
class ClassHead(nn.Module):
    """Classification head: a 1x1 convolution giving 2 values per anchor."""

    def __init__(self, inchannels=512, num_anchors=3):
        super().__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2d(
            inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        """Return scores shaped (batch, height * width * num_anchors, 2)."""
        scores = self.conv1x1(x)
        # Channels last, so the 2 values of one anchor end up adjacent.
        channels_last = scores.permute(0, 2, 3, 1).contiguous()
        batch = channels_last.shape[0]
        return channels_last.view(batch, -1, 2)
class BboxHead(nn.Module):
    """Box regression head: a 1x1 convolution giving 4 values per anchor."""

    def __init__(self, inchannels=512, num_anchors=3):
        super().__init__()
        self.conv1x1 = nn.Conv2d(
            inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        """Return regressions shaped (batch, height * width * num_anchors, 4)."""
        regressions = self.conv1x1(x)
        # Channels last, so the 4 values of one anchor end up adjacent.
        channels_last = regressions.permute(0, 2, 3, 1).contiguous()
        batch = channels_last.shape[0]
        return channels_last.view(batch, -1, 4)
class LandmarkHead(nn.Module):
    """Landmark head: a 1x1 convolution giving 10 values per anchor."""

    def __init__(self, inchannels=512, num_anchors=3):
        super().__init__()
        self.conv1x1 = nn.Conv2d(
            inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        """Return landmarks shaped (batch, height * width * num_anchors, 10)."""
        landmarks = self.conv1x1(x)
        # Channels last, so the 10 values of one anchor end up adjacent.
        channels_last = landmarks.permute(0, 2, 3, 1).contiguous()
        batch = channels_last.shape[0]
        return channels_last.view(batch, -1, 10)
class SSH(nn.Module):
    """Context module with three parallel branches of stacked 3x3 convolutions.

    One 3x3 branch produces half of the output channels; two deeper branches
    (two and three stacked 3x3 convolutions, sharing their first layer)
    produce a quarter each. The results are concatenated and passed through
    ReLU.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        assert out_channel % 4 == 0
        # Narrow modules use a leaky activation, wider ones a plain ReLU.
        leaky = 0.1 if out_channel <= 64 else 0
        half = out_channel // 2
        quarter = out_channel // 4
        self.conv3X3 = conv_bn_no_relu(in_channel, half, stride=1)
        self.conv5X5_1 = conv_bn(in_channel, quarter, stride=1, leaky=leaky)
        self.conv5X5_2 = conv_bn_no_relu(quarter, quarter, stride=1)
        self.conv7X7_2 = conv_bn(quarter, quarter, stride=1, leaky=leaky)
        self.conv7x7_3 = conv_bn_no_relu(quarter, quarter, stride=1)

    def forward(self, input):
        """Return the ReLU of the channel-wise concatenation of all branches."""
        branch_3 = self.conv3X3(input)
        # The first layer is shared by the two deeper branches.
        shared = self.conv5X5_1(input)
        branch_5 = self.conv5X5_2(shared)
        branch_7 = self.conv7x7_3(self.conv7X7_2(shared))
        return F.relu(torch.cat([branch_3, branch_5, branch_7], dim=1))
class FPN(nn.Module):
    """Three-level feature pyramid with top-down nearest-neighbour merging."""

    def __init__(self, in_channels_list, out_channels):
        super().__init__()
        # Narrow pyramids use a leaky activation, wider ones a plain ReLU.
        leaky = 0.1 if out_channels <= 64 else 0
        # Lateral 1x1 convolutions bring every level to ``out_channels``.
        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky)
        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky)
        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky)
        # 3x3 convolutions applied after each top-down addition.
        self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
        self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)

    def forward(self, input):
        """Merge the first three values of the mapping ``input``.

        Returns:
            List ``[level1, level2, level3]`` of feature maps, in the same
            order as the values of ``input``.
        """
        levels = list(input.values())
        lateral1 = self.output1(levels[0])
        lateral2 = self.output2(levels[1])
        lateral3 = self.output3(levels[2])

        # Upsample the coarser level to the finer level's size, add, smooth.
        upsampled3 = F.interpolate(
            lateral3, size=[lateral2.size(2), lateral2.size(3)], mode="nearest")
        merged2 = self.merge2(lateral2 + upsampled3)

        upsampled2 = F.interpolate(
            merged2, size=[lateral1.size(2), lateral1.size(3)], mode="nearest")
        merged1 = self.merge1(lateral1 + upsampled2)

        return [merged1, merged2, lateral3]
class MobileNetV1(nn.Module):
    """Small MobileNet-style backbone made of depthwise-separable blocks.

    The network has three stages ending at 64, 128 and 256 channels,
    followed by global average pooling and a 1000-way linear layer.
    """

    def __init__(self):
        super().__init__()
        # Each tuple is (input channels, output channels, stride).
        stage1_blocks = [(8, 16, 1), (16, 32, 2), (32, 32, 1), (32, 64, 2), (64, 64, 1)]
        stage2_blocks = [(64, 128, 2)] + [(128, 128, 1)] * 5
        stage3_blocks = [(128, 256, 2), (256, 256, 1)]

        # Stage 1 starts with a regular strided 3x3 convolution.
        self.stage1 = nn.Sequential(
            conv_bn(3, 8, 2, leaky=0.1),
            *[conv_dw(cin, cout, stride) for cin, cout, stride in stage1_blocks]
        )
        self.stage2 = nn.Sequential(
            *[conv_dw(cin, cout, stride) for cin, cout, stride in stage2_blocks]
        )
        self.stage3 = nn.Sequential(
            *[conv_dw(cin, cout, stride) for cin, cout, stride in stage3_blocks]
        )
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 1000)

    def forward(self, x):
        """Run all stages, pool globally and apply the linear layer."""
        features = self.stage3(self.stage2(self.stage1(x)))
        pooled = self.avg(features)
        # Flatten the pooled 256-channel map before the linear layer.
        flat = pooled.view(-1, 256)
        return self.fc(flat)
class RetinaFace(nn.Module):
    """Detector built from MobileNetV1, an FPN, SSH modules and three heads.

    ``forward`` returns box regressions, softmax class scores and landmark
    regressions, each concatenated over the three pyramid levels.
    """

    def __init__(self):
        super().__init__()
        backbone = MobileNetV1()
        # Expose the output of each backbone stage to the FPN.
        stage_outputs = {'stage1': 1, 'stage2': 2, 'stage3': 3}
        self.body = _utils.IntermediateLayerGetter(backbone, stage_outputs)

        base_channels = 32
        pyramid_inputs = [base_channels * factor for factor in (2, 4, 8)]
        out_channels = 64
        self.fpn = FPN(pyramid_inputs, out_channels)
        self.ssh1 = SSH(out_channels, out_channels)
        self.ssh2 = SSH(out_channels, out_channels)
        self.ssh3 = SSH(out_channels, out_channels)

        self.ClassHead = self._make_class_head(fpn_num=3, inchannels=out_channels)
        self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=out_channels)
        self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=out_channels)

    def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        """Create one ClassHead per pyramid level."""
        return nn.ModuleList(
            [ClassHead(inchannels, anchor_num) for _ in range(fpn_num)])

    def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        """Create one BboxHead per pyramid level."""
        return nn.ModuleList(
            [BboxHead(inchannels, anchor_num) for _ in range(fpn_num)])

    def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        """Create one LandmarkHead per pyramid level."""
        return nn.ModuleList(
            [LandmarkHead(inchannels, anchor_num) for _ in range(fpn_num)])

    def forward(self, inputs):
        """Return ``(bbox_regressions, class_probabilities, ldm_regressions)``."""
        stage_features = self.body(inputs)
        pyramid = self.fpn(stage_features)
        features = [
            self.ssh1(pyramid[0]),
            self.ssh2(pyramid[1]),
            self.ssh3(pyramid[2]),
        ]

        # Each head handles its own pyramid level; results are joined along
        # the anchor dimension.
        bbox_regressions = torch.cat(
            [head(level) for head, level in zip(self.BboxHead, features)], dim=1)
        classifications = torch.cat(
            [head(level) for head, level in zip(self.ClassHead, features)], dim=1)
        ldm_regressions = torch.cat(
            [head(level) for head, level in zip(self.LandmarkHead, features)], dim=1)

        return bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions
class PriorBox(object):
    """Generate the anchor ("prior") boxes for one input image size.

    Anchors are laid out on three grids with steps 8, 16 and 32 pixels; every
    grid cell gets one square anchor per entry of the matching ``min_sizes``
    row. All values are normalised by the image size.
    """

    def __init__(self, image_size=None):
        """Store the configuration and derive the grid sizes.

        Args:
            image_size: ``(height, width)`` of the network input. It must be
                provided: the ``None`` default fails as soon as the grid
                sizes are computed.
        """
        super(PriorBox, self).__init__()
        self.min_sizes = [[16, 32], [64, 128], [256, 512]]
        self.steps = [8, 16, 32]
        self.clip = False
        self.image_size = image_size
        # Grid size (rows, columns) for every step.
        self.feature_maps = [
            [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
            for step in self.steps
        ]
        self.name = "s"

    def forward(self):
        """Return the anchors as an array with 4 columns.

        Each row is ``(cx, cy, width, height)`` in normalised coordinates,
        ordered by grid level, then row, then column, then anchor size.
        When ``self.clip`` is true the values are limited to [0, 1].
        """
        img_h, img_w = self.image_size[0], self.image_size[1]
        anchors = []
        for k, (rows, cols) in enumerate(self.feature_maps):
            step = self.steps[k]
            for i, j in product(range(rows), range(cols)):
                # Anchor centre sits in the middle of grid cell (i, j).
                cx = (j + 0.5) * step / img_w
                cy = (i + 0.5) * step / img_h
                for min_size in self.min_sizes[k]:
                    anchors += [cx, cy, min_size / img_w, min_size / img_h]
        output = np.array(anchors).reshape(-1, 4)
        if self.clip:
            # The result is a NumPy array, so clip with NumPy; the previous
            # torch-style ``clamp_`` call does not exist on ndarrays.
            np.clip(output, 0, 1, out=output)
        return output
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!