Improve performance and logic
@@ -6,8 +6,11 @@ import cv2
import pandas as pd
import os
import time
import facenet
from PIL import Image, ImageDraw, ImageFont

from model import create_model

start_time = 0
# 1. Dlib frontal face detector
# detector = dlib.get_frontal_face_detector()
@@ -22,6 +25,9 @@ predictor = dlib.shape_predictor('data/data_dlib/shape_predictor_68_face_landmar
# 3. Dlib ResNet face recognition model; extracts 128-D feature vectors
face_reco_model = dlib.face_recognition_model_v1("data/data_dlib/dlib_face_recognition_resnet_model_v1.dat")

nn4_small2_pretrained = create_model()
nn4_small2_pretrained.load_weights('weights/nn4.small2.v1.h5')


class Face_Recognizer:
    def __init__(self):
@@ -32,6 +38,9 @@ class Face_Recognizer:
        self.name_known_cnt = 0
        self.name_known_list = []

        self.metadata = []
        self.embedded = []

        # Coordinates and names of all faces currently captured by the camera
        self.pos_camera_list = []
        self.name_camera_list = []
@@ -46,38 +55,39 @@ class Face_Recognizer:

    # Read registered face features from "features_all.csv"
    def get_face_database(self):
        if os.path.exists("data/features_all.csv"):
            path_features_known_csv = "data/features_all.csv"
            csv_rd = pd.read_csv(path_features_known_csv, header=None)
            # 2. Read the known face data
            for i in range(csv_rd.shape[0]):
                features_someone_arr = []
                for j in range(0, 128):
                    if csv_rd.iloc[i][j] == '':
                        features_someone_arr.append('0')
                    else:
                        features_someone_arr.append(csv_rd.iloc[i][j])
                self.features_known_list.append(features_someone_arr)
                self.name_known_list.append("Person_" + str(i + 1))
            self.name_known_cnt = len(self.name_known_list)
            print("Faces in Database:", len(self.features_known_list))
        if os.path.exists("data/data_faces_from_camera/"):
            self.metadata = facenet.load_metadata("data/data_faces_from_camera/")
            self.name_known_cnt = self.metadata.shape[0]
            self.embedded = np.zeros((self.metadata.shape[0], 128))

            for i, m in enumerate(self.metadata):
                for j, n in enumerate(m):
                    img = facenet.load_image(n.image_path())
                    # img = align_image(img)
                    img = cv2.resize(img, (96, 96))
                    # scale RGB values to interval [0,1]
                    img = (img / 255.).astype(np.float32)
                    # obtain embedding vector for image
                    self.embedded[i] = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0]
                    # self.embedded[i] = self.embedded[i] / len(m)
                self.name_known_list.append('')
            return 1
        else:
            print('##### Warning #####', '\n')
            print("'features_all.csv' not found!")
            print(
                "Please run 'get_faces_from_camera.py' and 'features_extraction_to_csv.py' before 'face_reco_from_camera.py'",
                "Please run 'get_faces_from_camera.py' before 'face_reco_from_camera.py'",
                '\n')
            print('##### End Warning #####')
            return 0

    # Compute the Euclidean distance between two 128-D vectors
    @staticmethod
    def return_euclidean_distance(feature_1, feature_2):
        feature_1 = np.array(feature_1)
        feature_2 = np.array(feature_2)
        dist = np.sqrt(np.sum((feature_1 - feature_2) ** 2))
        return dist
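As a quick sanity check of the formula (editor's sketch, not part of the commit): two 128-D vectors that differ only by (3, 4) in their first two components are exactly 5.0 apart.

import numpy as np
f1 = np.zeros(128)
f1[0], f1[1] = 3.0, 4.0
f2 = np.zeros(128)
# sqrt(3**2 + 4**2) == 5.0
assert Face_Recognizer.return_euclidean_distance(f1, f2) == 5.0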
    # @staticmethod
    # def return_euclidean_distance(feature_1, feature_2):
    #     feature_1 = np.array(feature_1)
    #     feature_2 = np.array(feature_2)
    #     dist = np.sqrt(np.sum((feature_1 - feature_2) ** 2))
    #     return dist

    # Update the FPS
    def update_fps(self):
@@ -111,8 +121,8 @@ class Face_Recognizer:
        # Default known name: person_1, person_2, person_3
        self.name_known_list[0] = '唐麒'.encode('utf-8').decode()
        self.name_known_list[1] = '段海燕'.encode('utf-8').decode()
        # self.name_known_list[2] = 'xx'.encode('utf-8').decode()
        # self.name_known_list[3] = 'xx'.encode('utf-8').decode()
        # self.name_known_list[2] = '唐保生'.encode('utf-8').decode()
        # self.name_known_list[3] = '唐麒'.encode('utf-8').decode()
        # self.name_known_list[4] = 'xx'.encode('utf-8').decode()

    # Process the captured video stream and run face recognition
@@ -142,18 +152,18 @@ class Face_Recognizer:
            # 2. Faces were detected
            if faces.shape[2] != 0:
                # 3. Get the features of every face in the current frame and store them in self.features_camera_list
                for i in range(0, faces.shape[2]):
                    confidence = faces[0, 0, i, 2]

                    # filter out weak detections by ensuring the `confidence` is
                    # greater than the minimum confidence
                    if confidence < 0.5:
                        continue
                    box = faces[0, 0, i, 3:7] * np.array([w, h, w, h])
                    (startX, startY, endX, endY) = box.astype("int")
                    rect = dlib.rectangle(startX, startY, endX, endY)
                    shape = predictor(img_rd, rect)
                    self.features_camera_list.append(face_reco_model.compute_face_descriptor(img_rd, shape))
                # for i in range(0, faces.shape[2]):
                #     confidence = faces[0, 0, i, 2]
                #
                #     # filter out weak detections by ensuring the `confidence` is
                #     # greater than the minimum confidence
                #     if confidence < 0.5:
                #         continue
                #     box = faces[0, 0, i, 3:7] * np.array([w, h, w, h])
                #     (startX, startY, endX, endY) = box.astype("int")
                #     rect = dlib.rectangle(startX, startY, endX, endY)
                #     shape = predictor(img_rd, rect)
                #     self.features_camera_list.append(face_reco_model.compute_face_descriptor(img_rd, shape))

                # 4. Iterate over every face captured in the frame
                for k in range(0, faces.shape[2]):
@@ -164,13 +174,13 @@ class Face_Recognizer:
                    # greater than the minimum confidence
                    if confidence < 0.5:
                        continue
                    self.faces_cnt+=1
                    self.faces_cnt += 1
                    # print("##### camera person", k + 1, "#####")
                    # Keep the name just above the bounding box
                    # Determine the coordinates for the name
                    # Default every face to unrecognized first
                    # Set the default names of faces with "unknown"
                    self.name_camera_list.append("unknown")
                    self.name_camera_list.append("陌生人")

                    # Name coordinates for each captured face
                    box = faces[0, 0, k, 3:7] * np.array([w, h, w, h])
@@ -178,25 +188,39 @@
                    self.pos_camera_list.append(tuple(
                        [int(startX + 5), int(startY - 30)]))

                    height = (endY - startY)
                    width = (endX - startX)

                    img_blank = np.zeros((height, width, 3), np.uint8)
                    for ii in range(height):
                        for jj in range(width):
                            img_blank[ii][jj] = img_rd[startY + ii][startX + jj]

                    img = cv2.resize(img_blank, (96, 96))
                    img = (img / 255.).astype(np.float32)
                    img = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0]

                    # 5. For this face, iterate over all stored face features
                    e_distance_list = []
                    for i in range(len(self.features_known_list)):
                        # If person_X has data
                        if str(self.features_known_list[i][0]) != '0.0':
                            # print("with person", str(i + 1), "the e distance: ", end='')
                            e_distance_tmp = self.return_euclidean_distance(self.features_camera_list[k],
                                                                            self.features_known_list[i])
                            # print(e_distance_tmp)
                            e_distance_list.append(e_distance_tmp)
                        else:
                            # Empty data for person_X
                            e_distance_list.append(999999999)
                    # 6. Find the match with the minimum Euclidean distance
                    for i in range(0, len(self.embedded)):
                        e_distance_list.append(facenet.distance(self.embedded[i], img))
                    # for i in range(len(self.features_known_list)):
                    #     # If person_X has data
                    #     if str(self.features_known_list[i][0]) != '0.0':
                    #         # print("with person", str(i + 1), "the e distance: ", end='')
                    #         e_distance_tmp = self.return_euclidean_distance(self.features_camera_list[k],
                    #                                                         self.features_known_list[i])
                    #         # print(e_distance_tmp)
                    #         e_distance_list.append(e_distance_tmp)
                    #     else:
                    #         # Empty data for person_X
                    #         e_distance_list.append(999999999)
                    # # 6. Find the match with the minimum Euclidean distance
                    similar_person_num = e_distance_list.index(min(e_distance_list))
                    # print("Minimum e distance with person", self.name_known_list[similar_person_num])

                    if min(e_distance_list) < 1:
                        self.name_camera_list[k] = self.name_known_list[similar_person_num]
                    # print(min(e_distance_list))
                    if min(e_distance_list) < 0.58:
                        self.name_camera_list[k] = self.name_known_list[similar_person_num % 8]
                        # print("May be person " + str(self.name_known_list[similar_person_num]))
                    else:
                        pass
@@ -205,10 +229,16 @@
                # Bounding boxes
                for kk, d in enumerate(faces):
                    # Draw the bounding box
                    cv2.rectangle(img_rd, tuple([startX, startY]), tuple([endX, endY]),
                                  (0, 255, 0), 2)
                    cv2.rectangle(img_rd, tuple([startX, startY - 35]), tuple([endX, startY]),
                                  (0, 255, 0), cv2.FILLED)
                    if self.name_camera_list[k] != '陌生人':
                        cv2.rectangle(img_rd, tuple([startX, startY]), tuple([endX, endY]),
                                      (0, 255, 0), 2)
                        cv2.rectangle(img_rd, tuple([startX, startY - 35]), tuple([endX, startY]),
                                      (0, 255, 0), cv2.FILLED)
                    else:
                        cv2.rectangle(img_rd, tuple([startX, startY]), tuple([endX, endY]),
                                      (0, 0, 255), 2)
                        cv2.rectangle(img_rd, tuple([startX, startY - 35]), tuple([endX, startY]),
                                      (0, 0, 255), cv2.FILLED)
                # print('\n')
                # self.faces_cnt = faces.shape[2]
                # if len(self.name_camera_list) > 0:
@@ -226,7 +256,7 @@
            cv2.imshow("camera", img_with_name)

            # 9. Update the stream FPS
            # self.update_fps()
            self.update_fps()

    # Open the camera with OpenCV and process the stream
    def run(self):

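Taken together, the new matching path above reduces to: embed the face crop, compute a distance to every stored embedding, and accept the closest identity only under a threshold. A minimal standalone sketch of that decision (editor's illustration; the 0.58 threshold and the `% 8` wrap are taken from the hunk above, and `facenet.distance` is the squared-L2 helper added later in this commit):

import numpy as np
import facenet

def best_match(query_emb, embedded, names, threshold=0.58):
    # distance from the query embedding to every stored embedding
    d = [facenet.distance(e, query_emb) for e in embedded]
    idx = int(np.argmin(d))
    return names[idx % 8] if d[idx] < threshold else "陌生人"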
@@ -195,7 +195,8 @@ class Face_Register:
                              color_rectangle, 2)

                # 7. Create an empty image matching the size of the face
                img_blank = np.zeros((int(height * 2), width * 2, 3), np.uint8)
                # img_blank = np.zeros((int(height * 2), width * 2, 3), np.uint8)
                img_blank = np.zeros((height, width, 3), np.uint8)

                if save_flag:
                    # 8. Press 's' to save the face captured by the camera to disk
@@ -205,9 +206,9 @@ class Face_Register:
                    self.ss_cnt += 1

                    if self.index <= 7:
                        for ii in range(height * 2):
                            for jj in range(width * 2):
                                img_blank[ii][jj] = img_rd[startY - hh + ii][startX - ww + jj]
                        for ii in range(height):
                            for jj in range(width):
                                img_blank[ii][jj] = img_rd[startY + ii][startX + jj]
                        cv2.imwrite(current_face_dir + "/img_face_" + str(self.ss_cnt) + ".jpg", img_blank)
                        print("写入本地 / Save into:",
                              str(current_face_dir) + "/img_face_" + str(self.ss_cnt) + ".jpg")

@@ -4,10 +4,14 @@ import time
import cv2
import numpy as np

from auto_whiteBalance import aug

from GFmatrix import GF

np.seterr(invalid='ignore')

# config = {'morning': [80, 220, 125, 125]}


class Calibration:
    def __init__(self):
@@ -26,8 +30,9 @@ class Calibration:
    def update_fps(self):
        now = time.time()
        self.frame_time = now - self.frame_start_time
        self.fps = 1.0 / self.frame_time
        self.frame_start_time = now
        if self.frame_time != 0:
            self.fps = 1.0 / self.frame_time
        self.frame_start_time = now
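The added guard avoids a ZeroDivisionError when two frames arrive with an identical timestamp; the previous FPS value is simply kept for that frame. The same pattern as a standalone sketch (editor's illustration):

import time

class FpsCounter:
    def __init__(self):
        self.fps = 0.0
        self.frame_start_time = time.time()

    def update(self):
        now = time.time()
        frame_time = now - self.frame_start_time
        if frame_time != 0:  # guard against a zero time delta
            self.fps = 1.0 / frame_time
        self.frame_start_time = now
        return self.fps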

    # Add caption text on top of the generated cv2 window
    def draw_note(self, img_rd):
@@ -244,20 +249,20 @@ class Calibration:
    def decode(self, frame, feature_point):
        points = []
        position = []

        frame = aug(frame)
        color_map = np.zeros((frame.shape[0], frame.shape[1]))
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(frame)
        for i in range(0, frame.shape[0]):
            for j in range(0, frame.shape[1]):
                if l[i][j] < 50:
                if l[i][j] < 100:
                    color_map[i][j] = 3
                    frame[i][j] = np.array([0, 0, 0])
                elif l[i][j] > 140:
                elif l[i][j] > 220:
                    frame[i][j] = np.array([255, 255, 255])
                    color_map[i][j] = 255
                else:
                    if b[i][j] < 125:
                    if b[i][j] < 120:
                        color_map[i][j] = 0
                        frame[i][j] = np.array([255, 0, 0])
                    else:
@@ -368,23 +373,25 @@ class Calibration:
                                 (feature_points[index + 1][1] * 2 - feature_points[index][1] * 2) ** 2)

        scale = world_distance / pixel_distance

        print(pixel_distance)
        print(feature_points[index + 1][0] - feature_points[index][0],
              feature_points[index + 1][1] - feature_points[index][1])
        # print(distance)
        # for i in range(index - 1, index + 2):
        #     print(distance[i])
        # Draw the feature points
        # point_size = 1
        # point_color = (0, 0, 255)
        # thickness = 0  # can be 0, 4 or 8
        point_size = 1
        point_color = (0, 0, 255)
        thickness = 0  # can be 0, 4 or 8

        # for i in range(0, len(featurepoints_position)):
        #     cv2.circle(img_rd, (int(featurepoints_position[i][1]), int(featurepoints_position[i][0])),
        #                point_size, point_color, thickness)
        # for point in featurepoints_position:
        #     cv2.circle(img_rd, (int(point[1]), int(point[0])), point_size, point_color, thickness)
        # cv2.namedWindow("image")
        # cv2.imshow('image', img_rd)
        # cv2.waitKey(0)  # press 0 to exit
        for point in featurepoints_position:
            cv2.circle(img_rd, (int(point[1]), int(point[0])), point_size, point_color, thickness)
        cv2.namedWindow("image")
        cv2.imshow('image', img_rd)
        cv2.waitKey(0)  # press 0 to exit
        return scale
        self.draw_note(img_rd)
        self.update_fps()

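For intuition on the calibration result: `scale` converts pixel distances into world units. A worked example under assumed numbers (editor's illustration): if two adjacent feature points are known to be 10 mm apart in the world and measure 50 px apart in the image, then

world_distance = 10.0  # mm, assumed
pixel_distance = 50.0  # px, assumed
scale = world_distance / pixel_distance  # 0.2 mm per pixel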
188  Class/detection/align.py  Normal file
@@ -0,0 +1,188 @@
# Copyright 2015-2016 Carnegie Mellon University
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for dlib-based alignment."""

import cv2
import dlib
import numpy as np

TEMPLATE = np.float32([
    (0.0792396913815, 0.339223741112), (0.0829219487236, 0.456955367943),
    (0.0967927109165, 0.575648016728), (0.122141515615, 0.691921601066),
    (0.168687863544, 0.800341263616), (0.239789390707, 0.895732504778),
    (0.325662452515, 0.977068762493), (0.422318282013, 1.04329000149),
    (0.531777802068, 1.06080371126), (0.641296298053, 1.03981924107),
    (0.738105872266, 0.972268833998), (0.824444363295, 0.889624082279),
    (0.894792677532, 0.792494155836), (0.939395486253, 0.681546643421),
    (0.96111933829, 0.562238253072), (0.970579841181, 0.441758925744),
    (0.971193274221, 0.322118743967), (0.163846223133, 0.249151738053),
    (0.21780354657, 0.204255863861), (0.291299351124, 0.192367318323),
    (0.367460241458, 0.203582210627), (0.4392945113, 0.233135599851),
    (0.586445962425, 0.228141644834), (0.660152671635, 0.195923841854),
    (0.737466449096, 0.182360984545), (0.813236546239, 0.192828009114),
    (0.8707571886, 0.235293377042), (0.51534533827, 0.31863546193),
    (0.516221448289, 0.396200446263), (0.517118861835, 0.473797687758),
    (0.51816430343, 0.553157797772), (0.433701156035, 0.604054457668),
    (0.475501237769, 0.62076344024), (0.520712933176, 0.634268222208),
    (0.565874114041, 0.618796581487), (0.607054002672, 0.60157671656),
    (0.252418718401, 0.331052263829), (0.298663015648, 0.302646354002),
    (0.355749724218, 0.303020650651), (0.403718978315, 0.33867711083),
    (0.352507175597, 0.349987615384), (0.296791759886, 0.350478978225),
    (0.631326076346, 0.334136672344), (0.679073381078, 0.29645404267),
    (0.73597236153, 0.294721285802), (0.782865376271, 0.321305281656),
    (0.740312274764, 0.341849376713), (0.68499850091, 0.343734332172),
    (0.353167761422, 0.746189164237), (0.414587777921, 0.719053835073),
    (0.477677654595, 0.706835892494), (0.522732900812, 0.717092275768),
    (0.569832064287, 0.705414478982), (0.635195811927, 0.71565572516),
    (0.69951672331, 0.739419187253), (0.639447159575, 0.805236879972),
    (0.576410514055, 0.835436670169), (0.525398405766, 0.841706377792),
    (0.47641545769, 0.837505914975), (0.41379548902, 0.810045601727),
    (0.380084785646, 0.749979603086), (0.477955996282, 0.74513234612),
    (0.523389793327, 0.748924302636), (0.571057789237, 0.74332894691),
    (0.672409137852, 0.744177032192), (0.572539621444, 0.776609286626),
    (0.5240106503, 0.783370783245), (0.477561227414, 0.778476346951)])

TPL_MIN, TPL_MAX = np.min(TEMPLATE, axis=0), np.max(TEMPLATE, axis=0)
MINMAX_TEMPLATE = (TEMPLATE - TPL_MIN) / (TPL_MAX - TPL_MIN)


class AlignDlib:
    """
    Use `dlib's landmark estimation <http://blog.dlib.net/2014/08/real-time-face-pose-estimation.html>`_ to align faces.

    The alignment preprocesses faces for input into a neural network.
    Faces are resized to the same size (such as 96x96) and transformed
    to make landmarks (such as the eyes and nose) appear at the same
    location on every image.

    Normalized landmarks:

    .. image:: ../images/dlib-landmark-mean.png
    """

    #: Landmark indices.
    INNER_EYES_AND_BOTTOM_LIP = [39, 42, 57]
    OUTER_EYES_AND_NOSE = [36, 45, 33]

    def __init__(self, facePredictor):
        """
        Instantiate an 'AlignDlib' object.

        :param facePredictor: The path to dlib's face predictor.
        :type facePredictor: str
        """
        assert facePredictor is not None

        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(facePredictor)

    def getAllFaceBoundingBoxes(self, rgbImg):
        """
        Find all face bounding boxes in an image.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :return: All face bounding boxes in an image.
        :rtype: dlib.rectangles
        """
        assert rgbImg is not None

        try:
            return self.detector(rgbImg, 1)
        except Exception as e:
            print("Warning: {}".format(e))
            # In rare cases, exceptions are thrown.
            return []

    def getLargestFaceBoundingBox(self, rgbImg, skipMulti=False):
        """
        Find the largest face bounding box in an image.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param skipMulti: Skip image if more than one face detected.
        :type skipMulti: bool
        :return: The largest face bounding box in an image, or None.
        :rtype: dlib.rectangle
        """
        assert rgbImg is not None

        faces = self.getAllFaceBoundingBoxes(rgbImg)
        if (not skipMulti and len(faces) > 0) or len(faces) == 1:
            return max(faces, key=lambda rect: rect.width() * rect.height())
        else:
            return None

    def findLandmarks(self, rgbImg, bb):
        """
        Find the landmarks of a face.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param bb: Bounding box around the face to find landmarks for.
        :type bb: dlib.rectangle
        :return: Detected landmark locations.
        :rtype: list of (x,y) tuples
        """
        assert rgbImg is not None
        assert bb is not None

        points = self.predictor(rgbImg, bb)
        return list(map(lambda p: (p.x, p.y), points.parts()))

    def align(self, imgDim, rgbImg, bb=None,
              landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP,
              skipMulti=False):
        r"""align(imgDim, rgbImg, bb=None, landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP)

        Transform and align a face in an image.

        :param imgDim: The edge length in pixels of the square the image is resized to.
        :type imgDim: int
        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param bb: Bounding box around the face to align. \
                   Defaults to the largest face.
        :type bb: dlib.rectangle
        :param landmarks: Detected landmark locations. \
                          Landmarks found on `bb` if not provided.
        :type landmarks: list of (x,y) tuples
        :param landmarkIndices: The indices to transform to.
        :type landmarkIndices: list of ints
        :param skipMulti: Skip image if more than one face detected.
        :type skipMulti: bool
        :return: The aligned RGB image. Shape: (imgDim, imgDim, 3)
        :rtype: numpy.ndarray
        """
        assert imgDim is not None
        assert rgbImg is not None
        assert landmarkIndices is not None

        if bb is None:
            bb = self.getLargestFaceBoundingBox(rgbImg, skipMulti)
            if bb is None:
                return

        if landmarks is None:
            landmarks = self.findLandmarks(rgbImg, bb)

        npLandmarks = np.float32(landmarks)
        npLandmarkIndices = np.array(landmarkIndices)

        H = cv2.getAffineTransform(npLandmarks[npLandmarkIndices],
                                   imgDim * MINMAX_TEMPLATE[npLandmarkIndices])
        thumbnail = cv2.warpAffine(rgbImg, H, (imgDim, imgDim))

        return thumbnail
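A minimal usage sketch for this class (editor's illustration; the landmark model path is the one this commit passes to AlignDlib in facenet.py, the image file name is hypothetical):

import cv2
from align import AlignDlib

alignment = AlignDlib('data/data_dlib/landmarks.dat')
bgr = cv2.imread('some_face.jpg')  # hypothetical input image
rgb = bgr[..., ::-1]               # OpenCV loads BGR; AlignDlib expects RGB
aligned = alignment.align(96, rgb, landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)
# aligned is a 96x96x3 RGB crop, or None if no face was found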
41  Class/detection/auto_whiteBalance.py  Normal file
@@ -0,0 +1,41 @@
import numpy as np
import cv2


def compute(img, min_percentile, max_percentile):
    """Compute the percentile cut-points, to drop the outliers at both ends of the image's histogram."""
    max_percentile_pixel = np.percentile(img, max_percentile)
    min_percentile_pixel = np.percentile(img, min_percentile)

    return max_percentile_pixel, min_percentile_pixel


def aug(src):
    """Enhance the image brightness."""
    if get_lightness(src) > 130:
        print("Image is bright enough; no enhancement applied")
        return src  # early return added by the editor: without it, enhancement ran despite the message above

    # First compute the percentile cut-points, to drop the few outlier pixel values;
    # the percentiles are configurable. For example, a histogram may span 0 to 255
    # while the actual pixel values mostly sit within 0 to 20.

    max_percentile_pixel, min_percentile_pixel = compute(src, 1, 99)

    # Clip values outside the percentile range
    src[src >= max_percentile_pixel] = max_percentile_pixel
    src[src <= min_percentile_pixel] = min_percentile_pixel

    # Stretch the clipped range towards 0-255. 255*0.1 and 255*0.9 are used instead
    # of 0 and 255 because pixel values could otherwise overflow.
    out = np.zeros(src.shape, src.dtype)
    cv2.normalize(src, out, 255 * 0.1, 255 * 0.9, cv2.NORM_MINMAX)

    return out


def get_lightness(src):
    # Compute the brightness
    hsv_image = cv2.cvtColor(src, cv2.COLOR_BGR2HSV)
    lightness = hsv_image[:, :, 2].mean()

    return lightness

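A quick usage sketch (editor's illustration; the file names are hypothetical):

import cv2
from auto_whiteBalance import aug

frame = cv2.imread('dark_frame.jpg')  # hypothetical input
enhanced = aug(frame)                 # percentile-clipped, stretched to [25.5, 229.5]
cv2.imwrite('enhanced.jpg', enhanced)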
170  Class/detection/facenet.py  Normal file
@@ -0,0 +1,170 @@
from model import create_model
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Layer
from data import triplet_generator
import numpy as np
import os.path
import cv2
from align import AlignDlib

alignment = AlignDlib('data/data_dlib/landmarks.dat')


class TripletLossLayer(Layer):
    def __init__(self, alpha, **kwargs):
        self.alpha = alpha
        super(TripletLossLayer, self).__init__(**kwargs)

    def triplet_loss(self, inputs):
        a, p, n = inputs
        p_dist = K.sum(K.square(a - p), axis=-1)
        n_dist = K.sum(K.square(a - n), axis=-1)
        return K.sum(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0)

    def call(self, inputs):
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss


class IdentityMetadata():
    def __init__(self, base, name, file):
        # dataset base directory
        self.base = base
        # identity name
        self.name = name
        # image file name
        self.file = file

    def __repr__(self):
        return self.image_path()

    def image_path(self):
        return os.path.join(self.base, self.name, self.file)


def load_metadata(path):
    metadata = []
    for i in sorted(os.listdir(path)):
        person = []
        for f in sorted(os.listdir(os.path.join(path, i))):
            # Check the file extension; allow only jpg/jpeg files.
            ext = os.path.splitext(f)[1]
            if ext == '.jpg' or ext == '.jpeg':
                person.append(IdentityMetadata(path, i, f))
        metadata.append(person)
    return np.array(metadata, dtype=object)
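load_metadata expects one sub-directory per identity. With the base directory this commit uses elsewhere (data/data_faces_from_camera/), the layout looks like this (editor's sketch; folder and file names are illustrative):

# data/data_faces_from_camera/
#     person_1/
#         img_face_1.jpg
#         img_face_2.jpg
#     person_2/
#         img_face_1.jpg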

def load_image(path):
    img = cv2.imread(path, 1)
    # OpenCV loads images with color channels
    # in BGR order. So we need to reverse them
    return img[..., ::-1]


def align_image(img):
    return alignment.align(96, img, alignment.getLargestFaceBoundingBox(img),
                           landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)


def distance(emb1, emb2):
    return np.sum(np.square(emb1 - emb2))
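Note that distance() returns the squared Euclidean distance (no square root), unlike return_euclidean_distance() in the recognizer, so thresholds on the two are not directly comparable. A quick check (editor's sketch):

import numpy as np
e1 = np.zeros(128)
e1[0], e1[1] = 3.0, 4.0
e2 = np.zeros(128)
print(distance(e1, e2))  # 25.0, i.e. 5.0 ** 2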

# if __name__ == '__main__':
#     # nn4_small2 = create_model()
#     #
#     # # Input for anchor, positive and negative images
#     # in_a = Input(shape=(96, 96, 3))
#     # in_p = Input(shape=(96, 96, 3))
#     # in_n = Input(shape=(96, 96, 3))
#     #
#     # # Output for anchor, positive and negative embedding vectors
#     # # The nn4_small model instance is shared (Siamese network)
#     # emb_a = nn4_small2(in_a)
#     # emb_p = nn4_small2(in_p)
#     # emb_n = nn4_small2(in_n)
#     #
#     # # Layer that computes the triplet loss from anchor, positive and negative embedding vectors
#     # triplet_loss_layer = TripletLossLayer(alpha=0.2, name='triplet_loss_layer')([emb_a, emb_p, emb_n])
#     #
#     # # Model that can be trained with anchor, positive negative images
#     # nn4_small2_train = Model([in_a, in_p, in_n], triplet_loss_layer)
#     #
#     # # triplet_generator() creates a generator that continuously returns
#     # # ([a_batch, p_batch, n_batch], None) tuples where a_batch, p_batch
#     # # and n_batch are batches of anchor, positive and negative RGB images
#     # # each having a shape of (batch_size, 96, 96, 3).
#     # generator = triplet_generator()
#     #
#     # nn4_small2_train.compile(loss=None, optimizer='adam')
#     # nn4_small2_train.fit_generator(generator, epochs=1, steps_per_epoch=100)
#
#     # Please note that the current implementation of the generator only generates
#     # random image data. The main goal of this code snippet is to demonstrate
#     # the general setup for model training. In the following, we will anyway
#     # use a pre-trained model so we don't need a generator here that operates
#     # on real training data. I'll maybe provide a fully functional generator
#     # later.
#     nn4_small2_pretrained = create_model()
#     nn4_small2_pretrained.load_weights('weights/nn4.small2.v1.h5')
#
#     metadata = load_metadata('images')
#
#     # Initialize the OpenFace face alignment utility
#
#
#     # # Load an image of Jacques Chirac
#     # jc_orig = load_image(metadata[78].image_path())
#     #
#     # # Detect face and return bounding box
#     # bb = alignment.getLargestFaceBoundingBox(jc_orig)
#     #
#     # # Transform image using specified face landmark indices and crop image to 96x96
#     # jc_aligned = alignment.align(96, jc_orig, bb, landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)
#
#     embedded = np.zeros((metadata.shape[0], 128))
#
#     for i, m in enumerate(metadata):
#         img = load_image(m.image_path())
#         # img = align_image(img)
#         img = cv2.resize(img, (96, 96))
#         # scale RGB values to interval [0,1]
#         try:
#             img = (img / 255.).astype(np.float32)
#             # obtain embedding vector for image
#             embedded[i] = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0]
#         except:
#             print(m.image_path)
#
#     # show_pair(77, 78)
#     # show_pair(77, 100)
#     cap = cv2.VideoCapture(0)
#     while cap.isOpened():
#         flag, frame = cap.read()
#         kk = cv2.waitKey(1)
#         # press 'q' to exit
#         if kk == ord('q'):
#             break
#         else:
#             try:
#                 # img = align_image(frame)
#                 frame = cv2.resize(frame, (96, 96))
#                 img = (frame / 255.).astype(np.float32)
#                 img = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0]
#                 d = []
#                 for i in range(0, len(embedded)):
#                     d.append(distance(embedded[i], img))
#
#                 name = ['Person_2', 'tbs', 'Person_1']
#
#                 print(name[d.index(min(d))])
#                 # if d < 1:
#                 #     print("same face")
#                 # else:
#                 #     print("different face")
#             except Exception as e:
#                 print(e)
#             cv2.imshow("normal", frame)
#     cap.release()
#     cv2.destroyAllWindows()
159  Class/detection/facenet_utils.py  Normal file
@@ -0,0 +1,159 @@
# -----------------------------------------------------------------------------------------
# Code taken from https://github.com/iwantooxxoox/Keras-OpenFace (with minor modifications)
# -----------------------------------------------------------------------------------------

import tensorflow as tf
import numpy as np
import os

from numpy import genfromtxt
from keras.layers import Conv2D, ZeroPadding2D, Activation
from keras.layers.normalization import BatchNormalization
from tensorflow.python.keras.backend import _get_session

_FLOATX = 'float32'

def variable(value, dtype=_FLOATX, name=None):
    v = tf.Variable(np.asarray(value, dtype=dtype), name=name)
    _get_session().run(v.initializer)
    return v

def shape(x):
    return x.get_shape()

def square(x):
    return tf.square(x)

def zeros(shape, dtype=_FLOATX, name=None):
    return variable(np.zeros(shape), dtype, name)

def concatenate(tensors, axis=-1):
    if axis < 0:
        axis = axis % len(tensors[0].get_shape())
    return tf.concat(axis, tensors)

def LRN2D(x):
    return tf.nn.lrn(x, alpha=1e-4, beta=0.75)

def conv2d_bn(
        x,
        layer=None,
        cv1_out=None,
        cv1_filter=(1, 1),
        cv1_strides=(1, 1),
        cv2_out=None,
        cv2_filter=(3, 3),
        cv2_strides=(1, 1),
        padding=None,
):
    num = '' if cv2_out is None else '1'
    tensor = Conv2D(cv1_out, cv1_filter, strides=cv1_strides, name=layer + '_conv' + num)(x)
    tensor = BatchNormalization(axis=3, epsilon=0.00001, name=layer + '_bn' + num)(tensor)
    tensor = Activation('relu')(tensor)
    if padding is None:
        return tensor
    tensor = ZeroPadding2D(padding=padding)(tensor)
    if cv2_out is None:
        return tensor
    tensor = Conv2D(cv2_out, cv2_filter, strides=cv2_strides, name=layer + '_conv' + '2')(tensor)
    tensor = BatchNormalization(axis=3, epsilon=0.00001, name=layer + '_bn' + '2')(tensor)
    tensor = Activation('relu')(tensor)
    return tensor

weights = [
    'conv1', 'bn1', 'conv2', 'bn2', 'conv3', 'bn3',
    'inception_3a_1x1_conv', 'inception_3a_1x1_bn',
    'inception_3a_pool_conv', 'inception_3a_pool_bn',
    'inception_3a_5x5_conv1', 'inception_3a_5x5_conv2', 'inception_3a_5x5_bn1', 'inception_3a_5x5_bn2',
    'inception_3a_3x3_conv1', 'inception_3a_3x3_conv2', 'inception_3a_3x3_bn1', 'inception_3a_3x3_bn2',
    'inception_3b_3x3_conv1', 'inception_3b_3x3_conv2', 'inception_3b_3x3_bn1', 'inception_3b_3x3_bn2',
    'inception_3b_5x5_conv1', 'inception_3b_5x5_conv2', 'inception_3b_5x5_bn1', 'inception_3b_5x5_bn2',
    'inception_3b_pool_conv', 'inception_3b_pool_bn',
    'inception_3b_1x1_conv', 'inception_3b_1x1_bn',
    'inception_3c_3x3_conv1', 'inception_3c_3x3_conv2', 'inception_3c_3x3_bn1', 'inception_3c_3x3_bn2',
    'inception_3c_5x5_conv1', 'inception_3c_5x5_conv2', 'inception_3c_5x5_bn1', 'inception_3c_5x5_bn2',
    'inception_4a_3x3_conv1', 'inception_4a_3x3_conv2', 'inception_4a_3x3_bn1', 'inception_4a_3x3_bn2',
    'inception_4a_5x5_conv1', 'inception_4a_5x5_conv2', 'inception_4a_5x5_bn1', 'inception_4a_5x5_bn2',
    'inception_4a_pool_conv', 'inception_4a_pool_bn',
    'inception_4a_1x1_conv', 'inception_4a_1x1_bn',
    'inception_4e_3x3_conv1', 'inception_4e_3x3_conv2', 'inception_4e_3x3_bn1', 'inception_4e_3x3_bn2',
    'inception_4e_5x5_conv1', 'inception_4e_5x5_conv2', 'inception_4e_5x5_bn1', 'inception_4e_5x5_bn2',
    'inception_5a_3x3_conv1', 'inception_5a_3x3_conv2', 'inception_5a_3x3_bn1', 'inception_5a_3x3_bn2',
    'inception_5a_pool_conv', 'inception_5a_pool_bn',
    'inception_5a_1x1_conv', 'inception_5a_1x1_bn',
    'inception_5b_3x3_conv1', 'inception_5b_3x3_conv2', 'inception_5b_3x3_bn1', 'inception_5b_3x3_bn2',
    'inception_5b_pool_conv', 'inception_5b_pool_bn',
    'inception_5b_1x1_conv', 'inception_5b_1x1_bn',
    'dense_layer'
]

conv_shape = {
    'conv1': [64, 3, 7, 7],
    'conv2': [64, 64, 1, 1],
    'conv3': [192, 64, 3, 3],
    'inception_3a_1x1_conv': [64, 192, 1, 1],
    'inception_3a_pool_conv': [32, 192, 1, 1],
    'inception_3a_5x5_conv1': [16, 192, 1, 1],
    'inception_3a_5x5_conv2': [32, 16, 5, 5],
    'inception_3a_3x3_conv1': [96, 192, 1, 1],
    'inception_3a_3x3_conv2': [128, 96, 3, 3],
    'inception_3b_3x3_conv1': [96, 256, 1, 1],
    'inception_3b_3x3_conv2': [128, 96, 3, 3],
    'inception_3b_5x5_conv1': [32, 256, 1, 1],
    'inception_3b_5x5_conv2': [64, 32, 5, 5],
    'inception_3b_pool_conv': [64, 256, 1, 1],
    'inception_3b_1x1_conv': [64, 256, 1, 1],
    'inception_3c_3x3_conv1': [128, 320, 1, 1],
    'inception_3c_3x3_conv2': [256, 128, 3, 3],
    'inception_3c_5x5_conv1': [32, 320, 1, 1],
    'inception_3c_5x5_conv2': [64, 32, 5, 5],
    'inception_4a_3x3_conv1': [96, 640, 1, 1],
    'inception_4a_3x3_conv2': [192, 96, 3, 3],
    'inception_4a_5x5_conv1': [32, 640, 1, 1],
    'inception_4a_5x5_conv2': [64, 32, 5, 5],
    'inception_4a_pool_conv': [128, 640, 1, 1],
    'inception_4a_1x1_conv': [256, 640, 1, 1],
    'inception_4e_3x3_conv1': [160, 640, 1, 1],
    'inception_4e_3x3_conv2': [256, 160, 3, 3],
    'inception_4e_5x5_conv1': [64, 640, 1, 1],
    'inception_4e_5x5_conv2': [128, 64, 5, 5],
    'inception_5a_3x3_conv1': [96, 1024, 1, 1],
    'inception_5a_3x3_conv2': [384, 96, 3, 3],
    'inception_5a_pool_conv': [96, 1024, 1, 1],
    'inception_5a_1x1_conv': [256, 1024, 1, 1],
    'inception_5b_3x3_conv1': [96, 736, 1, 1],
    'inception_5b_3x3_conv2': [384, 96, 3, 3],
    'inception_5b_pool_conv': [96, 736, 1, 1],
    'inception_5b_1x1_conv': [256, 736, 1, 1],
}

def load_weights():
    weightsDir = './weights'
    fileNames = filter(lambda f: not f.startswith('.'), os.listdir(weightsDir))
    paths = {}
    weights_dict = {}

    for n in fileNames:
        paths[n.replace('.csv', '')] = weightsDir + '/' + n

    for name in weights:
        if 'conv' in name:
            conv_w = genfromtxt(paths[name + '_w'], delimiter=',', dtype=None)
            conv_w = np.reshape(conv_w, conv_shape[name])
            conv_w = np.transpose(conv_w, (2, 3, 1, 0))
            conv_b = genfromtxt(paths[name + '_b'], delimiter=',', dtype=None)
            weights_dict[name] = [conv_w, conv_b]
        elif 'bn' in name:
            bn_w = genfromtxt(paths[name + '_w'], delimiter=',', dtype=None)
            bn_b = genfromtxt(paths[name + '_b'], delimiter=',', dtype=None)
            bn_m = genfromtxt(paths[name + '_m'], delimiter=',', dtype=None)
            bn_v = genfromtxt(paths[name + '_v'], delimiter=',', dtype=None)
            weights_dict[name] = [bn_w, bn_b, bn_m, bn_v]
        elif 'dense' in name:
            dense_w = genfromtxt(weightsDir + '/dense_w.csv', delimiter=',', dtype=None)
            dense_w = np.reshape(dense_w, (128, 736))
            dense_w = np.transpose(dense_w, (1, 0))
            dense_b = genfromtxt(weightsDir + '/dense_b.csv', delimiter=',', dtype=None)
            weights_dict[name] = [dense_w, dense_b]

    return weights_dict
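load_weights() expects one CSV file per tensor in ./weights, keyed by the layer names in the `weights` list above. For example (editor's sketch of the expected listing, derived from the `paths[name + '_w']` lookups):

# ./weights/
#     conv1_w.csv   conv1_b.csv
#     bn1_w.csv     bn1_b.csv   bn1_m.csv   bn1_v.csv
#     ...
#     dense_w.csv   dense_b.csv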
220  Class/detection/model.py  Normal file
@@ -0,0 +1,220 @@
# -----------------------------------------------------------------------------------------
# Code taken from https://github.com/iwantooxxoox/Keras-OpenFace (with minor modifications)
# -----------------------------------------------------------------------------------------

from keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from keras.layers.core import Lambda, Flatten, Dense
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.models import Model
from keras import backend as K

import facenet_utils
from facenet_utils import LRN2D

def create_model():
    myInput = Input(shape=(96, 96, 3))

    x = ZeroPadding2D(padding=(3, 3), input_shape=(96, 96, 3))(myInput)
    x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn1')(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)
    x = Lambda(LRN2D, name='lrn_1')(x)
    x = Conv2D(64, (1, 1), name='conv2')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn2')(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = Conv2D(192, (3, 3), name='conv3')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn3')(x)
    x = Activation('relu')(x)
    x = Lambda(LRN2D, name='lrn_2')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)

    # Inception 3a
    inception_3a_3x3 = Conv2D(96, (1, 1), name='inception_3a_3x3_conv1')(x)
    inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn1')(inception_3a_3x3)
    inception_3a_3x3 = Activation('relu')(inception_3a_3x3)
    inception_3a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3a_3x3)
    inception_3a_3x3 = Conv2D(128, (3, 3), name='inception_3a_3x3_conv2')(inception_3a_3x3)
    inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn2')(inception_3a_3x3)
    inception_3a_3x3 = Activation('relu')(inception_3a_3x3)

    inception_3a_5x5 = Conv2D(16, (1, 1), name='inception_3a_5x5_conv1')(x)
    inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn1')(inception_3a_5x5)
    inception_3a_5x5 = Activation('relu')(inception_3a_5x5)
    inception_3a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3a_5x5)
    inception_3a_5x5 = Conv2D(32, (5, 5), name='inception_3a_5x5_conv2')(inception_3a_5x5)
    inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn2')(inception_3a_5x5)
    inception_3a_5x5 = Activation('relu')(inception_3a_5x5)

    inception_3a_pool = MaxPooling2D(pool_size=3, strides=2)(x)
    inception_3a_pool = Conv2D(32, (1, 1), name='inception_3a_pool_conv')(inception_3a_pool)
    inception_3a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_pool_bn')(inception_3a_pool)
    inception_3a_pool = Activation('relu')(inception_3a_pool)
    inception_3a_pool = ZeroPadding2D(padding=((3, 4), (3, 4)))(inception_3a_pool)

    inception_3a_1x1 = Conv2D(64, (1, 1), name='inception_3a_1x1_conv')(x)
    inception_3a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_1x1_bn')(inception_3a_1x1)
    inception_3a_1x1 = Activation('relu')(inception_3a_1x1)

    inception_3a = concatenate([inception_3a_3x3, inception_3a_5x5, inception_3a_pool, inception_3a_1x1], axis=3)

    # Inception 3b
    inception_3b_3x3 = Conv2D(96, (1, 1), name='inception_3b_3x3_conv1')(inception_3a)
    inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn1')(inception_3b_3x3)
    inception_3b_3x3 = Activation('relu')(inception_3b_3x3)
    inception_3b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3b_3x3)
    inception_3b_3x3 = Conv2D(128, (3, 3), name='inception_3b_3x3_conv2')(inception_3b_3x3)
    inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn2')(inception_3b_3x3)
    inception_3b_3x3 = Activation('relu')(inception_3b_3x3)

    inception_3b_5x5 = Conv2D(32, (1, 1), name='inception_3b_5x5_conv1')(inception_3a)
    inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn1')(inception_3b_5x5)
    inception_3b_5x5 = Activation('relu')(inception_3b_5x5)
    inception_3b_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3b_5x5)
    inception_3b_5x5 = Conv2D(64, (5, 5), name='inception_3b_5x5_conv2')(inception_3b_5x5)
    inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn2')(inception_3b_5x5)
    inception_3b_5x5 = Activation('relu')(inception_3b_5x5)

    inception_3b_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3a)
    inception_3b_pool = Conv2D(64, (1, 1), name='inception_3b_pool_conv')(inception_3b_pool)
    inception_3b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_pool_bn')(inception_3b_pool)
    inception_3b_pool = Activation('relu')(inception_3b_pool)
    inception_3b_pool = ZeroPadding2D(padding=(4, 4))(inception_3b_pool)

    inception_3b_1x1 = Conv2D(64, (1, 1), name='inception_3b_1x1_conv')(inception_3a)
    inception_3b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_1x1_bn')(inception_3b_1x1)
    inception_3b_1x1 = Activation('relu')(inception_3b_1x1)

    inception_3b = concatenate([inception_3b_3x3, inception_3b_5x5, inception_3b_pool, inception_3b_1x1], axis=3)

    # Inception 3c
    inception_3c_3x3 = facenet_utils.conv2d_bn(
        inception_3b,
        layer='inception_3c_3x3',
        cv1_out=128,
        cv1_filter=(1, 1),
        cv2_out=256,
        cv2_filter=(3, 3),
        cv2_strides=(2, 2),
        padding=(1, 1))

    inception_3c_5x5 = facenet_utils.conv2d_bn(
        inception_3b,
        layer='inception_3c_5x5',
        cv1_out=32,
        cv1_filter=(1, 1),
        cv2_out=64,
        cv2_filter=(5, 5),
        cv2_strides=(2, 2),
        padding=(2, 2))

    inception_3c_pool = MaxPooling2D(pool_size=3, strides=2)(inception_3b)
    inception_3c_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_3c_pool)

    inception_3c = concatenate([inception_3c_3x3, inception_3c_5x5, inception_3c_pool], axis=3)

    # Inception 4a
    inception_4a_3x3 = facenet_utils.conv2d_bn(
        inception_3c,
        layer='inception_4a_3x3',
        cv1_out=96,
        cv1_filter=(1, 1),
        cv2_out=192,
        cv2_filter=(3, 3),
        cv2_strides=(1, 1),
        padding=(1, 1))
    inception_4a_5x5 = facenet_utils.conv2d_bn(
        inception_3c,
        layer='inception_4a_5x5',
        cv1_out=32,
        cv1_filter=(1, 1),
        cv2_out=64,
        cv2_filter=(5, 5),
        cv2_strides=(1, 1),
        padding=(2, 2))

    inception_4a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3c)
    inception_4a_pool = facenet_utils.conv2d_bn(
        inception_4a_pool,
        layer='inception_4a_pool',
        cv1_out=128,
        cv1_filter=(1, 1),
        padding=(2, 2))
    inception_4a_1x1 = facenet_utils.conv2d_bn(
        inception_3c,
        layer='inception_4a_1x1',
        cv1_out=256,
        cv1_filter=(1, 1))
    inception_4a = concatenate([inception_4a_3x3, inception_4a_5x5, inception_4a_pool, inception_4a_1x1], axis=3)

    # Inception 4e
    inception_4e_3x3 = facenet_utils.conv2d_bn(
        inception_4a,
        layer='inception_4e_3x3',
        cv1_out=160,
        cv1_filter=(1, 1),
        cv2_out=256,
        cv2_filter=(3, 3),
        cv2_strides=(2, 2),
        padding=(1, 1))
    inception_4e_5x5 = facenet_utils.conv2d_bn(
        inception_4a,
        layer='inception_4e_5x5',
        cv1_out=64,
        cv1_filter=(1, 1),
        cv2_out=128,
        cv2_filter=(5, 5),
        cv2_strides=(2, 2),
        padding=(2, 2))
    inception_4e_pool = MaxPooling2D(pool_size=3, strides=2)(inception_4a)
    inception_4e_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_4e_pool)

    inception_4e = concatenate([inception_4e_3x3, inception_4e_5x5, inception_4e_pool], axis=3)

    # Inception 5a
    inception_5a_3x3 = facenet_utils.conv2d_bn(
        inception_4e,
        layer='inception_5a_3x3',
        cv1_out=96,
        cv1_filter=(1, 1),
        cv2_out=384,
        cv2_filter=(3, 3),
        cv2_strides=(1, 1),
        padding=(1, 1))

    inception_5a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_4e)
    inception_5a_pool = facenet_utils.conv2d_bn(
        inception_5a_pool,
        layer='inception_5a_pool',
        cv1_out=96,
        cv1_filter=(1, 1),
        padding=(1, 1))
    inception_5a_1x1 = facenet_utils.conv2d_bn(
        inception_4e,
        layer='inception_5a_1x1',
        cv1_out=256,
        cv1_filter=(1, 1))

    inception_5a = concatenate([inception_5a_3x3, inception_5a_pool, inception_5a_1x1], axis=3)

    # Inception 5b
    inception_5b_3x3 = facenet_utils.conv2d_bn(
        inception_5a,
        layer='inception_5b_3x3',
        cv1_out=96,
        cv1_filter=(1, 1),
        cv2_out=384,
        cv2_filter=(3, 3),
        cv2_strides=(1, 1),
        padding=(1, 1))
    inception_5b_pool = MaxPooling2D(pool_size=3, strides=2)(inception_5a)
    inception_5b_pool = facenet_utils.conv2d_bn(
        inception_5b_pool,
        layer='inception_5b_pool',
        cv1_out=96,
        cv1_filter=(1, 1))
    inception_5b_pool = ZeroPadding2D(padding=(1, 1))(inception_5b_pool)

    inception_5b_1x1 = facenet_utils.conv2d_bn(
        inception_5a,
        layer='inception_5b_1x1',
        cv1_out=256,
        cv1_filter=(1, 1))
    inception_5b = concatenate([inception_5b_3x3, inception_5b_pool, inception_5b_1x1], axis=3)

    av_pool = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(inception_5b)
    reshape_layer = Flatten()(av_pool)
    dense_layer = Dense(128, name='dense_layer')(reshape_layer)
    norm_layer = Lambda(lambda x: K.l2_normalize(x, axis=1), name='norm_layer')(dense_layer)

    return Model(inputs=[myInput], outputs=norm_layer)
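A short usage sketch of the model this function builds (editor's illustration; it mirrors how this commit loads and queries the model in face_reco_from_camera.py, with a random array standing in for a scaled RGB face crop):

import numpy as np
from model import create_model

nn4_small2_pretrained = create_model()
nn4_small2_pretrained.load_weights('weights/nn4.small2.v1.h5')

img = np.random.rand(96, 96, 3).astype(np.float32)  # stand-in for a [0,1]-scaled 96x96 RGB crop
emb = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0]
print(emb.shape)  # (128,), an L2-normalized embedding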