diff --git a/Class/collect_data/face_reco_from_camera.py b/Class/collect_data/face_reco_from_camera.py index 05ad724..6ccbd06 100644 --- a/Class/collect_data/face_reco_from_camera.py +++ b/Class/collect_data/face_reco_from_camera.py @@ -6,8 +6,11 @@ import cv2 import pandas as pd import os import time +import facenet from PIL import Image, ImageDraw, ImageFont +from model import create_model + start_time = 0 # 1. Dlib 正向人脸检测器 # detector = dlib.get_frontal_face_detector() @@ -22,6 +25,9 @@ predictor = dlib.shape_predictor('data/data_dlib/shape_predictor_68_face_landmar # 3. Dlib Resnet 人脸识别模型,提取 128D 的特征矢量 face_reco_model = dlib.face_recognition_model_v1("data/data_dlib/dlib_face_recognition_resnet_model_v1.dat") +nn4_small2_pretrained = create_model() +nn4_small2_pretrained.load_weights('weights/nn4.small2.v1.h5') + class Face_Recognizer: def __init__(self): @@ -32,6 +38,9 @@ class Face_Recognizer: self.name_known_cnt = 0 self.name_known_list = [] + self.metadata = [] + self.embedded = [] + # 存储当前摄像头中捕获到的所有人脸的坐标名字 self.pos_camera_list = [] self.name_camera_list = [] @@ -46,38 +55,39 @@ class Face_Recognizer: # 从 "features_all.csv" 读取录入人脸特征 def get_face_database(self): - if os.path.exists("data/features_all.csv"): - path_features_known_csv = "data/features_all.csv" - csv_rd = pd.read_csv(path_features_known_csv, header=None) - # 2. 读取已知人脸数据 - for i in range(csv_rd.shape[0]): - features_someone_arr = [] - for j in range(0, 128): - if csv_rd.iloc[i][j] == '': - features_someone_arr.append('0') - else: - features_someone_arr.append(csv_rd.iloc[i][j]) - self.features_known_list.append(features_someone_arr) - self.name_known_list.append("Person_" + str(i + 1)) - self.name_known_cnt = len(self.name_known_list) - print("Faces in Database:", len(self.features_known_list)) + if os.path.exists("data/data_faces_from_camera/"): + self.metadata = facenet.load_metadata("data/data_faces_from_camera/") + self.name_known_cnt = self.metadata.shape[0] + self.embedded = np.zeros((self.metadata.shape[0], 128)) + + for i, m in enumerate(self.metadata): + for j, n in enumerate(m): + img = facenet.load_image(n.image_path()) + # img = align_image(img) + img = cv2.resize(img, (96, 96)) + # scale RGB values to interval [0,1] + img = (img / 255.).astype(np.float32) + # obtain embedding vector for image + self.embedded[i] = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0] + # self.embedded[i] = self.embedded[i] / len(m) + self.name_known_list.append('') return 1 else: print('##### Warning #####', '\n') print("'features_all.csv' not found!") print( - "Please run 'get_faces_from_camera.py' and 'features_extraction_to_csv.py' before 'face_reco_from_camera.py'", + "Please run 'get_faces_from_camera.py' before 'face_reco_from_camera.py'", '\n') print('##### End Warning #####') return 0 # 计算两个128D向量间的欧式距离 - @staticmethod - def return_euclidean_distance(feature_1, feature_2): - feature_1 = np.array(feature_1) - feature_2 = np.array(feature_2) - dist = np.sqrt(np.sum((feature_1 - feature_2) ** 2)) - return dist + # @staticmethod + # def return_euclidean_distance(feature_1, feature_2): + # feature_1 = np.array(feature_1) + # feature_2 = np.array(feature_2) + # dist = np.sqrt(np.sum((feature_1 - feature_2) ** 2)) + # return dist # 更新 FPS def update_fps(self): @@ -111,8 +121,8 @@ class Face_Recognizer: # Default known name: person_1, person_2, person_3 self.name_known_list[0] = '唐麒'.encode('utf-8').decode() self.name_known_list[1] = '段海燕'.encode('utf-8').decode() - # self.name_known_list[2] 
='xx'.encode('utf-8').decode() - # self.name_known_list[3] ='xx'.encode('utf-8').decode() + # self.name_known_list[2] = '唐保生'.encode('utf-8').decode() + # self.name_known_list[3] = '唐麒'.encode('utf-8').decode() # self.name_known_list[4] ='xx'.encode('utf-8').decode() # 处理获取的视频流,进行人脸识别 @@ -142,18 +152,18 @@ class Face_Recognizer: # 2. 检测到人脸 if faces.shape[2] != 0: # 3. 获取当前捕获到的图像的所有人脸的特征,存储到 self.features_camera_list - for i in range(0, faces.shape[2]): - confidence = faces[0, 0, i, 2] - - # filter out weak detections by ensuring the `confidence` is - # greater than the minimum confidence - if confidence < 0.5: - continue - box = faces[0, 0, i, 3:7] * np.array([w, h, w, h]) - (startX, startY, endX, endY) = box.astype("int") - rect = dlib.rectangle(startX, startY, endX, endY) - shape = predictor(img_rd, rect) - self.features_camera_list.append(face_reco_model.compute_face_descriptor(img_rd, shape)) + # for i in range(0, faces.shape[2]): + # confidence = faces[0, 0, i, 2] + # + # # filter out weak detections by ensuring the `confidence` is + # # greater than the minimum confidence + # if confidence < 0.5: + # continue + # box = faces[0, 0, i, 3:7] * np.array([w, h, w, h]) + # (startX, startY, endX, endY) = box.astype("int") + # rect = dlib.rectangle(startX, startY, endX, endY) + # shape = predictor(img_rd, rect) + # self.features_camera_list.append(face_reco_model.compute_face_descriptor(img_rd, shape)) # 4. 遍历捕获到的图像中所有的人脸 for k in range(0, faces.shape[2]): @@ -164,13 +174,13 @@ class Face_Recognizer: # greater than the minimum confidence if confidence < 0.5: continue - self.faces_cnt+=1 + self.faces_cnt += 1 # print("##### camera person", k + 1, "#####") # 让人名跟随在矩形框的上方 # 确定人名的位置坐标 # 先默认所有人不认识,是 unknown # Set the default names of faces with "unknown" - self.name_camera_list.append("unknown") + self.name_camera_list.append("陌生人") # 每个捕获人脸的名字坐标 box = faces[0, 0, k, 3:7] * np.array([w, h, w, h]) @@ -178,25 +188,39 @@ class Face_Recognizer: self.pos_camera_list.append(tuple( [int(startX + 5), int(startY - 30)])) + height = (endY - startY) + width = (endX - startX) + + img_blank = np.zeros((height, width, 3), np.uint8) + for ii in range(height): + for jj in range(width): + img_blank[ii][jj] = img_rd[startY + ii][startX + jj] + + img = cv2.resize(img_blank, (96, 96)) + img = (img / 255.).astype(np.float32) + img = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0] + # 5. 对于某张人脸,遍历所有存储的人脸特征 e_distance_list = [] - for i in range(len(self.features_known_list)): - # 如果 person_X 数据不为空 - if str(self.features_known_list[i][0]) != '0.0': - # print("with person", str(i + 1), "the e distance: ", end='') - e_distance_tmp = self.return_euclidean_distance(self.features_camera_list[k], - self.features_known_list[i]) - # print(e_distance_tmp) - e_distance_list.append(e_distance_tmp) - else: - # 空数据 person_X - e_distance_list.append(999999999) - # 6. 寻找出最小的欧式距离匹配 + for i in range(0, len(self.embedded)): + e_distance_list.append(facenet.distance(self.embedded[i], img)) + # for i in range(len(self.features_known_list)): + # # 如果 person_X 数据不为空 + # if str(self.features_known_list[i][0]) != '0.0': + # # print("with person", str(i + 1), "the e distance: ", end='') + # e_distance_tmp = self.return_euclidean_distance(self.features_camera_list[k], + # self.features_known_list[i]) + # # print(e_distance_tmp) + # e_distance_list.append(e_distance_tmp) + # else: + # # 空数据 person_X + # e_distance_list.append(999999999) + # # 6. 
寻找出最小的欧式距离匹配 similar_person_num = e_distance_list.index(min(e_distance_list)) # print("Minimum e distance with person", self.name_known_list[similar_person_num]) - - if min(e_distance_list) < 1: - self.name_camera_list[k] = self.name_known_list[similar_person_num] + # print(min(e_distance_list)) + if min(e_distance_list) < 0.58: + self.name_camera_list[k] = self.name_known_list[similar_person_num % 8] # print("May be person " + str(self.name_known_list[similar_person_num])) else: pass @@ -205,10 +229,16 @@ class Face_Recognizer: # 矩形框 for kk, d in enumerate(faces): # 绘制矩形框 - cv2.rectangle(img_rd, tuple([startX, startY]), tuple([endX, endY]), - (0, 255, 0), 2) - cv2.rectangle(img_rd, tuple([startX, startY - 35]), tuple([endX, startY]), - (0, 255, 0), cv2.FILLED) + if self.name_camera_list[k] != '陌生人': + cv2.rectangle(img_rd, tuple([startX, startY]), tuple([endX, endY]), + (0, 255, 0), 2) + cv2.rectangle(img_rd, tuple([startX, startY - 35]), tuple([endX, startY]), + (0, 255, 0), cv2.FILLED) + else: + cv2.rectangle(img_rd, tuple([startX, startY]), tuple([endX, endY]), + (0, 0, 255), 2) + cv2.rectangle(img_rd, tuple([startX, startY - 35]), tuple([endX, startY]), + (0, 0, 255), cv2.FILLED) # print('\n') # self.faces_cnt = faces.shape[2] # if len(self.name_camera_list) > 0: @@ -226,7 +256,7 @@ class Face_Recognizer: cv2.imshow("camera", img_with_name) # 9. 更新 FPS / Update stream FPS - # self.update_fps() + self.update_fps() # OpenCV 调用摄像头并进行 process def run(self): diff --git a/Class/collect_data/get_faces_from_camera.py b/Class/collect_data/get_faces_from_camera.py index 55ae139..e2e93d6 100644 --- a/Class/collect_data/get_faces_from_camera.py +++ b/Class/collect_data/get_faces_from_camera.py @@ -195,7 +195,8 @@ class Face_Register: color_rectangle, 2) # 7. 根据人脸大小生成空的图像 - img_blank = np.zeros((int(height * 2), width * 2, 3), np.uint8) + # img_blank = np.zeros((int(height * 2), width * 2, 3), np.uint8) + img_blank = np.zeros((height, width, 3), np.uint8) if save_flag: # 8. 
按下 's' 保存摄像头中的人脸到本地 @@ -205,9 +206,9 @@ class Face_Register: self.ss_cnt += 1 if self.index <= 7: - for ii in range(height * 2): - for jj in range(width * 2): - img_blank[ii][jj] = img_rd[startY - hh + ii][startX - ww + jj] + for ii in range(height): + for jj in range(width): + img_blank[ii][jj] = img_rd[startY + ii][startX + jj] cv2.imwrite(current_face_dir + "/img_face_" + str(self.ss_cnt) + ".jpg", img_blank) print("写入本地 / Save into:", str(current_face_dir) + "/img_face_" + str(self.ss_cnt) + ".jpg") diff --git a/Class/detection/Calibration.py b/Class/detection/Calibration.py index 6886d9f..e487250 100644 --- a/Class/detection/Calibration.py +++ b/Class/detection/Calibration.py @@ -4,10 +4,14 @@ import time import cv2 import numpy as np +from auto_whiteBalance import aug + from GFmatrix import GF np.seterr(invalid='ignore') +# config = {'morning': [80, 220, 125, 125]} + class Calibration: def __init__(self): @@ -26,8 +30,9 @@ class Calibration: def update_fps(self): now = time.time() self.frame_time = now - self.frame_start_time - self.fps = 1.0 / self.frame_time - self.frame_start_time = now + if self.frame_time != 0: + self.fps = 1.0 / self.frame_time + self.frame_start_time = now # 生成的 cv2 window 上面添加说明文字 def draw_note(self, img_rd): @@ -244,20 +249,20 @@ class Calibration: def decode(self, frame, feature_point): points = [] position = [] - + frame=aug(frame) color_map = np.zeros((frame.shape[0], frame.shape[1])) frame = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB) l, a, b = cv2.split(frame) for i in range(0, frame.shape[0]): for j in range(0, frame.shape[1]): - if l[i][j] < 50: + if l[i][j] < 100: color_map[i][j] = 3 frame[i][j] = np.array([0, 0, 0]) - elif l[i][j] > 140: + elif l[i][j] > 220: frame[i][j] = np.array([255, 255, 255]) color_map[i][j] = 255 else: - if b[i][j] < 125: + if b[i][j] < 120: color_map[i][j] = 0 frame[i][j] = np.array([255, 0, 0]) else: @@ -368,23 +373,25 @@ class Calibration: (feature_points[index + 1][1] * 2 - feature_points[index][1] * 2) ** 2) scale = world_distance / pixel_distance - + print(pixel_distance) + print(feature_points[index + 1][0] - feature_points[index][0], + feature_points[index + 1][1] - feature_points[index][1]) # print(distance) # for i in range(index - 1, index + 2): # print(distance[i]) # 绘制特征点 - # point_size = 1 - # point_color = (0, 0, 255) - # thickness = 0 # 可以为 0 、4、8 + point_size = 1 + point_color = (0, 0, 255) + thickness = 0 # 可以为 0 、4、8 # for i in range(0, len(featurepoints_position)): # cv2.circle(img_rd, (int(featurepoints_position[i][1]), int(featurepoints_position[i][0])), # point_size, point_color, thickness) - # for point in featurepoints_position: - # cv2.circle(img_rd, (int(point[1]), int(point[0])), point_size, point_color, thickness) - # cv2.namedWindow("image") - # cv2.imshow('image', img_rd) - # cv2.waitKey(0) # 按0退出 + for point in featurepoints_position: + cv2.circle(img_rd, (int(point[1]), int(point[0])), point_size, point_color, thickness) + cv2.namedWindow("image") + cv2.imshow('image', img_rd) + cv2.waitKey(0) # 按0退出 return scale self.draw_note(img_rd) self.update_fps() diff --git a/Class/detection/align.py b/Class/detection/align.py new file mode 100644 index 0000000..914e996 --- /dev/null +++ b/Class/detection/align.py @@ -0,0 +1,188 @@ +# Copyright 2015-2016 Carnegie Mellon University +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for dlib-based alignment.""" + +import cv2 +import dlib +import numpy as np + +TEMPLATE = np.float32([ + (0.0792396913815, 0.339223741112), (0.0829219487236, 0.456955367943), + (0.0967927109165, 0.575648016728), (0.122141515615, 0.691921601066), + (0.168687863544, 0.800341263616), (0.239789390707, 0.895732504778), + (0.325662452515, 0.977068762493), (0.422318282013, 1.04329000149), + (0.531777802068, 1.06080371126), (0.641296298053, 1.03981924107), + (0.738105872266, 0.972268833998), (0.824444363295, 0.889624082279), + (0.894792677532, 0.792494155836), (0.939395486253, 0.681546643421), + (0.96111933829, 0.562238253072), (0.970579841181, 0.441758925744), + (0.971193274221, 0.322118743967), (0.163846223133, 0.249151738053), + (0.21780354657, 0.204255863861), (0.291299351124, 0.192367318323), + (0.367460241458, 0.203582210627), (0.4392945113, 0.233135599851), + (0.586445962425, 0.228141644834), (0.660152671635, 0.195923841854), + (0.737466449096, 0.182360984545), (0.813236546239, 0.192828009114), + (0.8707571886, 0.235293377042), (0.51534533827, 0.31863546193), + (0.516221448289, 0.396200446263), (0.517118861835, 0.473797687758), + (0.51816430343, 0.553157797772), (0.433701156035, 0.604054457668), + (0.475501237769, 0.62076344024), (0.520712933176, 0.634268222208), + (0.565874114041, 0.618796581487), (0.607054002672, 0.60157671656), + (0.252418718401, 0.331052263829), (0.298663015648, 0.302646354002), + (0.355749724218, 0.303020650651), (0.403718978315, 0.33867711083), + (0.352507175597, 0.349987615384), (0.296791759886, 0.350478978225), + (0.631326076346, 0.334136672344), (0.679073381078, 0.29645404267), + (0.73597236153, 0.294721285802), (0.782865376271, 0.321305281656), + (0.740312274764, 0.341849376713), (0.68499850091, 0.343734332172), + (0.353167761422, 0.746189164237), (0.414587777921, 0.719053835073), + (0.477677654595, 0.706835892494), (0.522732900812, 0.717092275768), + (0.569832064287, 0.705414478982), (0.635195811927, 0.71565572516), + (0.69951672331, 0.739419187253), (0.639447159575, 0.805236879972), + (0.576410514055, 0.835436670169), (0.525398405766, 0.841706377792), + (0.47641545769, 0.837505914975), (0.41379548902, 0.810045601727), + (0.380084785646, 0.749979603086), (0.477955996282, 0.74513234612), + (0.523389793327, 0.748924302636), (0.571057789237, 0.74332894691), + (0.672409137852, 0.744177032192), (0.572539621444, 0.776609286626), + (0.5240106503, 0.783370783245), (0.477561227414, 0.778476346951)]) + +TPL_MIN, TPL_MAX = np.min(TEMPLATE, axis=0), np.max(TEMPLATE, axis=0) +MINMAX_TEMPLATE = (TEMPLATE - TPL_MIN) / (TPL_MAX - TPL_MIN) + + +class AlignDlib: + """ + Use `dlib's landmark estimation `_ to align faces. + + The alignment preprocess faces for input into a neural network. + Faces are resized to the same size (such as 96x96) and transformed + to make landmarks (such as the eyes and nose) appear at the same + location on every image. + + Normalized landmarks: + + .. image:: ../images/dlib-landmark-mean.png + """ + + #: Landmark indices. 
+ INNER_EYES_AND_BOTTOM_LIP = [39, 42, 57] + OUTER_EYES_AND_NOSE = [36, 45, 33] + + def __init__(self, facePredictor): + """ + Instantiate an 'AlignDlib' object. + + :param facePredictor: The path to dlib's + :type facePredictor: str + """ + assert facePredictor is not None + + self.detector = dlib.get_frontal_face_detector() + self.predictor = dlib.shape_predictor(facePredictor) + + def getAllFaceBoundingBoxes(self, rgbImg): + """ + Find all face bounding boxes in an image. + + :param rgbImg: RGB image to process. Shape: (height, width, 3) + :type rgbImg: numpy.ndarray + :return: All face bounding boxes in an image. + :rtype: dlib.rectangles + """ + assert rgbImg is not None + + try: + return self.detector(rgbImg, 1) + except Exception as e: + print("Warning: {}".format(e)) + # In rare cases, exceptions are thrown. + return [] + + def getLargestFaceBoundingBox(self, rgbImg, skipMulti=False): + """ + Find the largest face bounding box in an image. + + :param rgbImg: RGB image to process. Shape: (height, width, 3) + :type rgbImg: numpy.ndarray + :param skipMulti: Skip image if more than one face detected. + :type skipMulti: bool + :return: The largest face bounding box in an image, or None. + :rtype: dlib.rectangle + """ + assert rgbImg is not None + + faces = self.getAllFaceBoundingBoxes(rgbImg) + if (not skipMulti and len(faces) > 0) or len(faces) == 1: + return max(faces, key=lambda rect: rect.width() * rect.height()) + else: + return None + + def findLandmarks(self, rgbImg, bb): + """ + Find the landmarks of a face. + + :param rgbImg: RGB image to process. Shape: (height, width, 3) + :type rgbImg: numpy.ndarray + :param bb: Bounding box around the face to find landmarks for. + :type bb: dlib.rectangle + :return: Detected landmark locations. + :rtype: list of (x,y) tuples + """ + assert rgbImg is not None + assert bb is not None + + points = self.predictor(rgbImg, bb) + return list(map(lambda p: (p.x, p.y), points.parts())) + + def align(self, imgDim, rgbImg, bb=None, + landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP, + skipMulti=False): + r"""align(imgDim, rgbImg, bb=None, landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP) + + Transform and align a face in an image. + + :param imgDim: The edge length in pixels of the square the image is resized to. + :type imgDim: int + :param rgbImg: RGB image to process. Shape: (height, width, 3) + :type rgbImg: numpy.ndarray + :param bb: Bounding box around the face to align. \ + Defaults to the largest face. + :type bb: dlib.rectangle + :param landmarks: Detected landmark locations. \ + Landmarks found on `bb` if not provided. + :type landmarks: list of (x,y) tuples + :param landmarkIndices: The indices to transform to. + :type landmarkIndices: list of ints + :param skipMulti: Skip image if more than one face detected. + :type skipMulti: bool + :return: The aligned RGB image. 
Shape: (imgDim, imgDim, 3) + :rtype: numpy.ndarray + """ + assert imgDim is not None + assert rgbImg is not None + assert landmarkIndices is not None + + if bb is None: + bb = self.getLargestFaceBoundingBox(rgbImg, skipMulti) + if bb is None: + return + + if landmarks is None: + landmarks = self.findLandmarks(rgbImg, bb) + + npLandmarks = np.float32(landmarks) + npLandmarkIndices = np.array(landmarkIndices) + + H = cv2.getAffineTransform(npLandmarks[npLandmarkIndices], + imgDim * MINMAX_TEMPLATE[npLandmarkIndices]) + thumbnail = cv2.warpAffine(rgbImg, H, (imgDim, imgDim)) + + return thumbnail \ No newline at end of file diff --git a/Class/detection/auto_whiteBalance.py b/Class/detection/auto_whiteBalance.py new file mode 100644 index 0000000..7e59610 --- /dev/null +++ b/Class/detection/auto_whiteBalance.py @@ -0,0 +1,41 @@ +import numpy as np +import cv2 + + +def compute(img, min_percentile, max_percentile): + """计算分位点,目的是去掉图1的直方图两头的异常情况""" + max_percentile_pixel = np.percentile(img, max_percentile) + min_percentile_pixel = np.percentile(img, min_percentile) + + return max_percentile_pixel, min_percentile_pixel + + +def aug(src): + """图像亮度增强""" + if get_lightness(src) > 130: + print("图片亮度足够,不做增强") + # 先计算分位点,去掉像素值中少数异常值,这个分位点可以自己配置。 + # 比如1中直方图的红色在0到255上都有值,但是实际上像素值主要在0到20内。 + + + max_percentile_pixel, min_percentile_pixel = compute(src, 1, 99) + + # 去掉分位值区间之外的值 + src[src >= max_percentile_pixel] = max_percentile_pixel + src[src <= min_percentile_pixel] = min_percentile_pixel + + # 将分位值区间拉伸到0到255,这里取了255*0.1与255*0.9是因为可能会出现像素值溢出的情况,所以最好不要设置为0到255。 + out = np.zeros(src.shape, src.dtype) + cv2.normalize(src, out, 255 * 0.1, 255 * 0.9, cv2.NORM_MINMAX) + + return out + + +def get_lightness(src): + # 计算亮度 + hsv_image = cv2.cvtColor(src, cv2.COLOR_BGR2HSV) + lightness = hsv_image[:, :, 2].mean() + + return lightness + + diff --git a/Class/detection/facenet.py b/Class/detection/facenet.py new file mode 100644 index 0000000..8c98339 --- /dev/null +++ b/Class/detection/facenet.py @@ -0,0 +1,170 @@ +from model import create_model +from keras import backend as K +from keras.models import Model +from keras.layers import Input, Layer +from data import triplet_generator +import numpy as np +import os.path +import cv2 +from align import AlignDlib + +alignment = AlignDlib('data/data_dlib/landmarks.dat') + +class TripletLossLayer(Layer): + def __init__(self, alpha, **kwargs): + self.alpha = alpha + super(TripletLossLayer, self).__init__(**kwargs) + + def triplet_loss(self, inputs): + a, p, n = inputs + p_dist = K.sum(K.square(a - p), axis=-1) + n_dist = K.sum(K.square(a - n), axis=-1) + return K.sum(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0) + + def call(self, inputs): + loss = self.triplet_loss(inputs) + self.add_loss(loss) + return loss + + +class IdentityMetadata(): + def __init__(self, base, name, file): + # dataset base directory + self.base = base + # identity name + self.name = name + # image file name + self.file = file + + def __repr__(self): + return self.image_path() + + def image_path(self): + return os.path.join(self.base, self.name, self.file) + + +def load_metadata(path): + metadata = [] + for i in sorted(os.listdir(path)): + person = [] + for f in sorted(os.listdir(os.path.join(path, i))): + # Check file extension. Allow only jpg/jpeg' files. 
+ ext = os.path.splitext(f)[1] + if ext == '.jpg' or ext == '.jpeg': + person.append(IdentityMetadata(path, i, f)) + metadata.append(person) + return np.array(metadata,dtype=object) + + +def load_image(path): + img = cv2.imread(path, 1) + # OpenCV loads images with color channels + # in BGR order. So we need to reverse them + return img[..., ::-1] + + +def align_image(img): + return alignment.align(96, img, alignment.getLargestFaceBoundingBox(img), + landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE) + + +def distance(emb1, emb2): + return np.sum(np.square(emb1 - emb2)) + +# if __name__ == '__main__': +# # nn4_small2 = create_model() +# # +# # # Input for anchor, positive and negative images +# # in_a = Input(shape=(96, 96, 3)) +# # in_p = Input(shape=(96, 96, 3)) +# # in_n = Input(shape=(96, 96, 3)) +# # +# # # Output for anchor, positive and negative embedding vectors +# # # The nn4_small model instance is shared (Siamese network) +# # emb_a = nn4_small2(in_a) +# # emb_p = nn4_small2(in_p) +# # emb_n = nn4_small2(in_n) +# # +# # # Layer that computes the triplet loss from anchor, positive and negative embedding vectors +# # triplet_loss_layer = TripletLossLayer(alpha=0.2, name='triplet_loss_layer')([emb_a, emb_p, emb_n]) +# # +# # # Model that can be trained with anchor, positive negative images +# # nn4_small2_train = Model([in_a, in_p, in_n], triplet_loss_layer) +# # +# # # triplet_generator() creates a generator that continuously returns +# # # ([a_batch, p_batch, n_batch], None) tuples where a_batch, p_batch +# # # and n_batch are batches of anchor, positive and negative RGB images +# # # each having a shape of (batch_size, 96, 96, 3). +# # generator = triplet_generator() +# # +# # nn4_small2_train.compile(loss=None, optimizer='adam') +# # nn4_small2_train.fit_generator(generator, epochs=1, steps_per_epoch=100) +# +# # Please note that the current implementation of the generator only generates +# # random image data. The main goal of this code snippet is to demonstrate +# # the general setup for model training. In the following, we will anyway +# # use a pre-trained model so we don't need a generator here that operates +# # on real training data. I'll maybe provide a fully functional generator +# # later. 
+# nn4_small2_pretrained = create_model() +# nn4_small2_pretrained.load_weights('weights/nn4.small2.v1.h5') +# +# metadata = load_metadata('images') +# +# # Initialize the OpenFace face alignment utility +# +# +# # # Load an image of Jacques Chirac +# # jc_orig = load_image(metadata[78].image_path()) +# # +# # # Detect face and return bounding box +# # bb = alignment.getLargestFaceBoundingBox(jc_orig) +# # +# # # Transform image using specified face landmark indices and crop image to 96x96 +# # jc_aligned = alignment.align(96, jc_orig, bb, landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE) +# +# embedded = np.zeros((metadata.shape[0], 128)) +# +# for i, m in enumerate(metadata): +# img = load_image(m.image_path()) +# # img = align_image(img) +# img = cv2.resize(img, (96, 96)) +# # scale RGB values to interval [0,1] +# try: +# img = (img / 255.).astype(np.float32) +# # obtain embedding vector for image +# embedded[i] = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0] +# except: +# print(m.image_path) +# +# # show_pair(77, 78) +# # show_pair(77, 100) +# cap = cv2.VideoCapture(0) +# while cap.isOpened(): +# flag, frame = cap.read() +# kk = cv2.waitKey(1) +# # 按下 q 键退出 +# if kk == ord('q'): +# break +# else: +# try: +# # img = align_image(frame) +# frame = cv2.resize(frame, (96, 96)) +# img = (frame / 255.).astype(np.float32) +# img = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0] +# d = [] +# for i in range(0, len(embedded)): +# d.append(distance(embedded[i], img)) +# +# name = ['Person_2', 'tbs', 'Person_1'] +# +# print(name[d.index(min(d))]) +# # if d < 1: +# # print("same face") +# # else: +# # print("different face") +# except Exception as e: +# print(e) +# cv2.imshow("normal", frame) +# cap.release() +# cv2.destroyAllWindows() diff --git a/Class/detection/facenet_utils.py b/Class/detection/facenet_utils.py new file mode 100644 index 0000000..586ade1 --- /dev/null +++ b/Class/detection/facenet_utils.py @@ -0,0 +1,159 @@ +# ----------------------------------------------------------------------------------------- +# Code taken from https://github.com/iwantooxxoox/Keras-OpenFace (with minor modifications) +# ----------------------------------------------------------------------------------------- + +import tensorflow as tf +import numpy as np +import os + +from numpy import genfromtxt +from keras.layers import Conv2D, ZeroPadding2D, Activation +from keras.layers.normalization import BatchNormalization +from tensorflow.python.keras.backend import _get_session + +_FLOATX = 'float32' + +def variable(value, dtype=_FLOATX, name=None): + v = tf.Variable(np.asarray(value, dtype=dtype), name=name) + _get_session().run(v.initializer) + return v + +def shape(x): + return x.get_shape() + +def square(x): + return tf.square(x) + +def zeros(shape, dtype=_FLOATX, name=None): + return variable(np.zeros(shape), dtype, name) + +def concatenate(tensors, axis=-1): + if axis < 0: + axis = axis % len(tensors[0].get_shape()) + return tf.concat(axis, tensors) + +def LRN2D(x): + return tf.nn.lrn(x, alpha=1e-4, beta=0.75) + +def conv2d_bn( + x, + layer=None, + cv1_out=None, + cv1_filter=(1, 1), + cv1_strides=(1, 1), + cv2_out=None, + cv2_filter=(3, 3), + cv2_strides=(1, 1), + padding=None, +): + num = '' if cv2_out == None else '1' + tensor = Conv2D(cv1_out, cv1_filter, strides=cv1_strides, name=layer+'_conv'+num)(x) + tensor = BatchNormalization(axis=3, epsilon=0.00001, name=layer+'_bn'+num)(tensor) + tensor = Activation('relu')(tensor) + if padding == None: + return tensor + tensor = 
ZeroPadding2D(padding=padding)(tensor) + if cv2_out == None: + return tensor + tensor = Conv2D(cv2_out, cv2_filter, strides=cv2_strides, name=layer+'_conv'+'2')(tensor) + tensor = BatchNormalization(axis=3, epsilon=0.00001, name=layer+'_bn'+'2')(tensor) + tensor = Activation('relu')(tensor) + return tensor + +weights = [ + 'conv1', 'bn1', 'conv2', 'bn2', 'conv3', 'bn3', + 'inception_3a_1x1_conv', 'inception_3a_1x1_bn', + 'inception_3a_pool_conv', 'inception_3a_pool_bn', + 'inception_3a_5x5_conv1', 'inception_3a_5x5_conv2', 'inception_3a_5x5_bn1', 'inception_3a_5x5_bn2', + 'inception_3a_3x3_conv1', 'inception_3a_3x3_conv2', 'inception_3a_3x3_bn1', 'inception_3a_3x3_bn2', + 'inception_3b_3x3_conv1', 'inception_3b_3x3_conv2', 'inception_3b_3x3_bn1', 'inception_3b_3x3_bn2', + 'inception_3b_5x5_conv1', 'inception_3b_5x5_conv2', 'inception_3b_5x5_bn1', 'inception_3b_5x5_bn2', + 'inception_3b_pool_conv', 'inception_3b_pool_bn', + 'inception_3b_1x1_conv', 'inception_3b_1x1_bn', + 'inception_3c_3x3_conv1', 'inception_3c_3x3_conv2', 'inception_3c_3x3_bn1', 'inception_3c_3x3_bn2', + 'inception_3c_5x5_conv1', 'inception_3c_5x5_conv2', 'inception_3c_5x5_bn1', 'inception_3c_5x5_bn2', + 'inception_4a_3x3_conv1', 'inception_4a_3x3_conv2', 'inception_4a_3x3_bn1', 'inception_4a_3x3_bn2', + 'inception_4a_5x5_conv1', 'inception_4a_5x5_conv2', 'inception_4a_5x5_bn1', 'inception_4a_5x5_bn2', + 'inception_4a_pool_conv', 'inception_4a_pool_bn', + 'inception_4a_1x1_conv', 'inception_4a_1x1_bn', + 'inception_4e_3x3_conv1', 'inception_4e_3x3_conv2', 'inception_4e_3x3_bn1', 'inception_4e_3x3_bn2', + 'inception_4e_5x5_conv1', 'inception_4e_5x5_conv2', 'inception_4e_5x5_bn1', 'inception_4e_5x5_bn2', + 'inception_5a_3x3_conv1', 'inception_5a_3x3_conv2', 'inception_5a_3x3_bn1', 'inception_5a_3x3_bn2', + 'inception_5a_pool_conv', 'inception_5a_pool_bn', + 'inception_5a_1x1_conv', 'inception_5a_1x1_bn', + 'inception_5b_3x3_conv1', 'inception_5b_3x3_conv2', 'inception_5b_3x3_bn1', 'inception_5b_3x3_bn2', + 'inception_5b_pool_conv', 'inception_5b_pool_bn', + 'inception_5b_1x1_conv', 'inception_5b_1x1_bn', + 'dense_layer' +] + +conv_shape = { + 'conv1': [64, 3, 7, 7], + 'conv2': [64, 64, 1, 1], + 'conv3': [192, 64, 3, 3], + 'inception_3a_1x1_conv': [64, 192, 1, 1], + 'inception_3a_pool_conv': [32, 192, 1, 1], + 'inception_3a_5x5_conv1': [16, 192, 1, 1], + 'inception_3a_5x5_conv2': [32, 16, 5, 5], + 'inception_3a_3x3_conv1': [96, 192, 1, 1], + 'inception_3a_3x3_conv2': [128, 96, 3, 3], + 'inception_3b_3x3_conv1': [96, 256, 1, 1], + 'inception_3b_3x3_conv2': [128, 96, 3, 3], + 'inception_3b_5x5_conv1': [32, 256, 1, 1], + 'inception_3b_5x5_conv2': [64, 32, 5, 5], + 'inception_3b_pool_conv': [64, 256, 1, 1], + 'inception_3b_1x1_conv': [64, 256, 1, 1], + 'inception_3c_3x3_conv1': [128, 320, 1, 1], + 'inception_3c_3x3_conv2': [256, 128, 3, 3], + 'inception_3c_5x5_conv1': [32, 320, 1, 1], + 'inception_3c_5x5_conv2': [64, 32, 5, 5], + 'inception_4a_3x3_conv1': [96, 640, 1, 1], + 'inception_4a_3x3_conv2': [192, 96, 3, 3], + 'inception_4a_5x5_conv1': [32, 640, 1, 1,], + 'inception_4a_5x5_conv2': [64, 32, 5, 5], + 'inception_4a_pool_conv': [128, 640, 1, 1], + 'inception_4a_1x1_conv': [256, 640, 1, 1], + 'inception_4e_3x3_conv1': [160, 640, 1, 1], + 'inception_4e_3x3_conv2': [256, 160, 3, 3], + 'inception_4e_5x5_conv1': [64, 640, 1, 1], + 'inception_4e_5x5_conv2': [128, 64, 5, 5], + 'inception_5a_3x3_conv1': [96, 1024, 1, 1], + 'inception_5a_3x3_conv2': [384, 96, 3, 3], + 'inception_5a_pool_conv': [96, 1024, 1, 1], + 
'inception_5a_1x1_conv': [256, 1024, 1, 1], + 'inception_5b_3x3_conv1': [96, 736, 1, 1], + 'inception_5b_3x3_conv2': [384, 96, 3, 3], + 'inception_5b_pool_conv': [96, 736, 1, 1], + 'inception_5b_1x1_conv': [256, 736, 1, 1], +} + +def load_weights(): + weightsDir = './weights' + fileNames = filter(lambda f: not f.startswith('.'), os.listdir(weightsDir)) + paths = {} + weights_dict = {} + + for n in fileNames: + paths[n.replace('.csv', '')] = weightsDir + '/' + n + + for name in weights: + if 'conv' in name: + conv_w = genfromtxt(paths[name + '_w'], delimiter=',', dtype=None) + conv_w = np.reshape(conv_w, conv_shape[name]) + conv_w = np.transpose(conv_w, (2, 3, 1, 0)) + conv_b = genfromtxt(paths[name + '_b'], delimiter=',', dtype=None) + weights_dict[name] = [conv_w, conv_b] + elif 'bn' in name: + bn_w = genfromtxt(paths[name + '_w'], delimiter=',', dtype=None) + bn_b = genfromtxt(paths[name + '_b'], delimiter=',', dtype=None) + bn_m = genfromtxt(paths[name + '_m'], delimiter=',', dtype=None) + bn_v = genfromtxt(paths[name + '_v'], delimiter=',', dtype=None) + weights_dict[name] = [bn_w, bn_b, bn_m, bn_v] + elif 'dense' in name: + dense_w = genfromtxt(weightsDir+'/dense_w.csv', delimiter=',', dtype=None) + dense_w = np.reshape(dense_w, (128, 736)) + dense_w = np.transpose(dense_w, (1, 0)) + dense_b = genfromtxt(weightsDir+'/dense_b.csv', delimiter=',', dtype=None) + weights_dict[name] = [dense_w, dense_b] + + return weights_dict diff --git a/Class/detection/model.py b/Class/detection/model.py new file mode 100644 index 0000000..ad29956 --- /dev/null +++ b/Class/detection/model.py @@ -0,0 +1,220 @@ +# ----------------------------------------------------------------------------------------- +# Code taken from https://github.com/iwantooxxoox/Keras-OpenFace (with minor modifications) +# ----------------------------------------------------------------------------------------- + +from keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate +from keras.layers.core import Lambda, Flatten, Dense +from keras.layers.normalization import BatchNormalization +from keras.layers.pooling import MaxPooling2D, AveragePooling2D +from keras.models import Model +from keras import backend as K + +import facenet_utils +from facenet_utils import LRN2D + +def create_model(): + myInput = Input(shape=(96, 96, 3)) + + x = ZeroPadding2D(padding=(3, 3), input_shape=(96, 96, 3))(myInput) + x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) + x = BatchNormalization(axis=3, epsilon=0.00001, name='bn1')(x) + x = Activation('relu')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = MaxPooling2D(pool_size=3, strides=2)(x) + x = Lambda(LRN2D, name='lrn_1')(x) + x = Conv2D(64, (1, 1), name='conv2')(x) + x = BatchNormalization(axis=3, epsilon=0.00001, name='bn2')(x) + x = Activation('relu')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(192, (3, 3), name='conv3')(x) + x = BatchNormalization(axis=3, epsilon=0.00001, name='bn3')(x) + x = Activation('relu')(x) + x = Lambda(LRN2D, name='lrn_2')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = MaxPooling2D(pool_size=3, strides=2)(x) + + # Inception3a + inception_3a_3x3 = Conv2D(96, (1, 1), name='inception_3a_3x3_conv1')(x) + inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn1')(inception_3a_3x3) + inception_3a_3x3 = Activation('relu')(inception_3a_3x3) + inception_3a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3a_3x3) + inception_3a_3x3 = Conv2D(128, (3, 3), name='inception_3a_3x3_conv2')(inception_3a_3x3) + 
inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn2')(inception_3a_3x3) + inception_3a_3x3 = Activation('relu')(inception_3a_3x3) + + inception_3a_5x5 = Conv2D(16, (1, 1), name='inception_3a_5x5_conv1')(x) + inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn1')(inception_3a_5x5) + inception_3a_5x5 = Activation('relu')(inception_3a_5x5) + inception_3a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3a_5x5) + inception_3a_5x5 = Conv2D(32, (5, 5), name='inception_3a_5x5_conv2')(inception_3a_5x5) + inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn2')(inception_3a_5x5) + inception_3a_5x5 = Activation('relu')(inception_3a_5x5) + + inception_3a_pool = MaxPooling2D(pool_size=3, strides=2)(x) + inception_3a_pool = Conv2D(32, (1, 1), name='inception_3a_pool_conv')(inception_3a_pool) + inception_3a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_pool_bn')(inception_3a_pool) + inception_3a_pool = Activation('relu')(inception_3a_pool) + inception_3a_pool = ZeroPadding2D(padding=((3, 4), (3, 4)))(inception_3a_pool) + + inception_3a_1x1 = Conv2D(64, (1, 1), name='inception_3a_1x1_conv')(x) + inception_3a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_1x1_bn')(inception_3a_1x1) + inception_3a_1x1 = Activation('relu')(inception_3a_1x1) + + inception_3a = concatenate([inception_3a_3x3, inception_3a_5x5, inception_3a_pool, inception_3a_1x1], axis=3) + + # Inception3b + inception_3b_3x3 = Conv2D(96, (1, 1), name='inception_3b_3x3_conv1')(inception_3a) + inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn1')(inception_3b_3x3) + inception_3b_3x3 = Activation('relu')(inception_3b_3x3) + inception_3b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3b_3x3) + inception_3b_3x3 = Conv2D(128, (3, 3), name='inception_3b_3x3_conv2')(inception_3b_3x3) + inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn2')(inception_3b_3x3) + inception_3b_3x3 = Activation('relu')(inception_3b_3x3) + + inception_3b_5x5 = Conv2D(32, (1, 1), name='inception_3b_5x5_conv1')(inception_3a) + inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn1')(inception_3b_5x5) + inception_3b_5x5 = Activation('relu')(inception_3b_5x5) + inception_3b_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3b_5x5) + inception_3b_5x5 = Conv2D(64, (5, 5), name='inception_3b_5x5_conv2')(inception_3b_5x5) + inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn2')(inception_3b_5x5) + inception_3b_5x5 = Activation('relu')(inception_3b_5x5) + + inception_3b_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3a) + inception_3b_pool = Conv2D(64, (1, 1), name='inception_3b_pool_conv')(inception_3b_pool) + inception_3b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_pool_bn')(inception_3b_pool) + inception_3b_pool = Activation('relu')(inception_3b_pool) + inception_3b_pool = ZeroPadding2D(padding=(4, 4))(inception_3b_pool) + + inception_3b_1x1 = Conv2D(64, (1, 1), name='inception_3b_1x1_conv')(inception_3a) + inception_3b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_1x1_bn')(inception_3b_1x1) + inception_3b_1x1 = Activation('relu')(inception_3b_1x1) + + inception_3b = concatenate([inception_3b_3x3, inception_3b_5x5, inception_3b_pool, inception_3b_1x1], axis=3) + + # Inception3c + inception_3c_3x3 = 
facenet_utils.conv2d_bn(inception_3b, + layer='inception_3c_3x3', + cv1_out=128, + cv1_filter=(1, 1), + cv2_out=256, + cv2_filter=(3, 3), + cv2_strides=(2, 2), + padding=(1, 1)) + + inception_3c_5x5 = facenet_utils.conv2d_bn(inception_3b, + layer='inception_3c_5x5', + cv1_out=32, + cv1_filter=(1, 1), + cv2_out=64, + cv2_filter=(5, 5), + cv2_strides=(2, 2), + padding=(2, 2)) + + inception_3c_pool = MaxPooling2D(pool_size=3, strides=2)(inception_3b) + inception_3c_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_3c_pool) + + inception_3c = concatenate([inception_3c_3x3, inception_3c_5x5, inception_3c_pool], axis=3) + + #inception 4a + inception_4a_3x3 = facenet_utils.conv2d_bn(inception_3c, + layer='inception_4a_3x3', + cv1_out=96, + cv1_filter=(1, 1), + cv2_out=192, + cv2_filter=(3, 3), + cv2_strides=(1, 1), + padding=(1, 1)) + inception_4a_5x5 = facenet_utils.conv2d_bn(inception_3c, + layer='inception_4a_5x5', + cv1_out=32, + cv1_filter=(1, 1), + cv2_out=64, + cv2_filter=(5, 5), + cv2_strides=(1, 1), + padding=(2, 2)) + + inception_4a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3c) + inception_4a_pool = facenet_utils.conv2d_bn(inception_4a_pool, + layer='inception_4a_pool', + cv1_out=128, + cv1_filter=(1, 1), + padding=(2, 2)) + inception_4a_1x1 = facenet_utils.conv2d_bn(inception_3c, + layer='inception_4a_1x1', + cv1_out=256, + cv1_filter=(1, 1)) + inception_4a = concatenate([inception_4a_3x3, inception_4a_5x5, inception_4a_pool, inception_4a_1x1], axis=3) + + #inception4e + inception_4e_3x3 = facenet_utils.conv2d_bn(inception_4a, + layer='inception_4e_3x3', + cv1_out=160, + cv1_filter=(1, 1), + cv2_out=256, + cv2_filter=(3, 3), + cv2_strides=(2, 2), + padding=(1, 1)) + inception_4e_5x5 = facenet_utils.conv2d_bn(inception_4a, + layer='inception_4e_5x5', + cv1_out=64, + cv1_filter=(1, 1), + cv2_out=128, + cv2_filter=(5, 5), + cv2_strides=(2, 2), + padding=(2, 2)) + inception_4e_pool = MaxPooling2D(pool_size=3, strides=2)(inception_4a) + inception_4e_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_4e_pool) + + inception_4e = concatenate([inception_4e_3x3, inception_4e_5x5, inception_4e_pool], axis=3) + + #inception5a + inception_5a_3x3 = facenet_utils.conv2d_bn(inception_4e, + layer='inception_5a_3x3', + cv1_out=96, + cv1_filter=(1, 1), + cv2_out=384, + cv2_filter=(3, 3), + cv2_strides=(1, 1), + padding=(1, 1)) + + inception_5a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_4e) + inception_5a_pool = facenet_utils.conv2d_bn(inception_5a_pool, + layer='inception_5a_pool', + cv1_out=96, + cv1_filter=(1, 1), + padding=(1, 1)) + inception_5a_1x1 = facenet_utils.conv2d_bn(inception_4e, + layer='inception_5a_1x1', + cv1_out=256, + cv1_filter=(1, 1)) + + inception_5a = concatenate([inception_5a_3x3, inception_5a_pool, inception_5a_1x1], axis=3) + + #inception_5b + inception_5b_3x3 = facenet_utils.conv2d_bn(inception_5a, + layer='inception_5b_3x3', + cv1_out=96, + cv1_filter=(1, 1), + cv2_out=384, + cv2_filter=(3, 3), + cv2_strides=(1, 1), + padding=(1, 1)) + inception_5b_pool = MaxPooling2D(pool_size=3, strides=2)(inception_5a) + inception_5b_pool = facenet_utils.conv2d_bn(inception_5b_pool, + layer='inception_5b_pool', + cv1_out=96, + cv1_filter=(1, 1)) + inception_5b_pool = ZeroPadding2D(padding=(1, 1))(inception_5b_pool) + + inception_5b_1x1 = facenet_utils.conv2d_bn(inception_5a, + layer='inception_5b_1x1', + cv1_out=256, + cv1_filter=(1, 1)) + inception_5b = concatenate([inception_5b_3x3, inception_5b_pool, inception_5b_1x1], 
axis=3) + + av_pool = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(inception_5b) + reshape_layer = Flatten()(av_pool) + dense_layer = Dense(128, name='dense_layer')(reshape_layer) + norm_layer = Lambda(lambda x: K.l2_normalize(x, axis=1), name='norm_layer')(dense_layer) + + return Model(inputs=[myInput], outputs=norm_layer)
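
Note (not part of the patch): the core change above replaces the dlib-ResNet/CSV pipeline with Keras-OpenFace nn4.small2 embeddings, enrolled straight from data/data_faces_from_camera/ and matched by squared Euclidean distance with a 0.58 threshold. The sketch below pulls those pieces together; paths, the threshold, and the '陌生人' (stranger) label come from the patch, while the helper names embed()/identify() and the per-person averaging are illustrative assumptions only.

import os
import cv2
import numpy as np

import facenet                    # load_metadata / load_image / distance added by this patch
from model import create_model    # Keras-OpenFace nn4.small2 definition added by this patch

nn4_small2 = create_model()
nn4_small2.load_weights('weights/nn4.small2.v1.h5')   # pre-trained weights used in the patch

def embed(rgb_img):
    """Resize to 96x96, scale to [0, 1], return the 128-D embedding."""
    face = cv2.resize(rgb_img, (96, 96))
    face = (face / 255.).astype(np.float32)
    return nn4_small2.predict(np.expand_dims(face, axis=0))[0]

# Enrolment: one embedding per registered person. The patch keeps the
# embedding of the *last* image in each person's folder (its averaging line
# is commented out); averaging over all images is shown here as an option.
db_dir = 'data/data_faces_from_camera/'
metadata = facenet.load_metadata(db_dir)
names = sorted(os.listdir(db_dir))            # same ordering as load_metadata()
embedded = np.zeros((metadata.shape[0], 128))
for i, person in enumerate(metadata):
    embs = [embed(facenet.load_image(m.image_path())) for m in person]
    embedded[i] = np.mean(embs, axis=0)

# Query: apply the same preprocessing to the camera crop (keep the channel
# order consistent with enrolment, i.e. convert OpenCV's BGR frame to RGB),
# then take the nearest neighbour by squared Euclidean distance.
def identify(face_bgr, threshold=0.58):
    query = embed(cv2.cvtColor(face_bgr, cv2.COLOR_BGR2RGB))
    d = [facenet.distance(e, query) for e in embedded]
    best = int(np.argmin(d))
    return names[best] if d[best] < threshold else '陌生人'   # unknown / stranger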
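
Note (not part of the patch): align.py and facenet.align_image() are added above, but the alignment call stays commented out and faces are only resized to 96x96 before embedding. A minimal sketch of wiring the alignment in is given below; the landmark-model path comes from facenet.py in the patch, while enabling alignment and the resize fallback are assumptions of this sketch.

import cv2
from align import AlignDlib

alignment = AlignDlib('data/data_dlib/landmarks.dat')   # 68-point landmark predictor

def preprocess(rgb_img):
    # Warp the largest detected face so the outer eyes and nose land on the
    # OpenFace template positions, cropped to 96x96 (as align_image() does).
    aligned = alignment.align(96, rgb_img,
                              alignment.getLargestFaceBoundingBox(rgb_img),
                              landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)
    if aligned is None:              # no face found: fall back to a plain resize
        aligned = cv2.resize(rgb_img, (96, 96))
    return aligned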
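
Note (not part of the patch): Calibration.decode() now runs the frame through aug() from auto_whiteBalance.py before thresholding the LAB channels. As written, the get_lightness(src) > 130 branch in aug() only prints a message, so the percentile stretch runs on bright frames too. The wrapper below (illustrative only) is one way to skip enhancement for frames that are already bright enough.

from auto_whiteBalance import aug, get_lightness   # helpers added by this patch

def maybe_aug(frame_bgr, lightness_threshold=130):
    # get_lightness() returns the mean of the HSV value channel; skip the
    # 1st/99th-percentile stretch when the frame is already bright.
    if get_lightness(frame_bgr) > lightness_threshold:
        return frame_bgr
    return aug(frame_bgr)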