Improve performance and logic

This commit is contained in:
Tang1705
2020-07-08 16:04:25 +08:00
parent 88a689a333
commit aaa3799fe2
8 changed files with 893 additions and 77 deletions

View File

@@ -4,10 +4,14 @@ import time
import cv2
import numpy as np
from auto_whiteBalance import aug
from GFmatrix import GF
np.seterr(invalid='ignore')
# config = {'morning': [80, 220, 125, 125]}
class Calibration:
def __init__(self):
@@ -26,8 +30,9 @@ class Calibration:
def update_fps(self):
now = time.time()
self.frame_time = now - self.frame_start_time
self.fps = 1.0 / self.frame_time
self.frame_start_time = now
if self.frame_time != 0:
self.fps = 1.0 / self.frame_time
self.frame_start_time = now
# 生成的 cv2 window 上面添加说明文字
def draw_note(self, img_rd):
@@ -244,20 +249,20 @@ class Calibration:
def decode(self, frame, feature_point):
points = []
position = []
frame=aug(frame)
color_map = np.zeros((frame.shape[0], frame.shape[1]))
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(frame)
for i in range(0, frame.shape[0]):
for j in range(0, frame.shape[1]):
if l[i][j] < 50:
if l[i][j] < 100:
color_map[i][j] = 3
frame[i][j] = np.array([0, 0, 0])
elif l[i][j] > 140:
elif l[i][j] > 220:
frame[i][j] = np.array([255, 255, 255])
color_map[i][j] = 255
else:
if b[i][j] < 125:
if b[i][j] < 120:
color_map[i][j] = 0
frame[i][j] = np.array([255, 0, 0])
else:
@@ -368,23 +373,25 @@ class Calibration:
(feature_points[index + 1][1] * 2 - feature_points[index][1] * 2) ** 2)
scale = world_distance / pixel_distance
print(pixel_distance)
print(feature_points[index + 1][0] - feature_points[index][0],
feature_points[index + 1][1] - feature_points[index][1])
# print(distance)
# for i in range(index - 1, index + 2):
# print(distance[i])
# 绘制特征点
# point_size = 1
# point_color = (0, 0, 255)
# thickness = 0 # 可以为 0 、4、8
point_size = 1
point_color = (0, 0, 255)
thickness = 0 # 可以为 0 、4、8
# for i in range(0, len(featurepoints_position)):
# cv2.circle(img_rd, (int(featurepoints_position[i][1]), int(featurepoints_position[i][0])),
# point_size, point_color, thickness)
# for point in featurepoints_position:
# cv2.circle(img_rd, (int(point[1]), int(point[0])), point_size, point_color, thickness)
# cv2.namedWindow("image")
# cv2.imshow('image', img_rd)
# cv2.waitKey(0) # 按0退出
for point in featurepoints_position:
cv2.circle(img_rd, (int(point[1]), int(point[0])), point_size, point_color, thickness)
cv2.namedWindow("image")
cv2.imshow('image', img_rd)
cv2.waitKey(0) # 按0退出
return scale
self.draw_note(img_rd)
self.update_fps()

188
Class/detection/align.py Normal file
View File

@@ -0,0 +1,188 @@
# Copyright 2015-2016 Carnegie Mellon University
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for dlib-based alignment."""
import cv2
import dlib
import numpy as np
# Reference landmark template: 68 (x, y) coordinate pairs stored as float32.
# Rows are selected by landmark index when an alignment target is built.
TEMPLATE = np.float32([
    (0.0792396913815, 0.339223741112), (0.0829219487236, 0.456955367943),
    (0.0967927109165, 0.575648016728), (0.122141515615, 0.691921601066),
    (0.168687863544, 0.800341263616), (0.239789390707, 0.895732504778),
    (0.325662452515, 0.977068762493), (0.422318282013, 1.04329000149),
    (0.531777802068, 1.06080371126), (0.641296298053, 1.03981924107),
    (0.738105872266, 0.972268833998), (0.824444363295, 0.889624082279),
    (0.894792677532, 0.792494155836), (0.939395486253, 0.681546643421),
    (0.96111933829, 0.562238253072), (0.970579841181, 0.441758925744),
    (0.971193274221, 0.322118743967), (0.163846223133, 0.249151738053),
    (0.21780354657, 0.204255863861), (0.291299351124, 0.192367318323),
    (0.367460241458, 0.203582210627), (0.4392945113, 0.233135599851),
    (0.586445962425, 0.228141644834), (0.660152671635, 0.195923841854),
    (0.737466449096, 0.182360984545), (0.813236546239, 0.192828009114),
    (0.8707571886, 0.235293377042), (0.51534533827, 0.31863546193),
    (0.516221448289, 0.396200446263), (0.517118861835, 0.473797687758),
    (0.51816430343, 0.553157797772), (0.433701156035, 0.604054457668),
    (0.475501237769, 0.62076344024), (0.520712933176, 0.634268222208),
    (0.565874114041, 0.618796581487), (0.607054002672, 0.60157671656),
    (0.252418718401, 0.331052263829), (0.298663015648, 0.302646354002),
    (0.355749724218, 0.303020650651), (0.403718978315, 0.33867711083),
    (0.352507175597, 0.349987615384), (0.296791759886, 0.350478978225),
    (0.631326076346, 0.334136672344), (0.679073381078, 0.29645404267),
    (0.73597236153, 0.294721285802), (0.782865376271, 0.321305281656),
    (0.740312274764, 0.341849376713), (0.68499850091, 0.343734332172),
    (0.353167761422, 0.746189164237), (0.414587777921, 0.719053835073),
    (0.477677654595, 0.706835892494), (0.522732900812, 0.717092275768),
    (0.569832064287, 0.705414478982), (0.635195811927, 0.71565572516),
    (0.69951672331, 0.739419187253), (0.639447159575, 0.805236879972),
    (0.576410514055, 0.835436670169), (0.525398405766, 0.841706377792),
    (0.47641545769, 0.837505914975), (0.41379548902, 0.810045601727),
    (0.380084785646, 0.749979603086), (0.477955996282, 0.74513234612),
    (0.523389793327, 0.748924302636), (0.571057789237, 0.74332894691),
    (0.672409137852, 0.744177032192), (0.572539621444, 0.776609286626),
    (0.5240106503, 0.783370783245), (0.477561227414, 0.778476346951)])
# Per-column (x and y) minimum and maximum of the template.
TPL_MIN, TPL_MAX = np.min(TEMPLATE, axis=0), np.max(TEMPLATE, axis=0)
# Template rescaled column-wise so each coordinate spans [0, 1]; multiplying
# by an image edge length yields target pixel positions.
MINMAX_TEMPLATE = (TEMPLATE - TPL_MIN) / (TPL_MAX - TPL_MIN)
class AlignDlib:
    """
    Face aligner built on dlib's face detector and landmark predictor.

    A face is located, its landmarks are estimated, and an affine
    transform maps three chosen landmarks onto their positions in the
    normalized template, producing a square thumbnail of a fixed size.
    """

    #: Landmark indices.
    INNER_EYES_AND_BOTTOM_LIP = [39, 42, 57]
    OUTER_EYES_AND_NOSE = [36, 45, 33]

    def __init__(self, facePredictor):
        """
        Create the detector and the landmark predictor.

        :param facePredictor: Path passed to ``dlib.shape_predictor``.
        :type facePredictor: str
        """
        assert facePredictor is not None

        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(facePredictor)

    def getAllFaceBoundingBoxes(self, rgbImg):
        """
        Run the face detector on an image.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :return: Every detected face bounding box, or an empty list when
            the detector raises.
        :rtype: dlib.rectangles
        """
        assert rgbImg is not None

        try:
            detections = self.detector(rgbImg, 1)
        except Exception as err:
            # Best effort: report the detector failure and carry on with
            # "no faces" instead of propagating the error.
            print("Warning: {}".format(err))
            return []
        return detections

    def getLargestFaceBoundingBox(self, rgbImg, skipMulti=False):
        """
        Pick the detected face with the largest bounding-box area.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param skipMulti: When true, give up unless exactly one face is found.
        :type skipMulti: bool
        :return: The largest face bounding box, or None.
        :rtype: dlib.rectangle
        """
        assert rgbImg is not None

        def area(rect):
            return rect.width() * rect.height()

        faces = self.getAllFaceBoundingBoxes(rgbImg)
        count = len(faces)
        if count == 0:
            return None
        if skipMulti and count != 1:
            return None
        return max(faces, key=area)

    def findLandmarks(self, rgbImg, bb):
        """
        Estimate the landmarks of the face inside a bounding box.

        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param bb: Bounding box around the face to find landmarks for.
        :type bb: dlib.rectangle
        :return: Detected landmark locations.
        :rtype: list of (x,y) tuples
        """
        assert rgbImg is not None
        assert bb is not None

        prediction = self.predictor(rgbImg, bb)
        return [(part.x, part.y) for part in prediction.parts()]

    def align(self, imgDim, rgbImg, bb=None,
              landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP,
              skipMulti=False):
        """
        Warp a face so the selected landmarks land on their template positions.

        :param imgDim: Edge length in pixels of the square output image.
        :type imgDim: int
        :param rgbImg: RGB image to process. Shape: (height, width, 3)
        :type rgbImg: numpy.ndarray
        :param bb: Bounding box around the face to align.
            The largest detected face is used when omitted.
        :type bb: dlib.rectangle
        :param landmarks: Detected landmark locations.
            Computed from `bb` when omitted.
        :type landmarks: list of (x,y) tuples
        :param landmarkIndices: Indices of the three landmarks that define
            the affine transform.
        :type landmarkIndices: list of ints
        :param skipMulti: Skip the image if more than one face is detected.
        :type skipMulti: bool
        :return: The aligned RGB image, shape (imgDim, imgDim, 3), or None
            when no usable face is found.
        :rtype: numpy.ndarray
        """
        assert imgDim is not None
        assert rgbImg is not None
        assert landmarkIndices is not None

        if bb is None:
            bb = self.getLargestFaceBoundingBox(rgbImg, skipMulti)
            if bb is None:
                return None

        if landmarks is None:
            landmarks = self.findLandmarks(rgbImg, bb)

        selected = np.array(landmarkIndices)
        source_points = np.float32(landmarks)[selected]
        target_points = imgDim * MINMAX_TEMPLATE[selected]

        transform = cv2.getAffineTransform(source_points, target_points)
        return cv2.warpAffine(rgbImg, transform, (imgDim, imgDim))

View File

@@ -0,0 +1,41 @@
import numpy as np
import cv2
def compute(img, min_percentile, max_percentile):
    """Return the (upper, lower) percentile values of ``img``.

    The percentiles are used to discard the outlying tails of the pixel
    histogram. Note the return order: the upper value comes first.
    """
    upper = np.percentile(img, max_percentile)
    lower = np.percentile(img, min_percentile)
    bounds = (upper, lower)
    return bounds
def aug(src):
    """Stretch the contrast of an image and return the result.

    Pixel values are clamped to the 1st..99th percentile range and then
    min-max normalized to [255 * 0.1, 255 * 0.9]. The input array is left
    untouched; clamping is done on a copy.

    :param src: Image array; ``get_lightness`` converts it from BGR.
    :return: A new array with the same shape and dtype as ``src``.
    """
    if get_lightness(src) > 130:
        # NOTE(review): the message says no enhancement is applied, yet the
        # code below still runs for bright images. Behavior is kept as-is;
        # confirm whether an early return was intended.
        print("图片亮度足够,不做增强")

    # Percentile bounds cut off the few outlier values at both ends of the
    # histogram; the percentiles themselves are configurable in compute().
    max_percentile_pixel, min_percentile_pixel = compute(src, 1, 99)

    # Clamp on a private copy so the caller's image is not modified.
    clamped = src.copy()
    clamped[clamped >= max_percentile_pixel] = max_percentile_pixel
    clamped[clamped <= min_percentile_pixel] = min_percentile_pixel

    # Stretch to 10%..90% of the 8-bit range rather than 0..255 to leave
    # headroom against overflow.
    out = np.zeros(clamped.shape, clamped.dtype)
    cv2.normalize(clamped, out, 255 * 0.1, 255 * 0.9, cv2.NORM_MINMAX)
    return out
def get_lightness(src):
    """Return the mean of the V channel after converting ``src`` from BGR to HSV."""
    value_channel = cv2.cvtColor(src, cv2.COLOR_BGR2HSV)[:, :, 2]
    return value_channel.mean()

170
Class/detection/facenet.py Normal file
View File

@@ -0,0 +1,170 @@
from model import create_model
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Layer
from data import triplet_generator
import numpy as np
import os.path
import cv2
from align import AlignDlib
# Shared face aligner, constructed at import time; the landmark model path is
# relative to the current working directory.
alignment = AlignDlib('data/data_dlib/landmarks.dat')
class TripletLossLayer(Layer):
    """Layer that computes a triplet loss and registers it via ``add_loss``."""

    def __init__(self, alpha, **kwargs):
        """Store the margin ``alpha`` and forward remaining options to ``Layer``."""
        self.alpha = alpha
        super().__init__(**kwargs)

    def triplet_loss(self, inputs):
        """Return the summed hinge of (positive - negative distance + alpha).

        ``inputs`` unpacks into anchor, positive and negative embeddings;
        distances are squared differences summed over the last axis.
        """
        anchor, positive, negative = inputs
        positive_dist = K.sum(K.square(anchor - positive), axis=-1)
        negative_dist = K.sum(K.square(anchor - negative), axis=-1)
        hinge = K.maximum(positive_dist - negative_dist + self.alpha, 0)
        return K.sum(hinge, axis=0)

    def call(self, inputs):
        """Compute the loss, attach it to the layer, and return it."""
        triplet = self.triplet_loss(inputs)
        self.add_loss(triplet)
        return triplet
class IdentityMetadata:
    """Location of one image file: dataset base directory, identity name, file name."""

    def __init__(self, base, name, file):
        # base: dataset base directory
        # name: identity name (sub-directory of base)
        # file: image file name inside the identity directory
        self.base, self.name, self.file = base, name, file

    def __repr__(self):
        """Represent the entry by its image path."""
        return self.image_path()

    def image_path(self):
        """Join base, identity name and file name into one path."""
        parts = (self.base, self.name, self.file)
        return os.path.join(*parts)
def load_metadata(path):
    """Collect image metadata grouped by identity.

    Every sub-directory of ``path`` is treated as one identity; its
    ``.jpg``/``.jpeg`` files (extension matched case-insensitively) become
    ``IdentityMetadata`` entries. Entries of ``path`` that are not
    directories (stray files such as ``.DS_Store``) are skipped instead of
    making ``os.listdir`` fail.

    :param path: Dataset base directory.
    :return: Object array built from one list of ``IdentityMetadata`` per
        identity, in sorted directory order.
    """
    allowed_extensions = ('.jpg', '.jpeg')
    metadata = []
    for identity in sorted(os.listdir(path)):
        identity_dir = os.path.join(path, identity)
        if not os.path.isdir(identity_dir):
            continue
        person = [
            IdentityMetadata(path, identity, file_name)
            for file_name in sorted(os.listdir(identity_dir))
            if os.path.splitext(file_name)[1].lower() in allowed_extensions
        ]
        metadata.append(person)
    return np.array(metadata, dtype=object)
def load_image(path):
    """Read an image from disk and return it with channels in RGB order.

    :param path: Path of the image file.
    :return: Image array with the last axis reversed (BGR -> RGB).
    :raises IOError: If OpenCV cannot read the file.
    """
    img = cv2.imread(path, 1)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of on the slice below.
        raise IOError("Could not read image: {}".format(path))
    # OpenCV loads images with color channels in BGR order, so reverse them.
    return img[..., ::-1]
def align_image(img):
    """Align the largest detected face in ``img`` to a 96x96 thumbnail.

    Uses the outer-eyes-and-nose landmarks of the module-level aligner.
    """
    face_box = alignment.getLargestFaceBoundingBox(img)
    return alignment.align(
        96,
        img,
        face_box,
        landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE,
    )
def distance(emb1, emb2):
    """Return the sum of squared differences between two embeddings."""
    delta = emb1 - emb2
    squared = np.square(delta)
    return np.sum(squared)
# if __name__ == '__main__':
# # nn4_small2 = create_model()
# #
# # # Input for anchor, positive and negative images
# # in_a = Input(shape=(96, 96, 3))
# # in_p = Input(shape=(96, 96, 3))
# # in_n = Input(shape=(96, 96, 3))
# #
# # # Output for anchor, positive and negative embedding vectors
# # # The nn4_small model instance is shared (Siamese network)
# # emb_a = nn4_small2(in_a)
# # emb_p = nn4_small2(in_p)
# # emb_n = nn4_small2(in_n)
# #
# # # Layer that computes the triplet loss from anchor, positive and negative embedding vectors
# # triplet_loss_layer = TripletLossLayer(alpha=0.2, name='triplet_loss_layer')([emb_a, emb_p, emb_n])
# #
# # # Model that can be trained with anchor, positive negative images
# # nn4_small2_train = Model([in_a, in_p, in_n], triplet_loss_layer)
# #
# # # triplet_generator() creates a generator that continuously returns
# # # ([a_batch, p_batch, n_batch], None) tuples where a_batch, p_batch
# # # and n_batch are batches of anchor, positive and negative RGB images
# # # each having a shape of (batch_size, 96, 96, 3).
# # generator = triplet_generator()
# #
# # nn4_small2_train.compile(loss=None, optimizer='adam')
# # nn4_small2_train.fit_generator(generator, epochs=1, steps_per_epoch=100)
#
# # Please note that the current implementation of the generator only generates
# # random image data. The main goal of this code snippet is to demonstrate
# # the general setup for model training. In the following, we will anyway
# # use a pre-trained model so we don't need a generator here that operates
# # on real training data. I'll maybe provide a fully functional generator
# # later.
# nn4_small2_pretrained = create_model()
# nn4_small2_pretrained.load_weights('weights/nn4.small2.v1.h5')
#
# metadata = load_metadata('images')
#
# # Initialize the OpenFace face alignment utility
#
#
# # # Load an image of Jacques Chirac
# # jc_orig = load_image(metadata[78].image_path())
# #
# # # Detect face and return bounding box
# # bb = alignment.getLargestFaceBoundingBox(jc_orig)
# #
# # # Transform image using specified face landmark indices and crop image to 96x96
# # jc_aligned = alignment.align(96, jc_orig, bb, landmarkIndices=AlignDlib.OUTER_EYES_AND_NOSE)
#
# embedded = np.zeros((metadata.shape[0], 128))
#
# for i, m in enumerate(metadata):
# img = load_image(m.image_path())
# # img = align_image(img)
# img = cv2.resize(img, (96, 96))
# # scale RGB values to interval [0,1]
# try:
# img = (img / 255.).astype(np.float32)
# # obtain embedding vector for image
# embedded[i] = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0]
# except:
# print(m.image_path)
#
# # show_pair(77, 78)
# # show_pair(77, 100)
# cap = cv2.VideoCapture(0)
# while cap.isOpened():
# flag, frame = cap.read()
# kk = cv2.waitKey(1)
# # 按下 q 键退出
# if kk == ord('q'):
# break
# else:
# try:
# # img = align_image(frame)
# frame = cv2.resize(frame, (96, 96))
# img = (frame / 255.).astype(np.float32)
# img = nn4_small2_pretrained.predict(np.expand_dims(img, axis=0))[0]
# d = []
# for i in range(0, len(embedded)):
# d.append(distance(embedded[i], img))
#
# name = ['Person_2', 'tbs', 'Person_1']
#
# print(name[d.index(min(d))])
# # if d < 1:
# # print("same face")
# # else:
# # print("different face")
# except Exception as e:
# print(e)
# cv2.imshow("normal", frame)
# cap.release()
# cv2.destroyAllWindows()

View File

@@ -0,0 +1,159 @@
# -----------------------------------------------------------------------------------------
# Code taken from https://github.com/iwantooxxoox/Keras-OpenFace (with minor modifications)
# -----------------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import os
from numpy import genfromtxt
from keras.layers import Conv2D, ZeroPadding2D, Activation
from keras.layers.normalization import BatchNormalization
from tensorflow.python.keras.backend import _get_session
# Default dtype name used by variable() and zeros() below.
_FLOATX = 'float32'
def variable(value, dtype=_FLOATX, name=None):
    """Create a ``tf.Variable`` from ``value`` and run its initializer.

    ``value`` is converted to a NumPy array of ``dtype`` first; the
    initializer is executed in the session returned by ``_get_session()``.
    """
    initial_value = np.asarray(value, dtype=dtype)
    tensor = tf.Variable(initial_value, name=name)
    session = _get_session()
    session.run(tensor.initializer)
    return tensor
def shape(x):
    """Return whatever ``x.get_shape()`` reports."""
    static_shape = x.get_shape()
    return static_shape
def square(x):
    """Return ``tf.square`` applied to ``x``."""
    squared = tf.square(x)
    return squared
def zeros(shape, dtype=_FLOATX, name=None):
    """Create an initialized variable of the given shape filled with zeros."""
    initial = np.zeros(shape)
    return variable(initial, dtype, name)
def concatenate(tensors, axis=-1):
    """Concatenate a list of tensors along ``axis``.

    A negative ``axis`` is resolved against the rank of the first tensor.

    :param tensors: Sequence of tensors to join.
    :param axis: Axis to concatenate along; negative values count from the end.
    :return: The concatenated tensor.
    """
    if axis < 0:
        axis = axis % len(tensors[0].get_shape())
    # tf.concat takes (values, axis); the earlier (axis, values) order is the
    # pre-1.0 signature. Keywords make the intent explicit.
    return tf.concat(values=tensors, axis=axis)
def LRN2D(x):
    """Apply ``tf.nn.lrn`` to ``x`` with alpha=1e-4 and beta=0.75."""
    lrn_options = {'alpha': 1e-4, 'beta': 0.75}
    return tf.nn.lrn(x, **lrn_options)
def conv2d_bn(
    x,
    layer=None,
    cv1_out=None,
    cv1_filter=(1, 1),
    cv1_strides=(1, 1),
    cv2_out=None,
    cv2_filter=(3, 3),
    cv2_strides=(1, 1),
    padding=None,
):
    """Build a Conv-BN-ReLU stage, optionally padded and followed by a second one.

    Layers are named ``<layer>_conv``/``<layer>_bn`` when there is no second
    convolution, and ``<layer>_conv1``/``_bn1`` plus ``<layer>_conv2``/``_bn2``
    when ``cv2_out`` is given.

    :param x: Input tensor.
    :param layer: Name prefix for the created layers. It is concatenated
        with string suffixes, so a string must be supplied in practice.
    :param cv1_out: Number of filters of the first convolution.
    :param cv1_filter: Kernel size of the first convolution.
    :param cv1_strides: Strides of the first convolution.
    :param cv2_out: Number of filters of the second convolution, or None to
        skip it.
    :param cv2_filter: Kernel size of the second convolution.
    :param cv2_strides: Strides of the second convolution.
    :param padding: Zero padding applied after the first stage; None returns
        right after the first stage.
    :return: The output tensor of the last layer that was built.
    """
    num = '' if cv2_out is None else '1'
    tensor = Conv2D(cv1_out, cv1_filter, strides=cv1_strides, name=layer+'_conv'+num)(x)
    tensor = BatchNormalization(axis=3, epsilon=0.00001, name=layer+'_bn'+num)(tensor)
    tensor = Activation('relu')(tensor)
    if padding is None:
        return tensor

    tensor = ZeroPadding2D(padding=padding)(tensor)
    if cv2_out is None:
        return tensor

    tensor = Conv2D(cv2_out, cv2_filter, strides=cv2_strides, name=layer+'_conv'+'2')(tensor)
    tensor = BatchNormalization(axis=3, epsilon=0.00001, name=layer+'_bn'+'2')(tensor)
    tensor = Activation('relu')(tensor)
    return tensor
# Names of the layers whose weights load_weights() reads, in iteration order.
# load_weights() dispatches on the substrings 'conv', 'bn' and 'dense'.
weights = [
    'conv1', 'bn1', 'conv2', 'bn2', 'conv3', 'bn3',
    'inception_3a_1x1_conv', 'inception_3a_1x1_bn',
    'inception_3a_pool_conv', 'inception_3a_pool_bn',
    'inception_3a_5x5_conv1', 'inception_3a_5x5_conv2', 'inception_3a_5x5_bn1', 'inception_3a_5x5_bn2',
    'inception_3a_3x3_conv1', 'inception_3a_3x3_conv2', 'inception_3a_3x3_bn1', 'inception_3a_3x3_bn2',
    'inception_3b_3x3_conv1', 'inception_3b_3x3_conv2', 'inception_3b_3x3_bn1', 'inception_3b_3x3_bn2',
    'inception_3b_5x5_conv1', 'inception_3b_5x5_conv2', 'inception_3b_5x5_bn1', 'inception_3b_5x5_bn2',
    'inception_3b_pool_conv', 'inception_3b_pool_bn',
    'inception_3b_1x1_conv', 'inception_3b_1x1_bn',
    'inception_3c_3x3_conv1', 'inception_3c_3x3_conv2', 'inception_3c_3x3_bn1', 'inception_3c_3x3_bn2',
    'inception_3c_5x5_conv1', 'inception_3c_5x5_conv2', 'inception_3c_5x5_bn1', 'inception_3c_5x5_bn2',
    'inception_4a_3x3_conv1', 'inception_4a_3x3_conv2', 'inception_4a_3x3_bn1', 'inception_4a_3x3_bn2',
    'inception_4a_5x5_conv1', 'inception_4a_5x5_conv2', 'inception_4a_5x5_bn1', 'inception_4a_5x5_bn2',
    'inception_4a_pool_conv', 'inception_4a_pool_bn',
    'inception_4a_1x1_conv', 'inception_4a_1x1_bn',
    'inception_4e_3x3_conv1', 'inception_4e_3x3_conv2', 'inception_4e_3x3_bn1', 'inception_4e_3x3_bn2',
    'inception_4e_5x5_conv1', 'inception_4e_5x5_conv2', 'inception_4e_5x5_bn1', 'inception_4e_5x5_bn2',
    'inception_5a_3x3_conv1', 'inception_5a_3x3_conv2', 'inception_5a_3x3_bn1', 'inception_5a_3x3_bn2',
    'inception_5a_pool_conv', 'inception_5a_pool_bn',
    'inception_5a_1x1_conv', 'inception_5a_1x1_bn',
    'inception_5b_3x3_conv1', 'inception_5b_3x3_conv2', 'inception_5b_3x3_bn1', 'inception_5b_3x3_bn2',
    'inception_5b_pool_conv', 'inception_5b_pool_bn',
    'inception_5b_1x1_conv', 'inception_5b_1x1_bn',
    'dense_layer'
]
# Reshape target for each convolution layer's flat weight file; load_weights()
# reshapes to this 4-element shape and then transposes with (2, 3, 1, 0).
# Presumably (out_channels, in_channels, kernel_h, kernel_w) — TODO confirm.
conv_shape = {
    'conv1': [64, 3, 7, 7],
    'conv2': [64, 64, 1, 1],
    'conv3': [192, 64, 3, 3],
    'inception_3a_1x1_conv': [64, 192, 1, 1],
    'inception_3a_pool_conv': [32, 192, 1, 1],
    'inception_3a_5x5_conv1': [16, 192, 1, 1],
    'inception_3a_5x5_conv2': [32, 16, 5, 5],
    'inception_3a_3x3_conv1': [96, 192, 1, 1],
    'inception_3a_3x3_conv2': [128, 96, 3, 3],
    'inception_3b_3x3_conv1': [96, 256, 1, 1],
    'inception_3b_3x3_conv2': [128, 96, 3, 3],
    'inception_3b_5x5_conv1': [32, 256, 1, 1],
    'inception_3b_5x5_conv2': [64, 32, 5, 5],
    'inception_3b_pool_conv': [64, 256, 1, 1],
    'inception_3b_1x1_conv': [64, 256, 1, 1],
    'inception_3c_3x3_conv1': [128, 320, 1, 1],
    'inception_3c_3x3_conv2': [256, 128, 3, 3],
    'inception_3c_5x5_conv1': [32, 320, 1, 1],
    'inception_3c_5x5_conv2': [64, 32, 5, 5],
    'inception_4a_3x3_conv1': [96, 640, 1, 1],
    'inception_4a_3x3_conv2': [192, 96, 3, 3],
    'inception_4a_5x5_conv1': [32, 640, 1, 1,],
    'inception_4a_5x5_conv2': [64, 32, 5, 5],
    'inception_4a_pool_conv': [128, 640, 1, 1],
    'inception_4a_1x1_conv': [256, 640, 1, 1],
    'inception_4e_3x3_conv1': [160, 640, 1, 1],
    'inception_4e_3x3_conv2': [256, 160, 3, 3],
    'inception_4e_5x5_conv1': [64, 640, 1, 1],
    'inception_4e_5x5_conv2': [128, 64, 5, 5],
    'inception_5a_3x3_conv1': [96, 1024, 1, 1],
    'inception_5a_3x3_conv2': [384, 96, 3, 3],
    'inception_5a_pool_conv': [96, 1024, 1, 1],
    'inception_5a_1x1_conv': [256, 1024, 1, 1],
    'inception_5b_3x3_conv1': [96, 736, 1, 1],
    'inception_5b_3x3_conv2': [384, 96, 3, 3],
    'inception_5b_pool_conv': [96, 736, 1, 1],
    'inception_5b_1x1_conv': [256, 736, 1, 1],
}
def load_weights(weights_dir='./weights'):
    """Load per-layer weights from CSV files into a dictionary.

    Files in ``weights_dir`` whose names start with '.' are ignored. For each
    name in the module-level ``weights`` list:

    * names containing 'conv' load ``<name>_w`` (reshaped with
      ``conv_shape[name]`` and transposed with (2, 3, 1, 0)) and ``<name>_b``;
    * names containing 'bn' load ``<name>_w``, ``_b``, ``_m`` and ``_v``;
    * names containing 'dense' load ``dense_w.csv`` (reshaped to (128, 736)
      and transposed) and ``dense_b.csv``.

    :param weights_dir: Directory holding the CSV files. Defaults to
        './weights', the location used before this became a parameter.
    :return: Dict mapping layer name to its list of weight arrays.
    :raises KeyError: If an expected weight file is missing.
    """
    def read_csv(file_path):
        # All weight files share the same comma-separated format.
        return genfromtxt(file_path, delimiter=',', dtype=None)

    # Map "<layer>_<kind>" (file name without '.csv') to the file's path.
    paths = {}
    for file_name in os.listdir(weights_dir):
        if file_name.startswith('.'):
            continue
        paths[file_name.replace('.csv', '')] = os.path.join(weights_dir, file_name)

    weights_dict = {}
    for name in weights:
        if 'conv' in name:
            conv_w = read_csv(paths[name + '_w'])
            conv_w = np.reshape(conv_w, conv_shape[name])
            conv_w = np.transpose(conv_w, (2, 3, 1, 0))
            conv_b = read_csv(paths[name + '_b'])
            weights_dict[name] = [conv_w, conv_b]
        elif 'bn' in name:
            weights_dict[name] = [
                read_csv(paths[name + suffix])
                for suffix in ('_w', '_b', '_m', '_v')
            ]
        elif 'dense' in name:
            dense_w = read_csv(os.path.join(weights_dir, 'dense_w.csv'))
            dense_w = np.reshape(dense_w, (128, 736))
            dense_w = np.transpose(dense_w, (1, 0))
            dense_b = read_csv(os.path.join(weights_dir, 'dense_b.csv'))
            weights_dict[name] = [dense_w, dense_b]

    return weights_dict

220
Class/detection/model.py Normal file
View File

@@ -0,0 +1,220 @@
# -----------------------------------------------------------------------------------------
# Code taken from https://github.com/iwantooxxoox/Keras-OpenFace (with minor modifications)
# -----------------------------------------------------------------------------------------
from keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from keras.layers.core import Lambda, Flatten, Dense
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.models import Model
from keras import backend as K
import facenet_utils
from facenet_utils import LRN2D
def create_model():
    """Build the inception-style embedding network as a Keras ``Model``.

    The model takes a (96, 96, 3) input, runs a convolutional stem followed
    by the inception blocks 3a, 3b, 3c, 4a, 4e, 5a and 5b, then average
    pooling, a 128-unit dense layer and L2 normalization along axis 1.

    :return: ``Model`` mapping the input image tensor to the normalized
        128-dimensional output.
    """
    myInput = Input(shape=(96, 96, 3))

    # Stem: 7x7/2 convolution, max pooling and local response normalization.
    x = ZeroPadding2D(padding=(3, 3), input_shape=(96, 96, 3))(myInput)
    x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn1')(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)
    x = Lambda(LRN2D, name='lrn_1')(x)
    x = Conv2D(64, (1, 1), name='conv2')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn2')(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = Conv2D(192, (3, 3), name='conv3')(x)
    x = BatchNormalization(axis=3, epsilon=0.00001, name='bn3')(x)
    x = Activation('relu')(x)
    x = Lambda(LRN2D, name='lrn_2')(x)
    x = ZeroPadding2D(padding=(1, 1))(x)
    x = MaxPooling2D(pool_size=3, strides=2)(x)

    # Inception 3a: 3x3, 5x5, pooled and 1x1 branches concatenated on axis 3.
    inception_3a_3x3 = Conv2D(96, (1, 1), name='inception_3a_3x3_conv1')(x)
    inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn1')(inception_3a_3x3)
    inception_3a_3x3 = Activation('relu')(inception_3a_3x3)
    inception_3a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3a_3x3)
    inception_3a_3x3 = Conv2D(128, (3, 3), name='inception_3a_3x3_conv2')(inception_3a_3x3)
    inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn2')(inception_3a_3x3)
    inception_3a_3x3 = Activation('relu')(inception_3a_3x3)

    inception_3a_5x5 = Conv2D(16, (1, 1), name='inception_3a_5x5_conv1')(x)
    inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn1')(inception_3a_5x5)
    inception_3a_5x5 = Activation('relu')(inception_3a_5x5)
    inception_3a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3a_5x5)
    inception_3a_5x5 = Conv2D(32, (5, 5), name='inception_3a_5x5_conv2')(inception_3a_5x5)
    inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn2')(inception_3a_5x5)
    inception_3a_5x5 = Activation('relu')(inception_3a_5x5)

    # The pooled branch is padded asymmetrically before concatenation;
    # presumably to restore the spatial size of the other branches — TODO confirm.
    inception_3a_pool = MaxPooling2D(pool_size=3, strides=2)(x)
    inception_3a_pool = Conv2D(32, (1, 1), name='inception_3a_pool_conv')(inception_3a_pool)
    inception_3a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_pool_bn')(inception_3a_pool)
    inception_3a_pool = Activation('relu')(inception_3a_pool)
    inception_3a_pool = ZeroPadding2D(padding=((3, 4), (3, 4)))(inception_3a_pool)

    inception_3a_1x1 = Conv2D(64, (1, 1), name='inception_3a_1x1_conv')(x)
    inception_3a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_1x1_bn')(inception_3a_1x1)
    inception_3a_1x1 = Activation('relu')(inception_3a_1x1)

    inception_3a = concatenate([inception_3a_3x3, inception_3a_5x5, inception_3a_pool, inception_3a_1x1], axis=3)

    # Inception 3b: same four-branch layout, with an average-pooled branch.
    inception_3b_3x3 = Conv2D(96, (1, 1), name='inception_3b_3x3_conv1')(inception_3a)
    inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn1')(inception_3b_3x3)
    inception_3b_3x3 = Activation('relu')(inception_3b_3x3)
    inception_3b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3b_3x3)
    inception_3b_3x3 = Conv2D(128, (3, 3), name='inception_3b_3x3_conv2')(inception_3b_3x3)
    inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn2')(inception_3b_3x3)
    inception_3b_3x3 = Activation('relu')(inception_3b_3x3)

    inception_3b_5x5 = Conv2D(32, (1, 1), name='inception_3b_5x5_conv1')(inception_3a)
    inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn1')(inception_3b_5x5)
    inception_3b_5x5 = Activation('relu')(inception_3b_5x5)
    inception_3b_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3b_5x5)
    inception_3b_5x5 = Conv2D(64, (5, 5), name='inception_3b_5x5_conv2')(inception_3b_5x5)
    inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn2')(inception_3b_5x5)
    inception_3b_5x5 = Activation('relu')(inception_3b_5x5)

    inception_3b_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3a)
    inception_3b_pool = Conv2D(64, (1, 1), name='inception_3b_pool_conv')(inception_3b_pool)
    inception_3b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_pool_bn')(inception_3b_pool)
    inception_3b_pool = Activation('relu')(inception_3b_pool)
    inception_3b_pool = ZeroPadding2D(padding=(4, 4))(inception_3b_pool)

    inception_3b_1x1 = Conv2D(64, (1, 1), name='inception_3b_1x1_conv')(inception_3a)
    inception_3b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_1x1_bn')(inception_3b_1x1)
    inception_3b_1x1 = Activation('relu')(inception_3b_1x1)

    inception_3b = concatenate([inception_3b_3x3, inception_3b_5x5, inception_3b_pool, inception_3b_1x1], axis=3)

    # Inception 3c: strided 3x3 and 5x5 branches plus a max-pooled branch.
    inception_3c_3x3 = facenet_utils.conv2d_bn(inception_3b,
                                               layer='inception_3c_3x3',
                                               cv1_out=128,
                                               cv1_filter=(1, 1),
                                               cv2_out=256,
                                               cv2_filter=(3, 3),
                                               cv2_strides=(2, 2),
                                               padding=(1, 1))

    inception_3c_5x5 = facenet_utils.conv2d_bn(inception_3b,
                                               layer='inception_3c_5x5',
                                               cv1_out=32,
                                               cv1_filter=(1, 1),
                                               cv2_out=64,
                                               cv2_filter=(5, 5),
                                               cv2_strides=(2, 2),
                                               padding=(2, 2))

    inception_3c_pool = MaxPooling2D(pool_size=3, strides=2)(inception_3b)
    inception_3c_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_3c_pool)

    inception_3c = concatenate([inception_3c_3x3, inception_3c_5x5, inception_3c_pool], axis=3)

    # Inception 4a: 3x3, 5x5, average-pooled and 1x1 branches.
    inception_4a_3x3 = facenet_utils.conv2d_bn(inception_3c,
                                               layer='inception_4a_3x3',
                                               cv1_out=96,
                                               cv1_filter=(1, 1),
                                               cv2_out=192,
                                               cv2_filter=(3, 3),
                                               cv2_strides=(1, 1),
                                               padding=(1, 1))
    inception_4a_5x5 = facenet_utils.conv2d_bn(inception_3c,
                                               layer='inception_4a_5x5',
                                               cv1_out=32,
                                               cv1_filter=(1, 1),
                                               cv2_out=64,
                                               cv2_filter=(5, 5),
                                               cv2_strides=(1, 1),
                                               padding=(2, 2))

    inception_4a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3c)
    inception_4a_pool = facenet_utils.conv2d_bn(inception_4a_pool,
                                                layer='inception_4a_pool',
                                                cv1_out=128,
                                                cv1_filter=(1, 1),
                                                padding=(2, 2))
    inception_4a_1x1 = facenet_utils.conv2d_bn(inception_3c,
                                               layer='inception_4a_1x1',
                                               cv1_out=256,
                                               cv1_filter=(1, 1))
    inception_4a = concatenate([inception_4a_3x3, inception_4a_5x5, inception_4a_pool, inception_4a_1x1], axis=3)

    # Inception 4e: strided 3x3 and 5x5 branches plus a max-pooled branch.
    inception_4e_3x3 = facenet_utils.conv2d_bn(inception_4a,
                                               layer='inception_4e_3x3',
                                               cv1_out=160,
                                               cv1_filter=(1, 1),
                                               cv2_out=256,
                                               cv2_filter=(3, 3),
                                               cv2_strides=(2, 2),
                                               padding=(1, 1))
    inception_4e_5x5 = facenet_utils.conv2d_bn(inception_4a,
                                               layer='inception_4e_5x5',
                                               cv1_out=64,
                                               cv1_filter=(1, 1),
                                               cv2_out=128,
                                               cv2_filter=(5, 5),
                                               cv2_strides=(2, 2),
                                               padding=(2, 2))
    inception_4e_pool = MaxPooling2D(pool_size=3, strides=2)(inception_4a)
    inception_4e_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_4e_pool)

    inception_4e = concatenate([inception_4e_3x3, inception_4e_5x5, inception_4e_pool], axis=3)

    # Inception 5a: 3x3, average-pooled and 1x1 branches.
    inception_5a_3x3 = facenet_utils.conv2d_bn(inception_4e,
                                               layer='inception_5a_3x3',
                                               cv1_out=96,
                                               cv1_filter=(1, 1),
                                               cv2_out=384,
                                               cv2_filter=(3, 3),
                                               cv2_strides=(1, 1),
                                               padding=(1, 1))

    inception_5a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_4e)
    inception_5a_pool = facenet_utils.conv2d_bn(inception_5a_pool,
                                                layer='inception_5a_pool',
                                                cv1_out=96,
                                                cv1_filter=(1, 1),
                                                padding=(1, 1))
    inception_5a_1x1 = facenet_utils.conv2d_bn(inception_4e,
                                               layer='inception_5a_1x1',
                                               cv1_out=256,
                                               cv1_filter=(1, 1))

    inception_5a = concatenate([inception_5a_3x3, inception_5a_pool, inception_5a_1x1], axis=3)

    # Inception 5b: 3x3, max-pooled and 1x1 branches.
    inception_5b_3x3 = facenet_utils.conv2d_bn(inception_5a,
                                               layer='inception_5b_3x3',
                                               cv1_out=96,
                                               cv1_filter=(1, 1),
                                               cv2_out=384,
                                               cv2_filter=(3, 3),
                                               cv2_strides=(1, 1),
                                               padding=(1, 1))
    inception_5b_pool = MaxPooling2D(pool_size=3, strides=2)(inception_5a)
    inception_5b_pool = facenet_utils.conv2d_bn(inception_5b_pool,
                                                layer='inception_5b_pool',
                                                cv1_out=96,
                                                cv1_filter=(1, 1))
    inception_5b_pool = ZeroPadding2D(padding=(1, 1))(inception_5b_pool)

    inception_5b_1x1 = facenet_utils.conv2d_bn(inception_5a,
                                               layer='inception_5b_1x1',
                                               cv1_out=256,
                                               cv1_filter=(1, 1))
    inception_5b = concatenate([inception_5b_3x3, inception_5b_pool, inception_5b_1x1], axis=3)

    # Head: average pooling, flatten, 128-unit dense layer, L2 normalization.
    av_pool = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(inception_5b)
    reshape_layer = Flatten()(av_pool)
    dense_layer = Dense(128, name='dense_layer')(reshape_layer)
    norm_layer = Lambda(lambda x: K.l2_normalize(x, axis=1), name='norm_layer')(dense_layer)

    return Model(inputs=[myInput], outputs=norm_layer)