diff --git a/Class/detection/utils/__init__.py b/Class/detection/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Class/detection/utils/__pycache__/__init__.cpython-37.pyc b/Class/detection/utils/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..70bd699
Binary files /dev/null and b/Class/detection/utils/__pycache__/__init__.cpython-37.pyc differ
diff --git a/Class/detection/utils/__pycache__/datasets.cpython-37.pyc b/Class/detection/utils/__pycache__/datasets.cpython-37.pyc
new file mode 100644
index 0000000..1d83af0
Binary files /dev/null and b/Class/detection/utils/__pycache__/datasets.cpython-37.pyc differ
diff --git a/Class/detection/utils/__pycache__/grad_cam.cpython-37.pyc b/Class/detection/utils/__pycache__/grad_cam.cpython-37.pyc
new file mode 100644
index 0000000..4c61df1
Binary files /dev/null and b/Class/detection/utils/__pycache__/grad_cam.cpython-37.pyc differ
diff --git a/Class/detection/utils/__pycache__/inference.cpython-37.pyc b/Class/detection/utils/__pycache__/inference.cpython-37.pyc
new file mode 100644
index 0000000..21d688b
Binary files /dev/null and b/Class/detection/utils/__pycache__/inference.cpython-37.pyc differ
diff --git a/Class/detection/utils/__pycache__/preprocessor.cpython-37.pyc b/Class/detection/utils/__pycache__/preprocessor.cpython-37.pyc
new file mode 100644
index 0000000..7cbe85f
Binary files /dev/null and b/Class/detection/utils/__pycache__/preprocessor.cpython-37.pyc differ
diff --git a/Class/detection/utils/data_augmentation.py b/Class/detection/utils/data_augmentation.py
new file mode 100644
index 0000000..ee2189b
--- /dev/null
+++ b/Class/detection/utils/data_augmentation.py
@@ -0,0 +1,235 @@
+import numpy as np
+from random import shuffle
+from .preprocessor import preprocess_input
+from .preprocessor import _imread as imread
+from .preprocessor import _imresize as imresize
+from .preprocessor import to_categorical
+import scipy.ndimage as ndi
+import cv2
+
+
+class ImageGenerator(object):
+    """ Image generator with saturation, brightness, lighting, contrast,
+    horizontal flip and vertical flip transformations. It supports
+    bounding boxes coordinates.
+
+    TODO:
+        - Finish support for not using bounding_boxes
+        - Random crop
+        - Test other transformations
+    """
+    def __init__(self, ground_truth_data, batch_size, image_size,
+                 train_keys, validation_keys,
+                 ground_truth_transformer=None,
+                 path_prefix=None,
+                 saturation_var=0.5,
+                 brightness_var=0.5,
+                 contrast_var=0.5,
+                 lighting_std=0.5,
+                 horizontal_flip_probability=0.5,
+                 vertical_flip_probability=0.5,
+                 do_random_crop=False,
+                 grayscale=False,
+                 zoom_range=[0.75, 1.25],
+                 translation_factor=.3):
+
+        self.ground_truth_data = ground_truth_data
+        self.ground_truth_transformer = ground_truth_transformer
+        self.batch_size = batch_size
+        self.path_prefix = path_prefix
+        self.train_keys = train_keys
+        self.validation_keys = validation_keys
+        self.image_size = image_size
+        self.grayscale = grayscale
+        self.color_jitter = []
+        if saturation_var:
+            self.saturation_var = saturation_var
+            self.color_jitter.append(self.saturation)
+        if brightness_var:
+            self.brightness_var = brightness_var
+            self.color_jitter.append(self.brightness)
+        if contrast_var:
+            self.contrast_var = contrast_var
+            self.color_jitter.append(self.contrast)
+        self.lighting_std = lighting_std
+        self.horizontal_flip_probability = horizontal_flip_probability
+        self.vertical_flip_probability = vertical_flip_probability
+        self.do_random_crop = do_random_crop
+        self.zoom_range = zoom_range
+        self.translation_factor = translation_factor
+
+    def _do_random_crop(self, image_array):
+        """IMPORTANT: random crop only works for classification since the
+        current implementation does not transform bounding boxes"""
+        height = image_array.shape[0]
+        width = image_array.shape[1]
+        x_offset = np.random.uniform(0, self.translation_factor * width)
+        y_offset = np.random.uniform(0, self.translation_factor * height)
+        offset = np.array([x_offset, y_offset])
+        scale_factor = np.random.uniform(self.zoom_range[0],
+                                         self.zoom_range[1])
+        crop_matrix = np.array([[scale_factor, 0],
+                                [0, scale_factor]])
+
+        image_array = np.rollaxis(image_array, axis=-1, start=0)
+        image_channel = [ndi.interpolation.affine_transform(image_channel,
+                         crop_matrix, offset=offset, order=0, mode='nearest',
+                         cval=0.0) for image_channel in image_array]
+
+        image_array = np.stack(image_channel, axis=0)
+        image_array = np.rollaxis(image_array, 0, 3)
+        return image_array
+
+    def do_random_rotation(self, image_array):
+        """IMPORTANT: random rotation only works for classification since the
+        current implementation does not transform bounding boxes"""
+        height = image_array.shape[0]
+        width = image_array.shape[1]
+        x_offset = np.random.uniform(0, self.translation_factor * width)
+        y_offset = np.random.uniform(0, self.translation_factor * height)
+        offset = np.array([x_offset, y_offset])
+        scale_factor = np.random.uniform(self.zoom_range[0],
+                                         self.zoom_range[1])
+        crop_matrix = np.array([[scale_factor, 0],
+                                [0, scale_factor]])
+
+        image_array = np.rollaxis(image_array, axis=-1, start=0)
+        image_channel = [ndi.interpolation.affine_transform(image_channel,
+                         crop_matrix, offset=offset, order=0, mode='nearest',
+                         cval=0.0) for image_channel in image_array]
+
+        image_array = np.stack(image_channel, axis=0)
+        image_array = np.rollaxis(image_array, 0, 3)
+        return image_array
+
+    def _gray_scale(self, image_array):
+        return image_array.dot([0.299, 0.587, 0.114])
+
+    def saturation(self, image_array):
+        gray_scale = self._gray_scale(image_array)
+        alpha = 2.0 * np.random.random() * self.saturation_var
+        alpha = alpha + 1 - self.saturation_var
+        image_array = (alpha * image_array + (1 - alpha) *
+                       gray_scale[:, :, None])
+        return np.clip(image_array, 0, 255)
+
+    def brightness(self, image_array):
+        alpha = 2 * np.random.random() * self.brightness_var
+        alpha = alpha + 1 - self.brightness_var
+        image_array = alpha * image_array
+        return np.clip(image_array, 0, 255)
+
+    def contrast(self, image_array):
+        gray_scale = (self._gray_scale(image_array).mean() *
+                      np.ones_like(image_array))
+        alpha = 2 * np.random.random() * self.contrast_var
+        alpha = alpha + 1 - self.contrast_var
+        image_array = image_array * alpha + (1 - alpha) * gray_scale
+        return np.clip(image_array, 0, 255)
+
+    def lighting(self, image_array):
+        covariance_matrix = np.cov(image_array.reshape(-1, 3) /
+                                   255.0, rowvar=False)
+        eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix)
+        noise = np.random.randn(3) * self.lighting_std
+        noise = eigen_vectors.dot(eigen_values * noise) * 255
+        image_array = image_array + noise
+        return np.clip(image_array, 0, 255)
+
+    def horizontal_flip(self, image_array, box_corners=None):
+        if np.random.random() < self.horizontal_flip_probability:
+            image_array = image_array[:, ::-1]
+            if box_corners is not None:
+                box_corners[:, [0, 2]] = 1 - box_corners[:, [2, 0]]
+        return image_array, box_corners
+
+    def vertical_flip(self, image_array, box_corners=None):
+        if (np.random.random() < self.vertical_flip_probability):
+            image_array = image_array[::-1]
+            if box_corners is not None:
+                box_corners[:, [1, 3]] = 1 - box_corners[:, [3, 1]]
+        return image_array, box_corners
+
+    def transform(self, image_array, box_corners=None):
+        shuffle(self.color_jitter)
+        for jitter in self.color_jitter:
+            image_array = jitter(image_array)
+
+        if self.lighting_std:
+            image_array = self.lighting(image_array)
+
+        if self.horizontal_flip_probability > 0:
+            image_array, box_corners = self.horizontal_flip(image_array,
+                                                            box_corners)
+
+        if self.vertical_flip_probability > 0:
+            image_array, box_corners = self.vertical_flip(image_array,
+                                                          box_corners)
+        return image_array, box_corners
+
+    def preprocess_images(self, image_array):
+        return preprocess_input(image_array)
+
+    def flow(self, mode='train'):
+        while True:
+            if mode == 'train':
+                shuffle(self.train_keys)
+                keys = self.train_keys
+            elif mode == 'val' or mode == 'demo':
+                shuffle(self.validation_keys)
+                keys = self.validation_keys
+            else:
+                raise Exception('invalid mode: %s' % mode)
+
+            inputs = []
+            targets = []
+            for key in keys:
+                image_path = self.path_prefix + key
+                image_array = imread(image_path)
+                image_array = imresize(image_array, self.image_size)
+
+                num_image_channels = len(image_array.shape)
+                if num_image_channels != 3:
+                    continue
+
+                ground_truth = self.ground_truth_data[key]
+
+                if self.do_random_crop:
+                    image_array = self._do_random_crop(image_array)
+
+                image_array = image_array.astype('float32')
+                if mode == 'train' or mode == 'demo':
+                    if self.ground_truth_transformer is not None:
+                        image_array, ground_truth = self.transform(
+                                                        image_array,
+                                                        ground_truth)
+                        ground_truth = (
+                            self.ground_truth_transformer.assign_boxes(
+                                ground_truth))
+                    else:
+                        image_array = self.transform(image_array)[0]
+
+                if self.grayscale:
+                    image_array = cv2.cvtColor(
+                            image_array.astype('uint8'),
+                            cv2.COLOR_RGB2GRAY).astype('float32')
+                    image_array = np.expand_dims(image_array, -1)
+
+                inputs.append(image_array)
+                targets.append(ground_truth)
+                if len(targets) == self.batch_size:
+                    inputs = np.asarray(inputs)
+                    targets = np.asarray(targets)
+                    # this will not work for boxes
+                    targets = to_categorical(targets)
+                    if mode == 'train' or mode == 'val':
+                        inputs = self.preprocess_images(inputs)
+                        yield self._wrap_in_dictionary(inputs, targets)
+                    if mode == 'demo':
+                        yield self._wrap_in_dictionary(inputs, targets)
+                    inputs = []
+                    targets = []
+
+    def _wrap_in_dictionary(self, image_array, targets):
+        return [{'input_1': image_array},
+                {'predictions': targets}]
diff --git a/Class/detection/utils/datasets.py b/Class/detection/utils/datasets.py
new file mode 100644
index 0000000..aa445a7
--- /dev/null
+++ b/Class/detection/utils/datasets.py
@@ -0,0 +1,149 @@
+from scipy.io import loadmat
+import pandas as pd
+import numpy as np
+from random import shuffle
+import os
+import cv2
+
+
+class DataManager(object):
+    """Class for loading fer2013 emotion classification dataset or
+    imdb gender classification dataset."""
+    def __init__(self, dataset_name='imdb',
+                 dataset_path=None, image_size=(48, 48)):
+
+        self.dataset_name = dataset_name
+        self.dataset_path = dataset_path
+        self.image_size = image_size
+        if self.dataset_path is not None:
+            self.dataset_path = dataset_path
+        elif self.dataset_name == 'imdb':
+            self.dataset_path = '../datasets/imdb_crop/imdb.mat'
+        elif self.dataset_name == 'fer2013':
+            self.dataset_path = '../datasets/fer2013/fer2013.csv'
+        elif self.dataset_name == 'KDEF':
+            self.dataset_path = '../datasets/KDEF/'
+        else:
+            raise Exception(
+                'Incorrect dataset name, please input imdb, fer2013 or KDEF')
+
+    def get_data(self):
+        if self.dataset_name == 'imdb':
+            ground_truth_data = self._load_imdb()
+        elif self.dataset_name == 'fer2013':
+            ground_truth_data = self._load_fer2013()
+        elif self.dataset_name == 'KDEF':
+            ground_truth_data = self._load_KDEF()
+        return ground_truth_data
+
+    def _load_imdb(self):
+        face_score_threshold = 3
+        dataset = loadmat(self.dataset_path)
+        image_names_array = dataset['imdb']['full_path'][0, 0][0]
+        gender_classes = dataset['imdb']['gender'][0, 0][0]
+        face_score = dataset['imdb']['face_score'][0, 0][0]
+        second_face_score = dataset['imdb']['second_face_score'][0, 0][0]
+        face_score_mask = face_score > face_score_threshold
+        second_face_score_mask = np.isnan(second_face_score)
+        unknown_gender_mask = np.logical_not(np.isnan(gender_classes))
+        mask = np.logical_and(face_score_mask, second_face_score_mask)
+        mask = np.logical_and(mask, unknown_gender_mask)
+        image_names_array = image_names_array[mask]
+        gender_classes = gender_classes[mask].tolist()
+        image_names = []
+        for image_name_arg in range(image_names_array.shape[0]):
+            image_name = image_names_array[image_name_arg][0]
+            image_names.append(image_name)
+        return dict(zip(image_names, gender_classes))
+
+    def _load_fer2013(self):
+        data = pd.read_csv(self.dataset_path)
+        pixels = data['pixels'].tolist()
+        width, height = 48, 48
+        faces = []
+        for pixel_sequence in pixels:
+            face = [int(pixel) for pixel in pixel_sequence.split(' ')]
+            face = np.asarray(face).reshape(width, height)
+            face = cv2.resize(face.astype('uint8'), self.image_size)
+            faces.append(face.astype('float32'))
+        faces = np.asarray(faces)
+        faces = np.expand_dims(faces, -1)
+        emotions = pd.get_dummies(data['emotion']).as_matrix()
+        return faces, emotions
+
+    def _load_KDEF(self):
+        class_to_arg = get_class_to_arg(self.dataset_name)
+        num_classes = len(class_to_arg)
+
+        file_paths = []
+        for folder, subfolders, filenames in os.walk(self.dataset_path):
+            for filename in filenames:
+                if filename.lower().endswith(('.jpg')):
+                    file_paths.append(os.path.join(folder, filename))
+
+        num_faces = len(file_paths)
+        y_size, x_size = self.image_size
+        faces = np.zeros(shape=(num_faces, y_size, x_size))
+        emotions = np.zeros(shape=(num_faces, num_classes))
+        for file_arg, file_path in enumerate(file_paths):
+            image_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
+            image_array = cv2.resize(image_array, (y_size, x_size))
+            faces[file_arg] = image_array
+            file_basename = os.path.basename(file_path)
+            file_emotion = file_basename[4:6]
+            # there are two file names in the dataset
+            # that don't match the given classes
+            try:
+                emotion_arg = class_to_arg[file_emotion]
+            except:
+                continue
+            emotions[file_arg, emotion_arg] = 1
+        faces = np.expand_dims(faces, -1)
+        return faces, emotions
+
+
+def get_labels(dataset_name):
+    if dataset_name == 'fer2013':
+        return {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
+                4: 'sad', 5: 'surprise', 6: 'neutral'}
+    elif dataset_name == 'imdb':
+        return {0: 'woman', 1: 'man'}
+    elif dataset_name == 'KDEF':
+        return {0: 'AN', 1: 'DI', 2: 'AF', 3: 'HA', 4: 'SA', 5: 'SU', 6: 'NE'}
+    else:
+        raise Exception('Invalid dataset name')
+
+
+def get_class_to_arg(dataset_name='fer2013'):
+    if dataset_name == 'fer2013':
+        return {'angry': 0, 'disgust': 1, 'fear': 2, 'happy': 3, 'sad': 4,
+                'surprise': 5, 'neutral': 6}
+    elif dataset_name == 'imdb':
+        return {'woman': 0, 'man': 1}
+    elif dataset_name == 'KDEF':
+        return {'AN': 0, 'DI': 1, 'AF': 2, 'HA': 3, 'SA': 4, 'SU': 5, 'NE': 6}
+    else:
+        raise Exception('Invalid dataset name')
+
+
+def split_imdb_data(ground_truth_data, validation_split=.2, do_shuffle=False):
+    ground_truth_keys = sorted(ground_truth_data.keys())
+    if do_shuffle is not False:
+        shuffle(ground_truth_keys)
+    training_split = 1 - validation_split
+    num_train = int(training_split * len(ground_truth_keys))
+    train_keys = ground_truth_keys[:num_train]
+    validation_keys = ground_truth_keys[num_train:]
+    return train_keys, validation_keys
+
+
+def split_data(x, y, validation_split=.2):
+    num_samples = len(x)
+    num_train_samples = int((1 - validation_split) * num_samples)
+    train_x = x[:num_train_samples]
+    train_y = y[:num_train_samples]
+    val_x = x[num_train_samples:]
+    val_y = y[num_train_samples:]
+    train_data = (train_x, train_y)
+    val_data = (val_x, val_y)
+    return train_data, val_data
diff --git a/Class/detection/utils/grad_cam.py b/Class/detection/utils/grad_cam.py
new file mode 100644
index 0000000..28c06d9
--- /dev/null
+++ b/Class/detection/utils/grad_cam.py
@@ -0,0 +1,191 @@
+import cv2
+import h5py
+import keras
+import keras.backend as K
+from keras.layers.core import Lambda
+from keras.models import Sequential
+from keras.models import load_model
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.framework import ops
+
+from .preprocessor import preprocess_input
+
+
+def reset_optimizer_weights(model_filename):
+    model = h5py.File(model_filename, 'r+')
+    del model['optimizer_weights']
+    model.close()
+
+
+def target_category_loss(x, category_index, num_classes):
+    return tf.multiply(x, K.one_hot([category_index], num_classes))
+
+
+def target_category_loss_output_shape(input_shape):
+    return input_shape
+
+
+def normalize(x):
+    # utility function to normalize a tensor by its L2 norm
+    return x / (K.sqrt(K.mean(K.square(x))) + 1e-5)
+
+
+def load_image(image_array):
+    image_array = np.expand_dims(image_array, axis=0)
+    image_array = preprocess_input(image_array)
+    return image_array
+
+
+def register_gradient():
+    if "GuidedBackProp" not in ops._gradient_registry._registry:
+        @ops.RegisterGradient("GuidedBackProp")
+        def _GuidedBackProp(op, gradient):
+            dtype = op.inputs[0].dtype
+            guided_gradient = (gradient * tf.cast(gradient > 0., dtype) *
+                               tf.cast(op.inputs[0] > 0., dtype))
+            return guided_gradient
+
+
+def compile_saliency_function(model, activation_layer='conv2d_7'):
+    input_image = model.input
+    layer_output = model.get_layer(activation_layer).output
+    max_output = K.max(layer_output, axis=3)
+    saliency = K.gradients(K.sum(max_output), input_image)[0]
+    return K.function([input_image, K.learning_phase()], [saliency])
+
+
+def modify_backprop(model, name, task):
+    graph = tf.get_default_graph()
+    with graph.gradient_override_map({'Relu': name}):
+
+        # get layers that have an activation
+        activation_layers = [layer for layer in model.layers
+                             if hasattr(layer, 'activation')]
+
+        # replace relu activation
+        for layer in activation_layers:
+            if layer.activation == keras.activations.relu:
+                layer.activation = tf.nn.relu
+
+        # re-instantiate a new model
+        if task == 'gender':
+            model_path = '../trained_models/gender_models/gender_mini_XCEPTION.21-0.95.hdf5'
+        elif task == 'emotion':
+            model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
+        # model_path = '../trained_models/fer2013_mini_XCEPTION.119-0.65.hdf5'
+        # model_path = '../trained_models/fer2013_big_XCEPTION.54-0.66.hdf5'
+        new_model = load_model(model_path, compile=False)
+    return new_model
+
+
+def deprocess_image(x):
+    """ Same normalization as in:
+    https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py
+    """
+    if np.ndim(x) > 3:
+        x = np.squeeze(x)
+    # normalize tensor: center on 0., ensure std is 0.1
+    x = x - x.mean()
+    x = x / (x.std() + 1e-5)
+    x = x * 0.1
+
+    # clip to [0, 1]
+    x = x + 0.5
+    x = np.clip(x, 0, 1)
+
+    # convert to RGB array
+    x = x * 255
+    if K.image_dim_ordering() == 'th':
+        x = x.transpose((1, 2, 0))
+    x = np.clip(x, 0, 255).astype('uint8')
+    return x
+
+
+def compile_gradient_function(input_model, category_index, layer_name):
+    model = Sequential()
+    model.add(input_model)
+
+    num_classes = model.output_shape[1]
+    target_layer = lambda x: target_category_loss(x, category_index, num_classes)
+    model.add(Lambda(target_layer,
+                     output_shape=target_category_loss_output_shape))
+
+    loss = K.sum(model.layers[-1].output)
+    conv_output = model.layers[0].get_layer(layer_name).output
+    gradients = normalize(K.gradients(loss, conv_output)[0])
+    gradient_function = K.function([model.layers[0].input, K.learning_phase()],
+                                   [conv_output, gradients])
+    return gradient_function
+
+
+def calculate_gradient_weighted_CAM(gradient_function, image):
+    output, evaluated_gradients = gradient_function([image, False])
+    output, evaluated_gradients = output[0, :], evaluated_gradients[0, :, :, :]
+    weights = np.mean(evaluated_gradients, axis=(0, 1))
+    CAM = np.ones(output.shape[0: 2], dtype=np.float32)
+    for weight_arg, weight in enumerate(weights):
+        CAM = CAM + (weight * output[:, :, weight_arg])
+    CAM = cv2.resize(CAM, (64, 64))
+    CAM = np.maximum(CAM, 0)
+    heatmap = CAM / np.max(CAM)
+
+    # Return to BGR [0..255] from the preprocessed image
+    image = image[0, :]
+    image = image - np.min(image)
+    image = np.minimum(image, 255)
+
+    CAM = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
+    CAM = np.float32(CAM) + np.float32(image)
+    CAM = 255 * CAM / np.max(CAM)
+    return np.uint8(CAM), heatmap
+
+
+def calculate_guided_gradient_CAM(
+        preprocessed_input, gradient_function, saliency_function):
+    CAM, heatmap = calculate_gradient_weighted_CAM(
+        gradient_function, preprocessed_input)
+    saliency = saliency_function([preprocessed_input, 0])
+    # gradCAM = saliency[0] * heatmap[..., np.newaxis]
+    # return deprocess_image(gradCAM)
+    return deprocess_image(saliency[0])
+    # return saliency[0]
+
+
+def calculate_guided_gradient_CAM_v2(
+        preprocessed_input, gradient_function,
+        saliency_function, target_size=(128, 128)):
+    CAM, heatmap = calculate_gradient_weighted_CAM(
+        gradient_function, preprocessed_input)
+    heatmap = np.squeeze(heatmap)
+    heatmap = cv2.resize(heatmap.astype('uint8'), target_size)
+    saliency = saliency_function([preprocessed_input, 0])
+    saliency = np.squeeze(saliency[0])
+    saliency = cv2.resize(saliency.astype('uint8'), target_size)
+    gradCAM = saliency * heatmap
+    gradCAM = deprocess_image(gradCAM)
+    return np.expand_dims(gradCAM, -1)
+
+
+if __name__ == '__main__':
+    import pickle
+    faces = pickle.load(open('faces.pkl', 'rb'))
+    face = faces[0]
+    model_filename = '../../trained_models/emotion_models/mini_XCEPTION.523-0.65.hdf5'
+    # reset_optimizer_weights(model_filename)
+    model = load_model(model_filename)
+
+    preprocessed_input = load_image(face)
+    predictions = model.predict(preprocessed_input)
+    predicted_class = np.argmax(predictions)
+    gradient_function = compile_gradient_function(
+        model, predicted_class, 'conv2d_6')
+    register_gradient()
+    guided_model = modify_backprop(model, 'GuidedBackProp', 'emotion')  # task matches the loaded emotion model
+    saliency_function = compile_saliency_function(guided_model)
+    guided_gradCAM = calculate_guided_gradient_CAM(
+        preprocessed_input, gradient_function, saliency_function)
+
+    cv2.imwrite('guided_gradCAM.jpg', guided_gradCAM)
+
+
diff --git a/Class/detection/utils/inference.py b/Class/detection/utils/inference.py
new file mode 100644
index 0000000..6793fdc
--- /dev/null
+++ b/Class/detection/utils/inference.py
@@ -0,0 +1,48 @@
+import cv2
+import matplotlib.pyplot as plt
+import numpy as np
+from keras.preprocessing import image
+
+
+def load_image(image_path, grayscale=False, target_size=None):
+    pil_image = image.load_img(image_path, grayscale, target_size)
+    return image.img_to_array(pil_image)
+
+
+def load_detection_model(model_path):
+    # detection_model = cv2.CascadeClassifier(model_path)
+    detection_model = cv2.dnn.readNetFromCaffe("data/data_opencv/deploy.prototxt.txt", model_path)
+    return detection_model
+
+
+def detect_faces(detection_model, gray_image_array):
+    blob = cv2.dnn.blobFromImage(cv2.resize(gray_image_array, (300, 300)), 1.0,
+                                 (300, 300), (104.0, 177.0, 123.0))
+    detection_model.setInput(blob)
+    return detection_model.forward()
+    # return detection_model.detectMultiScale(gray_image_array, 1.3, 5)
+
+
+def draw_bounding_box(face_coordinates, image_array, color):
+    x, y, w, h = face_coordinates
+    cv2.rectangle(image_array, (x, y), (x + w, y + h), color, 2)
+
+
+def apply_offsets(face_coordinates, offsets):
+    x, y, width, height = face_coordinates
+    x_off, y_off = offsets
+    return (x - x_off, x + width + x_off, y - y_off, y + height + y_off)
+
+
+def draw_text(coordinates, image_array, text, color, x_offset=0, y_offset=0,
+              font_scale=2, thickness=2):
+    x, y = coordinates[:2]
+    cv2.putText(image_array, text, (x + x_offset, y + y_offset),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                font_scale, color, thickness, cv2.LINE_AA)
+
+
+def get_colors(num_classes):
+    colors = plt.cm.hsv(np.linspace(0, 1, num_classes)).tolist()
+    colors = np.asarray(colors) * 255
+    return colors
diff --git a/Class/detection/utils/preprocessor.py b/Class/detection/utils/preprocessor.py
new file mode 100644
index 0000000..72953f5
--- /dev/null
+++ b/Class/detection/utils/preprocessor.py
@@ -0,0 +1,29 @@
+import numpy as np
+from PIL import Image
+import numpy as np
+from imageio import imread
+
+
+def preprocess_input(x, v2=True):
+    x = x.astype('float32')
+    x = x / 255.0
+    if v2:
+        x = x - 0.5
+        x = x * 2.0
+    return x
+
+
+def _imread(image_name):
+    return imread(image_name)
+
+
+def _imresize(image_array, size):
+    return np.array(Image.fromarray(image_array).resize(size))
+    # return imresize(image_array, size)
+
+
+def to_categorical(integer_classes, num_classes=2):
+    integer_classes = np.asarray(integer_classes, dtype='int')
+    num_samples = integer_classes.shape[0]
+    categorical = np.zeros((num_samples, num_classes))
+    categorical[np.arange(num_samples), integer_classes] = 1
+    return categorical
diff --git a/Class/detection/utils/visualizer.py b/Class/detection/utils/visualizer.py
new file mode 100644
index 0000000..aa32550
--- /dev/null
+++ b/Class/detection/utils/visualizer.py
@@ -0,0 +1,177 @@
+import numpy as np
+import matplotlib.cm as cm
+from mpl_toolkits.axes_grid1 import make_axes_locatable
+import matplotlib.pyplot as plt
+import numpy.ma as ma
+
+
+def make_mosaic(images, num_rows, num_cols, border=1, class_names=None):
+    num_images = len(images)
+    image_shape = images.shape[1:]
+    mosaic = ma.masked_all(
+        (num_rows * image_shape[0] + (num_rows - 1) * border,
+         num_cols * image_shape[1] + (num_cols - 1) * border),
+        dtype=np.float32)
+    paddedh = image_shape[0] + border
+    paddedw = image_shape[1] + border
+    for image_arg in range(num_images):
+        row = int(np.floor(image_arg / num_cols))
+        col = image_arg % num_cols
+        image = np.squeeze(images[image_arg])
+        image_shape = image.shape
+        mosaic[row * paddedh:row * paddedh + image_shape[0],
+               col * paddedw:col * paddedw + image_shape[1]] = image
+    return mosaic
+
+
+def make_mosaic_v2(images, num_mosaic_rows=None,
+                   num_mosaic_cols=None, border=1):
+    images = np.squeeze(images)
+    num_images, image_pixels_rows, image_pixels_cols = images.shape
+    if num_mosaic_rows is None and num_mosaic_cols is None:
+        box_size = int(np.ceil(np.sqrt(num_images)))
+        num_mosaic_rows = num_mosaic_cols = box_size
+    num_mosaic_pixel_rows = num_mosaic_rows * (image_pixels_rows + border)
+    num_mosaic_pixel_cols = num_mosaic_cols * (image_pixels_cols + border)
+    mosaic = np.empty(shape=(num_mosaic_pixel_rows, num_mosaic_pixel_cols))
+    mosaic_col_arg = 0
+    mosaic_row_arg = 0
+    for image_arg in range(num_images):
+        if image_arg % num_mosaic_cols == 0 and image_arg != 0:
+            mosaic_col_arg = mosaic_col_arg + 1
+            mosaic_row_arg = 0
+        x0 = image_pixels_cols * (mosaic_row_arg)
+        x1 = image_pixels_cols * (mosaic_row_arg + 1)
+        y0 = image_pixels_rows * (mosaic_col_arg)
+        y1 = image_pixels_rows * (mosaic_col_arg + 1)
+        image = images[image_arg]
+        mosaic[y0:y1, x0:x1] = image
+        mosaic_row_arg = mosaic_row_arg + 1
+    return mosaic
+
+
+def pretty_imshow(axis, data, vmin=None, vmax=None, cmap=None):
+    if cmap is None:
+        cmap = cm.jet
+    if vmin is None:
+        vmin = data.min()
+    if vmax is None:
+        vmax = data.max()
+    cax = None
+    divider = make_axes_locatable(axis)
+    cax = divider.append_axes('right', size='5%', pad=0.05)
+    image = axis.imshow(data, vmin=vmin, vmax=vmax,
+                        interpolation='nearest', cmap=cmap)
+    plt.colorbar(image, cax=cax)
+
+
+def normal_imshow(axis, data, vmin=None, vmax=None,
+                  cmap=None, axis_off=True):
+    if cmap is None:
+        cmap = cm.jet
+    if vmin is None:
+        vmin = data.min()
+    if vmax is None:
+        vmax = data.max()
+    image = axis.imshow(data, vmin=vmin, vmax=vmax,
+                        interpolation='nearest', cmap=cmap)
+    if axis_off:
+        plt.axis('off')
+    return image
+
+
+def display_image(face, class_vector=None,
+                  class_decoder=None, pretty=False):
+    if class_vector is not None and class_decoder is None:
+        raise Exception('Provide class decoder')
+    face = np.squeeze(face)
+    color_map = None
+    if len(face.shape) < 3:
+        color_map = 'gray'
+    plt.figure()
+    if class_vector is not None:
+        class_arg = np.argmax(class_vector)
+        class_name = class_decoder[class_arg]
+        plt.title(class_name)
+    if pretty:
+        pretty_imshow(plt.gca(), face, cmap=color_map)
+    else:
+        plt.imshow(face, color_map)
+
+
+def draw_mosaic(data, num_rows, num_cols, class_vectors=None,
+                class_decoder=None, cmap='gray'):
+
+    if class_vectors is not None and class_decoder is None:
+        raise Exception('Provide class decoder')
+
+    figure, axis_array = plt.subplots(num_rows, num_cols)
+    figure.set_size_inches(8, 8, forward=True)
+    titles = []
+    if class_vectors is not None:
+        for vector_arg in range(len(class_vectors)):
+            class_arg = np.argmax(class_vectors[vector_arg])
+            class_name = class_decoder[class_arg]
+            titles.append(class_name)
+
+    image_arg = 0
+    for row_arg in range(num_rows):
+        for col_arg in range(num_cols):
+            image = data[image_arg]
+            image = np.squeeze(image)
+            axis_array[row_arg, col_arg].axis('off')
+            axis_array[row_arg, col_arg].imshow(image, cmap=cmap)
+            axis_array[row_arg, col_arg].set_title(titles[image_arg])
+            image_arg = image_arg + 1
+    plt.tight_layout()
+
+
+if __name__ == '__main__':
+    # from utils.data_manager import DataManager
+    from utils.datasets import get_labels
+    from keras.models import load_model
+    import pickle
+
+    # dataset_name = 'fer2013'
+    # model_path = '../trained_models/emotion_models/simple_CNN.985-0.66.hdf5'
+    dataset_name = 'fer2013'
+    class_decoder = get_labels(dataset_name)
+    # data_manager = DataManager(dataset_name)
+    # faces, emotions = data_manager.get_data()
+    faces = pickle.load(open('faces.pkl', 'rb'))
+    emotions = pickle.load(open('emotions.pkl', 'rb'))
+    pretty_imshow(plt.gca(), make_mosaic(faces[:4], 2, 2), cmap='gray')
+    plt.show()
+
+    """
+    image_arg = 0
+    face = faces[image_arg:image_arg + 1]
+    emotion = emotions[image_arg:image_arg + 1]
+    display_image(face, emotion, class_decoder)
+    plt.show()
+
+    normal_imshow(plt.gca(), make_mosaic(faces[:4], 3, 3), cmap='gray')
+    plt.show()
+
+    draw_mosaic(faces, 2, 2, emotions, class_decoder)
+    plt.show()
+
+    """
+    model = load_model('../trained_models/emotion_models/simple_CNN.985-0.66.hdf5')
+    conv1_weights = model.layers[2].get_weights()
+    kernel_conv1_weights = conv1_weights[0]
+    kernel_conv1_weights = np.squeeze(kernel_conv1_weights)
+    kernel_conv1_weights = np.rollaxis(kernel_conv1_weights, 2, 0)
+    kernel_conv1_weights = np.expand_dims(kernel_conv1_weights, -1)
+    num_kernels = kernel_conv1_weights.shape[0]
+    box_size = int(np.ceil(np.sqrt(num_kernels)))
+    print('Box size:', box_size)
+
+    print('Kernel shape', kernel_conv1_weights.shape)
+    plt.figure(figsize=(15, 15))
+    plt.title('conv1 weights')
+    pretty_imshow(
+        plt.gca(),
+        make_mosaic(kernel_conv1_weights, box_size, box_size),
+        cmap=cm.binary)
+    plt.show()
diff --git a/Class/detection/video_emotion_color_demo.py b/Class/detection/video_emotion_color_demo.py
new file mode 100644
index 0000000..aa115a6
--- /dev/null
+++ b/Class/detection/video_emotion_color_demo.py
@@ -0,0 +1,105 @@
+from statistics import mode
+
+import cv2
+from keras.models import load_model
+import numpy as np
+
+from utils.datasets import get_labels
+from utils.inference import detect_faces
+from utils.inference import draw_text
+from utils.inference import draw_bounding_box
+from utils.inference import load_detection_model
+from utils.preprocessor import preprocess_input
+
+# parameters for loading data and images
+detection_model_path = './data/data_opencv/res10_300x300_ssd_iter_140000.caffemodel'
+emotion_model_path = 'data/trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
+emotion_labels = get_labels('fer2013')
+
+# hyper-parameters for bounding boxes shape
+frame_window = 10
+emotion_offsets = (20, 40)
+
+# loading models
+face_detection = load_detection_model(detection_model_path)
+emotion_classifier = load_model(emotion_model_path, compile=False)
+
+# getting input model shapes for inference
+emotion_target_size = emotion_classifier.input_shape[1:3]
+
+# starting lists for calculating modes
+emotion_window = []
+
+# starting video streaming
+cv2.namedWindow('window_frame')
+video_capture = cv2.VideoCapture(0)
+while True:
+    bgr_image = video_capture.read()[1]
+    gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
+    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
+    faces = detect_faces(face_detection, bgr_image)
+    (h, w) = bgr_image.shape[:2]
+
+    # for face_coordinates in faces:
+    for i in range(0, faces.shape[2]):
+        confidence = faces[0, 0, i, 2]
+        if confidence < 0.5:
+            continue
+
+        box = faces[0, 0, i, 3:7] * np.array([w, h, w, h])
+        (startX, startY, endX, endY) = box.astype("int")
+        x_off, y_off = emotion_offsets
+        x1 = startX - x_off
+        x2 = endX + x_off
+        y1 = startY - y_off
+        y2 = endY + y_off
+
+        face_coordinates = np.array([startX, startY, endX - startX, endY - startY])
+
+        # x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
+        gray_face = gray_image[y1:y2, x1:x2]
+        try:
+            gray_face = cv2.resize(gray_face, (emotion_target_size))
+        except:
+            continue
+
+        gray_face = preprocess_input(gray_face, True)
+        gray_face = np.expand_dims(gray_face, 0)
+        gray_face = np.expand_dims(gray_face, -1)
+        emotion_prediction = emotion_classifier.predict(gray_face)
+        emotion_probability = np.max(emotion_prediction)
+        emotion_label_arg = np.argmax(emotion_prediction)
+        emotion_text = emotion_labels[emotion_label_arg]
+        emotion_window.append(emotion_text)
+
+        if len(emotion_window) > frame_window:
+            emotion_window.pop(0)
+        try:
+            emotion_mode = mode(emotion_window)
+        except:
+            continue
+
+        if emotion_text == 'angry':
+            color = emotion_probability * np.asarray((255, 0, 0))
+        elif emotion_text == 'sad':
+            color = emotion_probability * np.asarray((0, 0, 255))
+        elif emotion_text == 'happy':
+            color = emotion_probability * np.asarray((255, 255, 0))
+        elif emotion_text == 'surprise':
+            color = emotion_probability * np.asarray((0, 255, 255))
+        else:
+            color = emotion_probability * np.asarray((0, 255, 0))
+
+        color = color.astype(int)
+        color = color.tolist()
+
+        draw_bounding_box(face_coordinates, rgb_image, color)
+        draw_text(face_coordinates, rgb_image, emotion_mode,
+                  color, 0, -45, 1, 1)
+
+    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
+    cv2.imshow('window_frame', bgr_image)
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+video_capture.release()
+cv2.destroyAllWindows()