add smile detection

2020-07-01 22:57:04 +08:00
parent 96157ff19e
commit 84acd2f1b2
13 changed files with 934 additions and 0 deletions
--- a/Class/detection/utils/data_augmentation.py
+++ b/Class/detection/utils/data_augmentation.py
@@ -0,0 +1,235 @@
+import numpy as np
+from random import shuffle
+from .preprocessor import preprocess_input
+from .preprocessor import _imread as imread
+from .preprocessor import _imresize as imresize
+from .preprocessor import to_categorical
+import scipy.ndimage as ndi
+import cv2
+
+
+class ImageGenerator(object):
+    """ Image generator with saturation, brightness, lighting, contrast,
+    horizontal flip and vertical flip transformations. It supports
+    bounding boxes coordinates.
+
+    TODO:
+        - Finish support for not using bounding_boxes
+            - Random crop
+            - Test other transformations
+    """
+    def __init__(self, ground_truth_data, batch_size, image_size,
+                 train_keys, validation_keys,
+                 ground_truth_transformer=None,
+                 path_prefix=None,
+                 saturation_var=0.5,
+                 brightness_var=0.5,
+                 contrast_var=0.5,
+                 lighting_std=0.5,
+                 horizontal_flip_probability=0.5,
+                 vertical_flip_probability=0.5,
+                 do_random_crop=False,
+                 grayscale=False,
+                 zoom_range=[0.75, 1.25],
+                 translation_factor=.3):
+
+        self.ground_truth_data = ground_truth_data
+        self.ground_truth_transformer = ground_truth_transformer
+        self.batch_size = batch_size
+        self.path_prefix = path_prefix
+        self.train_keys = train_keys
+        self.validation_keys = validation_keys
+        self.image_size = image_size
+        self.grayscale = grayscale
+        self.color_jitter = []
+        if saturation_var:
+            self.saturation_var = saturation_var
+            self.color_jitter.append(self.saturation)
+        if brightness_var:
+            self.brightness_var = brightness_var
+            self.color_jitter.append(self.brightness)
+        if contrast_var:
+            self.contrast_var = contrast_var
+            self.color_jitter.append(self.contrast)
+        self.lighting_std = lighting_std
+        self.horizontal_flip_probability = horizontal_flip_probability
+        self.vertical_flip_probability = vertical_flip_probability
+        self.do_random_crop = do_random_crop
+        self.zoom_range = zoom_range
+        self.translation_factor = translation_factor
+
+    def _do_random_crop(self, image_array):
+        """IMPORTANT: random crop only works for classification since the
+        current implementation does no transform bounding boxes"""
+        height = image_array.shape[0]
+        width = image_array.shape[1]
+        x_offset = np.random.uniform(0, self.translation_factor * width)
+        y_offset = np.random.uniform(0, self.translation_factor * height)
+        offset = np.array([x_offset, y_offset])
+        scale_factor = np.random.uniform(self.zoom_range[0],
+                                         self.zoom_range[1])
+        crop_matrix = np.array([[scale_factor, 0],
+                                [0, scale_factor]])
+
+        image_array = np.rollaxis(image_array, axis=-1, start=0)
+        image_channel = [ndi.interpolation.affine_transform(image_channel,
+                         crop_matrix, offset=offset, order=0, mode='nearest',
+                         cval=0.0) for image_channel in image_array]
+
+        image_array = np.stack(image_channel, axis=0)
+        image_array = np.rollaxis(image_array, 0, 3)
+        return image_array
+
+    def do_random_rotation(self, image_array):
+        """IMPORTANT: random rotation only works for classification since the
+        current implementation does no transform bounding boxes"""
+        height = image_array.shape[0]
+        width = image_array.shape[1]
+        x_offset = np.random.uniform(0, self.translation_factor * width)
+        y_offset = np.random.uniform(0, self.translation_factor * height)
+        offset = np.array([x_offset, y_offset])
+        scale_factor = np.random.uniform(self.zoom_range[0],
+                                         self.zoom_range[1])
+        crop_matrix = np.array([[scale_factor, 0],
+                                [0, scale_factor]])
+
+        image_array = np.rollaxis(image_array, axis=-1, start=0)
+        image_channel = [ndi.interpolation.affine_transform(image_channel,
+                         crop_matrix, offset=offset, order=0, mode='nearest',
+                         cval=0.0) for image_channel in image_array]
+
+        image_array = np.stack(image_channel, axis=0)
+        image_array = np.rollaxis(image_array, 0, 3)
+        return image_array
+
+    def _gray_scale(self, image_array):
+        return image_array.dot([0.299, 0.587, 0.114])
+
+    def saturation(self, image_array):
+        gray_scale = self._gray_scale(image_array)
+        alpha = 2.0 * np.random.random() * self.brightness_var
+        alpha = alpha + 1 - self.saturation_var
+        image_array = (alpha * image_array + (1 - alpha) *
+                       gray_scale[:, :, None])
+        return np.clip(image_array, 0, 255)
+
+    def brightness(self, image_array):
+        alpha = 2 * np.random.random() * self.brightness_var
+        alpha = alpha + 1 - self.saturation_var
+        image_array = alpha * image_array
+        return np.clip(image_array, 0, 255)
+
+    def contrast(self, image_array):
+        gray_scale = (self._gray_scale(image_array).mean() *
+                      np.ones_like(image_array))
+        alpha = 2 * np.random.random() * self.contrast_var
+        alpha = alpha + 1 - self.contrast_var
+        image_array = image_array * alpha + (1 - alpha) * gray_scale
+        return np.clip(image_array, 0, 255)
+
+    def lighting(self, image_array):
+        covariance_matrix = np.cov(image_array.reshape(-1, 3) /
+                                   255.0, rowvar=False)
+        eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix)
+        noise = np.random.randn(3) * self.lighting_std
+        noise = eigen_vectors.dot(eigen_values * noise) * 255
+        image_array = image_array + noise
+        return np.clip(image_array, 0, 255)
+
+    def horizontal_flip(self, image_array, box_corners=None):
+        if np.random.random() < self.horizontal_flip_probability:
+            image_array = image_array[:, ::-1]
+            if box_corners is not None:
+                box_corners[:, [0, 2]] = 1 - box_corners[:, [2, 0]]
+        return image_array, box_corners
+
+    def vertical_flip(self, image_array, box_corners=None):
+        if (np.random.random() < self.vertical_flip_probability):
+            image_array = image_array[::-1]
+            if box_corners is not None:
+                box_corners[:, [1, 3]] = 1 - box_corners[:, [3, 1]]
+        return image_array, box_corners
+
+    def transform(self, image_array, box_corners=None):
+        shuffle(self.color_jitter)
+        for jitter in self.color_jitter:
+            image_array = jitter(image_array)
+
+        if self.lighting_std:
+            image_array = self.lighting(image_array)
+
+        if self.horizontal_flip_probability > 0:
+            image_array, box_corners = self.horizontal_flip(image_array,
+                                                            box_corners)
+
+        if self.vertical_flip_probability > 0:
+            image_array, box_corners = self.vertical_flip(image_array,
+                                                          box_corners)
+        return image_array, box_corners
+
+    def preprocess_images(self, image_array):
+        return preprocess_input(image_array)
+
+    def flow(self, mode='train'):
+            while True:
+                if mode == 'train':
+                    shuffle(self.train_keys)
+                    keys = self.train_keys
+                elif mode == 'val' or mode == 'demo':
+                    shuffle(self.validation_keys)
+                    keys = self.validation_keys
+                else:
+                    raise Exception('invalid mode: %s' % mode)
+
+                inputs = []
+                targets = []
+                for key in keys:
+                    image_path = self.path_prefix + key
+                    image_array = imread(image_path)
+                    image_array = imresize(image_array, self.image_size)
+
+                    num_image_channels = len(image_array.shape)
+                    if num_image_channels != 3:
+                        continue
+
+                    ground_truth = self.ground_truth_data[key]
+
+                    if self.do_random_crop:
+                        image_array = self._do_random_crop(image_array)
+
+                    image_array = image_array.astype('float32')
+                    if mode == 'train' or mode == 'demo':
+                        if self.ground_truth_transformer is not None:
+                            image_array, ground_truth = self.transform(
+                                                                image_array,
+                                                                ground_truth)
+                            ground_truth = (
+                                self.ground_truth_transformer.assign_boxes(
+                                                            ground_truth))
+                        else:
+                            image_array = self.transform(image_array)[0]
+
+                    if self.grayscale:
+                        image_array = cv2.cvtColor(
+                                image_array.astype('uint8'),
+                                cv2.COLOR_RGB2GRAY).astype('float32')
+                        image_array = np.expand_dims(image_array, -1)
+
+                    inputs.append(image_array)
+                    targets.append(ground_truth)
+                    if len(targets) == self.batch_size:
+                        inputs = np.asarray(inputs)
+                        targets = np.asarray(targets)
+                        # this will not work for boxes
+                        targets = to_categorical(targets)
+                        if mode == 'train' or mode == 'val':
+                            inputs = self.preprocess_images(inputs)
+                            yield self._wrap_in_dictionary(inputs, targets)
+                        if mode == 'demo':
+                            yield self._wrap_in_dictionary(inputs, targets)
+                        inputs = []
+                        targets = []
+
+    def _wrap_in_dictionary(self, image_array, targets):
+        return [{'input_1': image_array},
+                {'predictions': targets}]