Source code for deepaugment.build_features

# (C) 2019 Baris Ozmen <hbaristr@gmail.com>

import sys
import numpy as np
import keras


class DataOp:
    """Static helpers for loading, splitting and preprocessing image datasets."""

    @staticmethod
    def load(dataset_name):
        """Load a dataset bundled with keras and merge its two splits.

        The two splits returned by the keras loader are concatenated into
        a single pool so that callers can re-split it themselves.

        Args:
            dataset_name (str): attribute name under ``keras.datasets``
                (e.g. ``"cifar10"``).

        Returns:
            tuple: ``(X, y, input_shape)`` where ``X`` and ``y`` are the
            concatenated images and labels, and ``input_shape`` is the shape
            of a single image (``x_train.shape[1:]``).

        Raises:
            SystemExit: if ``keras.datasets`` has no attribute named
                ``dataset_name``.
        """
        if not hasattr(keras.datasets, dataset_name):
            sys.exit(f"Unknown dataset {dataset_name}")

        loader = getattr(keras.datasets, dataset_name)
        (x_train, y_train), (x_val, y_val) = loader.load_data()

        X = np.concatenate([x_train, x_val])
        y = np.concatenate([y_train, y_val])
        input_shape = x_train.shape[1:]
        return X, y, input_shape

    @staticmethod
    def _count_classes(label_arrays):
        """Return ``max label + 1`` over all non-empty label arrays.

        Labels are assumed to be non-negative integer class ids, which is
        what ``keras.utils.to_categorical`` expects.
        """
        non_empty = [np.asarray(labels) for labels in label_arrays]
        non_empty = [labels for labels in non_empty if labels.size]
        return int(max(np.max(labels) for labels in non_empty)) + 1

    @staticmethod
    def _normalize_and_encode(data, x_keys, y_keys):
        """Scale the images under ``x_keys`` and one-hot encode ``y_keys`` in place."""
        for key in x_keys:
            data[key] = data[key].astype("float32") / 255

        # Use one class count for every label array: encoding each array on
        # its own would give matrices of different widths whenever one split
        # happens not to contain the highest class id.
        num_classes = DataOp._count_classes(data[key] for key in y_keys)
        for key in y_keys:
            data[key] = keras.utils.to_categorical(
                data[key], num_classes=num_classes
            )
        return data

    @staticmethod
    def preprocess_normal(data):
        """Normalize images and one-hot encode labels of a train/val dict.

        Args:
            data (dict): must contain ``X_train``, ``X_val``, ``y_train`` and
                ``y_val``. The dict is modified in place.

        Returns:
            dict: the same ``data`` object, with images cast to float32 and
            divided by 255 and labels converted to categorical form.
        """
        return DataOp._normalize_and_encode(
            data, x_keys=("X_train", "X_val"), y_keys=("y_train", "y_val")
        )

    @staticmethod
    def split_train_val_sets(X, y, train_set_size, val_set_size):
        """Randomly split samples into a training set and a validation seed set.

        The validation seed set is the pool that validation sets are later
        sampled from. It always contains every sample that was not drawn for
        training, in ascending index order.

        Args:
            X (numpy.array): images.
            y (numpy.array): labels, aligned with ``X`` along the first axis.
            train_set_size (int or None): number of training samples to draw
                without replacement. If None, ``len(X) - val_set_size``
                samples are used.
            val_set_size (int): only consulted when ``train_set_size`` is
                None, to decide how many samples are left out of training.

        Returns:
            dict: dict with keys ``X_train``, ``y_train``, ``X_val_seed``,
            ``y_val_seed``.
        """
        if train_set_size is None:
            print("Using all training images")
            train_set_size = len(X) - val_set_size
        else:
            print(f"Using {train_set_size} training images")

        # Draw the training indices without replacement.
        train_ix = np.random.choice(len(X), train_set_size, replace=False)

        # Everything not drawn becomes the validation seed; a boolean mask
        # keeps the remaining samples in a deterministic (ascending) order.
        is_val_seed = np.ones(len(X), dtype=bool)
        is_val_seed[train_ix] = False

        return {
            "X_train": X[train_ix],
            "y_train": y[train_ix],
            "X_val_seed": X[is_val_seed],
            "y_val_seed": y[is_val_seed],
        }

    @staticmethod
    def preprocess(X, y, train_set_size, val_set_size=1000):
        """Split the data, then normalize images and one-hot encode labels.

        Steps:
            1. split into training and validation seed sets
            2. normalize images to the 0-1 range (divide by 255)
            3. convert labels to categorical

        Args:
            X (numpy.array): images.
            y (numpy.array): labels.
            train_set_size (int or None): forwarded to
                ``split_train_val_sets``.
            val_set_size (int): forwarded to ``split_train_val_sets``.

        Returns:
            dict: preprocessed data with keys ``X_train``, ``y_train``,
            ``X_val_seed``, ``y_val_seed``.
        """
        data = DataOp.split_train_val_sets(X, y, train_set_size, val_set_size)
        return DataOp._normalize_and_encode(
            data,
            x_keys=("X_train", "X_val_seed"),
            y_keys=("y_train", "y_val_seed"),
        )

    @staticmethod
    def sample_validation_set(data, val_set_size=1000):
        """Draw a random validation set from the validation seed set.

        Args:
            data (dict): must contain ``X_val_seed`` and ``y_val_seed``.
            val_set_size (int): number of samples to draw without
                replacement. Defaults to 1000.

        Returns:
            tuple: ``(X_val, y_val)`` as new arrays.

        Raises:
            ValueError: raised by ``np.random.choice`` when the seed set
                holds fewer than ``val_set_size`` samples.
        """
        ix = np.random.choice(
            len(data["X_val_seed"]), val_set_size, replace=False
        )
        # Indexing with an index array already returns a copy, so the
        # sampled arrays do not alias the seed set.
        X_val = data["X_val_seed"][ix]
        y_val = data["y_val_seed"][ix]
        return X_val, y_val

    @staticmethod
    def find_num_classes(data):
        """Return the number of columns of the one-hot encoded ``y_train``."""
        return data["y_train"].shape[1]