# Source code for notebook

# (C) 2019 Baris Ozmen <hbaristr@gmail.com>

import pandas as pd
import numpy as np


def get_folder_path(path):
    """Returns the folder part of a "/"-separated path, with trailing slash

    Everything after the last "/" is dropped. A path that contains no "/"
    yields an empty string, and a path that already ends with "/" is
    returned unchanged.

    Args:
        path (str): "/"-separated path, typically pointing to a file

    Returns:
        str: the path up to and including its last "/"
    """
    # Slice at the last separator rather than using str.replace(): replace()
    # removes *every* occurrence of the final component, so a path such as
    # "run/run" would wrongly collapse to "/".
    return path[: path.rfind("/") + 1]
class Notebook:
    """Collects per-epoch training records of child models in a DataFrame

    Records are accumulated in ``self.df`` (one row per epoch of each
    training run) and can be saved to / loaded from CSV files.
    """

    def __init__(self, config):
        """Creates an empty notebook

        Args:
            config (dict): must contain the key "notebook_path", the CSV
                path that save() writes to
        """
        # all records gathered so far, one row per training epoch
        self.df = pd.DataFrame()
        # destination CSV of save(); its folder also receives the
        # top-policies file written by output_top_policies()
        self.store_path = config["notebook_path"]

    def record(self, trial_no, trial_hyperparams, sample_no, reward, history):
        """Records one complete training of child model

        Args:
            trial_no (int): no of trial (iteration) of training
            trial_hyperparams (list) : list of data augmentation
                hyperparameters used for training, in the order
                [aug1_type, aug1_magnitude, aug2_type, aug2_magnitude, portion]
            sample_no (int): sample no among training with same
                hyperparameters
            reward (float): reward is basically last n validation accuracy
                before overfitting
            history (dict): history returned by keras.model.fit()
        """
        new_df = pd.DataFrame(history)
        new_df["trial_no"] = trial_no
        new_df["aug1_type"] = trial_hyperparams[0]
        new_df["aug1_magnitude"] = trial_hyperparams[1]
        new_df["aug2_type"] = trial_hyperparams[2]
        new_df["aug2_magnitude"] = trial_hyperparams[3]
        new_df["portion"] = trial_hyperparams[4]
        new_df["sample_no"] = sample_no
        new_df["mean_late_val_acc"] = reward
        # round all float values to 3 decimals after point
        new_df = new_df.round(3)
        # epochs are numbered from 1, in the row order of `history`
        new_df["epoch"] = np.arange(1, len(new_df) + 1)
        self.df = pd.concat([self.df, new_df])

    def save(self):
        """Writes all records to the CSV file at ``self.store_path``"""
        self.df.to_csv(self.store_path, index=False)

    def add_records_from(self, notebook_path):
        """Appends the records stored in another notebook CSV file

        Args:
            notebook_path (str): path of the CSV file to read; lines
                starting with "#" are treated as comments and skipped
        """
        notebook_df = pd.read_csv(notebook_path, comment="#")
        self.df = pd.concat([self.df, notebook_df])

    def get_top_policies(self, k):
        """Prints and returns top-k policies

        Policies are ordered by their expected accuracy increase, i.e. the
        trial's reward averaged over its samples minus the same average of
        a baseline trial (a trial whose "portion" is 0.0).

        The result is also stored in ``self.top_df``.

        Args:
            k (int): top-k

        Returns:
            pandas.DataFrame: top-k policies as dataframe

        Raises:
            IndexError: if the notebook has no baseline trial
                (no record with portion == 0.0)
        """
        # One row per (trial, sample) so that every sample's reward counts
        # once, then average the rewards of each trial. The reward column is
        # selected *before* aggregating: averaging the whole frame fails on
        # pandas >= 2.0 because of the string-valued aug*_type columns.
        trial_avg_val_acc_df = (
            self.df.drop_duplicates(["trial_no", "sample_no"])
            .groupby("trial_no")["mean_late_val_acc"]
            .mean()
            .reset_index()
        )

        # replace the per-sample reward by the per-trial average
        x_df = pd.merge(
            self.df.drop(columns=["mean_late_val_acc"]),
            trial_avg_val_acc_df,
            on="trial_no",
            how="left",
        )
        x_df = x_df.sort_values("mean_late_val_acc", ascending=False)

        baseline_rows = x_df.loc[x_df["portion"] == 0.0, "mean_late_val_acc"]
        if baseline_rows.empty:
            raise IndexError(
                "no baseline trial (portion == 0.0) found in the notebook"
            )
        # rows are sorted, so with several baseline trials the best one is used
        baseline_val_acc = baseline_rows.values[0]

        x_df["expected_accuracy_increase"] = (
            x_df["mean_late_val_acc"] - baseline_val_acc
        )

        select = [
            "trial_no",
            "aug1_type",
            "aug1_magnitude",
            "aug2_type",
            "aug2_magnitude",
            "portion",
            "mean_late_val_acc",
            "expected_accuracy_increase",
        ]
        # keep a single row per trial, best trials first
        self.top_df = x_df.drop_duplicates(["trial_no"]).sort_values(
            "mean_late_val_acc", ascending=False
        )[:k][select]

        print(f"top-{k} policies:")
        print(self.top_df)

        return self.top_df

    def output_top_policies(self):
        """Saves the policies found by get_top_policies() as a CSV file

        The file is named "top<k>_policies.csv", where <k> is the number of
        stored policies, and is written next to ``self.store_path``.

        Raises:
            AttributeError: if get_top_policies() has not been called yet
        """
        top_df = getattr(self, "top_df", None)
        if top_df is None:
            raise AttributeError(
                "no top policies available; call get_top_policies() first"
            )
        k = len(top_df)
        out_path = get_folder_path(self.store_path) + f"top{k}_policies.csv"
        top_df.to_csv(out_path, index=False)
        print(f"Top policies are saved to {out_path}")