Source code for notebook

# (C) 2019 Baris Ozmen <hbaristr@gmail.com>

import pandas as pd
import numpy as np


def get_folder_path(path):
    """Return the folder part of a "/"-separated path, trailing slash included.

    Only the final path component is stripped. A naive
    ``path.replace(last, "")`` would also delete earlier occurrences of the
    same text (e.g. ``"data/data"`` would become ``"/"``).

    Args:
        path (str): file path using "/" as separator

    Returns:
        str: everything up to and including the last "/", or an empty string
            if `path` contains no "/"
    """
    # rpartition splits on the LAST "/"; when there is none, both head and
    # separator are empty strings, so the result is "".
    head, separator, _ = path.rpartition("/")
    return head + separator


class Notebook:
    """Collects per-epoch training records of child models in a DataFrame.

    Records are accumulated in ``self.df`` (one row per epoch of each
    training run) and can be saved to / loaded from CSV. After
    ``get_top_policies`` has been called, the selected policies are kept in
    ``self.top_df``.

    Args:
        config (dict): must contain the key "notebook_path", the CSV path
            used by ``save`` (and whose folder is used by
            ``output_top_policies``)
    """

    def __init__(self, config):
        self.df = pd.DataFrame()
        self.store_path = config["notebook_path"]

    def _append(self, frame):
        """Append the rows of `frame` to ``self.df``."""
        # Skip the concat while the notebook is still the initial, completely
        # empty DataFrame: newer pandas versions warn about empty frames
        # taking part in concat, and the result is just `frame` anyway.
        if self.df.shape == (0, 0):
            self.df = frame
        else:
            self.df = pd.concat([self.df, frame])

    def record(self, trial_no, trial_hyperparams, sample_no, reward, history):
        """Records one complete training of child model

        Args:
            trial_no (int): no of trial (iteration) of training
            trial_hyperparams (list): data augmentation hyperparameters used
                for training, in the order [aug1_type, aug1_magnitude,
                aug2_type, aug2_magnitude, portion]
            sample_no (int): sample no among training with same hyperparameters
            reward (float): reward is basically last n validation accuracy
                before overfitting
            history (dict): history returned by keras.model.fit(); it is
                turned into a DataFrame with one row per epoch
        """
        new_df = pd.DataFrame(history)
        new_df["trial_no"] = trial_no
        new_df["aug1_type"] = trial_hyperparams[0]
        new_df["aug1_magnitude"] = trial_hyperparams[1]
        new_df["aug2_type"] = trial_hyperparams[2]
        new_df["aug2_magnitude"] = trial_hyperparams[3]
        new_df["portion"] = trial_hyperparams[4]
        new_df["sample_no"] = sample_no
        new_df["mean_late_val_acc"] = reward
        new_df = new_df.round(3)  # round all float values to 3 decimals after point
        # epochs are numbered from 1 within each recorded training run
        new_df["epoch"] = np.arange(1, len(new_df) + 1)
        self._append(new_df)

    def save(self):
        """Writes all records to ``self.store_path`` as CSV (without index)"""
        self.df.to_csv(self.store_path, index=False)

    def add_records_from(self, notebook_path):
        """Appends the records stored in another notebook CSV file

        Args:
            notebook_path (str): path of the CSV file to read; lines starting
                with "#" are treated as comments
        """
        notebook_df = pd.read_csv(notebook_path, comment="#")
        self._append(notebook_df)

    def get_top_policies(self, k):
        """Prints and returns top-k policies

        Policies (trials) are ranked by their reward ("mean_late_val_acc")
        averaged over the samples of the trial. The column
        "expected_accuracy_increase" is that average minus the average of the
        baseline, i.e. the best-ranked trial with portion == 0.0.

        Args:
            k (int): top-k

        Returns:
            pandas.DataFrame: top-k policies as dataframe

        Raises:
            IndexError: if no record with portion == 0.0 (baseline) exists
        """
        # One row per (trial, sample) carries the reward of that sample, so
        # de-duplicate before averaging. The reward column is selected BEFORE
        # aggregating: averaging every column fails on the string-valued
        # augmentation-type columns in recent pandas versions.
        trial_avg_val_acc_df = (
            self.df.drop_duplicates(["trial_no", "sample_no"])
            .groupby("trial_no")["mean_late_val_acc"]
            .mean()
            .reset_index()
        )

        # Replace the per-sample reward by the per-trial average.
        x_df = pd.merge(
            self.df.drop(columns=["mean_late_val_acc"]),
            trial_avg_val_acc_df,
            on="trial_no",
            how="left",
        )

        x_df = x_df.sort_values("mean_late_val_acc", ascending=False)

        baseline_rows = x_df.loc[x_df["portion"] == 0.0, "mean_late_val_acc"]
        if baseline_rows.empty:
            # Same exception type as the bare ``.values[0]`` lookup would
            # raise, but with an explanation.
            raise IndexError(
                "no baseline record (portion == 0.0) found in the notebook"
            )
        baseline_val_acc = baseline_rows.values[0]

        x_df["expected_accuracy_increase"] = (
            x_df["mean_late_val_acc"] - baseline_val_acc
        )

        self.top_df = x_df.drop_duplicates(["trial_no"]).sort_values(
            "mean_late_val_acc", ascending=False
        )[:k]

        SELECT = [
            "trial_no",
            "aug1_type",
            "aug1_magnitude",
            "aug2_type",
            "aug2_magnitude",
            "portion",
            "mean_late_val_acc",
            "expected_accuracy_increase",
        ]
        self.top_df = self.top_df[SELECT]

        print(f"top-{k} policies:")
        print(self.top_df)

        return self.top_df

    def output_top_policies(self):
        """Saves ``self.top_df`` as "top<k>_policies.csv" next to the notebook

        ``get_top_policies`` must have been called before, since it is what
        sets ``self.top_df``.
        """
        k = len(self.top_df)
        out_path = get_folder_path(self.store_path) + f"top{k}_policies.csv"
        self.top_df.to_csv(out_path, index=False)
        print(f"Top policies are saved to {out_path}")
Source code for notebook

deepaugment

Navigation

Related Topics