# Source code for expttools.analysis

import pandas as pd
import numpy as np
import os
from collections import namedtuple

def results_by_param_name(csv_file, param_name):
    '''
    Read a csv file and return the values stored under one column header.

    Parameters
    ----------
    csv_file : string or path
        csv file to read; its first row is used as the column headers
    param_name : string
        header of the column whose values are wanted

    Returns
    -------
    value_list : numpy.ndarray
        the values of the ``param_name`` column, in file order

    Raises
    ------
    KeyError
        if ``param_name`` is not one of the column headers
    '''
    df = pd.read_csv(csv_file)

    # `.values` is kept (rather than converting to a list) so existing callers
    # keep receiving the same array type
    return df[param_name].values

def results_to_df(params, csv_path):
    '''
    Build a data frame holding only the requested columns of a csv file.

    Parameters
    ----------
    params : iterable of string
        column headers to keep; the output columns follow this order
    csv_path : string or path
        csv file to read; its first row is used as the column headers

    Returns
    -------
    my_df : pandas.DataFrame
        one column per entry of ``params`` (empty when ``params`` is empty)

    Raises
    ------
    KeyError
        if an entry of ``params`` is not one of the column headers
    '''
    names = list(params)

    # nothing requested: return an empty frame without touching the file
    if not names:
        return pd.DataFrame({})

    # read the file once instead of once per requested column
    source_df = pd.read_csv(csv_path)
    columns = {name: source_df[name].values for name in names}

    return pd.DataFrame(columns)

def add_info(params, result_df):
    '''
    Prepend a run's parameters, repeated on every row, to its result data frame.

    Parameters
    ----------
    params : pandas.Series
        parameter values indexed by parameter name
    result_df : pandas.DataFrame
        results of the run

    Returns
    -------
    pandas.DataFrame
        one column per parameter followed by the columns of ``result_df``,
        carrying the index of ``result_df``. If ``result_df`` has no rows, the
        single parameter row is still returned and the result columns hold
        missing values.
    '''
    # turn the series into a one-row frame (one column per parameter)
    info_row = params.to_frame().T.reset_index(drop=True)

    n_results = len(result_df)
    if n_results > 1:
        # repeat the parameter row so it has as many rows as the results
        info_df = pd.concat([info_row] * n_results, ignore_index=True)
    else:
        info_df = info_row

    # the side-by-side concat aligns on index labels, so reuse the result
    # index; otherwise a result_df not indexed 0..n-1 would be misaligned
    if n_results > 0:
        info_df.index = result_df.index

    # stack side by side
    return pd.concat([info_df, result_df], axis=1)





# [docs]
class ExperimentResult:
    '''
    Load the per-run csv files stored under one experiment directory.

    Every sub-directory of the experiment directory is treated as one run.
    A run that contains ``result.csv`` is a successful run: its ``info.csv``
    (parameters) and ``result.csv`` (results) are read. Every other run is a
    failed run and its ``failed.csv`` is read.

    Attributes
    ----------
    res_dirs : list of string
        paths of the successful run directories
    result_dict : dict
        run name -> {'params': pandas.Series, 'result_df': pandas.DataFrame}
    fails : dict
        run name -> pandas.DataFrame read from that run's ``failed.csv``
    is_info_df_list : bool
        whether get_result_df_list adds the parameters to each data frame
    '''

    def __init__(self, top_level_dir, is_info_df_list=False):
        '''
        load all results with params

        Parameters
        ----------
        top_level_dir : string or path
            where to load data from. If this path does not exist, its last
            component is used as a pattern and the alphabetically last
            directory next to it whose name contains the pattern is loaded.
        is_info_df_list : bool
            If true, include parameter info in call to get_result_df_list

        Raises
        ------
        FileNotFoundError
            if ``top_level_dir`` does not exist and no directory matches it
        '''
        if not os.path.exists(top_level_dir):
            top_level_dir = self._latest_matching_dir(top_level_dir)

        def run_file(run, file_name):
            # path of one file inside one run directory
            return os.path.join(top_level_dir, run, file_name)

        # every sub-directory is a run; sorted so the order of the results
        # does not depend on the order the file system lists entries in
        all_runs = sorted(run for run in os.listdir(top_level_dir)
                          if os.path.isdir(os.path.join(top_level_dir, run)))

        # a run counts as successful when it wrote a result.csv
        expt_runs = []
        fail_runs = []
        for run in all_runs:
            if os.path.exists(run_file(run, 'result.csv')):
                expt_runs.append(run)
            else:
                fail_runs.append(run)

        self.res_dirs = [os.path.join(top_level_dir, run) for run in expt_runs]

        # info.csv has no header row: column 0 holds the parameter names, and
        # squeezing the remaining column yields a Series indexed by them
        self.result_dict = {
            run: {'params': pd.read_csv(run_file(run, 'info.csv'),
                                        header=None,
                                        index_col=0).squeeze("columns"),
                  'result_df': pd.read_csv(run_file(run, 'result.csv'))}
            for run in expt_runs}

        self.is_info_df_list = is_info_df_list

        self.fails = {run: pd.read_csv(run_file(run, 'failed.csv'))
                      for run in fail_runs}

    @staticmethod
    def _latest_matching_dir(path_prefix):
        '''
        Return the alphabetically last directory whose name contains the last
        component of ``path_prefix``, searched next to ``path_prefix``.
        '''
        base_path, pattern = os.path.split(path_prefix)
        # a bare name such as 'results' has an empty base: search the cwd
        search_dir = base_path or os.curdir
        candidates = sorted(
            name for name in os.listdir(search_dir)
            if pattern in name
            and os.path.isdir(os.path.join(search_dir, name)))
        if not candidates:
            raise FileNotFoundError(
                f"no directory matching '{pattern}' found in '{search_dir}'")
        return os.path.join(base_path, candidates[-1])

    def get_fail_names(self):
        '''
        list of the names of the failed runs
        '''
        return list(self.fails.keys())

    def get_result_names(self):
        '''
        list of the names of the successful runs
        '''
        return list(self.result_dict.keys())

    def get_result_dirs(self):
        '''
        list of paths of the successful run directories
        '''
        return self.res_dirs

    def stack_results(self):
        '''
        stack the results of all runs into one data frame, each run's rows
        carrying that run's parameters as extra columns

        Raises
        ------
        ValueError
            raised by pandas.concat if there are no successful runs
        '''
        return pd.concat([add_info(**result)
                          for result in self.result_dict.values()])

    def get_result_df_list(self):
        '''
        return a list of the result dfs, one per successful run; the
        parameters are added as columns when is_info_df_list is true
        '''
        if self.is_info_df_list:
            return [add_info(**result) for result in self.result_dict.values()]
        return [result['result_df'] for result in self.result_dict.values()]

    def get_named_tuples(self):
        '''
        Returns
        -------
        result_tuple_list : a list of named tuples, one per successful run,
            with attributes name (the 'dir_name' entry of the run's
            parameters), dataframe (the results) and info (the parameters)
        '''
        result_tuple = namedtuple('result_tuple', ['name', 'dataframe', 'info'])

        return [result_tuple(name=result['params']['dir_name'],
                             dataframe=result['result_df'],
                             info=result['params'])
                for result in self.result_dict.values()]

    def get_info_df(self):
        '''
        create a dataframe of only the parameters, one row per successful run

        Raises
        ------
        ValueError
            raised by pandas.concat if there are no successful runs
        '''
        return pd.concat([result['params'].to_frame().T
                          for result in self.result_dict.values()], axis=0)

    def get_fail_df(self):
        '''
        stack fails into a dataframe (an empty one when no run failed)
        '''
        if not self.fails:
            return pd.DataFrame()
        return pd.concat(list(self.fails.values()))