Source code for expttools.analysis
import pandas as pd
import numpy as np
import os
from collections import namedtuple
def results_by_param_name(csv_file, param_name):
    '''
    Read a csv file and return the values stored under one column header.

    Parameters
    ----------
    csv_file : string or path
        path to the csv file to read
    param_name : string
        header of the column whose values are wanted; passing a list of
        headers selects several columns at once and yields a 2-D array

    Returns
    -------
    value_list : numpy.ndarray
        the values found under `param_name`, in file order

    Raises
    ------
    KeyError
        if `param_name` is not a column header of the csv file
    '''
    # the whole file is parsed; only the requested column(s) are returned
    df = pd.read_csv(csv_file)
    return df[param_name].values
def results_to_df(params, csv_path):
    '''
    Build a data frame holding only the requested columns of a csv file.

    Parameters
    ----------
    params : list of strings
        column headers to keep; the output columns follow this order
    csv_path : string or path
        path to the csv file to read

    Returns
    -------
    my_df : pandas.DataFrame
        a single data frame with one column per entry of `params`

    Raises
    ------
    KeyError
        if an entry of `params` is not a column header of the csv file
    '''
    # parse the file once instead of once per requested column
    full_df = pd.read_csv(csv_path)
    # going through a plain mapping of arrays keeps the default 0..n-1 index
    columns = {name: full_df[name].values for name in params}
    return pd.DataFrame(columns)
def add_info(params, result_df):
    '''
    Prepend the parameters of a run, as constant columns, to its results.

    Parameters
    ----------
    params : pandas.Series
        parameter values, indexed by parameter name
    result_df : pandas.DataFrame
        results of the run; rows are paired with the parameter rows by
        index label, so a default 0..n-1 index is expected

    Returns
    -------
    pandas.DataFrame
        the parameter columns followed by the columns of `result_df`
    '''
    # one-row frame with a column per parameter; drop=True discards the old
    # row label outright, so a parameter that happens to be called 'index'
    # is kept rather than being removed along with it
    info_df_row = params.to_frame().T.reset_index(drop=True)
    # repeat the row once per result row; an empty result still keeps a
    # single parameter row, hence the lower bound of 1
    n_rows = max(len(result_df), 1)
    info_df = pd.concat([info_df_row] * n_rows, ignore_index=True)
    # stack side by side
    return pd.concat([info_df, result_df], axis=1)
[docs]
class ExperimentResult():
    '''
    Results and parameters of every run stored under one experiment directory.

    Each sub directory of the experiment directory is treated as one run. A
    run that contains a 'result.csv' file is loaded as a successful run
    (together with its 'info.csv'); any other sub directory is treated as a
    failed run and its 'failed.csv' is loaded instead.
    '''

    # shared by every call to get_named_tuples
    _result_tuple = namedtuple('result_tuple', ['name', 'dataframe', 'info'])

    def __init__(self, top_level_dir, is_info_df_list=False):
        '''
        load all results with params

        Parameters
        ----------
        top_level_dir : string or path
            where to load data from. If this path does not exist, its last
            component is used as a pattern and the alphabetically last
            directory next to it whose name contains the pattern is used.
        is_info_df_list : bool
            If true, include parameter info in call to get_result_df_list

        Raises
        ------
        FileNotFoundError
            if `top_level_dir` does not exist and no directory matches it
        '''
        if not os.path.exists(top_level_dir):
            top_level_dir = self._find_latest_dir(top_level_dir)

        def run_file(run, file_name):
            # path of a file that lives inside the directory of one run
            return os.path.join(top_level_dir, run, file_name)

        # every sub directory counts as a run; loose files are ignored
        all_runs = [run for run in os.listdir(top_level_dir)
                    if os.path.isdir(os.path.join(top_level_dir, run))]

        # a run is successful exactly when it produced a result.csv
        expt_runs = []
        fail_runs = []
        for run in all_runs:
            if os.path.exists(run_file(run, 'result.csv')):
                expt_runs.append(run)
            else:
                fail_runs.append(run)

        self.res_dirs = [os.path.join(top_level_dir, run) for run in expt_runs]
        self.is_info_df_list = is_info_df_list

        self.result_dict = {}
        for run in expt_runs:
            # info.csv has no header row: first column holds the parameter
            # names, second the values; squeeze turns it into a Series
            info = pd.read_csv(run_file(run, 'info.csv'),
                               header=None, index_col=0).squeeze("columns")
            self.result_dict[run] = {
                'params': info,
                'result_df': pd.read_csv(run_file(run, 'result.csv')),
            }

        self.fails = {run: pd.read_csv(run_file(run, 'failed.csv'))
                      for run in fail_runs}

    @staticmethod
    def _find_latest_dir(top_level_dir):
        '''
        resolve a non existing path to the alphabetically last directory,
        in the same parent, whose name contains the last path component
        '''
        base_path, pattern = os.path.split(top_level_dir)
        # os.path.split returns '' as parent of a bare name, which
        # os.listdir cannot open, so search the current directory instead
        search_dir = base_path or os.curdir
        candidates = sorted(
            name for name in os.listdir(search_dir)
            if pattern in name
            and os.path.isdir(os.path.join(search_dir, name)))
        if not candidates:
            raise FileNotFoundError(
                'no directory matching {!r} in {!r}'.format(pattern, search_dir))
        return os.path.join(base_path, candidates[-1])

    def get_fail_names(self):
        '''
        list of the directory names of the failed runs
        '''
        return list(self.fails.keys())

    def get_result_names(self):
        '''
        list of the directory names of the successful runs
        '''
        return list(self.result_dict.keys())

    def get_result_dirs(self):
        '''
        list of paths
        '''
        return self.res_dirs

    def stack_results(self):
        '''
        stack all results into one dataframe, with the parameters of each
        run added as columns in front of its results
        '''
        return pd.concat([add_info(**result)
                          for result in self.result_dict.values()])

    def get_result_df_list(self):
        '''
        return a list of the result dfs, one per successful run, with the
        parameter columns included when is_info_df_list was set
        '''
        if self.is_info_df_list:
            return [add_info(**result) for result in self.result_dict.values()]
        return [result['result_df'] for result in self.result_dict.values()]

    def get_named_tuples(self):
        '''
        Returns
        -------
        result_tuple_list : a list of named tuples (name, dataframe, and info) as attributes
        '''
        return [self._result_tuple(name=result['params']['dir_name'],
                                   dataframe=result['result_df'],
                                   info=result['params'])
                for result in self.result_dict.values()]

    def get_info_df(self):
        '''
        create a dataframe of only the parameters
        '''
        return pd.concat([res['params'].to_frame().T
                          for res in self.result_dict.values()], axis=0)

    def get_fail_df(self):
        '''
        stack fails into a dataframe; empty when no run failed
        '''
        fail_dfs = list(self.fails.values())
        if not fail_dfs:
            # pd.concat refuses an empty list
            return pd.DataFrame()
        return pd.concat(fail_dfs)