Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update layout #219

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions build/lib/wiggum/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pandas as pd
import numpy as np
import scipy.stats as stats
import itertools as itert

#from .detectors import upper_triangle_df, upper_triangle_element, isReverse,
# from .detectors import detect_simpsons_paradox


from .ranking_processing import DEFAULT_SP_DEF, trend_quality_sp

# from .ranking_processing import (mark_designed_rows, compute_angle,
# compute_slope_all, add_slope_sp, add_angle_col, get_SP_views,
# count_sp_views, add_slope_cols, add_view_count,
# add_weighted, add_view_score,get_trend_row,label_SP_rows,get_views,
# get_SP_rows)


# from .data_augmentation import add_quantile, add_all_dpgmm


from .labeled_dataframe import (LabeledDataFrame, possible_roles, var_types,
simple_type_mapper)

# trend types
from .trends import (All_Pearson, Continuous_Pearson, Spearman_Correlation,
Kendall_Correlation, Mean_Rank_Trend,Linear_Trend,
All_Linear_Trend,Binary_Pearson_Trend,Binary_Mean_Rank_Trend,
Median_Rank_Trend,Binary_Median_Rank_Trend,
Binary_Accuracy_Trend, Binary_TPR_Trend,Binary_PPV_Trend,
Binary_TNR_Trend, Binary_FDR_Trend, Binary_F1_Trend,
Binary_Error_Trend,Binary_FNR_Trend)

#tren lists
from .trends import (all_trend_types,default_binary_trends,default_distance_trends)

# __all__ = ['detect_simpsons_paradox','mark_designed_rows', 'compute_angle',
# 'add_slope_sp','compute_slope_all','add_angle_col','count_sp_views',
# 'add_slope_cols','add_view_count',
# 'add_weighted','get_subgroup_trends_1lev','add_all_dpgmm',
# 'get_subgroup_trends_2lev','add_quantile','get_correlations',
# 'add_view_score','get_trend_row','label_SP_rows','get_views',
# 'get_SP_rows']
194 changes: 194 additions & 0 deletions build/lib/wiggum/auditing_reporting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
import pandas as pd
import numpy as np
import scipy.stats as stats
import itertools as itert


class _AuditReporting():
"""
This mixin class contians methods for reproducible auditing and report generation
"""

def annotate(self,row_idx, annotate_col, comment):
'''
add text of comment to the annotate_col column of result_df in the
row_idx row

Parameters
-----------
row_idx : integer
row index to annotate
annotate_col : string
name of annotation column (may be existing or new)
comment : string or number
content to add as annotation

Returns
-------
changed row
'''

# if new annotation column, initialize it empty
if not(annotate_col in self.result_df.columns):
self.result_df[annotate_col] = ''

# add specific annotation
self.result_df.loc[row_idx,annotate_col] = comment

return self.result_df.loc[row_idx]


def filter_annotate(self,feat1 = None,feat2 = None,group_feat= None,
subgroup= None,subgroup2= None,trend_type=None,
annotate_col = None, comment = 'x'):
'''
add text of comment to the annotate_col column of result_df in the
rows specified by a filter using the trend related variables

Parameters
-----------
feat1 : str, list, or None
trend variable name or None to include all
feat2 : str, list, or None
trend variable name or None to include all
group_feat : str, list, or None
groupoby variable name or None to include all
subgroup : str, list, or None
value of groupby_feat or or None to include all
annotate_col : string
name of annotation column (may be existing or new) or none to be
auto generated from filter
comment : string or number
content to add as annotation

Returns
-------
changed rows
'''
if annotate_col is None:
filter_vars = [feat1, feat2, group_feat, subgroup, subgroup2,
trend_type]
filter_params = [var for var in filter_vars if not(var is None)]
annotate_col = '_'.join(filter_params)

filt_df = self.get_trend_rows(feat1, feat2, group_feat, subgroup,
subgroup2, trend_type)

for row in filt_df.index:
self.annotate(row, annotate_col,comment)

filt_df[annotate_col] = comment

return filt_df

def delete_annotate(self,row_idx,annotate_col):
"""
delete a comment in an annotate_col column of result_df in the
row and annotate column specified

Parameters
-----------
row_inx : integer
row index to delete
annotate_col : string
name of annotation column (should be existing already) to delete from
Returns
-------
row deleted from

"""
self.result_df.loc[row_idx,annotate_col] = ''

return self.result_df.loc[row_idx]

def get_report_table(self,report_cols,report_rows):
"""
generate a df that can be used for tables

Parameters
-----------
report_cols : list
list of integers representing column indexes for report
report_rows : list
list of integers representing row indexes for report
Returns
-------
rows and columns of the report
"""
report_df = self.result_df.iloc[report_cols,report_rows]
return report_df

def detect_annotate():

pass

def save_report_table(self,report_cols,report_rows,filename):
"""
generate a csv file to save the report

Parameters
-----------
report_cols : list
list of integers representing column indexes for report
report_rows : list
list of integers representing row indexes for report
filename : string
.csv filename to save the report to
Returns
-------
rows and columns written to the report
"""
# call above and then save to csv
report_df = self.get_report_table(report_cols,report_rows)
report_df.to_csv(filename)
return



def count_values_above_thres(self,column,threshold):
"""
count all the values in a column above a certain threshold

Parameters
-----------
column : string
name of column to peform thresholding on
threshold : float
threshold that should be exceeded
Returns
-----------
number of values above the threshold
"""
valueCount = len(self.result_df[self.result_df[column] > threshold])
return valueCount
def result_df_stat_summary(self,filename):
"""
generate and save to file(s) summary statistics on the results

Parameters
-----------
filename : string
the filename of the saved summary files

Returns
-------


"""

# generate state tables and save each individually
state_table = self.result_df
state_table.to_csv(filename + ".csv")

# generate narrative for a text file summary
trendNum = state_table['trend_type'].value_counts()
dataGroups = state_table['subgroup'].value_counts()
new_df = self.result_df[((self.result_df["agg_trend"] < 0) == (self.result_df["subgroup_trend"] < 0)) == False]
total_rows = len(new_df.index)
# thresholds that you would like to add to the report
valueCount = self.count_values_above_thres('distance',.5)
valueCount2 = self.count_values_above_thres('distance',.2)
valueCount3 = self.count_values_above_thres('distance',.3)
outputFile = open(filename + ".txt",'w')
outputFile.write("Summary Statistics for " + filename + "\n" + "\n" +"Subgroups Present: \n" + dataGroups.to_string() +"\n\nTrends Detected: \n"+ trendNum.to_string() + "\n" + "\nNumber of Reversals found: " + str(total_rows) + "\n" +"\nDistance above .2 threshold: " + str(valueCount2)+"\nDistance above .3 threshold: " + str(valueCount)+ "\nDistance above .5 threshold: " + str(valueCount))
return
Loading