From 7bac388c535fd4af1b7d647442acf736df381dd3 Mon Sep 17 00:00:00 2001 From: sajadabvi Date: Thu, 25 Apr 2024 12:29:27 -0400 Subject: [PATCH] scripts related to MVGC --- gunfolds/conversions.py | 22 + .../MVGC_F1_improvment_undersampling.py | 181 +++++ gunfolds/scripts/MVGC_expo_impo.py | 2 +- gunfolds/scripts/MVGC_undersampeld_GT.py | 460 +++++++++++ .../Ruben_PCMCI_fix_bidir_datagraph.py | 727 ++++++++++++++++++ gunfolds/scripts/Ruben_datagraphs.py | 60 +- gunfolds/scripts/datasets/simple_networks.py | 67 ++ gunfolds/scripts/my_functions.py | 92 +++ gunfolds/solvers/clingo_rasl.py | 2 +- 9 files changed, 1589 insertions(+), 24 deletions(-) create mode 100644 gunfolds/scripts/MVGC_F1_improvment_undersampling.py create mode 100644 gunfolds/scripts/MVGC_undersampeld_GT.py create mode 100644 gunfolds/scripts/Ruben_PCMCI_fix_bidir_datagraph.py create mode 100644 gunfolds/scripts/datasets/simple_networks.py create mode 100644 gunfolds/scripts/my_functions.py diff --git a/gunfolds/conversions.py b/gunfolds/conversions.py index 9edcd511..09f52ccd 100644 --- a/gunfolds/conversions.py +++ b/gunfolds/conversions.py @@ -334,6 +334,28 @@ def vec2g(v, n): A, B = vec2adj(v, n) return adjs2graph(A, B) +def Glag2CG(results): + """Converts lag graph format to gunfolds graph format, + and A and B matrices representing directed and bidirected edges weights. + + Args: + results (dict): A dictionary containing: + - 'graph': A 3D NumPy array of shape [N, N, 2] representing the graph structure. + - 'val_matrix': A NumPy array of shape [N, N, 2] storing edge weights. + + Returns: + tuple: (graph_dict, A_matrix, B_matrix) + """ + + graph_array = results['graph'] + bidirected_edges = np.where(graph_array == 'o-o', 1, 0).astype(int) + directed_edges = np.where(graph_array == '-->', 1, 0).astype(int) + + graph_dict = adjs2graph(np.transpose(directed_edges[:, :, 1]), np.transpose((bidirected_edges[:, :, 0]))) + A_matrix = results['val_matrix'][:, :, 1] + B_matrix = results['val_matrix'][:, :, 0] + + return graph_dict, A_matrix, B_matrix def rate(u): """ diff --git a/gunfolds/scripts/MVGC_F1_improvment_undersampling.py b/gunfolds/scripts/MVGC_F1_improvment_undersampling.py new file mode 100644 index 00000000..babc996e --- /dev/null +++ b/gunfolds/scripts/MVGC_F1_improvment_undersampling.py @@ -0,0 +1,181 @@ +import os +from gunfolds.viz import gtool as gt +from gunfolds.utils import bfutils +import numpy as np +import pandas as pd +from gunfolds import conversions as cv +import matplotlib.pyplot as plt +from datetime import datetime +from scipy.io import loadmat +from scipy.io import savemat +import matplotlib.patches as mpatches +from gunfolds.scripts.datasets.simple_networks import simp_nets +from gunfolds.scripts import my_functions as mf + +PreFix = 'MVGC' +concat = True +POSTFIX = 'Ruben_data' + 'concat' if concat else 'individual' + +save_results = [] +Precision_O = [] +Recall_O = [] +Precision_O2 = [] +Recall_O2 = [] +Precision_A = [] +Recall_A = [] +Precision_A2 = [] +Recall_A2 = [] +Precision_C = [] +Recall_C = [] +Precision_C2 = [] +Recall_C2 = [] +F1_O = [] +F1_A = [] +F1_C = [] +F1_O2 = [] +F1_A2 = [] +F1_C2 = [] + +for nn in [5]: + + for fl in range(1, 61): + num = str(fl) if fl > 9 else '0' + str(fl) + print('reading file:' + num) + if not concat: + data = pd.read_csv( + './DataSets_Feedbacks/1. Simple_Networks/Network' + str( + nn) + '_amp/data_fslfilter/BOLDfslfilter_{0}.txt'.format( + num), delimiter='\t') + else: + data = pd.read_csv( + './DataSets_Feedbacks/1. Simple_Networks/Network' + str( + nn) + '_amp/data_fslfilter_concat/concat_BOLDfslfilter_{0}.txt'.format( + num), delimiter='\t') + + network_GT = simp_nets(nn, True) + + dd = np.transpose(data.values) + folder = 'expo_to_mat/expo_to_mat_n' + str(nn) + '_' + ('concat' if concat else 'individual') + if not os.path.exists(folder): + os.makedirs(folder) + savemat(folder + '/expo_to_mat_' + str(fl) + '.mat', {'dd': dd}) + + for fl in range(1, 61): + #######presision and recall (orintattion) + folder_read = 'expo_to_mat/expo_to_py_n' + str(nn) + '_' + ('concat' if concat else 'individual') + mat_data = loadmat(folder_read + '/mat_file_' + str(fl) + '.mat') + mat = mat_data['sig'] + for i in range(len(network_GT)): + mat[i, i] = 1 + MVGC = cv.adjs2graph(mat, np.zeros((len(network_GT), len(network_GT)))) + res_graph = MVGC + gt.plotg(MVGC, output='./figs/Gopt_GC_N' + str(nn) + '_file_' + str(fl) + '.pdf') + + normal_GT = mf.precision_recall(res_graph, network_GT) + Precision_O.append(normal_GT['orientation']['precision']) + Recall_O.append(normal_GT['orientation']['recall']) + F1_O.append(normal_GT['orientation']['F1']) + + Precision_A.append(normal_GT['adjacency']['precision']) + Recall_A.append(normal_GT['adjacency']['recall']) + F1_A.append(normal_GT['adjacency']['F1']) + + Precision_C.append(normal_GT['cycle']['precision']) + Recall_C.append(normal_GT['cycle']['recall']) + F1_C.append(normal_GT['cycle']['F1']) + + ###trying undersampled GT by 2 + + new_GT = bfutils.all_undersamples(network_GT)[1] + new_GT = mf.remove_bidir_edges(new_GT) + undersampled_GT = mf.precision_recall(res_graph, new_GT) + Precision_O2.append(undersampled_GT['orientation']['precision']) + Recall_O2.append(undersampled_GT['orientation']['recall']) + F1_O2.append(undersampled_GT['orientation']['F1']) + + Precision_A2.append(undersampled_GT['adjacency']['precision']) + Recall_A2.append(undersampled_GT['adjacency']['recall']) + F1_A2.append(undersampled_GT['adjacency']['F1']) + + Precision_C2.append(undersampled_GT['cycle']['precision']) + Recall_C2.append(undersampled_GT['cycle']['recall']) + F1_C2.append(undersampled_GT['cycle']['F1']) + + # ###trying undersampled GT by 3 + # + # new_GT3 = bfutils.all_undersamples(network_GT)[2] + # new_GT3 = mf.remove_bidir_edges(new_GT3) + # undersampled_GT3 = mf.precision_recall(res_graph, new_GT3) + # Precision_O3.append(undersampled_GT3['orientation']['precision']) + # Recall_O3.append(undersampled_GT3['orientation']['recall']) + # F1_O3.append(undersampled_GT3['orientation']['F1']) + # + # Precision_A3.append(undersampled_GT3['adjacency']['precision']) + # Recall_A3.append(undersampled_GT3['adjacency']['recall']) + # F1_A3.append(undersampled_GT3['adjacency']['F1']) + # + # Precision_C3.append(undersampled_GT3['cycle']['precision']) + # Recall_C3.append(undersampled_GT3['cycle']['recall']) + # F1_C3.append(undersampled_GT3['cycle']['F1']) + +now = str(datetime.now()) +now = now[:-7].replace(' ', '_') + +###saving files +filename = PreFix + '_with_selfloop_nets_456_amp_' + now + '_' + ('concat' if concat else 'individual') + +# Data for group 1 +data_group1 = [ + [Precision_O, Recall_O, F1_O], + [Precision_A, Recall_A, F1_A], + [Precision_C, Recall_C, F1_C] +] + +# Data for group 2 +data_group2 = [ + [Precision_O2, Recall_O2, F1_O2], + [Precision_A2, Recall_A2, F1_A2], + [Precision_C2, Recall_C2, F1_C2] +] + +# data_group3 = [ +# [Precision_O3, Recall_O3, F1_O3], +# [Precision_A3, Recall_A3, F1_A3], +# [Precision_C3, Recall_C3, F1_C3] +# ] + +# Labels and titles for subplots +titles = ['Orientation', 'Adjacency', '2 cycles'] +colors = ['blue', 'orange'] + +fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 5)) + +for i, (data1, data2, title) in enumerate(zip(data_group1, data_group2, titles)): + ax1 = axes[i] + ax2 = ax1.twinx() + ax1.boxplot(data1, positions=np.array(range(len(data1))) * 2.0 - 0.4, patch_artist=True, showmeans=True, + boxprops=dict(facecolor=colors[0]), widths=0.6) + ax2.boxplot(data2, positions=np.array(range(len(data2))) * 2.0 + 0.4, patch_artist=True, showmeans=True, + boxprops=dict(facecolor=colors[1]), widths=0.6) + + + ax1.set_xticks(range(0, len(data1) * 2, 2)) + ax1.set_xticklabels(['Precision', 'Recall', 'F1-score']) + ax1.set_xlabel('Metrics') + ax1.set_ylabel('Value') + ax1.set_title(f'({title})') + ax1.grid(True) + ax1.set_ylim(0, 1) + ax2.set_ylim(0, 1) +# Add super title +plt.suptitle('Networks 4,5,6 ' + ('concat' if concat else 'individual' ) + ' data') +# Legend +blue_patch = mpatches.Patch(color='blue', label='ORG. GT') +orange_patch = mpatches.Patch(color='orange', label='GT^2') +plt.legend(handles=[blue_patch, orange_patch], loc='upper right') + +plt.tight_layout() + +# Save the figure +plt.savefig(filename + '_grouped_boxplot.png') +plt.close() diff --git a/gunfolds/scripts/MVGC_expo_impo.py b/gunfolds/scripts/MVGC_expo_impo.py index 1a2c44b8..4ac15a12 100644 --- a/gunfolds/scripts/MVGC_expo_impo.py +++ b/gunfolds/scripts/MVGC_expo_impo.py @@ -336,7 +336,7 @@ def precision_recall_2cycles(graph1, graph2): else: TP += 1 else: - if (edge[1], edge[0]) in GT_nx.edges(): + if (edge[1], edge[0]) in GT_nx.edges() and (edge[1] != edge[0]): FN += 0.5 else: FN += 1 diff --git a/gunfolds/scripts/MVGC_undersampeld_GT.py b/gunfolds/scripts/MVGC_undersampeld_GT.py new file mode 100644 index 00000000..9cf7a63a --- /dev/null +++ b/gunfolds/scripts/MVGC_undersampeld_GT.py @@ -0,0 +1,460 @@ +import os + +from gunfolds.viz import gtool as gt +import pickle +import distutils.util +import copy +from gunfolds.utils import bfutils +from gunfolds.utils import zickle as zkl +from gunfolds.estimation import grangercausality as gc +import numpy as np +import time, socket +import scipy +from gunfolds.solvers.clingo_rasl import drasl +import networkx as nx +import argparse +from gunfolds.utils import graphkit as gk +from gunfolds.utils.calc_procs import get_process_count +import pandas as pd +from gunfolds.estimation import linear_model as lm +from gunfolds.viz.dbn2latex import output_graph_figure +from gunfolds import conversions as cv +import matplotlib.pyplot as plt +from datetime import datetime +from scipy.io import loadmat +from scipy.io import savemat + +CLINGO_LIMIT = 64 +PNUM = int(min(CLINGO_LIMIT, get_process_count(1))) +POSTFIX = 'Ruben_data_concat' +Using_SVAR = True +PreFix = 'SVAR' if Using_SVAR else 'GC' +parser = argparse.ArgumentParser(description='Run settings.') +parser.add_argument("-c", "--CAPSIZE", default=10000, + help="stop traversing after growing equivalence class to this size.", type=int) +parser.add_argument("-b", "--BATCH", default=10, help="slurm batch.", type=int) +parser.add_argument("-n", "--NUMBER", default=1, help="simple network index.", type=int) +parser.add_argument("-p", "--PNUM", default=PNUM, help="number of CPUs in machine.", type=int) +parser.add_argument("-t", "--TIMEOUT", default=12, help="timeout in hours", type=int) +parser.add_argument("-x", "--MAXU", default=15, help="maximum number of undersampling to look for solution.", type=int) +parser.add_argument("-r", "--THRESHOLD", default=5, help="threshold for SVAR", type=int) +parser.add_argument("-s", "--SCC", default="f", help="true to use SCC structure, false to not", type=str) +parser.add_argument("-m", "--SCCMEMBERS", default="f", help="true for using g_estimate SCC members, false for using " + "GT SCC members", type=str) +parser.add_argument("-y", "--PRIORITY", default="42531", help="string of priorities", type=str) +args = parser.parse_args() +TIMEOUT = args.TIMEOUT * 60 * 60 +fl = args.BATCH +k_threshold = args.THRESHOLD +EDGE_CUTOFF = 0.01 +SCC = bool(distutils.util.strtobool(args.SCC)) +SCC_members = bool(distutils.util.strtobool(args.SCCMEMBERS)) +SCC = True if SCC_members else SCC +priprities = [] +for char in args.PRIORITY: + priprities.append(int(char)) + +SL_drop_bd_normed_errors_comm = [] +SL_drop_bd_normed_errors_omm = [] +drop_bd_normed_errors_comm = [] +drop_bd_normed_errors_omm = [] + +SL_undir_normed_errors_omm = [] +SL_undir_normed_errors_comm = [] +undir_errors_omm = [] +undir_errors_comm = [] + +opt_SL_undir_normed_errors_omm = [] +opt_SL_undir_normed_errors_comm = [] +opt_undir_errors_omm = [] +opt_undir_errors_comm = [] + +def round_tuple_elements(input_tuple, decimal_points=3): + return tuple(round(elem, decimal_points) if isinstance(elem, (int, float)) else elem for elem in input_tuple) +def rmBidirected(gu): + g = copy.deepcopy(gu) + for v in g: + for w in list(g[v]): + if g[v][w] == 2: + del g[v][w] + elif g[v][w] == 3: + g[v][w] = 1 + return g + +def get_strongly_connected_components(graph): + return [c for c in nx.strongly_connected_components(graph)] + + +def calculate_jaccard_similarity(set1, set2): + intersection = len(set1.intersection(set2)) + union = len(set1.union(set2)) + return intersection / union if union > 0 else 0 + +def quantify_graph_difference(graph1, graph2): + # Step 1: Find the strongly connected components in both graphs + scc1 = get_strongly_connected_components(graph1) + scc2 = get_strongly_connected_components(graph2) + + # Step 2: Represent SCCs as sets of vertices + scc_sets1 = set([frozenset(component) for component in scc1]) + scc_sets2 = set([frozenset(component) for component in scc2]) + + # Step 3: Calculate Jaccard similarity between SCC sets + intersection = len(scc_sets1.intersection(scc_sets2)) + union = len(scc_sets1.union(scc_sets2)) + jaccard_similarity = intersection / union if union > 0 else 0 + + return jaccard_similarity + +def Glag2CG(results): + """Converts lag graph format to gunfolds graph format, + and A and B matrices representing directed and bidirected edges weights. + + Args: + results (dict): A dictionary containing: + - 'graph': A 3D NumPy array of shape [N, N, 2] representing the graph structure. + - 'val_matrix': A NumPy array of shape [N, N, 2] storing edge weights. + + Returns: + tuple: (graph_dict, A_matrix, B_matrix) + """ + + graph_array = results['graph'] + bidirected_edges = np.where(graph_array == 'o-o', 1, 0).astype(int) + directed_edges = np.where(graph_array == '-->', 1, 0).astype(int) + + graph_dict = cv.adjs2graph(directed_edges[:, :, 1], bidirected_edges[:, :, 0]) + A_matrix = results['val_matrix'][:, :, 1] + B_matrix = results['val_matrix'][:, :, 0] + + return graph_dict, A_matrix, B_matrix + +def precision_recall(graph1, graph2): + # Convert both graphs to undirected + graph1_undirected = graph1.to_undirected() + graph2_undirected = graph2.to_undirected() + + # Get adjacency matrices + adj_matrix1 = nx.adjacency_matrix(graph1_undirected).todense() + adj_matrix2 = nx.adjacency_matrix(graph2_undirected).todense() + + # Calculate true positives (intersection of edges) + true_positives = np.sum(np.logical_and(adj_matrix1, adj_matrix2)) + true_positives += np.trace(np.logical_and(adj_matrix1, adj_matrix2)) + # Calculate false positives (edges in graph2 but not in graph1) + false_positives = np.sum(np.logical_and(adj_matrix2, np.logical_not(adj_matrix1))) + false_positives += np.trace(np.logical_and(adj_matrix2, np.logical_not(adj_matrix1))) + + # Calculate false negatives (edges in graph1 but not in graph2) + false_negatives = np.sum(np.logical_and(adj_matrix1, np.logical_not(adj_matrix2))) + false_negatives += np.trace(np.logical_and(adj_matrix1, np.logical_not(adj_matrix2))) + + true_positives = true_positives/2 + false_positives= false_positives/2 + false_negatives = false_negatives/2 + + # Calculate precision + precision = true_positives / (true_positives + false_positives) if true_positives + false_positives > 0 else 0 + + # Calculate recall + recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives > 0 else 0 + + return precision, recall + + +def find_two_cycles(graph): + # Find all 2-cycles in the graph + two_cycles = set() + visited = set() + for node in graph.nodes(): + for neighbor in graph.neighbors(node): + # Check for a directed edge in both directions and ensure it's not a self-loop + if node != neighbor and graph.has_edge(node, neighbor) and graph.has_edge(neighbor, node): + # Ensure we count each 2-cycle only once + edge_pair = tuple(sorted([node, neighbor])) + if edge_pair not in visited: + two_cycles.add(edge_pair) + visited.add(edge_pair) + return two_cycles + +def precision_recall_2cycles(graph1, graph2): + + + # Find 2-cycles in both graphs + two_cycles_graph1 = find_two_cycles(graph1) + two_cycles_graph2 = find_two_cycles(graph2) + + # Calculate true positives (intersection of 2-cycles) + true_positives = len(two_cycles_graph1.intersection(two_cycles_graph2)) + + # Calculate false positives (2-cycles in graph2 but not in graph1) + false_positives = len(two_cycles_graph2 - two_cycles_graph1) + + # Calculate false negatives (2-cycles in graph1 but not in graph2) + false_negatives = len(two_cycles_graph1 - two_cycles_graph2) + + # Calculate precision + precision = true_positives / (true_positives + false_positives) if true_positives + false_positives > 0 else 0 + + # Calculate recall + recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives > 0 else 0 + + return precision, recall + + +def remove_bidir_edges(input_dict): + result_dict = {} + for key, inner_dict in input_dict.items(): + result_dict[key] = {} + for inner_key, value in inner_dict.items(): + if value == 1: + result_dict[key][inner_key] = 1 + elif value == 2: + pass # Skip adding this key-value pair + elif value == 3: + result_dict[key][inner_key] = 1 + else: + raise ValueError("Invalid value encountered: {}".format(value)) + return result_dict + + +for nn in [1]: + save_results = {} + args.NUMBER = nn + SL_undir_normed_errors_omm = [] + SL_undir_normed_errors_comm = [] + undir_errors_omm = [] + undir_errors_comm = [] + opt_SL_undir_normed_errors_omm = [] + opt_SL_undir_normed_errors_comm = [] + opt_undir_errors_omm = [] + opt_undir_errors_comm = [] + save_results = [] + Precision_O = [] + Recall_O = [] + Precision_A = [] + Recall_A = [] + Precision_A2 = [] + Recall_A2 = [] + Precision_C = [] + Recall_C = [] + Precision_C2 = [] + Recall_C2 = [] + for fl in range(1, 61): + num = str(fl) if fl > 9 else '0' + str(fl) + print('reading file:' + num) + data = pd.read_csv( + './DataSets_Feedbacks/1. Simple_Networks/Network' + str( + args.NUMBER) + '_amp/data_fslfilter/BOLDfslfilter_{0}.txt'.format( + num), delimiter='\t') + + selfloops = [] + no_selfloops = [] + + network1_GT_selfloop = {1: {1: 1, 2: 1, 5: 1}, 2: {2: 1, 3: 1, 1: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 5: 1}, + 5: {5: 1}} + network1_GT = {1: {2: 1, 5: 1}, 2: {3: 1, 1: 1}, 3: {4: 1}, 4: {5: 1}, 5: {}} + selfloops.append(network1_GT_selfloop) + no_selfloops.append(network1_GT) + + network2_GT_selfloop = {1: {1: 1, 2: 1, 5: 1}, 2: {2: 1, 3: 1, 1: 1}, 3: {2: 1, 3: 1, 4: 1}, 4: {4: 1, 5: 1}, + 5: {5: 1}} + network2_GT = {1: {2: 1, 5: 1}, 2: {3: 1, 1: 1}, 3: {2: 1, 4: 1}, 4: {5: 1}, 5: {}} + selfloops.append(network2_GT_selfloop) + no_selfloops.append(network2_GT) + + network3_GT_selfloop = {1: {1: 1, 2: 1, 5: 1}, 2: {2: 1, 3: 1, 1: 1}, 3: {3: 1, 4: 1}, 4: {3: 1, 4: 1, 5: 1}, + 5: {5: 1}} + network3_GT = {1: {2: 1, 5: 1}, 2: {3: 1, 1: 1}, 3: {4: 1}, 4: {3: 1, 5: 1}, 5: {}} + selfloops.append(network3_GT_selfloop) + no_selfloops.append(network3_GT) + + network4_GT_selfloop = {1: {4: 1, 8: 1, 6: 1, 1: 1}, 2: {2: 1, 3: 1}, 3: {2: 1, 3: 1}, + 4: {4: 1, 2: 1, 7: 1, 9: 1, 5: 1}, 5: {5: 1, 4: 1, 6: 1}, + 6: {6: 1, 10: 1}, 7: {7: 1, 3: 1, 10: 1}, 8: {8: 1, 2: 1, 9: 1}, 9: {9: 1, 8: 1, 6: 1}, + 10: {10: 1, 6: 1}} + network4_GT = {1: {4: 1, 8: 1, 6: 1}, 2: {3: 1}, 3: {2: 1}, 4: {2: 1, 7: 1, 9: 1, 5: 1}, 5: {4: 1, 6: 1}, + 6: {10: 1}, 7: {3: 1, 10: 1}, 8: {2: 1, 9: 1}, 9: {8: 1, 6: 1}, 10: {6: 1}} + selfloops.append(network4_GT_selfloop) + no_selfloops.append(network4_GT) + + network5_GT_selfloop = {1: {1: 1, 3: 1}, 2: {2: 1, 4: 1}, 3: {3: 1, 4: 1, 5: 1}, 4: {4: 1, 3: 1}, 5: {5: 1}} + network5_GT = {1: {3: 1}, 2: {4: 1}, 3: {4: 1, 5: 1}, 4: {3: 1}, 5: {}} + selfloops.append(network5_GT_selfloop) + no_selfloops.append(network5_GT) + + network6_GT_selfloop = {1: {1: 1, 3: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 3: 1, 5: 1}, + 5: {5: 1, 7: 1, 8: 1, 6: 1}, + 6: {6: 1}, 7: {7: 1}, 8: {8: 1}} + network6_GT = {1: {3: 1}, 2: {3: 1}, 3: {4: 1}, 4: {3: 1, 5: 1}, 5: {7: 1, 8: 1, 6: 1}, 6: {}, 7: {}, 8: {}} + selfloops.append(network6_GT_selfloop) + no_selfloops.append(network6_GT) + + network7_GT_selfloop = {1: {1: 1, 2: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 5: 1}, + 5: {5: 1, 2: 1, 6: 1}, + 6: {6: 1}} + network7_GT = {1: {2: 1}, 2: {3: 1}, 3: {4: 1}, 4: {5: 1}, 5: {2: 1, 6: 1}, 6: {}} + selfloops.append(network7_GT_selfloop) + no_selfloops.append(network7_GT) + + network8_GT_selfloop = {1: {1: 1, 2: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1, 8: 1}, 4: {4: 1, 5: 1, 6: 1}, + 5: {5: 1, 2: 1}, 6: {6: 1, 7: 1}, 7: {7: 1, 5: 1}, 8: {8: 1}} + network8_GT = {1: {2: 1}, 2: {3: 1}, 3: {4: 1, 8: 1}, 4: {5: 1, 6: 1}, 5: {5: 1, 2: 1}, 6: {7: 1}, + 7: {5: 1}, 8: {}} + selfloops.append(network8_GT_selfloop) + no_selfloops.append(network8_GT) + + network9_GT_selfloop = {1: {1: 1, 2: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 5: 1}, 5: {5: 1, 2: 1}, + 6: {6: 1, 7: 1, 9: 1}, 7: {7: 1, 8: 1}, 8: {8: 1, 4: 1}, 9: {9: 1}} + network9_GT = {1: {2: 1}, 2: {3: 1}, 3: {4: 1}, 4: {5: 1}, 5: {2: 1}, 6: {7: 1, 9: 1}, 7: {8: 1}, 8: {4: 1}, + 9: {}} + selfloops.append(network9_GT_selfloop) + no_selfloops.append(network9_GT) + + network_GT_selfloop = selfloops[args.NUMBER - 1] + network_GT = no_selfloops[args.NUMBER - 1] + '''SVAR''' + dd = np.transpose(data.values) + folder = 'expo_to_mat/expo_to_mat_n'+str(nn) + if not os.path.exists(folder): + os.makedirs(folder) + savemat(folder+'/expo_to_mat_'+str(fl)+'.mat', {'dd': dd}) + + for fl in range(1, 61): + #######presision and recall (orintattion) + # Precision = True Positives / (True Positives + False Positives) + # Recall = True Positives / (True Positives + False Negatives) + mat_data = loadmat('expo_to_py/mat_file_'+str(fl)+'.mat') + mat = mat_data['sig'] + for i in range(5): + mat[i, i] = 1 + MVGC = cv.adjs2graph(mat, np.zeros((5, 5))) + res_graph = MVGC + gt.plotg(MVGC, output='./figs/Gopt_GC_' + str(fl) + '.pdf') + new_GT= bfutils.all_undersamples(network_GT_selfloop)[1] + new_GT = remove_bidir_edges(new_GT) + GT_nx = gk.graph2nx(new_GT) + res_nx = gk.graph2nx(res_graph) + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if edge in res_nx.edges(): + TP += 1 + else: + FN += 1 + for edge in res_nx.edges(): + if edge not in GT_nx.edges(): + FP += 1 + if (TP + FP) != 0 and (TP + FN) != 0: + Precision_O.append(TP / (TP + FP)) + Recall_O.append(TP / (TP + FN)) + + #######presision and recall (adjacency) + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges() : + if edge in res_nx.edges() or (edge[1], edge[0]) in res_nx.edges(): + if ( (edge[1], edge[0]) in GT_nx.edges()) and (edge[1] != edge[0]): + TP += 0.5 + else: + TP += 1 + else: + if (edge[1], edge[0]) in GT_nx.edges() and (edge[1] != edge[0]): + FN += 0.5 + else: + FN += 1 + for edge in res_nx.edges(): + if not(edge in GT_nx.edges() or (edge[1], edge[0]) in GT_nx.edges()): + if ((edge[1], edge[0]) in res_nx.edges()) and (edge[1] != edge[0]): + FP += 0.5 + else: + FP += 1 + if not (FP%1 == 0 and TP%1 == 0 and FN%1 == 0): + print('see why') + if (TP + FP) != 0 and (TP + FN) != 0: + Precision_A.append(TP / (TP + FP)) + Recall_A.append(TP / (TP + FN)) + else: + print('see how') + + #WRONG CALCULATIONS + # precision_val, recall_val = precision_recall(GT_nx,res_nx) + # Precision_A2.append(precision_val) + # Recall_A2.append(recall_val) + #######presision and recall (2-cycle) + + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if not edge[1]== edge[0]: + if (edge[1], edge[0]) in GT_nx.edges(): + if edge in res_nx.edges() and (edge[1], edge[0]) in res_nx.edges(): + TP += 1 + else: + FN += 1 + for edge in res_nx.edges(): + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in res_nx.edges(): + if not (edge in GT_nx.edges() and (edge[1], edge[0]) in GT_nx.edges()): + FP += 1 + if (TP + FP) !=0 and (TP + FN) !=0: + Precision_C.append(TP / (TP + FP)) + Recall_C.append(TP / (TP + FN)) + + # precision_val, recall_val = precision_recall_2cycles(GT_nx, res_nx) + # Precision_C2.append(precision_val) + # Recall_C2.append(recall_val) + + now = str(datetime.now()) + now = now[:-7].replace(' ', '_') + + ###saving files + filename = PreFix + '_res_MVGC_undersampled' + str(args.NUMBER) + '_' + now + + + precision_mean = sum(Precision_O) / len(Precision_O) + recall_mean = sum(Recall_O) / len(Recall_O) + # Names for the bars + names = ['Precision', 'Recall'] + # Mean values for the bars + means = [precision_mean, recall_mean] + # Plotting the bar plot + plt.bar(names, means, color=['blue', 'green']) + plt.ylim(0, 1) + plt.xlabel('Metrics') + plt.ylabel('Mean') + plt.title('Mean of Precision and Recall (orientation)') + plt.savefig(filename + '_orintation.png') + plt.close() + + + + precision_mean = sum(Precision_A) / len(Precision_A) + recall_mean = sum(Recall_A) / len(Recall_A) + # Names for the bars + names = ['Precision', 'Recall'] + # Mean values for the bars + means = [precision_mean, recall_mean] + # Plotting the bar plot + plt.bar(names, means, color=['blue', 'green']) + plt.ylim(0, 1) + plt.xlabel('Metrics') + plt.ylabel('Mean') + plt.title('Mean of Precision and Recall (adjacency)') + plt.savefig(filename + '_adjacency.png') + plt.close() + + ####### + + precision_mean = sum(Precision_C) / len(Precision_C) + recall_mean = sum(Recall_C) / len(Recall_C) + # Names for the bars + names = ['Precision', 'Recall'] + # Mean values for the bars + means = [precision_mean, recall_mean] + # Plotting the bar plot + plt.bar(names, means, color=['blue', 'green']) + plt.ylim(0, 1) + plt.xlabel('Metrics') + plt.ylabel('Mean') + plt.title('Mean of Precision and Recall (2 cycle)') + plt.savefig(filename + '_2_cycle.png') + + diff --git a/gunfolds/scripts/Ruben_PCMCI_fix_bidir_datagraph.py b/gunfolds/scripts/Ruben_PCMCI_fix_bidir_datagraph.py new file mode 100644 index 00000000..68ae2938 --- /dev/null +++ b/gunfolds/scripts/Ruben_PCMCI_fix_bidir_datagraph.py @@ -0,0 +1,727 @@ +from gunfolds.viz import gtool as gt +import pickle +import distutils.util +import copy +from gunfolds.utils import bfutils +from gunfolds.utils import zickle as zkl +from gunfolds.estimation import grangercausality as gc +import numpy as np +import time, socket +import scipy +from gunfolds.solvers.clingo_rasl import drasl +import networkx as nx +import argparse +from gunfolds.utils import graphkit as gk +from gunfolds.utils.calc_procs import get_process_count +import pandas as pd +from gunfolds.estimation import linear_model as lm +from gunfolds.viz.dbn2latex import output_graph_figure +from gunfolds import conversions as cv +import matplotlib.pyplot as plt +from datetime import datetime +from tigramite.pcmci import PCMCI +from tigramite.independence_tests.parcorr import ParCorr +import tigramite.data_processing as pp +from gunfolds.conversions import Glag2CG + +CLINGO_LIMIT = 64 +PNUM = int(min(CLINGO_LIMIT, get_process_count(1))) +POSTFIX = 'Ruben_data_' +PreFix = 'PCMCI_' +parser = argparse.ArgumentParser(description='Run settings.') +parser.add_argument("-c", "--CAPSIZE", default=0, + help="stop traversing after growing equivalence class to this size.", type=int) +parser.add_argument("-b", "--BATCH", default=1, help="slurm batch.", type=int) +parser.add_argument("-n", "--NUMBER", default=1, help="simple network index.", type=int) +parser.add_argument("-p", "--PNUM", default=PNUM, help="number of CPUs in machine.", type=int) +parser.add_argument("-t", "--TIMEOUT", default=12, help="timeout in hours", type=int) +parser.add_argument("-x", "--MAXU", default=15, help="maximum number of undersampling to look for solution.", type=int) +parser.add_argument("-r", "--THRESHOLD", default=5, help="threshold for SVAR", type=int) +parser.add_argument("-s", "--SCC", default="f", help="true to use SCC structure, false to not", type=str) +parser.add_argument("-m", "--SCCMEMBERS", default="f", help="true for using g_estimate SCC members, false for using " + "GT SCC members", type=str) +parser.add_argument("-y", "--PRIORITY", default="42531", help="string of priorities", type=str) +args = parser.parse_args() +TIMEOUT = args.TIMEOUT * 60 * 60 +fl = args.BATCH +k_threshold = args.THRESHOLD +EDGE_CUTOFF = 0.01 +SCC = bool(distutils.util.strtobool(args.SCC)) +SCC_members = bool(distutils.util.strtobool(args.SCCMEMBERS)) +SCC = True if SCC_members else SCC +priprities = [] +for char in args.PRIORITY: + priprities.append(int(char)) + +SL_drop_bd_normed_errors_comm = [] +SL_drop_bd_normed_errors_omm = [] +drop_bd_normed_errors_comm = [] +drop_bd_normed_errors_omm = [] + +SL_undir_normed_errors_omm = [] +SL_undir_normed_errors_comm = [] +undir_errors_omm = [] +undir_errors_comm = [] + +opt_SL_undir_normed_errors_omm = [] +opt_SL_undir_normed_errors_comm = [] +opt_undir_errors_omm = [] +opt_undir_errors_comm = [] + + +def round_tuple_elements(input_tuple, decimal_points=3): + return tuple(round(elem, decimal_points) if isinstance(elem, (int, float)) else elem for elem in input_tuple) + + +def rmBidirected(gu): + g = copy.deepcopy(gu) + for v in g: + for w in list(g[v]): + if g[v][w] == 2: + del g[v][w] + elif g[v][w] == 3: + g[v][w] = 1 + return g + + +def get_strongly_connected_components(graph): + return [c for c in nx.strongly_connected_components(graph)] + + +def calculate_jaccard_similarity(set1, set2): + intersection = len(set1.intersection(set2)) + union = len(set1.union(set2)) + return intersection / union if union > 0 else 0 + + +def quantify_graph_difference(graph1, graph2): + # Step 1: Find the strongly connected components in both graphs + scc1 = get_strongly_connected_components(graph1) + scc2 = get_strongly_connected_components(graph2) + + # Step 2: Represent SCCs as sets of vertices + scc_sets1 = set([frozenset(component) for component in scc1]) + scc_sets2 = set([frozenset(component) for component in scc2]) + + # Step 3: Calculate Jaccard similarity between SCC sets + intersection = len(scc_sets1.intersection(scc_sets2)) + union = len(scc_sets1.union(scc_sets2)) + jaccard_similarity = intersection / union if union > 0 else 0 + + return jaccard_similarity + + +def precision_recall(answer, network_GT_selfloop): + # Precision = True Positives / (True Positives + False Positives) + # Recall = True Positives / (True Positives + False Negatives) + results = {} + res_graph = bfutils.num2CG(answer[0][0], len(network_GT_selfloop)) + GT_nx = gk.graph2nx(network_GT_selfloop) + res_nx = gk.graph2nx(res_graph) + + #######precision and recall (orientation) + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if edge in res_nx.edges(): + TP += 1 + else: + FN += 1 + for edge in res_nx.edges(): + if edge not in GT_nx.edges(): + FP += 1 + p_O = (TP / (TP + FP)) if (TP + FP) else 0 + r_O = (TP / (TP + FN)) if (TP + FN) else 0 + f1_O = (2 * TP) / (2 * TP + FP + FN) if 2 * TP + FP + FN else 0 + + #######precision and recall (adjacency) + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if edge in res_nx.edges() or (edge[1], edge[0]) in res_nx.edges(): + if ((edge[1], edge[0]) in GT_nx.edges()) and (edge[1] != edge[0]): + TP += 0.5 + else: + TP += 1 + else: + if (edge[1], edge[0]) in GT_nx.edges() and (edge[1] != edge[0]): + FN += 0.5 + else: + FN += 1 + for edge in res_nx.edges(): + if not (edge in GT_nx.edges() or (edge[1], edge[0]) in GT_nx.edges()): + if ((edge[1], edge[0]) in res_nx.edges()) and (edge[1] != edge[0]): + FP += 0.5 + else: + FP += 1 + p_A = (TP / (TP + FP)) if (TP + FP) else 0 + r_A = (TP / (TP + FN)) if (TP + FN) else 0 + f1_A = (2 * TP) / (2 * TP + FP + FN) if 2 * TP + FP + FN else 0 + + #######precision and recall (2-cycle) + + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in GT_nx.edges(): + if edge in res_nx.edges() and (edge[1], edge[0]) in res_nx.edges(): + TP += 1 + else: + FN += 1 + for edge in res_nx.edges(): + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in res_nx.edges(): + if not (edge in GT_nx.edges() and (edge[1], edge[0]) in GT_nx.edges()): + FP += 1 + p_C = (TP / (TP + FP)) if (TP + FP) else 0 + r_C = (TP / (TP + FN)) if (TP + FN) else 0 + f1_C = (2 * TP) / (2 * TP + FP + FN) if 2 * TP + FP + FN else 0 + + results = {'orientation': {'precision': p_O, 'recall': r_O, 'F1': f1_O}, + 'adjacency': {'precision': p_A, 'recall': r_A, 'F1': f1_A}, + 'cycle': {'precision': p_C, 'recall': r_C, 'F1': f1_C}} + + return results + + +for nn in [2]: + args.NUMBER = nn + SL_undir_normed_errors_omm = [] + SL_undir_normed_errors_comm = [] + undir_errors_omm = [] + undir_errors_comm = [] + opt_SL_undir_normed_errors_omm = [] + opt_SL_undir_normed_errors_comm = [] + opt_undir_errors_omm = [] + opt_undir_errors_comm = [] + save_results = [] + Precision_O = [] + Recall_O = [] + Precision_A = [] + Recall_A = [] + Precision_C = [] + Recall_C = [] + Precision_O2 = [] + Recall_O2 = [] + Precision_A2 = [] + Recall_A2 = [] + Precision_C2 = [] + Recall_C2 = [] + for fl in range(1, 61): + num = str(fl) if fl > 9 else '0' + str(fl) + print('reading file:' + num) + if fl == 21: + print('skipping file') + continue + data = pd.read_csv( + './DataSets_Feedbacks/1. Simple_Networks/Network' + str( + args.NUMBER) + '_amp/data_fslfilter/BOLDfslfilter_{0}.txt'.format( + num), delimiter='\t') + + selfloops = [] + no_selfloops = [] + + network1_GT_selfloop = {1: {1: 1, 2: 1, 5: 1}, 2: {2: 1, 3: 1, 1: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 5: 1}, + 5: {5: 1}} + network1_GT = {1: {2: 1, 5: 1}, 2: {3: 1, 1: 1}, 3: {4: 1}, 4: {5: 1}, 5: {}} + selfloops.append(network1_GT_selfloop) + no_selfloops.append(network1_GT) + + network2_GT_selfloop = {1: {1: 1, 2: 1, 5: 1}, 2: {2: 1, 3: 1, 1: 1}, 3: {2: 1, 3: 1, 4: 1}, 4: {4: 1, 5: 1}, + 5: {5: 1}} + network2_GT = {1: {2: 1, 5: 1}, 2: {3: 1, 1: 1}, 3: {2: 1, 4: 1}, 4: {5: 1}, 5: {}} + selfloops.append(network2_GT_selfloop) + no_selfloops.append(network2_GT) + + network3_GT_selfloop = {1: {1: 1, 2: 1, 5: 1}, 2: {2: 1, 3: 1, 1: 1}, 3: {3: 1, 4: 1}, 4: {3: 1, 4: 1, 5: 1}, + 5: {5: 1}} + network3_GT = {1: {2: 1, 5: 1}, 2: {3: 1, 1: 1}, 3: {4: 1}, 4: {3: 1, 5: 1}, 5: {}} + selfloops.append(network3_GT_selfloop) + no_selfloops.append(network3_GT) + + network4_GT_selfloop = {1: {4: 1, 8: 1, 6: 1, 1: 1}, 2: {2: 1, 3: 1}, 3: {2: 1, 3: 1}, + 4: {4: 1, 2: 1, 7: 1, 9: 1, 5: 1}, 5: {5: 1, 4: 1, 6: 1}, + 6: {6: 1, 10: 1}, 7: {7: 1, 3: 1, 10: 1}, 8: {8: 1, 2: 1, 9: 1}, 9: {9: 1, 8: 1, 6: 1}, + 10: {10: 1, 6: 1}} + network4_GT = {1: {4: 1, 8: 1, 6: 1}, 2: {3: 1}, 3: {2: 1}, 4: {2: 1, 7: 1, 9: 1, 5: 1}, 5: {4: 1, 6: 1}, + 6: {10: 1}, 7: {3: 1, 10: 1}, 8: {2: 1, 9: 1}, 9: {8: 1, 6: 1}, 10: {6: 1}} + selfloops.append(network4_GT_selfloop) + no_selfloops.append(network4_GT) + + network5_GT_selfloop = {1: {1: 1, 3: 1}, 2: {2: 1, 4: 1}, 3: {3: 1, 4: 1, 5: 1}, 4: {4: 1, 3: 1}, 5: {5: 1}} + network5_GT = {1: {3: 1}, 2: {4: 1}, 3: {4: 1, 5: 1}, 4: {3: 1}, 5: {}} + selfloops.append(network5_GT_selfloop) + no_selfloops.append(network5_GT) + + network6_GT_selfloop = {1: {1: 1, 3: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 3: 1, 5: 1}, + 5: {5: 1, 7: 1, 8: 1, 6: 1}, + 6: {6: 1}, 7: {7: 1}, 8: {8: 1}} + network6_GT = {1: {3: 1}, 2: {3: 1}, 3: {4: 1}, 4: {3: 1, 5: 1}, 5: {7: 1, 8: 1, 6: 1}, 6: {}, 7: {}, 8: {}} + selfloops.append(network6_GT_selfloop) + no_selfloops.append(network6_GT) + + network7_GT_selfloop = {1: {1: 1, 2: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 5: 1}, + 5: {5: 1, 2: 1, 6: 1}, + 6: {6: 1}} + network7_GT = {1: {2: 1}, 2: {3: 1}, 3: {4: 1}, 4: {5: 1}, 5: {2: 1, 6: 1}, 6: {}} + selfloops.append(network7_GT_selfloop) + no_selfloops.append(network7_GT) + + network8_GT_selfloop = {1: {1: 1, 2: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1, 8: 1}, 4: {4: 1, 5: 1, 6: 1}, + 5: {5: 1, 2: 1}, 6: {6: 1, 7: 1}, 7: {7: 1, 5: 1}, 8: {8: 1}} + network8_GT = {1: {2: 1}, 2: {3: 1}, 3: {4: 1, 8: 1}, 4: {5: 1, 6: 1}, 5: {5: 1, 2: 1}, 6: {7: 1}, + 7: {5: 1}, 8: {}} + selfloops.append(network8_GT_selfloop) + no_selfloops.append(network8_GT) + + network9_GT_selfloop = {1: {1: 1, 2: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 5: 1}, 5: {5: 1, 2: 1}, + 6: {6: 1, 7: 1, 9: 1}, 7: {7: 1, 8: 1}, 8: {8: 1, 4: 1}, 9: {9: 1}} + network9_GT = {1: {2: 1}, 2: {3: 1}, 3: {4: 1}, 4: {5: 1}, 5: {2: 1}, 6: {7: 1, 9: 1}, 7: {8: 1}, 8: {4: 1}, + 9: {}} + selfloops.append(network9_GT_selfloop) + no_selfloops.append(network9_GT) + + network_GT_selfloop = selfloops[args.NUMBER - 1] + network_GT = no_selfloops[args.NUMBER - 1] + '''PCMCI''' + + dataframe = pp.DataFrame(data.values) + cond_ind_test = ParCorr() + pcmci = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test) + results = pcmci.run_pcmci(tau_max=1, pc_alpha=None, alpha_level=0.05) + + g_estimated, A, B = Glag2CG(results) + + MAXCOST = 10000 + DD = (np.abs((np.abs(A / np.abs(A).max()) + (cv.graph2adj(g_estimated) - 1)) * MAXCOST)).astype(int) + BD = (np.abs((np.abs(B / np.abs(B).max()) + (cv.graph2badj(g_estimated) - 1)) * MAXCOST)).astype(int) + + if SCC_members: + members = nx.strongly_connected_components(gk.graph2nx(g_estimated)) + else: + members = nx.strongly_connected_components(gk.graph2nx(network_GT_selfloop)) + + startTime = int(round(time.time() * 1000)) + r_estimated = drasl([g_estimated], weighted=True, capsize=0, timeout=TIMEOUT, + urate=min(args.MAXU, (3 * len(g_estimated) + 1)), + dm=[DD], + bdm=[BD], + scc=SCC, + scc_members=members, + GT_density=int(1000 * gk.density(network_GT_selfloop)), + edge_weights=priprities, pnum=args.PNUM, optim='optN') + endTime = int(round(time.time() * 1000)) + sat_time = endTime - startTime + print('number of optimal solutions is', len(r_estimated)) + ### minimizing with respect to Gu_opt Vs. G_estimate + min_err = {'directed': (0, 0), 'bidirected': (0, 0), 'total': (0, 0)} + min_norm_err = {'directed': (0, 0), 'bidirected': (0, 0), 'total': (0, 0)} + min_val = 1000000 + min_cost = 10000000 + for answer in r_estimated: + curr_errors = gk.OCE( + bfutils.undersample(bfutils.num2CG(answer[0][0], len(network_GT_selfloop)), answer[0][1][0]), + g_estimated) + curr_normed_errors = gk.OCE( + bfutils.undersample(bfutils.num2CG(answer[0][0], len(network_GT_selfloop)), answer[0][1][0]), + g_estimated, normalized=True) + curr_cost = answer[1] + if (curr_errors['total'][0] + curr_errors['total'][1]) < min_val: + min_err = curr_errors + min_norm_err = curr_normed_errors + min_cost = curr_cost + min_val = (curr_errors['total'][0] + curr_errors['total'][1]) + min_answer_WRT_GuOptVsGest = answer + elif (curr_errors['total'][0] + curr_errors['total'][1]) == min_val: + if curr_cost < min_cost: + min_err = curr_errors + min_norm_err = curr_normed_errors + min_cost = curr_cost + min_val = (curr_errors['total'][0] + curr_errors['total'][1]) + min_answer_WRT_GuOptVsGest = answer + + '''G1_opt - the solution of optimization problem (r_estimated from g_estimated) in causal time scale''' + G1_opt_WRT_GuOptVsGest = bfutils.num2CG(min_answer_WRT_GuOptVsGest[0][0], len(g_estimated)) + + '''Gu_opt - the solution of optimization problem (r_estimated from g_estimated) in measured time scale''' + Gu_opt_WRT_GuOptVsGest = bfutils.undersample(G1_opt_WRT_GuOptVsGest, min_answer_WRT_GuOptVsGest[0][1][0]) + '''network_GT_U - the GT in measured time scale''' + network_GT_U_WRT_GuOptVsGest = bfutils.undersample(network_GT_selfloop, min_answer_WRT_GuOptVsGest[0][1][0]) + + Gu_opt_errors_network_GT_U_WRT_GuOptVsGest = \ + gk.OCE(Gu_opt_WRT_GuOptVsGest, network_GT_U_WRT_GuOptVsGest, undirected=False, normalized=True)[ + 'total'] + Gu_opt_errors_g_estimated_WRT_GuOptVsGest = \ + gk.OCE(Gu_opt_WRT_GuOptVsGest, g_estimated, undirected=False, normalized=True)['total'] + G1_opt_error_GT_WRT_GuOptVsGest = \ + gk.OCE(G1_opt_WRT_GuOptVsGest, network_GT_selfloop, undirected=False, normalized=True)['total'] + print('*******************************************') + print('results with respect to Gu_opt Vs. G_estimate ') + print('U rate found to be:' + str(min_answer_WRT_GuOptVsGest[0][1][0])) + print('Gu_opt_errors_network_GT_U = ', round_tuple_elements(Gu_opt_errors_network_GT_U_WRT_GuOptVsGest)) + print('Gu_opt_errors_g_estimated', round_tuple_elements(Gu_opt_errors_g_estimated_WRT_GuOptVsGest)) + print('G1_opt_error_GT', round_tuple_elements(G1_opt_error_GT_WRT_GuOptVsGest)) + + SL_undir_normed_errors_omm.append(G1_opt_error_GT_WRT_GuOptVsGest[0]) + SL_undir_normed_errors_comm.append(G1_opt_error_GT_WRT_GuOptVsGest[1]) + + ### minimizing with respect to G1_opt Vs. GT + min_err = {'directed': (0, 0), 'bidirected': (0, 0), 'total': (0, 0)} + min_norm_err = {'directed': (0, 0), 'bidirected': (0, 0), 'total': (0, 0)} + min_val = 1000000 + min_cost = 10000000 + for answer in r_estimated: + curr_errors = gk.OCE(bfutils.num2CG(answer[0][0], len(network_GT_selfloop)), network_GT_selfloop) + curr_normed_errors = gk.OCE(bfutils.num2CG(answer[0][0], len(network_GT_selfloop)), network_GT_selfloop, + normalized=True) + curr_cost = answer[1] + if (curr_errors['total'][0] + curr_errors['total'][1]) < min_val: + min_err = curr_errors + min_norm_err = curr_normed_errors + min_cost = curr_cost + min_val = (curr_errors['total'][0] + curr_errors['total'][1]) + min_answer_WRT_G1OptVsGT = answer + elif (curr_errors['total'][0] + curr_errors['total'][1]) == min_val: + if curr_cost < min_cost: + min_err = curr_errors + min_norm_err = curr_normed_errors + min_cost = curr_cost + min_val = (curr_errors['total'][0] + curr_errors['total'][1]) + min_answer_WRT_G1OptVsGT = answer + + '''G1_opt - the solution of optimization problem (r_estimated from g_estimated) in causal time scale''' + G1_opt_WRT_G1OptVsGT = bfutils.num2CG(min_answer_WRT_G1OptVsGT[0][0], len(g_estimated)) + + '''Gu_opt - the solution of optimization problem (r_estimated from g_estimated) in measured time scale''' + Gu_opt_WRT_G1OptVsGT = bfutils.undersample(G1_opt_WRT_G1OptVsGT, min_answer_WRT_G1OptVsGT[0][1][0]) + '''network_GT_U - the GT in measured time scale''' + network_GT_U_WRT_G1OptVsGT = bfutils.undersample(network_GT_selfloop, min_answer_WRT_G1OptVsGT[0][1][0]) + + Gu_opt_errors_network_GT_U_WRT_G1OptVsGT = \ + gk.OCE(Gu_opt_WRT_G1OptVsGT, network_GT_U_WRT_G1OptVsGT, undirected=False, normalized=True)[ + 'total'] + Gu_opt_errors_g_estimated_WRT_G1OptVsGT = \ + gk.OCE(Gu_opt_WRT_G1OptVsGT, g_estimated, undirected=False, normalized=True)['total'] + G1_opt_error_GT_WRT_G1OptVsGT = \ + gk.OCE(G1_opt_WRT_G1OptVsGT, network_GT_selfloop, undirected=False, normalized=True)['total'] + print('*******************************************') + print('results of minimizing with respect to G1_opt Vs. GT') + print('U rate found to be:' + str(min_answer_WRT_G1OptVsGT[0][1][0])) + print('Gu_opt_errors_network_GT_U = ', round_tuple_elements(Gu_opt_errors_network_GT_U_WRT_G1OptVsGT)) + print('Gu_opt_errors_g_estimated', round_tuple_elements(Gu_opt_errors_g_estimated_WRT_G1OptVsGT)) + print('G1_opt_error_GT', round_tuple_elements(G1_opt_error_GT_WRT_G1OptVsGT)) + + opt_SL_undir_normed_errors_omm.append(G1_opt_error_GT_WRT_G1OptVsGT[0]) + opt_SL_undir_normed_errors_comm.append(G1_opt_error_GT_WRT_G1OptVsGT[1]) + #######presision and recall (orintattion) + # Precision = True Positives / (True Positives + False Positives) + # Recall = True Positives / (True Positives + False Negatives) + + res_graph = bfutils.num2CG(min_answer_WRT_G1OptVsGT[0][0], len(g_estimated)) + GT_nx = gk.graph2nx(network_GT_selfloop) + res_nx = gk.graph2nx(res_graph) + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if edge in res_nx.edges(): + TP += 1 + else: + FN += 1 + for edge in res_nx.edges(): + if edge not in GT_nx.edges(): + FP += 1 + if (TP + FP) != 0 and (TP + FN) != 0: + Precision_O.append(TP / (TP + FP)) + Recall_O.append(TP / (TP + FN)) + + #######presision and recall (adjacency) + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if edge in res_nx.edges() or (edge[1], edge[0]) in res_nx.edges(): + if ((edge[1], edge[0]) in GT_nx.edges()) and (edge[1] != edge[0]): + TP += 0.5 + else: + TP += 1 + else: + if (edge[1], edge[0]) in GT_nx.edges() and (edge[1] != edge[0]): + FN += 0.5 + else: + FN += 1 + for edge in res_nx.edges(): + if not (edge in GT_nx.edges() or (edge[1], edge[0]) in GT_nx.edges()): + if ((edge[1], edge[0]) in res_nx.edges()) and (edge[1] != edge[0]): + FP += 0.5 + else: + FP += 1 + if not (FP % 1 == 0 and TP % 1 == 0 and FN % 1 == 0): + print('see why') + if (TP + FP) != 0 and (TP + FN) != 0: + Precision_A.append(TP / (TP + FP)) + Recall_A.append(TP / (TP + FN)) + else: + print('see how') + + #######presision and recall (2-cycle) + + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in GT_nx.edges(): + if edge in res_nx.edges() and (edge[1], edge[0]) in res_nx.edges(): + TP += 1 + else: + FN += 1 + for edge in res_nx.edges(): + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in res_nx.edges(): + if not (edge in GT_nx.edges() and (edge[1], edge[0]) in GT_nx.edges()): + FP += 1 + if (TP + FP) != 0 and (TP + FN) != 0: + Precision_C.append(TP / (TP + FP)) + Recall_C.append(TP / (TP + FN)) + + ###optimizing with respect to F1 scores max + max_f1_score = 0 + max_answer_WRT_f1 = None + for answer in r_estimated: + #######presision and recall (orintattion) + # Precision = True Positives / (True Positives + False Positives) + # Recall = True Positives / (True Positives + False Negatives) + + res_graph = bfutils.num2CG(answer[0][0], len(g_estimated)) + GT_nx = gk.graph2nx(network_GT_selfloop) + res_nx = gk.graph2nx(res_graph) + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if edge in res_nx.edges(): + TP += 1 + else: + FN += 1 + for edge in res_nx.edges(): + if edge not in GT_nx.edges(): + FP += 1 + + f1_O = (2 * TP) / (2 * TP + FP + FN) if 2 * TP + FP + FN else 0 + #######presision and recall (adjacency) + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if edge in res_nx.edges() or (edge[1], edge[0]) in res_nx.edges(): + if ((edge[1], edge[0]) in GT_nx.edges()) and (edge[1] != edge[0]): + TP += 0.5 + else: + TP += 1 + else: + if (edge[1], edge[0]) in GT_nx.edges() and (edge[1] != edge[0]): + FN += 0.5 + else: + FN += 1 + for edge in res_nx.edges(): + if not (edge in GT_nx.edges() or (edge[1], edge[0]) in GT_nx.edges()): + if ((edge[1], edge[0]) in res_nx.edges()) and (edge[1] != edge[0]): + FP += 0.5 + else: + FP += 1 + f1_A = (2 * TP) / (2 * TP + FP + FN) if 2 * TP + FP + FN else 0 + + #######presision and recall (2-cycle) + + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in GT_nx.edges(): + if edge in res_nx.edges() and (edge[1], edge[0]) in res_nx.edges(): + TP += 1 + else: + FN += 1 + for edge in res_nx.edges(): + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in res_nx.edges(): + if not (edge in GT_nx.edges() and (edge[1], edge[0]) in GT_nx.edges()): + FP += 1 + f1_C = (2 * TP) / (2 * TP + FP + FN) if 2 * TP + FP + FN else 0 + + curr_f1 = f1_C + f1_A + f1_O + + if curr_f1 > max_f1_score: + max_f1_score = curr_f1 + max_answer_WRT_f1 = answer + + optim_PR = precision_recall(max_answer_WRT_f1, network_GT_selfloop) + Precision_O2.append(optim_PR['orientation']['precision']) + Recall_O2.append(optim_PR['orientation']['recall']) + + Precision_A2.append(optim_PR['adjacency']['precision']) + Recall_A2.append(optim_PR['adjacency']['recall']) + + Precision_C2.append(optim_PR['cycle']['precision']) + Recall_C2.append(optim_PR['cycle']['recall']) + + sorted_data = sorted(r_estimated, key=lambda x: x[1], reverse=True) + # add to lists for dict + results = {'general': {'method': PreFix, + 'g_estimated': g_estimated, + 'sample': fl, + 'dm': DD, + 'bdm': BD, + 'optim_cost': sorted_data[-1][1], + 'num_sols': len(r_estimated), + 'GT': network_GT_selfloop, + 'threshold': EDGE_CUTOFF * k_threshold, + 'timeout': TIMEOUT, + 'full_sols': r_estimated, + 'adjacency_(P,R)': (Precision_A[-1], Recall_A[-1]), + 'orientation_(P,R)': (Precision_O[-1], Recall_O[-1]), + '2cycle_(P,R)': (Precision_C[-1], Recall_C[-1]), + 'total_time': round(((sat_time) / 60000), 3)}, + 'GuOptVsGest': { + 'min_answer_WRT_GuOptVsGest': min_answer_WRT_GuOptVsGest, + 'G1_opt_WRT_GuOptVsGest': G1_opt_WRT_GuOptVsGest, + 'Gu_opt_WRT_GuOptVsGest': Gu_opt_WRT_GuOptVsGest, + 'network_GT_U_WRT_GuOptVsGest': network_GT_U_WRT_GuOptVsGest, + 'Gu_opt_errors_network_GT_U_WRT_GuOptVsGest': Gu_opt_errors_network_GT_U_WRT_GuOptVsGest, + 'Gu_opt_errors_g_estimated_WRT_GuOptVsGest': Gu_opt_errors_g_estimated_WRT_GuOptVsGest, + 'G1_opt_error_GT_WRT_GuOptVsGest': G1_opt_error_GT_WRT_GuOptVsGest, + 'U_found': min_answer_WRT_GuOptVsGest[0][1][0] + }, + 'G1OptVsGT': { + 'min_answer_WRT_G1OptVsGT': min_answer_WRT_G1OptVsGT, + 'G1_opt_WRT_G1OptVsGT': G1_opt_WRT_G1OptVsGT, + 'Gu_opt_WRT_G1OptVsGT': Gu_opt_WRT_G1OptVsGT, + 'network_GT_U_WRT_G1OptVsGT': network_GT_U_WRT_G1OptVsGT, + 'Gu_opt_errors_network_GT_U_WRT_G1OptVsGT': Gu_opt_errors_network_GT_U_WRT_G1OptVsGT, + 'Gu_opt_errors_g_estimated_WRT_G1OptVsGT': Gu_opt_errors_g_estimated_WRT_G1OptVsGT, + 'G1_opt_error_GT_WRT_G1OptVsGT': G1_opt_error_GT_WRT_G1OptVsGT, + 'U_found': min_answer_WRT_G1OptVsGT[0][1][0] + }} + save_results.append(results) + + now = str(datetime.now()) + now = now[:-7].replace(' ', '_') + # plotting + font = {'family': 'normal', + 'weight': 'normal', + 'size': 32} + plt.rc('font', **font) + fig, axs = plt.subplots(2) + for ax in axs: + ax.set_ylim(0, 1) + fig.set_figwidth(35) + fig.set_figheight(20) + fig.suptitle('Network' + str( + args.NUMBER) + ' on ' + PreFix + ':Om and comm err after comparing G1 to H1. individual data. with selfloop') + # axs[0].plot(errors_omm,label="ommition error") + axs[0].plot(SL_undir_normed_errors_omm, c='red', label="not knowing GT", linewidth=5) + # axs[0].plot(undir_errors_omm, c='green', label="omm: sRASL", linewidth=5) + axs[0].plot(opt_SL_undir_normed_errors_omm, c='orange', label="knowing GT", linewidth=5) + # axs[0].plot(opt_undir_errors_omm, c='blue', label="omm: sRASL + Opt", linewidth=5) + axs[0].legend() + # axs[1].plot(errors_comm, label="commition error") + # SL_drop_bd_normed_errors_comm= [0.003+i for i in SL_drop_bd_normed_errors_comm]all_network_SL + axs[1].plot(SL_undir_normed_errors_comm, c='red', label="not knowing GT", linewidth=5) + # axs[1].plot(undir_errors_comm, c='green', label="comm: sRASL", linewidth=5) + axs[1].plot(opt_SL_undir_normed_errors_comm, c='orange', label="knowing GT", linewidth=5) + # axs[1].plot(opt_undir_errors_comm, c='blue', label="comm: sRASL + Opt", linewidth=5) + axs[1].legend() + # plt.show() + + ###saving files + filename = PreFix + '_results_error_on_G1_H1' + str(args.NUMBER) + '_' + now + plt.savefig(filename + '_.png') + plt.close() + zkl.save(save_results, filename + '_.zkl') + font = {'family': 'normal', + 'weight': 'normal', + 'size': 18} + plt.rc('font', **font) + + precision_mean = sum(Precision_O) / len(Precision_O) + recall_mean = sum(Recall_O) / len(Recall_O) + precision_mean2 = sum(Precision_O2) / len(Precision_O2) + recall_mean2 = sum(Recall_O2) / len(Recall_O2) + + # Names for the bars + names = ['Precision', 'Recall'] + + # Mean values for the bars + means = [precision_mean, recall_mean] + means2 = [precision_mean2, recall_mean2] + + # Width of each bar + bar_width = 0.35 + + # Position of bars on x-axis + x = np.arange(len(names)) + + # Plotting the bar plot + plt.bar(x - bar_width / 2, means, bar_width, color=['blue', 'blue'], label='min error') + plt.bar(x + bar_width / 2, means2, bar_width, color=['orange', 'orange'], label='max F1') + + plt.ylim(0, 1) + plt.xlabel('Metrics') + plt.ylabel('Mean') + plt.title('Mean of Precision and Recall (orientation) 2 ways of finding best answer') + plt.xticks(x, names) + plt.legend() + plt.savefig(filename + '_orientation.png') + plt.close() + + precision_mean = sum(Precision_A) / len(Precision_A) + recall_mean = sum(Recall_A) / len(Recall_A) + precision_mean2 = sum(Precision_A2) / len(Precision_A2) + recall_mean2 = sum(Recall_A2) / len(Recall_A2) + + # Names for the bars + names = ['Precision', 'Recall'] + + # Mean values for the bars + means = [precision_mean, recall_mean] + means2 = [precision_mean2, recall_mean2] + + # Width of each bar + bar_width = 0.35 + + # Position of bars on x-axis + x = np.arange(len(names)) + + # Plotting the bar plot + plt.bar(x - bar_width / 2, means, bar_width, color=['blue', 'blue'], label='min error') + plt.bar(x + bar_width / 2, means2, bar_width, color=['orange', 'orange'], label='max F1') + + plt.ylim(0, 1) + plt.xlabel('Metrics') + plt.ylabel('Mean') + plt.title('Mean of Precision and Recall (adjacency) 2 ways of finding best answer') + plt.xticks(x, names) + plt.legend() + plt.savefig(filename + '_adjacency.png') + plt.close() + + + # Calculate means + precision_mean = sum(Precision_C) / len(Precision_C) + recall_mean = sum(Recall_C) / len(Recall_C) + precision_mean2 = sum(Precision_C2) / len(Precision_C2) + recall_mean2 = sum(Recall_C2) / len(Recall_C2) + + # Names for the bars + names = ['Precision', 'Recall'] + + # Mean values for the bars + means = [precision_mean, recall_mean] + means2 = [precision_mean2, recall_mean2] + + # Width of each bar + bar_width = 0.35 + + # Position of bars on x-axis + x = np.arange(len(names)) + + # Plotting the bar plot + plt.bar(x - bar_width / 2, means, bar_width, color=['blue', 'blue'], label='min error') + plt.bar(x + bar_width / 2, means2, bar_width, color=['orange', 'orange'], label='max F1') + + plt.ylim(0, 1) + plt.xlabel('Metrics') + plt.ylabel('Mean') + plt.title('Mean of Precision and Recall (2 cycles) 2 ways of finding best answer') + plt.xticks(x, names) + plt.legend() + plt.savefig(filename + '_2_cycle.png') + diff --git a/gunfolds/scripts/Ruben_datagraphs.py b/gunfolds/scripts/Ruben_datagraphs.py index 5e6b93e4..7f4adbb1 100644 --- a/gunfolds/scripts/Ruben_datagraphs.py +++ b/gunfolds/scripts/Ruben_datagraphs.py @@ -455,45 +455,61 @@ def precision_recall(graph1, graph2): res_nx = gk.graph2nx(res_graph) TP, FP, FN = 0, 0, 0 for edge in GT_nx.edges(): - if edge in res_nx.edges(): - TP += 1 - else: - FN += 1 + if edge in res_nx.edges(): + TP += 1 + else: + FN += 1 for edge in res_nx.edges(): - if edge not in GT_nx.edges(): - FP += 1 + if edge not in GT_nx.edges(): + FP += 1 if (TP + FP) != 0 and (TP + FN) != 0: Precision_O.append(TP / (TP + FP)) Recall_O.append(TP / (TP + FN)) #######presision and recall (adjacency) TP, FP, FN = 0, 0, 0 - for edge in GT_nx.edges() : - if edge in res_nx.edges() or (edge[1], edge[0]) in res_nx.edges(): - TP += 1 - else: - FN += 1 + for edge in GT_nx.edges(): + if edge in res_nx.edges() or (edge[1], edge[0]) in res_nx.edges(): + if ((edge[1], edge[0]) in GT_nx.edges()) and (edge[1] != edge[0]): + TP += 0.5 + else: + TP += 1 + else: + if (edge[1], edge[0]) in GT_nx.edges(): + FN += 0.5 + else: + FN += 1 for edge in res_nx.edges(): - if not(edge in GT_nx.edges() or (edge[1], edge[0]) in GT_nx.edges()): - FP += 1 + if not (edge in GT_nx.edges() or (edge[1], edge[0]) in GT_nx.edges()): + if ((edge[1], edge[0]) in res_nx.edges()) and (edge[1] != edge[0]): + FP += 0.5 + else: + FP += 1 + if not (FP % 1 == 0 and TP % 1 == 0 and FN % 1 == 0): + print('see why') if (TP + FP) != 0 and (TP + FN) != 0: Precision_A.append(TP / (TP + FP)) Recall_A.append(TP / (TP + FN)) + else: + print('see how') + #######presision and recall (2-cycle) TP, FP, FN = 0, 0, 0 for edge in GT_nx.edges(): - if (edge[1], edge[0]) in GT_nx.edges(): - if edge in res_nx.edges() and (edge[1], edge[0]) in res_nx.edges(): - TP += 1 - else: - FN += 1 + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in GT_nx.edges(): + if edge in res_nx.edges() and (edge[1], edge[0]) in res_nx.edges(): + TP += 1 + else: + FN += 1 for edge in res_nx.edges(): - if (edge[1], edge[0]) in res_nx.edges(): - if not (edge in GT_nx.edges() and (edge[1], edge[0]) in GT_nx.edges()): - FP += 1 - if (TP + FP) !=0 and (TP + FN) !=0: + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in res_nx.edges(): + if not (edge in GT_nx.edges() and (edge[1], edge[0]) in GT_nx.edges()): + FP += 1 + if (TP + FP) != 0 and (TP + FN) != 0: Precision_C.append(TP / (TP + FP)) Recall_C.append(TP / (TP + FN)) diff --git a/gunfolds/scripts/datasets/simple_networks.py b/gunfolds/scripts/datasets/simple_networks.py new file mode 100644 index 00000000..c090ffa1 --- /dev/null +++ b/gunfolds/scripts/datasets/simple_networks.py @@ -0,0 +1,67 @@ +selfloops = [] +no_selfloops = [] + +network1_GT_selfloop = {1: {1: 1, 2: 1, 5: 1}, 2: {2: 1, 3: 1, 1: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 5: 1}, + 5: {5: 1}} +network1_GT = {1: {2: 1, 5: 1}, 2: {3: 1, 1: 1}, 3: {4: 1}, 4: {5: 1}, 5: {}} +selfloops.append(network1_GT_selfloop) +no_selfloops.append(network1_GT) + +network2_GT_selfloop = {1: {1: 1, 2: 1, 5: 1}, 2: {2: 1, 3: 1, 1: 1}, 3: {2: 1, 3: 1, 4: 1}, 4: {4: 1, 5: 1}, + 5: {5: 1}} +network2_GT = {1: {2: 1, 5: 1}, 2: {3: 1, 1: 1}, 3: {2: 1, 4: 1}, 4: {5: 1}, 5: {}} +selfloops.append(network2_GT_selfloop) +no_selfloops.append(network2_GT) + +network3_GT_selfloop = {1: {1: 1, 2: 1, 5: 1}, 2: {2: 1, 3: 1, 1: 1}, 3: {3: 1, 4: 1}, 4: {3: 1, 4: 1, 5: 1}, + 5: {5: 1}} +network3_GT = {1: {2: 1, 5: 1}, 2: {3: 1, 1: 1}, 3: {4: 1}, 4: {3: 1, 5: 1}, 5: {}} +selfloops.append(network3_GT_selfloop) +no_selfloops.append(network3_GT) + +network4_GT_selfloop = {1: {4: 1, 8: 1, 6: 1, 1: 1}, 2: {2: 1, 3: 1}, 3: {2: 1, 3: 1}, + 4: {4: 1, 2: 1, 7: 1, 9: 1, 5: 1}, 5: {5: 1, 4: 1, 6: 1}, + 6: {6: 1, 10: 1}, 7: {7: 1, 3: 1, 10: 1}, 8: {8: 1, 2: 1, 9: 1}, 9: {9: 1, 8: 1, 6: 1}, + 10: {10: 1, 6: 1}} +network4_GT = {1: {4: 1, 8: 1, 6: 1}, 2: {3: 1}, 3: {2: 1}, 4: {2: 1, 7: 1, 9: 1, 5: 1}, 5: {4: 1, 6: 1}, + 6: {10: 1}, 7: {3: 1, 10: 1}, 8: {2: 1, 9: 1}, 9: {8: 1, 6: 1}, 10: {6: 1}} +selfloops.append(network4_GT_selfloop) +no_selfloops.append(network4_GT) + +network5_GT_selfloop = {1: {1: 1, 3: 1}, 2: {2: 1, 4: 1}, 3: {3: 1, 4: 1, 5: 1}, 4: {4: 1, 3: 1}, 5: {5: 1}} +network5_GT = {1: {3: 1}, 2: {4: 1}, 3: {4: 1, 5: 1}, 4: {3: 1}, 5: {}} +selfloops.append(network5_GT_selfloop) +no_selfloops.append(network5_GT) + +network6_GT_selfloop = {1: {1: 1, 3: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 3: 1, 5: 1}, + 5: {5: 1, 7: 1, 8: 1, 6: 1}, + 6: {6: 1}, 7: {7: 1}, 8: {8: 1}} +network6_GT = {1: {3: 1}, 2: {3: 1}, 3: {4: 1}, 4: {3: 1, 5: 1}, 5: {7: 1, 8: 1, 6: 1}, 6: {}, 7: {}, 8: {}} +selfloops.append(network6_GT_selfloop) +no_selfloops.append(network6_GT) + +network7_GT_selfloop = {1: {1: 1, 2: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 5: 1}, + 5: {5: 1, 2: 1, 6: 1}, + 6: {6: 1}} +network7_GT = {1: {2: 1}, 2: {3: 1}, 3: {4: 1}, 4: {5: 1}, 5: {2: 1, 6: 1}, 6: {}} +selfloops.append(network7_GT_selfloop) +no_selfloops.append(network7_GT) + +network8_GT_selfloop = {1: {1: 1, 2: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1, 8: 1}, 4: {4: 1, 5: 1, 6: 1}, + 5: {5: 1, 2: 1}, 6: {6: 1, 7: 1}, 7: {7: 1, 5: 1}, 8: {8: 1}} +network8_GT = {1: {2: 1}, 2: {3: 1}, 3: {4: 1, 8: 1}, 4: {5: 1, 6: 1}, 5: {5: 1, 2: 1}, 6: {7: 1}, + 7: {5: 1}, 8: {}} +selfloops.append(network8_GT_selfloop) +no_selfloops.append(network8_GT) + +network9_GT_selfloop = {1: {1: 1, 2: 1}, 2: {2: 1, 3: 1}, 3: {3: 1, 4: 1}, 4: {4: 1, 5: 1}, 5: {5: 1, 2: 1}, + 6: {6: 1, 7: 1, 9: 1}, 7: {7: 1, 8: 1}, 8: {8: 1, 4: 1}, 9: {9: 1}} +network9_GT = {1: {2: 1}, 2: {3: 1}, 3: {4: 1}, 4: {5: 1}, 5: {2: 1}, 6: {7: 1, 9: 1}, 7: {8: 1}, 8: {4: 1}, + 9: {}} + + +def simp_nets(nn, selfloop=True): + if selfloop: + return selfloops[nn - 1] + else: + return no_selfloops[nn - 1] \ No newline at end of file diff --git a/gunfolds/scripts/my_functions.py b/gunfolds/scripts/my_functions.py new file mode 100644 index 00000000..f972e339 --- /dev/null +++ b/gunfolds/scripts/my_functions.py @@ -0,0 +1,92 @@ +import networkx as nx +from gunfolds.utils import graphkit as gk + + +def remove_bidir_edges(input_dict): + result_dict = {} + for key, inner_dict in input_dict.items(): + result_dict[key] = {} + for inner_key, value in inner_dict.items(): + if value == 1: + result_dict[key][inner_key] = 1 + elif value == 2: + pass # Skip adding this key-value pair + elif value == 3: + result_dict[key][inner_key] = 1 + else: + raise ValueError("Invalid value encountered: {}".format(value)) + return result_dict + + +def precision_recall(answer, network_GT_selfloop): + # Precision = True Positives / (True Positives + False Positives) + # Recall = True Positives / (True Positives + False Negatives) + res_graph = answer + GT_nx = gk.graph2nx(network_GT_selfloop) + res_nx = gk.graph2nx(res_graph) + + #######precision and recall (orientation) + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if edge in res_nx.edges(): + TP += 1 + else: + FN += 1 + for edge in res_nx.edges(): + if edge not in GT_nx.edges(): + FP += 1 + p_O = (TP / (TP + FP)) if (TP + FP) else 0 + r_O = (TP / (TP + FN)) if (TP + FN) else 0 + f1_O = (2 * TP) / (2 * TP + FP + FN) if 2 * TP + FP + FN else 0 + + #######precision and recall (adjacency) + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if edge in res_nx.edges() or (edge[1], edge[0]) in res_nx.edges(): + if ((edge[1], edge[0]) in GT_nx.edges()) and (edge[1] != edge[0]): + TP += 0.5 + else: + TP += 1 + else: + if (edge[1], edge[0]) in GT_nx.edges() and (edge[1] != edge[0]): + FN += 0.5 + else: + FN += 1 + for edge in res_nx.edges(): + if not (edge in GT_nx.edges() or (edge[1], edge[0]) in GT_nx.edges()): + if ((edge[1], edge[0]) in res_nx.edges()) and (edge[1] != edge[0]): + FP += 0.5 + else: + FP += 1 + p_A = (TP / (TP + FP)) if (TP + FP) else 0 + r_A = (TP / (TP + FN)) if (TP + FN) else 0 + f1_A = (2 * TP) / (2 * TP + FP + FN) if 2 * TP + FP + FN else 0 + + #######precision and recall (2-cycle) + + TP, FP, FN = 0, 0, 0 + for edge in GT_nx.edges(): + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in GT_nx.edges(): + if edge in res_nx.edges() and (edge[1], edge[0]) in res_nx.edges(): + TP += 1 + else: + FN += 1 + for edge in res_nx.edges(): + if not edge[1] == edge[0]: + if (edge[1], edge[0]) in res_nx.edges(): + if not (edge in GT_nx.edges() and (edge[1], edge[0]) in GT_nx.edges()): + FP += 1 + p_C = (TP / (TP + FP)) if (TP + FP) else 0 + r_C = (TP / (TP + FN)) if (TP + FN) else 0 + f1_C = (2 * TP) / (2 * TP + FP + FN) if 2 * TP + FP + FN else 0 + + prf = {'orientation': {'precision': p_O, 'recall': r_O, 'F1': f1_O}, + 'adjacency': {'precision': p_A, 'recall': r_A, 'F1': f1_A}, + 'cycle': {'precision': p_C, 'recall': r_C, 'F1': f1_C}} + + return prf + + +def round_tuple_elements(input_tuple, decimal_points=3): + return tuple(round(elem, decimal_points) if isinstance(elem, (int, float)) else elem for elem in input_tuple) diff --git a/gunfolds/solvers/clingo_rasl.py b/gunfolds/solvers/clingo_rasl.py index 1bba052c..01b74d18 100644 --- a/gunfolds/solvers/clingo_rasl.py +++ b/gunfolds/solvers/clingo_rasl.py @@ -112,7 +112,7 @@ """ -def weighted_drasl_program(directed, bidirected,no_directed, no_bidirected): +def weighted_drasl_program(directed, bidirected, no_directed, no_bidirected): """ Adjusts the optimization code based on the directed and bidirected priority