-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexperiment_run_n_sii_sentence.py
175 lines (128 loc) · 7.45 KB
/
experiment_run_n_sii_sentence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import copy
import os
from typing import Dict
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.patches import Patch
from approximators import SHAPIQEstimator
from games import NLPGame
def k_largest_index_argpartition_v1(a, k):
idx = np.argpartition(-a.ravel(), k)[:k]
return np.column_stack(np.unravel_index(idx, a.shape))
def find_top_k_n_shapley(n_shapley_dict: Dict[int, np.ndarray], k: int):
"""Find the top k n-Shapley values and indices for each order and returns the index of the top k values."""
top_k_indices = {}
top_k_values = {}
for order in n_shapley_dict.keys():
# find the indices where n_shapley_dict[order] is absolute value is higher than the k-th highest value
top_k_indices[order] = k_largest_index_argpartition_v1(a=np.abs(n_shapley_dict[order]), k=k)
top_k_values_order = []
for i in range(k):
index = tuple(top_k_indices[order][i])
top_k_values_order.append(n_shapley_dict[order][index])
top_k_values[order] = np.array(top_k_values_order)
return top_k_indices, top_k_values
if __name__ == "__main__":
interaction_order = 4
budget = 10_000
og = "I have never forgot this movie. All these years and it has remained in my life."
sentence = "i have never forgot this movie all these years and it has remained in my life"
#og = "Absolutely nothing is redeeming about this total piece of trash."
#sentence = "absolutely nothing is redeeming about this total piece of trash"
#og = "However, I really liked the ending so much, I actually smiled and cried tears of joy. I felt good."
#sentence = "however I really liked the ending so much I actually smiled and cried tears of joy I felt good"
#og = "I actually liked the ending even though it did not make a lot of sense."
#sentence = "i actually liked the ending even though it did not make a lot of sense"
#og = "I liked it more than most people, and actually rated it a 4 out of 10."
#sentence = "i liked it more than most people and actually rated it a 4 out of 10"
#og = "My first thought after this movies conclusion was this. Not good, but not bad, for early-to-mid eighties."
#sentence = "my first thought after this movies conclusion was this not good but not bad for early to mid eighties"
#og = "It is a gruesome cannibal movie. But it's not bad. If you like Hannibal, you'll love this."
#sentence = "it is a gruesome cannibal movie but it is not bad if you like hannibal you will love this"
#og = "Not bad for a Mixed tag match, it had entertaining antics, and passed the time well."
#sentence = "not bad for a mixed tag match it had entertaining antics and passed the time well"
#og = "At the time I did not know Richard Attenborough had directed it. But I am not surprised."
#sentence = "at the time I did not know richard attenborough had directed it but I am not surprised"
#og = "However, I would have liked to have seen more in the movie about WHY he took on this persona"
#sentence = "however I would have liked to have seen more in the movie about why he took on this persona"
game = NLPGame(input_text=sentence)
n = game.n
N = set(range(n))
input_sentence_tokens = game.input_sentence
input_words = []
for token in game.tokenized_input:
word = game.tokenizer.decode(token)
input_words.append(word)
print("Original Output:", game.original_output, "n:", game.n)
print("'words':", input_words)
x = np.arange(n)
# estimate the SII values and n-SHAPL values
shap_iq = SHAPIQEstimator(N=N, order=interaction_order, interaction_type="SII", top_order=False)
sii_estimates = shap_iq.compute_interactions_from_budget(budget=budget, game=game.set_call, pairing=True, show_pbar=True, only_expicit=True)
for n_sii_order in range(1, interaction_order + 1):
n_shapley_values = shap_iq.transform_interactions_in_n_shapley(interaction_values=sii_estimates, n=n_sii_order, reduce_one_dimension=True)
n_shapley_values_pos, n_shapley_values_neg = n_shapley_values
# plot the n-Shapley values --------------------------------------------------------------------
params = {
'legend.fontsize': 'x-large', 'axes.labelsize': 'x-large', 'axes.titlesize': 'x-large',
'xtick.labelsize': 'x-large', 'ytick.labelsize': 'x-large'
}
fig, axis = plt.subplots(figsize=(6, 4.15))
x = np.arange(n)
min_max_values = [0, 0]
colors = ["#D81B60", "#FFB000", "#1E88E5", "#FE6100", "#FFB000"]
# transform data to make plotting easier
values_pos = []
for order, values in n_shapley_values_pos.items():
values_pos.append(values)
values_pos = pd.DataFrame(values_pos)
values_neg = []
for order, values in n_shapley_values_neg.items():
values_neg.append(values)
values_neg = pd.DataFrame(values_neg)
reference_pos = np.zeros(n)
reference_neg = copy.deepcopy(np.asarray(values_neg.loc[0]))
for order in range(len(values_pos)):
axis.bar(x, height=values_pos.loc[order], bottom=reference_pos, color=colors[order])
axis.bar(x, height=abs(values_neg.loc[order]), bottom=reference_neg, color=colors[order])
axis.axhline(y=0, color="black", linestyle="solid")
reference_pos += values_pos.loc[order]
try:
reference_neg += values_neg.loc[order + 1]
except KeyError:
pass
min_max_values[0] = min(min_max_values[0], min(reference_neg))
min_max_values[1] = max(min_max_values[1], max(reference_pos))
# add legend
legend_elements = []
for order in range(n_sii_order):
legend_elements.append(
Patch(facecolor=colors[order], edgecolor='black', label=f"Order {order + 1}"))
axis.legend(handles=legend_elements, loc='upper center', ncol=n_sii_order)
axis.set_title(r"n-SII values for a sentence provided to the LM")
x_ticks_labels = [word for word in input_words]
axis.set_xticks(x)
axis.set_xticklabels(x_ticks_labels, rotation=45, ha='right')
axis.set_xlim(-0.5, n - 0.5)
axis.set_ylim(min_max_values[0] * 1.05, min_max_values[1] * 1.3)
axis.set_ylabel("n-SII values")
plt.tight_layout()
# save plot ------------------------------------------------------------------------------------
save_path = os.path.join("plots", f"n_SII_sentence_{sentence[:10]}_{budget}_order-{n_sii_order}.pdf")
fig.savefig(save_path)
plt.show()
color_steps = np.linspace(-1, 1, 20)
# return the top-k n-Shapley values ------------------------------------------------------------
n_shapley_values_not_single = shap_iq.transform_interactions_in_n_shapley(interaction_values=sii_estimates, n=interaction_order, reduce_one_dimension=False)
top_k_indices, top_k_values = find_top_k_n_shapley(n_shapley_values_not_single, k=n-1)
x_words_arr = np.asarray(x)
x_input_words = np.asarray(input_words)
for order in top_k_indices.keys():
print(f"Order {order}")
for i in range(len(top_k_indices[order])):
index = top_k_indices[order][i]
color_index = np.argmin(abs(color_steps - top_k_values[order][i])) + 1
print(f"{top_k_values[order][i]} {x_words_arr[index]} {x_input_words[index]} {color_index}")
print(sentence)
print()