Skip to content

Commit

Permalink
Improving security analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
Rahul Chatterjee committed Nov 23, 2016
1 parent 8df7b52 commit f0fb62c
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 52 deletions.
25 changes: 16 additions & 9 deletions security/compute_guesses_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ def set_globals(settings_i):
# MIN_ENT, REL_ENT, MAX_NH_SIZE, CACHE_SIZE,
global N, MIN_ENTROPY_CUTOFF, REL_ENT_CUTOFF, MAX_NH_SIZE, CACHE_SIZE, Q
settings = [
(1e4, 10, -3, 10, 5, 1000), # online w/ blacklist
(1e4, 0, 0, 10, 5, 1000), # online w/o blacklist
(1e5, 10, -3, 10, 5, 10000), # offline w/ blacklist
(1e5, 0, 0, 10, 5, 10000), # offline w/o blacklist
(1e4, 10, -3, 10, 5, 1000), # online w/ blacklist
(1e4, 0, 0, 10, 5, 1000), # online w/o blacklist
(1e5, 10, -3, 10, 5, 10000), # offline w/ blacklist
(1e5, 0, 0, 10, 5, 10000), # offline w/o blacklist
]
(N, MIN_ENTROPY_CUTOFF, REL_ENT_CUTOFF, MAX_NH_SIZE, CACHE_SIZE, Q) = settings[settings_i]
return settings[settings_i]
Expand Down Expand Up @@ -248,7 +248,7 @@ def create_pw_nh_graph(fname):
# split *= multiplier


def read_pw_nh_graph(fname, q=-1):
def read_pw_nh_graph(fname, q=-1, _N=-1):
"""Reads the typo trie file and the neighborhood map created by
`create_pw_nh_graph` function.
Expand All @@ -270,6 +270,8 @@ def read_pw_nh_graph(fname, q=-1):
"""
# N = 1000
global N
if _N>0:
N = _N
typodir = '{}/typodir'.format(pwd)
pwm = Passwords(fname, max_pass_len=25, min_pass_len=5)
N = min(N, len(pwm))
Expand Down Expand Up @@ -458,7 +460,7 @@ def compute_guesses_using_typodist(fname, q, nh_size=5, topk=False, offline=Fals
proc_name = "TOPKTypo-{}-{}-{}".format
else:
proc_name = "TYPODIST-{}-{}-{}".format
proc_name = proc_name(MIN_ENTROPY_CUTOFF, REL_ENT_CUTOFF,
proc_name = proc_name(MIN_ENTROPY_CUTOFF, REL_ENT_CUTOFF,
('off' if offline else 'on'))

pwm = Passwords(fname, max_pass_len=25, min_pass_len=5)
Expand Down Expand Up @@ -565,7 +567,12 @@ def get_trie_key(T, _id):
return T.restore_key(_id)
except KeyError:
return ''

def get_trie_id(T, key):
    """Look up the id of `key` in the trie `T`.

    `key` is converted with `unicode()` before being passed to
    `T.key_id`. If that lookup raises `KeyError`, -1 is returned instead
    of propagating the error.

    NOTE(review): `T` is presumably a marisa-trie style object exposing
    `key_id` -- confirm against the caller.
    """
    try:
        key_id = T.key_id(unicode(key))
    except KeyError:
        # Missing keys are reported with a sentinel rather than an exception.
        key_id = -1
    return key_id

proc_name = 'ALL'
def compute_guesses_all(fname, q):
"""We computed neighborhood graph, considering the neighborhood graph
Expand Down Expand Up @@ -681,7 +688,7 @@ def run_all(offline=False):
for p in processes: p.start()
# for p in processes: p.join()
return

if __name__ == '__main__':
import sys
# create_pw_db_(sys.argv[1])
Expand Down Expand Up @@ -711,7 +718,7 @@ def run_all(offline=False):
# }
# process['p_typodist'].start()
# process['p_topk'].start()

# compute_guesses_using_typodist(fname, q, 5, True, offline=True)
# compute_guesses_using_typodist(fname, q, 10, False)
# process['p_typodist'].join()
Expand Down
99 changes: 66 additions & 33 deletions security/compute_secloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,59 +10,92 @@
from word2keypress import Keyboard
from word2keypress.weighted_edist import sample_typos, get_topk_typos
from zxcvbn import password_strength
from compute_guesses_numpy import get_topk_typos, get_typodist_nh
from compute_guesses_numpy import (
get_topk_typos, get_typodist_nh, read_pw_nh_graph, get_trie_key, get_trie_id,
N
)

KB = Keyboard()
NH_SIZE = 10
def compute_secloss(guess_file, attpwf, chlpwf, q=100):
chlpwm = Passwords(chlpwf)
attpwm = Passwords(attpwf)
chlpwm = Passwords(chlpwf, max_pass_len=25, min_pass_len=5)
attpwm = Passwords(attpwf, max_pass_len=25, min_pass_len=5)
guesses = [w for w, _ in json.load(open(guess_file))]
guess_set = set(guesses)
q = len(guesses)
print("Found {} guesses".format(q))
lambda_q = sum(chlpwm.pw2freq(pw) for _id, pw, f in attpwm.iterpws())/chlpwm.totalf()
lambda_q = sum(chlpwm.pw2freq(pw) for _id, pw, f
in attpwm.iterpws(q))/float(chlpwm.totalf())
print("Normal succces: {}".format(lambda_q))
union_ball = set([
rpw
for w in guesses
for rpw in KB.word_to_typos(str(w))
if chlpwm.pw2id(rpw)>=0
])
]) | guess_set

print("Worst case success rate = {}"\
.format(sum(chpwm.pw2freq(w) for w in union_ball)/chpwm.totalf()))
.format(sum(chlpwm.pw2freq(w) for w in union_ball)/float(chlpwm.totalf())))

# global N
# N = 10000
# M, A, typo_trie, _ = read_pw_nh_graph(chlpwf, N)
# Mprime = np.zeros((M.shape[0], NH_SIZE+1))
# B = [[] for _ in guesses]
# # for g in xrange(M.shape[0]):
# M = Mprime
# fuzzlambda_q = 0.0
# guess_key_ids = [get_trie_id(typo_trie, g) for g in guess_set]
# killed = []

# for rpw in union_ball:
# try:
# rpwid = typo_trie.key_id(unicode(rpw))
# for g in guess_key_ids:
# if (M[M[:, 0] == rpwid] == g).any:
# killed.append(rpw)
# except KeyError:
# continue
# fuzzlambda_q = sum([chlpwm.pw2freq(w) for w in killed])/chlpwm.totalf()
# for rpw in union_ball:
# a = set(get_topk_typos(rpw, NH_SIZE+1)) & guess_set
# if a:
# print rpw, chlpwm.pw2freq(rpw)

lambda_corr_q = sum(
chpwm.pw2freq(rpw)
fuzzlambda_q = sum(
chlpwm.pw2freq(rpw)
for rpw in union_ball
if len(set(get_topk_typos(rpw, NH_SIZE)) & guess_set)>0
)/chpwm.totalf()
print("lambda-Topk Corr:", lambda_corr_q),
)/float(chlpwm.totalf())
# print("fuzzlambda_q:", fuzzlambda_q),

# lambda_topk_q = sum(
# chlpwm.pw2freq(rpw)
# for rpw in union_ball
# if len(set(get_typodist_nh(rpw, NH_SIZE)) & guess_set)>0
# )/chlpwm.totalf()
print("fuzzlambda_q: ", fuzzlambda_q)
print("Secloss:", fuzzlambda_q - lambda_q)

def compute_secloss_with_varying_q(guess_file, attpwf, chlpwf, q=100):
chlpwm = Passwords(chlpwf, max_pass_len=25, min_pass_len=5)
attpwm = Passwords(attpwf, max_pass_len=25, min_pass_len=5)

lambda_topk_q = sum(
chpwm.pw2freq(rpw)
for rpw in union_ball
if len(set(get_typodist_nh(rpw, NH_SIZE)) & guess_set)>0
)/chpwm.totalf()
print("lambda-typodist: ", lambda_topk_q)
print("Secloss:", lambda_topk_q - lambda_q)

def compute_secloss_with_varying_q(guess_file, pwfname):
chpwm = Passwords(pwfname)
guesses = [w for w, _ in json.load(open(guess_file))]
guess_set = dict((g, i) for i, g in enumerate(guesses))

q = len(guesses)
union_ball = list(set([
rpw
for w in guesses
for rpw in KB.word_to_typos(str(w))
if chpwm.pw2id(rpw)>=0
if chlpwm.pw2id(rpw)>=0
]))
freqs = np.array([chpwm.pw2freq(w) for w in union_ball])

freqs = np.array([chlpwm.pw2freq(w) for w in union_ball])
M = np.full((len(union_ball), NH_SIZE+1), -1, dtype=np.int32)
for i, rpw in enumerate(union_ball):
for j, tpw in enumerate(get_typodist_nh(rpw, NH_SIZE)):
for j, tpw in enumerate(get_topk_typos(rpw, NH_SIZE)):
M[i, j] = guess_set.get(tpw, -1)
print("Useful typos:", (M>0).sum())
tq = 1
Expand All @@ -74,19 +107,19 @@ def compute_secloss_with_varying_q(guess_file, pwfname):
last_suc = 0
for g in guesses[tq:tq*10]:
t = guess_set[g]
last_suc += freqs[(M==t).sum(axis=1)>0].sum()
freqs[(M==t).sum(axis=0)>0] = 0
lambda_topk_q.append((tq*10, last_suc/chpwm.totalf()))
print(tq, lambda_topk_q[-1])
last_suc += freqs[(M==t).sum(axis=1)>0].sum()/float(chlpwm.totalf())
freqs[(M==t).sum(axis=1)>0] = 0
lambda_topk_q.append((tq*10, last_suc))
print(lambda_topk_q[-1])
tq *= 10

with open('guess_file.csv', 'wb') as f:
csvf = csv.writer(f)
csvf.writerow('q,lambda_q,lambda_typodist_q'.split())
csvf.writerow('q,lambda_q,secloss'.split())
for tq, succ in lambda_topk_q:
lambda_q = chpwm.sumvalues(tq)/chpwm.totalf()
csvf.writerow([tq, lambda_q, succ])
lambda_q = chlpwm.sumvalues(tq)/float(chlpwm.totalf())
csvf.writerow([tq, lambda_q, succ-lambda_q])

if __name__ == "__main__":
compute_secloss(sys.argv[1], sys.argv[2], sys.argv[3])

compute_secloss_with_varying_q(sys.argv[1], sys.argv[2], sys.argv[3])
12 changes: 6 additions & 6 deletions security/guess_file.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"q,lambda_q,lambda_typodist_q"
0,1.0,0
10,0.020536894151018417,3.0385196823181632e-06
100,0.045536913333592173,2.4553703728431483e-07
1000,0.11299376903966357,7.5360517224896903e-15
10000,0.22297136225891534,2.31297388744976e-22
"q,lambda_q,secloss"
10,0.027934273219508185,-0.0083704942036872468
100,0.056984351325850266,-0.02707686525383585
1000,0.12889699750607819,-0.068083673621198479
10000,0.27439403964435188,-0.155049898011522
100000,0.66960821545605098,-0.43270522000936773
10 changes: 6 additions & 4 deletions security/results.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(10000.0, 10, -3, 5)
gi=2386, aASSWORD -> 1 ([u'password']),
gi=2386, aASSWORD -> 1 ([u'password']),
(TOPKTypo-10--3-on): 1> aASSWORD : 1.825e-01 (killed=1/1)
(TOPKTypo-10--3-on): 2> lLOVEYOU : 1.533e-01 (killed=1/2)
(TOPKTypo-10--3-on): 3> rRINCESS : 1.022e-01 (killed=1/3)
Expand Down Expand Up @@ -988,7 +988,7 @@ RPW freq is zero! rpw=jESUS1, f=0, guess=eesus1
(TOPKTypo-10--3-on): 982> eeyore : 3.192e-04 (killed=2/1298)
(TOPKTypo-10--3-on): 983> aariel : 3.192e-04 (killed=2/1300)
(TOPKTypo-10--3-on): 984> eeresa (10000.0, 0, 0, 5)
gi=52654, 1123456 -> 15 ([u'123456', u'0123456', u'A123456', u'J123456', u'M123456', u'S123456', u'K123456', u'B123456', u'D123456', u'C123456', u'T123456', u'R123456', u'L123456', u'E123456', u'a123456']),
gi=52654, 1123456 -> 15 ([u'123456', u'0123456', u'A123456', u'J123456', u'M123456', u'S123456', u'K123456', u'B123456', u'D123456', u'C123456', u'T123456', u'R123456', u'L123456', u'E123456', u'a123456']),
(TOPKTypo-0-0-on): 1> 1123456 : 9.064e-01 (killed=15/15)
(TOPKTypo-0-0-on): 2> 112345 : 2.485e-01 (killed=10/25)
(TOPKTypo-0-0-on): 3> 1123456789 : 2.462e-01 (killed=3/28)
Expand Down Expand Up @@ -2246,7 +2246,7 @@ Pwid changed for u'CONTRASE\xd1A' -> 'CONTRASEA'
(TOPKTypo-10--3-off) Processed: 97000
(TOPKTypo-10--3-off) Processed: 98000
(TOPKTypo-10--3-off) Processed: 99000
gi=276748, 1123456 -> 38 ([u'123456', u'0123456', u'A123456', u'J123456', u'M123456', u'S123456', u'K123456', u'B123456', u'D123456', u'C123456', u'T123456', u'R123456', u'L123456', u'E123456', u'a123456', u'N123456', u'P123456', u'H123456', u'Q123456', u'G123456', u'1123456', u'1123456', u'V123456', u'F123456', u'Z123456', u'j123456', u'W123456', u'I123456', u'Y123456', u'O123456', u'k123456', u'm123456', u's123456', u'c123456', u'X123456', u'd123456', u'`123456', u'l123456']),
gi=276748, 1123456 -> 38 ([u'123456', u'0123456', u'A123456', u'J123456', u'M123456', u'S123456', u'K123456', u'B123456', u'D123456', u'C123456', u'T123456', u'R123456', u'L123456', u'E123456', u'a123456', u'N123456', u'P123456', u'H123456', u'Q123456', u'G123456', u'1123456', u'1123456', u'V123456', u'F123456', u'Z123456', u'j123456', u'W123456', u'I123456', u'Y123456', u'O123456', u'k123456', u'm123456', u's123456', u'c123456', u'X123456', u'd123456', u'`123456', u'l123456']),
(TOPKTypo-10--3-off): 1> 1123456 : 9.154e-01 (killed=38/1)
(TOPKTypo-10--3-off): 2> 223456 : 8.937e-01 (killed=5/2)
(TOPKTypo-10--3-off): 3> 1234561 : 8.927e-01 (killed=2/3)
Expand Down Expand Up @@ -8393,7 +8393,7 @@ Pwid changed for u'CONTRASE\xd1A' -> 'CONTRASEA'
(TOPKTypo-0-0-off) Processed: 97000
(TOPKTypo-0-0-off) Processed: 98000
(TOPKTypo-0-0-off) Processed: 99000
gi=276748, 1123456 -> 38 ([u'123456', u'0123456', u'A123456', u'J123456', u'M123456', u'S123456', u'K123456', u'B123456', u'D123456', u'C123456', u'T123456', u'R123456', u'L123456', u'E123456', u'a123456', u'N123456', u'P123456', u'H123456', u'Q123456', u'G123456', u'1123456', u'1123456', u'V123456', u'F123456', u'Z123456', u'j123456', u'W123456', u'I123456', u'Y123456', u'O123456', u'k123456', u'm123456', u's123456', u'c123456', u'X123456', u'd123456', u'`123456', u'l123456']),
gi=276748, 1123456 -> 38 ([u'123456', u'0123456', u'A123456', u'J123456', u'M123456', u'S123456', u'K123456', u'B123456', u'D123456', u'C123456', u'T123456', u'R123456', u'L123456', u'E123456', u'a123456', u'N123456', u'P123456', u'H123456', u'Q123456', u'G123456', u'1123456', u'1123456', u'V123456', u'F123456', u'Z123456', u'j123456', u'W123456', u'I123456', u'Y123456', u'O123456', u'k123456', u'm123456', u's123456', u'c123456', u'X123456', u'd123456', u'`123456', u'l123456']),
(TOPKTypo-0-0-off): 1> 1123456 : 9.154e-01 (killed=38/1)
(TOPKTypo-0-0-off): 2> 223456 : 8.937e-01 (killed=5/2)
(TOPKTypo-0-0-off): 3> 1234561 : 8.927e-01 (killed=2/3)
Expand Down Expand Up @@ -22387,3 +22387,5 @@ RPW freq is zero! rpw=JJAMES, f=0, guess=JJAMES
(TOPKTypo-0-0-off): Total fuzzy success: 115.734034382
(TOPKTypo-0-0-off): Total normal success: 22.2971362259
('Saving the guesses:', 'guesses/rockyou-withcount_guesses_10000_typodist_5_TOPKTypo-0-0-off.json')
Done creating all the parts
rockyou-withcount__0_2000000_typo.trie and rockyou-withcount__0_2000000_typo.trie exits. So returning

0 comments on commit f0fb62c

Please sign in to comment.