-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyze_bibs.py
56 lines (45 loc) · 1.69 KB
/
analyze_bibs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import re
# Debug switch: set to True to run the citation pattern against SAMPLE_LATEX
# instead of reading manuscript.tex from disk.
USE_SAMPLE_LATEX = False

# Small LaTeX fixture that documents what the citation pattern must and must
# not pick up.
SAMPLE_LATEX = r"""
blablabla
Author et. al \cite{author92} bla bla. % should match
\citep{author93} % should match
\nocite{author94} % should match
100\%\nocite{author95} % should match
100\% \nocite{author95} % should match
%\nocite{author96} % should not match
\cite{author97, author98, author99} % should match
\nocite{*} % should not match
"""

# The first alternative swallows an unescaped "%" comment up to the end of the
# line, so citations inside comments are consumed without filling group 1.
# The second alternative matches \cite, \nocite and the natbib variants
# (\citep, \citet, \citealp, \citealt, \citeauthor, \citeyear, \citeyearpar,
# \citenum), an optional "*", any number of [optional] arguments, and captures
# the key list in group 1.  A key list starting with "*" (\nocite{*}) is
# rejected by the negative lookahead.
CITE_PATTERN = re.compile(
    r"(?<!\\)%.*"
    r"|\\(?:no)?cite(?:al[pt]|author|year(?:par)?|num|[pt])?\*?"
    r"(?:\[[^\]]*\])*"
    r"\{((?!\*)[^{}]+)\}"
)

# "@type{key": the key runs up to the first comma, brace or whitespace, so
# keys containing ".", "+", "/" etc. are captured whole.  @string, @preamble
# and @comment blocks are not bibliography entries and are skipped.
BIB_ENTRY_PATTERN = re.compile(
    r"@(?!(?:string|preamble|comment)\b)\w+\s*\{\s*([^\s,{}]+)",
    re.IGNORECASE,
)


def extract_cite_keys(latex: str) -> list:
    """Return the sorted, de-duplicated citation keys used in *latex*.

    Citations inside "%" comments and the wildcard ``\\nocite{*}`` are
    ignored.  Comma-separated key lists are split, surrounding whitespace
    (including newlines from wrapped lists) is removed, and empty items such
    as the one produced by a trailing comma are dropped.
    """
    keys = set()
    for match in CITE_PATTERN.finditer(latex):
        key_list = match.group(1)
        if not key_list:
            # The match was a comment, not a citation.
            continue
        for raw_key in key_list.split(','):
            key = raw_key.strip()
            if key:
                keys.add(key)
    return sorted(keys)


def extract_bib_keys(bibtex: str) -> list:
    """Return the sorted, de-duplicated entry keys defined in *bibtex*."""
    return sorted({m.group(1) for m in BIB_ENTRY_PATTERN.finditer(bibtex)})


def main() -> None:
    """Cross-check manuscript.tex against references.bib and print mismatches.

    Prints one line per key that is cited but not defined in the
    bibliography, followed by one line per bibliography entry that is never
    cited.
    """
    if USE_SAMPLE_LATEX:
        latex = SAMPLE_LATEX
    else:
        with open('manuscript.tex') as f:
            latex = f.read()
    with open('references.bib') as f:
        bibtex = f.read()

    citekey_list = extract_cite_keys(latex)
    bibkeys = extract_bib_keys(bibtex)

    # Sets give O(1) membership tests; the sorted lists fix the print order.
    cited = set(citekey_list)
    defined = set(bibkeys)

    for citekey in citekey_list:
        if citekey not in defined:
            print('This key in not present in the bibliography', citekey)
    for bibkey in bibkeys:
        if bibkey not in cited:
            print('This key in not used in the paper', bibkey)


if __name__ == "__main__":
    main()