-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyse.py
53 lines (42 loc) · 1.43 KB
/
analyse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# -*- coding:utf-8 -*-
'''
This script analyses an input file and writes the analysis to a file named analysed_<INPUT_FILE_NAME>.txt
Usage:
python3 analyse.py <PATH_INPUT_FILE> <FILE_TYPE> <PATH_FST>
where <FILE_TYPE> = "log" or "xml"
Ex:
python3 analyse.py data/user_input_2019_su.txt log ~/all-gut/giellalt/lang-sme/src/analyser-dict-gt-desc.xfst
python3 analyse.py data/smenob-all.lexc xml ~/all-gut/giellalt/lang-sme/src/analyser-dict-gt-desc.xfst
'''
import os
import sys
from subprocess import Popen, PIPE
# Fixed part of the command line for the external FST lookup tool; the path of
# the transducer file is appended per run.
LOOKUP_ARGS = ["lookup", "-q", "-flags", "mbTT"]


def analysed_file_name(input_path):
    """Return the output file name ``analysed_<stem>.txt`` for *input_path*.

    The stem is the last path component up to its first dot, so
    ``data/user_input_2019_su.txt`` gives ``analysed_user_input_2019_su.txt``.
    Any number of leading directories (including none) is accepted.
    """
    base_name = os.path.basename(input_path)
    return "analysed_" + base_name.split(".")[0] + ".txt"


def extract_word(line, file_type):
    """Parse one input line into ``(word, in_dict)``.

    For ``file_type == "log"`` the line is tab-separated: the first field is
    the word and the second is returned unchanged as ``in_dict`` (it keeps
    its trailing newline when it is the last field on the line).
    For any other file type the word is the text before the first ``":"``
    with underscores replaced by spaces, and ``in_dict`` is ``""``.

    Returns ``None`` when the line has no usable word (a "log" line without
    a tab, or a non-"log" line without a colon).
    """
    if file_type == "log":
        fields = line.split("\t")
        if len(fields) < 2:
            return None
        return fields[0], fields[1]
    if ":" not in line:
        return None
    return line.split(":")[0].replace("_", " "), ""


def run_lookup(word, fst_file):
    """Run the ``lookup`` tool on *word* and return its standard output as text.

    The word is sent on the tool's standard input instead of being pasted
    into a shell command, so quotes, ``$`` and backslashes in the input are
    passed through literally. Raises ``OSError`` if ``lookup`` cannot be
    started.
    """
    process = Popen(LOOKUP_ARGS + [fst_file],
                    stdin=PIPE, stdout=PIPE, stderr=PIPE)
    # stderr is captured only to keep it off the terminal; it is not used.
    out, _ = process.communicate((word + "\n").encode("utf-8"))
    return out.decode("utf-8")


def main(argv):
    """Analyse every usable line of the input file named in *argv*.

    *argv* has the shape of ``sys.argv``: script name, input path, file type
    ("log" or "xml") and path of the FST file. One record per analysed word
    is written to the file named by ``analysed_file_name``.
    """
    if len(argv) < 4:
        sys.exit("Usage: python3 analyse.py <PATH_INPUT_FILE> <FILE_TYPE> <PATH_FST>")

    file_name = argv[1]
    file_type = argv[2]
    # No shell is involved any more, so expand a leading "~" ourselves.
    fst_file = os.path.expanduser(argv[3])

    cnt_error = 0
    with open(file_name, encoding="utf-8") as source, \
            open(analysed_file_name(file_name), "w", encoding="utf-8") as af_txt:
        for line in source:
            entry = extract_word(line, file_type)
            if entry is None:
                # Skip the line rather than re-analysing the previous word.
                if file_type == "log":
                    print("error in line", line)
                    cnt_error += 1
                continue

            word, in_dict = entry
            analyses = run_lookup(word, fst_file)
            if file_type == "log":
                af_txt.write(word + "\t" + in_dict + analyses + ":\n")
            else:
                af_txt.write(word + "\n" + analyses + ":\n")

    if cnt_error:
        print(cnt_error, "malformed line(s) skipped", file=sys.stderr)


if __name__ == "__main__":
    main(sys.argv)