-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreadability_module.py
56 lines (40 loc) · 2 KB
/
readability_module.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
from readability import Readability
import svm_evaluation
# Shared training data: training_readability() appends one row of readability
# scores to data_metrics and the matching grade label to data_grades for each
# processed text; compare() passes both lists to svm_evaluation.predict().
data_metrics, data_grades = [], []
def training_readability(path):
    """Calculate readability scores for the training texts found in *path*.

    Every file name must have the form ``<grade>_<id>_<format>_<type>.<ext>``:
    Grade (as set by the textbook authors), ID (arbitrary), Format
    (read/listen) and Type (plain/blog/song/dialog/...). Texts whose type is
    ``german`` or ``mixed`` are skipped.

    For each remaining text one row of eight scores (Flesch-Kincaid, Flesch,
    Gunning Fog, Coleman-Liau, Dale-Chall, ARI, Linsear Write, Spache -- in
    that order) is appended to the module-level ``data_metrics`` list and the
    grade string from the file name is appended to ``data_grades``. The lists
    are never cleared here, so repeated calls accumulate.

    Args:
        path: Directory that contains the training text files.

    Returns:
        The tuple ``(data_metrics, data_grades)`` of the module-level lists.

    Raises:
        ValueError: If a file name does not consist of exactly four
            ``_``-separated parts.
    """
    for file_name in os.listdir(path):
        # Parse the meta-data first so that a malformed name is reported with
        # the offending file instead of an anonymous unpacking error.
        name_parts = file_name.split("_")
        if len(name_parts) != 4:
            raise ValueError(
                "unexpected training file name {!r}: expected "
                "<grade>_<id>_<format>_<type>.<ext>".format(file_name)
            )
        text_grade, _text_id, _text_format, text_type = name_parts
        text_type = text_type.split(".")[0]

        # Filter text types that aren't suitable for this analysis before
        # reading the file, so skipped texts cost no I/O.
        if text_type in ('german', 'mixed'):
            continue

        # The with-statement closes the file; no explicit close() is needed.
        with open(os.path.join(path, file_name), encoding='utf-8') as f:
            text = f.read()

        r = Readability(text)
        # print(file_name)  TODO uncomment for bug hunt in data
        # Calculate readability scores and save them as data. compare()
        # builds its test row in the same metric order.
        data_metrics.append([
            r.flesch_kincaid().score,
            r.flesch().score,
            r.gunning_fog().score,
            r.coleman_liau().score,
            r.dale_chall().score,
            r.ari().score,
            r.linsear_write().score,
            r.spache().score,
        ])
        data_grades.append(text_grade)
    return data_metrics, data_grades
def compare(text, grade):
    """Predict the grade of *text* with the SVM and print the comparison.

    The readability scores of *text* are computed in the same metric order
    that training_readability() uses, handed to ``svm_evaluation.predict``
    together with the module-level training data, and the predicted grade is
    compared against *grade*. All results are printed; nothing is returned.

    Args:
        text: The text whose difficulty should be estimated.
        grade: The grade the text is intended for; compared against the
            predicted grade with ``<`` and ``>``.
    """
    analysis = Readability(text)
    # Order matters: it has to match the rows stored in data_metrics.
    scorers = (
        analysis.flesch_kincaid,
        analysis.flesch,
        analysis.gunning_fog,
        analysis.coleman_liau,
        analysis.dale_chall,
        analysis.ari,
        analysis.linsear_write,
        analysis.spache,
    )
    # A single sample wrapped in an outer list.
    test_metrics = [[scorer().score for scorer in scorers]]
    print('scores of test text: ' + str(test_metrics))

    prediction = svm_evaluation.predict(data_metrics, data_grades, test_metrics)
    calculated_grade = int(prediction)
    print('difficulty of input text is similar to a text from grade: ' + str(calculated_grade))

    # Pick the verdict first, then report it in one place.
    if calculated_grade < grade:
        verdict = "=> suitable, but may be too easy to read for the students"
    elif calculated_grade > grade:
        verdict = "=> may be too difficult to read for the students"
    else:
        verdict = "=> suitable for the students"
    print(verdict)