-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathodinxigt.py
50 lines (39 loc) · 1.25 KB
/
odinxigt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# common operations for odinclean and odinnormalize
import re
from xigt import Item
# Tags compared against the first '+'-separated component of an item's
# 'tag' attribute; this is the default tag set used by min_indent() and
# shift_left() (which always exclude 'M' and 'B' from it).
PRITAGS = (
    'L', 'G', 'T',
    'L-G', 'L-T', 'G-T', 'L-G-T',
    'M', 'B', 'C',
)

# NOTE(review): presumably the secondary tags that may follow the primary
# tag after a '+' -- not referenced in this part of the file; confirm
# against callers.
SECTAGS = (
    'AC', 'AL', 'CN', 'CR', 'DB', 'EX', 'LN', 'LT', 'SY',
)
def copy_items(items):
    """Build a new list of freshly constructed ``Item`` objects.

    Each new item is created from the ``id``, ``type``, ``attributes`` and
    ``text`` of the corresponding source item; no other state is carried
    over.  The source ``attributes`` object is passed to the ``Item``
    constructor as-is, so whether it ends up copied or shared is decided
    by ``Item`` itself.

    Returns:
        A list with one new ``Item`` per element of *items*, in order.
    """
    duplicates = []
    for source in items:
        duplicate = Item(
            id=source.id,
            type=source.type,
            attributes=source.attributes,
            text=source.text,
        )
        duplicates.append(duplicate)
    return duplicates
def get_tags(item):
    """Return the '+'-separated components of *item*'s ``tag`` attribute.

    The value is read from ``item.attributes['tag']``.  When the attribute
    is absent the result is ``['']``, so the returned list always has at
    least one element.
    """
    raw_tag = item.attributes.get('tag', '')
    return raw_tag.split('+')
def remove_blank_items(items):
    """Return a new list containing only the items with visible text.

    An item is dropped when its ``text`` is ``None``, empty, or consists
    solely of whitespace.  The surviving items are the original objects in
    their original order; *items* itself is not modified.
    """
    kept = []
    for item in items:
        text = item.text or ''
        if text.strip() == '':
            continue
        kept.append(item)
    return kept
def min_indent(items, tags=None):
    """Return the smallest leading-whitespace width among the tagged items.

    Args:
        items: iterable of objects exposing ``text`` and ``attributes``
            (the latter is read through ``get_tags``).
        tags: tags whose items are measured; defaults to ``PRITAGS``.
            ``'M'`` and ``'B'`` are always removed from this set.

    Returns:
        The minimum number of leading whitespace characters over the
        measured items, or ``0`` when no item is measured.

    Only the first '+'-separated component of an item's tag is compared
    against *tags*.  Items whose ``text`` is ``None`` have nothing to
    measure and are skipped instead of raising ``TypeError``.
    """
    if tags is None:
        tags = PRITAGS
    measured_tags = set(tags).difference(['M', 'B'])
    indents = [
        # r'\s*' always matches (possibly zero-width), so .end() is the
        # count of leading whitespace characters.
        re.match(r'\s*', item.text, re.U).end()
        for item in items
        if item.text is not None and get_tags(item)[0] in measured_tags
    ]
    # min() of an empty sequence raises, hence the [0] fallback.
    return min(indents or [0])
def shift_left(items, tags=None):
    """Remove the shared leading indentation from the tagged items, in place.

    Args:
        items: re-iterable collection of objects exposing a writable
            ``text`` and ``attributes`` (read through ``get_tags``).
        tags: tags whose items are shifted; defaults to ``PRITAGS``.
            ``'M'`` and ``'B'`` are always removed from this set.

    Returns:
        The same *items* object that was passed in; the ``text`` of its
        elements is modified in place.

    The amount removed is ``min_indent`` over the shifted items, so every
    shifted item loses the same number of leading characters.  Items whose
    first tag is ``'M'`` are instead stripped of all leading and trailing
    whitespace, regardless of *tags*.  All other items are left untouched,
    as are items whose ``text`` is ``None`` (previously these raised).
    """
    if tags is None:
        tags = PRITAGS
    shiftable = set(tags).difference(['M', 'B'])
    # Items without text can be neither measured nor trimmed; filter them
    # out once so both the measurement and the loop below are safe.
    with_text = [item for item in items if item.text is not None]
    maxshift = min_indent(with_text, shiftable)
    for item in with_text:
        tag = get_tags(item)[0]
        if tag == 'M':
            item.text = item.text.strip()
        elif tag in shiftable:
            item.text = item.text[maxshift:]
    return items