forked from metaodi/artmap
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgeoref.py
116 lines (96 loc) · 3.1 KB
/
georef.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from lxml import etree
from pprint import pprint
import requests
import time
import random
import json
import os.path
# Parse the metadata export once at start-up; the main loop below iterates
# over the <record> elements of this tree.
tree = etree.parse("MetadataGugelmann.xml")
# Number of records visited so far by the main loop (compared against `limit`).
total = 0

# GeoJSON FeatureCollection that accumulates one Feature per located record.
results = {
    'type': "FeatureCollection",
    'features': [],
}

# Upper bound on the number of records visited in a single run.
limit = 3000

# German prepositions and articles.
# NOTE(review): neither list is referenced anywhere in the visible script --
# presumably kept for a smarter place-name heuristic; confirm before removing.
location_words = ['im', 'in', 'bei', 'um', 'vor', 'auf']
continue_words = ['der', 'die', 'dem']
def ask_nominatim(candidate, the_one, min_importance=0.3):
    """Look up ``candidate`` on Nominatim and keep the better of two matches.

    Args:
        candidate: Text to geocode. Only candidates whose first character is
            upper case are sent to the service; anything else (including an
            empty or missing candidate) is ignored.
        the_one: Best match found so far (a Nominatim result dict) or None.
        min_importance: A new hit must have an ``importance`` strictly above
            this value to be considered at all.

    Returns:
        The top Nominatim hit if it passes ``min_importance`` and is more
        important than ``the_one``; otherwise ``the_one`` unchanged.

    Raises:
        requests.RequestException: On network failure or timeout. This is
            deliberately not swallowed, so an outage is not mistaken for
            "no match".
    """
    # Guard against empty/None tokens before indexing the first character.
    if not candidate or not candidate[0].isupper():
        return the_one

    # Let requests build the query string so spaces, umlauts and characters
    # such as '&' or '#' in the candidate are encoded correctly.
    r = requests.get(
        'https://nominatim.openstreetmap.org/search',
        params={'countrycodes': 'CH', 'format': 'json', 'q': candidate},
        timeout=30,
    )
    # Throttle: wait one second after every request sent to the service.
    time.sleep(1)

    try:
        geo = r.json()[0]
        if geo['importance'] > min_importance and (
                the_one is None or geo['importance'] > the_one['importance']):
            return geo
    except (ValueError, IndexError, KeyError, TypeError):
        # Non-JSON body, empty result list, or a hit without a usable
        # 'importance' value: treat as "nothing better found".
        pass
    return the_one
def _child_text(record, tag):
    """Return the text of the first ``tag`` child of ``record``, or None."""
    node = record.find(tag)
    if node is None:
        return None
    return node.text


rec_arr = list(tree.findall('record'))

# Records are visited in document order. To work on a random selection of the
# input instead, enable the shuffle below.
index = list(range(len(rec_arr)))
# random.shuffle(index)

# One file per record is written to output/; make sure the directory exists.
if not os.path.isdir('output'):
    os.makedirs('output')

for pos, idx in enumerate(index):
    record = rec_arr[idx]
    if total >= limit:
        break
    total += 1
    the_one = None

    record_id = _child_text(record, 'Signatur')
    if not record_id:
        # Without a signature there is no file name to store the result under.
        print('%s/%s: SKIP: record without Signatur' % (pos + 1, len(rec_arr)))
        continue

    out_path = os.path.join('output', record_id + '.json')
    if os.path.isfile(out_path):
        # An existing file (even an empty one) means this record was already
        # processed in an earlier run.
        print('%s/%s: SKIP: %s' % (pos + 1, len(rec_arr), record_id))
        continue

    place_name = _child_text(record, 'Ort')
    title = _child_text(record, 'TitelName')

    # Search terms: the place as one term, followed by the words of the title.
    words = []
    if place_name and place_name != 'Unknown':
        words.append(place_name)
    if title:
        # split() without an argument drops the empty tokens that repeated
        # spaces would otherwise produce.
        words.extend(title.split())

    last = len(words) - 1
    for i, candidate in enumerate(words):
        # The single word may replace an earlier, less important match.
        the_one = ask_nominatim(candidate, the_one)
        # Two-word combinations with the neighbours are only tried while
        # nothing has been found at all.
        if the_one is None and i > 0:
            the_one = ask_nominatim(words[i - 1] + ' ' + candidate, the_one)
        if the_one is None and i < last:
            the_one = ask_nominatim(candidate + ' ' + words[i + 1], the_one)
        if the_one is None and i > 0:
            the_one = ask_nominatim(candidate + ' ' + words[i - 1], the_one)
        if the_one is None and i < last:
            the_one = ask_nominatim(words[i + 1] + ' ' + candidate, the_one)

    if the_one is not None:
        geores = {
            'type': "Feature",
            'geometry': {
                'type': "Point",
                # GeoJSON order: longitude first, then latitude.
                'coordinates': [float(the_one['lon']), float(the_one['lat'])],
            },
            'properties': {
                'name': title,
                'location': the_one['display_name'],
                'url': _child_text(record, 'SourceURL'),
                'id': record_id,
            },
        }
        results['features'].append(geores)
        with open(out_path, 'w') as outfile:
            json.dump(geores, outfile)
        print("%s/%s: FOUND: %s: %s (%s): %s" % (
            pos + 1, len(rec_arr), record_id, title, place_name,
            geores['properties']['location']))
    else:
        # Leave an empty marker file so the record is skipped on the next run.
        with open(out_path, 'a'):
            pass
        print("%s/%s: NOT FOUND: %s: %s (%s)" % (
            pos + 1, len(rec_arr), record_id, title, place_name))

print("Total: %s" % total)
print("Matches: %s" % len(results['features']))