-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdetect_stale_body_annotations.py
72 lines (59 loc) · 2.06 KB
/
detect_stale_body_annotations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/bin/env
# ------------------------- imports -------------------------
import sys
import pandas as pd
from neuclease.util import compute_parallel, read_csv_col
from neuclease.dvid import find_master, fetch_sizes, fetch_keys
dvid_server = sys.argv[1]
dvid_uuid = sys.argv[2]
keyvalue = sys.argv[3]
stale_body_file = sys.argv[4]
node = (dvid_server, dvid_uuid)
bodyList = fetch_keys(*node, keyvalue)
#seg_annot_count = len(all_keys)
master_seg = (dvid_server, dvid_uuid, 'segmentation')
body_groups = []
group_list = []
body_count = 0
# chunk body list into groups of 1000
for bodyID in bodyList:
if bodyID[0].isdigit():
group_list.append(int(bodyID))
body_count += 1
if body_count == 1000:
body_groups.append(group_list)
group_list = []
body_count = 0
if len(group_list) > 0:
body_groups.append(group_list)
PROCESSES = 10
def get_sizes(label_ids):
try:
sizes_pd = fetch_sizes(*master_seg, label_ids, supervoxels=False)
except HTTPError:
s_empty_pd = pd.Series(index=label_ids, data=-1, dtype=int)
s_empty_pd.name = 'size'
s_empty_pd.index.name = 'body'
return(s_empty_pd)
else:
return(sizes_pd)
body_sizes_df_list = compute_parallel(get_sizes, body_groups, chunksize=100, processes=PROCESSES, ordered=False)
#This is actually a Series
body_sizes_df = pd.concat(body_sizes_df_list)
stale_bodies_df = pd.DataFrame(columns=['body','size'])
for bodyId, size in body_sizes_df.iteritems():
#print(bodyId, size)
if size == 0:
stale_bodies_df = stale_bodies_df.append({'body':bodyId, 'size':size}, ignore_index=True)
#print(stale_bodies_df)
stale_bodies_df.to_csv(stale_body_file, index=False)
#for index, data in body_sizes_df.iterrows():
# print(index)
#stale_df = pd.DataFrame(columns=['body','size'])
#for index, row in body_sizes_df.iterrows():
# bodyId = row['body']
# size = row['size']
# if size == 0:
#stale_df = stale_df.append(row, ignore_index=True)
# print(str(bodyId) + "," + str(size))
#stale_dif.to_csv(stale_body_file, index=True)