-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathjaxLIMS_sync.py
125 lines (110 loc) · 4.97 KB
/
jaxLIMS_sync.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import argparse
import logging
import json
import pandas as pd
import pathlib
from functools import partial
from getpass import getpass
from omero.gateway import BlitzGateway
from ezomero import get_image_ids
from ezomero import filter_by_filename
from ezomero import link_images_to_dataset
from jax_omeroutils.importer import set_or_create_dataset
from jax_omeroutils.importer import set_or_create_project
from jax_omeroutils.importer import multi_post_map_annotation
# Namespace passed to multi_post_map_annotation for the map annotations
# created by this script.
CURRENT_MD_NS = 'jax.org/omeroutils/jaxlims/v0'

# Supported metadata file extensions (without the leading dot) mapped to the
# pandas reader used to load them. Every reader is configured with dtype=str.
MD_VALID_TYPES = dict(
    xlsx=partial(pd.read_excel, dtype=str),
    xls=partial(pd.read_excel, dtype=str),
    tsv=partial(pd.read_csv, sep='\t', dtype=str),
)
def load_md(md_filepath):
    """Load a metadata table, choosing the reader by file extension.

    The extension is looked up in ``MD_VALID_TYPES``. Matching is
    case-insensitive, so ``.TSV`` or ``.XLSX`` are accepted as well as their
    lower-case forms.

    Args:
        md_filepath: Path to the metadata file (anything accepted by
            ``pathlib.Path``), or ``None``.

    Returns:
        The object returned by the selected reader, or ``None`` when
        ``md_filepath`` is ``None``.

    Raises:
        ValueError: If the file extension is not a key of ``MD_VALID_TYPES``
            (this includes files with no extension at all).
    """
    # Guard first so that a missing path never touches the reader table.
    if md_filepath is None:
        return None

    md_filepath = pathlib.Path(md_filepath)
    raw_ftype = md_filepath.suffix.strip('.')

    # Look up with the lower-cased extension, but report the extension exactly
    # as it appeared in the path if it is rejected.
    reader = MD_VALID_TYPES.get(raw_ftype.lower())
    if reader is None:
        raise ValueError(f'Metadata file type {raw_ftype} is invalid')
    return reader(md_filepath)
def main(md_filepath, user_name, group, admin_user, server, port):
    """Move matching orphaned images into datasets and annotate them.

    Logs in as ``admin_user`` (the password is prompted for interactively),
    opens a connection on behalf of ``user_name`` in ``group``, then walks the
    rows of the metadata file. For each row whose ``filename`` matches images
    returned by ``get_image_ids(conn)``, the images are linked to the dataset
    and project named in the row (via ``set_or_create_project`` and
    ``set_or_create_dataset``) and the remaining columns of the row are posted
    as a map annotation under ``CURRENT_MD_NS``.

    Problems with the connection or the metadata file are reported with
    ``logging.error`` and the function returns without processing anything.
    Both connections are closed on every exit path.

    Args:
        md_filepath: Path to the metadata file, passed to ``load_md``.
        user_name: OMERO user on whose behalf the work is done.
        group: OMERO group used for the user connection.
        admin_user: OMERO user used for the initial login.
        server: OMERO server hostname.
        port: OMERO server port.

    Returns:
        None.
    """
    required_columns = ('filename', 'dataset', 'project')

    # create connection and establish context
    password = getpass(f'Enter password for {admin_user}: ')
    su_conn = BlitzGateway(admin_user, password, host=server, port=port)
    try:
        # NOTE(review): BlitzGateway.connect() is documented to return a
        # boolean rather than raise on failure, so it is checked explicitly.
        if not su_conn.connect():
            logging.error(f'Could not connect to {server}:{port}'
                          f' as {admin_user}')
            return
        conn = su_conn.suConn(user_name, group, 600000)
    finally:
        # The admin session is only needed to create the user connection.
        su_conn.close()

    # NOTE(review): suConn appears to return None when the logged-in user is
    # not an admin -- confirm against the omero.gateway documentation.
    if conn is None:
        logging.error(f'Could not create connection for user {user_name}'
                      f' in group {group}')
        return

    try:
        orphan_ids = get_image_ids(conn)

        # load and prepare metadata
        md = load_md(md_filepath)
        if md is None:
            logging.error('No metadata file provided')
            return
        missing_columns = [column for column in required_columns
                           if column not in md.columns]
        if missing_columns:
            for column in missing_columns:
                logging.error(f'Metadata file missing {column} column')
            return
        md_json = json.loads(md.to_json(orient='table', index=False))

        # loop over metadata, move and annotate matching images
        processed_filenames = set()
        for row in md_json['data']:
            row.pop('OMERO_group', None)  # No longer using this field
            project_name = str(row.pop('project'))
            dataset_name = str(row.pop('dataset'))
            filename = row.pop('filename')

            if filename in processed_filenames:
                print(f'Already processed images with filename:{filename}')
                continue

            image_ids = filter_by_filename(conn, orphan_ids, filename)
            if not image_ids:
                print(f'Image with filename:{filename} not found in orphans')
                continue

            # move image into place, create projects/datasets as necessary
            project_id = set_or_create_project(conn, project_name)
            dataset_id = set_or_create_dataset(conn,
                                               project_id,
                                               dataset_name)
            link_images_to_dataset(conn, image_ids, dataset_id)
            print(f'Moved images:{image_ids} to dataset:{dataset_id}')

            # map annotations: whatever is left in the row after the pops
            # above becomes the annotation content
            map_ann_id = multi_post_map_annotation(conn,
                                                   "Image",
                                                   image_ids,
                                                   row,
                                                   CURRENT_MD_NS)
            print(f'Created annotation:{map_ann_id}'
                  f' and linked to images:{image_ids}')
            processed_filenames.add(filename)
    finally:
        # Release the user session on early returns and on errors too.
        conn.close()
    print('Complete!')
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    cli = argparse.ArgumentParser(
        description=("Use metadata from jaxLIMS to organize orphaned files."
                     " Metadata is provided as tsv. Please contact Dave Mellert"
                     " and Mike McFarland for more details."))

    # Single positional argument: the metadata file.
    cli.add_argument('md', type=str, help='Path to jaxlims metadata')

    # Optional/required flags, registered in the order they appear in --help.
    option_specs = [
        (('-u', '--user'),
         dict(type=str,
              required=True,
              help='OMERO user who owns the images (REQUIRED)')),
        (('-g', '--group'),
         dict(type=str,
              required=True,
              help='Group in which to find orphans (REQUIRED)')),
        (('--sudo',),
         dict(type=str,
              required=True,
              help='OMERO admin user for login (REQUIRED)')),
        (('-s', '--server'),
         dict(type=str,
              default='localhost',
              help='OMERO server hostname (default = localhost)')),
        (('-p', '--port'),
         dict(type=int,
              default=4064,
              help='OMERO server port (default = 4064)')),
    ]
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args()
    main(opts.md, opts.user, opts.group, opts.sudo, opts.server, opts.port)