-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
2123 lines (1741 loc) · 81.7 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Server side API"""
# pylint: disable=too-many-lines
import json
import datetime
import copy
import os
import fnmatch
import time
import shutil
import hashlib
import base64
import uuid
import tempfile
import traceback
import subprocess
import sys
from typing import Optional, Union
from crypt import Crypt
from pathlib import Path
from irods.session import iRODSSession
from irods.data_object import chunks
import irods.exception
from flask import Flask, make_response, render_template, request, send_file, session
from flask_cors import CORS, cross_origin
from werkzeug.utils import secure_filename
from werkzeug.datastructures import MultiDict
from pylint import lint
from pylint.reporters.text import TextReporter
from workflow_definitions import WORKFLOW_DEFINITIONS
def _get_additional_folders() -> Optional[dict]:
    """Returns additional user accessible folders
    The folders are read from the MORE_FOLDERS environment variable which holds one or more
    "name:path" definitions separated by semicolons
    Returns:
        Returns None when MORE_FOLDERS is not set or is empty. Otherwise returns a dictionary
        of the valid folder names mapped to their resolved (os.path.realpath) paths. Invalid
        definitions are reported and skipped
    """
    folder_defs = os.getenv('MORE_FOLDERS')
    if not folder_defs:
        return None

    return_defs = {}
    for one_def in folder_defs.split(';'):
        if ':' not in one_def:
            print(f'Skipping invalid additional folder: "{one_def}"')
            continue

        # Only split on the first colon so that a path which itself contains a colon
        # (a Windows drive letter, for example) is kept intact instead of being rejected
        def_name, def_path = [part.strip() for part in one_def.split(':', 1)]
        if not def_name or not def_path:
            print(f'Invalid additional folder missing a name or a path: "{one_def}"')
            continue

        def_path = os.path.realpath(def_path)
        if not os.path.isdir(def_path):
            print(f'Path for additional folder "{def_path}" is not found: "{one_def}"')
            print('    skipping invalid path')
            continue

        # The last definition of a name wins
        if def_name in return_defs:
            print(f'Current additional folder "{one_def}" overwriting previous definition "{def_name}:{return_defs[def_name]}"')
        return_defs[def_name] = def_path

    return return_defs
def _get_secret_key() -> str:
    """Fetches the value to use as the app's secret key
    Returns:
        The value of the SECRET_KEY environment variable, or a built-in placeholder key
        when the variable is not set or is empty
    """
    env_key = os.getenv('SECRET_KEY')
    # A missing or empty environment value falls back to the placeholder
    return env_key if env_key else 'this_is_not_a_secret_key_33343536'
def _get_encryption_salt() -> str:
    """Fetches the value to use as the encryption salt (IV)
    Returns:
        The SALT_VALUE environment variable, or a built-in default when it is not set or is
        empty, after being passed through Crypt.adjust_crypto_salt
    """
    env_salt = os.getenv('SALT_VALUE')
    raw_salt = env_salt if env_salt else '381279526a7eAC2f'
    # The salt must match the block size, so let the Crypt class adjust it
    return Crypt.adjust_crypto_salt(raw_salt)
def _get_default_passcode() -> str:
    """Fetches the passcode to use when one is not specified
    Returns:
        The value of the DEFAULT_PASSCODE environment variable, or a built-in passcode when
        the variable is not set or is empty
    """
    env_passcode = os.getenv('DEFAULT_PASSCODE')
    if env_passcode:
        return env_passcode
    # Nothing usable in the environment
    return 'unsecurepasscode'
def create_app(config_file: str = None) -> Flask:
    """Builds the Flask app, optionally loading a configuration file
    Arguments:
        config_file - path of an optional python configuration file to load into the app
    Returns:
        Returns the newly created app with its SECRET_KEY configured
    """
    flask_app = Flask(__name__)

    # Only load a configuration when one was given
    if config_file:
        flask_app.config.from_pyfile(config_file)

    # The secret key is always set, overriding any value from the configuration file
    flask_app.config.update(SECRET_KEY=_get_secret_key())

    return flask_app
# Allow an app configuration file to be specified in the environment
# A value that doesn't name an existing file is reported and then ignored
CONFIG_FILE = os.getenv('APP_CONFIG_FILE')
if CONFIG_FILE is not None:
if not os.path.exists(CONFIG_FILE) or not os.path.isfile(CONFIG_FILE):
print('Ignoring invalid app configuration file specified:', CONFIG_FILE)
CONFIG_FILE = None
# Create the App
app = create_app(CONFIG_FILE)
# Create the CORS handler
# Only the /files resource is configured, and only for the http://127.0.0.1:3000 origin
cors = CORS(app, resources={r"/files": {"origins": "http://127.0.0.1:3000"}})
# Additional folders to allow user access
# This is None when the MORE_FOLDERS environment variable is not set or is empty
ADDITIONAL_LOCAL_FOLDERS=_get_additional_folders()
# The default page to serve up
DEFAULT_TEMPLATE_PAGE='index.html'
# Starting point for uploading files from server
# NOTE(review): this is the same folder as OUR_LOCAL_PATH - confirm that's intended
RESOURCE_START_PATH = os.path.abspath(os.path.dirname(__file__))
# Our local path (the folder containing this file)
OUR_LOCAL_PATH = os.path.abspath(os.path.dirname(__file__))
# Get the salt (IV) for encryption
ENCRYPTION_SALT = _get_encryption_salt()
# Get the default passcode to use when it's not specified
DEFAULT_PASSCODE = _get_default_passcode()
# Starting point for searching for user files on server
# Defaults to the 'upload' folder next to this file when WORKING_FOLDER isn't set
FILE_START_PATH = os.getenv('WORKING_FOLDER')
if FILE_START_PATH is None:
FILE_START_PATH = os.path.join(OUR_LOCAL_PATH, 'upload')
if not os.path.exists(FILE_START_PATH):
os.makedirs(FILE_START_PATH, exist_ok=True)
# Starting point for uploaded workflow files
# Defaults to the 'workflow' folder next to this file when WORKFLOW_FOLDER isn't set
WORKFLOW_FILE_START_PATH = os.getenv('WORKFLOW_FOLDER')
if WORKFLOW_FILE_START_PATH is None:
WORKFLOW_FILE_START_PATH = os.path.join(OUR_LOCAL_PATH, 'workflow')
if not os.path.exists(WORKFLOW_FILE_START_PATH):
os.makedirs(WORKFLOW_FILE_START_PATH, exist_ok=True)
# Running workflow path (always under the system temporary folder)
WORKFLOW_RUN_PATH = os.path.join(tempfile.gettempdir(), 'atlana')
if not os.path.exists(WORKFLOW_RUN_PATH):
os.makedirs(WORKFLOW_RUN_PATH, exist_ok=True)
# Starting point for code checking files
# Defaults to the 'code_temp' folder next to this file when CODE_CHECK_FOLDER isn't set
CODE_CHECKING_PATH = os.getenv('CODE_CHECK_FOLDER')
if CODE_CHECKING_PATH is None:
CODE_CHECKING_PATH = os.path.join(OUR_LOCAL_PATH, 'code_temp')
if not os.path.exists(CODE_CHECKING_PATH):
os.makedirs(CODE_CHECKING_PATH, exist_ok=True)
# Starting point for testing code files
# NOTE(review): this reads the same CODE_CHECK_FOLDER environment variable as CODE_CHECKING_PATH,
# so when that variable is set both paths are the same folder - confirm whether a separate
# variable was intended
CODE_TESTING_PATH = os.getenv('CODE_CHECK_FOLDER')
if CODE_TESTING_PATH is None:
CODE_TESTING_PATH = os.path.join(OUR_LOCAL_PATH, 'code_test')
if not os.path.exists(CODE_TESTING_PATH):
os.makedirs(CODE_TESTING_PATH, exist_ok=True)
# Starting point for code testing template files
# Defaults to the 'test_template' folder next to this file when CODE_TEMPLATE_FOLDER isn't set
CODE_TEMPLATE_PATH = os.getenv('CODE_TEMPLATE_FOLDER')
if CODE_TEMPLATE_PATH is None:
CODE_TEMPLATE_PATH = os.path.join(OUR_LOCAL_PATH, 'test_template')
if not os.path.exists(CODE_TEMPLATE_PATH):
os.makedirs(CODE_TEMPLATE_PATH, exist_ok=True)
# Starting point for repositories
# Defaults to the 'repos' folder next to this file when CODE_REPOSITORY_FOLDER isn't set
CODE_REPOSITORY_PATH = os.getenv('CODE_REPOSITORY_FOLDER')
if CODE_REPOSITORY_PATH is None:
CODE_REPOSITORY_PATH = os.path.join(OUR_LOCAL_PATH, 'repos')
if not os.path.exists(CODE_REPOSITORY_PATH):
os.makedirs(CODE_REPOSITORY_PATH, exist_ok=True)
# Status codes for checking on processes
# NOTE: STATUS_RUNNNG keeps its spelling because other code in this file refers to it by this name
STATUS_NOT_STARTED = 0
STATUS_RUNNNG = 1
STATUS_FINISHED = 2
# Number of tries to download from iRODS before giving up
IRODS_DOWNLOAD_RETRIES = 2
# Number of times to try to access queue status; should not exceed delays defined in FILE_PROCESS_QUEUE_STATUS_TIMEOUTS
FILE_PROCESS_QUEUE_STATUS_RETRIES = 3
# Number of times to try to access queue messages; should not exceed delays defined in FILE_PROCESS_QUEUE_MESSAGE_TIMEOUTS
FILE_PROCESS_QUEUE_MESSAGES_RETRIES = 3
# Delay times (passed to time.sleep) to access the queue status before giving up
FILE_PROCESS_QUEUE_STATUS_TIMEOUTS = [0.1, 0.2, 0.4, 0.7]
# Delay times (passed to time.sleep) to access the queue messages before giving up
FILE_PROCESS_QUEUE_MESSAGE_TIMEOUTS = [0.1, 0.2, 0.1, 0.2, 0.4]
# The current version of the workflow save file
CURRENT_WORKFLOW_SAVE_VERSION = '1.0'
# List of workflow save file versions we understand
WORKFLOW_SAVE_VERSIONS_SUPPORTED = [CURRENT_WORKFLOW_SAVE_VERSION]
# The current version of the workflow definition save file
CURRENT_WORKFLOW_DEFINITION_SAVE_VERSION = '1.0'
# Type of workflow saved - workflow definitions
WORKFLOW_DEFINITION_SAVE_TYPE = 'workflow definition'
# List of workflow definition save files we understand
WORKFLOW_DEFINITION_SAVE_VERSIONS_SUPPORTED = [CURRENT_WORKFLOW_DEFINITION_SAVE_VERSION]
# Maximum code length accepted (30 KiB)
MAX_CODE_LENGTH = 30 * 1024
def _clean_for_json(dirty: object) -> object:
    """Cleans a value of non-JSON compatible elements
    Arguments:
        dirty: the value to clean; normally a dictionary but lists, tuples, and sets are
               also handled
    Return:
        Returns a cleaned copy of the containers with callable entries removed at every
        level. Dictionaries stay dictionaries, lists stay lists, tuples stay tuples, and
        sets are returned as lists since JSON can't represent a set. Any other value is
        returned unchanged
    """
    if isinstance(dirty, dict):
        # We don't want callable objects to be returned
        return {key: _clean_for_json(item) for key, item in dirty.items() if not callable(item)}

    if isinstance(dirty, (list, set, frozenset)):
        return [_clean_for_json(el) for el in dirty if not callable(el)]

    if isinstance(dirty, tuple):
        # Build a real tuple; a bare generator expression can't be serialized
        return tuple(_clean_for_json(el) for el in dirty if not callable(el))

    return dirty
def _get_num_code_lines(code: list) -> int:
    """Determines how many code lines there are once trailing blank lines are ignored
    Arguments:
        code: the list of code lines
    Returns:
        Return the number of leading lines to process; this is the index just past the
        last line that contains something other than whitespace, or zero if there isn't one
    Notes:
        The list of code lines is not modified
    """
    # Walk backwards until a line with content is found
    for line_count in range(len(code), 0, -1):
        if code[line_count - 1].strip():
            return line_count
    return 0
def _get_python_preamble(code: list, start_index: int = 0) -> int:
    """Finds where the python preamble ends (the part before variables can be written)
    Arguments:
        code: the list of code lines
        start_index: the starting index to begin processing
    Returns:
        Returns the ending index of the preamble. When a line starting with 'def' or 'class'
        is found after the starting index, the index is backed up over the comment lines
        immediately before it and the index of the line before those is returned. When no
        such line is found the number of code lines is returned
    """
    # pylint: disable=too-many-branches
    total_lines = len(code)
    index = start_index
    inside_docstring = False

    # Blank lines, imports, comments, and docstrings are all stepped over
    while index < total_lines:
        stripped = code[index].strip()

        if inside_docstring:
            # An odd number of triple quotes on the line closes the docstring
            index += 1
            if stripped.count('"""') % 2 == 1:
                inside_docstring = False
            continue

        if not stripped or stripped.startswith(('from', 'import')):
            index += 1
            continue

        if stripped.startswith(('def', 'class')):
            # We are done, back up over any comment lines attached to the definition
            if index > start_index:
                index -= 1
                while code[index].strip().startswith('#'):
                    index -= 1
            break

        # Any other line is skipped; an odd number of triple quotes opens a docstring
        inside_docstring = stripped.count('"""') % 2 == 1
        index += 1

    return index
def _write_python_file(filepath: str, code: str, variables: dict = None) -> tuple:
    """Writes the Python code to the specified file overwriting the current contents of the file
    Arguments:
        filepath - the file to write to
        code - the python to write
        variables - variables to add to the python code that's being written; each one is
                    written as a string assignment after the preamble
    Return:
        Returns the number of variables written and the starting line number (1-based) of the
        variables as a 2-tuple
    Notes:
        Trailing whitespace is removed from each line and trailing blank lines are not written
    """
    # Break the code apart into new lines
    code_lines = code.split('\n')
    num_lines = _get_num_code_lines(code_lines)

    # Prepare for the run
    if variables is None:
        variables = {}

    # The preamble end index is inclusive. It's clamped to the lines that get written since
    # it equals the total line count when the code has no function or class definition,
    # and it can be negative when there is no preamble
    preamble_end = min(_get_python_preamble(code_lines, 0), num_lines - 1)
    preamble_count = max(preamble_end + 1, 0)
    variable_start_line = preamble_count + 1

    # Write the code
    with open(filepath, 'w', encoding='utf8') as out_file:
        # Write preamble
        for one_line in code_lines[:preamble_count]:
            out_file.write(one_line.rstrip() + '\n')

        # Write variables
        if variables:
            for key, value in variables.items():
                # json.dumps produces a double-quoted literal with quotes, backslashes, and
                # newlines escaped so a value can't break out of its string
                out_file.write(key + ' = ' + json.dumps(str(value), ensure_ascii=False) + '\n')
            out_file.write('\n')

        # Write remainder
        for one_line in code_lines[preamble_count:num_lines]:
            out_file.write(one_line.rstrip() + '\n')

    return (len(variables), variable_start_line)
def _lint_python_file(filepath: str) -> list:
    """Runs pylint in errors-only mode on a python file
    Arguments:
        filepath: the path of the python file to lint
    Returns:
        Returns the list of text fragments that pylint's text reporter wrote
    """
    class MessageCollector():
        """Stand-in for an output stream that stores what pylint writes"""
        def __init__(self):
            """Start out with nothing collected"""
            self.content = []

        def write(self, message):
            """Keeps the written message for later"""
            self.content.append(message)

        def read(self):
            """Returns everything written so far"""
            return self.content

    collector = MessageCollector()
    # The file to check comes first followed by the options
    lint_args = [
        filepath,
        '-r', 'n',
        '--rcfile=pylint.rc',
        "--msg-template='{C}:{line}:{column}:{msg}:{symbol}:{msg_id}'",
        '--errors-only',
    ]
    lint.Run(lint_args, reporter=TextReporter(collector), exit=False)

    return collector.read()
def _test_python_file(algo_type: str, lang: str, filepath: str, test_folder: str) -> Union[tuple, dict]:
    """Runs the python file against the test images of its algorithm type
    Arguments:
        algo_type: the type of algorithm to test
        lang: the language to test
        filepath: the file to test, relative to the test folder
        test_folder: the folder to run the test in
    Returns:
        Returns the lines of the rgb_plot.csv file found in the test folder after the run, or
        a dictionary with an 'error' entry when that file isn't found
    Exceptions:
        Raises RuntimeError if the template folder for the algorithm type and language is not found
    """
    print("HACK: _test_python_file", algo_type, lang, filepath, test_folder)

    # Locate the template for this algorithm type and language
    template_folder = os.path.join(CODE_TEMPLATE_PATH, algo_type, lang)
    print("HACK: _test_python_file", "template folder", template_folder)
    if not os.path.isdir(template_folder):
        shown_folder = os.path.join('/', algo_type, lang)
        raise RuntimeError(f'Expected template folder "{shown_folder}" is not found')

    # Only the python files directly in the template folder are copied
    for entry_name in os.listdir(template_folder):
        entry_path = os.path.join(template_folder, entry_name)
        if not (os.path.isfile(entry_path) and entry_path.endswith('.py')):
            continue
        shutil.copyfile(entry_path, os.path.join(test_folder, entry_name))
        print("HACK: _test_python_file", "template copy", entry_name)

    # Copy the test images, and any folders of them, into the test folder
    test_images = []
    images_folder = os.path.join(CODE_TEMPLATE_PATH, algo_type, 'images')
    dest_folder = os.path.join(test_folder, 'images')
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)

    # Sub-folders are added to the end of the pending list as they are found
    pending_folders = [images_folder]
    next_pending = 0
    while next_pending < len(pending_folders):
        one_folder = pending_folders[next_pending]
        next_pending += 1

        # Mirror the location below the images folder in the destination
        if one_folder == images_folder:
            cur_dest_folder = dest_folder
        else:
            cur_dest_folder = dest_folder + one_folder[len(images_folder):]

        for entry_name in os.listdir(one_folder):
            src_name = os.path.join(one_folder, entry_name)
            dest_name = os.path.join(cur_dest_folder, entry_name)
            if os.path.isfile(src_name):
                shutil.copyfile(src_name, dest_name)
                test_images.append(dest_name)
            elif os.path.isdir(src_name):
                os.makedirs(dest_name, exist_ok=True)
                pending_folders.append(src_name)

    # Run the test with the copied images as arguments
    cmd = [sys.executable, os.path.join(test_folder, filepath), '--working_space', test_folder]
    cmd = cmd + test_images
    proc = subprocess.run(cmd, capture_output=True, check=False)
    print("PROC: ", cmd, proc.returncode, proc.stdout, proc.stderr)

    # Without a result file the run is considered a failure
    csv_filepath = os.path.join(test_folder, 'rgb_plot.csv')
    if not os.path.exists(csv_filepath):
        print("Testing run failed")
        return {'error': 'Testing run was not successful'}

    with open(csv_filepath, 'r', encoding='utf8') as in_file:
        return in_file.read().split('\n')
def normalize_path(path: str) -> str:
    """Normalizes the path to the current OS separator character
    Arguments:
        path: the path to localize
    Return:
        Returns the localized path, which may be unchanged. The other platform's separator
        is replaced by the current OS separator and runs of that separator inside the path
        are collapsed to one. A leading or trailing separator is preserved
    """
    to_replace = '\\' if os.path.sep == '/' else '/'

    parts = path.split(to_replace)
    if len(parts) <= 1:
        # Nothing to replace
        return path

    # Strip out doubled up separators. The first and last parts are always kept, even
    # when empty, so that a leading (rooted path) or trailing separator isn't lost
    middle_parts = [one_part for one_part in parts[1:-1] if len(one_part) > 0]
    return os.path.sep.join([parts[0]] + middle_parts + [parts[-1]])
def _is_path_within(root: str, candidate: str) -> bool:
    """Returns True when the candidate path is the root folder or is located below it
    Arguments:
        root: absolute path of the folder that must contain the candidate
        candidate: absolute path to check
    """
    try:
        return os.path.commonpath([root, candidate]) == root
    except ValueError:
        # Raised when the paths can't be compared, such as being on different drives
        return False


def copy_server_file(auth: dict, source_path: str, dest_path: str) -> bool:
    """Copies the server side file to the specified location
    Arguments:
        auth: authorization information (not used)
        source_path: path to the file to copy; either below one of the additional folders
                     (as "/<folder name>/<path>") or relative to the session's upload folder
        dest_path: path to copy the file to
    Return:
        Returns True when the file has been copied
    Exceptions:
        RuntimeError is raised if the path to copy from is empty or is not in the correct top folder
    """
    # pylint: disable=unused-argument
    working_path = normalize_path(source_path)
    if not working_path:
        raise RuntimeError("Invalid source path for server side copy:", source_path)

    # Check if we have a special path. A path with a single component has no folder name
    path_parts = Path(working_path).parts
    if ADDITIONAL_LOCAL_FOLDERS and len(path_parts) > 1:
        dir_name = path_parts[1]
        if dir_name in ADDITIONAL_LOCAL_FOLDERS:
            folder_root = os.path.abspath(ADDITIONAL_LOCAL_FOLDERS[dir_name])
            # Skip over the leading separator, the folder name, and the separator after it
            cur_path = os.path.abspath(os.path.join(folder_root, working_path[len(dir_name) + 2:]))
            # Don't allow the remainder of the path to escape the additional folder
            if not _is_path_within(folder_root, cur_path):
                raise RuntimeError("Invalid source path for server side copy:", cur_path)
            shutil.copyfile(cur_path, dest_path)
            return True

    # Make rooted paths relative so that they're joined below the upload folder
    if working_path[0] in ('/', os.path.sep):
        working_path = '.' + working_path

    upload_root = os.path.abspath(session['upload_folder'])
    cur_path = os.path.abspath(os.path.join(upload_root, working_path))
    # Compare whole path components; a plain prefix test would also accept a sibling
    # folder whose name merely starts with the upload folder's name
    if not _is_path_within(upload_root, cur_path):
        raise RuntimeError("Invalid source path for server side copy:", cur_path)

    shutil.copyfile(cur_path, dest_path)
    return True
def irods_sha256_checksum(file_path: str, block_size: int=65536) -> str:
    """Calculates the iRODS SHA256 checksum for a file
    Arguments:
        file_path: the path to the file to calculate the checksum for
        block_size: the size of the blocks to read in
    Return:
        The base64 encoding of the SHA256 digest as a string
    """
    digest = hashlib.sha256()

    with open(file_path, 'rb') as in_file:
        # Read blocks until an empty read signals the end of the file
        for one_block in iter(lambda: in_file.read(block_size), b''):
            digest.update(one_block)

    encoded = base64.b64encode(digest.digest())
    return encoded.decode()
def irod_md5_checksum(file_path: str, block_size: int=65536) -> str:
    """Calculates the iRODS MD5 checksum for a file
    Arguments:
        file_path: the path of the file to calculate the checksum for
        block_size: the size of the blocks to read in
    Return:
        The hexadecimal MD5 digest as a string
    """
    md5 = hashlib.md5()
    with open(file_path, 'rb') as in_file:
        # Hash the file a block at a time so large files aren't loaded into memory whole
        for chunk in iter(lambda: in_file.read(block_size), b''):
            md5.update(chunk)

    return md5.hexdigest()
def get_irods_file(auth: dict, source_path: str, dest_path: str) -> bool:
    """Fetches the iRODS file to the specified location on the local Machine
    Arguments:
        auth: authorization information with the 'host', 'port', 'user', 'password', and
              'zone' connection values
        source_path: path to the file to pull down
        dest_path: path to the destination file
    Return:
        Returns True if the file was downloaded and its checksum matches the one reported
        by iRODS, and False if every attempt ended with a mismatched checksum
    """
    for cur_try in range(0, IRODS_DOWNLOAD_RETRIES):
        # Remove the bad file from a previous attempt so that it isn't downloaded on top of
        if cur_try > 0 and os.path.isfile(dest_path):
            os.remove(dest_path)

        with iRODSSession(host=auth['host'], port=auth['port'], user=auth['user'], password=auth['password'],
                          zone=auth['zone']) as conn:
            obj = conn.data_objects.get(source_path, dest_path)
            remote_checksum = obj.checksum or ''    # pylint: disable=no-member

        # Check the checksums using the method the server reported; SHA256 checksums are
        # reported with a "sha2:" prefix and anything else is treated as MD5
        if remote_checksum.startswith('sha2:'):
            local_checksum = 'sha2:' + irods_sha256_checksum(dest_path)
        else:
            local_checksum = irod_md5_checksum(dest_path)

        if local_checksum == remote_checksum:
            return True

        print ("IRODS: attempt", (cur_try + 1), "Bad checksum on downloaded file:", source_path)

    return False
def put_irods_file(auth: dict, source_path: str, dest_path: str) -> bool:
    """Uploads the file to iRODS
    Arguments:
        auth: authorization information
        source_path: path to the source file
        dest_path: path to upload the file to
    Exceptions:
        Always raises NotImplementedError, which is a RuntimeError, since uploading is not
        implemented
    """
    # pylint: disable=unused-argument
    raise NotImplementedError("iRODS put is not implemented")
# The known file handlers keyed by their data type identifier. Each handler has a display name
# and the functions to call to get and put files; the functions defined in this file take the
# authorization information, the source path, and the destination path
# NOTE(review): the Server-side handler uses copy_server_file for putFile as well, which validates
# the source path against the server folders - confirm this is what's wanted when putting files
FILE_HANDLERS = {
    '1': {
        'name': 'Server-side',
        'getFile': copy_server_file,
        'putFile': copy_server_file,
    },
    '2': {
        'name': 'iRODS',
        'getFile': get_irods_file,
        'putFile': put_irods_file,
    }
}
def get_queue_path(working_folder: str) -> str:
    """Builds the path of the queue file for a working folder
    Arguments:
        working_folder: path to the working folder
    Return:
        Returns the path to the file named 'queue' in the working folder
    """
    queue_file_name = 'queue'
    return os.path.join(working_folder, queue_file_name)
def queue_start(workflow_id: str, working_folder: str, recover: bool) -> dict:
    """Handles starting queueing a set of processes
    Arguments:
        workflow_id: the workflow ID
        working_folder: string representing the working folder
        recover: flag indicating this is an attempt to restart a workflow
    Return:
        Returns information on this process as a dictionary: 'recover' holds the recover
        flag and 'cleanup' is True when an invalid queue file was found and removed
    Exceptions:
        RuntimeError is raised when recovering and there is no queue file to recover
    """
    print("Begin queueing workflow", workflow_id)
    cleanup = False
    queue_path = get_queue_path(working_folder)

    if recover is True:
        # Make sure we have something to recover
        if not os.path.isfile(queue_path):
            msg = f'ERROR: Attempting to recover a missing workflow {working_folder}'
            print (msg)
            raise RuntimeError(msg)
        # TODO: Signal recover
        return {'recover': recover, 'cleanup': cleanup}

    # Check if an existing queue is valid (contains a JSON list) and clean it up if not
    if os.path.isfile(queue_path):
        queue_is_valid = False
        try:
            with open(queue_path, 'r', encoding='utf8') as in_file:
                queue_is_valid = isinstance(json.load(in_file), list)
        except (OSError, ValueError) as ex:
            # JSON and text decoding problems are both ValueError exceptions
            print(f'Unable to load the existing queue "{queue_path}", it will be replaced:', ex)

        if not queue_is_valid:
            os.unlink(queue_path)
            # TODO: Signal cleanup
            cleanup = True

    # Begin the starting queue
    with open(queue_path, 'w', encoding='utf8') as out_file:
        json.dump([], out_file)

    return {'recover': recover, 'cleanup': cleanup}
def queue_one_process(workflow_id: str, cur_command: dict, working_folder: str, process_info: dict):
    """Handles queueing one command
    Arguments:
        workflow_id: the workflow ID
        cur_command: the command to queue; its file parameters are fetched into the command's
                     working folder and their values are changed to the fetched file's path
        working_folder: string representing the working folder
        process_info: dictionary returned by starting process call
    Notes:
        The parameter and command dictionaries are deliberately not printed since they can
        contain the authorization information used to fetch files
    """
    print("Current command ", cur_command['step'], " with working folder '", cur_command['working_folder'], "'")
    print("  Checking for files")
    for one_parameter in cur_command['parameters']:
        print("   ", one_parameter.get('field_name'), one_parameter.get('type'))

        # Skip over special cases
        if one_parameter.get('visibility') == 'server':
            continue

        # Only files need to be downloaded
        if one_parameter['type'] != 'file':
            continue

        # Check for missing optional files
        if not one_parameter['value'] and one_parameter.get('mandatory') is False:
            print('    Skipping missing non-mandatory file', one_parameter.get('field_name'))
            continue

        # Copy mandatory file
        dest_path = os.path.join(cur_command['working_folder'], os.path.basename(one_parameter['value']))
        print("Downloading file '", one_parameter['value'], "' to '", dest_path, "'")
        one_parameter['getFile'](one_parameter['auth'], one_parameter['value'], dest_path)
        one_parameter['value'] = dest_path

    print("Run workflow step", workflow_id, cur_command['step'], cur_command['command'])
    queue_path = get_queue_path(working_folder)

    if 'recover' in process_info and process_info['recover'] is True:
        # TODO: Signal recover
        return

    # Add the command to the end of the saved queue
    with open(queue_path, 'r', encoding='utf8') as in_file:
        current_workflow = json.load(in_file)

    print("Appending command to workflow: ", cur_command['step'])
    # Callable values, such as the file handler functions, can't be saved as JSON
    current_workflow.append(_clean_for_json(cur_command))
    print("Current workflow has", len(current_workflow), "commands")

    with open(queue_path, 'w', encoding='utf8') as out_file:
        json.dump(current_workflow, out_file, indent=2)
def queue_finish(workflow_id: str, working_folder: str, process_info: dict):
    """Completes the queueing of a workflow by launching the workflow runner
    Arguments:
        workflow_id: the workflow ID
        working_folder: string representing the working folder
        process_info: dictionary returned by starting process call (not used)
    Notes:
        The runner script is started with python3 and is not waited on; its output is discarded
    """
    # pylint: disable=unused-argument
    runner_script = os.path.join(OUR_LOCAL_PATH, 'workflow_runner.py')
    print("Finished queueing", workflow_id, working_folder, runner_script)

    launch_cmd = ['python3', runner_script, working_folder]
    # The runner is deliberately left running on its own
    # pylint: disable=consider-using-with
    runner = subprocess.Popen(launch_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    print("PROC: ", launch_cmd, runner.pid)
def queue_status(workflow_id: str, working_folder: str) -> Union[dict, str, None]:
    """Returns the status of the workflow
    Arguments:
        workflow_id: the ID of the current workflow
        working_folder: the working folder for the workflow
    Return:
        Returns None if the workflow isn't started (there's no status file), the current
        status, or the value of the status' 'completion' entry when it has one. A generic
        pending status is returned if the real status can't be loaded
    """
    print("Checking queue status", workflow_id, working_folder)
    status_path = os.path.join(working_folder, 'status.json')

    if not os.path.exists(status_path):
        return None

    cur_status = None
    caught_exception = False
    last_attempt = FILE_PROCESS_QUEUE_STATUS_RETRIES - 1
    for one_attempt in range(0, FILE_PROCESS_QUEUE_STATUS_RETRIES):
        # Assume a problem until the status is loaded
        caught_exception = True
        try:
            with open(status_path, 'r', encoding='utf8') as in_file:
                cur_status = json.load(in_file)
            caught_exception = False
        except json.JSONDecodeError as ex:
            # The file may be in the middle of being written
            print("A JSON decode error was caught while loading status information", ex)
        except OSError as ex:
            msg = f'An OS exception was caught while trying to open status file "{status_path}"'
            print(msg, ex)
        except Exception as ex:
            msg = f'Unknown exception caught while trying to access the status file "{status_path}"'
            print(msg, ex)

        if cur_status is not None:
            break

        # There's no point in waiting when there isn't another attempt
        if one_attempt < last_attempt:
            print("Sleeping before trying to get status again")
            delay_index = min(one_attempt, len(FILE_PROCESS_QUEUE_STATUS_TIMEOUTS) - 1)
            time.sleep(FILE_PROCESS_QUEUE_STATUS_TIMEOUTS[delay_index])

    # Only a dictionary status can have a completion entry
    if isinstance(cur_status, dict) and 'completion' in cur_status:
        cur_status = cur_status['completion']

    return cur_status if not caught_exception else {'status': 'Pending...'}
def _read_queue_lines(cur_path: str, file_desc: str, content_desc: str) -> Optional[list]:
    """Reads the lines of a workflow output file, retrying when the file can't be read
    Arguments:
        cur_path: the path of the file to read
        file_desc: describes the kind of file in reported problems ('output' or 'error')
        content_desc: describes the file's content in reported retries ('messages' or 'errors')
    Return:
        Returns the list of lines read, or None if the file doesn't exist or couldn't be read
    """
    if not os.path.exists(cur_path):
        return None

    last_attempt = FILE_PROCESS_QUEUE_MESSAGES_RETRIES - 1
    for one_attempt in range(0, FILE_PROCESS_QUEUE_MESSAGES_RETRIES):
        try:
            with open(cur_path, 'r', encoding='utf8') as in_file:
                return in_file.readlines()
        except OSError as ex:
            msg = f'An OS exception was caught while trying to read {file_desc} file "{cur_path}"'
            print(msg, ex)
        except Exception as ex:
            msg = f'An unknown exception was caught while trying to read {file_desc} file "{cur_path}"'
            print(msg, ex)

        # There's no point in waiting when there isn't another attempt
        if one_attempt < last_attempt:
            msg = f'Sleeping {one_attempt} before trying to get {content_desc} again "{cur_path}"'
            print(msg)
            delay_index = min(one_attempt, len(FILE_PROCESS_QUEUE_MESSAGE_TIMEOUTS) - 1)
            time.sleep(FILE_PROCESS_QUEUE_MESSAGE_TIMEOUTS[delay_index])

    return None


def queue_messages(workflow_id: str, working_folder: str) -> tuple:
    """Returns the messages of the workflow
    Arguments:
        workflow_id: the ID of the current workflow
        working_folder: the working folder for the workflow
    Return:
        A 2-tuple of: normal messages and error messages as separate lists of lines, read from
        the messages.txt and errors.txt files in the working folder. None is returned in
        place of a list if its messages can't be loaded
    """
    print("Checking queue messages", workflow_id, working_folder)

    messages = _read_queue_lines(os.path.join(working_folder, 'messages.txt'), 'output', 'messages')
    errors = _read_queue_lines(os.path.join(working_folder, 'errors.txt'), 'error', 'errors')

    return messages, errors
def workflow_start(workflow_id: str, workflow_template: dict, data: list, file_handlers: list, working_folder: str, recover: bool=False):
"""Starts a workflow by building the command for each step of the template and queueing them
Arguments:
workflow_id: the ID of the current workflow
workflow_template: the template of the workflow to run; its 'steps' are processed in order
data: the data used by the template for processing; entries are matched to a step's fields
by their 'command' and 'field_name' values
file_handlers: the known file handlers; these are looked up by an entry's 'data_type' value
working_folder: the working folder for the workflow
recover: flag to indicate we're trying to recover a workflow that had a problem
Exceptions:
RuntimeError is raised if a value can't be found for a mandatory field of a step
"""
# Disable these warnings to keep avoid breaking the preparation into too many small pieces
# pylint: disable=too-many-nested-blocks, too-many-branches
workflow = []
# Build up the list of commands, one for each step of the template
for one_step in workflow_template['steps']:
cur_command = one_step['command']
parameters = []
if 'fields' in one_step:
for one_field in one_step['fields']:
# Find the data associated with this field
cur_parameter = {}
# Fields with 'server' visibility are built from the field definition alone
if 'visibility' in one_field and one_field['visibility'] == 'server':
print("SERVER SIDE", one_field)
cur_parameter = {'command': one_field['name'], 'field_name': one_field['name'], 'type': one_field['type'],
'prev_command_path': one_field['prev_command_path'], 'visibility': one_field['visibility']}
else:
# Look for the data entry belonging to this step's command and this field
for one_data in data:
if 'command' in one_data and one_data['command'] == cur_command and one_data['field_name'] == one_field['name']:
print("WORKING ON", one_data, one_field)
# A field is mandatory unless it explicitly says otherwise
is_mandatory = not 'mandatory' in one_field or one_field ['mandatory']
if 'data_type' in one_data:
# Entries with a known data type get the file handler's values merged in
if one_data['data_type'] in file_handlers:
cur_parameter = {**one_data, **(file_handlers[one_data['data_type']])}
cur_parameter['command'] = one_field['name']
cur_parameter['type'] = one_field['type']
cur_parameter['mandatory'] = is_mandatory
break
else:
# The value of other entries is stored under the field's name
cur_parameter = {'field_name': one_data['field_name'], 'value': one_data[one_data['field_name']],
'type': one_field['type'], 'mandatory': is_mandatory}
print("  ", cur_parameter)
break
if cur_parameter:
print("HACK: ADDING PARAMETER FOR FIELD", cur_parameter, one_field)
parameters.append(cur_parameter)
else:
print("HACK: SKIPPING PARAMETER FOR FIELD", one_field)
# Missing values are only a problem for mandatory fields
if not 'mandatory' in one_field or one_field ['mandatory']:
print("Unable to find parameter for step ", one_step['name'], ' field ', one_field['name'])
# pylint: disable=consider-using-f-string
raise RuntimeError('Missing mandatory value for %s on workflow step %s' % (one_field['name'], one_step['name']))
cur_step = {'step': one_step['name'], 'command': one_step['command'], 'parameters': parameters, 'working_folder': working_folder}
print("HACK: CHECKING GIT", one_step)
# Carry over the step's git information when it has any
if 'git_repo' in one_step:
print("HACK: FOUND GIT")
cur_step['git_repo'] = one_step['git_repo']
if 'git_branch' in one_step:
cur_step['git_branch'] = one_step['git_branch']
print("HACK: CUR STEP",cur_step)
workflow.append(cur_step)
# Queue up all the commands and then finish the queue
process_info = queue_start(workflow_id, working_folder, recover)
print("FINAL WORKFLOW: ",workflow)
for one_process in workflow:
queue_one_process(workflow_id, one_process, working_folder, process_info)
queue_finish(workflow_id, working_folder, process_info)
def workflow_status(workflow_id: str, working_folder: str) -> dict:
    """Reports where a workflow currently stands
    Arguments:
        workflow_id: the ID of the current workflow
        working_folder: the working folder for the workflow
    Return:
        Returns a dict with a 'result' status ID. Unless the queue reported nothing
        (not started), the dict also carries a 'status' entry: the queue's dict as-is
        while it contains a 'running' key, otherwise the string form of what the queue returned
    """
    print("Checking workflow status", workflow_id, working_folder)
    queue_state = queue_status(workflow_id, working_folder)

    if queue_state is None:
        # Nothing reported by the queue
        outcome = {'result': STATUS_NOT_STARTED}
    elif isinstance(queue_state, dict) and 'running' in queue_state:
        # Still in progress: hand back the queue's own status dict untouched
        outcome = {'result': STATUS_RUNNNG, 'status': queue_state}
    else:
        # Anything else is treated as finished and reported in string form
        outcome = {'result': STATUS_FINISHED, 'status': str(queue_state)}

    return outcome
def workflow_messages(workflow_id: str, working_folder: str) -> dict:
    """Fetches the messages produced by a workflow
    Arguments:
        workflow_id: the ID of the current workflow
        working_folder: the working folder for the workflow
    Return:
        Returns a dict with 'messages' and 'errors' entries as returned by the workflow
        query; an entry that came back as None is replaced with an empty list
    """
    print("Checking workflow messages", workflow_id, working_folder)
    found_messages, found_errors = queue_messages(workflow_id, working_folder)

    # Normalise missing values so callers always receive something they can iterate
    if found_messages is None:
        found_messages = []
    if found_errors is None:
        found_errors = []

    return {'messages': found_messages, 'errors': found_errors}
def workflow_has_secure_parameters(params: list) -> bool:
    """Checks a parameter list for sensitive information
    Arguments:
        params - the list of parameters to check
    Returns:
        Returns True if any dict parameter has an 'auth' entry with a truthy value
        and False otherwise
    """
    # Non-dict entries and dicts with a missing or empty 'auth' entry are not sensitive;
    # any() stops at the first match
    return any(isinstance(candidate, dict) and bool(candidate.get('auth'))
               for candidate in params)
def secure_workflow_parameters(params: list, passcode: str) -> list:
    """Builds a parameter list in which sensitive information is encrypted
    Arguments:
        params - the list of parameters to secure
        passcode - the passcode to use to secure information
    Returns:
        A new list of parameters. Each dict parameter with a truthy 'auth' entry is replaced
        by a shallow copy whose 'auth' is the encrypted JSON form of the original value;
        every other entry is passed through unchanged
    """
    encrypter = None
    secured = []
    for entry in params:
        if not (isinstance(entry, dict) and entry.get('auth')):
            # Nothing sensitive here, keep the original object
            secured.append(entry)
            continue

        # Only create the Crypt instance once a sensitive parameter is actually found
        if encrypter is None:
            encrypter = Crypt(ENCRYPTION_SALT)

        # Work on a shallow copy so the caller's parameter is left untouched
        protected = copy.copy(entry)
        protected['auth'] = encrypter.encrypt(json.dumps(protected['auth']), passcode)
        secured.append(protected)

    return secured
def unsecure_workflow_parameters(params: list, passcode: str, raise_on_error: bool = False) -> list:
    """Restores encrypted sensitive workflow parameters to their plain form
    Arguments:
        params - the parameters to make clear text
        passcode - the passcode that was used to secure information (used to make plain again)
        raise_on_error - errors will raise an exception when set to True; defaults to False
    Returns:
        A new list of parameters. Each dict parameter whose 'auth' entry is a string is replaced
        by a shallow copy with 'auth' decrypted and loaded from JSON; every other entry is
        passed through unchanged
    Notes:
        When a ValueError is caught and raise_on_error is False, the copied parameter keeps its
        original 'auth' value
    """
    decrypter = None
    restored = []
    for entry in params:
        if not (isinstance(entry, dict) and 'auth' in entry and isinstance(entry['auth'], str)):
            # Only string 'auth' values are treated as encrypted
            restored.append(entry)
            continue

        # Only create the Crypt instance once an encrypted parameter is actually found
        if decrypter is None:
            decrypter = Crypt(ENCRYPTION_SALT)

        # Work on a shallow copy so the caller's parameter is left untouched
        revealed = copy.copy(entry)
        try:
            decrypted_json = decrypter.decrypt(revealed['auth'], passcode)
            revealed['auth'] = json.loads(decrypted_json)
        except ValueError as ex:
            print('Value exception caught while trying to load secured "auth" information from workflow:', revealed['auth'])
            print(ex)
            if raise_on_error:
                raise ex
            print('Keeping original value')
        restored.append(revealed)

    return restored
@app.after_request
def add_cors_headers(response):
"""Appends CORS headers to a response
Arguments:
response: the response to append headers to
Notes:
Called automatically due to app.after_request decoration
"""
response.headers.add('Access-Control-Allow-Credentials', 'true')
response.headers.add('Access-Control-Allow-Headers', 'Content-Type')
response.headers.add('Access-Control-Allow-Headers', 'Cache-Control')