diff --git a/requirements.txt b/requirements.txt index adbcaad..171b439 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,3 @@ flask==2.0.3 gunicorn==20.1.0 requests==2.27.0 pandas==1.3.5 -retrying==1.3.4 -Werkzeug==2.0.3 \ No newline at end of file diff --git a/src/app.py b/src/app.py index 6e6fd6c..2ddee0b 100644 --- a/src/app.py +++ b/src/app.py @@ -1,5 +1,4 @@ from flask import Flask, jsonify, request -import logging import os import pandas as pd import csv @@ -8,24 +7,37 @@ from data_loading import * # ---------------------------------------------------------------------------- -# DATA PARAMETERS +# ENV Variables & DATA PARAMETERS # ---------------------------------------------------------------------------- +data_access_type = os.environ.get('DATA_ACCESS_TYPE') + current_folder = os.path.dirname(__file__) -DATA_PATH = os.path.join(current_folder,'data') ASSETS_PATH = os.path.join(current_folder,'assets') +local_data_path = os.environ.get("LOCAL_DATA_PATH","") +local_data_date = os.environ.get("LOCAL_DATA_DATE","") + +imaging_filepath = os.path.join(local_data_path,os.environ.get("IMAGING_FILE")) +qc_filepath = os.path.join(local_data_path,os.environ.get("QC_FILE")) +blood1_filepath = os.path.join(local_data_path,os.environ.get("BLOOD1_FILE")) +blood2_filepath = os.path.join(local_data_path,os.environ.get("BLOOD2_FILE")) +subjects1_filepath = os.path.join(local_data_path,os.environ.get("SUBJECTS1_FILE")) +subjects2_filepath = os.path.join(local_data_path,os.environ.get("SUBJECTS2_FILE")) + +print(local_data_path, local_data_date, subjects2_filepath) + + # ---------------------------------------------------------------------------- # LOAD ASSETS FILES # ---------------------------------------------------------------------------- -asset_files_dict = { - 'screening_sites': 'screening_sites.csv', - 'display_terms': 'A2CPS_display_terms.csv', -} -display_terms, display_terms_dict, display_terms_dict_multi = load_display_terms(ASSETS_PATH, asset_files_dict['display_terms']) - -screening_sites = pd.read_csv(os.path.join(ASSETS_PATH,asset_files_dict['screening_sites'])) +# Pointers to official files stored at github repository main branch +screening_sites_github_url = 'https://raw.githubusercontent.com/TACC/a2cps-datastore-weekly/main/src/assets/screening_sites.csv' +display_terms_github_url = 'https://raw.githubusercontent.com/TACC/a2cps-datastore-weekly/main/src/assets/A2CPS_display_terms.csv' +# load display terms and screening sites +screening_sites = pd.read_csv(screening_sites_github_url) +display_terms, display_terms_dict, display_terms_dict_multi = load_display_terms_from_github(display_terms_github_url) # Columns used in reports [UPDATE THIS IF START TO USE MORE] subjects_raw_cols_for_reports = ['ewcomments', @@ -53,39 +65,16 @@ 'main_record_id', 'sp_mricompatscr', 'ewdateterm'] -# ---------------------------------------------------------------------------- -# LOAD INITAL DATA FROM FILES -# ---------------------------------------------------------------------------- - -local_date = '2022-09-08' - -local_imaging_data = { - 'date': local_date, - 'data': get_local_imaging_data(DATA_PATH)} -local_blood_data = { - 'date': local_date, - 'data': get_local_blood_data(DATA_PATH)} -subjects_raw = get_local_subjects_raw(DATA_PATH) -local_subjects_data = { - 'date': local_date, - 'data': process_subjects_data(subjects_raw,subjects_raw_cols_for_reports,screening_sites, display_terms_dict, display_terms_dict_multi) - } - -local_data = { - 'imaging': local_imaging_data, - 'blood': local_imaging_data, - 'subjects': local_subjects_data -} # ---------------------------------------------------------------------------- # APIS # ---------------------------------------------------------------------------- datetime_format = "%m/%d/%Y, %H:%M:%S" - apis_imaging_index = {} data_state = 'empty' + api_data_index = { 'blood':'', 'imaging':'', @@ -107,9 +96,6 @@ 'consort':None, } -# ---------------------------------------------------------------------------- -# SIMPLE APIS -# ---------------------------------------------------------------------------- api_data_simple = { 'blood':None, 'imaging':None, @@ -117,50 +103,69 @@ 'raw': None } +# ---------------------------------------------------------------------------- +# APP +# ---------------------------------------------------------------------------- + app = Flask(__name__) app.debug = True -logging.basicConfig(filename='app.log',level=logging.DEBUG) # APIS: try to load new data, if doesn't work, get most recent @app.route("/api/apis") def api_apis(): return jsonify(api_data_index) +@app.route("/api/tester") +def api_tester(): + if local_data_path: + return jsonify(local_data_path) + else: + return jsonify('local_data_path not found') + @app.route("/api/imaging") def api_imaging(): global datetime_format global api_data_index global api_data_cache + try: if not api_data_index['imaging'] or not check_data_current(datetime.strptime(api_data_index['imaging'], datetime_format)): - api_date = datetime.now().strftime(datetime_format) - imaging_data = get_api_imaging_data(request) + if data_access_type != 'LOCAL': + data_date = datetime.now().strftime(datetime_format) + imaging_data = get_api_imaging_data(request) + else: + data_date = local_data_date + imaging_data = get_local_imaging_data(imaging_filepath, qc_filepath) + if imaging_data: + api_data_index['imaging'] = data_date api_data_cache['imaging'] = imaging_data - api_data_index['imaging'] = api_date + return jsonify({'date': api_data_index['imaging'], 'data': api_data_cache['imaging']}) except Exception as e: traceback.print_exc() return jsonify('error: {}'.format(e)) -@app.route("/api/consort") -def api_consort(): - global datetime_format - global api_data_index - global api_data_cache - # try: - if not api_data_index['consort'] or not check_data_current(datetime.strptime(api_data_index['consort'], datetime_format)): - api_date = datetime.now().strftime(datetime_format) - consort_data_json = get_api_consort_data(request) - if consort_data_json: - api_data_cache['consort'] = consort_data_json - api_data_index['consort'] = api_date - return jsonify({'date': api_data_index['consort'], 'data': api_data_cache['consort']}) - # except Exception as e: - # traceback.print_exc() - # return jsonify('error: {}'.format(e)) - -# get_api_consort_data +# @app.route("/api/consort") +# def api_consort(): +# global datetime_format +# global api_data_index +# global api_data_cache +# # try: +# if not api_data_index['consort'] or not check_data_current(datetime.strptime(api_data_index['consort'], datetime_format)): +# api_date = datetime.now().strftime(datetime_format) +# consort_data_json = get_api_consort_data(request) +# if consort_data_json: +# api_data_cache['consort'] = consort_data_json +# api_data_index['consort'] = api_date +# return jsonify({'date': api_data_index['consort'], 'data': api_data_cache['consort']}) +# # except Exception as e: +# # traceback.print_exc() +# # return jsonify('error: {}'.format(e)) + +# # get_api_consort_data + + @app.route("/api/blood") def api_blood(): global datetime_format @@ -168,10 +173,16 @@ def api_blood(): global api_data_cache try: if not api_data_index['blood'] or not check_data_current(datetime.strptime(api_data_index['blood'], datetime_format)): - api_date = datetime.now().strftime(datetime_format) - blood_data, blood_data_request_status = get_api_blood_data(request) + + if data_access_type != 'LOCAL': + data_date = datetime.now().strftime(datetime_format) + blood_data, blood_data_request_status = get_api_blood_data(request) + else: + data_date = local_data_date + blood_data, blood_data_request_status = get_local_blood_data(blood1_filepath, blood2_filepath) + if blood_data: - api_data_index['blood'] = api_date + api_data_index['blood'] = data_date api_data_cache['blood'] = blood_data with open('requests.csv', 'a', newline='') as f: @@ -196,47 +207,42 @@ def api_subjects(): try: if not api_data_index['subjects'] or not check_data_current(datetime.strptime(api_data_index['subjects'], datetime_format)): api_date = datetime.now().strftime(datetime_format) - latest_subjects_json = get_api_subjects_json(request) + if data_access_type != 'LOCAL': + data_date = datetime.now().strftime(datetime_format) + latest_subjects_json = get_api_subjects_json(request) + else: + data_date = local_data_date + latest_subjects_json = get_local_subjects_raw(subjects1_filepath, subjects2_filepath) if latest_subjects_json: - # latest_data = create_clean_subjects(latest_subjects_json, screening_sites, display_terms_dict, display_terms_dict_multi) + latest_data = create_clean_subjects(latest_subjects_json, screening_sites, display_terms_dict, display_terms_dict_multi) latest_data = process_subjects_data(latest_subjects_json,subjects_raw_cols_for_reports,screening_sites, display_terms_dict, display_terms_dict_multi) - api_data_cache['subjects'] = latest_data - api_data_index['subjects'] = api_date + api_data_index['subjects'] = data_date + api_data_cache['subjects'] = latest_subjects_json # latest_data + return jsonify({'date': api_data_index['subjects'], 'data': api_data_cache['subjects']}) except Exception as e: traceback.print_exc() return jsonify('error: {}'.format(e)) -def api_tester(): - - global local_subjects_data - - try: - return jsonify(local_subjects_data) - - except Exception as e: - traceback.print_exc() - return jsonify('error: {}'.format(e)) - -@app.route("/api/full") -def api_full(): - datafeeds = {} - for data_category in api_data_cache: - if api_data_cache[data_category]['data']: - datafeeds[data_category] = list(api_data_cache[data_category]['data'].keys()) - else: - datafeeds[data_category] = ['no data'] - return jsonify(datafeeds) - -@app.route("/api/simple") -def api_simple(): - if api_data_simple['subjects']: - return jsonify('simple subjects') - else: - return jsonify('not found') +# @app.route("/api/full") +# def api_full(): +# datafeeds = {} +# for data_category in api_data_cache: +# if api_data_cache[data_category]['data']: +# datafeeds[data_category] = list(api_data_cache[data_category]['data'].keys()) +# else: +# datafeeds[data_category] = ['no data'] +# return jsonify(datafeeds) + +# @app.route("/api/simple") +# def api_simple(): +# if api_data_simple['subjects']: +# return jsonify('simple subjects') +# else: +# return jsonify('not found') if __name__ == "__main__": diff --git a/src/data_loading.py b/src/data_loading.py index 361e4a9..4cc24fa 100644 --- a/src/data_loading.py +++ b/src/data_loading.py @@ -11,9 +11,9 @@ import datetime from datetime import datetime -from retrying import retry import logging +logger = logging.getLogger(__name__) # ---------------------------------------------------------------------------- @@ -210,31 +210,28 @@ def get_local_blood_data(blood1_filepath, blood2_filepath): # ---------------------------------------------------------------------------- # LOAD DATA FROM API # ---------------------------------------------------------------------------- - -# Retry handler for requests -@retry(wait_exponential_multiplier=500, wait_exponential_max=5000, stop_max_attempt_number=3) -def make_request_with_retry(url, cookies): - return requests.get(url, cookies=cookies) - # Get Tapis token if authorized to access data files def get_tapis_token(api_request): try: - response = make_request_with_retry(portal_api_root + '/auth/tapis/', api_request.cookies) + response = requests.get(portal_api_root + '/auth/tapis/', cookies=api_request.cookies) + #headers={'cookie':'coresessionid=' + api_request.cookies.get('coresessionid')}) if response: tapis_token = response.json()['token'] return tapis_token else: - logging.exception("Unauthorized to access tapis token.") + logger.warning("Unauthorized to access tapis token") raise Exception except Exception as e: - logging.exception('portal api error: {}'.format(e)) + logger.warning('portal api error: {}'.format(e)) return False -def get_api_consort_data(tapis_token, +def get_api_consort_data(api_request, report='consort', report_suffix = 'consort-data-[mcc]-latest.csv'): '''Load data for a specified consort file. Handle 500 server errors''' try: + tapis_token = get_tapis_token(api_request) + if tapis_token: cosort_columns = ['source','target','value', 'mcc'] consort_df = pd.DataFrame(columns=cosort_columns) @@ -269,7 +266,7 @@ def get_api_consort_data(tapis_token, return consort_data_json else: - logging.warning("Unauthorized attempt to access Consort data") + logger.warning("Unauthorized attempt to access Consort data") return None except Exception as e: @@ -278,9 +275,11 @@ def get_api_consort_data(tapis_token, ## Function to rebuild dataset from apis -def get_api_imaging_data(tapis_token): +def get_api_imaging_data(api_request): ''' Load data from imaging api. Return bad status notice if hits Tapis API''' - try: + try: + tapis_token = get_tapis_token(api_request) + if tapis_token: # IMAGING imaging_filepath = '/'.join([files_api_root,'imaging','imaging-log-latest.csv']) @@ -306,7 +305,7 @@ def get_api_imaging_data(tapis_token): return imaging_data_json else: - logging.exception("Unauthorized attempt to access Imaging data") + logger.warning("Unauthorized attempt to access Imaging data") return None except Exception as e: @@ -315,10 +314,11 @@ def get_api_imaging_data(tapis_token): ## Function to rebuild dataset from apis -def get_api_blood_data(tapis_token): +def get_api_blood_data(api_request): ''' Load blood data from api''' try: current_datetime = datetime.now() + tapis_token = get_tapis_token(api_request) if tapis_token: # BLOOD @@ -358,7 +358,7 @@ def get_api_blood_data(tapis_token): return blood_data_json, request_status else: - logging.exception("Unauthorized attempt to access Blood data") + logger.warning("Unauthorized attempt to access Blood data") return None except Exception as e: @@ -366,9 +366,11 @@ def get_api_blood_data(tapis_token): return None -def get_api_subjects_json(tapis_token): +def get_api_subjects_json(api_request): ''' Load subjects data from api. Note data needs to be cleaned, etc. to create properly formatted data product''' - try: + try: + tapis_token = get_tapis_token(api_request) + if tapis_token: # Load Json Data subjects1_filepath = '/'.join([files_api_root,'subjects','subjects-1-latest.json']) @@ -392,7 +394,7 @@ def get_api_subjects_json(tapis_token): return subjects_json else: - logging.exception("Unauthorized attempt to access Subjects data") + logger.warning("Unauthorized attempt to access Subjects data") return None except Exception as e: @@ -683,4 +685,4 @@ def clean_blooddata(blood_df): # ---------------------------------------------------------------------------- # GENERATE DICTIONARIES FOR API OUTPUTS (using functions above) -# ---------------------------------------------------------------------------- +# ---------------------------------------------------------------------------- \ No newline at end of file