Skip to content

Commit

Permalink
Streamline imaging api
Browse files Browse the repository at this point in the history
  • Loading branch information
mepearson committed Jul 24, 2024
1 parent 34b4968 commit c806c99
Show file tree
Hide file tree
Showing 10 changed files with 10,993 additions and 1,892 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
flask==2.0.3
Flask==2.1.2
gunicorn==20.1.0
requests==2.27.0
pandas==1.3.5
Expand Down
22 changes: 16 additions & 6 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,23 @@
# LOAD ASSETS FILES
# ----------------------------------------------------------------------------

# Pointers to official files stored at github repository main branch
screening_sites_github_url = 'https://raw.githubusercontent.com/TACC/a2cps-datastore-weekly/main/src/assets/screening_sites.csv'
display_terms_github_url = 'https://raw.githubusercontent.com/TACC/a2cps-datastore-weekly/main/src/assets/A2CPS_display_terms.csv'

# load display terms and screening sites
screening_sites = pd.read_csv(screening_sites_github_url)
display_terms, display_terms_dict, display_terms_dict_multi = load_display_terms_from_github(display_terms_github_url)

# # Pointers to official files stored at github repository main branch
# screening_sites_github_url = 'https://raw.githubusercontent.com/TACC/a2cps-datastore-weekly/main/src/assets/screening_sites.csv'
# display_terms_github_url = 'https://raw.githubusercontent.com/TACC/a2cps-datastore-weekly/main/src/assets/A2CPS_display_terms.csv'

# # load display terms and screening sites
# screening_sites = pd.read_csv(screening_sites_github_url)
# display_terms, display_terms_dict, display_terms_dict_multi = load_display_terms_from_github(display_terms_github_url)

# For datastore: load locally from assets file to avoid issues with url library.
screening_sites_location = os.path.join(ASSETS_PATH, 'screening_sites.csv')
display_terms_location = os.path.join(ASSETS_PATH, 'A2CPS_display_terms.csv')

screening_sites = pd.read_csv(screening_sites_location)
display_terms, display_terms_dict, display_terms_dict_multi = load_display_terms(display_terms_location)


# Columns used in reports [UPDATE THIS IF START TO USE MORE]
subjects_raw_cols_for_reports = ['index',
Expand Down
4 changes: 2 additions & 2 deletions src/assets/A2CPS_display_terms.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ redcap_data_access_group,northshore,0,MCC1: NorthShore,MCC1: NorthShore,0
redcap_data_access_group,uchicago,0,MCC1: UChicago,MCC1: UChicago,0
redcap_data_access_group,university_of_mich,0,MCC2: UMichigan,MCC2: UMichigan,0
redcap_data_access_group,wayne_state,0,MCC2: Wayne State,MCC2: Wayne State,0
redcap_data_access_group,spectrum_health,0,MCC2: Spectrum Health,MCC2: Spectrum Health,0
redcap_data_access_group,spectrum_health,0,MCC2: Spectrum Health,MCC2: Corewell Health,0
sp_data_site,1,0,MCC2: UMichigan,MCC2: UMichigan,0
sp_data_site,2,0,MCC2: Wayne State,MCC2: Wayne State,0
sp_data_site,3,0,MCC2: Spectrum Health,MCC2: Spectrum Health,0
sp_data_site,3,0,MCC2: Spectrum Health,MCC2: Corewell Health,0
participation_interest,0,0,No,No,0
participation_interest,1,0,Maybe,Maybe,0
participation_interest,2,0,Yes,Yes,0
Expand Down
4 changes: 3 additions & 1 deletion src/assets/screening_sites.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ MCC2: HFHS Macomb,2,HFHS Macomb,Thoracic,90000,99999,12/1/2021,12,2021,"1, 1, 2,
MCC2: HFHS Jackson,2,HFHS Jackson,Thoracic,100000,109999,1/1/2022,1,2022,"1, 2, 2, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 4, 4","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24"
MCC2: HFHS Wyandotte,2,HFHS Wyandotte,Thoracic,110000,119999,,,,,
MCC2: University of Michigan,2,University of Michigan,TKA,120000,129999,,,,,
MCC2: Spectrum,2,Spectrum,Thoracic,130000,139999,1/1/2022,1,2022,"8, 8, 8, 16, 16, 16, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24"
MCC2: Corewell,2,Corewell,Thoracic,130000,139999,1/1/2022,1,2022,"8, 8, 8, 16, 16, 16, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24"
MCC1: Rush,1,Rush,Thoracic,140000,149999,,,,,
MCC2: HFHS ,2,HFHS,TKA,150000,159999,,,,,
MCC1: University of Chicago,1,University of Chicago,Thoracic,160000,169999,,,,,
MCC2: Corewell,2,Corewell,TKA,180000,189999,,,,,

2,424 changes: 1,830 additions & 594 deletions src/data/imaging/imaging-log-latest.csv

Large diffs are not rendered by default.

2,063 changes: 1,931 additions & 132 deletions src/data/imaging/mriqc-group-bold-latest.csv

Large diffs are not rendered by default.

8,334 changes: 7,184 additions & 1,150 deletions src/data/imaging/qc-log-latest.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/subjects/subjects-1-latest.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/subjects/subjects-2-latest.json

Large diffs are not rendered by default.

28 changes: 24 additions & 4 deletions src/data_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,11 @@ def get_display_dictionary(display_terms, api_field, api_value, display_col):
return None


def load_display_terms_from_github(display_terms_gihub_raw_url):
def load_display_terms(display_terms_location):
'''Load the data file that explains how to translate the data columns and controlled terms into the English language
terms to be displayed to the user'''
try:
display_terms = pd.read_csv(display_terms_gihub_raw_url)
display_terms = pd.read_csv(display_terms_location)

# Get display terms dictionary for one-to-one records
display_terms_uni = display_terms[display_terms.multi == 0]
Expand Down Expand Up @@ -306,6 +306,24 @@ def get_api_consort_data(tapis_token,

## Function to rebuild dataset from apis

# Default set of imaging-log columns the reports actually consume.
IMAGING_REPORT_COLUMNS = ['site', 'subject_id', 'visit', 'acquisition_week', 'Surgery Week', 'bids', 'dicom',
                          'T1 Indicated', 'DWI Indicated', '1st Resting State Indicated', 'fMRI Individualized Pressure Indicated',
                          'fMRI Standard Pressure Indicated', '2nd Resting State Indicated',
                          'T1 Received', 'DWI Received', 'fMRI Individualized Pressure Received', 'fMRI Standard Pressure Received',
                          '1st Resting State Received', '2nd Resting State Received']


def subset_imaging_data(imaging_full, columns=None):
    '''Reduce the full imaging log to the columns used by the reports.

    Parameters
    ----------
    imaging_full : pd.DataFrame
        Full imaging log as loaded from imaging-log-latest.csv.
    columns : list of str, optional
        Columns to keep. Defaults to IMAGING_REPORT_COLUMNS.

    Returns
    -------
    pd.DataFrame
        Copy of the selected columns with the literal string 'na'
        normalized to np.nan so missing values are consistent.

    Raises
    ------
    KeyError
        If any requested column is absent from imaging_full.
    '''
    if columns is None:
        columns = IMAGING_REPORT_COLUMNS

    # .copy() so downstream mutation cannot trigger SettingWithCopyWarning
    # or alter the caller's full frame.
    imaging = imaging_full[columns].copy()

    # The source CSV encodes missing values as the string 'na'; normalize
    # to np.nan so pandas missing-value handling works as expected.
    imaging = imaging.replace('na', np.nan)

    return imaging

def subset_qc_data(qc_full):
    '''Return a copy of the QC log restricted to the columns the reports use.

    Parameters
    ----------
    qc_full : pd.DataFrame
        Full QC log as loaded from qc-log-latest.csv.

    Returns
    -------
    pd.DataFrame
        Independent copy containing only site, sub, ses, scan, and rating.
    '''
    report_columns = ['site', 'sub', 'ses', 'scan', 'rating']
    # Copy the selection so later edits never write through to qc_full.
    return qc_full.loc[:, report_columns].copy()


def get_api_imaging_data(tapis_token):
''' Load data from imaging api. Return bad status notice if hits Tapis API'''
try:
Expand All @@ -314,15 +332,17 @@ def get_api_imaging_data(tapis_token):
imaging_filepath = '/'.join([files_api_root,'imaging','imaging-log-latest.csv'])
imaging_request = make_report_data_request(imaging_filepath, tapis_token)
if imaging_request.status_code == 200:
imaging = pd.read_csv(io.StringIO(imaging_request.content.decode('utf-8')))
imaging_full = pd.read_csv(io.StringIO(imaging_request.content.decode('utf-8')))
imaging = subset_imaging_data(imaging_full)
else:
return {'status':'500', 'source': 'imaging-log-latest.csv'}


qc_filepath = '/'.join([files_api_root,'imaging','qc-log-latest.csv'])
qc_request = make_report_data_request(qc_filepath, tapis_token)
if qc_request.status_code == 200:
qc = pd.read_csv(io.StringIO(qc_request.content.decode('utf-8')))
qc_full = pd.read_csv(io.StringIO(qc_request.content.decode('utf-8')))
qc = subset_qc_data(qc_full)
else:
return {'status':'500', 'source': 'qc-log-latest.csv'}

Expand Down

0 comments on commit c806c99

Please sign in to comment.