Skip to content

Commit

Permalink
Streamline imaging api
Browse files Browse the repository at this point in the history
  • Loading branch information
mepearson committed Jul 24, 2024
1 parent 34b4968 commit c806c99
Show file tree
Hide file tree
Showing 10 changed files with 10,993 additions and 1,892 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
flask==2.0.3
Flask==2.1.2
gunicorn==20.1.0
requests==2.27.0
pandas==1.3.5
Expand Down
22 changes: 16 additions & 6 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,23 @@
# LOAD ASSETS FILES
# ----------------------------------------------------------------------------

# Pointers to official files stored at github repository main branch
screening_sites_github_url = 'https://raw.githubusercontent.com/TACC/a2cps-datastore-weekly/main/src/assets/screening_sites.csv'
display_terms_github_url = 'https://raw.githubusercontent.com/TACC/a2cps-datastore-weekly/main/src/assets/A2CPS_display_terms.csv'

# load display terms and screening sites
screening_sites = pd.read_csv(screening_sites_github_url)
display_terms, display_terms_dict, display_terms_dict_multi = load_display_terms_from_github(display_terms_github_url)

# # Pointers to official files stored at github repository main branch
# screening_sites_github_url = 'https://raw.githubusercontent.com/TACC/a2cps-datastore-weekly/main/src/assets/screening_sites.csv'
# display_terms_github_url = 'https://raw.githubusercontent.com/TACC/a2cps-datastore-weekly/main/src/assets/A2CPS_display_terms.csv'

# # load display terms and screening sites
# screening_sites = pd.read_csv(screening_sites_github_url)
# display_terms, display_terms_dict, display_terms_dict_multi = load_display_terms_from_github(display_terms_github_url)

# For datastore: load locally from assets file to avoid issues with url library.
screening_sites_location = os.path.join(ASSETS_PATH, 'screening_sites.csv')
display_terms_location = os.path.join(ASSETS_PATH, 'A2CPS_display_terms.csv')

screening_sites = pd.read_csv(screening_sites_location)
display_terms, display_terms_dict, display_terms_dict_multi = load_display_terms(display_terms_location)


# Columns used in reports [UPDATE THIS IF START TO USE MORE]
subjects_raw_cols_for_reports = ['index',
Expand Down
4 changes: 2 additions & 2 deletions src/assets/A2CPS_display_terms.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ redcap_data_access_group,northshore,0,MCC1: NorthShore,MCC1: NorthShore,0
redcap_data_access_group,uchicago,0,MCC1: UChicago,MCC1: UChicago,0
redcap_data_access_group,university_of_mich,0,MCC2: UMichigan,MCC2: UMichigan,0
redcap_data_access_group,wayne_state,0,MCC2: Wayne State,MCC2: Wayne State,0
redcap_data_access_group,spectrum_health,0,MCC2: Spectrum Health,MCC2: Spectrum Health,0
redcap_data_access_group,spectrum_health,0,MCC2: Spectrum Health,MCC2: Corewell Health,0
sp_data_site,1,0,MCC2: UMichigan,MCC2: UMichigan,0
sp_data_site,2,0,MCC2: Wayne State,MCC2: Wayne State,0
sp_data_site,3,0,MCC2: Spectrum Health,MCC2: Spectrum Health,0
sp_data_site,3,0,MCC2: Spectrum Health,MCC2: Corewell Health,0
participation_interest,0,0,No,No,0
participation_interest,1,0,Maybe,Maybe,0
participation_interest,2,0,Yes,Yes,0
Expand Down
4 changes: 3 additions & 1 deletion src/assets/screening_sites.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ MCC2: HFHS Macomb,2,HFHS Macomb,Thoracic,90000,99999,12/1/2021,12,2021,"1, 1, 2,
MCC2: HFHS Jackson,2,HFHS Jackson,Thoracic,100000,109999,1/1/2022,1,2022,"1, 2, 2, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 4, 4","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24"
MCC2: HFHS Wyandotte,2,HFHS Wyandotte,Thoracic,110000,119999,,,,,
MCC2: University of Michigan,2,University of Michigan,TKA,120000,129999,,,,,
MCC2: Spectrum,2,Spectrum,Thoracic,130000,139999,1/1/2022,1,2022,"8, 8, 8, 16, 16, 16, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24"
MCC2: Corewell,2,Corewell,Thoracic,130000,139999,1/1/2022,1,2022,"8, 8, 8, 16, 16, 16, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17, 16, 16, 17","1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24"
MCC1: Rush,1,Rush,Thoracic,140000,149999,,,,,
MCC2: HFHS ,2,HFHS,TKA,150000,159999,,,,,
MCC1: University of Chicago,1,University of Chicago,Thoracic,160000,169999,,,,,
MCC2: Corewell,2,Corewell,TKA,180000,189999,,,,,

2,424 changes: 1,830 additions & 594 deletions src/data/imaging/imaging-log-latest.csv

Large diffs are not rendered by default.

2,063 changes: 1,931 additions & 132 deletions src/data/imaging/mriqc-group-bold-latest.csv

Large diffs are not rendered by default.

8,334 changes: 7,184 additions & 1,150 deletions src/data/imaging/qc-log-latest.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/subjects/subjects-1-latest.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/data/subjects/subjects-2-latest.json

Large diffs are not rendered by default.

28 changes: 24 additions & 4 deletions src/data_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,11 @@ def get_display_dictionary(display_terms, api_field, api_value, display_col):
return None


def load_display_terms_from_github(display_terms_gihub_raw_url):
def load_display_terms(display_terms_location):
'''Load the data file that explains how to translate the data columns and controlled terms into the English language
terms to be displayed to the user'''
try:
display_terms = pd.read_csv(display_terms_gihub_raw_url)
display_terms = pd.read_csv(display_terms_location)

# Get display terms dictionary for one-to-one records
display_terms_uni = display_terms[display_terms.multi == 0]
Expand Down Expand Up @@ -306,6 +306,24 @@ def get_api_consort_data(tapis_token,

## Function to rebuild dataset from apis

# Default set of imaging-log columns the reports actually consume.
IMAGING_REPORT_COLUMNS = ['site', 'subject_id', 'visit', 'acquisition_week', 'Surgery Week', 'bids', 'dicom',
                          'T1 Indicated', 'DWI Indicated', '1st Resting State Indicated', 'fMRI Individualized Pressure Indicated',
                          'fMRI Standard Pressure Indicated', '2nd Resting State Indicated',
                          'T1 Received', 'DWI Received', 'fMRI Individualized Pressure Received', 'fMRI Standard Pressure Received',
                          '1st Resting State Received', '2nd Resting State Received']


def subset_imaging_data(imaging_full, columns=None):
    '''Reduce the full imaging log to the columns used by the reports.

    Parameters
    ----------
    imaging_full : pd.DataFrame
        Full imaging log as loaded from imaging-log-latest.csv.
    columns : list of str, optional
        Columns to keep. Defaults to IMAGING_REPORT_COLUMNS.

    Returns
    -------
    pd.DataFrame
        Copy of the selected columns with the literal string 'na'
        normalized to np.nan so missing values are consistent.

    Raises
    ------
    KeyError
        If any requested column is absent from imaging_full.
    '''
    if columns is None:
        columns = IMAGING_REPORT_COLUMNS

    # .copy() so downstream mutation cannot trigger SettingWithCopyWarning
    # or alter the caller's full frame.
    imaging = imaging_full[columns].copy()

    # The source CSV encodes missing values as the string 'na'; normalize
    # to np.nan so pandas missing-value handling works as expected.
    imaging = imaging.replace('na', np.nan)

    return imaging

def subset_qc_data(qc_full):
    '''Return a copy of the QC log restricted to the columns the reports use.

    Parameters
    ----------
    qc_full : pd.DataFrame
        Full QC log as loaded from qc-log-latest.csv.

    Returns
    -------
    pd.DataFrame
        Independent copy containing only site, sub, ses, scan, and rating.
    '''
    report_columns = ['site', 'sub', 'ses', 'scan', 'rating']
    # Copy the selection so later edits never write through to qc_full.
    return qc_full.loc[:, report_columns].copy()


def get_api_imaging_data(tapis_token):
''' Load data from imaging api. Return bad status notice if hits Tapis API'''
try:
Expand All @@ -314,15 +332,17 @@ def get_api_imaging_data(tapis_token):
imaging_filepath = '/'.join([files_api_root,'imaging','imaging-log-latest.csv'])
imaging_request = make_report_data_request(imaging_filepath, tapis_token)
if imaging_request.status_code == 200:
imaging = pd.read_csv(io.StringIO(imaging_request.content.decode('utf-8')))
imaging_full = pd.read_csv(io.StringIO(imaging_request.content.decode('utf-8')))
imaging = subset_imaging_data(imaging_full)
else:
return {'status':'500', 'source': 'imaging-log-latest.csv'}


qc_filepath = '/'.join([files_api_root,'imaging','qc-log-latest.csv'])
qc_request = make_report_data_request(qc_filepath, tapis_token)
if qc_request.status_code == 200:
qc = pd.read_csv(io.StringIO(qc_request.content.decode('utf-8')))
qc_full = pd.read_csv(io.StringIO(qc_request.content.decode('utf-8')))
qc = subset_qc_data(qc_full)
else:
return {'status':'500', 'source': 'qc-log-latest.csv'}

Expand Down

0 comments on commit c806c99

Please sign in to comment.