From cb685f413e30f5d4b47466292d725b20a00bfa12 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Wed, 21 Aug 2024 14:46:17 -0400 Subject: [PATCH 1/3] Reworked get_organ_types such that it is only called once during the initialization of schema_manager. Created a global variable inside schema triggers and a quick setter function inside app.py to set that value after initialization of schema manager but before the value is fetched in schema_triggers --- src/app.py | 10 ++-------- src/schema/schema_manager.py | 3 +++ src/schema/schema_triggers.py | 6 +++++- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/app.py b/src/app.py index 81b7cfec..03c543dc 100644 --- a/src/app.py +++ b/src/app.py @@ -228,6 +228,8 @@ def http_internal_server_error(e): f" _schema_yaml_file={_schema_yaml_file}." # Log the full stack trace, prepend a line with our message logger.exception(msg) +organ_types_dict = schema_manager.organ_types_dict +schema_triggers.set_organ_types_dict() #################################################################################################### ## Initialize an S3Worker from hubmap-commons @@ -3157,7 +3159,6 @@ def get_prov_info(): # Token is not required, but if an invalid token is provided, # we need to tell the client with a 401 error validate_token_if_auth_header_exists(request) - organ_types_dict = schema_manager.get_organ_types() if user_in_hubmap_read_group(request): published_only = False @@ -3448,7 +3449,6 @@ def get_prov_info_for_dataset(id): # Token is not required, but if an invalid token provided, # we need to tell the client with a 401 error validate_token_if_auth_header_exists(request) - organ_types_dict = schema_manager.get_organ_types() # Use the internal token to query the target entity # since public entities don't require user token token = get_internal_token() @@ -3761,10 +3761,6 @@ def sankey_data(): HEADER_DATASET_DATASET_TYPE = 'dataset_dataset_type' HEADER_DATASET_STATUS = 'dataset_status' - # Parsing the organ 
types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description - # because that would require using a urllib request for each dataset - organ_types_dict = schema_manager.get_organ_types() - # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids # having to access the resource for every dataset. assay_types_dict = schema_manager.get_assay_types() @@ -3868,8 +3864,6 @@ def get_sample_prov_info(): if user_in_hubmap_read_group(request): public_only = False - organ_types_dict = schema_manager.get_organ_types() - # Processing and validating query parameters accepted_arguments = ['group_uuid'] param_dict = {} # currently the only filter is group_uuid, but in case this grows, we're using a dictionary diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 4012465f..97785c78 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -24,6 +24,7 @@ from schema import schema_neo4j_queries logger = logging.getLogger(__name__) +organ_types_dict = None # Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning) @@ -121,6 +122,8 @@ def initialize(valid_yaml_file, _neo4j_driver = neo4j_driver_instance _memcached_client = memcached_client_instance _memcached_prefix = memcached_prefix + global organ_types_dict + organ_types_dict = get_organ_types() #################################################################################################### diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 3091b081..7a567c07 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -17,6 +17,7 @@ from schema.schema_constants import SchemaConstants logger = logging.getLogger(__name__) +organ_types_dict = None 
#################################################################################################### @@ -50,6 +51,10 @@ def set_timestamp(property_key, normalized_type, user_token, existing_data_dict, # and schema_neo4j_queries._build_properties_map() return property_key, 'TIMESTAMP()' +def set_organ_types_dict(): + global organ_types_dict + organ_types_dict = schema_manager.organ_types_dict + """ Trigger event method of setting the entity type of a given entity @@ -1016,7 +1021,6 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d try: # The organ_name is the two-letter code only set for 'organ' # Convert the two-letter code to a description - organ_types_dict = schema_manager.get_organ_types() organ_desc = organ_types_dict[organ_name].lower() except (yaml.YAMLError, requests.exceptions.RequestException) as e: raise Exception(e) From 7e206bb15e22934b1c35297602683a90b38ad324 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Fri, 23 Aug 2024 14:33:28 -0400 Subject: [PATCH 2/3] Reverted previous changes for get_organ_types. Added a new global variable, _organ_types, in schema_manager. In get_organ_types, if _organ_types is None, it fetches the organ types, stores them in _organ_types, and returns _organ_types. If it is not None, it returns the existing _organ_types value --- src/app.py | 10 +++++++-- src/schema/schema_manager.py | 41 +++++++++++++++++++---------------- src/schema/schema_triggers.py | 6 +---- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/src/app.py b/src/app.py index 03c543dc..81b7cfec 100644 --- a/src/app.py +++ b/src/app.py @@ -228,8 +228,6 @@ def http_internal_server_error(e): f" _schema_yaml_file={_schema_yaml_file}." 
# Log the full stack trace, prepend a line with our message logger.exception(msg) -organ_types_dict = schema_manager.organ_types_dict -schema_triggers.set_organ_types_dict() #################################################################################################### ## Initialize an S3Worker from hubmap-commons @@ -3159,6 +3157,7 @@ def get_prov_info(): # Token is not required, but if an invalid token is provided, # we need to tell the client with a 401 error validate_token_if_auth_header_exists(request) + organ_types_dict = schema_manager.get_organ_types() if user_in_hubmap_read_group(request): published_only = False @@ -3449,6 +3448,7 @@ def get_prov_info_for_dataset(id): # Token is not required, but if an invalid token provided, # we need to tell the client with a 401 error validate_token_if_auth_header_exists(request) + organ_types_dict = schema_manager.get_organ_types() # Use the internal token to query the target entity # since public entities don't require user token token = get_internal_token() @@ -3761,6 +3761,10 @@ def sankey_data(): HEADER_DATASET_DATASET_TYPE = 'dataset_dataset_type' HEADER_DATASET_STATUS = 'dataset_status' + # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description + # because that would require using a urllib request for each dataset + organ_types_dict = schema_manager.get_organ_types() + # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids # having to access the resource for every dataset. 
assay_types_dict = schema_manager.get_assay_types() @@ -3864,6 +3868,8 @@ def get_sample_prov_info(): if user_in_hubmap_read_group(request): public_only = False + organ_types_dict = schema_manager.get_organ_types() + # Processing and validating query parameters accepted_arguments = ['group_uuid'] param_dict = {} # currently the only filter is group_uuid, but in case this grows, we're using a dictionary diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index 97785c78..ff93c0b2 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -24,7 +24,6 @@ from schema import schema_neo4j_queries logger = logging.getLogger(__name__) -organ_types_dict = None # Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning) @@ -40,6 +39,7 @@ _neo4j_driver = None _memcached_client = None _memcached_prefix = None +_organ_types = None #################################################################################################### @@ -122,8 +122,6 @@ def initialize(valid_yaml_file, _neo4j_driver = neo4j_driver_instance _memcached_client = memcached_client_instance _memcached_prefix = memcached_prefix - global organ_types_dict - organ_types_dict = get_organ_types() #################################################################################################### @@ -2163,29 +2161,34 @@ def delete_memcached_cache(uuids_list): """ def get_organ_types(): global _ontology_api_url + global _organ_types - target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ORGAN_TYPES_ENDPOINT + if _organ_types is None: + target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ORGAN_TYPES_ENDPOINT - # Use Memcached to improve performance - response = make_request_get(target_url, internal_token_used = True) + # Use Memcached to improve performance + response = make_request_get(target_url, internal_token_used = True) - # 
Invoke .raise_for_status(), an HTTPError will be raised with certain status codes - response.raise_for_status() + # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes + response.raise_for_status() - if response.status_code == 200: - return response.json() - else: - # Log the full stack trace, prepend a line with our message - logger.exception("Unable to make a request to query the organ types via ontology-api") + if response.status_code == 200: + _organ_types = response.json() + return _organ_types + else: + # Log the full stack trace, prepend a line with our message + logger.exception("Unable to make a request to query the organ types via ontology-api") - logger.debug("======get_organ_types() status code from ontology-api======") - logger.debug(response.status_code) + logger.debug("======get_organ_types() status code from ontology-api======") + logger.debug(response.status_code) - logger.debug("======get_organ_types() response text from ontology-api======") - logger.debug(response.text) + logger.debug("======get_organ_types() response text from ontology-api======") + logger.debug(response.text) - # Also bubble up the error message from ontology-api - raise requests.exceptions.RequestException(response.text) + # Also bubble up the error message from ontology-api + raise requests.exceptions.RequestException(response.text) + else: + return _organ_types """ diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 7a567c07..3091b081 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -17,7 +17,6 @@ from schema.schema_constants import SchemaConstants logger = logging.getLogger(__name__) -organ_types_dict = None #################################################################################################### @@ -51,10 +50,6 @@ def set_timestamp(property_key, normalized_type, user_token, existing_data_dict, # and schema_neo4j_queries._build_properties_map() return property_key, 
'TIMESTAMP()' -def set_organ_types_dict(): - global organ_types_dict - organ_types_dict = schema_manager.organ_types_dict - """ Trigger event method of setting the entity type of a given entity @@ -1021,6 +1016,7 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d try: # The organ_name is the two-letter code only set for 'organ' # Convert the two-letter code to a description + organ_types_dict = schema_manager.get_organ_types() organ_desc = organ_types_dict[organ_name].lower() except (yaml.YAMLError, requests.exceptions.RequestException) as e: raise Exception(e) From 0f133b73f0359a6d911f2aa0886352c580e41081 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 27 Aug 2024 13:20:44 -0400 Subject: [PATCH 3/3] reworked validate_organ_code to accept only a single argument to go along with other changes to get_organ_types --- src/app.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/app.py b/src/app.py index 81b7cfec..829ff698 100644 --- a/src/app.py +++ b/src/app.py @@ -3273,7 +3273,7 @@ def get_prov_info(): distinct_organ_uuid_list.append(item['uuid']) organ_code = item['organ'].upper() - validate_organ_code(organ_code, organ_types_dict) + validate_organ_code(organ_code) distinct_organ_type_list.append(organ_types_dict[organ_code].lower()) internal_dict[HEADER_ORGAN_HUBMAP_ID] = distinct_organ_hubmap_id_list @@ -3593,7 +3593,7 @@ def get_prov_info_for_dataset(id): distinct_organ_uuid_list.append(item['uuid']) organ_code = item['organ'].upper() - validate_organ_code(organ_code, organ_types_dict ) + validate_organ_code(organ_code) distinct_organ_type_list.append(organ_types_dict[organ_code].lower()) internal_dict[HEADER_ORGAN_HUBMAP_ID] = distinct_organ_hubmap_id_list @@ -3789,7 +3789,7 @@ def sankey_data(): internal_dict[HEADER_DATASET_GROUP_NAME] = dataset[HEADER_DATASET_GROUP_NAME] organ_code = dataset[HEADER_ORGAN_TYPE].upper() - validate_organ_code(organ_code, organ_types_dict) + 
validate_organ_code(organ_code) internal_dict[HEADER_ORGAN_TYPE] = organ_types_dict[organ_code].lower() @@ -5456,9 +5456,8 @@ def access_level_prefix_dir(dir_name): ---------- organ_code : str """ -def validate_organ_code(organ_code, organ_types_dict=None): - if organ_types_dict is None: - organ_types_dict = schema_manager.get_organ_types() +def validate_organ_code(organ_code): + organ_types_dict = schema_manager.get_organ_types() if not organ_code.isalpha() or not len(organ_code) == 2: internal_server_error(f"Invalid organ code {organ_code}. Must be 2-letter alphabetic code")