From cb685f413e30f5d4b47466292d725b20a00bfa12 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt <drf57@pitt.edu>
Date: Wed, 21 Aug 2024 14:46:17 -0400
Subject: [PATCH 1/3] Reworked get_organ_types such that it is only called once
 during the initialization of schema_manager. Created a global variable inside
 schema_triggers and a quick setter function (called from app.py) to set that
 value after initialization of schema_manager but before the value is fetched
 in schema_triggers

---
 src/app.py                    | 10 ++--------
 src/schema/schema_manager.py  |  3 +++
 src/schema/schema_triggers.py |  6 +++++-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/app.py b/src/app.py
index 81b7cfec..03c543dc 100644
--- a/src/app.py
+++ b/src/app.py
@@ -228,6 +228,8 @@ def http_internal_server_error(e):
             f" _schema_yaml_file={_schema_yaml_file}."
     # Log the full stack trace, prepend a line with our message
     logger.exception(msg)
+organ_types_dict = schema_manager.organ_types_dict
+schema_triggers.set_organ_types_dict()
 
 ####################################################################################################
 ## Initialize an S3Worker from hubmap-commons
@@ -3157,7 +3159,6 @@ def get_prov_info():
     # Token is not required, but if an invalid token is provided,
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
-    organ_types_dict = schema_manager.get_organ_types()
     if user_in_hubmap_read_group(request):
         published_only = False
 
@@ -3448,7 +3449,6 @@ def get_prov_info_for_dataset(id):
     # Token is not required, but if an invalid token provided,
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
-    organ_types_dict = schema_manager.get_organ_types()
     # Use the internal token to query the target entity
     # since public entities don't require user token
     token = get_internal_token()
@@ -3761,10 +3761,6 @@ def sankey_data():
     HEADER_DATASET_DATASET_TYPE = 'dataset_dataset_type'
     HEADER_DATASET_STATUS = 'dataset_status'
 
-    # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
-    # because that would require using a urllib request for each dataset
-    organ_types_dict = schema_manager.get_organ_types()
-
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
     assay_types_dict = schema_manager.get_assay_types()
@@ -3868,8 +3864,6 @@ def get_sample_prov_info():
     if user_in_hubmap_read_group(request):
         public_only = False
 
-    organ_types_dict = schema_manager.get_organ_types()
-
     # Processing and validating query parameters
     accepted_arguments = ['group_uuid']
     param_dict = {}  # currently the only filter is group_uuid, but in case this grows, we're using a dictionary
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index 4012465f..97785c78 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -24,6 +24,7 @@
 from schema import schema_neo4j_queries
 
 logger = logging.getLogger(__name__)
+organ_types_dict = None
 
 # Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled
 requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning)
@@ -121,6 +122,8 @@ def initialize(valid_yaml_file,
     _neo4j_driver = neo4j_driver_instance
     _memcached_client = memcached_client_instance
     _memcached_prefix = memcached_prefix
+    global organ_types_dict
+    organ_types_dict = get_organ_types()
 
 
 ####################################################################################################
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index 3091b081..7a567c07 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -17,6 +17,7 @@
 from schema.schema_constants import SchemaConstants
 
 logger = logging.getLogger(__name__)
+organ_types_dict = None
 
 
 ####################################################################################################
@@ -50,6 +51,10 @@ def set_timestamp(property_key, normalized_type, user_token, existing_data_dict,
     # and schema_neo4j_queries._build_properties_map()
     return property_key, 'TIMESTAMP()'
 
+def set_organ_types_dict():
+    global organ_types_dict
+    organ_types_dict = schema_manager.organ_types_dict
+
 """
 Trigger event method of setting the entity type of a given entity
 
@@ -1016,7 +1021,6 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d
         try: 
             # The organ_name is the two-letter code only set for 'organ'
             # Convert the two-letter code to a description
-            organ_types_dict = schema_manager.get_organ_types()
             organ_desc = organ_types_dict[organ_name].lower()
         except (yaml.YAMLError, requests.exceptions.RequestException) as e:
             raise Exception(e)

From 7e206bb15e22934b1c35297602683a90b38ad324 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt <drf57@pitt.edu>
Date: Fri, 23 Aug 2024 14:33:28 -0400
Subject: [PATCH 2/3] Reverted previous changes for get_organ_types. Added new
 global variable _organ_types in schema_manager. In get_organ_types, if
 _organ_types is None, it fetches the organ types, stores them in
 _organ_types, then returns _organ_types. If it is not None, it returns the
 existing _organ_types value

---
 src/app.py                    | 10 +++++++--
 src/schema/schema_manager.py  | 41 +++++++++++++++++++----------------
 src/schema/schema_triggers.py |  6 +----
 3 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/src/app.py b/src/app.py
index 03c543dc..81b7cfec 100644
--- a/src/app.py
+++ b/src/app.py
@@ -228,8 +228,6 @@ def http_internal_server_error(e):
             f" _schema_yaml_file={_schema_yaml_file}."
     # Log the full stack trace, prepend a line with our message
     logger.exception(msg)
-organ_types_dict = schema_manager.organ_types_dict
-schema_triggers.set_organ_types_dict()
 
 ####################################################################################################
 ## Initialize an S3Worker from hubmap-commons
@@ -3159,6 +3157,7 @@ def get_prov_info():
     # Token is not required, but if an invalid token is provided,
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
+    organ_types_dict = schema_manager.get_organ_types()
     if user_in_hubmap_read_group(request):
         published_only = False
 
@@ -3449,6 +3448,7 @@ def get_prov_info_for_dataset(id):
     # Token is not required, but if an invalid token provided,
     # we need to tell the client with a 401 error
     validate_token_if_auth_header_exists(request)
+    organ_types_dict = schema_manager.get_organ_types()
     # Use the internal token to query the target entity
     # since public entities don't require user token
     token = get_internal_token()
@@ -3761,6 +3761,10 @@ def sankey_data():
     HEADER_DATASET_DATASET_TYPE = 'dataset_dataset_type'
     HEADER_DATASET_STATUS = 'dataset_status'
 
+    # Parsing the organ types yaml has to be done here rather than calling schema.schema_triggers.get_organ_description
+    # because that would require using a urllib request for each dataset
+    organ_types_dict = schema_manager.get_organ_types()
+
     # As above, we parse te assay type yaml here rather than calling the special method for it because this avoids
     # having to access the resource for every dataset.
     assay_types_dict = schema_manager.get_assay_types()
@@ -3864,6 +3868,8 @@ def get_sample_prov_info():
     if user_in_hubmap_read_group(request):
         public_only = False
 
+    organ_types_dict = schema_manager.get_organ_types()
+
     # Processing and validating query parameters
     accepted_arguments = ['group_uuid']
     param_dict = {}  # currently the only filter is group_uuid, but in case this grows, we're using a dictionary
diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py
index 97785c78..ff93c0b2 100644
--- a/src/schema/schema_manager.py
+++ b/src/schema/schema_manager.py
@@ -24,7 +24,6 @@
 from schema import schema_neo4j_queries
 
 logger = logging.getLogger(__name__)
-organ_types_dict = None
 
 # Suppress InsecureRequestWarning warning when requesting status on https with ssl cert verify disabled
 requests.packages.urllib3.disable_warnings(category = InsecureRequestWarning)
@@ -40,6 +39,7 @@
 _neo4j_driver = None
 _memcached_client = None
 _memcached_prefix = None
+_organ_types = None
 
 
 ####################################################################################################
@@ -122,8 +122,6 @@ def initialize(valid_yaml_file,
     _neo4j_driver = neo4j_driver_instance
     _memcached_client = memcached_client_instance
     _memcached_prefix = memcached_prefix
-    global organ_types_dict
-    organ_types_dict = get_organ_types()
 
 
 ####################################################################################################
@@ -2163,29 +2161,34 @@ def delete_memcached_cache(uuids_list):
 """
 def get_organ_types():
     global _ontology_api_url
+    global _organ_types
 
-    target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ORGAN_TYPES_ENDPOINT
+    if _organ_types is None:
+        target_url = _ontology_api_url + SchemaConstants.ONTOLOGY_API_ORGAN_TYPES_ENDPOINT
 
-    # Use Memcached to improve performance
-    response = make_request_get(target_url, internal_token_used = True)
+        # Use Memcached to improve performance
+        response = make_request_get(target_url, internal_token_used = True)
 
-    # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
-    response.raise_for_status()
+        # Invoke .raise_for_status(), an HTTPError will be raised with certain status codes
+        response.raise_for_status()
 
-    if response.status_code == 200:
-        return response.json()
-    else:
-        # Log the full stack trace, prepend a line with our message
-        logger.exception("Unable to make a request to query the organ types via ontology-api")
+        if response.status_code == 200:
+            _organ_types = response.json()
+            return _organ_types
+        else:
+            # Log the full stack trace, prepend a line with our message
+            logger.exception("Unable to make a request to query the organ types via ontology-api")
 
-        logger.debug("======get_organ_types() status code from ontology-api======")
-        logger.debug(response.status_code)
+            logger.debug("======get_organ_types() status code from ontology-api======")
+            logger.debug(response.status_code)
 
-        logger.debug("======get_organ_types() response text from ontology-api======")
-        logger.debug(response.text)
+            logger.debug("======get_organ_types() response text from ontology-api======")
+            logger.debug(response.text)
 
-        # Also bubble up the error message from ontology-api
-        raise requests.exceptions.RequestException(response.text)
+            # Also bubble up the error message from ontology-api
+            raise requests.exceptions.RequestException(response.text)
+    else:
+        return _organ_types
 
 
 """
diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py
index 7a567c07..3091b081 100644
--- a/src/schema/schema_triggers.py
+++ b/src/schema/schema_triggers.py
@@ -17,7 +17,6 @@
 from schema.schema_constants import SchemaConstants
 
 logger = logging.getLogger(__name__)
-organ_types_dict = None
 
 
 ####################################################################################################
@@ -51,10 +50,6 @@ def set_timestamp(property_key, normalized_type, user_token, existing_data_dict,
     # and schema_neo4j_queries._build_properties_map()
     return property_key, 'TIMESTAMP()'
 
-def set_organ_types_dict():
-    global organ_types_dict
-    organ_types_dict = schema_manager.organ_types_dict
-
 """
 Trigger event method of setting the entity type of a given entity
 
@@ -1021,6 +1016,7 @@ def get_dataset_title(property_key, normalized_type, user_token, existing_data_d
         try: 
             # The organ_name is the two-letter code only set for 'organ'
             # Convert the two-letter code to a description
+            organ_types_dict = schema_manager.get_organ_types()
             organ_desc = organ_types_dict[organ_name].lower()
         except (yaml.YAMLError, requests.exceptions.RequestException) as e:
             raise Exception(e)

From 0f133b73f0359a6d911f2aa0886352c580e41081 Mon Sep 17 00:00:00 2001
From: DerekFurstPitt <drf57@pitt.edu>
Date: Tue, 27 Aug 2024 13:20:44 -0400
Subject: [PATCH 3/3] reworked validate_organ_code to accept only a single
 argument to go along with other changes to get_organ_types

---
 src/app.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/app.py b/src/app.py
index 81b7cfec..829ff698 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3273,7 +3273,7 @@ def get_prov_info():
                 distinct_organ_uuid_list.append(item['uuid'])
 
                 organ_code = item['organ'].upper()
-                validate_organ_code(organ_code, organ_types_dict)
+                validate_organ_code(organ_code)
 
                 distinct_organ_type_list.append(organ_types_dict[organ_code].lower())
             internal_dict[HEADER_ORGAN_HUBMAP_ID] = distinct_organ_hubmap_id_list
@@ -3593,7 +3593,7 @@ def get_prov_info_for_dataset(id):
             distinct_organ_uuid_list.append(item['uuid'])
 
             organ_code = item['organ'].upper()
-            validate_organ_code(organ_code, organ_types_dict )
+            validate_organ_code(organ_code)
 
             distinct_organ_type_list.append(organ_types_dict[organ_code].lower())
         internal_dict[HEADER_ORGAN_HUBMAP_ID] = distinct_organ_hubmap_id_list
@@ -3789,7 +3789,7 @@ def sankey_data():
             internal_dict[HEADER_DATASET_GROUP_NAME] = dataset[HEADER_DATASET_GROUP_NAME]
 
             organ_code = dataset[HEADER_ORGAN_TYPE].upper()
-            validate_organ_code(organ_code, organ_types_dict)
+            validate_organ_code(organ_code)
 
             internal_dict[HEADER_ORGAN_TYPE] = organ_types_dict[organ_code].lower()
 
@@ -5456,9 +5456,8 @@ def access_level_prefix_dir(dir_name):
 ----------
 organ_code : str
 """
-def validate_organ_code(organ_code, organ_types_dict=None):
-    if organ_types_dict is None:
-        organ_types_dict = schema_manager.get_organ_types()
+def validate_organ_code(organ_code):
+    organ_types_dict = schema_manager.get_organ_types()
     if not organ_code.isalpha() or not len(organ_code) == 2:
         internal_server_error(f"Invalid organ code {organ_code}. Must be 2-letter alphabetic code")