From 8c6640c521a425f8a4455af10a50cebeec1d0656 Mon Sep 17 00:00:00 2001
From: Karl Burke <kburke@pitt.edu>
Date: Fri, 20 Oct 2023 16:19:48 -0400
Subject: [PATCH 1/2] Remove endpoints which handled Collections prior to them
 being regular entities, and the code supporting *only* those endpoints.

---
 src/app.py               | 201 +--------------------------------------
 src/app_neo4j_queries.py |  47 ---------
 2 files changed, 2 insertions(+), 246 deletions(-)
diff --git a/src/app.py b/src/app.py
index b25be335..94c614d3 100644
--- a/src/app.py
+++ b/src/app.py
@@ -537,90 +537,6 @@ def get_entity_visibility(id):
 
     return jsonify(entity_scope.value)
 
-"""
-Retrieve the collection detail by id
-
-The gateway treats this endpoint as public accessible
-
-An optional Globus groups token can be provided in a standard Authentication Bearer header. If a valid token
-is provided with group membership in the HuBMAP-Read group any collection matching the id will be returned.
-otherwise if no token is provided or a valid token with no HuBMAP-Read group membership then
-only a public collection will be returned.  Public collections are defined as being published via a DOI 
-(collection.registered_doi not null) and at least one of the connected datasets is public
-(dataset.status == 'Published'). For public collections only connected datasets that are
-public are returned with it.
-
-By default we only reuturn the following Dataset properties:
-
-- collection.dataset.uuid
-- collection.dataset.hubmap_id
-- collection.dataset.data_types
-- collection.dataset.status
-- collection.dataset.last_modified_timestamp
-- collection.dataset.created_by_user_displayname
-
-Parameters
-----------
-id : str
-    The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target collection 
-
-Returns
--------
-json
-    The collection detail with a list of connected datasets (only public datasets 
-    if user doesn't have the right access permission)
-"""
-@app.route('/collections/<id>', methods = ['GET'])
-def get_collection(id):
-    # Token is not required, but if an invalid token provided,
-    # we need to tell the client with a 401 error
-    validate_token_if_auth_header_exists(request)
-
-    # Use the internal token to query the target collection
-    # since public collections don't require user token
-    token = get_internal_token()
-
-    # Get the entity dict from cache if exists
-    # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists
-    collection_dict = query_target_entity(id, token)
-
-    # A bit validation
-    if collection_dict['entity_type'] != 'Collection':
-        bad_request_error("Target entity of the given id is not a collection")
-
-    # Try to get user token from Authorization header
-    # It's highly possible that there's no token provided
-    user_token = get_user_token(request)
-
-    # The user_token is flask.Response on error
-    # Without token, the user can only access public collections, modify the collection result
-    # by only returning public datasets attached to this collection
-    if isinstance(user_token, Response):
-        # When the requested collection is not public, send back 401
-        if ('registered_doi' not in collection_dict) or ('doi_url' not in collection_dict):
-            # Require a valid token in this case
-            unauthorized_error("The requested collection is not public, a Globus token with the right access permission is required.")
-
-        # Otherwise only return the public datasets attached to this collection
-        # for Collection.datasets property
-        complete_dict = get_complete_public_collection_dict(collection_dict)
-    else:
-        # When the groups token is valid, but the user doesn't belong to HuBMAP-READ group
-        # Or the token is valid but doesn't contain group information (auth token or transfer token)
-        # Only return the public datasets attached to this Collection
-        if not user_in_hubmap_read_group(request):
-            complete_dict = get_complete_public_collection_dict(collection_dict)
-        else:
-            # We'll need to return all the properties including those
-            # generated by `on_read_trigger` to have a complete result
-            complete_dict = schema_manager.get_complete_entity_result(user_token, collection_dict)
-
-    # Will also filter the result based on schema
-    normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict)
-
-    # Response with the final result
-    return jsonify(normalized_complete_dict)
-
 def _get_entity_visibility(normalized_entity_type, entity_dict):
     if normalized_entity_type not in schema_manager.get_all_entity_types():
         logger.log( logging.ERROR
@@ -893,8 +809,7 @@ def get_entity_types():
 Parameters
 ----------
 entity_type : str
-    One of the supported entity types: Dataset, Sample, Donor
-    Will handle Collection via API endpoint `/collections`
+    One of the supported entity types: Dataset, Collection, Sample, Donor
 
 Returns
 -------
@@ -965,80 +880,6 @@ def get_entities_by_type(entity_type):
     # Response with the final result
     return jsonify(final_result)
 
-"""
-Retrieve all the public collections
-
-The gateway treats this endpoint as public accessible
-
-Result filtering is supported based on query string
-For example: /collections?property=uuid
-
-Only return public collections, for either 
-- a valid token in HuBMAP-Read group, 
-- a valid token with no HuBMAP-Read group or 
-- no token at all
-
-Public collections are defined as being published via a DOI 
-(collection.registered_doi is not null) and at least one of the connected datasets is published
-(dataset.status == 'Published'). For public collections only connected datasets that are
-published are returned with it.
-
-Returns
--------
-json
-    A list of all the public collection dictionaries (with attached public datasts)
-"""
-@app.route('/collections', methods = ['GET'])
-def get_collections():
-    final_result = []
-
-    # Token is not required, but if an invalid token provided,
-    # we need to tell the client with a 401 error
-    validate_token_if_auth_header_exists(request)
-
-    normalized_entity_type = 'Collection'
-
-    # Result filtering based on query string
-    if bool(request.args):
-        property_key = request.args.get('property')
-
-        if property_key is not None:
-            result_filtering_accepted_property_keys = ['uuid']
-
-            # Validate the target property
-            if property_key not in result_filtering_accepted_property_keys:
-                bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}")
-
-            # Only return a list of the filtered property value of each public collection
-            final_result = app_neo4j_queries.get_public_collections(neo4j_driver_instance, property_key)
-        else:
-            bad_request_error("The specified query string is not supported. Use '?property=<key>' to filter the result")
-    # Return all the details if no property filtering
-    else:
-        # Use the internal token since no user token is requried to access public collections
-        token = get_internal_token()
-
-        # Get back a list of public collections dicts
-        collections_list = app_neo4j_queries.get_public_collections(neo4j_driver_instance)
-
-        # Modify the Collection.datasets property for each collection dict
-        # to contain only public datasets
-        for collection_dict in collections_list:
-            # Only return the public datasets attached to this collection for Collection.datasets property
-            collection_dict = get_complete_public_collection_dict(collection_dict)
-
-        # Generate trigger data and merge into a big dict
-        # and skip some of the properties that are time-consuming to generate via triggers
-        properties_to_skip = ['datasets']
-        complete_collections_list = schema_manager.get_complete_entities_list(token, collections_list, properties_to_skip)
-
-        # Final result after normalization
-        final_result = schema_manager.normalize_entities_list_for_response(complete_collections_list)
-
-    # Response with the final result
-    return jsonify(final_result)
-
-
 """
 Create an entity of the target type in neo4j
 
@@ -1049,7 +890,7 @@ def get_collections():
 Parameters
 ----------
 entity_type : str
-    One of the target entity types (case-insensitive since will be normalized): Dataset, Donor, Sample, Upload
+    One of the target entity types (case-insensitive, since it will be normalized): Dataset, Donor, Sample, Upload, Collection
 
 Returns
 -------
@@ -4017,44 +3858,6 @@ def validate_token_if_auth_header_exists(request):
 def get_internal_token():
     return auth_helper_instance.getProcessSecret()
 
-
-"""
-Return the complete collection dict for a given raw collection dict
-
-Parameters
-----------
-collection_dict : dict
-    The raw collection dict returned by Neo4j
-
-Returns
--------
-dict
-    A dictionary of complete collection detail with all the generated 'on_read_trigger' data
-    The generated Collection.datasts contains only public datasets
-    if user/token doesn't have the right access permission
-"""
-def get_complete_public_collection_dict(collection_dict):
-    # Use internal token to query entity since
-    # no user token is required to access a public collection
-    token = get_internal_token()
-
-    # Collection.datasets is transient property and generated by the trigger method
-    # We'll need to return all the properties including those
-    # generated by `on_read_trigger` to have a complete result
-    complete_dict = schema_manager.get_complete_entity_result(token, collection_dict)
-
-    # Loop through Collection.datasets and only return the published/public datasets
-    public_datasets = []
-    for dataset in complete_dict['datasets']:
-        if dataset['status'].lower() == DATASET_STATUS_PUBLISHED:
-            public_datasets.append(dataset)
-
-    # Modify the result and only show the public datasets in this collection
-    complete_dict['datasets'] = public_datasets
-
-    return complete_dict
-
-
 """
 Generate 'before_create_triiger' data and create the entity details in Neo4j
 
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py
index 933017e8..98fd674c 100644
--- a/src/app_neo4j_queries.py
+++ b/src/app_neo4j_queries.py
@@ -97,53 +97,6 @@ def get_entities_by_type(neo4j_driver, entity_type, property_key = None):
 
     return results
 
-"""
-Get all the public collection nodes
-
-Parameters
-----------
-neo4j_driver : neo4j.Driver object
-    The neo4j database connection pool
-property_key : str
-    A target property key for result filtering
-
-Returns
--------
-list
-    A list of public collections returned from the Cypher query
-"""
-def get_public_collections(neo4j_driver, property_key = None):
-    results = []
-
-    if property_key:
-        query = (f"MATCH (e:Collection) "
-                 f"WHERE e.registered_doi IS NOT NULL AND e.doi_url IS NOT NULL "
-                 # COLLECT() returns a list
-                 # apoc.coll.toSet() reruns a set containing unique nodes
-                 f"RETURN apoc.coll.toSet(COLLECT(e.{property_key})) AS {record_field_name}")
-    else:
-        query = (f"MATCH (e:Collection) "
-                 f"WHERE e.registered_doi IS NOT NULL AND e.doi_url IS NOT NULL "
-                 # COLLECT() returns a list
-                 # apoc.coll.toSet() reruns a set containing unique nodes
-                 f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}")
-
-    logger.info("======get_public_collections() query======")
-    logger.info(query)
-
-    with neo4j_driver.session() as session:
-        record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query)
-
-        if record and record[record_field_name]:
-            if property_key:
-                # Just return the list of property values from each entity node
-                results = record[record_field_name]
-            else:
-                # Convert the list of nodes to a list of dicts
-                results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name])
-
-    return results
-
 """
 Retrieve the ancestor organ(s) of a given entity
 

From 9f57ac2fb01942c702a3f3f320b7485dd344b5dd Mon Sep 17 00:00:00 2001
From: Karl Burke <kburke@pitt.edu>
Date: Mon, 23 Oct 2023 11:02:49 -0400
Subject: [PATCH 2/2] Remove endpoints which handled Collections prior to them
 being regular entities, and the code supporting *only* those endpoints.

---
 entity-api-spec.yaml | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml
index 5943ede7..3c1909dd 100644
--- a/entity-api-spec.yaml
+++ b/entity-api-spec.yaml
@@ -1707,33 +1707,6 @@ paths:
           description: The target entity could not be found
         '500':
           description: Internal error
-  '/collections/{id}':
-    get:
-      summary: 'Returns the information of the Collection specified by the uuid with all connected datasets. If a valid token is provided with group membership in the HuBMAP-Read group any collection matching the id will be returned.  Otherwise if no token is provided or a valid token with no HuBMAP-Read group membership then only a public collection will be returned.  Public collections are defined as being published via a DOI (collection.doi_registered == true) and at least one of the connected datasets is public (dataset.metadata.data_access_level == ''public''). For public collections only connected datasets that are public are returned with it.'
-      parameters:
-        - name: id
-          in: path
-          description: The unique identifier of entity.  This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID
-          required: true
-          schema:
-            type: string
-      responses:
-        '200':
-          description: The collection is returned
-          content:
-            application/json:
-              schema:
-                type: array
-                items:
-                  $ref: '#/components/schemas/Collection'
-        '400':
-          description: Invalid or misformatted entity identifier
-        '401':
-          description: The user's token has expired or the user did not supply a valid token
-        '404':
-          description: The target entity could not be found
-        '500':
-          description: Internal error
   '/entities/new/{entity_type}':
     post:
       summary: Create a new entity of the target type