From 8c6640c521a425f8a4455af10a50cebeec1d0656 Mon Sep 17 00:00:00 2001 From: Karl Burke Date: Fri, 20 Oct 2023 16:19:48 -0400 Subject: [PATCH 1/2] Remove endpoints which handled Collections prior to them being regular entities, and the code supporting *only* those endpoints. --- src/app.py | 201 +-------------------------------------- src/app_neo4j_queries.py | 47 --------- 2 files changed, 2 insertions(+), 246 deletions(-) diff --git a/src/app.py b/src/app.py index b25be335..94c614d3 100644 --- a/src/app.py +++ b/src/app.py @@ -537,90 +537,6 @@ def get_entity_visibility(id): return jsonify(entity_scope.value) -""" -Retrieve the collection detail by id - -The gateway treats this endpoint as public accessible - -An optional Globus groups token can be provided in a standard Authentication Bearer header. If a valid token -is provided with group membership in the HuBMAP-Read group any collection matching the id will be returned. -otherwise if no token is provided or a valid token with no HuBMAP-Read group membership then -only a public collection will be returned. Public collections are defined as being published via a DOI -(collection.registered_doi not null) and at least one of the connected datasets is public -(dataset.status == 'Published'). For public collections only connected datasets that are -public are returned with it. - -By default we only reuturn the following Dataset properties: - -- collection.dataset.uuid -- collection.dataset.hubmap_id -- collection.dataset.data_types -- collection.dataset.status -- collection.dataset.last_modified_timestamp -- collection.dataset.created_by_user_displayname - -Parameters ----------- -id : str - The HuBMAP ID (e.g. HBM123.ABCD.456) or UUID of target collection - -Returns -------- -json - The collection detail with a list of connected datasets (only public datasets - if user doesn't have the right access permission) -""" -@app.route('/collections/', methods = ['GET']) -def get_collection(id): - # Token is not required, but if an invalid token provided, - # we need to tell the client with a 401 error - validate_token_if_auth_header_exists(request) - - # Use the internal token to query the target collection - # since public collections don't require user token - token = get_internal_token() - - # Get the entity dict from cache if exists - # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists - collection_dict = query_target_entity(id, token) - - # A bit validation - if collection_dict['entity_type'] != 'Collection': - bad_request_error("Target entity of the given id is not a collection") - - # Try to get user token from Authorization header - # It's highly possible that there's no token provided - user_token = get_user_token(request) - - # The user_token is flask.Response on error - # Without token, the user can only access public collections, modify the collection result - # by only returning public datasets attached to this collection - if isinstance(user_token, Response): - # When the requested collection is not public, send back 401 - if ('registered_doi' not in collection_dict) or ('doi_url' not in collection_dict): - # Require a valid token in this case - unauthorized_error("The requested collection is not public, a Globus token with the right access permission is required.") - - # Otherwise only return the public datasets attached to this collection - # for Collection.datasets property - complete_dict = get_complete_public_collection_dict(collection_dict) - else: - # When the groups token is valid, but the user doesn't belong to HuBMAP-READ group - # Or the token is valid but doesn't contain group information (auth token or transfer token) - # Only return the public datasets attached to this Collection - if not user_in_hubmap_read_group(request): - complete_dict = get_complete_public_collection_dict(collection_dict) - else: - # We'll need to return all the properties including those - # generated by `on_read_trigger` to have a complete result - complete_dict = schema_manager.get_complete_entity_result(user_token, collection_dict) - - # Will also filter the result based on schema - normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict) - - # Response with the final result - return jsonify(normalized_complete_dict) - def _get_entity_visibility(normalized_entity_type, entity_dict): if normalized_entity_type not in schema_manager.get_all_entity_types(): logger.log( logging.ERROR @@ -893,8 +809,7 @@ def get_entity_types(): Parameters ---------- entity_type : str - One of the supported entity types: Dataset, Sample, Donor - Will handle Collection via API endpoint `/collections` + One of the supported entity types: Dataset, Collection, Sample, Donor Returns ------- @@ -965,80 +880,6 @@ def get_entities_by_type(entity_type): # Response with the final result return jsonify(final_result) -""" -Retrieve all the public collections - -The gateway treats this endpoint as public accessible - -Result filtering is supported based on query string -For example: /collections?property=uuid - -Only return public collections, for either -- a valid token in HuBMAP-Read group, -- a valid token with no HuBMAP-Read group or -- no token at all - -Public collections are defined as being published via a DOI -(collection.registered_doi is not null) and at least one of the connected datasets is published -(dataset.status == 'Published'). For public collections only connected datasets that are -published are returned with it. - -Returns -------- -json - A list of all the public collection dictionaries (with attached public datasts) -""" -@app.route('/collections', methods = ['GET']) -def get_collections(): - final_result = [] - - # Token is not required, but if an invalid token provided, - # we need to tell the client with a 401 error - validate_token_if_auth_header_exists(request) - - normalized_entity_type = 'Collection' - - # Result filtering based on query string - if bool(request.args): - property_key = request.args.get('property') - - if property_key is not None: - result_filtering_accepted_property_keys = ['uuid'] - - # Validate the target property - if property_key not in result_filtering_accepted_property_keys: - bad_request_error(f"Only the following property keys are supported in the query string: {COMMA_SEPARATOR.join(result_filtering_accepted_property_keys)}") - - # Only return a list of the filtered property value of each public collection - final_result = app_neo4j_queries.get_public_collections(neo4j_driver_instance, property_key) - else: - bad_request_error("The specified query string is not supported. Use '?property=' to filter the result") - # Return all the details if no property filtering - else: - # Use the internal token since no user token is requried to access public collections - token = get_internal_token() - - # Get back a list of public collections dicts - collections_list = app_neo4j_queries.get_public_collections(neo4j_driver_instance) - - # Modify the Collection.datasets property for each collection dict - # to contain only public datasets - for collection_dict in collections_list: - # Only return the public datasets attached to this collection for Collection.datasets property - collection_dict = get_complete_public_collection_dict(collection_dict) - - # Generate trigger data and merge into a big dict - # and skip some of the properties that are time-consuming to generate via triggers - properties_to_skip = ['datasets'] - complete_collections_list = schema_manager.get_complete_entities_list(token, collections_list, properties_to_skip) - - # Final result after normalization - final_result = schema_manager.normalize_entities_list_for_response(complete_collections_list) - - # Response with the final result - return jsonify(final_result) - - """ Create an entity of the target type in neo4j @@ -1049,7 +890,7 @@ def get_collections(): Parameters ---------- entity_type : str - One of the target entity types (case-insensitive since will be normalized): Dataset, Donor, Sample, Upload + One of the target entity types (case-insensitive since will be normalized): Dataset, Donor, Sample, Upload, Collection Returns ------- @@ -4017,44 +3858,6 @@ def validate_token_if_auth_header_exists(request): def get_internal_token(): return auth_helper_instance.getProcessSecret() - -""" -Return the complete collection dict for a given raw collection dict - -Parameters ----------- -collection_dict : dict - The raw collection dict returned by Neo4j - -Returns -------- -dict - A dictionary of complete collection detail with all the generated 'on_read_trigger' data - The generated Collection.datasts contains only public datasets - if user/token doesn't have the right access permission -""" -def get_complete_public_collection_dict(collection_dict): - # Use internal token to query entity since - # no user token is required to access a public collection - token = get_internal_token() - - # Collection.datasets is transient property and generated by the trigger method - # We'll need to return all the properties including those - # generated by `on_read_trigger` to have a complete result - complete_dict = schema_manager.get_complete_entity_result(token, collection_dict) - - # Loop through Collection.datasets and only return the published/public datasets - public_datasets = [] - for dataset in complete_dict['datasets']: - if dataset['status'].lower() == DATASET_STATUS_PUBLISHED: - public_datasets.append(dataset) - - # Modify the result and only show the public datasets in this collection - complete_dict['datasets'] = public_datasets - - return complete_dict - - """ Generate 'before_create_triiger' data and create the entity details in Neo4j diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 933017e8..98fd674c 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -97,53 +97,6 @@ def get_entities_by_type(neo4j_driver, entity_type, property_key = None): return results -""" -Get all the public collection nodes - -Parameters ----------- -neo4j_driver : neo4j.Driver object - The neo4j database connection pool -property_key : str - A target property key for result filtering - -Returns -------- -list - A list of public collections returned from the Cypher query -""" -def get_public_collections(neo4j_driver, property_key = None): - results = [] - - if property_key: - query = (f"MATCH (e:Collection) " - f"WHERE e.registered_doi IS NOT NULL AND e.doi_url IS NOT NULL " - # COLLECT() returns a list - # apoc.coll.toSet() reruns a set containing unique nodes - f"RETURN apoc.coll.toSet(COLLECT(e.{property_key})) AS {record_field_name}") - else: - query = (f"MATCH (e:Collection) " - f"WHERE e.registered_doi IS NOT NULL AND e.doi_url IS NOT NULL " - # COLLECT() returns a list - # apoc.coll.toSet() reruns a set containing unique nodes - f"RETURN apoc.coll.toSet(COLLECT(e)) AS {record_field_name}") - - logger.info("======get_public_collections() query======") - logger.info(query) - - with neo4j_driver.session() as session: - record = session.read_transaction(schema_neo4j_queries.execute_readonly_tx, query) - - if record and record[record_field_name]: - if property_key: - # Just return the list of property values from each entity node - results = record[record_field_name] - else: - # Convert the list of nodes to a list of dicts - results = schema_neo4j_queries.nodes_to_dicts(record[record_field_name]) - - return results - """ Retrieve the ancestor organ(s) of a given entity From 9f57ac2fb01942c702a3f3f320b7485dd344b5dd Mon Sep 17 00:00:00 2001 From: Karl Burke Date: Mon, 23 Oct 2023 11:02:49 -0400 Subject: [PATCH 2/2] Remove endpoints which handled Collections prior to them being regular entities, and the code supporting *only* those endpoints. --- entity-api-spec.yaml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml index 5943ede7..3c1909dd 100644 --- a/entity-api-spec.yaml +++ b/entity-api-spec.yaml @@ -1707,33 +1707,6 @@ paths: description: The target entity could not be found '500': description: Internal error - '/collections/{id}': - get: - summary: 'Returns the information of the Collection specified by the uuid with all connected datasets. If a valid token is provided with group membership in the HuBMAP-Read group any collection matching the id will be returned. Otherwise if no token is provided or a valid token with no HuBMAP-Read group membership then only a public collection will be returned. Public collections are defined as being published via a DOI (collection.doi_registered == true) and at least one of the connected datasets is public (dataset.metadata.data_access_level == ''public''). For public collections only connected datasets that are public are returned with it.' - parameters: - - name: id - in: path - description: The unique identifier of entity. This identifier can be either an HuBMAP ID (e.g. HBM123.ABCD.456) or UUID - required: true - schema: - type: string - responses: - '200': - description: The collection is returned - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/Collection' - '400': - description: Invalid or misformatted entity identifier - '401': - description: The user's token has expired or the user did not supply a valid token - '404': - description: The target entity could not be found - '500': - description: Internal error '/entities/new/{entity_type}': post: summary: Create a new entity of the target type