From 0f929133bd8cf265b1699a2636ee123739037e7e Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 14 Nov 2024 14:51:32 -0500 Subject: [PATCH 1/4] Added fields metadata, files, and calculated_metadata to provenance schema --- src/schema/provenance_schema.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 7b20274e..cb5369af 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -466,6 +466,18 @@ ENTITIES: type: json_string # dict indexed: true description: "The metadata returned from the processing at data submission time." + metadata: + type: json_string + indexed: true + description: "The metadata returned from the processing at data submission time." + files: + type: json_string + indexed: true + description: "The metadata files" + calculated_metadata: + type: json_string + indexed: true + description: "The calculated metadata" local_directory_rel_path: # Example: protected// type: string From da3ddb5d130b9f7b93c32172961c648cf8813f46 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Thu, 14 Nov 2024 16:47:43 -0500 Subject: [PATCH 2/4] added validators to new ingest_metadata fields --- src/schema/provenance_schema.yaml | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index cb5369af..03689902 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -302,9 +302,8 @@ ENTITIES: # Dataset can be either derivation source or target excluded_properties_from_public_response: - lab_dataset_id - - ingest_metadata: - metadata: - - lab_id + - metadata: + - lab_id derivation: source: true target: true @@ -470,14 +469,26 @@ ENTITIES: type: json_string indexed: true description: "The metadata returned from the processing at data submission time." + before_property_create_validators: + - validate_application_header_before_property_update + before_property_update_validators: + - validate_application_header_before_property_update files: - type: json_string + type: list indexed: true - description: "The metadata files" + before_property_create_validators: + - validate_application_header_before_property_update + before_property_update_validators: + - validate_application_header_before_property_update + description: "A list of files associated with the dataset." calculated_metadata: type: json_string indexed: true - description: "The calculated metadata" + before_property_create_validators: + - validate_application_header_before_property_update + before_property_update_validators: + - validate_application_header_before_property_update + description: "Calculated metadata outputted from the processing pipeline." local_directory_rel_path: # Example: protected// type: string From a64dbaea236d8e8bb8499d8b8e022664d397d487 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 3 Dec 2024 11:08:57 -0500 Subject: [PATCH 3/4] Updated provenance schema descriptions for metadata and ingest metadata. Added new fields to entity api spec yaml --- entity-api-spec.yaml | 11 ++++++++++- src/schema/provenance_schema.yaml | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml index e51684c7..5ffb7325 100644 --- a/entity-api-spec.yaml +++ b/entity-api-spec.yaml @@ -710,7 +710,16 @@ components: description: "The email address of the user who published the provided by the authorization mechanism for the person or process authenticated when published." ingest_metadata: type: object - description: "The metadata returned from the ingest pipeline processing at data submission time. Provided as json." + description: "Information associated with running the ingest and processing pipelines." + metadata: + type: object + description: "Metadata associated with the ingested experimental data." + files: + type: array + description: "A list of files associated with the dataset." + calculated_metadata: + type: object + description: "Calculated metadata outputted from the processing pipeline." local_directory_rel_path: type: string readOnly: true diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 03689902..8fc58ca8 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -464,11 +464,11 @@ ENTITIES: ingest_metadata: type: json_string # dict indexed: true - description: "The metadata returned from the processing at data submission time." + description: "Information associated with running the ingest and processing pipelines." metadata: type: json_string indexed: true - description: "The metadata returned from the processing at data submission time." + description: "Metadata associated with the ingested experimental data." before_property_create_validators: - validate_application_header_before_property_update before_property_update_validators: From 7ef440a742ef993057f03b99164f12dbac858124 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 3 Dec 2024 11:16:21 -0500 Subject: [PATCH 4/4] copied metadata reorganization changes from datasets to publications too --- entity-api-spec.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml index 5ffb7325..ed7ee809 100644 --- a/entity-api-spec.yaml +++ b/entity-api-spec.yaml @@ -1123,7 +1123,16 @@ components: description: "The email address of the user who published the provided by the authorization mechanism for the person or process authenticated when published." ingest_metadata: type: object - description: "The metadata returned from the ingest pipeline processing at data submission time. Provided as json." + description: "Information associated with running the ingest and processing pipelines." + metadata: + type: object + description: "Metadata associated with the ingested experimental data." + files: + type: array + description: "A list of files associated with the dataset." + calculated_metadata: + type: object + description: "Calculated metadata outputted from the processing pipeline." local_directory_rel_path: type: string readOnly: true