From 8038ae55588a85c6aca255eb967c435558730b37 Mon Sep 17 00:00:00 2001 From: Paul Fouquet Date: Wed, 8 Jan 2025 15:58:56 +1300 Subject: [PATCH] wip: broken state --- poetry.lock | 22 ++++++++- pyproject.toml | 1 + scripts/datetimes.py | 9 ++-- scripts/stac/imagery/create_stac.py | 11 +++-- scripts/stac/imagery/item.py | 45 ++++++++++++++++++- scripts/stac/imagery/tests/collection_test.py | 15 +++---- .../stac/imagery/tests/create_stac_test.py | 33 +++++++------- scripts/stac/imagery/tests/item_test.py | 13 +++--- scripts/tests/collection_from_items_test.py | 8 ++-- 9 files changed, 108 insertions(+), 49 deletions(-) diff --git a/poetry.lock b/poetry.lock index 99a58dc93..62f229933 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1010,6 +1010,26 @@ tomlkit = ">=0.10.1" spelling = ["pyenchant (>=3.2,<4.0)"] testutils = ["gitpython (>3)"] +[[package]] +name = "pystac" +version = "1.11.0" +description = "Python library for working with the SpatioTemporal Asset Catalog (STAC) specification" +optional = false +python-versions = ">=3.10" +files = [ + {file = "pystac-1.11.0-py3-none-any.whl", hash = "sha256:10ac7c7b4ea6c5ec8333829a09ec1a33b596f02d1a97ffbbd72cd1b6c10598c1"}, + {file = "pystac-1.11.0.tar.gz", hash = "sha256:acb1e04be398a0cda2d8870ab5e90457783a8014a206590233171d8b2ae0d9e7"}, +] + +[package.dependencies] +python-dateutil = ">=2.7.0" + +[package.extras] +jinja2 = ["jinja2 (<4.0)"] +orjson = ["orjson (>=3.5)"] +urllib3 = ["urllib3 (>=1.26)"] +validation = ["jsonschema (>=4.18,<5.0)"] + [[package]] name = "pytest" version = "8.3.4" @@ -1483,4 +1503,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "~3.12.3" -content-hash = "11678d25e9494ac7384858577c2f3d861fd360fea67f1359c8cfb18da2a21d6c" +content-hash = "9de299b59e8cae4ab8e395b99cddbd3bc925e3ef9f4fbd95a4e2449773b4902f" diff --git a/pyproject.toml b/pyproject.toml index 4fd8a8065..973c64581 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,3 +58,4 @@ pytest-dependency = "*" pytest-mock = "*" pytest-subtests = "*" shellcheck-py = "*" +pystac = "^1.11.0" diff --git a/scripts/datetimes.py b/scripts/datetimes.py index f3f8a30b4..3af4b2a17 100644 --- a/scripts/datetimes.py +++ b/scripts/datetimes.py @@ -23,7 +23,7 @@ def format_rfc_3339_datetime_string(datetime_object: datetime) -> str: return datetime_object.astimezone(timezone.utc).strftime(RFC_3339_DATETIME_FORMAT) -def format_rfc_3339_nz_midnight_datetime_string(datetime_object: datetime) -> str: +def format_rfc_3339_nz_midnight_datetime(datetime_object: datetime) -> datetime: """Convert datetime to New Zealand midnight and format it to UTC""" naive_midnight_datetime_string = f"{datetime_object.strftime(RFC_3339_DATE_FORMAT)}T00:00:00.000" @@ -34,9 +34,12 @@ def format_rfc_3339_nz_midnight_datetime_string(datetime_object: datetime) -> st raise Exception(f"Not a valid date: {err}") from err utc_tz = tz.gettz("UTC") - datetime_utc = nz_datetime.astimezone(utc_tz) + return nz_datetime.astimezone(utc_tz) + - return format_rfc_3339_datetime_string(datetime_utc) +def format_rfc_3339_nz_midnight_datetime_string(datetime_object: datetime) -> str: + """Convert datetime to New Zealand midnight and format it to UTC as string""" + return format_rfc_3339_datetime_string(format_rfc_3339_nz_midnight_datetime(datetime_object)) class NaiveDatetimeError(Exception): diff --git a/scripts/stac/imagery/create_stac.py b/scripts/stac/imagery/create_stac.py index 6c112022c..5cfa47a7c 100644 --- a/scripts/stac/imagery/create_stac.py +++ b/scripts/stac/imagery/create_stac.py @@ -123,7 +123,7 @@ def create_item( Returns: a STAC Item wrapped in ImageryItem """ - item = create_or_load_base_item(asset_path, gdal_version, current_datetime, odr_url) + item = create_or_load_base_item(asset_path, gdal_version, current_datetime, start_datetime, end_datetime, odr_url) base_stac = item.clone() if not gdalinfo_result: @@ -153,7 +153,12 @@ def create_item( def create_or_load_base_item( - asset_path: str, gdal_version: str, current_datetime: str, odr_url: str | None = None + asset_path: str, + gdal_version: str, + current_datetime: str, + start_datetime: str, + end_datetime: str, + odr_url: str | None = None, ) -> ImageryItem: """ Args: @@ -197,7 +202,7 @@ def create_or_load_base_item( checksum=file_content_checksum, ) - return ImageryItem(id_, asset, stac_processing) + return ImageryItem(id_, asset, stac_processing, start_datetime, end_datetime) def get_published_file_contents(odr_url: str, filename: str) -> JSON_Dict: diff --git a/scripts/stac/imagery/item.py b/scripts/stac/imagery/item.py index 377e61432..8fc6683ae 100644 --- a/scripts/stac/imagery/item.py +++ b/scripts/stac/imagery/item.py @@ -2,8 +2,9 @@ import typing from typing import Any, TypedDict -from pystac import Asset, Item, Link, MediaType, RelType +from pystac import Asset, Item, Link, MediaType, RelType, StacIO +from scripts.datetimes import parse_rfc_3339_datetime from scripts.stac.link import create_link_with_checksum from scripts.stac.util.stac_extensions import StacExtensions @@ -25,7 +26,14 @@ class ImageryItem(Item): - def __init__(self, id_: str, asset: Asset, stac_processing: STACProcessing) -> None: + def __init__( + self, + id_: str, + asset: Asset, + stac_processing: STACProcessing, + start_datetime: str, + end_datetime: str, + ) -> None: stac_extensions = [StacExtensions.file.value, StacExtensions.processing.value] assets = {"visual": asset} properties = {"created": asset.extra_fields["created"], "updated": asset.extra_fields["updated"], **stac_processing} @@ -37,8 +45,41 @@ def __init__(self, id_: str, asset: Asset, stac_processing: STACProcessing) -> N stac_extensions=stac_extensions, assets=assets, properties=properties, + start_datetime=parse_rfc_3339_datetime(start_datetime), + end_datetime=parse_rfc_3339_datetime(end_datetime), ) + # Manually add the self link as pystac would force it to be absolute otherwise (requirement in STAC v1.1) + # pystac would have use the MediaType JSON for the self link, but we want to use the GeoJSON type + self.make_self_link_relative() + + @classmethod + def from_file(cls, href: str, stac_io: StacIO | None = None) -> "ImageryItem": + # Use pystac.Item's from_file to parse the STAC file + item = Item.from_file(href, stac_io) + imagery_item = cls( + id_=item.id, + asset=item.assets["visual"], + stac_processing={ + "processing:datetime": item.properties["processing:datetime"], + "processing:software": item.properties["processing:software"], + "processing:version": item.properties["processing:version"], + }, + start_datetime=item.properties["start_datetime"], + end_datetime=item.properties["end_datetime"], + ) + if item.collection_id: + imagery_item.add_collection(item.collection_id) + imagery_item.update_spatial(item.geometry, item.bbox) + imagery_item.make_self_link_relative() + + return imagery_item + + def make_self_link_relative(self) -> None: + """Make the self link relative""" + self.links = [l for l in self.links if l.rel != RelType.SELF] + self.add_link(Link(target=f"./{self.id}.json", rel=RelType.SELF, media_type=MediaType.GEOJSON)) + def update_checksum_related_metadata(self, file_content_checksum: str, stac_processing_data: STACProcessing) -> None: """Set the assets.visual.file:checksum attribute if it has changed. If the checksum has changed, this also updates the following attributes: diff --git a/scripts/stac/imagery/tests/collection_test.py b/scripts/stac/imagery/tests/collection_test.py index 050e67170..6f7286317 100644 --- a/scripts/stac/imagery/tests/collection_test.py +++ b/scripts/stac/imagery/tests/collection_test.py @@ -126,26 +126,21 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, fake_linz_slug: "created": any_epoch_datetime_string(), "updated": any_epoch_datetime_string(), } + start_datetime = "2021-02-01T00:00:00Z" + end_datetime = "2021-02-20T00:00:00Z" item = ImageryItem( "BR34_5000_0304", - create_visual_asset(href="any href", file_checksum="any checksum", created=asset_datetimes["created"]), - # "href": "any href", - # "file:checksum": "any checksum", - # "created": asset_datetimes["created"], - # "updated": asset_datetimes["updated"], - # } + create_visual_asset(href="any href", checksum="any checksum", created=asset_datetimes["created"]), any_stac_processing(), + start_datetime, + end_datetime, ) geometry = { "type": "Polygon", "coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]], } bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0) - start_datetime = "2021-01-27T00:00:00Z" - end_datetime = "2021-01-27T00:00:00Z" item.update_spatial(geometry, bbox) - item.update_datetime(start_datetime, end_datetime) - collection.add_item(item.to_dict()) links = collection.stac["links"].copy() diff --git a/scripts/stac/imagery/tests/create_stac_test.py b/scripts/stac/imagery/tests/create_stac_test.py index 468c61e7e..745807447 100644 --- a/scripts/stac/imagery/tests/create_stac_test.py +++ b/scripts/stac/imagery/tests/create_stac_test.py @@ -3,7 +3,6 @@ from pathlib import Path from typing import cast -from pystac import Link, MediaType, RelType from pytest_subtests import SubTests from scripts.datetimes import format_rfc_3339_datetime_string @@ -22,8 +21,8 @@ def test_create_item(subtests: SubTests) -> None: current_datetime = any_epoch_datetime_string() item = create_item( "./scripts/tests/data/empty.tiff", - "", - "", + "2024-09-02T12:00:00Z", + "2024-09-02T12:00:00Z", "abc123", "any GDAL version", current_datetime, @@ -37,10 +36,10 @@ def test_create_item(subtests: SubTests) -> None: assert item.properties["updated"] == current_datetime with subtests.test(msg="assets.visual.created"): - assert item.assets["visual"]["created"] == current_datetime + assert item.assets["visual"].extra_fields["created"] == current_datetime with subtests.test(msg="assets.visual.updated"): - assert item.assets["visual"]["updated"] == current_datetime + assert item.assets["visual"].extra_fields["updated"] == current_datetime def test_create_item_when_resupplying(subtests: SubTests, tmp_path: Path) -> None: @@ -179,13 +178,12 @@ def test_create_item_with_derived_from(tmp_path: Path) -> None: [derived_from_path.as_posix()], ) - expected_link = Link( - derived_from_path.as_posix(), - RelType.DERIVED_FROM, - MediaType.JSON, - extra_fields={"file:checksum": "12209c3d50f21fdd739de5c76b3c7ca60ee7f5cf69c2cf92b1d0136308cf63d9c5d5"}, - ) - assert expected_link in item.links + assert { + "href": derived_from_path.as_posix(), + "rel": "derived_from", + "type": "application/geo+json", + "file:checksum": "12209c3d50f21fdd739de5c76b3c7ca60ee7f5cf69c2cf92b1d0136308cf63d9c5d5", + } in item.to_dict()["links"] def test_create_item_with_derived_from_datetimes(tmp_path: Path) -> None: @@ -307,7 +305,7 @@ def test_create_item_with_odr_url(tmp_path: Path) -> None: "this current datetime", fake_gdal_info, ) - existing_item_file.write_text(json.dumps(item_from_scratch.stac)) + existing_item_file.write_text(json.dumps(item_from_scratch.to_dict())) item_from_odr_unchanged = create_item( tiff_path, "a start datetime", @@ -318,7 +316,7 @@ def test_create_item_with_odr_url(tmp_path: Path) -> None: fake_gdal_info, odr_url=tmp_path.as_posix(), ) - assert item_from_odr_unchanged.stac == item_from_scratch.stac + assert item_from_odr_unchanged.to_dict() == item_from_scratch.to_dict() item_from_odr_changed = create_item( tiff_path, @@ -334,7 +332,7 @@ def test_create_item_with_odr_url(tmp_path: Path) -> None: del item_from_odr_changed.properties["end_datetime"] del item_from_scratch.properties["start_datetime"] del item_from_scratch.properties["end_datetime"] - assert item_from_odr_changed.stac == item_from_scratch.stac + assert item_from_odr_changed.to_dict() == item_from_scratch.to_dict() def test_create_item_when_resupplying_with_new_file(subtests: SubTests, tmp_path: Path) -> None: @@ -397,8 +395,7 @@ def test_create_item_when_resupplying_with_changed_asset_file(subtests: SubTests ) with subtests.test(msg="assets.visual.created"): - assert item.assets["visual"]["created"] == created_datetime + assert item.assets["visual"].extra_fields["created"] == created_datetime with subtests.test(msg="assets.visual.updated"): - assert item.assets["visual"]["updated"] == current_datetime - assert item.assets["visual"]["updated"] == current_datetime + assert item.assets["visual"].extra_fields["updated"] == current_datetime diff --git a/scripts/stac/imagery/tests/item_test.py b/scripts/stac/imagery/tests/item_test.py index f3783be09..1a0f61902 100644 --- a/scripts/stac/imagery/tests/item_test.py +++ b/scripts/stac/imagery/tests/item_test.py @@ -23,7 +23,7 @@ def test_imagery_stac_item(subtests: SubTests) -> None: "type": "Polygon", "coordinates": [[[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]]], } - bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0) + bbox = [1799667.5, 5815977.0, 1800422.5, 5814986.0] path = "./scripts/tests/data/empty.tiff" id_ = get_file_name_from_path(path) @@ -35,9 +35,8 @@ def test_imagery_stac_item(subtests: SubTests) -> None: asset = any_visual_asset() stac_processing = any_stac_processing() with patch.dict(environ, {"GIT_HASH": git_hash, "GIT_VERSION": git_version}): - item = ImageryItem(id_, asset, stac_processing) + item = ImageryItem(id_, asset, stac_processing, start_datetime, end_datetime) item.update_spatial(geometry, bbox) - item.update_datetime(start_datetime, end_datetime) # checks with subtests.test(): @@ -132,15 +131,15 @@ def test_imagery_add_collection(fake_linz_slug: str, subtests: SubTests) -> None path = "./scripts/tests/data/empty.tiff" id_ = get_file_name_from_path(path) - item = ImageryItem(id_, any_visual_asset(), any_stac_processing()) + item = ImageryItem(id_, any_visual_asset(), any_stac_processing(), "2022-02-02T00:00:00Z", "2022-02-02T00:00:00Z") - item.set_collection(collection.stac["id"]) + item.add_collection(collection.stac["id"]) with subtests.test(): assert item.collection_id == ulid with subtests.test(): - assert {"rel": "collection", "href": "./collection.json", "type": "application/json"} in item.stac["links"] + assert {"rel": "collection", "href": "./collection.json", "type": "application/json"} in item.to_dict()["links"] with subtests.test(): - assert {"rel": "parent", "href": "./collection.json", "type": "application/json"} in item.stac["links"] + assert {"rel": "parent", "href": "./collection.json", "type": "application/json"} in item.to_dict()["links"] diff --git a/scripts/tests/collection_from_items_test.py b/scripts/tests/collection_from_items_test.py index 0aa4c18d0..f6c20c3f2 100644 --- a/scripts/tests/collection_from_items_test.py +++ b/scripts/tests/collection_from_items_test.py @@ -30,16 +30,14 @@ def setup() -> Iterator[ImageryItem]: # Create mocked STAC Item with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}): - item = ImageryItem("123", any_visual_asset(), any_stac_processing()) + item = ImageryItem("123", any_visual_asset(), any_stac_processing(), "2021-01-27T00:00:00Z", "2021-01-27T00:00:00Z") geometry = { "type": "Polygon", "coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]], } - bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0) - start_datetime = "2021-01-27T00:00:00Z" - end_datetime = "2021-01-27T00:00:00Z" + bbox = [1799667.5, 5815977.0, 1800422.5, 5814986.0] + item.update_spatial(geometry, bbox) - item.update_datetime(start_datetime, end_datetime) yield item