Skip to content

Commit

Permalink
wip: broken state
Browse files Browse the repository at this point in the history
  • Loading branch information
paulfouquet committed Jan 8, 2025
1 parent 645e525 commit 8038ae5
Show file tree
Hide file tree
Showing 9 changed files with 108 additions and 49 deletions.
22 changes: 21 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ pytest-dependency = "*"
pytest-mock = "*"
pytest-subtests = "*"
shellcheck-py = "*"
pystac = "^1.11.0"
9 changes: 6 additions & 3 deletions scripts/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def format_rfc_3339_datetime_string(datetime_object: datetime) -> str:
return datetime_object.astimezone(timezone.utc).strftime(RFC_3339_DATETIME_FORMAT)


def format_rfc_3339_nz_midnight_datetime_string(datetime_object: datetime) -> str:
def format_rfc_3339_nz_midnight_datetime(datetime_object: datetime) -> datetime:
"""Convert datetime to New Zealand midnight and format it to UTC"""
naive_midnight_datetime_string = f"{datetime_object.strftime(RFC_3339_DATE_FORMAT)}T00:00:00.000"

Expand All @@ -34,9 +34,12 @@ def format_rfc_3339_nz_midnight_datetime_string(datetime_object: datetime) -> st
raise Exception(f"Not a valid date: {err}") from err

utc_tz = tz.gettz("UTC")
datetime_utc = nz_datetime.astimezone(utc_tz)
return nz_datetime.astimezone(utc_tz)


return format_rfc_3339_datetime_string(datetime_utc)
def format_rfc_3339_nz_midnight_datetime_string(datetime_object: datetime) -> str:
    """Return New Zealand midnight of the given date as a UTC RFC 3339 string.

    Args:
        datetime_object: the datetime whose date is used.

    Returns:
        the result of `format_rfc_3339_nz_midnight_datetime`, formatted by
        `format_rfc_3339_datetime_string`.
    """
    nz_midnight_in_utc = format_rfc_3339_nz_midnight_datetime(datetime_object)
    return format_rfc_3339_datetime_string(nz_midnight_in_utc)


class NaiveDatetimeError(Exception):
Expand Down
11 changes: 8 additions & 3 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def create_item(
Returns:
a STAC Item wrapped in ImageryItem
"""
item = create_or_load_base_item(asset_path, gdal_version, current_datetime, odr_url)
item = create_or_load_base_item(asset_path, gdal_version, current_datetime, start_datetime, end_datetime, odr_url)
base_stac = item.clone()

if not gdalinfo_result:
Expand Down Expand Up @@ -153,7 +153,12 @@ def create_item(


def create_or_load_base_item(
asset_path: str, gdal_version: str, current_datetime: str, odr_url: str | None = None
asset_path: str,
gdal_version: str,
current_datetime: str,
start_datetime: str,
end_datetime: str,
odr_url: str | None = None,
) -> ImageryItem:
"""
Args:
Expand Down Expand Up @@ -197,7 +202,7 @@ def create_or_load_base_item(
checksum=file_content_checksum,
)

return ImageryItem(id_, asset, stac_processing)
return ImageryItem(id_, asset, stac_processing, start_datetime, end_datetime)


def get_published_file_contents(odr_url: str, filename: str) -> JSON_Dict:
Expand Down
45 changes: 43 additions & 2 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import typing
from typing import Any, TypedDict

from pystac import Asset, Item, Link, MediaType, RelType
from pystac import Asset, Item, Link, MediaType, RelType, StacIO

from scripts.datetimes import parse_rfc_3339_datetime
from scripts.stac.link import create_link_with_checksum
from scripts.stac.util.stac_extensions import StacExtensions

Expand All @@ -25,7 +26,14 @@

class ImageryItem(Item):

def __init__(self, id_: str, asset: Asset, stac_processing: STACProcessing) -> None:
def __init__(
self,
id_: str,
asset: Asset,
stac_processing: STACProcessing,
start_datetime: str,
end_datetime: str,
) -> None:
stac_extensions = [StacExtensions.file.value, StacExtensions.processing.value]
assets = {"visual": asset}
properties = {"created": asset.extra_fields["created"], "updated": asset.extra_fields["updated"], **stac_processing}
Expand All @@ -37,8 +45,41 @@ def __init__(self, id_: str, asset: Asset, stac_processing: STACProcessing) -> N
stac_extensions=stac_extensions,
assets=assets,
properties=properties,
start_datetime=parse_rfc_3339_datetime(start_datetime),
end_datetime=parse_rfc_3339_datetime(end_datetime),
)

# Manually add the self link, as pystac would otherwise force it to be absolute (requirement in STAC v1.1).
# pystac would also have used the JSON MediaType for the self link, but we want to use the GeoJSON type.
self.make_self_link_relative()

@classmethod
def from_file(cls, href: str, stac_io: StacIO | None = None) -> "ImageryItem":
    """Load a STAC Item file and rebuild it as an `ImageryItem`.

    The file is parsed with `pystac.Item.from_file`; a new instance of this
    class is then constructed from the parsed item's id, its "visual" asset,
    its three `processing:*` properties and its start/end datetimes.
    The collection (when the parsed item has one), geometry and bbox are
    applied afterwards.

    Args:
        href: location of the STAC Item file to read.
        stac_io: optional `StacIO` passed through to `pystac.Item.from_file`.

    Returns:
        the rebuilt `ImageryItem`.

    Raises:
        KeyError: if the "visual" asset or one of the properties read below
            is missing from the parsed item.
    """
    # Parse with the plain pystac Item, then map its fields onto our constructor.
    parsed = Item.from_file(href, stac_io)
    props = parsed.properties

    rebuilt = cls(
        id_=parsed.id,
        asset=parsed.assets["visual"],
        stac_processing={
            "processing:datetime": props["processing:datetime"],
            "processing:software": props["processing:software"],
            "processing:version": props["processing:version"],
        },
        start_datetime=props["start_datetime"],
        end_datetime=props["end_datetime"],
    )

    collection_id = parsed.collection_id
    if collection_id:
        rebuilt.add_collection(collection_id)

    rebuilt.update_spatial(parsed.geometry, parsed.bbox)
    # Re-apply the relative self link once the updates above are done.
    rebuilt.make_self_link_relative()
    return rebuilt

def make_self_link_relative(self) -> None:
    """Replace any existing self link with a single relative one.

    Every link whose `rel` is `self` is dropped, then a new self link
    targeting `./<id>.json` with the GeoJSON media type is added.
    """
    relative_self_link = Link(
        target=f"./{self.id}.json",
        rel=RelType.SELF,
        media_type=MediaType.GEOJSON,
    )
    self.links = [existing for existing in self.links if existing.rel != RelType.SELF]
    self.add_link(relative_self_link)

def update_checksum_related_metadata(self, file_content_checksum: str, stac_processing_data: STACProcessing) -> None:
"""Set the assets.visual.file:checksum attribute if it has changed.
If the checksum has changed, this also updates the following attributes:
Expand Down
15 changes: 5 additions & 10 deletions scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,26 +126,21 @@ def test_add_item(fake_collection_metadata: CollectionMetadata, fake_linz_slug:
"created": any_epoch_datetime_string(),
"updated": any_epoch_datetime_string(),
}
start_datetime = "2021-02-01T00:00:00Z"
end_datetime = "2021-02-20T00:00:00Z"
item = ImageryItem(
"BR34_5000_0304",
create_visual_asset(href="any href", file_checksum="any checksum", created=asset_datetimes["created"]),
# "href": "any href",
# "file:checksum": "any checksum",
# "created": asset_datetimes["created"],
# "updated": asset_datetimes["updated"],
# }
create_visual_asset(href="any href", checksum="any checksum", created=asset_datetimes["created"]),
any_stac_processing(),
start_datetime,
end_datetime,
)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
}
bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0)
start_datetime = "2021-01-27T00:00:00Z"
end_datetime = "2021-01-27T00:00:00Z"
item.update_spatial(geometry, bbox)
item.update_datetime(start_datetime, end_datetime)

collection.add_item(item.to_dict())

links = collection.stac["links"].copy()
Expand Down
33 changes: 15 additions & 18 deletions scripts/stac/imagery/tests/create_stac_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from pathlib import Path
from typing import cast

from pystac import Link, MediaType, RelType
from pytest_subtests import SubTests

from scripts.datetimes import format_rfc_3339_datetime_string
Expand All @@ -22,8 +21,8 @@ def test_create_item(subtests: SubTests) -> None:
current_datetime = any_epoch_datetime_string()
item = create_item(
"./scripts/tests/data/empty.tiff",
"",
"",
"2024-09-02T12:00:00Z",
"2024-09-02T12:00:00Z",
"abc123",
"any GDAL version",
current_datetime,
Expand All @@ -37,10 +36,10 @@ def test_create_item(subtests: SubTests) -> None:
assert item.properties["updated"] == current_datetime

with subtests.test(msg="assets.visual.created"):
assert item.assets["visual"]["created"] == current_datetime
assert item.assets["visual"].extra_fields["created"] == current_datetime

with subtests.test(msg="assets.visual.updated"):
assert item.assets["visual"]["updated"] == current_datetime
assert item.assets["visual"].extra_fields["updated"] == current_datetime


def test_create_item_when_resupplying(subtests: SubTests, tmp_path: Path) -> None:
Expand Down Expand Up @@ -179,13 +178,12 @@ def test_create_item_with_derived_from(tmp_path: Path) -> None:
[derived_from_path.as_posix()],
)

expected_link = Link(
derived_from_path.as_posix(),
RelType.DERIVED_FROM,
MediaType.JSON,
extra_fields={"file:checksum": "12209c3d50f21fdd739de5c76b3c7ca60ee7f5cf69c2cf92b1d0136308cf63d9c5d5"},
)
assert expected_link in item.links
assert {
"href": derived_from_path.as_posix(),
"rel": "derived_from",
"type": "application/geo+json",
"file:checksum": "12209c3d50f21fdd739de5c76b3c7ca60ee7f5cf69c2cf92b1d0136308cf63d9c5d5",
} in item.to_dict()["links"]


def test_create_item_with_derived_from_datetimes(tmp_path: Path) -> None:
Expand Down Expand Up @@ -307,7 +305,7 @@ def test_create_item_with_odr_url(tmp_path: Path) -> None:
"this current datetime",
fake_gdal_info,
)
existing_item_file.write_text(json.dumps(item_from_scratch.stac))
existing_item_file.write_text(json.dumps(item_from_scratch.to_dict()))
item_from_odr_unchanged = create_item(
tiff_path,
"a start datetime",
Expand All @@ -318,7 +316,7 @@ def test_create_item_with_odr_url(tmp_path: Path) -> None:
fake_gdal_info,
odr_url=tmp_path.as_posix(),
)
assert item_from_odr_unchanged.stac == item_from_scratch.stac
assert item_from_odr_unchanged.to_dict() == item_from_scratch.to_dict()

item_from_odr_changed = create_item(
tiff_path,
Expand All @@ -334,7 +332,7 @@ def test_create_item_with_odr_url(tmp_path: Path) -> None:
del item_from_odr_changed.properties["end_datetime"]
del item_from_scratch.properties["start_datetime"]
del item_from_scratch.properties["end_datetime"]
assert item_from_odr_changed.stac == item_from_scratch.stac
assert item_from_odr_changed.to_dict() == item_from_scratch.to_dict()


def test_create_item_when_resupplying_with_new_file(subtests: SubTests, tmp_path: Path) -> None:
Expand Down Expand Up @@ -397,8 +395,7 @@ def test_create_item_when_resupplying_with_changed_asset_file(subtests: SubTests
)

with subtests.test(msg="assets.visual.created"):
assert item.assets["visual"]["created"] == created_datetime
assert item.assets["visual"].extra_fields["created"] == created_datetime

with subtests.test(msg="assets.visual.updated"):
assert item.assets["visual"]["updated"] == current_datetime
assert item.assets["visual"]["updated"] == current_datetime
assert item.assets["visual"].extra_fields["updated"] == current_datetime
13 changes: 6 additions & 7 deletions scripts/stac/imagery/tests/item_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_imagery_stac_item(subtests: SubTests) -> None:
"type": "Polygon",
"coordinates": [[[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]]],
}
bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0)
bbox = [1799667.5, 5815977.0, 1800422.5, 5814986.0]

path = "./scripts/tests/data/empty.tiff"
id_ = get_file_name_from_path(path)
Expand All @@ -35,9 +35,8 @@ def test_imagery_stac_item(subtests: SubTests) -> None:
asset = any_visual_asset()
stac_processing = any_stac_processing()
with patch.dict(environ, {"GIT_HASH": git_hash, "GIT_VERSION": git_version}):
item = ImageryItem(id_, asset, stac_processing)
item = ImageryItem(id_, asset, stac_processing, start_datetime, end_datetime)
item.update_spatial(geometry, bbox)
item.update_datetime(start_datetime, end_datetime)

# checks
with subtests.test():
Expand Down Expand Up @@ -132,15 +131,15 @@ def test_imagery_add_collection(fake_linz_slug: str, subtests: SubTests) -> None

path = "./scripts/tests/data/empty.tiff"
id_ = get_file_name_from_path(path)
item = ImageryItem(id_, any_visual_asset(), any_stac_processing())
item = ImageryItem(id_, any_visual_asset(), any_stac_processing(), "2022-02-02T00:00:00Z", "2022-02-02T00:00:00Z")

item.set_collection(collection.stac["id"])
item.add_collection(collection.stac["id"])

with subtests.test():
assert item.collection_id == ulid

with subtests.test():
assert {"rel": "collection", "href": "./collection.json", "type": "application/json"} in item.stac["links"]
assert {"rel": "collection", "href": "./collection.json", "type": "application/json"} in item.to_dict()["links"]

with subtests.test():
assert {"rel": "parent", "href": "./collection.json", "type": "application/json"} in item.stac["links"]
assert {"rel": "parent", "href": "./collection.json", "type": "application/json"} in item.to_dict()["links"]
8 changes: 3 additions & 5 deletions scripts/tests/collection_from_items_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,14 @@
def setup() -> Iterator[ImageryItem]:
# Create mocked STAC Item
with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}):
item = ImageryItem("123", any_visual_asset(), any_stac_processing())
item = ImageryItem("123", any_visual_asset(), any_stac_processing(), "2021-01-27T00:00:00Z", "2021-01-27T00:00:00Z")
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
}
bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0)
start_datetime = "2021-01-27T00:00:00Z"
end_datetime = "2021-01-27T00:00:00Z"
bbox = [1799667.5, 5815977.0, 1800422.5, 5814986.0]

item.update_spatial(geometry, bbox)
item.update_datetime(start_datetime, end_datetime)
yield item


Expand Down

0 comments on commit 8038ae5

Please sign in to comment.