updates to wesm, overall organization
scottyhq committed Oct 2, 2024
1 parent a079eb8 commit 57dfd05
Showing 10 changed files with 1,305 additions and 2,984 deletions.
52 changes: 26 additions & 26 deletions .devcontainer/devcontainer.json
@@ -1,41 +1,41 @@
{
"image": "ghcr.io/prefix-dev/pixi:0.30.0-jammy",
"postCreateCommand": {
// Run *after* source code with pyproject.toml is pulled to CWD
"pixi-install": "pixi install"
},
// Slow...
// Git maybe added to base image https://github.com/prefix-dev/pixi-docker/issues/41
"features": {
"ghcr.io/devcontainers/features/git:1": {
"version": "latest",
"ppa": "false"
}
},
"customizations": {
"features": {
"ghcr.io/devcontainers/features/git:1": {
"version": "latest",
"ppa": "false"
}
},
"customizations": {
// for notebooks to render correctly see https://github.com/orgs/community/discussions/58399#discussioncomment-10504531
"codespaces": {
"openFiles": ["README.md"]
},
"vscode": {
"extensions": ["ms-toolsai.jupyter", "ms-python.python"]
}
},
"secrets": {
"MAXAR_API_KEY": {
"description": "Maxar platform api key",
"documentationUrl": "https://developers.maxar.com/docs/authentication/guides/api-key"
},
"PC_SDK_SUBSCRIPTION_KEY": {
"description": "Microsoft planetary computer subscription key",
"documentationUrl": "https://github.com/microsoft/planetary-computer-sdk-for-python"
},
"EARTHDATA_USERNAME": {
"description": "NASA Earthdata username",
"documentationUrl": "https://earthaccess.readthedocs.io/en/latest/howto/authenticate/#authenticate-with-earthdata-login"
},
"EARTHDATA_Password": {
"description": "NASA Earthdata password"
}
}
},
"secrets": {
"MAXAR_API_KEY": {
"description": "Maxar platform api key",
"documentationUrl": "https://developers.maxar.com/docs/authentication/guides/api-key"
},
"PC_SDK_SUBSCRIPTION_KEY": {
"description": "Microsoft planetary computer subscription key",
"documentationUrl": "https://github.com/microsoft/planetary-computer-sdk-for-python"
},
"EARTHDATA_USERNAME": {
"description": "NASA Earthdata username",
"documentationUrl": "https://earthaccess.readthedocs.io/en/latest/howto/authenticate/#authenticate-with-earthdata-login"
},
"EARTHDATA_PASSWORD": {
"description": "NASA Earthdata password"
}
}
}
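Note: Codespaces injects each entry under "secrets" into the container as an environment variable of the same name. A minimal sketch for verifying which credentials made it into the environment (names taken from the block above):

import os

# Report which of the configured devcontainer secrets are present
for name in (
    "MAXAR_API_KEY",
    "PC_SDK_SUBSCRIPTION_KEY",
    "EARTHDATA_USERNAME",
    "EARTHDATA_PASSWORD",
):
    print(name, "->", "set" if os.environ.get(name) else "missing")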
3,950 changes: 1,037 additions & 2,913 deletions pixi.lock

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
"maxar-platform==1.0.2a2",
"planetary-computer>=1.0.0,<2",
"pystac-client>=0.8.3,<0.9",
#"stac-asset>=0.4.3,<0.5",
]

[project.optional-dependencies]
@@ -137,6 +138,7 @@ ignore = [
"PLR09", # Too many <...>
"PLR2004", # Magic value used in comparison
"ISC001", # Conflicts with formatter
"PLC0414" # explicit re-export
]
isort.required-imports = ["from __future__ import annotations"]
# Uncomment if using a _compat.typing backport
@@ -174,6 +176,11 @@ docs = { features = ["docs"], solve-group = "default" }
geopandas = "*"
planetary-computer = "*"
pystac-client = "*"
fsspec = ">=2024.9.0,<2025"
requests = ">=2.32.3,<3"
aiohttp = ">=3.10.6,<4"
#rasterio = ">=1.4.0,<2"
#rioxarray = ">=0.17.0,<0.18"

[tool.pixi.feature.dev.dependencies]
pre-commit = "*"
@@ -185,6 +192,7 @@ uv = ">=0.4.14,<0.5"
ipykernel = ">=6.29.5,<7"
rich = ">=13.8.1,<14" # Optional. convenient for rich.print(dataset)
pyarrow = "17.*"
aiohttp = ">=3.10.6,<4"

[tool.pixi.feature.dev.pypi-dependencies]
coincident = { path = ".", editable = true }
@@ -196,5 +204,9 @@ pylint = "pipx run nox -s pylint"
test = "pytest -o markers=network -m 'not network' --cov --cov-report=xml --cov-report=term"
networktest = "pytest --cov --cov-report=xml --cov-report=term"


[tool.pixi.feature.docs.pypi-dependencies]
coincident = { path = ".", editable = true }

[tool.pixi.feature.docs.tasks]
docs = "python -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs docs/_build/html"
5 changes: 3 additions & 2 deletions src/coincident/datasets/__init__.py
@@ -23,7 +23,8 @@
nasa.GEDI(),
planetary_computer.COP30(),
]
-_aliases = [x.alias for x in _datasets]
-_alias_to_Dataset = dict(zip(_aliases, _datasets, strict=False))
+
+aliases = [x.alias for x in _datasets]
+_alias_to_Dataset = dict(zip(aliases, _datasets, strict=False))

__all__ = ["Dataset", "usgs", "maxar", "nasa", "planetary_computer"]
2 changes: 1 addition & 1 deletion src/coincident/datasets/maxar.py
@@ -22,7 +22,7 @@ class Collection(str, Enum):

@dataclass
class Stereo(Dataset):
"""Essential metadata for Maxar In Track Stere o"""
"""Essential metadata for Maxar In Track Stereo"""

alias: str = "maxar"
has_stac_api: bool = True
2 changes: 1 addition & 1 deletion src/coincident/datasets/usgs.py
@@ -20,7 +20,7 @@ class ThreeDEP(Dataset):
search: str = (
"https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/metadata/WESM.gpkg"
)
start: str = "2007-01-01"
start: str = "2000-12-01"
end: str | None = None
type: str = "lidar"
alias: str = "3dep"
74 changes: 62 additions & 12 deletions src/coincident/overlaps/__init__.py
@@ -4,14 +4,19 @@

from __future__ import annotations

-import geopandas as _gpd
-import pandas as _pd  # noqa: ICN001
+from typing import Any
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+import shapely.geometry
+from shapely.geometry.polygon import orient as _orient


def subset_by_maximum_duration(
-    gf: _gpd.GeoDataFrame,
+    gf: gpd.GeoDataFrame,
     max_duration: int = 60,  # days
-) -> _gpd.GeoDataFrame:
+) -> gpd.GeoDataFrame:
"""
Subset a GeoDataFrame by a maximum duration.
@@ -27,17 +32,17 @@ def subset_by_maximum_duration(
_gpd.GeoDataFrame
A GeoDataFrame filtered to include only rows where the 'duration' is less than or equal to the specified maximum duration.
"""
-    max_duration = _pd.Timedelta(max_duration)
+    max_duration = pd.Timedelta(days=max_duration)
# NOTE: keep indexes?
return gf.loc[gf.duration <= max_duration, :].reset_index()


def subset_by_temporal_overlap(
-    gf: _gpd.GeoDataFrame,
-    start_datetime: _pd.Timestamp,
-    end_datetime: _pd.Timestamp,
+    gf: gpd.GeoDataFrame,
+    start_datetime: pd.Timestamp,
+    end_datetime: pd.Timestamp,
     temporal_buffer: int = 14,
-) -> _gpd.GeoDataFrame:
+) -> gpd.GeoDataFrame:
"""
Subset a DataFrame based on temporal overlap.
@@ -50,15 +55,60 @@ def subset_by_temporal_overlap(
Returns:
DataFrame: A subset of the input DataFrame with rows that overlap temporally.
"""
-    temporal_buffer = _pd.Timedelta(temporal_buffer)
+    temporal_buffer = pd.Timedelta(days=temporal_buffer)
# Apply temporal buffer to collection start and end dates
buffered_start = gf.start_datetime - temporal_buffer
buffered_end = gf.end_datetime + temporal_buffer

-    results_intervals = _gpd.pd.IntervalIndex.from_arrays(
+    results_intervals = gpd.pd.IntervalIndex.from_arrays(
         buffered_start, buffered_end, closed="both"
     )
-    search_interval = _gpd.pd.Interval(start_datetime, end_datetime, closed="both")
+    search_interval = gpd.pd.Interval(start_datetime, end_datetime, closed="both")
keep = results_intervals.overlaps(search_interval)

return gf.loc[keep, :].reset_index()


def geographic_area(gf: gpd.GeoDataFrame) -> gpd.pd.Series:
"""
Calculate the geographic area of each polygon in a GeoDataFrame.
This function computes the area of polygons in a GeoDataFrame that has a geographic coordinate system.
It handles both single polygons and multipolygons by summing the areas of individual polygons.
Parameters
----------
gf : gpd.GeoDataFrame
A GeoDataFrame containing the geometries for which the area needs to be calculated. The GeoDataFrame
must have a geographic coordinate system (latitude and longitude).
Returns
-------
pd.Series
A Pandas Series containing the area of each polygon in the input GeoDataFrame.
Raises
------
TypeError
If the GeoDataFrame does not have a geographic coordinate system.
References
----------
- https://gis.stackexchange.com/questions/413349/calculating-area-of-lat-lon-polygons-without-transformation-using-geopandas
- https://pyproj4.github.io/pyproj/stable/api/geod.html
"""
if not (gf.crs and gf.crs.is_geographic):
msg = "geodataframe should have geographic coordinate system"
raise TypeError(msg)

geod = gf.crs.get_geod()

# TODO: sort out numpy typing https://github.com/python/mypy/issues/5480
def area_calc(geom: shapely.geometry) -> Any:
if geom.geom_type not in ["MultiPolygon", "Polygon"]:
return np.nan

# For MultiPolygon do each separately
if geom.geom_type == "MultiPolygon":
return np.sum([area_calc(p) for p in geom.geoms])

# orient to ensure a counter-clockwise traversal.
# geometry_area_perimeter returns (area, perimeter)
return geod.geometry_area_perimeter(_orient(geom, 1))[0]

return gf.geometry.apply(area_calc)
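A minimal usage sketch for the new geographic_area helper, assuming it is imported from coincident.overlaps; the one-degree box is a made-up AOI, and the GeoDataFrame must be in a geographic CRS such as EPSG:4326:

import geopandas as gpd
from shapely.geometry import box

from coincident.overlaps import geographic_area

# One-degree box touching the equator; geodesic area is roughly 12,300 km^2
gf = gpd.GeoDataFrame(geometry=[box(0, 0, 1, 1)], crs="EPSG:4326")
areas_m2 = geographic_area(gf)
print(areas_m2 / 1e6)  # convert m^2 to km^2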
57 changes: 39 additions & 18 deletions src/coincident/search/__init__.py
@@ -4,9 +4,10 @@
Results returned as GeoDataFrame with consistent STAC column headings and best dtypes
"""

-# from __future__ import annotations
+from __future__ import annotations

-from typing import Any
+from typing import Any as _Any

import geopandas as _gpd

@@ -18,15 +19,20 @@
from coincident.datasets.general import Dataset as _Dataset
from coincident.search import _stac, _wesm

# NOTE: how to expose some functions in submodule of mostly private methods?
from coincident.search._wesm import get_swath_polygons as get_swath_polygons
from coincident.search._wesm import load_wesm_by_fid as load_wesm_by_fid

# Used to access formatters
_pystac_client = _ItemSearch("no_url")


def search(
dataset: str | _Dataset,
-    intersects: _gpd.GeoDataFrame | _gpd.GeoSeries,
+    intersects: _gpd.GeoDataFrame | _gpd.GeoSeries | None = None,
datetime: str | list[str] | None = None,
# NOTE: change to explicitly stac_kwargs? not sure of proper kwargs type (Optional[dict[str, Any]]?)
-    **kwargs: Any,
+    **kwargs: _Any,
) -> _gpd.GeoDataFrame:
"""
Perform a search for geospatial data using STAC or non-STAC APIs for a single dataset.
@@ -50,7 +56,7 @@ def search(
ValueError
If the provided dataset alias is not supported.
"""
-    # Validation of inputs
+    # Validate Dataset
if isinstance(dataset, str):
try:
dataset = _alias_to_Dataset[dataset]
@@ -60,17 +66,32 @@
)
raise ValueError(message) from e

+    # Validate Datetimes
     _validate_temporal_bounds(dataset, datetime)
-    _validate_spatial_bounds(intersects)
-
-    # NOTE: not very robust, explode() demotes MultiPolygons to single Polygon (seems many GeoJSONs have this)
-    # And 'exterior' not available for MultiPolygons
-    shapely_geometry = intersects.geometry.explode().iloc[0]
-    if (
-        not shapely_geometry.exterior.is_ccw
-    ):  # Apparently NASA CMR enforces polygon CCW order
-        shapely_geometry = shapely_geometry.reverse()
-    aoi = _pystac_client._format_intersects(shapely_geometry)  # to JSON geometry
+    if datetime is not None:
+        start, end = _pystac_client._format_datetime(datetime).split("/")
+        search_start = (
+            _gpd.pd.to_datetime(start).tz_localize(None) if start != ".." else None
+        )
+        search_end = _gpd.pd.to_datetime(end).tz_localize(None) if end != ".." else None
+    else:
+        search_start = None  # or _gpd.pd.Timestamp(dataset.start) ?
+        search_end = None  # or _gpd.pd.Timestamp.today()?
+
+    # Validate Intersects
+    if intersects is not None:
+        _validate_spatial_bounds(intersects)
+
+        # NOTE: not very robust, explode() demotes MultiPolygons to single Polygon (seems many GeoJSONs have this)
+        # And 'exterior' not available for MultiPolygons
+        shapely_geometry = intersects.geometry.explode().iloc[0]
+        if (
+            not shapely_geometry.exterior.is_ccw
+        ):  # Apparently NASA CMR enforces polygon CCW order
+            shapely_geometry = shapely_geometry.reverse()
+        aoi = _pystac_client._format_intersects(shapely_geometry)  # to JSON geometry
+    else:
+        aoi = None

# STAC API Searches
if dataset.has_stac_api:
@@ -99,12 +120,12 @@

# Non-STAC Searches
elif dataset.alias == "3dep":
-        gf = _wesm.search_gpkg(
-            dataset.search,  # type: ignore[arg-type]
-            mask=intersects,
+        gf = _wesm.search_convex_hulls(
+            intersects=intersects,
+            search_start=search_start,
+            search_end=search_end,
             **kwargs,
         )
-        # NOTE: Apply datetime interval filter after reading GPKG?

return gf

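Taken together, a hypothetical end-to-end call against the refactored signature, where "3dep" now routes through _wesm.search_convex_hulls and both intersects and datetime are optional (the AOI file name and date range here are made up):

import geopandas as gpd

from coincident.search import search

# Hypothetical single-polygon AOI in EPSG:4326
aoi = gpd.read_file("aoi.geojson")
gf = search(dataset="3dep", intersects=aoi, datetime=["2018-01-01", "2020-01-01"])
print(len(gf))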
14 changes: 8 additions & 6 deletions src/coincident/search/_stac.py
@@ -5,17 +5,18 @@
from __future__ import annotations

import contextlib
import warnings
from typing import Any

import geopandas as gpd
import pystac_client

-# maxar_platform tries to authenticate upon import, change errors to warnings
-# try:
-#     import maxar_platform.discovery
-# except Exception as e:
-#     error_msg = str(e)
-#     warnings.warn(f'maxar_platform authentication error: {error_msg}')
+# maxar_platform tries to authenticate upon import, change error to warning
+try:
+    import maxar_platform  # noqa: F401
+except Exception as e:
+    message = f"maxar_platform authentication error: {e}, setting MAXAR_API_KEY environment variable is recommended."
+    warnings.warn(message, stacklevel=2)

# MSPC authenticated access has higher limits
# try:
@@ -72,6 +73,7 @@ def to_geopandas(
# gf['datetime'] = gpd.pd.to_datetime(gf.datetime)
# NOTE: assumption = all timestamps UTC, don't worry about timezone from here on out
gf["datetime"] = gpd.pd.to_datetime(gf.datetime).dt.tz_localize(None)
gf["dayofyear"] = gf["datetime"].dt.dayofyear
if "start_datetime" in gf.columns:
gf["start_datetime"] = gpd.pd.to_datetime(gf.start_datetime).dt.tz_localize(
None
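The new dayofyear column falls out of the pandas datetime accessor once timestamps are made timezone-naive; a tiny illustration of the pattern used in to_geopandas above:

import pandas as pd

# Parse a UTC timestamp, drop the timezone, then take day-of-year
s = pd.Series(pd.to_datetime(["2024-10-02T12:00:00Z"]))
s = s.dt.tz_localize(None)  # assumption here (as in the code): all timestamps are UTC
print(s.dt.dayofyear.iloc[0])  # 276 for 2024-10-02 (leap year)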
