Create Python script to export to GEE #545

Merged
merged 4 commits on Jun 17, 2024
2 changes: 1 addition & 1 deletion .isort.cfg
@@ -2,4 +2,4 @@
line_length = 88
multi_line_output = 3
include_trailing_comma = True
known_third_party = _pytest,aenum,affine,aiohttp,alembic,async_lru,asyncpg,aws_utils,boto3,botocore,click,docker,errors,fastapi,fiona,gdal_utils,geoalchemy2,geojson,gfw_pixetl,gino,gino_starlette,google,httpx,httpx_auth,logger,logging_utils,moto,numpy,orjson,osgeo,pandas,pendulum,pglast,psutil,psycopg2,pydantic,pyproj,pytest,pytest_asyncio,rasterio,shapely,sqlalchemy,sqlalchemy_utils,starlette,tileputty,typer
known_third_party = _pytest,aenum,affine,aiohttp,alembic,async_lru,asyncpg,aws_utils,boto3,botocore,click,docker,ee,errors,fastapi,fiona,gdal_utils,geoalchemy2,geojson,gfw_pixetl,gino,gino_starlette,google,httpx,httpx_auth,logger,logging_utils,moto,numpy,orjson,osgeo,pandas,pendulum,pglast,psutil,psycopg2,pydantic,pyproj,pytest,pytest_asyncio,rasterio,shapely,sqlalchemy,sqlalchemy_utils,starlette,tileputty,typer
12 changes: 11 additions & 1 deletion app/models/pydantic/creation_options.py
@@ -356,8 +356,18 @@ class COGCreationOptions(StrictBaseModel):
        ResamplingMethod.average,
        description="Resampling method used to downsample overviews",
    )
    block_size: Optional[TileBlockSize] = 512
    block_size: Optional[TileBlockSize] = Field(
        512,
        description="Block size to tile COG with.",
    )
    compute_stats: bool = False
    export_to_gee: bool = Field(
        False,
        description="Option to export the COG to a Google Cloud Storage bucket and"
        " create a COG-backed asset on Google Earth Engine (GEE). The asset will be"
        " created under the project `forma-250` with the asset ID"
        " `{dataset}/{implementation}`. Versioning is currently not supported due to"
        " GEE storage constraints.",
    )


class DynamicVectorTileCacheCreationOptions(TileCacheBaseModel):
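A minimal usage sketch of the new creation option (not part of this PR): it assumes COGCreationOptions has no other required fields beyond those shown in this diff, and that pydantic coerces the integer block size to the TileBlockSize enum.

# Hypothetical illustration only; field values are placeholders.
from app.models.pydantic.creation_options import COGCreationOptions

creation_options = COGCreationOptions(
    block_size=512,       # matches the default shown above
    compute_stats=False,
    export_to_gee=True,   # upload the COG to GCS and register a GEE asset
)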
7 changes: 7 additions & 0 deletions app/tasks/cog_assets.py
@@ -86,8 +86,15 @@ async def create_cogify_job(
        resample_method,
        "--block_size",
        creation_options.block_size.value,
        "-d",
        dataset,
        "-I",
        creation_options.implementation,
    ]

    if creation_options.export_to_gee:
        command += ["--export_to_gee"]

    job_name: str = sanitize_batch_job_name(
        f"COGify_{dataset}_{version}_{creation_options.implementation}"
    )
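To make the hand-off to the batch script concrete, here is a sketch of how the tail of the assembled command would look with the flag enabled; the leading source, target, resample, and block-size arguments are elided and the dataset/implementation values are placeholders.

# Illustration only (placeholder values), not code from this PR.
command_tail = [
    "-d", "umd_tree_cover_loss",   # hypothetical dataset
    "-I", "default",               # hypothetical implementation
    "--export_to_gee",             # appended only when export_to_gee is True
]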
2 changes: 2 additions & 0 deletions batch/gdal-python.dockerfile
@@ -11,6 +11,8 @@ RUN chmod +x -R /opt/python/
ENV PATH="/opt/scripts:${PATH}"
ENV PATH="/opt/python:${PATH}"

RUN pip install earthengine-api

ENV WORKDIR="/tmp"

ENTRYPOINT ["/opt/scripts/report_status.sh"]
87 changes: 87 additions & 0 deletions batch/python/export_to_gee.py
@@ -0,0 +1,87 @@
import json
import os

import boto3
import ee
from google.auth.transport.requests import AuthorizedSession
from google.cloud import storage
from typer import Option, run

EE_PROJECT = "forma-250"
GCS_BUCKET = "data-api-gee-assets"
GCS_SECRET_KEY_ARN = os.environ["GCS_SECRET_KEY_ARN"]
GCS_CREDENTIALS_FILE = "gcs_credentials.json"


def set_google_application_credentials():
    client = boto3.client("secretsmanager")
    response = client.get_secret_value(SecretId=GCS_SECRET_KEY_ARN)

    with open(GCS_CREDENTIALS_FILE, "w") as f:
        f.write(response["SecretString"])

    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GCS_CREDENTIALS_FILE

    return json.loads(response["SecretString"])["client_email"]


def upload_cog_to_gcs(dataset, implementation):
"""Uploads a file to the bucket."""

storage_client = storage.Client()
bucket = storage_client.bucket(GCS_BUCKET)
blob = bucket.blob(f"{dataset}/{implementation}.tif")

blob.upload_from_filename("cog.tif")

return f"gs://{GCS_BUCKET}/{dataset}/{implementation}.tif"


def create_cog_backed_asset(dataset, implementation, gcs_path, service_account):
    credentials = ee.ServiceAccountCredentials(service_account, GCS_CREDENTIALS_FILE)
    ee.Initialize(credentials)

    # delete any existing asset with the same dataset/implementation
    try:
        ee.data.deleteAsset(f"projects/{EE_PROJECT}/assets/{dataset}/{implementation}")
    except ee.EEException:
        # asset doesn't exist
        pass

    # create the dataset folder if it doesn't exist
    try:
        ee.data.createAsset(
            {"type": "Folder"}, f"projects/{EE_PROJECT}/assets/{dataset}"
        )
    except ee.EEException:
        # folder already exists
        pass

    # link the GCS COG to the GEE asset
    session = AuthorizedSession(credentials.with_quota_project(EE_PROJECT))
    request = {"type": "IMAGE", "gcs_location": {"uris": [gcs_path]}}

    asset_id = f"{dataset}/{implementation}"
    url = "https://earthengine.googleapis.com/v1alpha/projects/{}/assets?assetId={}"

    response = session.post(
        url=url.format(EE_PROJECT, asset_id), data=json.dumps(request)
    )

    if response.status_code != 200:
        raise Exception(
            f"GEE returned unexpected status code {response.status_code} with payload {response.content}"
        )


def export_to_gee(
    dataset: str = Option(..., help="Dataset name."),
    implementation: str = Option(..., help="Implementation name."),
):
    service_account = set_google_application_credentials()
    gcs_path = upload_cog_to_gcs(dataset, implementation)
    create_cog_backed_asset(dataset, implementation, gcs_path, service_account)


if __name__ == "__main__":
    run(export_to_gee)
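Once the script succeeds, the COG-backed asset should be readable like any other Earth Engine image under the full asset ID projects/forma-250/assets/{dataset}/{implementation}. A hedged sketch of consuming it, assuming credentials with access to the forma-250 project and placeholder dataset/implementation names:

# Illustration only; asset names are placeholders.
import ee

ee.Initialize(project="forma-250")
image = ee.Image("projects/forma-250/assets/umd_tree_cover_loss/default")
print(image.getInfo()["bands"])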
8 changes: 8 additions & 0 deletions batch/scripts/cogify.sh
@@ -7,6 +7,9 @@ set -e
# -T | --target
# --block_size
# -r | --resample
# -G | --export_to_gee
# -d | --dataset
# -I | --implementation

ME=$(basename "$0")
. get_arguments.sh "$@"
@@ -37,5 +40,10 @@ fi

# upload to data lake
aws s3 cp cog.tif "${TARGET}"

if [ -n "$EXPORT_TO_GEE" ]; then
python export_to_gee.py --dataset "${DATASET}" --implementation "${IMPLEMENTATION}"
fi

set +x

4 changes: 4 additions & 0 deletions batch/scripts/get_arguments.sh
@@ -100,6 +100,10 @@ do
shift # past argument
shift # past value
;;
-G|--export_to_gee)
EXPORT_TO_GEE="TRUE"
shift # past argument
;;
-i|--fid_name)
FID_NAME="$2"
shift # past argument