From db7e4d19febaa13aaf88dcd567009691b271a32f Mon Sep 17 00:00:00 2001
From: Justin Terry
Date: Wed, 12 Jun 2024 15:09:03 -0700
Subject: [PATCH] Create Python script to export to GEE

---
 .isort.cfg                              |  2 +-
 app/models/pydantic/creation_options.py | 12 ++++-
 app/tasks/cog_assets.py                 |  7 +++
 batch/gdal-python.dockerfile            |  2 +
 batch/python/export_to_gee.py           | 69 +++++++++++++++++++++++++
 batch/scripts/cogify.sh                 |  8 +++
 batch/scripts/get_arguments.sh          |  5 ++
 7 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100644 batch/python/export_to_gee.py

diff --git a/.isort.cfg b/.isort.cfg
index 9c838702b..ef73abf55 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -2,4 +2,4 @@
 line_length = 88
 multi_line_output = 3
 include_trailing_comma = True
-known_third_party = _pytest,aenum,affine,aiohttp,alembic,async_lru,asyncpg,aws_utils,boto3,botocore,click,docker,errors,fastapi,fiona,gdal_utils,geoalchemy2,geojson,gfw_pixetl,gino,gino_starlette,google,httpx,httpx_auth,logger,logging_utils,moto,numpy,orjson,osgeo,pandas,pendulum,pglast,psutil,psycopg2,pydantic,pyproj,pytest,pytest_asyncio,rasterio,shapely,sqlalchemy,sqlalchemy_utils,starlette,tileputty,typer
+known_third_party = _pytest,aenum,affine,aiohttp,alembic,async_lru,asyncpg,aws_utils,boto3,botocore,click,docker,ee,errors,fastapi,fiona,gdal_utils,geoalchemy2,geojson,gfw_pixetl,gino,gino_starlette,google,httpx,httpx_auth,logger,logging_utils,moto,numpy,orjson,osgeo,pandas,pendulum,pglast,psutil,psycopg2,pydantic,pyproj,pytest,pytest_asyncio,rasterio,shapely,sqlalchemy,sqlalchemy_utils,starlette,tileputty,typer
diff --git a/app/models/pydantic/creation_options.py b/app/models/pydantic/creation_options.py
index 58c55964f..e334a7912 100644
--- a/app/models/pydantic/creation_options.py
+++ b/app/models/pydantic/creation_options.py
@@ -356,8 +356,18 @@ class COGCreationOptions(StrictBaseModel):
         ResamplingMethod.average,
         description="Resampling method used to downsample overviews",
     )
-    block_size: Optional[TileBlockSize] = 512
+    block_size: Optional[TileBlockSize] = Field(
+        512,
+        description="Block size to tile the COG with.",
+    )
     compute_stats: bool = False
+    export_to_gee: bool = Field(
+        False,
+        description="Option to export the COG to Google Cloud Storage and create"
+        " a COG-backed asset on Google Earth Engine (GEE). The asset will be created"
+        " under the project `forma-250` with the asset ID `{dataset}/{implementation}`."
" + "Versioning is currently not supported due to GEE storage constraints.", + ) class DynamicVectorTileCacheCreationOptions(TileCacheBaseModel): diff --git a/app/tasks/cog_assets.py b/app/tasks/cog_assets.py index 345f56d04..61a2e742b 100644 --- a/app/tasks/cog_assets.py +++ b/app/tasks/cog_assets.py @@ -86,8 +86,15 @@ async def create_cogify_job( resample_method, "--block_size", creation_options.block_size.value, + "-d", + dataset, + "-I", + creation_options.implementation, ] + if creation_options.export_to_gee: + command += ["--export_to_gee"] + job_name: str = sanitize_batch_job_name( f"COGify_{dataset}_{version}_{creation_options.implementation}" ) diff --git a/batch/gdal-python.dockerfile b/batch/gdal-python.dockerfile index 8030a8c8c..ff2cf77c4 100644 --- a/batch/gdal-python.dockerfile +++ b/batch/gdal-python.dockerfile @@ -11,6 +11,8 @@ RUN chmod +x -R /opt/python/ ENV PATH="/opt/scripts:${PATH}" ENV PATH="/opt/python:${PATH}" +RUN pip install earthengine-api + ENV WORKDIR="/tmp" ENTRYPOINT ["/opt/scripts/report_status.sh"] \ No newline at end of file diff --git a/batch/python/export_to_gee.py b/batch/python/export_to_gee.py new file mode 100644 index 000000000..0b7e775c4 --- /dev/null +++ b/batch/python/export_to_gee.py @@ -0,0 +1,69 @@ +import json +import os + +import boto3 +import ee +from google.auth.transport.requests import AuthorizedSession +from google.cloud import storage +from typer import Option, run + +EE_PROJECT = "forma-250" +GCS_BUCKET = "data-api-gee-assets" +GCS_SECRET_KEY_ARN = os.environ["GCS_SECRET_KEY_ARN"] +GCS_CREDENTIALS_FILE = "gcs_credentials.json" + + +def set_google_application_credentials(): + client = boto3.client("secretsmanager") + response = client.get_secret_value(SecretId=GCS_SECRET_KEY_ARN) + + with open(GCS_CREDENTIALS_FILE, "w") as f: + f.write(response["SecretString"]) + + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GCS_CREDENTIALS_FILE + + return json.loads(response["SecretString"])["client_email"] + + +def upload_cog_to_gcs(dataset, implementation): + """Uploads a file to the bucket.""" + + storage_client = storage.Client() + bucket = storage_client.bucket(GCS_BUCKET) + blob = bucket.blob(f"{dataset}/{implementation}.tif") + + blob.upload_from_filename("cog.tif") + + return f"gs://{GCS_BUCKET}/{dataset}/{implementation}.tif" + + +def create_cog_backed_asset(dataset, implementation, gcs_path, service_account): + credentials = ee.ServiceAccountCredentials(service_account, GCS_CREDENTIALS_FILE) + session = AuthorizedSession(credentials.with_quota_project(EE_PROJECT)) + + request = {"type": "IMAGE", "gcs_location": {"uris": [gcs_path]}} + + asset_id = f"{dataset}/{implementation}" + url = "https://earthengine.googleapis.com/v1alpha/projects/{}/assets?assetId={}" + + response = session.post( + url=url.format(EE_PROJECT, asset_id), data=json.dumps(request) + ) + + if response.status_code != 200: + raise Exception( + f"GEE returned unexpected status code {response.status_code} with payload {response.content}" + ) + + +def export_to_gee( + dataset: str = Option(..., help="Dataset name."), + implementation: str = Option(..., help="Implementation name."), +): + service_account = set_google_application_credentials() + gcs_path = upload_cog_to_gcs(dataset, implementation) + create_cog_backed_asset(gcs_path, dataset, implementation, service_account) + + +if __name__ == "__main__": + run(export_to_gee) diff --git a/batch/scripts/cogify.sh b/batch/scripts/cogify.sh index ec97edf52..40b851591 100755 --- a/batch/scripts/cogify.sh +++ 
@@ -7,6 +7,9 @@ set -e
 # -T | --target
 # --block_size
 # -r | --resample
+# -G | --export_to_gee
+# -d | --dataset
+# -I | --implementation
 
 ME=$(basename "$0")
 . get_arguments.sh "$@"
@@ -37,5 +40,10 @@
 fi
 
 # upload to data lake
 aws s3 cp cog.tif "${TARGET}"
+
+if [ -n "$EXPORT_TO_GEE" ]; then
+  python export_to_gee.py --dataset "${DATASET}" --implementation "${IMPLEMENTATION}"
+fi
+
 set +x
diff --git a/batch/scripts/get_arguments.sh b/batch/scripts/get_arguments.sh
index d4e057a4e..037d7f8f8 100755
--- a/batch/scripts/get_arguments.sh
+++ b/batch/scripts/get_arguments.sh
@@ -100,6 +100,11 @@ do
       shift # past argument
       shift # past value
       ;;
+    -G|--export_to_gee)
+      EXPORT_TO_GEE="$2"
+      shift # past argument
+      shift # past value
+      ;;
     -i|--fid_name)
       FID_NAME="$2"
       shift # past argument
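
For context, a minimal sketch of how a client might request a COG asset with
the new option once this patch is deployed. Only the creation-option fields
visible in this patch (implementation, block_size, compute_stats,
export_to_gee) come from COGCreationOptions; the endpoint path, asset_type
string, dataset, version, and token are illustrative assumptions:

    import httpx

    # Placeholder dataset and version, for illustration only.
    dataset = "umd_tree_cover_loss"
    version = "v1.8"

    payload = {
        "asset_type": "COG",  # assumed asset type string
        "creation_options": {
            "implementation": "default",  # also used as the GEE asset ID suffix
            "block_size": 512,
            "compute_stats": False,
            "export_to_gee": True,  # runs export_to_gee.py after COG creation
        },
    }
    response = httpx.post(
        f"https://data-api.globalforestwatch.org/dataset/{dataset}/{version}/assets",
        json=payload,
        headers={"Authorization": "Bearer <token>"},
    )
    response.raise_for_status()

With export_to_gee set, the batch job uploads cog.tif to
gs://data-api-gee-assets/{dataset}/{implementation}.tif and registers it as a
COG-backed asset under the forma-250 GEE project.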