diff --git a/app/models/pydantic/creation_options.py b/app/models/pydantic/creation_options.py index 5bf9422a7..2e9ebc9d0 100644 --- a/app/models/pydantic/creation_options.py +++ b/app/models/pydantic/creation_options.py @@ -357,8 +357,8 @@ class COGCreationOptions(StrictBaseModel): ResamplingMethod.average, description="Resampling method used to downsample overviews", ) - block_size: Optional[TileBlockSize] = TileBlockSize.five_twelve - srid: Optional[Srid] = Srid.wgs84 + block_size: Optional[TileBlockSize] = 512 + srid: Optional[Srid] = "epsg:4326" compute_stats: bool = False diff --git a/app/models/pydantic/jobs.py b/app/models/pydantic/jobs.py index d3f3ab0ab..56c5d5742 100644 --- a/app/models/pydantic/jobs.py +++ b/app/models/pydantic/jobs.py @@ -88,7 +88,10 @@ class PostgresqlClientJob(Job): class GdalPythonImportJob(Job): """Use for write operations to PostgreSQL which require GDAL/Ogr2Ogr - drivers. NOTE: JOB MUST BE SAFE TO RETRY!""" + drivers. + + NOTE: JOB MUST BE SAFE TO RETRY! + """ job_queue = AURORA_JOB_QUEUE job_definition = GDAL_PYTHON_JOB_DEFINITION @@ -134,6 +137,19 @@ class PixETLJob(Job): attempt_duration_seconds = int(DEFAULT_JOB_DURATION * 1.5) +class GDALPythonLargeJob(Job): + """Use for raster transformations using GDAL Python docker in PixETL + queue.""" + + job_queue = PIXETL_JOB_QUEUE + job_definition = GDAL_PYTHON_JOB_DEFINITION + vcpus = MAX_CORES + memory = MAX_MEM + num_processes = max(int(MAX_CORES * 2 / 3), 1) + attempts = 10 + attempt_duration_seconds = int(DEFAULT_JOB_DURATION * 1.5) + + class GDALDEMJob(Job): """Use for applying color maps to raster tiles with gdaldem.""" diff --git a/app/tasks/cog_assets.py b/app/tasks/cog_assets.py index a75c1b06b..e5f855c0a 100644 --- a/app/tasks/cog_assets.py +++ b/app/tasks/cog_assets.py @@ -9,7 +9,8 @@ from app.models.orm.tasks import Task from app.models.pydantic.change_log import ChangeLog from app.models.pydantic.creation_options import COGCreationOptions -from app.models.pydantic.jobs import Job, PixETLJob +from app.models.pydantic.jobs import GDALPythonLargeJob, Job +from app.settings.globals import DATA_LAKE_BUCKET from app.tasks import callback_constructor from app.tasks.batch import execute from app.tasks.raster_tile_set_assets.utils import JOB_ENV @@ -78,7 +79,7 @@ async def create_cogify_job( resample_method = ( "near" if creation_options.resampling == ResamplingMethod.nearest - else creation_options.resampling + else creation_options.resampling.value ) command = [ @@ -89,12 +90,14 @@ async def create_cogify_job( version, "-s", source_uri, + "--target_bucket", + DATA_LAKE_BUCKET, "-r", - f"{resample_method}", + resample_method, "--block_size", - creation_options.block_size, + creation_options.block_size.value, "--srid", - creation_options.srid, + creation_options.srid.value, "-T", target_prefix, ] @@ -105,7 +108,7 @@ async def create_cogify_job( kwargs = dict() - return PixETLJob( + return GDALPythonLargeJob( dataset=dataset, job_name=job_name, command=command, diff --git a/batch/scripts/cogify.sh b/batch/scripts/cogify.sh index cc6d67669..44cc48a16 100755 --- a/batch/scripts/cogify.sh +++ b/batch/scripts/cogify.sh @@ -7,22 +7,19 @@ set -e # -v | --version # -s | --source # --target_bucket +# --target_prefix ME=$(basename "$0") . get_arguments.sh "$@" +set -x # download all GeoTiff files aws s3 cp --recursive --exclude "*" --include "*.tif" "${SRC}" . -# get list of file names -sources=() -for source in *.tif; do - sources+=("$source") -done - # combine to one big COG -gdalwarp "${sources[@]}" "${DATASET}_${VERSION}.tif" -r "${RESAMPLE}" -t_srs "${EPSG}" -of COG -co BLOCKSIZE="${BLOCKSIZE}" -co NUM_THREADS=ALL_CPUS +gdalwarp *.tif "${DATASET}_${VERSION}.tif" -r "${RESAMPLE}" -t_srs "${SRID}" -of COG -co BLOCKSIZE="${BLOCKSIZE}" -co NUM_THREADS=ALL_CPUS # upload to data lake -aws s3 cp "${DATASET}_${VERSION}.tif" "s3://${TARGET_BUCKET}/${DATASET}/${VERSION}/epsg-${EPSG}/cog/${DATASET}_${VERSION}.tif" +aws s3 cp "${DATASET}_${VERSION}.tif" "s3://${TARGET_BUCKET}/${TARGET_PREFIX}" +set +x diff --git a/tests/routes/datasets/test_assets.py b/tests/routes/datasets/test_assets.py index 9fa915e6c..4248617ca 100644 --- a/tests/routes/datasets/test_assets.py +++ b/tests/routes/datasets/test_assets.py @@ -284,14 +284,14 @@ async def test_cog_asset(async_client, httpd, logs): # Add a dataset, version, and default asset dataset = "test_cog_raster_asset" version = "v1.8" - primary_grid = "10/40000" + primary_grid = "90/27008" default_asset_id = await create_test_default_asset( dataset, version, primary_grid, "percent", - {"subset": "90N_000E"}, + {}, async_client, logs, httpd,