diff --git a/app/tasks/cog_assets.py b/app/tasks/cog_assets.py
index 61a2e742..6b8e6f69 100644
--- a/app/tasks/cog_assets.py
+++ b/app/tasks/cog_assets.py
@@ -9,6 +9,7 @@
 from app.models.pydantic.change_log import ChangeLog
 from app.models.pydantic.creation_options import COGCreationOptions
 from app.models.pydantic.jobs import GDALCOGJob, Job
+from app.settings.globals import DATA_LAKE_BUCKET
 from app.tasks import callback_constructor
 from app.tasks.batch import execute
 from app.tasks.raster_tile_set_assets.utils import JOB_ENV
@@ -90,6 +91,8 @@ async def create_cogify_job(
         dataset,
         "-I",
         creation_options.implementation,
+        "--prefix",
+        f"s3://{DATA_LAKE_BUCKET}/{dataset}/{version}/raster/{srid}/cog",
     ]
 
     if creation_options.export_to_gee:
diff --git a/batch/scripts/cogify.sh b/batch/scripts/cogify.sh
index d85344e6..3fb2d40b 100755
--- a/batch/scripts/cogify.sh
+++ b/batch/scripts/cogify.sh
@@ -3,43 +3,49 @@
 set -e
 
 # requires arguments
-# -s | --source
-# -T | --target
 # --block_size
 # -r | --resample
 # -G | --export_to_gee
-# -d | --dataset
 # -I | --implementation
+# -t | --target
+# --prefix
 ME=$(basename "$0")
 . get_arguments.sh "$@"
 
 set -x
 
 # download all GeoTiff files
-aws s3 cp --recursive --exclude "*" --include "*.tif" "${SRC}" .
 
-# create VRT of input files so we can use gdal_translate
-if [ ! -f "merged.vrt" ]; then
-  gdalbuildvrt merged.vrt *.tif
-fi
+if [[ $(aws s3 ls "${PREFIX}/${IMPLEMENTATION}_merged.tif") ]]; then
+  aws s3 cp "${PREFIX}/${IMPLEMENTATION}_merged.tif" "${IMPLEMENTATION}_merged.tif"
+else
+  aws s3 cp --recursive --exclude "*" --include "*.tif" "${SRC}" .
+
+  # create VRT of input files so we can use gdal_translate
+  gdalbuildvrt "${IMPLEMENTATION}_merged.vrt" *.tif
 
-# merge all rasters into one huge raster using COG block size
-if [ ! -f "merged.tif" ]; then
-  gdal_translate -of GTiff -co TILED=YES -co BLOCKXSIZE="${BLOCK_SIZE}" -co BLOCKYSIZE="${BLOCK_SIZE}" -co COMPRESS=DEFLATE -co BIGTIFF=IF_SAFER -co NUM_THREADS=ALL_CPUS --config GDAL_CACHEMAX 70% --config GDAL_NUM_THREADS ALL_CPUS merged.vrt merged.tif
+  # merge all rasters into one huge raster using COG block size
+  gdal_translate -of GTiff -co TILED=YES -co BLOCKXSIZE="${BLOCK_SIZE}" -co BLOCKYSIZE="${BLOCK_SIZE}" -co COMPRESS=DEFLATE -co BIGTIFF=IF_SAFER -co NUM_THREADS=ALL_CPUS --config GDAL_CACHEMAX 70% --config GDAL_NUM_THREADS ALL_CPUS "${IMPLEMENTATION}_merged.vrt" "${IMPLEMENTATION}_merged.tif"
+  aws s3 cp "${IMPLEMENTATION}_merged.tif" "${PREFIX}/${IMPLEMENTATION}_merged.tif"
 fi
 
-# create overviews in raster
-if ! gdalinfo "merged.tif" | grep -q "Overviews"; then
-  gdaladdo merged.tif -r "${RESAMPLE}" --config GDAL_NUM_THREADS ALL_CPUS --config GDAL_CACHEMAX 70%
+if [[ $(aws s3 ls "${PREFIX}/${IMPLEMENTATION}_merged.tif.ovr") ]]; then
+  aws s3 cp "${PREFIX}/${IMPLEMENTATION}_merged.tif.ovr" "${IMPLEMENTATION}_merged.tif.ovr"
+else
+  # generate overviews externally
+  gdaladdo "${IMPLEMENTATION}_merged.tif" -r "${RESAMPLE}" -ro --config GDAL_NUM_THREADS ALL_CPUS --config GDAL_CACHEMAX 70% --config COMPRESS_OVERVIEW DEFLATE
+  aws s3 cp "${IMPLEMENTATION}_merged.tif.ovr" "${PREFIX}/${IMPLEMENTATION}_merged.tif.ovr"
 fi
 
 # convert to COG using existing overviews, this adds some additional layout optimizations
-if [ ! -f "cog.tif" ]; then
-  gdal_translate merged.tif cog.tif -of COG -co COMPRESS=DEFLATE -co BLOCKSIZE="${BLOCK_SIZE}" -co BIGTIFF=IF_SAFER -co NUM_THREADS=ALL_CPUS --config GDAL_CACHEMAX 70% --config GDAL_NUM_THREADS ALL_CPUS
-fi
+gdal_translate "${IMPLEMENTATION}_merged.tif" "${IMPLEMENTATION}.tif" -of COG -co COMPRESS=DEFLATE -co BLOCKSIZE="${BLOCK_SIZE}" -co BIGTIFF=IF_SAFER -co NUM_THREADS=ALL_CPUS -co OVERVIEWS=FORCE_USE_EXISTING --config GDAL_CACHEMAX 70% --config GDAL_NUM_THREADS ALL_CPUS
 
 # upload to data lake
-aws s3 cp cog.tif "${TARGET}"
+aws s3 cp "${IMPLEMENTATION}.tif" "${TARGET}"
+
+# delete intermediate file
+aws s3 rm "${PREFIX}/${IMPLEMENTATION}_merged.tif"
+aws s3 rm "${PREFIX}/${IMPLEMENTATION}_merged.tif.ovr"
 
 if [ -n "$EXPORT_TO_GEE" ]; then
   export_to_gee.py --dataset "${DATASET}" --implementation "${IMPLEMENTATION}"