Skip to content

Commit

Permalink
Save intermediate COG files to S3 (#563)
Browse the repository at this point in the history
  • Loading branch information
jterry64 authored Aug 1, 2024
1 parent fb6fad9 commit bfe8534
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 18 deletions.
3 changes: 3 additions & 0 deletions app/tasks/cog_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from app.models.pydantic.change_log import ChangeLog
from app.models.pydantic.creation_options import COGCreationOptions
from app.models.pydantic.jobs import GDALCOGJob, Job
from app.settings.globals import DATA_LAKE_BUCKET
from app.tasks import callback_constructor
from app.tasks.batch import execute
from app.tasks.raster_tile_set_assets.utils import JOB_ENV
Expand Down Expand Up @@ -90,6 +91,8 @@ async def create_cogify_job(
dataset,
"-I",
creation_options.implementation,
"--prefix",
f"s3://{DATA_LAKE_BUCKET}/{dataset}/{version}/raster/{srid}/cog",
]

if creation_options.export_to_gee:
Expand Down
42 changes: 24 additions & 18 deletions batch/scripts/cogify.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,49 @@
set -e

# requires arguments
# -s | --source
# -T | --target
# --block_size
# -r | --resample
# -G | --export_to_gee
# -d | --dataset
# -I | --implementation
# -t | --target
# --prefix

ME=$(basename "$0")
. get_arguments.sh "$@"

set -x
# download all GeoTiff files
aws s3 cp --recursive --exclude "*" --include "*.tif" "${SRC}" .

# create VRT of input files so we can use gdal_translate
if [ ! -f "merged.vrt" ]; then
gdalbuildvrt merged.vrt *.tif
fi
if [[ $(aws s3 ls "${PREFIX}/${IMPLEMENTATION}_merged.tif") ]]; then
aws s3 cp "${PREFIX}/${IMPLEMENTATION}_merged.tif" "${IMPLEMENTATION}_merged.tif"
else
aws s3 cp --recursive --exclude "*" --include "*.tif" "${SRC}" .

# create VRT of input files so we can use gdal_translate
gdalbuildvrt "${IMPLEMENTATION}_merged.vrt" *.tif

# merge all rasters into one huge raster using COG block size
if [ ! -f "merged.tif" ]; then
gdal_translate -of GTiff -co TILED=YES -co BLOCKXSIZE="${BLOCK_SIZE}" -co BLOCKYSIZE="${BLOCK_SIZE}" -co COMPRESS=DEFLATE -co BIGTIFF=IF_SAFER -co NUM_THREADS=ALL_CPUS --config GDAL_CACHEMAX 70% --config GDAL_NUM_THREADS ALL_CPUS merged.vrt merged.tif
# merge all rasters into one huge raster using COG block size
gdal_translate -of GTiff -co TILED=YES -co BLOCKXSIZE="${BLOCK_SIZE}" -co BLOCKYSIZE="${BLOCK_SIZE}" -co COMPRESS=DEFLATE -co BIGTIFF=IF_SAFER -co NUM_THREADS=ALL_CPUS --config GDAL_CACHEMAX 70% --config GDAL_NUM_THREADS ALL_CPUS "${IMPLEMENTATION}_merged.vrt" "${IMPLEMENTATION}_merged.tif"
aws s3 cp "${IMPLEMENTATION}_merged.tif" "${PREFIX}/${IMPLEMENTATION}_merged.tif"
fi

# create overviews in raster
if ! gdalinfo "merged.tif" | grep -q "Overviews"; then
gdaladdo merged.tif -r "${RESAMPLE}" --config GDAL_NUM_THREADS ALL_CPUS --config GDAL_CACHEMAX 70%
if [[ $(aws s3 ls "${PREFIX}/${IMPLEMENTATION}_merged.tif.ovr") ]]; then
aws s3 cp "${PREFIX}/${IMPLEMENTATION}_merged.tif.ovr" "${IMPLEMENTATION}_merged.tif.ovr"
else
# generate overviews externally
gdaladdo "${IMPLEMENTATION}_merged.tif" -r "${RESAMPLE}" -ro --config GDAL_NUM_THREADS ALL_CPUS --config GDAL_CACHEMAX 70% --config COMPRESS_OVERVIEW DEFLATE
aws s3 cp "${IMPLEMENTATION}_merged.tif.ovr" "${PREFIX}/${IMPLEMENTATION}_merged.tif.ovr"
fi

# convert to COG using existing overviews, this adds some additional layout optimizations
if [ ! -f "cog.tif" ]; then
gdal_translate merged.tif cog.tif -of COG -co COMPRESS=DEFLATE -co BLOCKSIZE="${BLOCK_SIZE}" -co BIGTIFF=IF_SAFER -co NUM_THREADS=ALL_CPUS --config GDAL_CACHEMAX 70% --config GDAL_NUM_THREADS ALL_CPUS
fi
gdal_translate "${IMPLEMENTATION}_merged.tif" "${IMPLEMENTATION}.tif" -of COG -co COMPRESS=DEFLATE -co BLOCKSIZE="${BLOCK_SIZE}" -co BIGTIFF=IF_SAFER -co NUM_THREADS=ALL_CPUS -co OVERVIEWS=FORCE_USE_EXISTING --config GDAL_CACHEMAX 70% --config GDAL_NUM_THREADS ALL_CPUS

# upload to data lake
aws s3 cp cog.tif "${TARGET}"
aws s3 cp "${IMPLEMENTATION}.tif" "${TARGET}"

# delete intermediate file
aws s3 rm "${PREFIX}/${IMPLEMENTATION}_merged.tif"
aws s3 rm "${PREFIX}/${IMPLEMENTATION}_merged.tif.ovr"

if [ -n "$EXPORT_TO_GEE" ]; then
export_to_gee.py --dataset "${DATASET}" --implementation "${IMPLEMENTATION}"
Expand Down

0 comments on commit bfe8534

Please sign in to comment.