From 85c438d9bed3820c401767b193313c5ecb3b08f7 Mon Sep 17 00:00:00 2001
From: Daniel Mannarino
Date: Sun, 21 Jul 2024 23:43:56 -0400
Subject: [PATCH] Replace per-zoom where filters with tippecanoe mapbox filters

---
Review notes (free text after the "---" separator is not part of the commit
message). Fixes folded into the added lines below; each one is a same-line
replacement, so hunk line counts and the diffstat are unchanged:

 * create_vector_tile_cache.sh: test "${FILTER}" instead of the literal
   string "FILTER", which is always non-empty and so always took the
   filter branch.
 * create_vector_tile_cache.sh: pass every flag and its value as separate
   array elements ("-e" "tilecache", "-n" "${DATASET}", "-j" "${FILTER}").
   A single element such as " -j ${FILTER}" reaches tippecanoe as one argv
   entry with a leading space and is not parsed as an option.
 * get_arguments.sh: store the --filter value in FILTER (was lowercase
   "filter"), which is the name create_vector_tile_cache.sh reads.
 * static_vector_tile_cache_assets.py: drop the literal single quotes
   around the filter JSON. NOTE(review): assumes the job command list is
   handed to the container as argv without shell re-parsing - confirm
   against the job submission code.

NOTE(review): this hunk relies on "json" being importable in
static_vector_tile_cache_assets.py; lines 1-2 of that file are outside the
hunk context - confirm. The "index" lines still carry the original blob
ids, and leading indentation was reconstructed from a whitespace-collapsed
copy - verify context whitespace against the target tree before applying.

 app/models/pydantic/creation_options.py       | 11 +--
 .../raster_tile_cache_assets.py               |  1 +
 app/tasks/static_vector_tile_cache_assets.py  | 93 +++++++------------
 batch/scripts/create_vector_tile_cache.sh     | 47 ++++++----
 batch/scripts/get_arguments.sh                | 10 +-
 5 files changed, 72 insertions(+), 90 deletions(-)

diff --git a/app/models/pydantic/creation_options.py b/app/models/pydantic/creation_options.py
index 86070c588..69c20e632 100644
--- a/app/models/pydantic/creation_options.py
+++ b/app/models/pydantic/creation_options.py
@@ -70,13 +70,6 @@ class Config:
         orm_mode = True
 
 
-class ZoomRangeWhereFilter(StrictBaseModel):
-    min_zoom: int
-    max_zoom: int
-    field: str
-    values_in: List[StrictInt | str]
-
-
 class HashPartitionSchema(StrictBaseModel):
     partition_count: PositiveInt
 
@@ -406,9 +399,9 @@ class StaticVectorTileCacheCreationOptions(TileCacheBaseModel):
         "for vector tile caches. `source` and `source-layer` attributes must use `dataset` name."
         "Styling rules will be used in autogenerated root.json and preview.",
     )
-    where_filters: Optional[List[ZoomRangeWhereFilter]] = Field(
+    feature_filter: Optional[Dict[str, Any]] = Field(
         None,
-        description="Optional per-zoom-range filters to apply to features."
+        description="Optional filter to apply to features."
     )
 
 
diff --git a/app/tasks/raster_tile_cache_assets/raster_tile_cache_assets.py b/app/tasks/raster_tile_cache_assets/raster_tile_cache_assets.py
index 93bf63e0e..2d4bd078c 100644
--- a/app/tasks/raster_tile_cache_assets/raster_tile_cache_assets.py
+++ b/app/tasks/raster_tile_cache_assets/raster_tile_cache_assets.py
@@ -32,6 +32,7 @@
 from ...errors import RecordNotFoundError
 
 
+
 async def raster_tile_cache_asset(
     dataset: str,
     version: str,
diff --git a/app/tasks/static_vector_tile_cache_assets.py b/app/tasks/static_vector_tile_cache_assets.py
index acffccd82..57c001b31 100644
--- a/app/tasks/static_vector_tile_cache_assets.py
+++ b/app/tasks/static_vector_tile_cache_assets.py
@@ -3,6 +3,8 @@
 from typing import Any, Dict, List, Optional
 from uuid import UUID
 
+from fastapi.encoders import jsonable_encoder
+
 from ..crud import assets, metadata
 from ..errors import RecordNotFoundError
 from ..models.orm.assets import Asset as ORMAsset
@@ -98,68 +100,39 @@ async def static_vector_tile_cache_asset(
     )
 
     tile_cache_jobs: List[TileCacheJob] = []
-    if creation_options.where_filters:
-        for where_filter in creation_options.where_filters:
-            command = [
-                "create_vector_tile_cache.sh",
-                "-d",
-                dataset,
-                "-v",
-                version,
-                "-s",
-                ndjson_uri,
-                "-Z",
-                str(where_filter.min_zoom),
-                "-z",
-                str(where_filter.max_zoom),
-                "-t",
-                creation_options.tile_strategy,
-                "-I",
-                creation_options.implementation,
-                "--where_field",
-                where_filter.field,
-                "--where_values",
-                ",".join(str(value) for value in where_filter.values_in),
-            ]
-            tile_cache_jobs.append(
-                TileCacheJob(
-                    dataset=dataset,
-                    job_name=f"vector_tile_cache_zoom_{str(where_filter.min_zoom)}-{where_filter.max_zoom}",
-                    command=command,
-                    parents=[export_ndjson.job_name],
-                    environment=report_vars,
-                    callback=callback_constructor(asset_id),
-                )
-            )
-    else:
-        command = [
-            "create_vector_tile_cache.sh",
-            "-d",
-            dataset,
-            "-v",
-            version,
-            "-s",
-            ndjson_uri,
-            "-Z",
-            str(creation_options.min_zoom),
-            "-z",
-            str(creation_options.max_zoom),
-            "-t",
-            creation_options.tile_strategy,
-            "-I",
-            creation_options.implementation,
-        ]
+    command = [
+        "create_vector_tile_cache.sh",
+        "-d",
+        dataset,
+        "-v",
+        version,
+        "-s",
+        ndjson_uri,
+        "-Z",
+        str(creation_options.min_zoom),
+        "-z",
+        str(creation_options.max_zoom),
+        "-t",
+        creation_options.tile_strategy,
+        "-I",
+        creation_options.implementation,
+    ]
+    if creation_options.feature_filter:
+        command += (
+            "--filter",
+            json.dumps(jsonable_encoder(creation_options.feature_filter)),
+        )
 
-        tile_cache_jobs.append(
-            TileCacheJob(
-                dataset=dataset,
-                job_name="create_vector_tile_cache",
-                command=command,
-                parents=[export_ndjson.job_name],
-                environment=report_vars,
-                callback=callback_constructor(asset_id),
-            )
+    tile_cache_jobs.append(
+        TileCacheJob(
+            dataset=dataset,
+            job_name="create_vector_tile_cache",
+            command=command,
+            parents=[export_ndjson.job_name],
+            environment=report_vars,
+            callback=callback_constructor(asset_id),
         )
+    )
 
     #######################
     # execute jobs
diff --git a/batch/scripts/create_vector_tile_cache.sh b/batch/scripts/create_vector_tile_cache.sh
index a067414d9..cac9425b3 100755
--- a/batch/scripts/create_vector_tile_cache.sh
+++ b/batch/scripts/create_vector_tile_cache.sh
@@ -10,19 +10,39 @@ set -e
 # -z | --max_zoom
 # -t | --tile_strategy
 # -I | --implementation
+
+# optional arguments
+# --filter
+
 ME=$(basename "$0")
 . get_arguments.sh "$@"
 
-# Set TILE_STRATEGY
+
+NDJSON_FILE="data.json"
+
+echo "Fetching NDJSON data from the Data Lake: ${SRC} -> ${NDJSON_FILE}..."
+aws s3 cp "${SRC}" "${NDJSON_FILE}" --no-progress
+
+
+# Build an array of arguments to pass to tippecanoe
+TIPPE_ARG_ARRAY=(
+  "-e" "tilecache"
+  "-Z${MIN_ZOOM}"
+  "-z${MAX_ZOOM}"
+  "--preserve-input-order"
+  "-P"
+  "-n" "${DATASET}"
+)
+
 case ${TILE_STRATEGY} in
 discontinuous) # Discontinuous polygon features
-  STRATEGY=("--drop-densest-as-needed" "--extend-zooms-if-still-dropping")
+  TIPPE_ARG_ARRAY+=("--drop-densest-as-needed" "--extend-zooms-if-still-dropping")
   ;;
 continuous) # Continuous polygon features
-  STRATEGY=("--coalesce-densest-as-needed" "--extend-zooms-if-still-dropping")
+  TIPPE_ARG_ARRAY+=("--coalesce-densest-as-needed" "--extend-zooms-if-still-dropping")
   ;;
 keep_all) # never drop or coalesce feature, ignore size and feature count
-  STRATEGY=("-r1")
+  TIPPE_ARG_ARRAY+=("-r1")
   ;;
 *)
   echo "Invalid Tile Cache option -${TILE_STRATEGY}"
@@ -30,19 +50,14 @@ keep_all) # never drop or coalesce feature, ignore size and feature count
   ;;
 esac
 
-echo "Fetch NDJSON data from Data Lake ${SRC} -> 'data.ndjson'"
-aws s3 cp "${SRC}" 'data.ndjson' --no-progress
-
-FINAL_DATA='data.ndjson'
-
-if [ -n "$WHERE_FIELD" ]; then
-  FINAL_DATA='filtered_data.ndjson'
-  echo "Perform Filtering"
-  ogr2ogr -if GeoJSONSeq "${FINAL_DATA}" 'data.ndjson' -where "${WHERE_FIELD} IN (${WHERE_VALUES})"
+if [ -n "${FILTER}" ]; then
+  TIPPE_ARG_ARRAY+=("-j" "${FILTER}")
 fi
 
-echo "Build Tile Cache"
-tippecanoe -Z"${MIN_ZOOM}" -z"${MAX_ZOOM}" -e tilecache "${STRATEGY[@]}" -P -n "${DATASET}" "${FINAL_DATA}" --preserve-input-order
+TIPPE_ARG_ARRAY+=("${NDJSON_FILE}")
+
+echo "Building Tile Cache with Tippecanoe..."
+tippecanoe "${TIPPE_ARG_ARRAY[@]}"
 
-echo "Upload tiles to S3"
+echo "Uploading tiles to S3 with TilePutty..."
 tileputty tilecache --bucket "${TILE_CACHE}" --dataset "${DATASET}" --version "${VERSION}" --implementation "${IMPLEMENTATION}" --cores "${NUM_PROCESSES}"
\ No newline at end of file
diff --git a/batch/scripts/get_arguments.sh b/batch/scripts/get_arguments.sh
index b996a1d0a..44996a483 100755
--- a/batch/scripts/get_arguments.sh
+++ b/batch/scripts/get_arguments.sh
@@ -90,6 +90,11 @@ do
       shift # past argument
       shift # past value
       ;;
+      --filter)
+      FILTER="$2"
+      shift
+      shift
+      ;;
       -F|--format)
       FORMAT="$2"
       shift # past argument
@@ -246,11 +251,6 @@ do
       shift
       shift
       ;;
-      --where_values)
-      WHERE_VALUES="$2"
-      shift
-      shift
-      ;;
       *)    # unknown option
       POSITIONAL+=("$1") # save it in an array for later