Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Respect specified order of fields, add option for tile_id for 1x1 vector export #459

Merged
merged 5 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion app/models/pydantic/creation_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,13 @@ class StaticVectorFileCreationOptions(StrictBaseModel):
)


class StaticVector1x1CreationOptions(StaticVectorFileCreationOptions):
    """Creation options for the 1x1-grid vector export asset.

    Extends ``StaticVectorFileCreationOptions`` with a single opt-in flag
    controlling whether each exported feature carries a ``tile_id`` column.
    """

    # Defaults to False so existing grid_1x1 exports are unchanged unless
    # callers explicitly request the tile_id column.
    include_tile_id: Optional[bool] = Field(
        False,
        description="Whether or not to include the tile_id of each feature"
    )


SourceCreationOptions = Union[
TableSourceCreationOptions,
RasterTileSetSourceCreationOptions,
Expand All @@ -391,6 +398,7 @@ class StaticVectorFileCreationOptions(StrictBaseModel):
RasterTileCacheCreationOptions,
StaticVectorTileCacheCreationOptions,
StaticVectorFileCreationOptions,
StaticVector1x1CreationOptions,
DynamicVectorTileCacheCreationOptions,
RasterTileSetAssetCreationOptions,
]
Expand All @@ -412,7 +420,7 @@ class CreationOptionsResponse(Response):
AssetType.dynamic_vector_tile_cache: DynamicVectorTileCacheCreationOptions,
AssetType.static_vector_tile_cache: StaticVectorTileCacheCreationOptions,
AssetType.ndjson: StaticVectorFileCreationOptions,
AssetType.grid_1x1: StaticVectorFileCreationOptions,
AssetType.grid_1x1: StaticVector1x1CreationOptions,
AssetType.shapefile: StaticVectorFileCreationOptions,
AssetType.geopackage: StaticVectorFileCreationOptions,
AssetType.raster_tile_set: RasterTileSetAssetCreationOptions,
Expand Down
5 changes: 4 additions & 1 deletion app/tasks/static_vector_1x1_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ async def static_vector_1x1_asset(
asset_id: UUID,
input_data: Dict[str, Any],
) -> ChangeLog:
"""Create Vector tile cache and NDJSON file as intermediate data."""
"""Export a TSV to S3 with features in a 1x1 grid of tiles."""

#######################
# Update asset metadata
Expand Down Expand Up @@ -57,6 +57,9 @@ async def static_vector_1x1_asset(
grid_1x1_uri,
]

if creation_options.include_tile_id:
command.append("--include_tile_id")

export_1x1_grid = PostgresqlClientJob(
dataset=dataset,
job_name="export_1x1_grid",
Expand Down
35 changes: 19 additions & 16 deletions app/utils/fields.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,38 @@
from typing import Any, Dict, List

from ..crud import assets, metadata as metadata_crud
from ..crud.assets import get_default_asset
from ..crud.metadata import get_asset_fields_dicts
from ..models.orm.assets import Asset as ORMAsset
from ..models.pydantic.creation_options import CreationOptions


async def get_field_attributes(
dataset: str, version: str, creation_options: CreationOptions
) -> List[Dict[str, Any]]:
"""Get field attribute list from creation options.

If no attribute list provided, use all fields from DB table, marked
as `is_feature_info`. Otherwise compare to provide list with
available fields and use intersection.
"""Get list of field attributes on the asset which are marked as `is_feature_info`
If a field list is provided in creation options, limit the list to those provided,
in the order provided. Invalid provided fields are silently ignored.
"""

default_asset: ORMAsset = await assets.get_default_asset(dataset, version)
fields = await metadata_crud.get_asset_fields_dicts(default_asset)
default_asset: ORMAsset = await get_default_asset(dataset, version)
asset_fields = await get_asset_fields_dicts(default_asset)

field_attributes: List[Dict[str, Any]] = [
field for field in fields if field["is_feature_info"]
]
name_to_feature_fields: Dict[str, Dict] = {
field["name"]: field
for field in asset_fields
if field["is_feature_info"]
}

if (
"field_attributes" in creation_options.__fields__
and creation_options.field_attributes
):
field_attributes = [
field
for field in field_attributes
if field["name"] in creation_options.field_attributes
asset_field_attributes = [
name_to_feature_fields[field_name]
for field_name in creation_options.field_attributes
if field_name in name_to_feature_fields
]
else:
asset_field_attributes = list(name_to_feature_fields.values())

return field_attributes
return asset_field_attributes
24 changes: 20 additions & 4 deletions batch/python/export_1x1_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ def src_table(dataset: str, version: str) -> Table:


def get_sql(
dataset: str, version: str, fields: List[str], grid_id: str, tcl: bool, glad: bool
dataset: str, version: str, fields: List[str], include_tile_id: bool, grid_id: str, tcl: bool, glad: bool
) -> Select:
"""Generate SQL statement."""

Expand All @@ -366,6 +366,9 @@ def get_sql(
nested_columns = [field.split(",") for field in fields]
columns = [column(c) for columns in nested_columns for c in columns]

if include_tile_id:
columns.append(literal_column(f"'{grid_id}'").label("tile_id"))

sql: Select = (
select(columns + [tcl_column, glad_column, geom_column])
.select_from(src_table(dataset, version).alias("t"))
Expand All @@ -379,7 +382,7 @@ def get_sql(


async def run(
loop: AbstractEventLoop, dataset: str, version: str, fields: List[str]
loop: AbstractEventLoop, dataset: str, version: str, fields: List[str], include_tile_id: bool
) -> None:
async def copy_tiles(i: int, tile: Tuple[str, bool, bool]) -> None:
if i == 0:
Expand All @@ -406,7 +409,17 @@ async def copy_tiles(i: int, tile: Tuple[str, bool, bool]) -> None:
password=PGPASSWORD,
)
result = await con.copy_from_query(
str(get_sql(dataset, version, fields, grid_id, tcl, glad)),
str(
get_sql(
dataset,
version,
fields,
include_tile_id,
grid_id,
tcl,
glad
)
),
output=output,
format="csv",
delimiter="\t",
Expand Down Expand Up @@ -445,6 +458,9 @@ async def copy_tiles(i: int, tile: Tuple[str, bool, bool]) -> None:
parser.add_argument(
"--column_names", "-C", type=str, nargs="+", help="Column names to include"
)
parser.add_argument(
"--include_tile_id", action='store_true', help="Include tile_id in the output"
)
args = parser.parse_args()
loop: AbstractEventLoop = asyncio.get_event_loop()
loop.run_until_complete(run(loop, args.dataset, args.version, args.column_names))
loop.run_until_complete(run(loop, args.dataset, args.version, args.column_names, args.include_tile_id))
14 changes: 12 additions & 2 deletions batch/scripts/export_1x1_grid.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,27 @@

set -e

# requires arguments
# required arguments
# -d | --dataset
# -v | --version
# -C | --column_names
# -T | --target
#
# optional arguments
# --include_tile_id

ME=$(basename "$0")
. get_arguments.sh "$@"

echo "PYTHON: Create 1x1 grid files"
export_1x1_grid.py -d "$DATASET" -v "$VERSION" -C "$COLUMN_NAMES"
ARG_ARRAY=("--dataset" "${DATASET}"
"--version" "${VERSION}"
"-C" "${COLUMN_NAMES}")

if [ -n "${INCLUDE_TILE_ID}" ]; then
ARG_ARRAY+=("--include_tile_id")
fi
export_1x1_grid.py "${ARG_ARRAY[@]}"

echo "Combine output files"
echo ./*.tmp | xargs cat >> "${DATASET}_${VERSION}_1x1.tsv"
Expand Down
4 changes: 4 additions & 0 deletions batch/scripts/get_arguments.sh
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ do
shift # past argument
shift # past value
;;
--include_tile_id)
INCLUDE_TILE_ID="TRUE"
shift # past argument
;;
-j|--json)
JSON="$2"
shift # past argument
Expand Down
Loading
Loading