diff --git a/batch/universal_batch.dockerfile b/batch/universal_batch.dockerfile
new file mode 100644
index 000000000..fd174213b
--- /dev/null
+++ b/batch/universal_batch.dockerfile
@@ -0,0 +1,65 @@
+FROM ghcr.io/osgeo/gdal:ubuntu-full-3.8.5
+LABEL desc="Docker image with ALL THE THINGS for use in Batch by the GFW data API"
+LABEL version="v1.0"
+
+ENV TIPPECANOE_VERSION=2.56.0
+
+ENV VENV_DIR="/.venv"
+
+RUN apt-get update -y \
+    && apt-get install --no-install-recommends -y python3-dev python3-venv \
+        postgresql-client jq curl libsqlite3-dev zlib1g-dev zip libpq-dev build-essential gcc g++ \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN ln -s /usr/include /usr/include/gdal
+
+# --system-site-packages lets the venv see the GDAL Python libs installed in the base image
+# --no-cache-dir keeps pip's download cache out of the image layer
+RUN python -m venv ${VENV_DIR} --system-site-packages \
+    && . ${VENV_DIR}/bin/activate \
+    && python -m ensurepip --upgrade \
+    && python -m pip install --no-cache-dir \
+        agate~=1.11.0 \
+        asyncpg~=0.29.0 \
+        awscli~=1.33.13 \
+        awscli-plugin-endpoint~=0.4 \
+        boto3~=1.34.128 \
+        click~=8.1.7 \
+        csvkit~=2.0.0 \
+        earthengine-api~=0.1.408 \
+        fiona~=1.9.6 \
+        numpy~=1.26.4 \
+        pandas~=2.1.4 \
+        psycopg2~=2.9.9 \
+        rasterio~=1.3.10 \
+        setuptools~=70.1 \
+        shapely~=2.0.4 \
+        SQLAlchemy~=1.3.24 \
+        tileputty~=0.2.10
+
+# Install Tippecanoe
+RUN mkdir -p /opt/src
+WORKDIR /opt/src
+# curl -f aborts on HTTP errors instead of piping an error page into tar; -L follows redirects
+RUN curl -fsSL https://codeload.github.com/felt/tippecanoe/tar.gz/${TIPPECANOE_VERSION} | tar -xz \
+    && cd /opt/src/tippecanoe-${TIPPECANOE_VERSION} \
+    && make \
+    && make install \
+    && rm -R /opt/src/tippecanoe-${TIPPECANOE_VERSION}
+
+# Copy scripts
+COPY ./batch/scripts/ /opt/scripts/
+COPY ./batch/python/ /opt/python/
+
+# Make sure scripts are executable
+RUN chmod +x -R /opt/scripts/
+RUN chmod +x -R /opt/python/
+
+ENV PATH="/opt/scripts:${PATH}"
+ENV PATH="/opt/python:${PATH}"
+
+ENV WORKDIR="/tmp"
+WORKDIR /tmp
+
+ENTRYPOINT ["/opt/scripts/report_status.sh"]
diff --git a/scripts/test b/scripts/test
index 7fcc16587..c2230935f 100755
--- a/scripts/test
+++ b/scripts/test
@@ -60,9 +60,10 @@ if [ $# -eq 0 ]; then
 fi
 
 if [ "${BUILD}" = true ]; then
-    docker build -t batch_gdal-python_test . -f batch/gdal-python.dockerfile
-    docker build -t batch_postgresql-client_test . -f batch/postgresql-client.dockerfile
-    docker build -t batch_tile_cache_test . -f batch/tile_cache.dockerfile
+    docker build -t universal_batch_test . -f batch/universal_batch.dockerfile
+#    docker build -t batch_gdal-python_test . -f batch/gdal-python.dockerfile
+#    docker build -t batch_postgresql-client_test . -f batch/postgresql-client.dockerfile
+#    docker build -t batch_tile_cache_test . -f batch/tile_cache.dockerfile
     docker build -t pixetl_test . -f batch/pixetl.dockerfile
     docker-compose -f docker-compose.test.yml --project-name gfw-data-api_test build --no-cache app_test
 fi
diff --git a/scripts/test_v2 b/scripts/test_v2
index 60e4977b9..03288e1ce 100755
--- a/scripts/test_v2
+++ b/scripts/test_v2
@@ -52,9 +52,10 @@ if [ $# -eq 0 ]; then
 fi
 
 if [ "${BUILD}" = true ]; then
-    docker build -t batch_gdal-python_test . -f batch/gdal-python.dockerfile
-    docker build -t batch_postgresql-client_test . -f batch/postgresql-client.dockerfile
-    docker build -t batch_tile_cache_test . -f batch/tile_cache.dockerfile
+    docker build -t universal_batch_test . -f batch/universal_batch.dockerfile
+#    docker build -t batch_gdal-python_test . -f batch/gdal-python.dockerfile
+#    docker build -t batch_postgresql-client_test . -f batch/postgresql-client.dockerfile
+#    docker build -t batch_tile_cache_test . -f batch/tile_cache.dockerfile
     docker build -t pixetl_test . -f batch/pixetl.dockerfile
     docker-compose -f docker-compose.test.yml --project-name gfw-data-api_test build --no-cache app_test
 fi
diff --git a/terraform/main.tf b/terraform/main.tf
index 92b41c23f..77d83a244 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -39,14 +39,23 @@ module "app_docker_image" {
 }
 
-# Docker image for GDAL Python Batch jobs
-module "batch_gdal_python_image" {
+# Docker image shared by the GDAL Python, PostgreSQL Client and Tile Cache Batch jobs
+module "universal_batch_image" {
   source          = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3"
-  image_name      = substr(lower("${local.project}-gdal_python${local.name_suffix}"), 0, 64)
+  image_name      = substr(lower("${local.project}-universal_batch${local.name_suffix}"), 0, 64)
   root_dir        = "${path.root}/../"
   docker_path     = "batch"
-  docker_filename = "gdal-python.dockerfile"
+  docker_filename = "universal_batch.dockerfile"
 }
 
+## Docker image for GDAL Python Batch jobs
+#module "batch_gdal_python_image" {
+#  source          = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3"
+#  image_name      = substr(lower("${local.project}-gdal_python${local.name_suffix}"), 0, 64)
+#  root_dir        = "${path.root}/../"
+#  docker_path     = "batch"
+#  docker_filename = "gdal-python.dockerfile"
+#}
+
 # Docker image for PixETL Batch jobs
 module "batch_pixetl_image" {
   source          = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3"
@@ -56,23 +65,23 @@ module "batch_pixetl_image" {
   docker_filename = "pixetl.dockerfile"
 }
 
-# Docker image for PostgreSQL Client Batch jobs
-module "batch_postgresql_client_image" {
-  source          = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3"
-  image_name      = substr(lower("${local.project}-postgresql_client${local.name_suffix}"), 0, 64)
-  root_dir        = "${path.root}/../"
-  docker_path     = "batch"
-  docker_filename = "postgresql-client.dockerfile"
-}
-
-# Docker image for Tile Cache Batch jobs
-module "batch_tile_cache_image" {
-  source          = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3"
-  image_name      = substr(lower("${local.project}-tile_cache${local.name_suffix}"), 0, 64)
-  root_dir        = "${path.root}/../"
-  docker_path     = "batch"
-  docker_filename = "tile_cache.dockerfile"
-}
+## Docker image for PostgreSQL Client Batch jobs
+#module "batch_postgresql_client_image" {
+#  source          = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3"
+#  image_name      = substr(lower("${local.project}-postgresql_client${local.name_suffix}"), 0, 64)
+#  root_dir        = "${path.root}/../"
+#  docker_path     = "batch"
+#  docker_filename = "postgresql-client.dockerfile"
+#}
+#
+## Docker image for Tile Cache Batch jobs
+#module "batch_tile_cache_image" {
+#  source          = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/container_registry?ref=v0.4.2.3"
+#  image_name      = substr(lower("${local.project}-tile_cache${local.name_suffix}"), 0, 64)
+#  root_dir        = "${path.root}/../"
+#  docker_path     = "batch"
+#  docker_filename = "tile_cache.dockerfile"
+#}
 
 
 module "fargate_autoscaling" {
@@ -191,10 +200,10 @@ module "batch_job_queues" {
   environment               = var.environment
   name_suffix               = local.name_suffix
   project                   = local.project
-  gdal_repository_url       = "${module.batch_gdal_python_image.repository_url}:latest"
+  gdal_repository_url       = "${module.universal_batch_image.repository_url}:latest"
   pixetl_repository_url     = "${module.batch_pixetl_image.repository_url}:latest"
-  postgres_repository_url   = "${module.batch_postgresql_client_image.repository_url}:latest"
-  tile_cache_repository_url = "${module.batch_tile_cache_image.repository_url}:latest"
+  postgres_repository_url   = "${module.universal_batch_image.repository_url}:latest"
+  tile_cache_repository_url = "${module.universal_batch_image.repository_url}:latest"
   iam_policy_arn = [
     "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess",
     aws_iam_policy.query_batch_jobs.arn,
diff --git a/tests/conftest.py b/tests/conftest.py
index c8f69fd72..4ec57c6bf 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -170,11 +170,11 @@ def patch_run(self, *k, **kwargs):
     aws_mock.add_job_queue(TILE_CACHE_JOB_QUEUE, s3_writer_env["computeEnvironmentArn"])
     aws_mock.add_job_queue(PIXETL_JOB_QUEUE, pixetl_env["computeEnvironmentArn"])
 
-    aws_mock.add_job_definition(GDAL_PYTHON_JOB_DEFINITION, "batch_gdal-python_test")
+    aws_mock.add_job_definition(GDAL_PYTHON_JOB_DEFINITION, "universal_batch_test")
     aws_mock.add_job_definition(
-        POSTGRESQL_CLIENT_JOB_DEFINITION, "batch_postgresql-client_test"
+        POSTGRESQL_CLIENT_JOB_DEFINITION, "universal_batch_test"
     )
-    aws_mock.add_job_definition(TILE_CACHE_JOB_DEFINITION, "batch_tile_cache_test")
+    aws_mock.add_job_definition(TILE_CACHE_JOB_DEFINITION, "universal_batch_test")
     aws_mock.add_job_definition(PIXETL_JOB_DEFINITION, "pixetl_test", mount_tmp=True)
 
     yield aws_mock.mocked_services["batch"]["client"], aws_mock.mocked_services["logs"][