From a91da8c177fac1d8b186131ed51a3a5566e2ba82 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Wed, 3 Apr 2024 14:25:39 -0400 Subject: [PATCH 1/8] Cache _get_data_environment function --- app/routes/datasets/queries.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/app/routes/datasets/queries.py b/app/routes/datasets/queries.py index 5a43fb307..4e39c107c 100755 --- a/app/routes/datasets/queries.py +++ b/app/routes/datasets/queries.py @@ -7,13 +7,13 @@ from uuid import UUID, uuid4 import httpx +from async_lru import alru_cache from asyncpg import DataError, InsufficientPrivilegeError, SyntaxOrAccessError from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import Request as FastApiRequest from fastapi import Response as FastApiResponse from fastapi.encoders import jsonable_encoder from fastapi.logger import logger - # from fastapi.openapi.models import APIKey from fastapi.responses import RedirectResponse from pglast import printers # noqa @@ -21,11 +21,9 @@ from pglast.parser import ParseError from pglast.printer import RawStream from pydantic.tools import parse_obj_as -from sqlalchemy.sql import and_ from ...authentication.token import is_gfwpro_admin_for_query from ...application import db - # from ...authentication.api_keys import get_api_key from ...crud import assets from ...models.enum.assets import AssetType @@ -62,7 +60,6 @@ from ...models.enum.queries import QueryFormat, QueryType from ...models.orm.assets import Asset as AssetORM from ...models.orm.queries.raster_assets import latest_raster_tile_sets -from ...models.orm.versions import Version as VersionORM from ...models.pydantic.asset_metadata import RasterTable, RasterTableRow from ...models.pydantic.creation_options import NoDataType from ...models.pydantic.geostore import Geometry, GeostoreCommon @@ -659,13 +656,14 @@ async def _query_raster_lambda( def _get_area_density_name(nm): - """Return empty string if nm doesn't not have an area-density suffix, else + """Return empty string if nm doesn't have an area-density suffix, else return nm with the area-density suffix removed.""" for suffix in AREA_DENSITY_RASTER_SUFFIXES: if nm.endswith(suffix): return nm[:-len(suffix)] return "" + def _get_default_layer(dataset, pixel_meaning): default_type = pixel_meaning area_density_name = _get_area_density_name(default_type) @@ -683,6 +681,7 @@ def _get_default_layer(dataset, pixel_meaning): return f"{dataset}__{default_type}" +@alru_cache(maxsize=16, ttl=300.0) async def _get_data_environment(grid: Grid) -> DataEnvironment: # get all raster tile set assets with the same grid. latest_tile_sets = await db.all(latest_raster_tile_sets, {"grid": grid}) From bc904576805a6e581f1f52e74ad9638aad524ea7 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 4 Apr 2024 16:23:17 -0400 Subject: [PATCH 2/8] Make a per-function async test client to avoid caching problems during testing --- tests_v2/conftest.py | 6 ++++++ tests_v2/unit/app/routes/analysis/test_analysis.py | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests_v2/conftest.py b/tests_v2/conftest.py index 33fbb3a53..9bb49cb8f 100755 --- a/tests_v2/conftest.py +++ b/tests_v2/conftest.py @@ -93,6 +93,12 @@ async def async_client(db, init_db) -> AsyncGenerator[AsyncClient, None]: app.dependency_overrides = {} +@pytest_asyncio.fixture(scope="function") +async def async_client_per_function(async_client) -> AsyncGenerator[AsyncClient, None]: + """Async Test Client that's limited to per-function scope.""" + yield async_client + + @pytest_asyncio.fixture async def async_client_unauthenticated( db, init_db diff --git a/tests_v2/unit/app/routes/analysis/test_analysis.py b/tests_v2/unit/app/routes/analysis/test_analysis.py index d8dcd6673..5d55a0f72 100644 --- a/tests_v2/unit/app/routes/analysis/test_analysis.py +++ b/tests_v2/unit/app/routes/analysis/test_analysis.py @@ -79,8 +79,11 @@ async def test_analysis_with_huge_geostore( @pytest.mark.asyncio async def test_raster_analysis_payload_shape( - generic_dataset, async_client: AsyncClient, monkeypatch: MonkeyPatch + generic_dataset, async_client_per_function: AsyncClient, monkeypatch: MonkeyPatch ): + """Note that we use the async_client_per_function fixture to avoid + the cache getting polluted from other tests""" + dataset_name, _ = generic_dataset pixel_meaning: str = "date_conf" no_data_value = 0 From 824be2897efe39ae8e117aabb00e6a267612088b Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Thu, 4 Apr 2024 21:55:53 -0400 Subject: [PATCH 3/8] I forgot to pass the perf-function test client. Fix that --- tests_v2/unit/app/routes/analysis/test_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests_v2/unit/app/routes/analysis/test_analysis.py b/tests_v2/unit/app/routes/analysis/test_analysis.py index 5d55a0f72..7536be157 100644 --- a/tests_v2/unit/app/routes/analysis/test_analysis.py +++ b/tests_v2/unit/app/routes/analysis/test_analysis.py @@ -89,7 +89,7 @@ async def test_raster_analysis_payload_shape( no_data_value = 0 async with custom_raster_version( - async_client, + async_client_per_function, dataset_name, monkeypatch, pixel_meaning=pixel_meaning, @@ -106,7 +106,7 @@ async def test_raster_analysis_payload_shape( ) monkeypatch.setattr(geostore.rw_api, "get_geostore", mock_rw_get_geostore) - _ = await async_client.get( + _ = await async_client_per_function.get( f"/analysis/zonal/17076d5ea9f214a5bdb68cc40433addb?geostore_origin=rw&group_by=umd_tree_cover_loss__year&filters=is__umd_regional_primary_forest_2001&filters=umd_tree_cover_density_2000__30&sum=area__ha&start_date=2001" ) payload = mock_invoke_lambda.call_args.args[1] From a5ca0706265b7b0c7735dde902dca3f982d06b31 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Fri, 5 Apr 2024 09:47:30 -0400 Subject: [PATCH 4/8] Reluctantly duplicate async_client fixture for per-function version --- tests_v2/conftest.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tests_v2/conftest.py b/tests_v2/conftest.py index 9bb49cb8f..c10b1985d 100755 --- a/tests_v2/conftest.py +++ b/tests_v2/conftest.py @@ -96,7 +96,25 @@ async def async_client(db, init_db) -> AsyncGenerator[AsyncClient, None]: @pytest_asyncio.fixture(scope="function") async def async_client_per_function(async_client) -> AsyncGenerator[AsyncClient, None]: """Async Test Client that's limited to per-function scope.""" - yield async_client + from app.main import app + + # mock authentication function to avoid having to reach out to RW API during tests + app.dependency_overrides[is_admin] = bool_function_closure(True, with_args=False) + app.dependency_overrides[is_service_account] = bool_function_closure( + True, with_args=False + ) + app.dependency_overrides[get_user] = get_admin_mocked + + async with AsyncClient( + app=app, + base_url="http://test", + trust_env=False, + headers={"Origin": "https://www.globalforestwatch.org"}, + ) as client: + yield client + + # Clean up + app.dependency_overrides = {} @pytest_asyncio.fixture From 9c9e7a7725955d13233e81986832a88b90792443 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Fri, 5 Apr 2024 09:59:08 -0400 Subject: [PATCH 5/8] Fix fixture deps for async_client_per_function --- tests_v2/conftest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests_v2/conftest.py b/tests_v2/conftest.py index c10b1985d..5051276ec 100755 --- a/tests_v2/conftest.py +++ b/tests_v2/conftest.py @@ -94,8 +94,10 @@ async def async_client(db, init_db) -> AsyncGenerator[AsyncClient, None]: @pytest_asyncio.fixture(scope="function") -async def async_client_per_function(async_client) -> AsyncGenerator[AsyncClient, None]: - """Async Test Client that's limited to per-function scope.""" +async def async_client_per_function(db, init_db) -> AsyncGenerator[AsyncClient, None]: + """Async Test Client that's limited to per-function scope. + Use sparingly (such as when you need to clear caches) to avoid lengthier tests + """ from app.main import app # mock authentication function to avoid having to reach out to RW API during tests From a7568693b5988bb6fd7b12489fd47c043a0a4bff Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Mon, 15 Apr 2024 15:12:21 -0400 Subject: [PATCH 6/8] Use a feature of async_lru to clear the cache in test --- tests_v2/unit/app/routes/analysis/test_analysis.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests_v2/unit/app/routes/analysis/test_analysis.py b/tests_v2/unit/app/routes/analysis/test_analysis.py index 7536be157..547a7d37d 100644 --- a/tests_v2/unit/app/routes/analysis/test_analysis.py +++ b/tests_v2/unit/app/routes/analysis/test_analysis.py @@ -81,8 +81,8 @@ async def test_analysis_with_huge_geostore( async def test_raster_analysis_payload_shape( generic_dataset, async_client_per_function: AsyncClient, monkeypatch: MonkeyPatch ): - """Note that we use the async_client_per_function fixture to avoid - the cache getting polluted from other tests""" + """Note that this test depends on the output of _get_data_environment + which will likely have cached values from other tests, so we clear it.""" dataset_name, _ = generic_dataset pixel_meaning: str = "date_conf" @@ -106,6 +106,9 @@ async def test_raster_analysis_payload_shape( ) monkeypatch.setattr(geostore.rw_api, "get_geostore", mock_rw_get_geostore) + # The other tests will have polluted the data env cache. Clear it. + queries._get_data_environment.cache_clear() + _ = await async_client_per_function.get( f"/analysis/zonal/17076d5ea9f214a5bdb68cc40433addb?geostore_origin=rw&group_by=umd_tree_cover_loss__year&filters=is__umd_regional_primary_forest_2001&filters=umd_tree_cover_density_2000__30&sum=area__ha&start_date=2001" ) From 638a97c9cd81b8680ab0aa89983c1f9df1686609 Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Mon, 15 Apr 2024 15:13:33 -0400 Subject: [PATCH 7/8] Remove unneeded per-function async client fixture --- tests_v2/conftest.py | 26 ------------------- .../unit/app/routes/analysis/test_analysis.py | 2 +- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/tests_v2/conftest.py b/tests_v2/conftest.py index 5051276ec..33fbb3a53 100755 --- a/tests_v2/conftest.py +++ b/tests_v2/conftest.py @@ -93,32 +93,6 @@ async def async_client(db, init_db) -> AsyncGenerator[AsyncClient, None]: app.dependency_overrides = {} -@pytest_asyncio.fixture(scope="function") -async def async_client_per_function(db, init_db) -> AsyncGenerator[AsyncClient, None]: - """Async Test Client that's limited to per-function scope. - Use sparingly (such as when you need to clear caches) to avoid lengthier tests - """ - from app.main import app - - # mock authentication function to avoid having to reach out to RW API during tests - app.dependency_overrides[is_admin] = bool_function_closure(True, with_args=False) - app.dependency_overrides[is_service_account] = bool_function_closure( - True, with_args=False - ) - app.dependency_overrides[get_user] = get_admin_mocked - - async with AsyncClient( - app=app, - base_url="http://test", - trust_env=False, - headers={"Origin": "https://www.globalforestwatch.org"}, - ) as client: - yield client - - # Clean up - app.dependency_overrides = {} - - @pytest_asyncio.fixture async def async_client_unauthenticated( db, init_db diff --git a/tests_v2/unit/app/routes/analysis/test_analysis.py b/tests_v2/unit/app/routes/analysis/test_analysis.py index 547a7d37d..0055e96e1 100644 --- a/tests_v2/unit/app/routes/analysis/test_analysis.py +++ b/tests_v2/unit/app/routes/analysis/test_analysis.py @@ -79,7 +79,7 @@ async def test_analysis_with_huge_geostore( @pytest.mark.asyncio async def test_raster_analysis_payload_shape( - generic_dataset, async_client_per_function: AsyncClient, monkeypatch: MonkeyPatch + generic_dataset, async_client: AsyncClient, monkeypatch: MonkeyPatch ): """Note that this test depends on the output of _get_data_environment which will likely have cached values from other tests, so we clear it.""" From 0b19c4691e29ab29a0a33d38e766cf7cf958430c Mon Sep 17 00:00:00 2001 From: Daniel Mannarino Date: Mon, 15 Apr 2024 15:45:25 -0400 Subject: [PATCH 8/8] Fix references to removed async_client_per_function --- tests_v2/unit/app/routes/analysis/test_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests_v2/unit/app/routes/analysis/test_analysis.py b/tests_v2/unit/app/routes/analysis/test_analysis.py index 0055e96e1..1def7fb39 100644 --- a/tests_v2/unit/app/routes/analysis/test_analysis.py +++ b/tests_v2/unit/app/routes/analysis/test_analysis.py @@ -89,7 +89,7 @@ async def test_raster_analysis_payload_shape( no_data_value = 0 async with custom_raster_version( - async_client_per_function, + async_client, dataset_name, monkeypatch, pixel_meaning=pixel_meaning, @@ -109,7 +109,7 @@ async def test_raster_analysis_payload_shape( # The other tests will have polluted the data env cache. Clear it. queries._get_data_environment.cache_clear() - _ = await async_client_per_function.get( + _ = await async_client.get( f"/analysis/zonal/17076d5ea9f214a5bdb68cc40433addb?geostore_origin=rw&group_by=umd_tree_cover_loss__year&filters=is__umd_regional_primary_forest_2001&filters=umd_tree_cover_density_2000__30&sum=area__ha&start_date=2001" ) payload = mock_invoke_lambda.call_args.args[1]