diff --git a/app/authentication/token.py b/app/authentication/token.py index 64f4abf48..43a743cba 100644 --- a/app/authentication/token.py +++ b/app/authentication/token.py @@ -1,14 +1,18 @@ -from typing import Tuple +from typing import Optional, Tuple, cast from fastapi import Depends, HTTPException from fastapi.logger import logger from fastapi.security import OAuth2PasswordBearer from httpx import Response +from ..routes import dataset_dependency from ..utils.rw_api import who_am_i +from ..settings.globals import PROTECTED_QUERY_DATASETS +# token dependency where we immediately cause an exception if there is no auth token oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token") - +# token dependency where we don't cause exception if there is no auth token +oauth2_scheme_no_auto = OAuth2PasswordBearer(tokenUrl="/token", auto_error=False) async def is_service_account(token: str = Depends(oauth2_scheme)) -> bool: """Calls GFW API to authorize user. @@ -36,13 +40,21 @@ async def is_admin(token: str = Depends(oauth2_scheme)) -> bool: return await is_app_admin(token, "gfw", "Unauthorized") -async def is_gfwpro_admin(error_str: str, token: str = Depends(oauth2_scheme)) -> bool: - """Calls GFW API to authorize user. - - User must be ADMIN for gfw pro app +async def is_gfwpro_admin_for_query(dataset: str = Depends(dataset_dependency), + token: Optional[str] = Depends(oauth2_scheme_no_auto)) -> bool: + """For a protected dataset, authorize the caller via the GFW API: the user + must be ADMIN for the gfw-pro app, and a missing token raises a 401 + HTTPException. For any other dataset, return True with no token required.
 """ - - return await is_app_admin(token, "gfw-pro", error_str) + + if dataset in PROTECTED_QUERY_DATASETS: + if token is None: + raise HTTPException(status_code=401, detail="Unauthorized query on a restricted dataset") + else: + return await is_app_admin(cast(str, token), "gfw-pro", + error_str="Unauthorized query on a restricted dataset") + + return True async def is_app_admin(token: str, app: str, error_str: str) -> bool: """Calls GFW API to authorize user. diff --git a/app/models/pydantic/jobs.py b/app/models/pydantic/jobs.py index 1e883e92d..d3f3ab0ab 100644 --- a/app/models/pydantic/jobs.py +++ b/app/models/pydantic/jobs.py @@ -87,14 +87,14 @@ class PostgresqlClientJob(Job): class GdalPythonImportJob(Job): - """Use for write operations to PostgreSQL which require GDAL/ Ogr2Ogr - drivers.""" + """Use for write operations to PostgreSQL which require GDAL/Ogr2Ogr + drivers. NOTE: JOB MUST BE SAFE TO RETRY!""" job_queue = AURORA_JOB_QUEUE job_definition = GDAL_PYTHON_JOB_DEFINITION vcpus = 1 memory = 2500 - attempts = 1 + attempts = 10 attempt_duration_seconds = DEFAULT_JOB_DURATION diff --git a/app/routes/datasets/queries.py b/app/routes/datasets/queries.py index b25092868..5a43fb307 100755 --- a/app/routes/datasets/queries.py +++ b/app/routes/datasets/queries.py @@ -23,7 +23,7 @@ from pydantic.tools import parse_obj_as from sqlalchemy.sql import and_ -from ...authentication.token import is_gfwpro_admin +from ...authentication.token import is_gfwpro_admin_for_query from ...application import db # from ...authentication.api_keys import get_api_key @@ -86,10 +86,6 @@ # Special suffixes to do an extra area density calculation on the raster data set. AREA_DENSITY_RASTER_SUFFIXES = ["_ha-1", "_ha_yr-1"] -# Datasets that require admin privileges to do a query. (Extra protection on -# commercial datasets which shouldn't be downloaded in any way.)
-PROTECTED_QUERY_DATASETS = ["wdpa_licensed_protected_areas"] - @router.get( "/{dataset}/{version}/query", response_class=RedirectResponse, @@ -134,6 +130,7 @@ async def query_dataset_json( geostore_origin: GeostoreOrigin = Query( GeostoreOrigin.gfw, description="Service to search first for geostore." ), + is_authorized: bool = Depends(is_gfwpro_admin_for_query), # api_key: APIKey = Depends(get_api_key), ): """Execute a READ-ONLY SQL query on the given dataset version (if @@ -160,8 +157,6 @@ async def query_dataset_json( """ dataset, version = dataset_version - #if dataset in PROTECTED_QUERY_DATASETS: - # await is_gfwpro_admin(error_str="Unauthorized query on a restricted dataset") if geostore_id: geostore: Optional[GeostoreCommon] = await get_geostore( @@ -197,6 +192,7 @@ async def query_dataset_csv( delimiter: Delimiters = Query( Delimiters.comma, description="Delimiter to use for CSV file." ), + is_authorized: bool = Depends(is_gfwpro_admin_for_query), # api_key: APIKey = Depends(get_api_key), ): """Execute a READ-ONLY SQL query on the given dataset version (if @@ -259,6 +255,7 @@ async def query_dataset_json_post( *, dataset_version: Tuple[str, str] = Depends(dataset_version_dependency), request: QueryRequestIn, + is_authorized: bool = Depends(is_gfwpro_admin_for_query), # api_key: APIKey = Depends(get_api_key), ): """Execute a READ-ONLY SQL query on the given dataset version (if @@ -289,6 +286,7 @@ async def query_dataset_csv_post( *, dataset_version: Tuple[str, str] = Depends(dataset_version_dependency), request: CsvQueryRequestIn, + is_authorized: bool = Depends(is_gfwpro_admin_for_query), # api_key: APIKey = Depends(get_api_key), ): """Execute a READ-ONLY SQL query on the given dataset version (if diff --git a/app/routes/datasets/versions.py b/app/routes/datasets/versions.py index 111ce89c6..82dc21064 100644 --- a/app/routes/datasets/versions.py +++ b/app/routes/datasets/versions.py @@ -242,7 +242,7 @@ async def append_to_version( update_data = 
{"creation_options": deepcopy(default_asset.creation_options)} update_data["creation_options"]["source_uri"] += request.source_uri if input_data["creation_options"].get("layers"): - update_data["creation_options"]["layers"] += request.layers + update_data["creation_options"]["layers"] += request.layers await assets.update_asset(default_asset.asset_id, **update_data) version_orm: ORMVersion = await versions.get_version(dataset, version) diff --git a/app/settings/globals.py b/app/settings/globals.py index 5630c647e..018daa267 100644 --- a/app/settings/globals.py +++ b/app/settings/globals.py @@ -185,3 +185,7 @@ GOOGLE_APPLICATION_CREDENTIALS = config( "GOOGLE_APPLICATION_CREDENTIALS", cast=str, default="/root/.gcs/private_key.json" ) + +# Datasets that require admin privileges to do a query. (Extra protection on +# commercial datasets which shouldn't be downloaded in any way.) +PROTECTED_QUERY_DATASETS = ["wdpa_licensed_protected_areas"] diff --git a/tests_v2/unit/app/routes/datasets/test_query.py b/tests_v2/unit/app/routes/datasets/test_query.py index 998579ff2..31c3b8a30 100755 --- a/tests_v2/unit/app/routes/datasets/test_query.py +++ b/tests_v2/unit/app/routes/datasets/test_query.py @@ -434,7 +434,6 @@ async def test_query_vector_asset_disallowed_10( ) @pytest.mark.asyncio() -@pytest.mark.skip("Skip while figuring out permissions") async def test_query_licensed_disallowed_11( licensed_version, async_client: AsyncClient ):