From b2fdf0e7aad830397a01b3716116b7bef4e592c7 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 26 Jun 2024 18:06:09 +0200 Subject: [PATCH] Issue #13 unify "benchmark scenario" naming --- .../max_ndvi.json | 0 .../worldcereal.json | 0 qa/benchmarks/tests/test_benchmarks.py | 16 ++++----- qa/tools/apex_algorithm_qa_tools/common.py | 6 ++-- .../{usecases.py => scenarios.py} | 33 +++++++++---------- qa/unittests/tests/conftest.py | 2 +- qa/unittests/tests/test_scenarios.py | 26 +++++++++++++++ qa/unittests/tests/test_usecases.py | 22 ------------- 8 files changed, 54 insertions(+), 51 deletions(-) rename {algorithm_invocations => benchmark_scenarios}/max_ndvi.json (100%) rename {algorithm_invocations => benchmark_scenarios}/worldcereal.json (100%) rename qa/tools/apex_algorithm_qa_tools/{usecases.py => scenarios.py} (74%) create mode 100644 qa/unittests/tests/test_scenarios.py delete mode 100644 qa/unittests/tests/test_usecases.py diff --git a/algorithm_invocations/max_ndvi.json b/benchmark_scenarios/max_ndvi.json similarity index 100% rename from algorithm_invocations/max_ndvi.json rename to benchmark_scenarios/max_ndvi.json diff --git a/algorithm_invocations/worldcereal.json b/benchmark_scenarios/worldcereal.json similarity index 100% rename from algorithm_invocations/worldcereal.json rename to benchmark_scenarios/worldcereal.json diff --git a/qa/benchmarks/tests/test_benchmarks.py b/qa/benchmarks/tests/test_benchmarks.py index 22f946a..b829030 100644 --- a/qa/benchmarks/tests/test_benchmarks.py +++ b/qa/benchmarks/tests/test_benchmarks.py @@ -1,23 +1,23 @@ import openeo import pytest -from apex_algorithm_qa_tools.usecases import UseCase, get_use_cases +from apex_algorithm_qa_tools.scenarios import BenchmarkScenario, get_benchmark_scenarios @pytest.mark.parametrize( - "use_case", + "scenario", [ - # Use use case id as parameterization id to give nicer test names. + # Use scenario id as parameterization id to give nicer test names. pytest.param(uc, id=uc.id) - for uc in get_use_cases() + for uc in get_benchmark_scenarios() ], ) -def test_run_benchmark(use_case: UseCase, connection_factory): - connection: openeo.Connection = connection_factory(url=use_case.backend) +def test_run_benchmark(scenario: BenchmarkScenario, connection_factory): + connection: openeo.Connection = connection_factory(url=scenario.backend) # TODO: scenario option to use synchronous instead of batch job mode? job = connection.create_job( - process_graph=use_case.process_graph, - title=f"APEx benchmark {use_case.id}", + process_graph=scenario.process_graph, + title=f"APEx benchmark {scenario.id}", ) job.start_and_wait() diff --git a/qa/tools/apex_algorithm_qa_tools/common.py b/qa/tools/apex_algorithm_qa_tools/common.py index 3216263..3f2d1e6 100644 --- a/qa/tools/apex_algorithm_qa_tools/common.py +++ b/qa/tools/apex_algorithm_qa_tools/common.py @@ -11,7 +11,7 @@ def get_project_root() -> Path: - """Try to find project root for common project use cases.""" + """Try to find project root for common project use cases and CI situations.""" def candidates() -> Iterator[Path]: # TODO: support a environment variable to override the project root detection? @@ -27,9 +27,9 @@ def candidates() -> Iterator[Path]: for candidate in candidates(): if candidate.is_dir() and all( (candidate / p).is_dir() - for p in ["algorithm_catalog", "algorithm_invocations", "qa/tools"] + for p in ["algorithm_catalog", "benchmark_scenarios", "qa/tools"] ): _log.info(f"Detected project root {candidate!r}") return candidate - raise RuntimeError("Could not determine algorithm invocations root directory.") + raise RuntimeError("Could not determine project root directory.") diff --git a/qa/tools/apex_algorithm_qa_tools/usecases.py b/qa/tools/apex_algorithm_qa_tools/scenarios.py similarity index 74% rename from qa/tools/apex_algorithm_qa_tools/usecases.py rename to qa/tools/apex_algorithm_qa_tools/scenarios.py index 2a2c808..4cd4124 100644 --- a/qa/tools/apex_algorithm_qa_tools/usecases.py +++ b/qa/tools/apex_algorithm_qa_tools/scenarios.py @@ -13,19 +13,18 @@ # TODO: Flatten apex_algorithm_qa_tools to a single module and push as much functionality to https://github.com/ESA-APEx/esa-apex-toolbox-python -# TODO: rename `algorithm_invocations` and `qa/tools/apex_algorithm_qa_tools/usecases.py` to more descriptive "scenarios" or "benchmark-scenarios"? @dataclasses.dataclass(kw_only=True) -class UseCase: - # TODO: need for differentiation between different types of use cases? +class BenchmarkScenario: + # TODO: need for differentiation between different types of scenarios? id: str description: str | None = None backend: str process_graph: dict @classmethod - def from_dict(cls, data: dict) -> UseCase: + def from_dict(cls, data: dict) -> BenchmarkScenario: # TODO: standardization of these types? What about other types and how to support them? assert data["type"] == "openeo" return cls( @@ -36,33 +35,33 @@ def from_dict(cls, data: dict) -> UseCase: ) -def get_use_cases() -> List[UseCase]: - # TODO: instead of flat list, keep original grouping/structure of "algorithm_invocations" files? +def get_benchmark_scenarios() -> List[BenchmarkScenario]: + # TODO: instead of flat list, keep original grouping/structure of benchmark scenario files? # TODO: check for uniqueness of scenario IDs? Also make this a pre-commit lint tool? - use_cases = [] - for path in (get_project_root() / "algorithm_invocations").glob("**/*.json"): + scenarios = [] + for path in (get_project_root() / "benchmark_scenarios").glob("**/*.json"): with open(path) as f: data = json.load(f) - # TODO: support single use case files in addition to listings? + # TODO: support single scenario files in addition to listings? assert isinstance(data, list) - use_cases.extend(UseCase.from_dict(item) for item in data) - return use_cases + scenarios.extend(BenchmarkScenario.from_dict(item) for item in data) + return scenarios -def lint_usecase(usecase: UseCase): +def lint_benchmark_scenario(scenario: BenchmarkScenario): """ - Various sanity checks for use case data. + Various sanity checks for scenario data. To be used in unit tests and pre-commit hooks. """ # TODO integrate this as a pre-commit hook # TODO raise descriptive exceptions instead of asserts? - assert re.match(r"^[a-zA-Z0-9_-]+$", usecase.id) + assert re.match(r"^[a-zA-Z0-9_-]+$", scenario.id) # TODO: proper allow-list of backends? - assert usecase.backend in ["openeofed.dataspace.copernicus.eu"] + assert scenario.backend in ["openeofed.dataspace.copernicus.eu"] # TODO: refactor this out to a more generic process graph validator? Or use an existing tool? # TODO: more advanced process graph validation? - assert isinstance(usecase.process_graph, dict) - for node_id, node in usecase.process_graph.items(): + assert isinstance(scenario.process_graph, dict) + for node_id, node in scenario.process_graph.items(): assert isinstance(node, dict) assert re.match(r"^[a-z0-9_-]+$", node["process_id"]) assert "arguments" in node diff --git a/qa/unittests/tests/conftest.py b/qa/unittests/tests/conftest.py index 731505b..5bce017 100644 --- a/qa/unittests/tests/conftest.py +++ b/qa/unittests/tests/conftest.py @@ -1,3 +1,3 @@ import pytest -pytest.register_assert_rewrite("apex_algorithm_qa_tools.usecases") +pytest.register_assert_rewrite("apex_algorithm_qa_tools.scenarios") diff --git a/qa/unittests/tests/test_scenarios.py b/qa/unittests/tests/test_scenarios.py new file mode 100644 index 0000000..7e26ae4 --- /dev/null +++ b/qa/unittests/tests/test_scenarios.py @@ -0,0 +1,26 @@ +import pytest +from apex_algorithm_qa_tools.scenarios import ( + BenchmarkScenario, + get_benchmark_scenarios, + lint_benchmark_scenario, +) + + +def test_get_benchmark_scenarios(): + scenarios = get_benchmark_scenarios() + assert len(scenarios) > 0 + + +# TODO: tests to check uniqueness of scenario ids? + + +@pytest.mark.parametrize( + "scenario", + [ + # Use scenario id as parameterization id to give nicer test names. + pytest.param(uc, id=uc.id) + for uc in get_benchmark_scenarios() + ], +) +def test_lint_scenario(scenario: BenchmarkScenario): + lint_benchmark_scenario(scenario) diff --git a/qa/unittests/tests/test_usecases.py b/qa/unittests/tests/test_usecases.py deleted file mode 100644 index 203bb04..0000000 --- a/qa/unittests/tests/test_usecases.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest -from apex_algorithm_qa_tools.usecases import UseCase, get_use_cases, lint_usecase - - -def test_get_use_cases(): - use_cases = get_use_cases() - assert len(use_cases) > 0 - - -# TODO: tests to check uniqueness of use case ids? - - -@pytest.mark.parametrize( - "use_case", - [ - # Use use case id as parameterization id to give nicer test names. - pytest.param(uc, id=uc.id) - for uc in get_use_cases() - ], -) -def test_lint_usecase(use_case: UseCase): - lint_usecase(use_case)