Skip to content

Commit

Permalink
Issue #13 unify "benchmark scenario" naming
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Jun 26, 2024
1 parent dc6dcbf commit b2fdf0e
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 51 deletions.
File renamed without changes.
File renamed without changes.
16 changes: 8 additions & 8 deletions qa/benchmarks/tests/test_benchmarks.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
import openeo
import pytest
from apex_algorithm_qa_tools.usecases import UseCase, get_use_cases
from apex_algorithm_qa_tools.scenarios import BenchmarkScenario, get_benchmark_scenarios


@pytest.mark.parametrize(
"use_case",
"scenario",
[
# Use use case id as parameterization id to give nicer test names.
# Use scenario id as parameterization id to give nicer test names.
pytest.param(uc, id=uc.id)
for uc in get_use_cases()
for uc in get_benchmark_scenarios()
],
)
def test_run_benchmark(use_case: UseCase, connection_factory):
connection: openeo.Connection = connection_factory(url=use_case.backend)
def test_run_benchmark(scenario: BenchmarkScenario, connection_factory):
connection: openeo.Connection = connection_factory(url=scenario.backend)

# TODO: scenario option to use synchronous instead of batch job mode?
job = connection.create_job(
process_graph=use_case.process_graph,
title=f"APEx benchmark {use_case.id}",
process_graph=scenario.process_graph,
title=f"APEx benchmark {scenario.id}",
)

job.start_and_wait()
Expand Down
6 changes: 3 additions & 3 deletions qa/tools/apex_algorithm_qa_tools/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def get_project_root() -> Path:
"""Try to find project root for common project use cases."""
"""Try to find project root for common project use cases and CI situations."""

def candidates() -> Iterator[Path]:
# TODO: support an environment variable to override the project root detection?
Expand All @@ -27,9 +27,9 @@ def candidates() -> Iterator[Path]:
for candidate in candidates():
if candidate.is_dir() and all(
(candidate / p).is_dir()
for p in ["algorithm_catalog", "algorithm_invocations", "qa/tools"]
for p in ["algorithm_catalog", "benchmark_scenarios", "qa/tools"]
):
_log.info(f"Detected project root {candidate!r}")
return candidate

raise RuntimeError("Could not determine algorithm invocations root directory.")
raise RuntimeError("Could not determine project root directory.")
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,18 @@


# TODO: Flatten apex_algorithm_qa_tools to a single module and push as much functionality as possible to https://github.com/ESA-APEx/esa-apex-toolbox-python
# TODO: rename `algorithm_invocations` and `qa/tools/apex_algorithm_qa_tools/usecases.py` to more descriptive "scenarios" or "benchmark-scenarios"?


@dataclasses.dataclass(kw_only=True)
class UseCase:
# TODO: need for differentiation between different types of use cases?
class BenchmarkScenario:
# TODO: need for differentiation between different types of scenarios?
id: str
description: str | None = None
backend: str
process_graph: dict

@classmethod
def from_dict(cls, data: dict) -> UseCase:
def from_dict(cls, data: dict) -> BenchmarkScenario:
# TODO: standardization of these types? What about other types and how to support them?
assert data["type"] == "openeo"
return cls(
Expand All @@ -36,33 +35,33 @@ def from_dict(cls, data: dict) -> UseCase:
)


def get_use_cases() -> List[UseCase]:
# TODO: instead of flat list, keep original grouping/structure of "algorithm_invocations" files?
def get_benchmark_scenarios() -> List[BenchmarkScenario]:
# TODO: instead of flat list, keep original grouping/structure of benchmark scenario files?
# TODO: check for uniqueness of scenario IDs? Also make this a pre-commit lint tool?
use_cases = []
for path in (get_project_root() / "algorithm_invocations").glob("**/*.json"):
scenarios = []
for path in (get_project_root() / "benchmark_scenarios").glob("**/*.json"):
with open(path) as f:
data = json.load(f)
# TODO: support single use case files in addition to listings?
# TODO: support single scenario files in addition to listings?
assert isinstance(data, list)
use_cases.extend(UseCase.from_dict(item) for item in data)
return use_cases
scenarios.extend(BenchmarkScenario.from_dict(item) for item in data)
return scenarios


def lint_usecase(usecase: UseCase):
def lint_benchmark_scenario(scenario: BenchmarkScenario):
"""
Various sanity checks for use case data.
Various sanity checks for scenario data.
To be used in unit tests and pre-commit hooks.
"""
# TODO integrate this as a pre-commit hook
# TODO raise descriptive exceptions instead of asserts?
assert re.match(r"^[a-zA-Z0-9_-]+$", usecase.id)
assert re.match(r"^[a-zA-Z0-9_-]+$", scenario.id)
# TODO: proper allow-list of backends?
assert usecase.backend in ["openeofed.dataspace.copernicus.eu"]
assert scenario.backend in ["openeofed.dataspace.copernicus.eu"]
# TODO: refactor this out to a more generic process graph validator? Or use an existing tool?
# TODO: more advanced process graph validation?
assert isinstance(usecase.process_graph, dict)
for node_id, node in usecase.process_graph.items():
assert isinstance(scenario.process_graph, dict)
for node_id, node in scenario.process_graph.items():
assert isinstance(node, dict)
assert re.match(r"^[a-z0-9_-]+$", node["process_id"])
assert "arguments" in node
Expand Down
2 changes: 1 addition & 1 deletion qa/unittests/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import pytest

pytest.register_assert_rewrite("apex_algorithm_qa_tools.usecases")
pytest.register_assert_rewrite("apex_algorithm_qa_tools.scenarios")
26 changes: 26 additions & 0 deletions qa/unittests/tests/test_scenarios.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pytest
from apex_algorithm_qa_tools.scenarios import (
BenchmarkScenario,
get_benchmark_scenarios,
lint_benchmark_scenario,
)


def test_get_benchmark_scenarios():
scenarios = get_benchmark_scenarios()
assert len(scenarios) > 0


# TODO: tests to check uniqueness of scenario ids?


@pytest.mark.parametrize(
"scenario",
[
# Use scenario id as parameterization id to give nicer test names.
pytest.param(uc, id=uc.id)
for uc in get_benchmark_scenarios()
],
)
def test_lint_scenario(scenario: BenchmarkScenario):
lint_benchmark_scenario(scenario)
22 changes: 0 additions & 22 deletions qa/unittests/tests/test_usecases.py

This file was deleted.

0 comments on commit b2fdf0e

Please sign in to comment.