diff --git a/nxbench/benchmarks/benchmark.py b/nxbench/benchmarks/benchmark.py index 5683cef..e791a53 100644 --- a/nxbench/benchmarks/benchmark.py +++ b/nxbench/benchmarks/benchmark.py @@ -478,7 +478,11 @@ async def main_benchmark( ) for run_result in results: - if run_result is not None: + if isinstance(run_result, BaseException): + logger.error("A subflow raised an exception: %s", run_result) + continue + + if isinstance(run_result, dict): run_result["python_version"] = actual_python_version bname = run_result.get("backend", "unknown") run_result["backend_version"] = backend_version_map.get( diff --git a/nxbench/data/tests/test_db.py b/nxbench/data/tests/test_db.py index a4c9426..ced44cd 100644 --- a/nxbench/data/tests/test_db.py +++ b/nxbench/data/tests/test_db.py @@ -1,5 +1,5 @@ import sqlite3 -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone import pandas as pd import pytest @@ -81,9 +81,14 @@ def test_get_unique_values(benchmark_db, sample_benchmark_result): def test_filter_results_by_date(benchmark_db, sample_benchmark_result): - timestamp = datetime.now(timezone.utc).isoformat() + """Ensures that results are filtered by start_date.""" + timestamp_before_save = datetime.now(timezone.utc).isoformat() benchmark_db.save_results(sample_benchmark_result) - filtered_results = benchmark_db.get_results(start_date=timestamp, as_pandas=False) + # Using the same timestamp as start_date means the newly inserted result + # should be included (because `timestamp >= timestamp_before_save`). + filtered_results = benchmark_db.get_results( + start_date=timestamp_before_save, as_pandas=False + ) assert len(filtered_results) == 1 @@ -106,3 +111,127 @@ def test_get_results_as_pandas(benchmark_db, sample_benchmark_result): def test_invalid_column_unique_values(benchmark_db): with pytest.raises(ValueError, match="Invalid column name: nonexistent_column"): benchmark_db.get_unique_values("nonexistent_column") + + +def test_save_multiple_results(benchmark_db, sample_benchmark_result): + """Test saving multiple BenchmarkResult objects at once.""" + result2 = BenchmarkResult( + execution_time=2.34, + execution_time_with_preloading=2.5, + memory_used=789.01, + algorithm="test_algo_2", + backend="test_backend_2", + dataset="test_dataset_2", + num_nodes=200, + num_edges=400, + num_thread=2, + date=2345678, + metadata={}, + is_directed=False, + is_weighted=True, + validation="passed", + validation_message="OK", + ) + + benchmark_db.save_results([sample_benchmark_result, result2]) + + results = benchmark_db.get_results(as_pandas=False) + assert len(results) == 2 + algo_names = {r["algorithm"] for r in results} + assert "test_algo" in algo_names + assert "test_algo_2" in algo_names + + +def test_save_results_with_minimal_data(benchmark_db): + """Test that saving a BenchmarkResult without optional fields does not break.""" + minimal_result = BenchmarkResult( + execution_time=0.99, + execution_time_with_preloading=None, + memory_used=100.0, + algorithm="minimal_algo", + backend="minimal_backend", + dataset="minimal_dataset", + num_nodes=10, + num_edges=20, + num_thread=1, + date=999999, + metadata={}, + is_directed=False, + is_weighted=False, + validation=None, + validation_message=None, + ) + + benchmark_db.save_results(minimal_result) + + results = benchmark_db.get_results(as_pandas=False) + assert len(results) == 1 + result = results[0] + assert result["algorithm"] == "minimal_algo" + assert result["git_commit"] is None + assert result["machine_info"] is None + assert 
result["python_version"] is None + assert result["package_versions"] is None + + +def test_get_results_with_filters(benchmark_db, sample_benchmark_result): + """Test filtering results by multiple fields: algorithm & backend.""" + result2 = BenchmarkResult( + execution_time=5.67, + execution_time_with_preloading=6.5, + memory_used=999.99, + algorithm="filtered_algo", + backend="filtered_backend", + dataset="another_dataset", + num_nodes=999, + num_edges=1998, + num_thread=4, + date=999888, + metadata={}, + is_directed=True, + is_weighted=True, + validation="passed", + validation_message="OK", + ) + + benchmark_db.save_results([sample_benchmark_result, result2]) + + filtered_results = benchmark_db.get_results( + algorithm="filtered_algo", backend="filtered_backend", as_pandas=False + ) + assert len(filtered_results) == 1 + assert filtered_results[0]["algorithm"] == "filtered_algo" + assert filtered_results[0]["backend"] == "filtered_backend" + + +def test_delete_results_by_date(benchmark_db, sample_benchmark_result): + """Test deleting results older than a certain date.""" + benchmark_db.save_results(sample_benchmark_result) + old_date_str = (datetime.now(timezone.utc) - timedelta(days=2)).isoformat() + + with sqlite3.connect(benchmark_db.db_path) as conn: + conn.execute("UPDATE benchmarks SET timestamp=? WHERE id=1", (old_date_str,)) + conn.commit() + + rows_deleted = benchmark_db.delete_results( + before_date=datetime.now(timezone.utc).isoformat() + ) + assert rows_deleted == 1 + + remaining = benchmark_db.get_results(as_pandas=False) + assert len(remaining) == 0 + + benchmark_db.save_results(sample_benchmark_result) + future_date_str = (datetime.now(timezone.utc) + timedelta(days=1)).isoformat() + rows_deleted = benchmark_db.delete_results(before_date=future_date_str) + assert rows_deleted == 1 + + +def test_delete_results_no_match(benchmark_db, sample_benchmark_result): + """Ensure delete_results returns 0 if no row matches the criteria.""" + benchmark_db.save_results(sample_benchmark_result) + rows_deleted = benchmark_db.delete_results(algorithm="nonexistent_algo") + assert rows_deleted == 0 + + remaining = benchmark_db.get_results(as_pandas=False) + assert len(remaining) == 1 diff --git a/nxbench/data/tests/test_loader.py b/nxbench/data/tests/test_loader.py index 6d62799..3aea8c5 100644 --- a/nxbench/data/tests/test_loader.py +++ b/nxbench/data/tests/test_loader.py @@ -1,3 +1,4 @@ +import importlib.resources as importlib_resources import tempfile import warnings import zipfile @@ -997,3 +998,300 @@ def test_load_metadata_exception(): with tempfile.TemporaryDirectory() as temp_dir: with pytest.raises(RuntimeError, match="Metadata load failed"): BenchmarkDataManager(data_dir=temp_dir) + + +@pytest.mark.asyncio +async def test_no_suitable_file_found_after_extract(data_manager, tmp_path): + data_manager.data_dir = tmp_path + extracted_folder = data_manager.data_dir / "missing_graph_extracted" + extracted_folder.mkdir(exist_ok=True) + + data_manager._metadata_df = pd.concat( + [ + data_manager._metadata_df, + pd.DataFrame( + [ + { + "name": "missing_graph", + "directed": False, + "weighted": False, + "download_url": "http://example.com/missing.zip", + } + ] + ), + ], + ignore_index=True, + ) + + with patch.object(data_manager, "_download_file", return_value=None): + with patch("zipfile.ZipFile") as mock_zip_class: + mock_zip = MagicMock() + mock_zip.extractall = MagicMock() + mock_zip_class.return_value.__enter__.return_value = mock_zip + + data_manager._find_graph_file = 
MagicMock(return_value=None) + + with pytest.raises( + FileNotFoundError, match="No suitable graph file found after extracting" + ): + await data_manager._download_and_extract_network( + "missing_graph", "http://example.com/missing.zip" + ) + + +@pytest.mark.asyncio +async def test_load_nr_graph_no_suitable_file_found(data_manager): + """Test that FileNotFoundError is raised if no suitable file is found after + downloading. + """ + with patch.object(data_manager, "_download_and_extract_network", return_value=None): + with patch.object( + data_manager, + "_load_graph_file", + side_effect=FileNotFoundError("Mocked no file"), + ): + data_manager.get_metadata = MagicMock( + return_value={ + "download_url": "http://example.com/test.zip", + "directed": False, + } + ) + config = DatasetConfig( + name="not_found_after_download", source="networkrepository", params={} + ) + with pytest.raises( + FileNotFoundError, + match="No suitable graph file found after downloading", + ): + await data_manager.load_network(config) + + +@pytest.mark.asyncio +async def test_convert_numeric_nodes_to_strings_in_mtx( + data_manager, create_edge_file, tmp_path +): + data_manager.data_dir = tmp_path + + mtx_content = """%%MatrixMarket matrix coordinate real general +4 4 3 +1 2 1.0 +2 3 2.0 +3 4 3.0 +""" + local_mtx_path = data_manager.data_dir / "numeric_nodes_mtx.mtx" + local_mtx_path.write_text(mtx_content) + + data_manager._metadata_df = pd.concat( + [ + data_manager._metadata_df, + pd.DataFrame( + [{"name": "numeric_nodes_mtx", "directed": False, "weighted": True}] + ), + ], + ignore_index=True, + ) + + config = DatasetConfig( + name="numeric_nodes_mtx", + source="local", + params={"path": str(local_mtx_path)}, + metadata={"directed": False, "weighted": True}, + ) + + graph, metadata = await data_manager.load_network(config) + + assert all(isinstance(node, (str, int)) for node in graph.nodes()) + + +@pytest.mark.asyncio +async def test_load_unweighted_edgelist_failure(data_manager, create_edge_file): + edge_content = """A B +B C +""" + create_edge_file("unweighted_failure.edges", edge_content) + + # Add to metadata so we don't fail with "not found in cache" + data_manager._metadata_df = pd.concat( + [ + data_manager._metadata_df, + pd.DataFrame( + [ + { + "name": "unweighted_failure", + "directed": False, + "weighted": False, + "download_url": "http://example.com/unweighted_failure.edges", + } + ] + ), + ], + ignore_index=True, + ) + + with patch( + "networkx.read_edgelist", side_effect=Exception("Unweighted parse failed") + ): + config = DatasetConfig( + name="unweighted_failure", + source="networkrepository", + params={}, + metadata={"directed": False, "weighted": False}, + ) + + with pytest.raises(Exception, match="Unweighted parse failed"): + await data_manager.load_network(config) + + +@pytest.mark.asyncio +async def test_load_edges_unexpected_error_parsing_weights( + data_manager, create_edge_file +): + edge_content = """A B 1.0 +B C not_a_float +C D 3.0 +""" + create_edge_file("unexpected_error.edges", edge_content) + + data_manager._metadata_df = pd.concat( + [ + data_manager._metadata_df, + pd.DataFrame( + [ + { + "name": "unexpected_error", + "directed": False, + "weighted": True, + "download_url": "http://example.com/unexpected_error.edges", + } + ] + ), + ], + ignore_index=True, + ) + + with patch("builtins.open", side_effect=TypeError("Unexpected error")): + config = DatasetConfig( + name="unexpected_error", + source="networkrepository", + params={}, + metadata={"directed": False, "weighted": True}, + ) 
+ graph, metadata = await data_manager.load_network(config) + + assert ( + graph.number_of_nodes() > 0 + ), "We should still get nodes from the fallback parse" + + +@pytest.mark.asyncio +async def test_load_mtx_with_corrected_file_exists(data_manager, tmp_path): + """Test loading a Matrix Market file when a corrected version already exists.""" + corrected_mtx_file = tmp_path / "example_corrected.mtx" + corrected_mtx_content = """%%MatrixMarket matrix coordinate real general +3 3 2 +1 2 1.0 +2 3 2.0 +""" + corrected_mtx_file.write_text(corrected_mtx_content) + + original_mtx_file = tmp_path / "example.mtx" + original_mtx_file.write_text("This file is intentionally incorrect or not used") + + data_manager._metadata_df = pd.concat( + [ + data_manager._metadata_df, + pd.DataFrame( + [{"name": "example_corrected", "directed": False, "weighted": True}] + ), + ], + ignore_index=True, + ) + + corrected_mtx_file.rename(data_manager.data_dir / "example_corrected_corrected.mtx") + original_mtx_file.rename(data_manager.data_dir / "example_corrected.mtx") + + config = DatasetConfig( + name="example_corrected", + source="networkrepository", + params={}, + metadata={"directed": False, "weighted": True}, + ) + + graph, metadata = await data_manager.load_network(config) + + assert graph.number_of_nodes() == 3, "Should load from the corrected file" + assert graph.number_of_edges() == 2 + assert "example_corrected" in data_manager._network_cache, "Should cache the graph" + + +@pytest.mark.asyncio +async def test_load_network_from_cache(data_manager): + data_manager._metadata_df = pd.concat( + [ + data_manager._metadata_df, + pd.DataFrame( + [ + { + "name": "cache_test_graph", + "directed": False, + "weighted": False, + } + ] + ), + ], + ignore_index=True, + ) + + config = DatasetConfig( + name="cache_test_graph", + source="generator", + params={"generator": "networkx.empty_graph", "n": 5}, + metadata={"directed": False, "weighted": False}, + ) + + graph1, metadata1 = await data_manager.load_network(config) + + graph2, metadata2 = await data_manager.load_network(config) + + assert nx.is_isomorphic( + graph1, graph2 + ), "They should have same structure, even if not the same object" + assert metadata1 == metadata2, "Metadata should match for cached graph" + + +def test_load_network_invalid_source_expanded(data_manager): + data_manager._metadata_df = pd.concat( + [ + data_manager._metadata_df, + pd.DataFrame( + [ + { + "name": "invalid_source_expanded", + "directed": False, + "weighted": False, + "download_url": "http://example.com/not_used", + } + ] + ), + ], + ignore_index=True, + ) + + config = DatasetConfig( + name="invalid_source_expanded", + source="doesnotexist", + params={}, + metadata={"directed": False, "weighted": False}, + ) + + with pytest.raises(ValueError, match="Invalid network source: doesnotexist"): + data_manager.load_network_sync(config) + + +def test_load_metadata_failure(): + """Test that a RuntimeError is raised when loading metadata fails.""" + with patch.object( + importlib_resources, "open_text", side_effect=Exception("Mocked exception") + ): + with pytest.raises(RuntimeError, match="Failed to load network metadata"): + _ = BenchmarkDataManager() diff --git a/nxbench/viz/tests/test_app.py b/nxbench/viz/tests/test_app.py index 2eeb9a5..303fbf8 100644 --- a/nxbench/viz/tests/test_app.py +++ b/nxbench/viz/tests/test_app.py @@ -1,5 +1,8 @@ +# test_app.py + from unittest.mock import patch +import pandas as pd import plotly.graph_objects as go import pytest @@ -13,29 +16,33 @@ 
@pytest.fixture def mock_load_and_prepare_data_return(): """ - Provide a mocked return value for load_and_prepare_data that includes a 3-level - MultiIndex: (algorithm, dataset, backend_full). This way, "dataset" and - "backend_full" - are legitimate levels in df_agg. - - Returns - ------- - tuple - (df, df_agg, group_columns, available_parcats_columns) + Provide a mocked return value for load_and_prepare_data that includes: + - All the columns referenced by `hover_data` in make_violin_figure + - "backend" (or "backend_full") so that the fallback dimension will work + - Enough columns so aggregator code can run """ - import pandas as pd - df = pd.DataFrame( { "algorithm": ["bfs", "bfs", "dfs"], "dataset": ["ds1", "ds2", "ds3"], - "backend_full": ["cpu", "gpu", "cpu"], + "backend_full": ["parallel", "cugraph", "graphblas"], + "backend": ["parallel", "cugraph", "graphblas"], "execution_time_with_preloading": [0.4, 0.5, 0.3], "execution_time": [0.6, 0.7, 0.4], "memory_used": [0.2, 0.1, 0.3], + "num_nodes_bin": [1000, 2000, 3000], + "num_edges_bin": [5000, 6000, 7000], + "is_directed": [False, True, False], + "is_weighted": [False, False, True], + "python_version": ["3.9", "3.9", "3.10"], + "cpu": ["Intel", "AMD", "Intel"], + "os": ["Linux", "Linux", "Windows"], + "num_thread": [1, 2, 4], } ) + df["backend"] = df["backend_full"] + index = pd.MultiIndex.from_tuples( [ ("bfs", "ds1", "cpu"), @@ -55,8 +62,7 @@ def mock_load_and_prepare_data_return(): ) group_columns = ["algorithm", "dataset", "backend_full"] - - available_parcats_columns = ["dataset", "backend_full"] + available_parcats_columns = ["dataset", "backend_full"] # for the parallel cat return (df, df_agg, group_columns, available_parcats_columns) @@ -73,10 +79,7 @@ def mock_load_data_function(mock_load_and_prepare_data_return): def test_app_runs_without_crashing(mock_load_data_function): - """ - Test that the Dash app can be instantiated without errors and without - starting the server. - """ + """Ensure the app can be instantiated (server not actually run).""" try: run_server(debug=False, run=False) except Exception as e: @@ -91,10 +94,10 @@ def test_app_runs_without_crashing(mock_load_data_function): "memory_used", ], ) -def test_make_parallel_categories_figure(color_by, mock_load_data_function): +def test_make_parallel_categories_figure_basic(color_by, mock_load_data_function): """ Test the logic function for building the parallel categories figure with various - color_by parameters. + color_by parameters under normal circumstances. """ df, df_agg, group_columns, available_parcats_columns = ( mock_load_data_function.return_value @@ -106,12 +109,12 @@ def test_make_parallel_categories_figure(color_by, mock_load_data_function): fig, store_data = make_parallel_categories_figure( df, df_agg, group_columns, selected_algorithm, color_by, selected_dimensions ) + assert isinstance(fig, go.Figure) + assert store_data is not None - # Basic checks - assert isinstance(fig, go.Figure), "Expected a Plotly Figure object" - assert store_data is not None, "Expected non-None store_data for hover info." + trace = fig.data[0] + assert isinstance(trace, go.Parcats) - # Check colorbar title logic if color_by == "execution_time": expected_title = "Execution Time (s)" elif color_by == "execution_time_with_preloading": @@ -119,43 +122,158 @@ def test_make_parallel_categories_figure(color_by, mock_load_data_function): else: expected_title = "Memory Used (GB)" - assert len(fig.data) > 0, "Figure has no data traces." 
- trace = fig.data[0] - assert isinstance( - trace, go.Parcats - ), "Expected the first trace to be a Parcats plot." + assert trace.line.colorbar.title.text == expected_title - actual_title = trace.line.colorbar.title.text - assert ( - actual_title == expected_title - ), f"Colorbar title should be '{expected_title}', got '{actual_title}'" +def test_make_parallel_categories_figure_preloading_column_missing( + mock_load_data_function, +): + """If 'execution_time_with_preloading' is NOT in df.columns, fallback to + mean_execution_time. + """ + df, df_agg, group_columns, _ = mock_load_data_function.return_value + df_no_preload = df.drop(columns=["execution_time_with_preloading"]) + + selected_algorithm = "bfs" + color_by = "execution_time_with_preloading" + selected_dimensions = ["dataset", "backend_full"] -def test_make_violin_figure_no_data(mock_load_data_function): - """Test that make_violin_figure returns a figure with a - "No data available for the selected algorithm." annotation when given a - non-existent algorithm. + fig, store_data = make_parallel_categories_figure( + df_no_preload, + df_agg, + group_columns, + selected_algorithm, + color_by, + selected_dimensions, + ) + assert isinstance(fig, go.Figure) + assert getattr(fig.data[0].line, "colorscale", None), "No colorscale found!" - Parameters - ---------- - mock_load_data_function : pytest.fixture - Fixture that mocks the data loading, ensuring a consistent dataset. + +def test_make_parallel_categories_figure_preloading_agg_keyerror( + mock_load_data_function, +): + """ + If aggregator's .xs(...) fails for BFS, we fallback to mean_execution_time. + We'll force that by removing BFS from df_agg so `.xs('bfs')` triggers KeyError. """ + df, df_agg, group_columns, _ = mock_load_data_function.return_value + + df_agg_no_bfs = df_agg.drop(labels="bfs", level="algorithm") + + selected_algorithm = "bfs" + color_by = "execution_time_with_preloading" + selected_dimensions = ["dataset", "backend_full"] + + fig, store_data = make_parallel_categories_figure( + df, + df_agg_no_bfs, + group_columns, + selected_algorithm, + color_by, + selected_dimensions, + ) + + assert isinstance(fig, go.Figure) + assert not store_data, "We expect an empty store_data due to KeyError fallback." + + +def test_make_violin_figure_empty_df(mock_load_data_function): + """If the .xs(...) 
yields an empty DataFrame, we get "No data available" figure.""" df, df_agg, group_columns, available_parcats_columns = ( mock_load_data_function.return_value ) + df_agg_empty = df_agg.drop(labels="bfs", level="algorithm") + + selected_algorithm = "bfs" + fig = make_violin_figure( + df, + df_agg_empty, + selected_algorithm, + "execution_time", + ["dataset", "backend_full"], + ) + assert fig.layout.annotations + assert any("No data available" in ann["text"] for ann in fig.layout.annotations) - selected_algorithm = "fakealgo" + +@pytest.mark.parametrize( + ("color_by", "expected_y_metric"), + [ + ("execution_time", "mean_execution_time"), + ("execution_time_with_preloading", "mean_execution_time_with_preloading"), + ("memory_used", "mean_memory_used"), + ], +) +def test_make_violin_figure_color_by( + color_by, expected_y_metric, mock_load_data_function +): + df, df_agg, group_columns, available_parcats_columns = ( + mock_load_data_function.return_value + ) + df_agg = df_agg.reset_index() + df_agg["num_nodes_bin"] = [1000, 2000, 3000] + df_agg["num_edges_bin"] = [5000, 6000, 7000] + df_agg["is_directed"] = [False, True, False] + df_agg["is_weighted"] = [False, False, True] + df_agg["python_version"] = ["3.9", "3.9", "3.10"] + df_agg["cpu"] = ["Intel", "AMD", "Intel"] + df_agg["os"] = ["Linux", "Linux", "Windows"] + df_agg["num_thread"] = [1, 2, 4] + df_agg.set_index(["algorithm", "dataset", "backend_full"], inplace=True) + + selected_algorithm = "bfs" + fig = make_violin_figure( + df, + df_agg, + selected_algorithm, + color_by, + ["dataset", "backend_full"], + ) + assert isinstance(fig, go.Figure) + + +def test_make_violin_figure_dimension_fallback(mock_load_data_function): + """ + If the chosen dimension is missing, fallback to "backend" or "backend_full". + We'll just ensure it doesn't crash now that 'backend' exists. + """ + df, df_agg, group_columns, available_parcats_columns = ( + mock_load_data_function.return_value + ) + df_agg = df_agg.reset_index() + + df_agg["num_nodes_bin"] = [1000, 2000, 3000] + df_agg["num_edges_bin"] = [5000, 6000, 7000] + df_agg["is_directed"] = [False, True, False] + df_agg["is_weighted"] = [False, False, True] + df_agg["python_version"] = ["3.9", "3.9", "3.10"] + df_agg["cpu"] = ["Intel", "AMD", "Intel"] + df_agg["os"] = ["Linux", "Linux", "Windows"] + df_agg["num_thread"] = [1, 2, 4] + + df_agg["backend"] = df_agg["backend_full"] + + df_agg.set_index(["algorithm", "dataset", "backend_full"], inplace=True) + + selected_algorithm = "bfs" color_by = "execution_time" - selected_dimensions = ["dataset", "backend_full"] + selected_dimensions = ["foo_dimension"] # doesn't exist fig = make_violin_figure( df, df_agg, selected_algorithm, color_by, selected_dimensions ) + assert isinstance(fig, go.Figure) + assert not fig.layout.annotations - assert fig.layout.annotations, "Expected annotations in the layout for no data." - expected_text = "No data available for the selected algorithm." - assert any( - expected_text in ann["text"] for ann in fig.layout.annotations - ), f"Could not find '{expected_text}' annotation in the figure." 
+def test_make_violin_figure_no_data_for_algorithm(mock_load_data_function):
+    """A KeyError for an unknown algorithm yields a "No data available" annotation."""
+    df, df_agg, group_columns, available_parcats_columns = (
+        mock_load_data_function.return_value
+    )
+    fig = make_violin_figure(
+        df, df_agg, "fakealgo", "execution_time", ["dataset", "backend_full"]
+    )
+    assert fig.layout.annotations
+    assert any("No data available" in ann["text"] for ann in fig.layout.annotations)