From 3aee58821d85fe1e3847602689ff020fc1d928af Mon Sep 17 00:00:00 2001 From: dpys Date: Thu, 12 Dec 2024 16:15:02 -0500 Subject: [PATCH 1/7] feat: add mtx file de-corruptor --- nxbench/benchmarks/benchmark.py | 10 ++- nxbench/configs/all.yaml | 32 +++------ nxbench/configs/dummy.yaml | 52 ++++++++------ nxbench/data/loader.py | 48 ++++++++++--- nxbench/data/repository.py | 5 +- nxbench/data/utils.py | 116 ++++++++++++++++++++++++++++++++ nxbench/validation/registry.py | 5 ++ 7 files changed, 205 insertions(+), 63 deletions(-) diff --git a/nxbench/benchmarks/benchmark.py b/nxbench/benchmarks/benchmark.py index cb2a74a..13de097 100644 --- a/nxbench/benchmarks/benchmark.py +++ b/nxbench/benchmarks/benchmark.py @@ -61,18 +61,16 @@ def make_benchmark_method(algo_config, dataset_name, backend, num_thread): and number of threads combination. """ algo_name = algo_config.name - orig_dataset_name = dataset_name - method_name = f"track_{algo_name}_{orig_dataset_name}_{backend}_{num_thread}" + safe_dataset_name = dataset_name.replace("-", "_") + method_name = f"track_{algo_name}_{safe_dataset_name}_{backend}_{num_thread}" def track_method(self): """Run benchmark and return metrics for the unique combination.""" logger.debug( f"Starting track_method for {method_name} with backend={backend}, " - f"threads={num_thread}, dataset={orig_dataset_name}" - ) - metrics = self.do_benchmark( - algo_config, orig_dataset_name, backend, num_thread + f"threads={num_thread}, dataset={dataset_name}" ) + metrics = self.do_benchmark(algo_config, dataset_name, backend, num_thread) logger.debug(f"Track {method_name} results: {metrics}") return metrics diff --git a/nxbench/configs/all.yaml b/nxbench/configs/all.yaml index 9b765c5..3bd1fd8 100644 --- a/nxbench/configs/all.yaml +++ b/nxbench/configs/all.yaml @@ -160,41 +160,25 @@ algorithms: # min_rounds: 3 datasets: - - name: "twitter" - source: "networkrepository" - params: {} - - name: "08blocks" source: "networkrepository" params: {} - - name: "amazon" - source: "networkrepository" - params: {} - - - name: "google" - source: "networkrepository" - params: {} - - name: "enron" source: "networkrepository" params: {} - - name: "citationCiteseer" - source: "networkrepository" - params: {} + # - name: "citationCiteseer" + # source: "networkrepository" + # params: {} - name: "karate" source: "networkrepository" params: {} - - name: "netscience" - source: "networkrepository" - params: {} - - - name: "email-Eu-core" - source: "networkrepository" - params: {} + # - name: "ca-netscience" + # source: "networkrepository" + # params: {} - name: "jazz" source: "networkrepository" @@ -295,7 +279,7 @@ matrix: - "networkx" - "parallel" - "graphblas" - - "cugraph" + # - "cugraph" num_threads: - "1" - "2" @@ -306,7 +290,7 @@ env_config: - "networkx==3.4.2" - "nx-parallel-0.3rc0.dev0" - "graphblas_algorithms==2023.10.0" - - "nx-cugraph_cu11==24.8.0" + # - "nx-cugraph_cu11==24.8.0" pythons: - "3.10" - "3.11" diff --git a/nxbench/configs/dummy.yaml b/nxbench/configs/dummy.yaml index 9258547..9377da4 100644 --- a/nxbench/configs/dummy.yaml +++ b/nxbench/configs/dummy.yaml @@ -1,25 +1,34 @@ algorithms: - - name: "pagerank" - func: "networkx.pagerank" - params: - alpha: 0.9 - tol: 1.0e-6 + - name: "average_clustering" + func: "networkx.average_clustering" + params: {} requires_directed: false - groups: ["centrality", "random_walk"] - min_rounds: 10 - warmup: true - warmup_iterations: 50 + groups: ["clustering", "graph_structure"] + min_rounds: 3 + validate_result: 
"nxbench.validation.validate_scalar_result" datasets: - - name: "erdos_renyi_small" - source: "generator" - params: - generator: "networkx.erdos_renyi_graph" - n: 1000 - p: 0.01 - metadata: - directed: false - weighted: false + # - name: "erdos_renyi_small" + # source: "generator" + # params: + # generator: "networkx.erdos_renyi_graph" + # n: 1000 + # p: 0.01 + # metadata: + # directed: false + # weighted: false + + - name: "enron" + source: "networkrepository" + params: {} + + - name: "citationCiteseer" + source: "networkrepository" + params: {} + + # - name: "ca-netscience" + # source: "networkrepository" + # params: {} validation: skip_slow: false @@ -35,9 +44,10 @@ matrix: - "1" env_config: - repo: "https://github.com/dpys/nxbench.git" - branches: - - "main" req: - "networkx==3.4.2" + - "nx-parallel-0.3rc0.dev0" - "graphblas_algorithms==2023.10.0" + pythons: + - "3.10" + - "3.11" diff --git a/nxbench/data/loader.py b/nxbench/data/loader.py index 86160a4..99d6b62 100644 --- a/nxbench/data/loader.py +++ b/nxbench/data/loader.py @@ -14,6 +14,7 @@ from nxbench.benchmarks.config import DatasetConfig from nxbench.data.synthesize import generate_graph +from nxbench.data.utils import fix_matrix_market_file warnings.filterwarnings("ignore") @@ -36,7 +37,7 @@ def __init__(self, data_dir: str | Path | None = None): self._metadata_df = self._load_metadata() def _normalize_name(self, name: str) -> str: - return name.lower().replace("-", "_") + return name.lower().strip().replace("-", "_") def _load_metadata(self) -> pd.DataFrame: try: @@ -55,7 +56,15 @@ def get_metadata(self, name: str) -> dict[str, Any]: normalized_name = self._normalize_name(name) network = self._metadata_df[self._metadata_df["name"] == normalized_name] if len(network) == 0: - raise ValueError(f"Network {name} not found in metadata cache") + logger.warning( + f"Network {name} not found in metadata cache. Returning dummy metadata." 
+ ) + return { + "name": name, + "download_url": None, + "directed": False, + "weighted": False, + } return network.iloc[0].to_dict() async def load_network( @@ -117,8 +126,32 @@ def check_graph_validity(graph, file_path): suffix = graph_file.suffix.lower() if suffix == ".mtx": logger.info(f"Loading Matrix Market file from {graph_file}") + graph_path = Path(graph_file) + corrected_file = graph_path.with_name( + f"{graph_path.stem}_corrected{graph_path.suffix}" + ) + try: - sparse_matrix = mmread(graph_file) + # check if the corrected file already exists + if corrected_file.exists(): + logger.info( + f"Using existing corrected Matrix Market file: " + f"{corrected_file}" + ) + sparse_matrix = mmread(corrected_file) + else: + try: + # attempt to read the original file + sparse_matrix = mmread(graph_file) + except Exception: + logger.info(f"Fixing Matrix Market file: {graph_file}") + # fix the file and load the corrected version + corrected_file = fix_matrix_market_file(graph_path) + sparse_matrix = mmread(corrected_file) + except Exception: + logger.exception(f"Failed to load Matrix Market file {graph_file}") + raise + else: graph = nx.from_scipy_sparse_array( sparse_matrix, create_using=( @@ -127,13 +160,8 @@ def check_graph_validity(graph, file_path): else nx.Graph() ), ) - check_graph_validity(graph, graph_file) - except ValueError: - logger.exception(f"Failed to load Matrix Market file {graph_file}") - raise ValueError("Matrix Market file not in expected format") - except Exception: - logger.exception(f"Failed to load Matrix Market file {graph_file}") - raise + graph.graph.update(metadata) + return graph elif suffix in [".edgelist", ".edges"]: create_using = ( nx.DiGraph() if metadata.get("directed", False) else nx.Graph() diff --git a/nxbench/data/repository.py b/nxbench/data/repository.py index a45b70a..15c0ffc 100644 --- a/nxbench/data/repository.py +++ b/nxbench/data/repository.py @@ -14,6 +14,7 @@ from urllib.parse import urljoin import aiofiles +import aiofiles.os import aiohttp import chardet from aiohttp import ClientSession, ClientTimeout @@ -176,7 +177,7 @@ async def __aexit__(self, exc_type, exc, tb): async def _fetch_text( self, url: str, method: str = "GET", retries: int = 3, **kwargs - ) -> str: + ) -> str | None: """Fetch the text content of a URL using aiohttp with retries and robust encoding handling. """ @@ -252,7 +253,7 @@ async def _fetch_text( async def _fetch_response( self, url: str, method: str = "GET", retries: int = 3, **kwargs - ) -> aiohttp.ClientResponse: + ) -> aiohttp.ClientResponse | None: """Fetch the response object of a URL using aiohttp with retries.""" if not self.session: raise RuntimeError("HTTP session is not initialized.") diff --git a/nxbench/data/utils.py b/nxbench/data/utils.py index bc00b5a..d8ee935 100644 --- a/nxbench/data/utils.py +++ b/nxbench/data/utils.py @@ -60,3 +60,119 @@ def safe_extract(filepath, extracted_path): if name.startswith("/") or ".." in name: raise ValueError(f"Malicious path in archive: {name}") zf.extractall(extracted_path) + + +def fix_matrix_market_file(in_path: Path) -> Path: + if not in_path.exists() or not in_path.is_file(): + raise FileNotFoundError( + f"Input file '{in_path!s}' does not exist or is not a file." 
+ ) + + with in_path.open("r") as f: + lines = [line.rstrip("\n") for line in f] + + header_index = None + for i, line in enumerate(lines): + if line.startswith("%%MatrixMarket"): + header_index = i + break + + if header_index is None: + raise ValueError("No %%MatrixMarket header line found.") + + header_line = lines[header_index] + if "coordinate" not in header_line: + raise ValueError( + "This fix only applies to coordinate format Matrix Market files." + ) + + symmetric = "symmetric" in header_line.lower() + content_lines = lines[header_index + 1 :] + + non_comment_lines = [ln for ln in content_lines if ln and not ln.startswith("%")] + + if not non_comment_lines: + raise ValueError("No dimension or data lines found after header and comments.") + + dimension_line = non_comment_lines[0] + parts = dimension_line.split() + + out_file_path = in_path.with_name(f"{in_path.stem}_corrected{in_path.suffix}") + + if len(parts) == 3: + out_file_path.write_text("\n".join(lines) + "\n") + return out_file_path + + if len(parts) < 2: + raise ValueError( + f"Dimension line '{dimension_line}' does not have enough integers." + ) + + data_lines = non_comment_lines[1:] + if not data_lines: + raise ValueError("No data lines found; cannot infer NNZ, M, N.") + + # parse data lines to determine M, N, and NNZ + max_row = 0 + max_col = 0 + NNZ = 0 + for line in data_lines: + coords = line.split() + if len(coords) < 2: + raise ValueError(f"Data line '{line}' does not have two coordinates.") + + r, c = map(int, coords[:2]) # row and col are 1-based + if r > max_row: + max_row = r + if c > max_col: + max_col = c + NNZ += 1 + + # infer M and N from max indices + M = max_row + N = max_col + + # if symmetric and not square, make it square by taking max dimension + if symmetric and M != N: + dim = max(M, N) + M = dim + N = dim + + # construct corrected dimension line + corrected_dimension_line = f"{M} {N} {NNZ}" + + # extract comment lines after header and before dimension line: + after_header = lines[header_index + 1 :] + dim_line_index_in_after = None + for idx, val in enumerate(after_header): + if val.strip() == dimension_line: + dim_line_index_in_after = idx + break + + if dim_line_index_in_after is None: + raise ValueError( + "Could not locate dimension line in the file after header. File may be " + "malformed." 
+ ) + + # comment lines before dimension line: + comment_lines_before_dim = [] + for val in after_header[:dim_line_index_in_after]: + if val.startswith("%"): + comment_lines_before_dim.append(val) + elif not val.strip(): + pass + + with out_file_path.open("w") as out_f: + for i in range(header_index + 1): + out_f.write(lines[i] + "\n") + + for cl in comment_lines_before_dim: + out_f.write(cl + "\n") + + out_f.write(corrected_dimension_line + "\n") + + for dl in data_lines: + out_f.write(dl + "\n") + + return out_file_path diff --git a/nxbench/validation/registry.py b/nxbench/validation/registry.py index 13bd938..2246634 100644 --- a/nxbench/validation/registry.py +++ b/nxbench/validation/registry.py @@ -112,6 +112,11 @@ class ValidationRegistry: params={}, expected_type=dict, ), + "average_clustering": ValidationConfig( + validator=validate_node_scores, + params={"require_normalized": False}, + expected_type=dict, + ), "square_clustering": ValidationConfig( validator=validate_node_scores, params={"require_normalized": False}, From 04483f64d7bfb4db73a051e28c398c5652db6e28 Mon Sep 17 00:00:00 2001 From: dpys Date: Thu, 12 Dec 2024 16:19:01 -0500 Subject: [PATCH 2/7] rebase: main --- nxbench/configs/dummy.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/nxbench/configs/dummy.yaml b/nxbench/configs/dummy.yaml index 9377da4..e2b40ce 100644 --- a/nxbench/configs/dummy.yaml +++ b/nxbench/configs/dummy.yaml @@ -18,11 +18,15 @@ datasets: # directed: false # weighted: false - - name: "enron" - source: "networkrepository" - params: {} + # - name: "enron" + # source: "networkrepository" + # params: {} + + # - name: "citationCiteseer" + # source: "networkrepository" + # params: {} - - name: "citationCiteseer" + - name: "patentcite" source: "networkrepository" params: {} From ced9b309c9179a9575ff9bf5b1c8fde105fe9636 Mon Sep 17 00:00:00 2001 From: dpys Date: Thu, 12 Dec 2024 17:40:22 -0500 Subject: [PATCH 3/7] feat: add dynamic delimiter discovery --- nxbench/configs/all.yaml | 16 ++++++--------- nxbench/configs/dummy.yaml | 34 ++++++++----------------------- nxbench/data/loader.py | 30 ++++++++++++++++----------- nxbench/data/tests/test_loader.py | 26 ++++++++++++++--------- nxbench/data/utils.py | 23 +++++++++++++++++++++ 5 files changed, 72 insertions(+), 57 deletions(-) diff --git a/nxbench/configs/all.yaml b/nxbench/configs/all.yaml index 3bd1fd8..362bea7 100644 --- a/nxbench/configs/all.yaml +++ b/nxbench/configs/all.yaml @@ -160,30 +160,26 @@ algorithms: # min_rounds: 3 datasets: - - name: "08blocks" + - name: "enron" source: "networkrepository" params: {} - - name: "enron" + - name: "citationCiteseer" source: "networkrepository" params: {} - # - name: "citationCiteseer" - # source: "networkrepository" - # params: {} - - name: "karate" source: "networkrepository" params: {} + - name: "patentcite" + source: "networkrepository" + params: {} + # - name: "ca-netscience" # source: "networkrepository" # params: {} - - name: "jazz" - source: "networkrepository" - params: {} - - name: "erdos_renyi_small" source: "generator" params: diff --git a/nxbench/configs/dummy.yaml b/nxbench/configs/dummy.yaml index e2b40ce..309b939 100644 --- a/nxbench/configs/dummy.yaml +++ b/nxbench/configs/dummy.yaml @@ -8,31 +8,15 @@ algorithms: validate_result: "nxbench.validation.validate_scalar_result" datasets: - # - name: "erdos_renyi_small" - # source: "generator" - # params: - # generator: "networkx.erdos_renyi_graph" - # n: 1000 - # p: 0.01 - # metadata: - # directed: false - # 
weighted: false - - # - name: "enron" - # source: "networkrepository" - # params: {} - - # - name: "citationCiteseer" - # source: "networkrepository" - # params: {} - - - name: "patentcite" - source: "networkrepository" - params: {} - - # - name: "ca-netscience" - # source: "networkrepository" - # params: {} + - name: "erdos_renyi_small" + source: "generator" + params: + generator: "networkx.erdos_renyi_graph" + n: 1000 + p: 0.01 + metadata: + directed: false + weighted: false validation: skip_slow: false diff --git a/nxbench/data/loader.py b/nxbench/data/loader.py index 99d6b62..c8b0ac4 100644 --- a/nxbench/data/loader.py +++ b/nxbench/data/loader.py @@ -14,7 +14,7 @@ from nxbench.benchmarks.config import DatasetConfig from nxbench.data.synthesize import generate_graph -from nxbench.data.utils import fix_matrix_market_file +from nxbench.data.utils import detect_delimiter, fix_matrix_market_file warnings.filterwarnings("ignore") @@ -56,27 +56,19 @@ def get_metadata(self, name: str) -> dict[str, Any]: normalized_name = self._normalize_name(name) network = self._metadata_df[self._metadata_df["name"] == normalized_name] if len(network) == 0: - logger.warning( - f"Network {name} not found in metadata cache. Returning dummy metadata." - ) - return { - "name": name, - "download_url": None, - "directed": False, - "weighted": False, - } + raise ValueError(f"Network {name} not found in metadata cache") return network.iloc[0].to_dict() async def load_network( self, config: DatasetConfig, session: aiohttp.ClientSession | None = None ) -> tuple[nx.Graph | nx.DiGraph, dict[str, Any]]: """Load or generate a network based on config.""" + metadata = self.get_metadata(config.name) source_lower = config.source.lower() if source_lower == "generator": return self._generate_graph(config) - metadata = self.get_metadata(config.name) if config.name in self._network_cache: logger.debug(f"Loading network '{config.name}' from cache") return self._network_cache[config.name] @@ -150,7 +142,7 @@ def check_graph_validity(graph, file_path): sparse_matrix = mmread(corrected_file) except Exception: logger.exception(f"Failed to load Matrix Market file {graph_file}") - raise + raise ValueError("Matrix Market file not in expected format") else: graph = nx.from_scipy_sparse_array( sparse_matrix, @@ -163,6 +155,15 @@ def check_graph_validity(graph, file_path): graph.graph.update(metadata) return graph elif suffix in [".edgelist", ".edges"]: + try: + delimiter = detect_delimiter(graph_file) + logger.debug(f"Detected delimiter: '{delimiter}'") + except Exception: + logger.debug( + "No valid delimiter found, falling back to whitespace split" + ) + delimiter = " " + create_using = ( nx.DiGraph() if metadata.get("directed", False) else nx.Graph() ) @@ -207,6 +208,7 @@ def edge_parser(): ) graph = nx.read_edgelist( edge_iter, + delimiter=delimiter, nodetype=str, create_using=create_using, data=False, @@ -222,6 +224,7 @@ def edge_parser(): ) graph = nx.read_edgelist( edge_iter, + delimiter=delimiter, nodetype=str, create_using=create_using, data=False, @@ -239,6 +242,7 @@ def edge_parser(): ) graph = nx.read_edgelist( edge_iter, + delimiter=delimiter, nodetype=str, create_using=create_using, data=False, @@ -273,6 +277,8 @@ def edge_parser(): raise else: graph.graph.update(metadata) + if graph.number_of_edges() == 0: + raise ValueError(f"Graph file {graph_file} contains no valid edges.") logger.info(f"Loaded network from '{graph_file}' successfully.") return graph diff --git a/nxbench/data/tests/test_loader.py 
b/nxbench/data/tests/test_loader.py index b820fd9..6d62799 100644 --- a/nxbench/data/tests/test_loader.py +++ b/nxbench/data/tests/test_loader.py @@ -209,7 +209,7 @@ async def test_load_unweighted_with_comments(data_manager, create_edge_file): graph, (nx.Graph, nx.DiGraph) ), "Graph should be NetworkX Graph or DiGraph" assert graph.number_of_nodes() == 4, "Graph should have 4 nodes" - assert graph.number_of_edges() == 3, "Graph should have 3 edges" + assert graph.number_of_edges() == 2, "Graph should have 2 edges" for u, v, data in graph.edges(data=True): assert "weight" not in data, f"Edge ({u}, {v}) should not have a 'weight'" @@ -629,15 +629,21 @@ async def test_generate_graph_exception(data_manager): metadata={"directed": False, "weighted": False}, ) - with patch( - "nxbench.data.loader.generate_graph", side_effect=Exception("Generator failed") - ) as mock_generate_graph: - with pytest.raises(Exception, match="Generator failed"): - await data_manager.load_network(config) - - mock_generate_graph.assert_called_once_with( - "networkx.invalid_generator", {"n": 100, "p": 0.1}, False - ) + with patch.object( + data_manager, + "get_metadata", + return_value={"directed": False, "weighted": False}, + ): + with patch( + "nxbench.data.loader.generate_graph", + side_effect=Exception("Generator failed"), + ) as mock_generate_graph: + with pytest.raises(Exception, match="Generator failed"): + await data_manager.load_network(config) + + mock_generate_graph.assert_called_once_with( + "networkx.invalid_generator", {"n": 100, "p": 0.1}, False + ) def test_generate_graph_missing_generator_name(data_manager): diff --git a/nxbench/data/utils.py b/nxbench/data/utils.py index d8ee935..0ea4650 100644 --- a/nxbench/data/utils.py +++ b/nxbench/data/utils.py @@ -1,5 +1,6 @@ import re import zipfile +from collections import Counter from pathlib import Path import networkx as nx @@ -176,3 +177,25 @@ def fix_matrix_market_file(in_path: Path) -> Path: out_f.write(dl + "\n") return out_file_path + + +def detect_delimiter(file_path: Path, sample_size: int = 5) -> str: + """Detect the most common delimiter in the first few lines of a file.""" + delimiters = [",", "\t", " ", ";"] + delimiter_counts = Counter() + + with file_path.open("r") as f: + for i, line in enumerate(f): + if i >= sample_size: + break + line = line.strip() + if not line or line.startswith(("#", "%")): + continue + for delimiter in delimiters: + if delimiter in line: + delimiter_counts[delimiter] += line.count(delimiter) + + if delimiter_counts: + return delimiter_counts.most_common(1)[0][0] + + raise ValueError("No valid delimiter found in the file.") From aaa0a171abafcbc1e756fe6f6e85353e31f12500 Mon Sep 17 00:00:00 2001 From: dpys Date: Thu, 12 Dec 2024 17:42:52 -0500 Subject: [PATCH 4/7] feat: add dynamic delimiter discovery --- nxbench/data/loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nxbench/data/loader.py b/nxbench/data/loader.py index c8b0ac4..f0ad4e0 100644 --- a/nxbench/data/loader.py +++ b/nxbench/data/loader.py @@ -63,12 +63,13 @@ async def load_network( self, config: DatasetConfig, session: aiohttp.ClientSession | None = None ) -> tuple[nx.Graph | nx.DiGraph, dict[str, Any]]: """Load or generate a network based on config.""" - metadata = self.get_metadata(config.name) source_lower = config.source.lower() if source_lower == "generator": return self._generate_graph(config) + metadata = self.get_metadata(config.name) + if config.name in self._network_cache: logger.debug(f"Loading network '{config.name}' from 
cache") return self._network_cache[config.name] From 1fee8939fcd50bb9b9dd82546d6fe89f269bf0bf Mon Sep 17 00:00:00 2001 From: dpys Date: Thu, 12 Dec 2024 22:18:21 -0500 Subject: [PATCH 5/7] feat: check=False --- nxbench/cli.py | 4 +- nxbench/configs/asv.conf.json | 2 +- nxbench/configs/{all.yaml => example2.yaml} | 136 ++++++++++---------- 3 files changed, 69 insertions(+), 73 deletions(-) rename nxbench/configs/{all.yaml => example2.yaml} (75%) diff --git a/nxbench/cli.py b/nxbench/cli.py index e611819..aeb591a 100644 --- a/nxbench/cli.py +++ b/nxbench/cli.py @@ -110,7 +110,7 @@ def get_latest_commit_hash(github_url: str) -> str: def safe_run( cmd: Sequence[str | Path], - check: bool = True, + check: bool = False, capture_output: bool = False, **kwargs, ) -> subprocess.CompletedProcess: @@ -121,7 +121,7 @@ def safe_run( ---------- cmd : Sequence[str | Path] The command and arguments to execute. - check : bool, default=True + check : bool, default=False If True, raise an exception if the command fails. capture_output : bool, default=False If True, capture stdout and stderr. diff --git a/nxbench/configs/asv.conf.json b/nxbench/configs/asv.conf.json index 1f76a1b..d4c0b4f 100644 --- a/nxbench/configs/asv.conf.json +++ b/nxbench/configs/asv.conf.json @@ -8,7 +8,7 @@ "main" ], "repo": "https://github.com/dpys/nxbench", - "environment_type": "conda", + "environment_type": "virtualenv", "show_commit_url": "https://github.com/dpys/nxbench/commit/", "matrix": {}, "benchmark_dir": "nxbench/benchmarks", diff --git a/nxbench/configs/all.yaml b/nxbench/configs/example2.yaml similarity index 75% rename from nxbench/configs/all.yaml rename to nxbench/configs/example2.yaml index 362bea7..db121c2 100644 --- a/nxbench/configs/all.yaml +++ b/nxbench/configs/example2.yaml @@ -19,17 +19,17 @@ algorithms: warmup_iterations: 20 validate_result: "nxbench.validation.validate_node_scores" - - name: "betweenness_centrality" - func: "networkx.betweenness_centrality" - params: - normalized: true - endpoints: false - requires_directed: false - groups: ["centrality", "path_based"] - min_rounds: 5 - warmup: true - warmup_iterations: 20 - validate_result: "nxbench.validation.validate_node_scores" + # - name: "betweenness_centrality" + # func: "networkx.betweenness_centrality" + # params: + # normalized: true + # endpoints: false + # requires_directed: false + # groups: ["centrality", "path_based"] + # min_rounds: 5 + # warmup: true + # warmup_iterations: 20 + # validate_result: "nxbench.validation.validate_node_scores" # - name: "edge_betweenness_centrality" # func: "networkx.edge_betweenness_centrality" @@ -65,13 +65,13 @@ algorithms: min_rounds: 3 validate_result: "nxbench.validation.validate_node_scores" - - name: "transitivity" - func: "networkx.transitivity" - params: {} - requires_directed: false - groups: ["clustering", "graph_structure"] - min_rounds: 3 - validate_result: "nxbench.validation.validate_scalar_result" + # - name: "transitivity" + # func: "networkx.transitivity" + # params: {} + # requires_directed: false + # groups: ["clustering", "graph_structure"] + # min_rounds: 3 + # validate_result: "nxbench.validation.validate_scalar_result" # - name: "all_pairs_node_connectivity" # func: "networkx.algorithms.connectivity.connectivity.all_pairs_node_connectivity" @@ -160,23 +160,19 @@ algorithms: # min_rounds: 3 datasets: - - name: "enron" - source: "networkrepository" - params: {} - - - name: "citationCiteseer" - source: "networkrepository" - params: {} + # - name: "enron" + # source: "networkrepository" + # 
params: {} - - name: "karate" - source: "networkrepository" - params: {} + # - name: "citationCiteseer" + # source: "networkrepository" + # params: {} - - name: "patentcite" - source: "networkrepository" - params: {} + # - name: "karate" + # source: "networkrepository" + # params: {} - # - name: "ca-netscience" + # - name: "patentcite" # source: "networkrepository" # params: {} @@ -190,47 +186,47 @@ datasets: directed: false weighted: false - - name: "watts_strogatz_small" - source: "generator" - params: - generator: "networkx.watts_strogatz_graph" - n: 10000 - k: 6 - p: 0.1 - metadata: - directed: false - weighted: false + # - name: "watts_strogatz_small" + # source: "generator" + # params: + # generator: "networkx.watts_strogatz_graph" + # n: 10000 + # k: 6 + # p: 0.1 + # metadata: + # directed: false + # weighted: false - - name: "barabasi_albert_small" - source: "generator" - params: - generator: "networkx.barabasi_albert_graph" - n: 1000 - m: 3 - metadata: - directed: false - weighted: false + # - name: "barabasi_albert_small" + # source: "generator" + # params: + # generator: "networkx.barabasi_albert_graph" + # n: 1000 + # m: 3 + # metadata: + # directed: false + # weighted: false - - name: "powerlaw_cluster_small" - source: "generator" - params: - generator: "networkx.powerlaw_cluster_graph" - n: 1000 - m: 2 - p: 0.1 - metadata: - directed: false - weighted: false + # - name: "powerlaw_cluster_small" + # source: "generator" + # params: + # generator: "networkx.powerlaw_cluster_graph" + # n: 1000 + # m: 2 + # p: 0.1 + # metadata: + # directed: false + # weighted: false - - name: "erdos_renyi_small" - source: "generator" - params: - generator: "networkx.erdos_renyi_graph" - n: 1000 - p: 0.01 - metadata: - directed: false - weighted: false + # - name: "erdos_renyi_small" + # source: "generator" + # params: + # generator: "networkx.erdos_renyi_graph" + # n: 1000 + # p: 0.01 + # metadata: + # directed: false + # weighted: false # - name: "watts_strogatz_large" # source: "generator" From e9dd1ec3ebdb2bc7483eacfca8cfca6bfb4750e7 Mon Sep 17 00:00:00 2001 From: dpys Date: Thu, 12 Dec 2024 22:20:14 -0500 Subject: [PATCH 6/7] feat: check=False --- nxbench/configs/example2.yaml | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/nxbench/configs/example2.yaml b/nxbench/configs/example2.yaml index db121c2..1a33ccf 100644 --- a/nxbench/configs/example2.yaml +++ b/nxbench/configs/example2.yaml @@ -65,13 +65,13 @@ algorithms: min_rounds: 3 validate_result: "nxbench.validation.validate_node_scores" - # - name: "transitivity" - # func: "networkx.transitivity" - # params: {} - # requires_directed: false - # groups: ["clustering", "graph_structure"] - # min_rounds: 3 - # validate_result: "nxbench.validation.validate_scalar_result" + - name: "transitivity" + func: "networkx.transitivity" + params: {} + requires_directed: false + groups: ["clustering", "graph_structure"] + min_rounds: 3 + validate_result: "nxbench.validation.validate_scalar_result" # - name: "all_pairs_node_connectivity" # func: "networkx.algorithms.connectivity.connectivity.all_pairs_node_connectivity" @@ -160,21 +160,21 @@ algorithms: # min_rounds: 3 datasets: - # - name: "enron" - # source: "networkrepository" - # params: {} + - name: "enron" + source: "networkrepository" + params: {} - # - name: "citationCiteseer" - # source: "networkrepository" - # params: {} + - name: "citationCiteseer" + source: "networkrepository" + params: {} - # - name: "karate" - # source: "networkrepository" 
- # params: {} + - name: "karate" + source: "networkrepository" + params: {} - # - name: "patentcite" - # source: "networkrepository" - # params: {} + - name: "patentcite" + source: "networkrepository" + params: {} - name: "erdos_renyi_small" source: "generator" @@ -186,16 +186,16 @@ datasets: directed: false weighted: false - # - name: "watts_strogatz_small" - # source: "generator" - # params: - # generator: "networkx.watts_strogatz_graph" - # n: 10000 - # k: 6 - # p: 0.1 - # metadata: - # directed: false - # weighted: false + - name: "watts_strogatz_small" + source: "generator" + params: + generator: "networkx.watts_strogatz_graph" + n: 1000 + k: 6 + p: 0.1 + metadata: + directed: false + weighted: false # - name: "barabasi_albert_small" # source: "generator" @@ -218,11 +218,11 @@ datasets: # directed: false # weighted: false - # - name: "erdos_renyi_small" + # - name: "erdos_renyi_large" # source: "generator" # params: # generator: "networkx.erdos_renyi_graph" - # n: 1000 + # n: 10000 # p: 0.01 # metadata: # directed: false From f0ce92aac134edacc66f1c352f26c8f3d0a24e8f Mon Sep 17 00:00:00 2001 From: dpys Date: Fri, 13 Dec 2024 00:19:33 -0500 Subject: [PATCH 7/7] feat: update example config 2 --- nxbench/configs/asv.conf.json | 2 +- nxbench/configs/example2.yaml | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/nxbench/configs/asv.conf.json b/nxbench/configs/asv.conf.json index d4c0b4f..947501e 100644 --- a/nxbench/configs/asv.conf.json +++ b/nxbench/configs/asv.conf.json @@ -5,7 +5,7 @@ "project_url": "https://github.com/dpys/nxbench", "dvcs": "git", "branches": [ - "main" + "cli-gotchas" ], "repo": "https://github.com/dpys/nxbench", "environment_type": "virtualenv", diff --git a/nxbench/configs/example2.yaml b/nxbench/configs/example2.yaml index 1a33ccf..03ad96e 100644 --- a/nxbench/configs/example2.yaml +++ b/nxbench/configs/example2.yaml @@ -160,21 +160,21 @@ algorithms: # min_rounds: 3 datasets: - - name: "enron" - source: "networkrepository" - params: {} + # - name: "enron" + # source: "networkrepository" + # params: {} - - name: "citationCiteseer" - source: "networkrepository" - params: {} + # - name: "citationCiteseer" + # source: "networkrepository" + # params: {} - name: "karate" source: "networkrepository" params: {} - - name: "patentcite" - source: "networkrepository" - params: {} + # - name: "patentcite" + # source: "networkrepository" + # params: {} - name: "erdos_renyi_small" source: "generator"
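---

Note (not part of the series): below is a minimal usage sketch of the two helpers
introduced in PATCH 1/7 (fix_matrix_market_file) and PATCH 3/7 (detect_delimiter),
mirroring the fallback logic that nxbench/data/loader.py now uses. The file paths
are hypothetical; the function names, signatures, and fallback behavior are taken
from the patch bodies above.

    from pathlib import Path

    import networkx as nx
    from scipy.io import mmread

    from nxbench.data.utils import detect_delimiter, fix_matrix_market_file

    # Matrix Market repair path: try the raw file first; if parsing fails,
    # fix_matrix_market_file infers M, N, and NNZ from the data lines and
    # writes "<stem>_corrected.mtx" next to the original, which the loader
    # then reuses on subsequent runs instead of re-fixing.
    mtx = Path("data/08blocks.mtx")  # hypothetical path
    try:
        matrix = mmread(mtx)
    except Exception:
        matrix = mmread(fix_matrix_market_file(mtx))
    graph = nx.from_scipy_sparse_array(matrix)

    # Edgelist path: detect_delimiter samples the first few non-comment
    # lines, counting ",", "\t", " ", and ";" occurrences, and raises
    # ValueError when none appear; the loader then falls back to a plain
    # whitespace split.
    edges = Path("data/karate.edges")  # hypothetical path
    try:
        delimiter = detect_delimiter(edges)
    except ValueError:
        delimiter = " "
    graph = nx.read_edgelist(edges, delimiter=delimiter, nodetype=str)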