Merge pull request #17 from dPys/cli-gotchas

Cli gotchas

dPys authored Dec 13, 2024
2 parents 2a0e09b + f0ce92a · commit 9babc43

Showing 10 changed files with 276 additions and 118 deletions.
10 changes: 4 additions & 6 deletions nxbench/benchmarks/benchmark.py
@@ -61,18 +61,16 @@ def make_benchmark_method(algo_config, dataset_name, backend, num_thread):
     and number of threads combination.
     """
     algo_name = algo_config.name
-    orig_dataset_name = dataset_name
-    method_name = f"track_{algo_name}_{orig_dataset_name}_{backend}_{num_thread}"
+    safe_dataset_name = dataset_name.replace("-", "_")
+    method_name = f"track_{algo_name}_{safe_dataset_name}_{backend}_{num_thread}"
 
     def track_method(self):
         """Run benchmark and return metrics for the unique combination."""
         logger.debug(
             f"Starting track_method for {method_name} with backend={backend}, "
-            f"threads={num_thread}, dataset={orig_dataset_name}"
-        )
-        metrics = self.do_benchmark(
-            algo_config, orig_dataset_name, backend, num_thread
+            f"threads={num_thread}, dataset={dataset_name}"
         )
+        metrics = self.do_benchmark(algo_config, dataset_name, backend, num_thread)
         logger.debug(f"Track {method_name} results: {metrics}")
         return metrics
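Why this change matters: ASV discovers these generated track_* methods by attribute name, and a dataset name containing dashes (e.g. "email-Eu-core") would otherwise produce a method name that is not a valid Python identifier. A minimal sketch of the naming rule (the standalone helper below is illustrative, not part of the codebase):

    def make_track_name(algo_name, dataset_name, backend, num_thread):
        """Build an ASV track method name; dashes must become underscores."""
        safe_dataset_name = dataset_name.replace("-", "_")
        return f"track_{algo_name}_{safe_dataset_name}_{backend}_{num_thread}"

    # "email-Eu-core" -> "track_pagerank_email_Eu_core_networkx_1"
    assert make_track_name("pagerank", "email-Eu-core", "networkx", 1).isidentifier()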
4 changes: 2 additions & 2 deletions nxbench/cli.py
@@ -110,7 +110,7 @@ def get_latest_commit_hash(github_url: str) -> str:
 
 def safe_run(
     cmd: Sequence[str | Path],
-    check: bool = True,
+    check: bool = False,
     capture_output: bool = False,
     **kwargs,
 ) -> subprocess.CompletedProcess:
@@ -121,7 +121,7 @@ def safe_run(
     ----------
     cmd : Sequence[str | Path]
         The command and arguments to execute.
-    check : bool, default=True
+    check : bool, default=False
         If True, raise an exception if the command fails.
     capture_output : bool, default=False
         If True, capture stdout and stderr.
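For context, a minimal sketch of what a wrapper with this signature typically looks like; only the signature and docstring fragments above come from the diff, the body here is an assumption:

    import subprocess
    from collections.abc import Sequence
    from pathlib import Path

    def safe_run(
        cmd: Sequence[str | Path],
        check: bool = False,
        capture_output: bool = False,
        **kwargs,
    ) -> subprocess.CompletedProcess:
        """Run a command without a shell, stringifying Path arguments."""
        # shell=False avoids shell injection; the list form keeps arguments intact
        return subprocess.run(
            [str(c) for c in cmd],
            check=check,
            capture_output=capture_output,
            shell=False,
            **kwargs,
        )

With check=False as the new default, a failing command no longer raises CalledProcessError; callers that care must inspect the returned CompletedProcess.returncode themselves.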
4 changes: 2 additions & 2 deletions nxbench/configs/asv.conf.json
@@ -5,10 +5,10 @@
     "project_url": "https://github.com/dpys/nxbench",
     "dvcs": "git",
     "branches": [
-        "main"
+        "cli-gotchas"
     ],
     "repo": "https://github.com/dpys/nxbench",
-    "environment_type": "conda",
+    "environment_type": "virtualenv",
     "show_commit_url": "https://github.com/dpys/nxbench/commit/",
     "matrix": {},
     "benchmark_dir": "nxbench/benchmarks",
22 changes: 10 additions & 12 deletions nxbench/configs/dummy.yaml
@@ -1,14 +1,11 @@
 algorithms:
-  - name: "pagerank"
-    func: "networkx.pagerank"
-    params:
-      alpha: 0.9
-      tol: 1.0e-6
+  - name: "average_clustering"
+    func: "networkx.average_clustering"
+    params: {}
     requires_directed: false
-    groups: ["centrality", "random_walk"]
-    min_rounds: 10
-    warmup: true
-    warmup_iterations: 50
+    groups: ["clustering", "graph_structure"]
+    min_rounds: 3
+    validate_result: "nxbench.validation.validate_scalar_result"
 
 datasets:
   - name: "erdos_renyi_small"
@@ -35,9 +32,10 @@ matrix:
     - "1"
 
 env_config:
+  repo: "https://github.com/dpys/nxbench.git"
+  branches:
+    - "main"
   req:
     - "networkx==3.4.2"
     - "nx-parallel-0.3rc0.dev0"
     - "graphblas_algorithms==2023.10.0"
   pythons:
     - "3.10"
     - "3.11"
126 changes: 51 additions & 75 deletions nxbench/configs/all.yaml → nxbench/configs/example2.yaml
@@ -19,17 +19,17 @@ algorithms:
     warmup_iterations: 20
     validate_result: "nxbench.validation.validate_node_scores"
 
-  - name: "betweenness_centrality"
-    func: "networkx.betweenness_centrality"
-    params:
-      normalized: true
-      endpoints: false
-    requires_directed: false
-    groups: ["centrality", "path_based"]
-    min_rounds: 5
-    warmup: true
-    warmup_iterations: 20
-    validate_result: "nxbench.validation.validate_node_scores"
+  # - name: "betweenness_centrality"
+  #   func: "networkx.betweenness_centrality"
+  #   params:
+  #     normalized: true
+  #     endpoints: false
+  #   requires_directed: false
+  #   groups: ["centrality", "path_based"]
+  #   min_rounds: 5
+  #   warmup: true
+  #   warmup_iterations: 20
+  #   validate_result: "nxbench.validation.validate_node_scores"
 
   # - name: "edge_betweenness_centrality"
   #   func: "networkx.edge_betweenness_centrality"
@@ -160,45 +160,21 @@ algorithms:
   #   min_rounds: 3
 
 datasets:
-  - name: "twitter"
-    source: "networkrepository"
-    params: {}
 
-  - name: "08blocks"
-    source: "networkrepository"
-    params: {}
 
-  - name: "amazon"
-    source: "networkrepository"
-    params: {}
 
-  - name: "google"
-    source: "networkrepository"
-    params: {}
 
-  - name: "enron"
-    source: "networkrepository"
-    params: {}
+  # - name: "enron"
+  #   source: "networkrepository"
+  #   params: {}
 
-  - name: "citationCiteseer"
-    source: "networkrepository"
-    params: {}
+  # - name: "citationCiteseer"
+  #   source: "networkrepository"
+  #   params: {}
 
-  - name: "karate"
-    source: "networkrepository"
-    params: {}
-
-  - name: "netscience"
-    source: "networkrepository"
-    params: {}
-
-  - name: "email-Eu-core"
-    source: "networkrepository"
-    params: {}
-
-  - name: "jazz"
-    source: "networkrepository"
-    params: {}
+  # - name: "patentcite"
+  #   source: "networkrepository"
+  #   params: {}
 
   - name: "erdos_renyi_small"
     source: "generator"
@@ -214,43 +190,43 @@ datasets:
     source: "generator"
     params:
       generator: "networkx.watts_strogatz_graph"
-      n: 10000
+      n: 1000
       k: 6
       p: 0.1
     metadata:
       directed: false
       weighted: false
 
-  - name: "barabasi_albert_small"
-    source: "generator"
-    params:
-      generator: "networkx.barabasi_albert_graph"
-      n: 1000
-      m: 3
-    metadata:
-      directed: false
-      weighted: false
+  # - name: "barabasi_albert_small"
+  #   source: "generator"
+  #   params:
+  #     generator: "networkx.barabasi_albert_graph"
+  #     n: 1000
+  #     m: 3
+  #   metadata:
+  #     directed: false
+  #     weighted: false
 
-  - name: "powerlaw_cluster_small"
-    source: "generator"
-    params:
-      generator: "networkx.powerlaw_cluster_graph"
-      n: 1000
-      m: 2
-      p: 0.1
-    metadata:
-      directed: false
-      weighted: false
+  # - name: "powerlaw_cluster_small"
+  #   source: "generator"
+  #   params:
+  #     generator: "networkx.powerlaw_cluster_graph"
+  #     n: 1000
+  #     m: 2
+  #     p: 0.1
+  #   metadata:
+  #     directed: false
+  #     weighted: false
 
-  - name: "erdos_renyi_small"
-    source: "generator"
-    params:
-      generator: "networkx.erdos_renyi_graph"
-      n: 1000
-      p: 0.01
-    metadata:
-      directed: false
-      weighted: false
+  # - name: "erdos_renyi_large"
+  #   source: "generator"
+  #   params:
+  #     generator: "networkx.erdos_renyi_graph"
+  #     n: 10000
+  #     p: 0.01
+  #   metadata:
+  #     directed: false
+  #     weighted: false
 
   # - name: "watts_strogatz_large"
   #   source: "generator"
@@ -295,7 +271,7 @@ matrix:
     - "networkx"
     - "parallel"
     - "graphblas"
-    - "cugraph"
+    # - "cugraph"
   num_threads:
     - "1"
     - "2"
@@ -306,7 +282,7 @@ env_config:
     - "networkx==3.4.2"
     - "nx-parallel-0.3rc0.dev0"
    - "graphblas_algorithms==2023.10.0"
-    - "nx-cugraph_cu11==24.8.0"
+    # - "nx-cugraph_cu11==24.8.0"
   pythons:
     - "3.10"
     - "3.11"
53 changes: 44 additions & 9 deletions nxbench/data/loader.py
@@ -14,6 +14,7 @@
 
 from nxbench.benchmarks.config import DatasetConfig
 from nxbench.data.synthesize import generate_graph
+from nxbench.data.utils import detect_delimiter, fix_matrix_market_file
 
 warnings.filterwarnings("ignore")
 
@@ -36,7 +37,7 @@ def __init__(self, data_dir: str | Path | None = None):
         self._metadata_df = self._load_metadata()
 
     def _normalize_name(self, name: str) -> str:
-        return name.lower().replace("-", "_")
+        return name.lower().strip().replace("-", "_")
 
     def _load_metadata(self) -> pd.DataFrame:
         try:
@@ -68,6 +69,7 @@ async def load_network(
             return self._generate_graph(config)
 
         metadata = self.get_metadata(config.name)
+
        if config.name in self._network_cache:
             logger.debug(f"Loading network '{config.name}' from cache")
             return self._network_cache[config.name]
@@ -117,8 +119,32 @@ def check_graph_validity(graph, file_path):
         suffix = graph_file.suffix.lower()
         if suffix == ".mtx":
             logger.info(f"Loading Matrix Market file from {graph_file}")
+            graph_path = Path(graph_file)
+            corrected_file = graph_path.with_name(
+                f"{graph_path.stem}_corrected{graph_path.suffix}"
+            )
+
             try:
-                sparse_matrix = mmread(graph_file)
+                # check if the corrected file already exists
+                if corrected_file.exists():
+                    logger.info(
+                        f"Using existing corrected Matrix Market file: "
+                        f"{corrected_file}"
+                    )
+                    sparse_matrix = mmread(corrected_file)
+                else:
+                    try:
+                        # attempt to read the original file
+                        sparse_matrix = mmread(graph_file)
+                    except Exception:
+                        logger.info(f"Fixing Matrix Market file: {graph_file}")
+                        # fix the file and load the corrected version
+                        corrected_file = fix_matrix_market_file(graph_path)
+                        sparse_matrix = mmread(corrected_file)
+            except Exception:
+                logger.exception(f"Failed to load Matrix Market file {graph_file}")
+                raise ValueError("Matrix Market file not in expected format")
+            else:
                 graph = nx.from_scipy_sparse_array(
                     sparse_matrix,
                     create_using=(
@@ ... @@
                         nx.DiGraph() if metadata.get("directed", False)
                         else nx.Graph()
                     ),
                 )
                 check_graph_validity(graph, graph_file)
-            except ValueError:
-                logger.exception(f"Failed to load Matrix Market file {graph_file}")
-                raise ValueError("Matrix Market file not in expected format")
-            except Exception:
-                logger.exception(f"Failed to load Matrix Market file {graph_file}")
-                raise
             graph.graph.update(metadata)
             return graph
         elif suffix in [".edgelist", ".edges"]:
+            try:
+                delimiter = detect_delimiter(graph_file)
+                logger.debug(f"Detected delimiter: '{delimiter}'")
+            except Exception:
+                logger.debug(
+                    "No valid delimiter found, falling back to whitespace split"
+                )
+                delimiter = " "
+
             create_using = (
                 nx.DiGraph() if metadata.get("directed", False) else nx.Graph()
             )
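fix_matrix_market_file is imported from nxbench.data.utils, but its body is not part of this diff. A plausible sketch of such a repair step, assuming (as the except-path above suggests) that the usual problem is a file scipy's mmread rejects for a missing header banner:

    from pathlib import Path

    def fix_matrix_market_file(path: Path) -> Path:
        """Hypothetical repair: prepend a MatrixMarket banner if it is missing."""
        corrected = path.with_name(f"{path.stem}_corrected{path.suffix}")
        text = path.read_text()
        if not text.lstrip().startswith("%%MatrixMarket"):
            text = "%%MatrixMarket matrix coordinate pattern general\n" + text
        corrected.write_text(text)
        return corrected

The corrected-file naming here mirrors the "{stem}_corrected{suffix}" convention visible in the loader code above.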
@@ -179,6 +209,7 @@ def edge_parser():
                     )
                     graph = nx.read_edgelist(
                         edge_iter,
+                        delimiter=delimiter,
                         nodetype=str,
                         create_using=create_using,
                         data=False,
@@ -194,6 +225,7 @@ def edge_parser():
                     )
                     graph = nx.read_edgelist(
                         edge_iter,
+                        delimiter=delimiter,
                         nodetype=str,
                         create_using=create_using,
                         data=False,
@@ -211,6 +243,7 @@ def edge_parser():
                     )
                     graph = nx.read_edgelist(
                         edge_iter,
+                        delimiter=delimiter,
                         nodetype=str,
                         create_using=create_using,
                         data=False,
@@ -245,6 +278,8 @@ def edge_parser():
                 raise
         else:
             graph.graph.update(metadata)
+            if graph.number_of_edges() == 0:
+                raise ValueError(f"Graph file {graph_file} contains no valid edges.")
             logger.info(f"Loaded network from '{graph_file}' successfully.")
             return graph
 
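detect_delimiter likewise lives in nxbench.data.utils and is not shown in this diff. One minimal way to implement it with the standard library (an assumption, using csv.Sniffer on the first non-comment line):

    import csv
    from pathlib import Path

    def detect_delimiter(path: Path, candidates: str = " \t,;") -> str:
        """Hypothetical sniffing: infer the separator from the first data line."""
        with open(path) as fh:
            for line in fh:
                if line.strip() and not line.startswith(("%", "#")):
                    return csv.Sniffer().sniff(line, delimiters=candidates).delimiter
        raise ValueError(f"no data lines found in {path}")

If sniffing fails, csv.Sniffer raises csv.Error, which the except branch in the loader above converts into the whitespace fallback.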
(Diffs for the remaining 4 changed files not shown.)