Commit

fix: merge conflict
dPys committed Dec 28, 2024
1 parent 2ac0e82 commit 0483083
Showing 4 changed files with 109 additions and 72 deletions.
162 changes: 92 additions & 70 deletions nxbench/benchmarking/export.py
@@ -28,6 +28,17 @@ def __init__(self, results_file: Path):
self.data_manager = BenchmarkDataManager()
self._cached_results: list[BenchmarkResult] | None = None

def _safely_parse_entry(self, entry: dict[str, Any]) -> BenchmarkResult | None:
"""
Convert one dictionary entry into a BenchmarkResult, logging
and returning None on error (so we skip just that one).
"""
try:
return self._create_benchmark_result_from_entry(entry)
except Exception:
logger.exception("Skipping one entry due to error")
return None

def load_results(self) -> list[BenchmarkResult]:
"""Load benchmark results from the workflow outputs (JSON or CSV),
integrating all known fields into BenchmarkResult and treating unknown
@@ -39,30 +50,29 @@ def load_results(self) -> list[BenchmarkResult]:
results = []

try:
if self.results_file.suffix.lower() == ".json":
suffix = self.results_file.suffix.lower()
if suffix == ".json":
with self.results_file.open("r") as f:
data = json.load(f)

if not isinstance(data, list):
logger.error(
f"Expected a list of results in JSON file, got {type(data)}"
)
return []

for entry in data:
result = self._create_benchmark_result_from_entry(entry)
result = self._safely_parse_entry(entry)
if result:
results.append(result)

elif self.results_file.suffix.lower() == ".csv":
elif suffix == ".csv":
df = pd.read_csv(self.results_file)
for _, row in df.iterrows():
entry = row.to_dict()
result = self._create_benchmark_result_from_entry(entry)
result = self._safely_parse_entry(entry)
if result:
results.append(result)
else:
logger.error(f"Unsupported file format: {self.results_file.suffix}")
return []

except Exception:
logger.exception(f"Failed to load results from: {self.results_file}")
@@ -74,67 +84,81 @@ def load_results(self) -> list[BenchmarkResult]:

def _create_benchmark_result_from_entry(
self, entry: dict[str, Any]
) -> BenchmarkResult | None:
try:
known_fields = {
"algorithm",
"dataset",
"execution_time",
"execution_time_with_preloading",
"memory_used",
"num_nodes",
"num_edges",
"is_directed",
"is_weighted",
"backend",
"num_thread",
"date",
"validation",
"validation_message",
"error",
}

algorithm = entry.get("algorithm", "unknown")
dataset = entry.get("dataset", "unknown")
backend = entry.get("backend", "unknown")
execution_time = float(entry.get("execution_time", float("nan")))
execution_time_with_preloading = float(
entry.get("execution_time_with_preloading", float("nan"))
)
memory_used = float(entry.get("memory_used", float("nan")))
num_thread = int(entry.get("num_thread", 1))
num_nodes = int(entry.get("num_nodes", 0))
num_edges = int(entry.get("num_edges", 0))
is_directed = bool(entry.get("is_directed", False))
is_weighted = bool(entry.get("is_weighted", False))
date = int(entry.get("date", 0))
validation = entry.get("validation", "unknown")
validation_message = entry.get("validation_message", "")
error = entry.get("error")

metadata = {k: v for k, v in entry.items() if k not in known_fields}

return BenchmarkResult(
algorithm=algorithm,
dataset=dataset,
execution_time=execution_time,
execution_time_with_preloading=execution_time_with_preloading,
memory_used=memory_used,
num_nodes=num_nodes,
num_edges=num_edges,
is_directed=is_directed,
is_weighted=is_weighted,
backend=backend,
num_thread=num_thread,
date=date,
metadata=metadata,
validation=validation,
validation_message=validation_message,
error=error,
)
except Exception:
logger.exception("Failed to process result entry.")
return None
) -> BenchmarkResult:
"""
Parse a single JSON or CSV row into a BenchmarkResult object.
Missing/unparseable fields are gracefully handled, so no row is dropped.
"""
known_fields = {
"algorithm",
"dataset",
"execution_time",
"execution_time_with_preloading",
"memory_used",
"num_nodes",
"num_edges",
"is_directed",
"is_weighted",
"backend",
"num_thread",
"date",
"validation",
"validation_message",
"error",
}

def as_float(value, default=float("nan")):
"""Attempt parsing a float; fallback to default if unparseable."""
try:
return float(value)
except (TypeError, ValueError):
return default

def as_int(value, default=0):
"""Attempt parsing an int; fallback to default if unparseable."""
try:
return int(value)
except (TypeError, ValueError):
return default

algorithm = entry.get("algorithm", "unknown")
dataset = entry.get("dataset", "unknown")
backend = entry.get("backend", "unknown")
execution_time = as_float(entry.get("execution_time"))
execution_time_with_preloading = as_float(
entry.get("execution_time_with_preloading")
)
memory_used = as_float(entry.get("memory_used"))
num_nodes = as_int(entry.get("num_nodes"))
num_edges = as_int(entry.get("num_edges"))
is_directed = bool(entry.get("is_directed", False))
is_weighted = bool(entry.get("is_weighted", False))
num_thread = as_int(entry.get("num_thread"), default=1)
date = as_int(entry.get("date"), default=0)
validation = entry.get("validation", "unknown")
validation_message = entry.get("validation_message", "")
error = entry.get("error")

metadata = {k: v for k, v in entry.items() if k not in known_fields}

return BenchmarkResult(
algorithm=algorithm,
dataset=dataset,
execution_time=execution_time,
execution_time_with_preloading=execution_time_with_preloading,
memory_used=memory_used,
num_nodes=num_nodes,
num_edges=num_edges,
is_directed=is_directed,
is_weighted=is_weighted,
backend=backend,
num_thread=num_thread,
date=date,
metadata=metadata,
validation=validation,
validation_message=validation_message,
error=error,
)

def to_dataframe(self) -> pd.DataFrame:
results = self.load_results()
@@ -150,9 +174,7 @@ def to_dataframe(self) -> pd.DataFrame:
"dataset": result.dataset,
"backend": result.backend,
"execution_time": result.execution_time,
### ADDED:
"execution_time_with_preloading": result.execution_time_with_preloading,
### END ADDED
"memory_used": result.memory_used,
"num_nodes": result.num_nodes,
"num_edges": result.num_edges,
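This hunk replaces the old per-entry try/except, which coerced fields with bare float()/int() calls, with the tolerant as_float/as_int helpers, so one malformed field no longer drops an entire row. Below is a minimal standalone sketch (not part of the commit) of that behavior: it reproduces the two helpers outside the class and feeds them a hypothetical malformed entry.

import math

def as_float(value, default=float("nan")):
    """Attempt parsing a float; fall back to default if unparseable."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default

def as_int(value, default=0):
    """Attempt parsing an int; fall back to default if unparseable."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default

# Hypothetical row with a non-numeric execution time and a missing node count.
entry = {"algorithm": "pagerank", "execution_time": "N/A", "num_nodes": None}
print(math.isnan(as_float(entry.get("execution_time"))))  # True: "N/A" is unparseable
print(math.isnan(as_float(entry.get("memory_used"))))     # True: key is absent
print(as_int(entry.get("num_nodes")))                      # 0: None falls back to the default
print(as_int(entry.get("num_thread"), default=1))          # 1: missing key uses the caller's default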
2 changes: 1 addition & 1 deletion nxbench/benchmarking/tests/test_export.py
@@ -130,7 +130,7 @@ def test_load_results_unsupported_suffix(self, mock_logger):
exporter = ResultsExporter(results_file=Path("results.txt"))
results = exporter.load_results()
assert results == []
mock_logger.error.assert_any_call("Unsupported file format: .txt")
mock_logger.exception.assert_not_called()

def test_to_dataframe_no_results(self, mock_logger):
"""Test to_dataframe when load_results is empty."""
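The updated assertion checks that the unsupported-suffix branch logs via logger.error without raising. For the new _safely_parse_entry skip-on-error path, a hypothetical pytest sketch (not part of this commit) could look like the following; it assumes _create_benchmark_result_from_entry can simply be patched on the exporter instance.

from pathlib import Path
from unittest.mock import patch

from nxbench.benchmarking.export import ResultsExporter

def test_safely_parse_entry_skips_bad_entry():
    exporter = ResultsExporter(results_file=Path("results.json"))
    # Force the underlying parser to raise so the wrapper's except path runs.
    with patch.object(
        exporter,
        "_create_benchmark_result_from_entry",
        side_effect=ValueError("boom"),
    ):
        assert exporter._safely_parse_entry({"algorithm": "bfs"}) is None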
2 changes: 1 addition & 1 deletion nxbench/viz/app.py
@@ -359,7 +359,7 @@ def run_server(port=8050, debug=False, run=True):
],
value=available_parcats_columns,
multi=True,
style={"width": "100%"},
style={"width": "100%", "color": "#000"},
),
],
style={"width": "100%", "display": "block", "padding": "20px"},
15 changes: 15 additions & 0 deletions nxbench/viz/utils.py
@@ -72,6 +72,10 @@ def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
else:
df["num_nodes_bin"] = df["num_nodes"]

df["num_nodes_bin"] = (
df["num_nodes_bin"].astype("category").cat.remove_unused_categories()
)

unique_n_edges = df["num_edges"].nunique(dropna=True)
if unique_n_edges > 1:
num_edges_binned = pd.cut(df["num_edges"], bins=min(unique_n_edges, 4))
@@ -87,6 +91,9 @@ def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
else:
df["num_edges_bin"] = df["num_edges"]

df["num_edges_bin"] = (
df["num_edges_bin"].astype("category").cat.remove_unused_categories()
)
return df


@@ -146,6 +153,14 @@ def aggregate_data(df: pd.DataFrame) -> tuple[pd.DataFrame, list, list]:
col for col in group_columns if col != "algorithm" and unique_counts[col] > 1
]

df_agg.reset_index(inplace=True)
# remove unused categories
for col in ["num_nodes_bin", "num_edges_bin"]:
if col in df_agg.columns and pd.api.types.is_categorical_dtype(df_agg[col]):
df_agg[col] = df_agg[col].cat.remove_unused_categories()

df_agg.set_index(group_columns, inplace=True)

return df_agg, group_columns, available_parcats_columns


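The remove_unused_categories() calls added in preprocess_data and aggregate_data presumably keep empty bins produced by pd.cut out of downstream group-bys and the parallel-categories view. A small sketch, independent of nxbench, of the pandas behavior involved:

import pandas as pd

df = pd.DataFrame({"num_nodes": [10, 20, 30, 4000]})
df["num_nodes_bin"] = pd.cut(df["num_nodes"], bins=4)

subset = df[df["num_nodes"] < 100]  # only rows from the lowest bin survive
print(len(subset["num_nodes_bin"].cat.categories))  # 4 -- the empty bins are still categories

trimmed = subset["num_nodes_bin"].cat.remove_unused_categories()
print(len(trimmed.cat.categories))  # 1 -- only the bin that actually has rows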
