From 0e95e6770f3920671ed84e3d1811047833f1cdc6 Mon Sep 17 00:00:00 2001 From: dpys Date: Wed, 11 Dec 2024 12:04:46 -0500 Subject: [PATCH 1/2] fix: viz mean --- nxbench/benchmarks/benchmark.py | 12 ++- nxbench/viz/app.py | 127 ++++++++++++++++++++++---------- 2 files changed, 96 insertions(+), 43 deletions(-) diff --git a/nxbench/benchmarks/benchmark.py b/nxbench/benchmarks/benchmark.py index 10c8947..c80ce30 100644 --- a/nxbench/benchmarks/benchmark.py +++ b/nxbench/benchmarks/benchmark.py @@ -204,6 +204,9 @@ def prepare_benchmark( logger.exception("graphblas_algorithms backend not available") return None try: + logger.info( + f"GraphBlas Algorithms nthreads={ga.ss.config['nthreads']} " + ) return ga.Graph.from_networkx(original_graph) except Exception: logger.exception("Error converting graph to graphblas format") @@ -230,10 +233,11 @@ def do_benchmark( try: algo_func = get_algorithm_function(algo_config, backend) - if isinstance(algo_func, partial): - alg_func_name = algo_func.func.__name__ - else: - alg_func_name = algo_func.__name__ + alg_func_name = ( + algo_func.func.__name__ + if isinstance(algo_func, partial) + else algo_func.__name__ + ) logger.debug(f"Got algorithm function: {alg_func_name}") except (ImportError, AttributeError): logger.exception(f"Function not available for backend {backend}") diff --git a/nxbench/viz/app.py b/nxbench/viz/app.py index 9420231..e06649f 100644 --- a/nxbench/viz/app.py +++ b/nxbench/viz/app.py @@ -14,7 +14,6 @@ def run_server(port=8050, debug=False): pd.DataFrame.iteritems = pd.DataFrame.items essential_columns = ["algorithm", "execution_time", "memory_used"] - df = df.dropna(subset=essential_columns) df["execution_time"] = pd.to_numeric(df["execution_time"], errors="coerce") @@ -40,7 +39,6 @@ def run_server(port=8050, debug=False): for col in string_columns: df[col] = df[col].astype(str).str.strip().str.lower() - aggregation_columns = ["execution_time", "memory_used"] group_columns = [ "algorithm", "dataset", @@ -56,8 
+54,14 @@ def run_server(port=8050, debug=False): "os", "num_thread", ] - df = df.groupby(group_columns, as_index=False)[aggregation_columns].mean() - df.set_index(group_columns, inplace=True) + + # compute both mean and count + df_agg = df.groupby(group_columns, as_index=False).agg( + mean_execution_time=("execution_time", "mean"), + mean_memory_used=("memory_used", "mean"), + sample_count=("execution_time", "size"), + ) + df_agg.set_index(group_columns, inplace=True) available_parcats_columns = [col for col in group_columns if col != "algorithm"] @@ -72,12 +76,12 @@ def run_server(port=8050, debug=False): options=[ {"label": alg.title(), "value": alg} for alg in sorted( - df.index.get_level_values("algorithm").unique() + df_agg.index.get_level_values("algorithm").unique() ) ], - value=sorted(df.index.get_level_values("algorithm").unique())[ - 0 - ], + value=sorted( + df_agg.index.get_level_values("algorithm").unique() + )[0], clearable=False, style={"width": "100%"}, ), @@ -117,7 +121,7 @@ def run_server(port=8050, debug=False): {"label": c.replace("_", " ").title(), "value": c} for c in available_parcats_columns ], - value=available_parcats_columns, # default select all + value=available_parcats_columns, multi=True, style={"width": "100%"}, ), @@ -144,11 +148,12 @@ def run_server(port=8050, debug=False): active_tab="parcats-tab", style={"marginTop": "20px"}, ), + dcc.Store(id="mean-values-store"), ] ) @app.callback( - Output("benchmark-graph", "figure"), + [Output("benchmark-graph", "figure"), Output("mean-values-store", "data")], [ Input("algorithm-dropdown", "value"), Input("color-toggle", "value"), @@ -159,7 +164,7 @@ def update_graph(selected_algorithm, color_by, selected_dimensions): selected_algorithm = selected_algorithm.lower() try: - filtered_df = df.xs(selected_algorithm, level="algorithm") + filtered_df = df_agg.xs(selected_algorithm, level="algorithm") except KeyError: fig = go.Figure() fig.update_layout( @@ -174,7 +179,7 @@ def 
update_graph(selected_algorithm, color_by, selected_dimensions): } ], ) - return fig + return fig, [] if filtered_df.empty: fig = go.Figure() @@ -190,15 +195,18 @@ def update_graph(selected_algorithm, color_by, selected_dimensions): } ], ) - return fig + return fig, [] if color_by == "execution_time": - color_values = filtered_df["execution_time"] + mean_values = filtered_df["mean_execution_time"] colorbar_title = "Execution Time (s)" else: - color_values = filtered_df["memory_used"] + mean_values = filtered_df["mean_memory_used"] colorbar_title = "Memory Used (GB)" + counts = filtered_df["sample_count"].values + color_values = mean_values.values + dims = [ { "label": dim_col.replace("_", " ").title(), @@ -207,24 +215,27 @@ def update_graph(selected_algorithm, color_by, selected_dimensions): for dim_col in selected_dimensions ] - parcats = go.Parcats( - dimensions=dims, - line={ - "color": color_values, - "colorscale": "Tealrose", - "showscale": True, - "colorbar": {"title": colorbar_title}, - }, - hoverinfo="count", + fig = go.Figure() + fig.add_trace( + go.Parcats( + dimensions=dims, + line={ + "color": color_values, + "colorscale": "Tealrose", + "showscale": True, + "colorbar": {"title": colorbar_title}, + }, + counts=counts, + hoverinfo="count", + hovertemplate="Count: %{count}\nMean: REPLACE_ME", + ) ) - - fig = go.Figure(data=parcats) fig.update_layout( title=f"Benchmark Results for {selected_algorithm.title()}", template="plotly_white", ) - return fig + return fig, color_values.tolist() @app.callback( Output("violin-graph", "figure"), @@ -237,7 +248,7 @@ def update_graph(selected_algorithm, color_by, selected_dimensions): def update_violin(selected_algorithm, color_by, selected_dimensions): selected_algorithm = selected_algorithm.lower() try: - filtered_df = df.xs(selected_algorithm, level="algorithm").reset_index() + filtered_df = df_agg.xs(selected_algorithm, level="algorithm").reset_index() except KeyError: fig = go.Figure() fig.update_layout( @@ -270,7 
+281,11 @@ def update_violin(selected_algorithm, color_by, selected_dimensions): ) return fig - y_metric = "execution_time" if color_by == "execution_time" else "memory_used" + y_metric = ( + "mean_execution_time" + if color_by == "execution_time" + else "mean_memory_used" + ) y_label = "Execution Time" if color_by == "execution_time" else "Memory Used" violin_dimension = selected_dimensions[0] if selected_dimensions else "backend" @@ -296,6 +311,7 @@ def update_violin(selected_algorithm, color_by, selected_dimensions): "cpu", "os", "num_thread", + "sample_count", ], title=f"{y_label} Distribution for {selected_algorithm.title()}", ) @@ -304,22 +320,55 @@ def update_violin(selected_algorithm, color_by, selected_dimensions): app.clientside_callback( """ - function(hoverData) { - setTimeout(() => { - const tooltips = document.querySelectorAll('.hoverlayer .hovertext'); - tooltips.forEach(tooltip => { - const textNode = tooltip.querySelector('text'); - if (textNode && textNode.textContent.includes('Count:')) { - textNode.textContent = textNode.textContent.replace('Count:', - 'Mean:'); + function(hoverData, meanValues) { + if (!hoverData || !hoverData.points || hoverData.points.length === 0) { + return null; + } + + if (!meanValues) { + // No mean values available yet + return null; + } + + var point = hoverData.points[0]; + var pointIndex = point.pointNumber; + var meanValue = meanValues[pointIndex]; + + const tooltips = document.querySelectorAll('.hoverlayer .hovertext'); + // Create a MutationObserver that waits for the tooltip to appear + const observer = new MutationObserver(mutations => { + let replaced = false; + mutations.forEach(mutation => { + if (mutation.type === 'childList') { + const tooltips = document.querySelectorAll('.hoverlayer . 
+ hovertext text'); + tooltips.forEach(tNode => { + if (tNode.textContent.includes('REPLACE_ME')) { + tNode.textContent = tNode.textContent.replace( + 'REPLACE_ME', + meanValue.toFixed(3) + ); + replaced = true; + } + }); } }); - }, 100); + // Once replaced, disconnect the observer to stop unnecessary monitoring + if (replaced) { + observer.disconnect(); + } + }); + + const hoverlayer = document.querySelector('.hoverlayer'); + if (hoverlayer) { + observer.observe(hoverlayer, { childList: true, subtree: true }); + } + return null; } """, Output("hover-text-hack", "children"), - [Input("benchmark-graph", "hoverData")], + [Input("benchmark-graph", "hoverData"), Input("mean-values-store", "data")], ) app.run_server(port=port, debug=debug) From a3731a92c2f0f9c45f050ac0123b130334575060 Mon Sep 17 00:00:00 2001 From: dpys Date: Wed, 11 Dec 2024 12:44:27 -0500 Subject: [PATCH 2/2] fix: add missing vanilla graphblas import --- nxbench/benchmarks/benchmark.py | 3 ++- nxbench/configs/example.yaml | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/nxbench/benchmarks/benchmark.py b/nxbench/benchmarks/benchmark.py index c80ce30..cb2a74a 100644 --- a/nxbench/benchmarks/benchmark.py +++ b/nxbench/benchmarks/benchmark.py @@ -199,13 +199,14 @@ def prepare_benchmark( if "graphblas" in backend and is_graphblas_available(): try: + gb = import_module("graphblas") ga = import_module("graphblas_algorithms") except ImportError: logger.exception("graphblas_algorithms backend not available") return None try: logger.info( - f"GraphBlas Algorithms nthreads={ga.ss.config['nthreads']} " + f"GraphBlas Algorithms nthreads={gb.ss.config['nthreads']} " ) return ga.Graph.from_networkx(original_graph) except Exception: diff --git a/nxbench/configs/example.yaml b/nxbench/configs/example.yaml index 76fe855..63517d0 100644 --- a/nxbench/configs/example.yaml +++ b/nxbench/configs/example.yaml @@ -241,6 +241,7 @@ validation: matrix: backend: - "networkx" + - "parallel" - "graphblas" 
num_threads: - "1" @@ -250,6 +251,7 @@ matrix: env_config: req: - "networkx==3.4.2" + - "nx-parallel==0.3rc0.dev0" - "graphblas_algorithms==2023.10.0" pythons: - "3.10"