diff --git a/.gitignore b/.gitignore index a4fa544..91c5946 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,10 @@ __pycache__/ tests/__pycache__/* *.py[cod] *.pyc +*.so + + +# results: +*.png +!csc.png +*.json diff --git a/.vscode/settings.json b/.vscode/settings.json index 8955a79..6cc2d95 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,7 +1,3 @@ { "rust-analyzer.check.features": null, - "rust-analyzer.cargo.features": [ - "bindings" - ], - "rust-analyzer.check.features": "bindings" } diff --git a/Cargo.toml b/Cargo.toml index c242975..f91e679 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,13 +6,44 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] name = "coreset_sc" -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] +path = "src/lib.rs" + -[features] -default = [] -bindings = ["pyo3"] [dependencies] -clap = "4.5.20" -pyo3 = {version = "0.22.4", optional = true, features = ["extension-module"]} +ndarray = "0.16.1" +rand = "0.8.5" +pyo3 = {version = "0.22.6", features = ["extension-module"]} +faer = { version = "0.19.4", features = ["rayon"] } +numpy = "0.22.1" + +sampling-tree = "0.1.0" +rand_distr = "0.4.3" +criterion = "0.5.1" +rayon = "1.10.0" + + +faer-ext = { version = "0.3.0", features = ["ndarray"] } +ndarray-rand = "0.15.0" + + +[workspace] +resolver = "2" + +[profile.dev] +incremental = true + + + +[profile.release] +debug = true + + + + + +[[bench]] +name = "sbm" +harness = false diff --git a/README.md b/README.md index 20a22b5..db9843f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,43 @@ # coreset-sc -Coreset Spectral Clustering +A minimal implementation of the Coreset Spectral Clustering (CSC) algorithm given in [this paper](https://openreview.net/pdf?id=1qgZXeMTTU). -Work in progress.... +The presented method repeatedly jumps across the equivalence between the normalised cut and weighted kernel k-means problems to apply coreset methods to spectral clustering. + +Combined with [recent work on fast spectral clustering](https://neurips.cc/virtual/2023/poster/71723), this gives us a method for clustering very large graphs (millions of nodes) in seconds by only running (fast) spectral clustering on a much smaller induced subgraph. It can do so in even the most extreme case, where the number of clusters is linear in the number of nodes. See the experiments section of the [paper](https://openreview.net/pdf?id=1qgZXeMTTU).
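+
+Roughly, the construction used here (see `src/coreset/` and the `shift` parameter of `CoresetSpectralClustering`) treats each node $i$ as a weighted kernel k-means point with weight $d_i$ (its degree) under the kernel
+
+$$
+K = D^{-1} A D^{-1}, \qquad K' = K + \sigma D^{-1},
+$$
+
+where $\sigma \ge 0$ is an optional shift that helps keep the kernel positive semidefinite on graphs with large edge weights relative to degree. Weighted kernel k-means on $(K', d)$ agrees with the normalised cut objective on $A$ up to an additive constant, so a coreset for the kernel k-means instance yields a small weighted subgraph whose spectral clustering can be transferred back to the full graph.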
+ +![Coreset Spectral Clustering](csc.png) + + + + +Basic usage: +```python +from coreset_sc import CoresetSpectralClustering, gen_sbm +from sklearn.metrics.cluster import adjusted_rand_score + +# Generate a graph from the stochastic block model +n = 1000 # number of nodes per cluster +k = 50 # number of clusters +p = 0.5 # probability of an intra-cluster edge +q = (1.0 / n) / k # probability of an inter-cluster edge + + +# A is a sparse scipy CSR matrix of a symmetric adjacency graph +A,ground_truth_labels = gen_sbm(n, k, p, q) + +coreset_ratio = 0.1 # fraction of the data to use for the coreset graph + +csc = CoresetSpectralClustering( + num_clusters=k, coreset_ratio=coreset_ratio +) +csc.fit(A) # sample extract and cluster the coreset graph +csc.label_full_graph() # label the rest of the graph given the coreset labels +pred_labels = csc.labels_ # get the full labels + +# Alternatively, label the full graph in one line: +pred_labels = csc.fit_predict(A) +ari = adjusted_rand_score(ground_truth_labels,pred_labels) +``` [Python Docs](https://benjourdan.github.io/coreset-sc/) diff --git a/benches/sbm.rs b/benches/sbm.rs new file mode 100644 index 0000000..3e8804a --- /dev/null +++ b/benches/sbm.rs @@ -0,0 +1,37 @@ + + + +use criterion::{criterion_group, criterion_main, Criterion, black_box}; + + +use coreset_sc::gen_sbm_with_self_loops; + + +fn bench_sbm(c: &mut Criterion) { + let mut group = c.benchmark_group("sbm"); + + let ns = [1000]; + let ks = [20]; + + + for n in ns{ + for k in ks{ + let p = 0.5; + let q = (1.0/(n as f64))/(k as f64); + group.sample_size(10); + group.nresamples(10); + group.measurement_time(std::time::Duration::from_secs(10)); + group.bench_function( + format!("gen_sbm_{}_{}",n,k).as_str(), + |b| b.iter(|| black_box(gen_sbm_with_self_loops(black_box(n),black_box(k),black_box(p),black_box(q))) + )); + + } + } + group.finish(); +} + + + +criterion_group!(benches, bench_sbm); +criterion_main!(benches); diff --git a/csc.png b/csc.png new file mode 100644 index 0000000..907460e Binary files /dev/null and b/csc.png differ diff --git a/pyproject.toml b/pyproject.toml index e4b2936..3c46a10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,8 +12,12 @@ classifiers = [ ] dynamic = ["version"] [tool.maturin] -features = ["pyo3/extension-module", "bindings"] - +cargo-extra-args = ["--quiet"] +features = ["pyo3/extension-module"] +python-source = "python" +# build with release optimizations +release = true +strip = false [tool.ruff] diff --git a/python/coreset_sc/__init__.py b/python/coreset_sc/__init__.py new file mode 100644 index 0000000..501a806 --- /dev/null +++ b/python/coreset_sc/__init__.py @@ -0,0 +1,71 @@ +from . import coreset_sc, utils +from .csc import CoresetSpectralClustering as CoresetSpectralClustering + +# import maturin_import_hook +# import scipy +# import stag.random +# from maturin_import_hook.settings import MaturinSettings + +# maturin_import_hook.install( +# enable_project_importer=True, +# enable_rs_file_importer=True, +# settings=MaturinSettings( +# release=True, +# strip=True, +# ), +# show_warnings=False, +# ) + + +def gen_sbm(n, k, p, q): + """ + Generate an approximate sample from a Stochastic Block Model (SBM) graph. + + Parameters + ---------- + n : int + Number of nodes in each cluster. + k : int + Number of clusters. + p : float + Probability of an edge within the same cluster. + q : float + Probability of an edge between different clusters. 
+ + Returns + ------- + adj_mat : scipy.sparse.csr_matrix, shape = (n*k, n*k) + The symmetric adjacency matrix of the generated graph with self loops added. + labels : numpy.ndarray, shape = (n*k,) + The ground truth cluster labels + """ + assert isinstance(n, int), "n must be an integer" + assert isinstance(k, int), "k must be an integer" + assert isinstance(p, float), "p must be a float" + assert isinstance(q, float), "q must be a float" + assert n > 0, "n must be greater than 0" + assert k > 0, "k must be greater than 0" + assert 0 <= p <= 1, "p must be between 0 and 1" + assert 0 <= q <= 1, "q must be between 0 and 1" + + size, data, indices, indptr, labels = coreset_sc.gen_sbm(n, k, p, q) + adj_mat = utils.convert_to_csr_matrix(size, data, indptr, indices) + return adj_mat, labels + + +# def stag_sbm(n, k, p, q): +# assert isinstance(n, int), "n must be an integer" +# assert isinstance(k, int), "k must be an integer" +# assert isinstance(p, float), "p must be a float" +# assert isinstance(q, float), "q must be a float" +# assert n > 0, "n must be greater than 0" +# assert k > 0, "k must be greater than 0" +# assert 0 <= p <= 1, "p must be between 0 and 1" +# assert 0 <= q <= 1, "q must be between 0 and 1" +# N = int(n * k) +# g = stag.random.sbm(N, k, p, q, False) +# adj = g.adjacency().to_scipy() +# adj = (adj + scipy.sparse.eye(int(n * k))).tocsr() +# labels = stag.random.sbm_gt_labels(N, k) + +# return adj, labels diff --git a/python/coreset_sc/csc.py b/python/coreset_sc/csc.py new file mode 100644 index 0000000..c4357ca --- /dev/null +++ b/python/coreset_sc/csc.py @@ -0,0 +1,240 @@ +import numpy +from scipy import sparse +from sklearn.base import BaseEstimator, ClusterMixin +from sklearn.cluster import KMeans + +from . import coreset_sc, utils + + +class CoresetSpectralClustering(BaseEstimator, ClusterMixin): + """ + Coreset Spectral Clustering + + Parameters + ---------- + num_clusters : int + Number of clusters to form. + + coreset_ratio : float, default=0.01 + Ratio of the coreset size to the original data size. + + k_over_sampling_factor : float, default=2.0 + The factor to oversample the number of clusters for the coreset + seeding stage. Higher values will increase the "resolution" of the + sampling distribution, but take longer to compute. + + shift: float, default=0.0 + The shift to add to the implicit kernel matrix of the form K' = K + shift*D^{-1}. + This is useful for graphs with large edge weights relative to degree, which can + cause the kernel matrix to be indefinite. + + kmeans_alg : sklearn.cluster.KMeans, default=None + The KMeans algorithm to use for clustering the coreset embeddings. + If None, a default KMeans algorithm will be used. + + full_labels : bool, default=True + Whether to return the full labels of the graph after fitting. + If False, only the coreset labels will be returned. 
+ + Attributes + ---------- + """ + + def __init__( + self, + num_clusters, + coreset_ratio=0.01, + k_over_sampling_factor=2.0, + shift=0.0, + kmeans_alg=None, + full_labels=True, + ignore_warnings=False, + ): + + assert isinstance(num_clusters, int), "Number of clusters must be an integer" + assert num_clusters > 1, "Number of clusters must be greater than 1" + + assert isinstance(coreset_ratio, float), "Coreset ratio must be a float" + assert ( + coreset_ratio > 0.0 and coreset_ratio < 1.0 + ), "Coreset ratio must be in the range (0, 1)" + + assert isinstance( + k_over_sampling_factor, float + ), "k_over_sampling_factor must be a float" + assert ( + k_over_sampling_factor >= 1.0 + ), "k_over_sampling_factor must be at least 1.0" + + assert isinstance(shift, float), "shift must be a float" + assert shift >= 0.0, "shift must be non-negative" + assert full_labels in [True, False], "full_labels must be a boolean" + assert ignore_warnings in [True, False], "ignore_warnings must be a boolean" + + self.num_clusters = num_clusters + self.coreset_ratio = coreset_ratio + self.k_over_sampling_factor = k_over_sampling_factor + self.shift = shift + self.full_labels = full_labels + + self.kmeans_alg = ( + kmeans_alg + if kmeans_alg is not None + else KMeans(n_clusters=num_clusters, n_init=10) + ) + self.ignore_warnings = ignore_warnings + + self.n_ = None + self.data_ = None + self.indices_ = None + self.indptr_ = None + self.nnz_per_row_ = None + self.degrees_ = None + + self.coreset_size_ = None + self.coreset_indices_ = None + self.coreset_labels_ = None + self.coreset_weights_ = None + + self.labels_ = None + self.closest_cluster_distance_ = None + + def fit(self, adjacency_matrix, y=None): + """ + Fit the coreset clustering algorithm on the sparse adjacency matrix. + + Parameters + ---------- + adjacency_matrix : scipy.sparse.csr_matrix, shape = (n_samples, n_samples) + The adjacency matrix of the graph. This must contain self loops + for each node. + + y : Ignored + Not used, present here for API consistency by convention. + + Returns + ------- + self : object + Fitted estimator.
""" + assert isinstance( + adjacency_matrix, sparse.csr_matrix + ), "Adjacency matrix must be a scipy.sparse.csr_matrix" + assert ( + adjacency_matrix.shape[0] == adjacency_matrix.shape[1] + ), "Adjacency matrix must be square" + assert ( + self.num_clusters * self.k_over_sampling_factor <= adjacency_matrix.shape[0] + ), "Number of clusters times the oversampling factor must be at most the number of samples" + + self.n_ = adjacency_matrix.shape[0] + + # Compute degree vector + self.degrees_ = adjacency_matrix.sum(axis=1).A1 + # decompose the adjacency matrix into its components + self.data_, self.indices_, self.indptr_ = utils.convert_from_csr_matrix( + adjacency_matrix + ) + self.nnz_per_row_ = numpy.diff(self.indptr_).astype(numpy.uint64) + + rough_coreset_size = int(self.coreset_ratio * self.n_) + + self.coreset_indices_, self.coreset_weights_, coreset_embeddings_ = ( + coreset_sc.coreset_embeddings( + self.num_clusters, + self.n_, + rough_coreset_size, + self.k_over_sampling_factor, + self.data_, + self.indices_, + self.indptr_, + self.nnz_per_row_, + self.degrees_, + self.shift, + self.ignore_warnings, + ) + ) + self.coreset_size_ = self.coreset_indices_.shape[0] + + # run kmeans on the coreset embeddings + self.kmeans_alg.fit(coreset_embeddings_) + self.coreset_labels_ = self.kmeans_alg.labels_.astype(numpy.uint64) + + if self.full_labels: + self.label_full_graph() + + return self + + def compute_conductances(self): + """ + Compute the conductance of the labelled graph after fitting. + + Returns + ------- + conductances : numpy.ndarray, shape = (num_clusters,) + The conductance of each cluster + """ + + assert self.labels_ is not None, "Labels must be computed before conductances" + # This also assumes all the other required attributes are set + + return coreset_sc.compute_conductances( + self.num_clusters, + self.n_, + self.data_, + self.indices_, + self.indptr_, + self.nnz_per_row_, + self.degrees_, + self.labels_, + ) + + def label_full_graph(self): + """ + Label the full graph using the coreset labels. + + Returns + ------- + labels : numpy.ndarray, shape = (n_samples,) + Cluster assignments. + """ + + self.labels_, self.closest_cluster_distance_ = coreset_sc.label_full_graph( + self.num_clusters, + self.n_, + self.data_, + self.indices_, + self.indptr_, + self.nnz_per_row_, + self.degrees_, + self.coreset_indices_, + self.coreset_weights_, + self.coreset_labels_, + self.shift, + ) + + return self.labels_ + + def fit_predict(self, adjacency_matrix, y=None): + """ + Fit the coreset clustering algorithm on the sparse adjacency matrix + and return the cluster assignments. + + Parameters + ---------- + adjacency_matrix : scipy.sparse.csr_matrix, shape = (n_samples, n_samples) + The adjacency matrix of the graph. This must contain self loops + for each node. + + y : Ignored + Not used, present here for API consistency by convention. + + Returns + ------- + labels : numpy.ndarray, shape = (n_samples,) + Cluster assignments. + """ + self.fit(adjacency_matrix) + self.label_full_graph() + + return self.labels_ diff --git a/python/coreset_sc/utils.py b/python/coreset_sc/utils.py new file mode 100644 index 0000000..3bb6fe8 --- /dev/null +++ b/python/coreset_sc/utils.py @@ -0,0 +1,33 @@ +import numpy as np +from scipy.sparse import csr_matrix + + +def convert_from_csr_matrix(matrix: csr_matrix): + """ + Convert a scipy.sparse.csr_matrix into a format compatible with the Rust module.
This function ensures that indices are of type `np.uintp` (which corresponds to `usize` in Rust) + and data is of type `np.float64` (which corresponds to `f64` in Rust). + """ + matrix.sort_indices() + indices = matrix.indices.astype(np.uintp) # Convert indices to `usize` + indptr = matrix.indptr.astype(np.uintp) # Convert indptr to `usize` + + # Convert data to float64 (f64 in Rust) + if matrix.data.dtype == np.float32: + data = matrix.data.astype(np.float64) + elif matrix.data.dtype == np.float64: + data = matrix.data + else: + raise ValueError("Data type not supported, expected float32 or float64.") + + return data, indices, indptr + + +def convert_to_csr_matrix(size, data, indptr, indices): + """ + Reassemble a scipy.sparse.csr_matrix of shape (size, size) from the + (data, indptr, indices) components produced by the Rust module. + """ + matrix = csr_matrix((data, indices, indptr), shape=(size, size)) + return matrix diff --git a/requirements.txt b/requirements.txt index ba34161..3fa4fcb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,7 @@ sphinx-rtd-theme>=1.0 # For the Sphinx Read the Docs theme # For example: numpy>=1.20 # Example of a project dependency pandas>=1.3 # Example of a project dependency +matplotlib>=3.7 +networkx>=3.0 +scikit-learn>=1.0 +tqdm>=2.0 diff --git a/src/coreset/common.rs b/src/coreset/common.rs new file mode 100644 index 0000000..9e4970e --- /dev/null +++ b/src/coreset/common.rs @@ -0,0 +1,236 @@ +use std::ops::{Add, AddAssign}; +use std::fmt::Debug; + + +pub type Float = f64; + + +#[allow(clippy::enum_variant_names)] +#[allow(dead_code)] +#[derive(Debug)] +pub enum Error{ + NodeNotFound(Index), + NodeHasNoParent(ShiftedIndex), + NodeAlreadyInserted(Index), + EmptyTree, + NumericalError +} + +impl std::fmt::Display for Error{ + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result{ + match self{ + Error::NodeNotFound(index) => write!(f, "Node with index {} not found", index.0), + Error::NodeHasNoParent(index) => write!(f, "Node with shifted_index {} has no parent", index.0), + Error::NodeAlreadyInserted(index) => write!(f, "Node with index {} is already inserted", index.0), + Error::EmptyTree => write!(f, "Tree is empty"), + Error::NumericalError => write!(f, "Numerical error"), + } + } +} + + +impl std::error::Error for Error {} +// MARK: -Newtypes + +#[derive(Eq, PartialEq, Hash, Copy, Clone, Debug)] +pub struct Index(pub usize); + +impl From<usize> for Index{ + fn from(index: usize) -> Self{ + Index(index) + } +} + +#[derive(Eq, PartialEq, Hash, Copy, Clone, Debug)] +pub struct ShiftedIndex(pub usize); + +impl From<usize> for ShiftedIndex{ + fn from(index: usize) -> Self{ + ShiftedIndex(index) + } +} + + +#[derive(Copy, Clone, Debug)] +pub struct Weight(pub Float); + +impl From<Float> for Weight{ + fn from(weight: Float) -> Self{ + Weight(weight) + } +} + + + +impl Add for Weight{ + type Output = Weight; + fn add(self, other: Weight) -> Weight{ + Weight(self.0 + other.0) + } +} + +impl AddAssign for Weight{ + fn add_assign(&mut self, other: Weight){ + self.0 += other.0; + } +} + + +#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)] +pub struct CoresetCrossTerm(pub Float); +impl From<Float> for CoresetCrossTerm{ + fn from(coreset_cross_term: Float) -> Self{ + CoresetCrossTerm(coreset_cross_term) + } +} + + + +#[derive(Copy, Clone, Debug)] +pub struct SelfAffinity(pub Float); + +impl From<Float> for SelfAffinity{ + fn from(self_affinity: Float) ->
Self{ + SelfAffinity(self_affinity) + } +} + + + +#[derive(Copy, Clone, Debug)] + +pub struct Delta(pub Float); +impl From for Delta{ + fn from(delta: Float) -> Self{ + Delta(delta) + } +} + + +#[derive(Copy, Clone, Debug)] +pub struct Contribution(pub Float); +impl From for Contribution{ + fn from(contribution: Float) -> Self{ + Contribution(contribution) + } +} + +#[derive(Copy, Clone, Debug)] +pub struct SmoothedContribution(pub Float); +impl From for SmoothedContribution{ + fn from(contribution: Float) -> Self{ + SmoothedContribution(contribution) + } +} + + +// MARK: Datapoint struct + +#[derive(Debug)] +pub struct Datapoint{ + pub weight: Weight, + pub self_affinity: SelfAffinity, +} + +impl Datapoint{ + // pub fn new(weight: Weight, self_affinity: SelfAffinity) -> Self{ + // Datapoint{ + // weight, + // self_affinity + // } + // } +} + +impl Datapoint{ + #[allow(dead_code)] + pub fn contribution(&self, smallest_coreset_self_affinity: Float) -> Contribution{ + Contribution(self.weight.0*(self.self_affinity.0 + smallest_coreset_self_affinity)) + } + + #[allow(dead_code)] + pub fn smoothed_contribution(&self,smallest_coreset_self_affinity: Float, cost: Float, coreset_star_weight: Weight) -> Float{ + self.contribution(smallest_coreset_self_affinity).0/cost + self.weight.0/coreset_star_weight.0 + } +} + + +#[derive(Debug, Copy, Clone)] +pub struct DatapointWithCoresetCrossTerm{ + pub weight: Weight, + pub self_affinity: SelfAffinity, + pub coreset_cross_term: CoresetCrossTerm +} + +impl DatapointWithCoresetCrossTerm{ + #[allow(dead_code)] + pub fn contribution(&self) -> Float{ + self.weight.0*(self.self_affinity.0 + self.coreset_cross_term.0) + } + + #[allow(dead_code)] + pub fn smoothed_contribution(&self, cost: Float, coreset_start_weight: Weight) -> Float{ + self.contribution()/cost + self.weight.0/coreset_start_weight.0 + } +} + +// MARK: Node trait +pub trait Node +where Self: Sized +{ + fn left_child(shifted_index: ShiftedIndex) -> ShiftedIndex{ + ShiftedIndex(2*shifted_index.0 + 1) + } + fn right_child(shifted_index: ShiftedIndex) -> ShiftedIndex{ + ShiftedIndex(2*shifted_index.0 + 2) + } + + fn parent(shifted_index: ShiftedIndex) -> Result{ + if shifted_index.0 == 0{ + return Err(Error::NodeHasNoParent(shifted_index)); + } + Ok(ShiftedIndex((shifted_index.0 - 1)/2)) + } + + fn contribution(&self) -> Contribution; + + #[allow(dead_code)] + fn smoothed_contribution(&self, cost: Contribution, coreset_star_weight: Weight) -> SmoothedContribution; + fn weight(&self) -> Weight; + fn new(weight: Weight, self_affinity: SelfAffinity, min_self_affinity: SelfAffinity) -> Self; + fn update_delta(storage: &mut Vec, shifted_index: ShiftedIndex, new_delta: Delta); + fn update_contribution(&mut self, contribution_diff: Contribution); + fn from_children(left: &Self, right: &Self) -> Self; + fn _sample(storage: &[Self], rng: &mut impl rand::Rng, smoothed:bool, cost:Contribution, coreset_star_weight:Weight) -> Result<(ShiftedIndex,Float),Error>; + fn _computed_sampling_probability(storage: &[Self], smoothed: bool, shifted_idx: ShiftedIndex, cost: Contribution, coreset_star_weight: Weight) ->Result; +} + + +// MARK: tests: +#[cfg(test)] +mod tests{ + pub use super::*; + + #[test] + fn test_datapoint_contribution(){ + let datapoint = Datapoint{ + weight: Weight(1.0), + self_affinity: SelfAffinity(2.0), + }; + let smallest_coreset_self_affinity = 0.5; + let contribution = datapoint.contribution(smallest_coreset_self_affinity); + assert_eq!(contribution.0, 2.5); + } + + #[test] + fn 
test_datapoint_smoothed_contribution(){ + let datapoint = DatapointWithCoresetCrossTerm{ + weight: Weight(1.0), + self_affinity: SelfAffinity(2.0), + coreset_cross_term: CoresetCrossTerm(0.5), + }; + let cost = 5.0; + let coreset_start_weight = Weight(10.0); + let smoothed_contribution = datapoint.smoothed_contribution(cost, coreset_start_weight); + assert_eq!(smoothed_contribution, 0.6); + } +} diff --git a/src/coreset/full.rs b/src/coreset/full.rs new file mode 100644 index 0000000..a34af82 --- /dev/null +++ b/src/coreset/full.rs @@ -0,0 +1,222 @@ +use crate::coreset::{common::*, sampling_tree::SamplingTree}; + +use rand::rngs::StdRng; + +use faer::sparse::SparseRowMatRef; +use faer::ColRef; +use rand::Rng; + + +#[derive(Debug)] +pub struct DefaultCoresetSampler<'a,T>{ + sampling_tree: SamplingTree, + num_clusters: usize, + coreset_star_weight: Weight, + coreset_size: usize, + rng: StdRng, + number_of_data_points: usize, + adj_matrix: SparseRowMatRef<'a, usize, Float>, + degree_vector: ColRef::<'a, Float>, + self_affinities: Vec, + x_star_index: Index, + numerical_warning: bool, +} + + + + +/// Assumes a undirected graph where self loops are all 1 +impl <'a,T> DefaultCoresetSampler<'a,T> + where T: Node +{ + + pub fn new( + adj_matrix: SparseRowMatRef<'a,usize, Float>, + degree_vector: ColRef<'a,Float>, + num_clusters: usize, + coreset_size: usize, + shift: Option, + rng: StdRng) -> Self{ + + let n = adj_matrix.nrows(); + assert_eq!(n, adj_matrix.ncols()); + assert_eq!(n, degree_vector.nrows()); + + let mut sampling_tree = SamplingTree::::new(); + + let shift = shift.unwrap_or(0.0); + + // Find the node with the lowest self affinity. Aka lowest value of A[i,i]/d[i]^2 + let self_affinities: Vec = degree_vector + .iter().enumerate().map(|(i,d)| SelfAffinity((shift/d) +adj_matrix[(i,i)]/(d*d))).collect(); + let x_star = self_affinities.iter().enumerate().min_by(|a,b| a.1.0.partial_cmp(&b.1.0).unwrap()).unwrap().0; + let min_self_affinity = self_affinities[x_star]; + + // Populate the sampling tree with weights and self affinities + sampling_tree.insert_from_iterator(degree_vector.iter().zip(self_affinities.iter()).map(|(d,self_affinity)|{ + (Weight(*d),*self_affinity) + }), + min_self_affinity + ); + + DefaultCoresetSampler{ + sampling_tree, + num_clusters, + coreset_star_weight: Weight(0.0), + coreset_size, + rng, + number_of_data_points: n, + adj_matrix, + degree_vector, + self_affinities, + x_star_index: Index(x_star), + numerical_warning: false, + } + } + + + fn repair(&mut self, point_added: Index){ + // We implicitly add the point to the init set and update it's neighbours: + let point_added_degree: Float = self.degree_vector[point_added.0]; + let point_added_weight: Weight = point_added_degree.into(); + let point_added_self_affinity: SelfAffinity = self.self_affinities[point_added.0]; + + self.coreset_star_weight += point_added_weight; + + // set the contribution of the added point to zero: + self.sampling_tree.update_delta(point_added, Delta(0.0)).unwrap(); + // Now we update the neighbours of the added point: + self.adj_matrix.col_indices_of_row(point_added.0).map(Index) + .zip(self.adj_matrix.values_of_row(point_added.0)) + .for_each(|(neighbour_index,edge_weight)|{ + // If the neighbour is the added point, skip it + if neighbour_index == self.x_star_index{ + return; + } + // compute the distance^2 between the added point and the neighbour: + let neighbour_degree: Float = self.degree_vector[neighbour_index.0]; + let neighbour_self_affinity: SelfAffinity = 
self.self_affinities[neighbour_index.0]; + let cross_term: CoresetCrossTerm = (edge_weight/(point_added_degree*neighbour_degree)).into(); + let mut distance2 = point_added_self_affinity.0 + neighbour_self_affinity.0 - 2.0*cross_term.0; + // update the delta of the neighbour: + if distance2 < 0.0{ + self.numerical_warning = true; + distance2 = 0.0; + } + self.sampling_tree.update_delta(neighbour_index, Delta(distance2)).unwrap(); + }) + } + + fn sample_first_point(&mut self){ + self.repair(self.x_star_index); + } + + fn sample_next_k(&mut self) -> Result<(),Error>{ + // Now we run k-means++ to sample the next k points (total k+1 points) + // first we uniformly sample the first point and repair: + let uniform_sampled_index = Index(self.rng.gen_range(0..self.number_of_data_points)); + self.repair(uniform_sampled_index); + // Now we sample the next k-1 points and repair: + for i in 0..self.num_clusters-1{ + let mut maybe_index = self.sampling_tree.sample(&mut self.rng); + + while let Err(Error::NumericalError) = maybe_index{ + // If we fail to sample, we rebuild the tree and try again: + println!("Numerical error detected on round {}. Rebuilding tree", i); + self.sampling_tree.rebuild_from_leaves(); + maybe_index = self.sampling_tree.sample(&mut self.rng); + } + let index = maybe_index.unwrap(); + self.repair(index); + } + Ok(()) + } + + fn sample_rest(&mut self) -> Result<(Vec, Vec),Error>{ + // Now we have seeded the sampling distribution, we sample the actual coreset: + let mut coreset_indices: Vec = Vec::with_capacity(self.coreset_size); + let mut coreset_weights: Vec = Vec::with_capacity(self.coreset_size); + + let cost = self.sampling_tree.storage.first().unwrap().contribution(); + + for _ in 0..self.coreset_size{ + let (index,prob) = self.sampling_tree.sample_smoothed( + &mut self.rng, cost, self.coreset_star_weight)?; + let weight = self.sampling_tree.storage.get(self.sampling_tree.get_shifted_node_index(index).unwrap().0).unwrap().weight(); + coreset_indices.push(index.0); + coreset_weights.push(weight.0/(prob*self.coreset_size as Float)); + } + + // sort the indices and weights in ascending order of indices + let mut combined: Vec<_> = coreset_indices.iter_mut().zip(coreset_weights.iter_mut()).collect(); + combined.sort_by(|a,b| a.0.cmp(&b.0)); + + + Ok((coreset_indices,coreset_weights)) + } + + pub fn sample(&mut self) -> Result<(Vec, Vec,bool),Error> { + self.sample_first_point(); + self.sample_next_k()?; + self.sample_rest().map(|(a,b)| (a,b,self.numerical_warning)) + } + +} + +#[cfg(test)] +mod tests{ + use faer::Col; + use faer::sparse::SparseRowMat; + use rand::SeedableRng; + use super::super::unstable; + + use super::*; + + #[test] + fn basic(){ + // create a sparse kernel matrix corresponding to the following dense matrix: + // 1.0 0 0.4 0.8 + // 0 1.0 0.5 0 + // 0.4 0.5 1.0 0 + // 0.8 0 0 1.0 + let adj_matrix = SparseRowMat::::try_new_from_triplets( + 4, + 4, + &[ + (0, 0, 1.0), + (0, 2, 1.0), + (0, 3, 1.0), + (1, 1, 1.0), + (1, 2, 1.0), + (2, 2, 1.0), + (2, 0, 1.0), + (2, 1, 1.0), + (3, 0, 1.0), + (3, 3, 1.0), + ], + ).unwrap(); + + let dense_mat = adj_matrix.to_dense(); + let degree_vector = Col::from_fn(4,|i| dense_mat.row(i).iter().sum::()); + + + // display the kernel matrix nicely + println!("{:?}", &adj_matrix.to_dense()); + + let mut coreset_sampler = DefaultCoresetSampler::::new( + adj_matrix.as_ref(), + degree_vector.as_ref(), + 2, + 3, + Some(0.0), + rand::SeedableRng::from_rng(rand::rngs::StdRng::from_entropy()).unwrap() + ); + println!("{:?}", &coreset_sampler); 
+ + + let (coreset_indices, coreset_weights,_) = coreset_sampler.sample().unwrap(); + println!("{:?}", coreset_indices); + println!("{:?}", coreset_weights); + panic!("stop"); + } +} diff --git a/src/coreset/mod.rs b/src/coreset/mod.rs new file mode 100644 index 0000000..1ac8caf --- /dev/null +++ b/src/coreset/mod.rs @@ -0,0 +1,7 @@ +pub mod full; +pub mod sampling_tree; +pub mod common; +pub mod unstable; +pub mod stable; + +pub mod old; diff --git a/src/coreset/old.rs b/src/coreset/old.rs new file mode 100644 index 0000000..a31ebcf --- /dev/null +++ b/src/coreset/old.rs @@ -0,0 +1,84 @@ +use faer::Col; +use faer::{sparse::SparseRowMatRef, unzipped, zipped, ColRef}; +use faer::sparse::SparseRowMat; +use rand::rngs::StdRng; +use rand::Rng; + +use super::common::Float; + + + + +#[allow(non_snake_case)] +pub fn old_coreset<'a>( + adj_matrix: SparseRowMatRef<'a,usize, Float>, + degree_vector: ColRef<'a,Float>, + num_clusters: usize, + coreset_size: usize, + rng: &mut StdRng) + -> (Vec, Vec){ + + // First construct K with D^{-1}AD^{-1} + let n = adj_matrix.nrows(); + assert_eq!(n, adj_matrix.ncols()); + assert_eq!(n, degree_vector.nrows()); + + let W = degree_vector; + let d_inv = zipped!(degree_vector).map(|unzipped!(d)| 1.0/d.read()); + let triplets: Vec<(usize,usize,f64)> = (0..n).map(|i| (i,i, d_inv[i])).collect(); + let diagonal_d_inv = SparseRowMat::try_new_from_triplets(n, n, triplets.as_slice()).unwrap(); + + let K = diagonal_d_inv.as_ref()*adj_matrix.as_ref()*diagonal_d_inv.as_ref(); + + + let self_affinities = Col::from_fn(n, |i| K[(i,i)]); + let mut probs = Col::zeros(n); + + let mut initial_coreset = Vec::with_capacity(coreset_size); + let mut initial_coreset_weight = 0.0; + + + // Select the first point uniformly at random + let mut index_to_add = rng.gen_range(0..n); + initial_coreset.push(index_to_add); + initial_coreset_weight += W[index_to_add]; + + let mut dist_2_to_new_index = Col::from_fn(n, |i| (K[(index_to_add,index_to_add)] + K[(i,i)] - 2.0*K.get(index_to_add, i).unwrap_or(&0.0))); + + (0..num_clusters-1).for_each(|_|{ + // Sample the next point proportional to the distance to the initial coreset + zipped!(&mut probs,&dist_2_to_new_index).for_each_with_index(|i,unzipped!(mut p,d)| *p = W[i]*d.read()); + index_to_add = rng.sample(rand::distributions::WeightedIndex::new(probs.as_slice()).unwrap()); + initial_coreset.push(index_to_add); + initial_coreset_weight += W[index_to_add]; + + let self_affinity = self_affinities[index_to_add]; + zipped!(&mut dist_2_to_new_index).for_each_with_index(|i,unzipped!(mut d)|{ + let new_dist = self_affinity + self_affinities[i] - 2.0*K.get(index_to_add, i).unwrap_or(&0.0); + *d = (*d).min(new_dist); + }); + }); + let costs = zipped!(&dist_2_to_new_index).map_with_index(|i,unzipped!(d)| W[i]*d.read()); + // Now we have the initial coreset, we sample the coreset with smoothed sampling + let total_cost = costs.iter().sum::(); + + let mut coreset_dist = Col::from_fn(n, |i| costs[i]/total_cost + W[i]/initial_coreset_weight); + let coreset_dist_sum = coreset_dist.iter().sum::(); + zipped!(&mut coreset_dist).for_each(|unzipped!(mut d)| *d /= coreset_dist_sum); + + + let (mut coreset,mut coreset_weights): (Vec<_>,Vec<_>) = (0..coreset_size).map(|_|{ + let index = rng.sample(rand::distributions::WeightedIndex::new(coreset_dist.as_slice()).unwrap()); + let prob = coreset_dist[index]; + let weight = W[index]/(prob*coreset_size as Float); + (index, weight) + }).unzip(); + + + + // sort the indices and weights in ascending order of indices + let mut 
combined: Vec<_> = coreset.iter_mut().zip(coreset_weights.iter_mut()).collect(); + combined.sort_by(|(a,_),(b,_)| a.cmp(b)); + + (coreset,coreset_weights) +} diff --git a/src/coreset/sampling_tree.rs b/src/coreset/sampling_tree.rs new file mode 100644 index 0000000..6746b10 --- /dev/null +++ b/src/coreset/sampling_tree.rs @@ -0,0 +1,162 @@ +use std::fmt::{Formatter, Debug}; +use crate::coreset::common::*; +use std::mem::MaybeUninit; + + +#[allow(dead_code)] +pub struct SamplingTree +{ + // Leaves are stored at the end of the storage vector. The root is at index 0. + pub storage: Vec, +} + + +impl Debug for SamplingTree +where T: Debug +{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.storage.first(){ + None =>{ + f.debug_struct("SamplingTree") + .field("root", &Option::<()>::None) + .finish() + }, + Some(root) =>{ + let tree_node = root; + f.debug_struct("SamplingTree") + .field("root", tree_node) + .finish() + } + } + } +} + +impl SamplingTree +where T: Node +{ + pub fn new() -> Self{ + SamplingTree{ + storage: Vec::new()} + } + + pub fn get_shifted_node_index(&self, node_index: Index) -> Result{ + let storage_size = self.storage.len(); + let num_leaves = (storage_size + 1)/2; + let shift = num_leaves - 1; + let shifted_node_index = node_index.0 + shift; + match shifted_node_index < storage_size{ + false => Err(Error::NodeNotFound(node_index)), + true => Ok(ShiftedIndex(shifted_node_index)) + } + } + pub fn get_node_index(&self, shifted_node_index: ShiftedIndex) -> Result{ + let storage_size = self.storage.len(); + let num_leaves = (storage_size + 1)/2; + let shift = num_leaves - 1; + let node_index = shifted_node_index.0 - shift; + match node_index < num_leaves{ + false => Err(Error::NodeNotFound(Index(node_index))), + true => Ok(Index(node_index)) + } + } + + pub fn rebuild_from_leaves(&mut self){ + let num_nodes = self.storage.len(); + let num_leaves = (num_nodes + 1)/2; + // Leaves are stored in the last num_leaves elements of the storage vector. + // We proceed by updating the first num_leaves -1 elements in reverse order. + (0..num_leaves-1).rev().for_each(|i|{ + let (left_child_idx,right_child_idx) = (2*i+1,2*i+2); + let left_child_ref = &self.storage[left_child_idx]; + let right_child_ref = &self.storage[right_child_idx]; + self.storage[i] = T::from_children(left_child_ref, right_child_ref); + }); + } + + pub fn insert_from_iterator(&mut self, mut iterator: I, min_self_affinity:SelfAffinity) ->std::ops::Range + where I: Iterator + std::iter::ExactSizeIterator + { + // Given an iterator of leaf node data, we create a balanced binary tree in a bottom up fashion. + // This is to help with cache locality and branch prediction. + + // We will fill up an array with the nodes. Leafs will be stored at the end of the array. + + // The total number of nodes in a binary tree with num_leaves is 2*num_leaves - 1. + // given an index i, the left child is 2*i + 1 and the right child is 2*i + 2. + // The parent of a node at index i is (i-1)/2. + if iterator.len() == 0{ + self.storage = Vec::new(); + return ShiftedIndex(0)..ShiftedIndex(0); + } + let num_leaves = iterator.len(); + let tree_len = 2*num_leaves - 1; + // The leaves are stored at the end of the array. We will return a range of indices that correspond to the leaves. + let mut tree: Vec> = Vec::with_capacity(tree_len); + unsafe{ + tree.set_len(tree_len); + } + // println!("{:?}",(num_leaves..tree_len).collect::>()); + // We will store the leaves at the end of the array. 
+ tree[num_leaves-1..].iter_mut().for_each(| node|{ + let node = node.as_mut_ptr(); + unsafe{ + let (weight, self_affinity) = iterator.next().unwrap(); + node.write(T::new(weight, self_affinity, min_self_affinity)); + } + }); + // println!("{:?}", (0..num_leaves).collect::>()); + // Now we proceed backwards from num_leaves-1 to 0, constructing the interal nodes. + (0..num_leaves-1).rev().for_each(|i|{ + let (left_child_idx,right_child_idx) = (2*i+1,2*i+2); + unsafe{ + let node = tree[i].as_mut_ptr(); + let left_child_ref = tree[left_child_idx].as_ptr().as_ref().unwrap(); + let right_child_ref = tree[right_child_idx].as_ptr().as_ref().unwrap(); + node.write(T::from_children(left_child_ref, right_child_ref)); + } + }); + // The root is at index 0. Now we transmute the tree to a Vec and store it in the storage field. + let tree = unsafe{ + std::mem::transmute::>, Vec>(tree) + }; + self.storage = tree; + ShiftedIndex(num_leaves-1)..ShiftedIndex(tree_len) + } + + pub fn sample(&self, rng: &mut impl rand::Rng) -> Result{ + self._sample(rng, false, Contribution(0.0), Weight(0.0)).map(|(index,_)| index) + } + + pub fn sample_smoothed(&self, rng: &mut impl rand::Rng, cost: Contribution, coreset_star_weight: Weight) -> Result<(Index,Float),Error>{ + self._sample(rng, true, cost, coreset_star_weight) + } + + pub fn _sample(&self, rng: &mut impl rand::Rng, smoothed:bool, cost:Contribution, coreset_star_weight:Weight) -> Result<(Index,Float),Error>{ + let shifted_idx_res = T::_sample(&self.storage, rng, smoothed, cost, coreset_star_weight); + shifted_idx_res.map(|(shifted_idx,prob)|{ + let idx = self.get_node_index(shifted_idx).unwrap(); + (idx, prob) + }) + } + + // pub fn compute_sampling_probability(&self, idx: Index) -> Float{ + // self._computed_sampling_probability(false, idx, Contribution(0.0), Weight(0.0)).unwrap() + // } + + // pub fn compute_smoothed_sampling_probability(&self, idx: Index, cost: Contribution, coreset_star_weight: Weight) -> Float{ + // self._computed_sampling_probability(true, idx, cost, coreset_star_weight).unwrap() + // } + + pub fn _computed_sampling_probability(&self, smoothed: bool, idx: Index, cost: Contribution, coreset_star_weight: Weight) -> Result{ + let shifted_idx = self.get_shifted_node_index(idx).unwrap(); + T::_computed_sampling_probability(&self.storage, smoothed, shifted_idx, cost, coreset_star_weight) + } + + pub fn update_delta(&mut self, idx: Index, new_delta:Delta) -> Result<(),Error>{ + let shifted_idx = self.get_shifted_node_index(idx)?; + assert!(new_delta.0 >= 0.0, "Delta: {} is negative", new_delta.0); + T::update_delta(&mut self.storage, shifted_idx, new_delta); + Ok(()) + } + +} diff --git a/src/coreset/stable.rs b/src/coreset/stable.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/coreset/unstable.rs b/src/coreset/unstable.rs new file mode 100644 index 0000000..7024295 --- /dev/null +++ b/src/coreset/unstable.rs @@ -0,0 +1,259 @@ +use std::fmt::{Formatter, Debug}; +use crate::coreset::common::*; +// MARK: - Node structs +#[derive(Debug)] +#[allow(dead_code)] +pub struct LeafNode{ + pub weight: Weight, + pub delta: Delta, +} +impl LeafNode{ + pub fn contribution(&self) -> Contribution{ + (self.weight.0*self.delta.0).into() + } + + // pub fn weight(&self) -> Weight{ + // self.weight + // } + +} +pub struct InternalNode{ + pub contribution: Contribution, + pub weight: Weight, +} + +impl InternalNode{ + + #[allow(dead_code)] + pub fn contribution(&self) -> Contribution{ + self.contribution + } + + #[allow(dead_code)] + pub fn 
smoothed_contribution(&self, cost: Contribution, coreset_star_weight: Weight) -> SmoothedContribution{ + (self.contribution.0/cost.0 + self.weight.0/coreset_star_weight.0).into() + } +} + +impl Debug for InternalNode{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("InternalNode") + .field("contribution", &self.contribution) + .field("weight", &self.weight) + .finish() + } + +} + +#[derive(Debug)] +#[allow(dead_code)] +pub enum TreeNode{ + Leaf(LeafNode), + Internal(InternalNode) +} + +#[allow(dead_code)] +impl TreeNode{ + pub fn contribution(&self) -> Contribution{ + match self{ + TreeNode::Leaf(leaf_node) => leaf_node.contribution(), + TreeNode::Internal(internal_node) => internal_node.contribution() + } + } + pub fn smoothed_contribution(&self, cost: Contribution, coreset_star_weight: Weight) -> SmoothedContribution{ + match self{ + TreeNode::Leaf(LeafNode{weight, ..}) =>{ + let contribution = self.contribution(); + let smoothed_contribution = contribution.0/cost.0 + weight.0/coreset_star_weight.0; + smoothed_contribution.into() + }, + TreeNode::Internal(internal_node) =>{ + internal_node.smoothed_contribution(cost, coreset_star_weight) + } + } + } + + pub fn weight(&self) -> Weight{ + match self{ + TreeNode::Leaf(LeafNode { weight, ..}) => *weight, + TreeNode::Internal(InternalNode{weight, ..}) => *weight + } + } + + pub fn delta(&self) -> Delta{ + match self{ + TreeNode::Leaf(LeafNode{delta, ..}) => *delta, + TreeNode::Internal(_) => panic!("Internal nodes don't have deltas") + } + } +} + + +impl Node for TreeNode{ + fn contribution(&self) -> Contribution { + self.contribution() + } + + fn smoothed_contribution(&self, cost: Contribution, coreset_star_weight: Weight) -> SmoothedContribution { + self.smoothed_contribution(cost, coreset_star_weight) + } + + fn weight(&self) -> Weight { + self.weight() + } + + fn new(weight: Weight, self_affinity: SelfAffinity, min_self_affinity: SelfAffinity) -> Self { + TreeNode::Leaf(LeafNode{ + weight, + delta: (self_affinity.0 + min_self_affinity.0).into() + }) + } + + fn update_delta(storage: &mut Vec, shifted_index: ShiftedIndex, new_delta: Delta) { + + let mut shifted_node_index = shifted_index; + + let leaf = storage.get_mut(shifted_node_index.0).unwrap(); + + match leaf{ + TreeNode::Internal(_) => panic!("should have started at a leaf node"), + TreeNode::Leaf(leaf_node) => { + if leaf_node.delta.0 <= new_delta.0{ + return; + } + + let delta_diff = leaf_node.delta.0 - new_delta.0; + let contribution_diff = delta_diff*leaf_node.weight.0; + leaf_node.delta = new_delta; + + let mut parent = TreeNode::parent(shifted_node_index); + while let Ok(parent_idx) = parent{ + let parent_node = storage.get_mut(parent_idx.0).unwrap(); + parent_node.update_contribution(contribution_diff.into()); + shifted_node_index = parent_idx; + parent = TreeNode::parent(shifted_node_index); + + } + } + } + } + + fn update_contribution(&mut self, contribution_diff: Contribution) { + match self{ + TreeNode::Leaf(_) => panic!("Leaf nodes don't have contributions"), + TreeNode::Internal(internal_node) => { + internal_node.contribution.0 -= contribution_diff.0; + } + } + } + + fn from_children(left: &Self, right: &Self) -> Self { + let contribution = (left.contribution().0 + right.contribution().0).into(); + let weight = left.weight() + right.weight(); + TreeNode::Internal(InternalNode{ + contribution, + weight + }) + } + + fn _sample(storage: &[Self], rng: &mut impl rand::Rng, smoothed:bool, cost:Contribution, coreset_star_weight:Weight) -> 
Result<(ShiftedIndex,Float),Error> { + if storage.is_empty(){ + return Err(Error::EmptyTree) + } + let mut shifted_node_index: ShiftedIndex = ShiftedIndex(0); + let mut prob: Float = 1.0; + + let mut node = storage.get(shifted_node_index.0).unwrap(); + match smoothed{ + true =>{ + while let TreeNode::Internal(_) = node{ + let left_node_idx = TreeNode::left_child(shifted_node_index); + let right_node_idx = TreeNode::right_child(shifted_node_index); + let left_node = storage.get(left_node_idx.0).unwrap(); + let right_node = storage.get(right_node_idx.0).unwrap(); + let left_node_smoothed_contribution = left_node.smoothed_contribution(cost, coreset_star_weight); + let right_node_smoothed_contribution = right_node.smoothed_contribution(cost, coreset_star_weight); + + let total_smoothed_contribution = left_node_smoothed_contribution.0 + right_node_smoothed_contribution.0; + if total_smoothed_contribution <= 0.0{ + return Err(Error::NumericalError) + } + + let sample = rng.gen_range(0.0..total_smoothed_contribution); + node = match sample <= left_node_smoothed_contribution.0{ + true => { + prob *= left_node_smoothed_contribution.0/total_smoothed_contribution; + shifted_node_index = left_node_idx; + left_node + }, + false =>{ + prob *= right_node_smoothed_contribution.0/total_smoothed_contribution; + shifted_node_index = right_node_idx; + right_node + } + } + } + Ok((shifted_node_index, prob)) + }, + false =>{ + while let TreeNode::Internal(_) = node{ + let left_node_idx = TreeNode::left_child(shifted_node_index); + let right_node_idx = TreeNode::right_child(shifted_node_index); + let left_node = storage.get(left_node_idx.0).unwrap(); + let right_node = storage.get(right_node_idx.0).unwrap(); + let left_node_contribution = left_node.contribution(); + let right_node_contribution = right_node.contribution(); + let total_smoothed_contribution = left_node_contribution.0 + right_node_contribution.0; + if total_smoothed_contribution <= 0.0{ + return Err(Error::NumericalError) + } + + let sample = rng.gen_range(0.0..total_smoothed_contribution); + node = match sample <= left_node_contribution.0{ + true => { + prob *= left_node_contribution.0/total_smoothed_contribution; + shifted_node_index = left_node_idx; + left_node + }, + false =>{ + prob *= right_node_contribution.0/total_smoothed_contribution; + shifted_node_index = right_node_idx; + right_node + } + } + } + Ok((shifted_node_index, prob)) + } + } + + } + + fn _computed_sampling_probability(storage: &[Self], smoothed: bool, shifted_idx: ShiftedIndex, cost: Contribution, coreset_star_weight: Weight) ->Result { + + let mut shifted_node_index = shifted_idx; + let mut prob: Float = 1.0; + + match smoothed{ + true =>{ + while let Ok(parent_idx) = TreeNode::parent(shifted_node_index){ + let parent_node = storage.get(parent_idx.0).unwrap(); + let parent_contribution = parent_node.smoothed_contribution(cost, coreset_star_weight); + let child_contribution = storage.get(shifted_node_index.0).unwrap().smoothed_contribution(cost, coreset_star_weight); + prob *= child_contribution.0/parent_contribution.0; + shifted_node_index = parent_idx; + } + }, + false =>{ + while let Ok(parent_idx) = TreeNode::parent(shifted_node_index){ + let parent_node = storage.get(parent_idx.0).unwrap(); + let parent_contribution = parent_node.contribution(); + let child_contribution = storage.get(shifted_node_index.0).unwrap().contribution(); + prob *= child_contribution.0/parent_contribution.0; + shifted_node_index = parent_idx; + } + } + } + Ok(prob) + } + +} diff --git 
a/src/lib.rs b/src/lib.rs index 02e1a2d..518e734 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,34 +1,250 @@ -// src/lib.rs +//! coreset-sc is a Rust library with Python bindings +//! for Coreset Spectral Clustering. +//! +//! The library implements the coreset spectral clustering algorithm given in the [paper](https://openreview.net/forum?id=1qgZXeMTTU). +//! +//! The (Rust) API allows the user to pass as input one of the following: +//! +//! 1. A sparse symmetric adjacency matrix in the form of a CSR/CSC matrix. +//! - Coreset spectral clustering is performed on the input graph. +//! 2. Input data points in euclidean space in the form of a 2D array. +//! - A k-nn graph is constructed using [faiss-rs](https://github.com/Enet4/faiss-rs) +//! - Or a kernel function can be specified and a coreset of the full similarity matrix can be computed lazily. +//! 3. A k-nn oracle that can be used to query the k-nearest neighbours of a point. +//! - The coreset will be computed lazily using the oracle. + +mod coreset; +use faer::ColRef; + mod rust; +use numpy::PyArray1; +use numpy::ToPyArray; +use rust::*; +mod sbm; + +pub use rust::default_coreset_sampler; +pub use rust::gen_sbm_with_self_loops; +pub use rust::label_full_graph; +pub use rust::compute_conductances; + -// Import necessary dependencies from PyO3 -#[cfg(feature = "bindings")] use pyo3::prelude::*; -// Python bindings: conditionally included if the "bindings" feature is enabled -#[cfg(feature = "bindings")] +use pyo3::{ + pymodule, + types::{PyModule, PyTuple}, + Bound, PyResult, +}; + +use numpy::{IntoPyArray, PyReadonlyArray1}; + +use ndarray::ArrayView1; + use faer::mat::from_raw_parts; +use faer::mat::MatRef; +use faer_ext::IntoNdarray; +use faer::sparse::SparseRowMatRef; +use crate::coreset::common::Float; + + + +pub fn construct_from_py<'py>( + n: usize, + data: &'py PyReadonlyArray1<f64>, + indices: &'py PyReadonlyArray1<usize>, + indptr: &'py PyReadonlyArray1<usize>, + nnz_per_row: &'py PyReadonlyArray1<usize>, + degrees: &'py PyReadonlyArray1<f64>, +) -> (SparseRowMatRef<'py, usize, Float>, ColRef<'py,Float>){ + let adj_mat_faer: SparseRowMatRef< usize,Float> = data_indices_indptr_to_sparse_mat_ref(n, + data.as_slice().unwrap(), + indices.as_slice().unwrap(), + indptr.as_slice().unwrap(), + nnz_per_row.as_slice().unwrap()); + let degrees_numpy: ArrayView1<Float> = degrees.as_array(); + let degrees_faer: MatRef<Float> = unsafe{from_raw_parts::<Float>(degrees_numpy.as_ptr(), n,1,1,1)}; + let degrees_faer = degrees_faer.col(0); + (adj_mat_faer, degrees_faer) +} + + + #[allow(non_snake_case)] #[pymodule] fn coreset_sc(m: &Bound<'_, PyModule>) -> PyResult<()> { + // use numpy::PyArray1; + use rand::{rngs::StdRng, SeedableRng}; + + + #[allow(clippy::too_many_arguments)] #[pyfn(m)] - #[pyo3(name = "sum_as_string")] - fn sum_as_string_py(a: usize, b: usize) -> PyResult<String>{ - Ok((a + b).to_string()) + #[pyo3(name = "old_coreset")] + fn old_coreset_py<'py>( + py: Python<'py>, + clusters: usize, + n: usize, + coreset_size: usize, + data: PyReadonlyArray1<'py, f64>, + indices: PyReadonlyArray1<'py, usize>, + indptr: PyReadonlyArray1<'py, usize>, + nnz_per_row: PyReadonlyArray1<'py, usize>, + degrees: PyReadonlyArray1<'py, f64>, + ) -> Bound<'py, PyTuple>{ + + + let (adj_mat_faer, degrees_faer) = construct_from_py(n, &data, &indices, &indptr, &nnz_per_row, &degrees); + + let (indices, weights) = coreset::old::old_coreset(adj_mat_faer, degrees_faer, clusters, coreset_size, &mut StdRng::from_entropy()); + let indices_py = indices.into_pyarray_bound(py); + let weights_py =
weights.into_pyarray_bound(py); + let tuple = PyTuple::new_bound(py, &[indices_py.to_object(py),weights_py.to_object(py)]); + tuple } - Ok(()) -} -#[cfg(test)] -mod tests { - use super::*; - use rust::rust_sum_as_string; - // Pure Rust test - #[test] - fn test_rust_sum_as_string() { - assert_eq!(rust_sum_as_string(1, 2), "3"); + #[allow(clippy::too_many_arguments)] + #[pyfn(m)] + #[pyo3(name = "coreset_embeddings")] + fn default_csc_py<'py>( + py: Python<'py>, + clusters: usize, + n: usize, + coreset_size: usize, + k_over_sampling_factor: f64, + data: PyReadonlyArray1<'py, f64>, + indices: PyReadonlyArray1<'py, usize>, + indptr: PyReadonlyArray1<'py, usize>, + nnz_per_row: PyReadonlyArray1<'py, usize>, + degrees: PyReadonlyArray1<'py, f64>, + shift: f64, + ignore_warnings: bool, + ) -> Bound<'py, PyTuple>{ + let (adj_mat_faer, degrees_faer) = construct_from_py(n, &data, &indices, &indptr, &nnz_per_row, °rees); + let (indices, weights,numerical_warning) = default_coreset_sampler( + adj_mat_faer, degrees_faer, (clusters as Float * k_over_sampling_factor) as usize, + coreset_size, Some(shift), StdRng::from_entropy()).unwrap(); + if numerical_warning && !ignore_warnings{ + let user_warning = py.get_type_bound::(); + pyo3::PyErr::warn_bound( + py, + &user_warning, + "Negative distance encountered while sampling coreset. If you are getting odd results, try increasing the shift parameter.", + 0 + ).unwrap(); + } + let (indices,weights) = aggregate_coreset_weights(indices, weights); + let coreset_size = indices.len(); + let mut rng = StdRng::from_entropy(); + let coreset_embeddings = compute_coreset_embeddings(adj_mat_faer.as_ref(), degrees_faer.as_ref(), &indices, &weights, clusters, coreset_size,Some(shift), &mut rng); + let coreset_embeddings = coreset_embeddings.as_ref().into_ndarray().to_owned(); + let indices_py = indices.into_pyarray_bound(py); + let weights_py = weights.into_pyarray_bound(py); + let coreset_embeddings_py = coreset_embeddings.to_pyarray_bound(py); + let tuple = PyTuple::new_bound(py, &[indices_py.to_object(py),weights_py.to_object(py),coreset_embeddings_py.to_object(py)]); + tuple } + #[pyfn(m)] + #[allow(clippy::too_many_arguments)] + #[pyo3(name = "label_full_graph")] + fn label_full_graph_py<'py>( + py: Python<'py>, + clusters: usize, + n: usize, + data: PyReadonlyArray1<'py, f64>, + indices: PyReadonlyArray1<'py, usize>, + indptr: PyReadonlyArray1<'py, usize>, + nnz_per_row: PyReadonlyArray1<'py, usize>, + degrees: PyReadonlyArray1<'py, f64>, + coreset_indices: PyReadonlyArray1<'py, usize>, + coreset_weights: PyReadonlyArray1<'py, f64>, + coreset_labels: PyReadonlyArray1<'py, usize>, + shift: f64, + ) -> Bound<'py, PyTuple>{ + + let (adj_mat_faer, degrees_faer) = construct_from_py(n, &data, &indices, &indptr, &nnz_per_row, °rees); + + let coreset_indices = coreset_indices.as_array(); + let coreset_weights = coreset_weights.as_array(); + let coreset_labels = coreset_labels.as_array(); + + let coreset_indices = coreset_indices.as_slice().unwrap(); + let coreset_weights = coreset_weights.as_slice().unwrap(); + let coreset_labels = coreset_labels.as_slice().unwrap(); + + let labels_and_distances2 = label_full_graph( + adj_mat_faer, + degrees_faer, + coreset_indices, + coreset_weights, + coreset_labels, + clusters, + Some(shift), + ); + let labels_py = labels_and_distances2.0.into_pyarray_bound(py); + let distances_py = labels_and_distances2.1.into_pyarray_bound(py); + let tuple = PyTuple::new_bound(py, &[labels_py.to_object(py),distances_py.to_object(py)]); + tuple + } + + 
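+
+    // A minimal, never-called sketch of driving the re-exported Rust API (`gen_sbm_with_self_loops`,
+    // `default_coreset_sampler`) directly rather than through the Python bindings. The graph sizes,
+    // the 2*k seeding count and the coreset size below are illustrative choices only.
+    #[allow(dead_code)]
+    fn rust_api_sketch() {
+        use faer::Col;
+        use rand::SeedableRng;
+        // 20 clusters of 1000 nodes each, with sparse inter-cluster noise.
+        let (adj, _labels) = gen_sbm_with_self_loops(1000, 20, 0.5, 5e-5);
+        // Degree vector of the (self-looped) adjacency matrix.
+        let degrees = Col::from_fn(adj.nrows(), |i| adj.values_of_row(i).iter().sum::<f64>());
+        // Sample a coreset of 2000 nodes, seeding with 2*k centres and no kernel shift.
+        let (_coreset_indices, _coreset_weights, _numerical_warning) = default_coreset_sampler(
+            adj.as_ref(),
+            degrees.as_ref(),
+            40,
+            2000,
+            Some(0.0),
+            rand::rngs::StdRng::from_entropy(),
+        )
+        .unwrap();
+    }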
#[pyfn(m)] + #[allow(clippy::too_many_arguments)] + #[pyo3(name = "compute_conductances")] + fn compute_conductances_py<'py>( + py: Python<'py>, + clusters: usize, + n: usize, + data: PyReadonlyArray1<'py, f64>, + indices: PyReadonlyArray1<'py, usize>, + indptr: PyReadonlyArray1<'py, usize>, + nnz_per_row: PyReadonlyArray1<'py, usize>, + degrees: PyReadonlyArray1<'py, f64>, + labels: PyReadonlyArray1<'py, usize>, + ) -> Bound<'py,PyArray1>{ + + let (adj_mat_faer, degrees_faer) = construct_from_py(n, &data, &indices, &indptr, &nnz_per_row, °rees); + + let conductances = compute_conductances( + adj_mat_faer, + degrees_faer, + labels.as_array().as_slice().unwrap(), + clusters, + ); + + let conductances_py = conductances.into_pyarray_bound(py); + conductances_py + } + + + #[pyfn(m)] + #[pyo3(name = "gen_sbm")] + fn gen_sbm_py( + py: Python, + n: usize, + k: usize, + p: f64, + q: f64, + ) -> Bound { + let (adj_mat,labels) = gen_sbm_with_self_loops(n, k, p, q); + let (symbolic,data) = adj_mat.into_parts(); + let (row_size,col_size,indptr,_,indices) = symbolic.into_parts(); + + assert!(row_size == n*k); + assert!(col_size == n*k); + let data = data.into_pyarray_bound(py); + let indices = indices.into_pyarray_bound(py); + let indptr = indptr.into_pyarray_bound(py); + let labels = labels.into_pyarray_bound(py); + let tuple = PyTuple::new_bound(py, &[ + (n*k).to_object(py), + data.to_object(py), + indices.to_object(py), + indptr.to_object(py), + labels.to_object(py),]); + tuple + } + + + Ok(()) } diff --git a/src/rust.rs b/src/rust.rs index 000fa1e..7aa9413 100644 --- a/src/rust.rs +++ b/src/rust.rs @@ -1,18 +1,515 @@ -// Some pure rust functions + + +use std::collections::{HashMap, HashSet}; +// use std::time::Instant; + +use faer::sparse::{SparseColMat, SparseRowMat, SparseRowMatRef, SymbolicSparseColMat, SymbolicSparseRowMat, SymbolicSparseRowMatRef}; +use faer::{unzipped, zipped, Col, ColRef, Mat}; +// use faer::sparse::linalg::matmul::{dense_sparse_matmul,sparse_dense_matmul}; + + +use rand::rngs::StdRng; +// use rand::rngs::StdRng; +use rand::prelude::*; + +use rand_distr::StandardNormal; + +use crate::coreset; +use rayon::prelude::*; + +use coreset::common::{Float,Error}; +use coreset::unstable; +use coreset::full::DefaultCoresetSampler; +// use smartcore::cluster::kmeans::*; + + + + +pub use crate::sbm::gen_sbm_with_self_loops; + + + +pub fn data_indices_indptr_to_sparse_mat_ref<'a ,E>( + n: usize, + data: &'a [E], + indices: &'a [usize], + indptr: &'a [usize], + nnz_per_row: &'a [usize]) -> SparseRowMatRef::<'a, usize, E> +where E: faer::Entity + faer::SimpleEntity { + let symbolic_sparse_mat_ref = + SymbolicSparseRowMatRef::new_checked( + n, + n, + indptr, + Some(nnz_per_row), + indices, + ); + + SparseRowMatRef::new( + symbolic_sparse_mat_ref, + data + ) +} + +#[allow(dead_code)] +fn reinterpret_sym_csr_as_csc(mat_in: SparseRowMat) -> SparseColMat{ + let (symbolic,data) = mat_in.into_parts(); + let (rows,cols,indptr,nnz,indices) = symbolic.into_parts(); + SparseColMat::new(SymbolicSparseColMat::new_checked( + rows, + cols, + indptr, + nnz, + indices + ),data) +} + +#[allow(clippy::too_many_arguments)] +#[allow(unused_assignments)] #[allow(dead_code)] -pub fn rust_sum_as_string(a: usize, b: usize) -> String { - (a + b).to_string() +#[allow(non_snake_case)] +pub fn compute_coreset_embeddings( + adj_matrix: SparseRowMatRef, + degree_vector: ColRef, + coreset_indices: &[usize], + coreset_weights: &[Float], + num_clusters: usize, + coreset_size: usize, + shift: Option, + rng: &mut StdRng) -> 
+    // first extract the coreset graph
+    let coreset_graph = extract_coreset_graph(adj_matrix, degree_vector, coreset_indices, coreset_weights, shift);
+    let coreset_degree_vector = Col::from_fn(coreset_size, |i|{
+        coreset_graph.values_of_row(i).iter().sum::<Float>()
+    });
+    // Then extract the M matrix:
+    let M: SparseRowMat<usize, Float> = convert_to_signless_laplacian(coreset_graph, &coreset_degree_vector);
+    // Now we can cluster the coreset graph using Peter's method:
+    // We use the transpose because faer doesn't have native support for csr Sparse Dense multiplication.
+    let l = num_clusters.min((num_clusters as Float).log2().ceil() as usize);
+    let t = 10 * ((coreset_size/num_clusters) as Float).log2().ceil() as usize;
+    let mut Y1: Mat<Float> = Mat::from_fn(coreset_size, l, |_,_|{
+        rng.sample(StandardNormal)
+    });
+    let mut Y2: Mat<Float> = Mat::zeros(coreset_size,l);
+    let mut top_eigvec = zipped!(&coreset_degree_vector).map(|unzipped!(d)| d.sqrt());
+    let norm = top_eigvec.norm_l2();
+    if norm > 0.0{
+        zipped!(&mut top_eigvec).for_each(|unzipped!(mut v)| *v /= norm);
+    }
+    for _ in 0..t/2{
+        Y2 = M.as_ref()*Y1;
+        // Now remove the contribution of the top eigenvector from Y2
+        let contributions_to_top_eig = top_eigvec.transpose()*Y2.as_ref();
+        Y2.col_iter_mut().zip(contributions_to_top_eig.iter()).for_each(|(mut y2,contrib)|{
+            y2 -= *contrib*top_eigvec.as_ref();
+        });
+        Y1 = M.as_ref()*Y2;
+        // Now remove the contribution of the top eigenvector from Y1
+        let contributions_to_top_eig = top_eigvec.transpose()*Y1.as_ref();
+        Y1.col_iter_mut().zip(contributions_to_top_eig.iter()).for_each(|(mut y1,contrib)|{
+            y1 -= *contrib*top_eigvec.as_ref();
+        });
+
+    }
+    Y1
+}
+pub fn convert_to_signless_laplacian(adj_matrix: SparseRowMat<usize, Float>, degree_vector: &Col<Float>)->SparseRowMat<usize, Float>{
+    // Fuse everything together. We only loop over the non-zero entries of the adjacency matrix once.
+ + // We want to replicate the following python code assuming D is np.diag(A.sum(axis=1)): + + // + // D_inv_half = np.diag(1 / np.sqrt(D.diagonal())) + // L = D - A + // N = D_inv_half @ L @ D_inv_half + // M = np.eye(n) - (0.5)*N + + // Expanding M we see that + + // M = np.eye(n) - (0.5)*D_inv_half@(D-A)@D_inv_half + // = (0.5)*np.eye(n) + (0.5)*D_inv_half@A@D_inv_half + // = (0.5)*(np.eye(n) + D_inv_half@A@D_inv_half) + let n = adj_matrix.nrows(); + let degree_inv_half = zipped!(degree_vector).map(|unzipped!(d)| 1.0/(d.sqrt())); + + // We first deconstruct the sparse matrix into it's component parts + // We need to do this + + let (sparse_rep,mut data) = adj_matrix.into_parts(); + let (rows,cols,indptr,nnz,indices) = sparse_rep.into_parts(); + + let nnz = nnz.unwrap(); + let mut data_chunks: Vec<&mut [Float]> = Vec::with_capacity(n); + let mut indices_chunks: Vec<&[usize]> = Vec::with_capacity(n); + let mut remaining_data: &mut [Float] = &mut data; + let mut remaining_indices: &[usize] = & indices; + + for &nnz_i in nnz.iter() { + let (data_chunk, data_remaining) = remaining_data.split_at_mut(nnz_i); + let (indices_chunk, indices_remaining) = remaining_indices.split_at(nnz_i); + data_chunks.push(data_chunk); + indices_chunks.push(indices_chunk); + remaining_data = data_remaining; + remaining_indices = indices_remaining; + } + + data_chunks.into_par_iter() + .zip(indices_chunks.par_iter()) + .enumerate().for_each(|(i,(data_chunk,indices_chunk))|{ + let d_inv_half_i = degree_inv_half[i]; + for (data, &j) in data_chunk.iter_mut().zip(indices_chunk.iter()){ + *data = 0.5*(((i==j) as usize as Float) + d_inv_half_i**data*degree_inv_half[j]); + // This is the line where we fuse everything together :) + } + }); + + // reassemble the matrix: + SparseRowMat::new( + SymbolicSparseRowMat::new_checked( + rows, + cols, + indptr, + Some(nnz), + indices), + data + ) +} + +pub fn default_coreset_sampler( + adj_matrix: SparseRowMatRef, + degree_vector: ColRef, + num_clusters: usize, + coreset_size: usize, + shift: Option, + rng: StdRng) -> Result<(Vec,Vec,bool),Error>{ + let mut default_sampler: DefaultCoresetSampler = coreset::full::DefaultCoresetSampler::new(adj_matrix, degree_vector, num_clusters, coreset_size, shift, rng); + default_sampler.sample() +} + +#[allow(dead_code)] +#[allow(non_snake_case)] +pub fn extract_coreset_graph( + adj_matrix: SparseRowMatRef, + degree_vector: ColRef, + coreset_indices: &[usize], + coreset_weights: &[Float], + shift: Option + ) -> SparseRowMat{ + + // first build a map from the raw index to the coreset index. We can also use this to check coreset membership. + let raw_index_to_coreset_index = coreset_indices.iter().enumerate().map(|(i,coreset_i)|{ + (*coreset_i,i) + }).collect::>(); + + let shift = shift.unwrap_or(0.0); + + let W_D_inv = coreset_indices.iter().enumerate().map(|(coreset_idx,original_idx)|{ + coreset_weights[coreset_idx]/degree_vector[*original_idx] + }).collect::>(); + + let coreset_size = coreset_indices.len(); + // Todo: Guess the number of non-zero entrires in the coreset graph. 
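+    // The loop below assembles the coreset graph in CSR form row by row: `data` and
+    // `indices` collect the transformed edge weights and their column indices,
+    // `nnz_per_row` the per-row counts, and `indptr` the running offsets.
+    // The `coreset_size*200` capacities are only preallocation guesses; the vectors
+    // simply grow if a coreset row turns out to have more neighbours than that.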
+ let mut data = Vec::::with_capacity(coreset_size*200); + let mut indices = Vec::::with_capacity(coreset_size*200); + let mut indptr = Vec::::with_capacity(coreset_size+1); + let mut nnz_per_row = Vec::::with_capacity(coreset_size); + + let mut indptr_counter = 0; + for (i,&index) in coreset_indices.iter().enumerate(){ + let adj_row_indices = adj_matrix.col_indices_of_row(index); + let adj_row_data = adj_matrix.values_of_row(index); + // get the neighbours of index that are in the coreset and transform the data + // We are computing + // A_C = W_CD^{-1}_C A_C D^{-1}_C W_C + W_C shift*D^{-1}_C W_C + // = W_CD^{-1}_C A_C D^{-1}_C W_C + shift* W_C*D^{-1}_C W_C + // where: + // -A_C is the submatrix of A corresponding to the coreset indices, + // -W_C is the diagonal matrix of coreset weights, + // -D is the diagonal matrix of A and D_C is the submatrix of D corresponding to the coreset indices. + + let W_D_inv_i = W_D_inv[i]; + let mut good_indices_and_data_transformed = adj_row_indices.zip(adj_row_data.iter()).filter_map(|(j,&data)|{ + if index == j{ + raw_index_to_coreset_index.get(&j).map(|&coreset_j|{ + (coreset_j, data*W_D_inv_i*W_D_inv[i] + shift*coreset_weights[i]*W_D_inv_i) + })} + else{ + raw_index_to_coreset_index.get(&j).map(|&coreset_j|{ + (coreset_j, data*W_D_inv_i*W_D_inv[coreset_j]) + })} + + }).collect::>(); + good_indices_and_data_transformed.sort_unstable_by_key(|&(idx,_)| idx); + // push the transformed data: + data.extend(good_indices_and_data_transformed.iter().map(|(_,data)| *data)); + // push the indices + indices.extend(good_indices_and_data_transformed.iter().map(|(idx,_)| *idx)); + + // push the non-zeros + let nnz = good_indices_and_data_transformed.len(); + nnz_per_row.push(nnz); + + // push the indptr counter and increment it + indptr.push(indptr_counter); + indptr_counter += nnz; + } + + // push the last indptr + indptr.push(indptr_counter); + + // Now we have the data, indices, indptr and nnz_per_row. We can now build and return the coreset graph. 
+ SparseRowMat::new( + SymbolicSparseRowMat::new_checked( + coreset_size, + coreset_size, + indptr, + Some(nnz_per_row), + indices, + ), + data + ) +} + +#[allow(dead_code)] +pub fn aggregate_coreset_weights(coreset_indices: Vec, coreset_weights: Vec)-> (Vec,Vec){ + let mut coreset_map: HashMap = HashMap::with_capacity(coreset_indices.len()); + coreset_indices.into_iter().zip(coreset_weights).for_each(|(i,w)|{ + *coreset_map.entry(i).or_insert(0.0) += w; + }); + coreset_map.into_iter().unzip() +} + +pub fn label_full_graph( + adj_mat: SparseRowMatRef, + degree_vector: ColRef, + coreset_indices: &[usize], + coreset_weights: &[Float], + coreset_labels: &[usize], + num_clusters: usize, + shift: Option +) -> (Vec, Vec){ + + + let shift = shift.unwrap_or(0.0); + let num_points = adj_mat.nrows(); + + // group the coreset and coreset weights by label and process each group in parallel: + + let coreset_grouped = coreset_indices.iter().zip(coreset_labels).zip(coreset_weights).fold( + vec![(vec![],vec![]);num_clusters], |mut acc, ((&i, &label), &weight)|{ + acc[label].0.push(i); + acc[label].1.push(weight); + acc + } + ); + + // Now we compute the center norms and center denoms for each cluster + let result = coreset_grouped.into_par_iter().enumerate().map(|(_, (indices,weights))|{ + // return zero if the cluster is empty + if indices.is_empty(){ + return (0.0,0.0); + } + + let indices_set: HashSet<&usize> = indices.iter().collect(); + let index_to_weight: HashMap<_,_> = indices.iter().zip(weights.iter()).collect(); + + // compute the denominator: + let denom = weights.iter().sum::(); + // compute the center norm sum + let mut center_norm_sum = 0.0; + indices.iter().for_each(|i|{ + let weight = index_to_weight[i]; + let neighbour_indices = adj_mat.col_indices_of_row(*i); + let neighbour_values = adj_mat.values_of_row(*i).iter().enumerate().map(|(j,v)|{ + if *i!=j{ + v/(degree_vector[*i]*degree_vector[j]) + }else{ + v/(degree_vector[*i]*degree_vector[j]) + shift/(degree_vector[*i]) + } + + }); + center_norm_sum += neighbour_indices.zip(neighbour_values).fold( + 0.0f64, |acc, (j, value)|{ + match indices_set.contains(&j){ + true => acc + value*weight*index_to_weight[&j], + false => acc + } + }); + }); + center_norm_sum /= denom*denom; + (center_norm_sum,denom) + }).collect::>(); + + let (center_norms, center_denoms): (Vec,Vec) = result.into_iter().unzip(); + + // Now find the cluster with the smallest center norm - this will be the "default" cluster + + let smallest_center_by_norm = center_norms.iter().enumerate() + .min_by(|(_,a),(_,b)| a.partial_cmp(b).unwrap()).unwrap().0; + let smallest_center_by_norm_value = center_norms[smallest_center_by_norm]; + + // Now prepare to label everything in parallel: + + let coreset_set = coreset_indices.iter().collect::>(); + let label_map = coreset_indices.iter().zip(coreset_labels).collect::>(); + let weight_map = coreset_indices.iter().zip(coreset_weights).collect::>(); + + let labels_and_distances2: (Vec,Vec) = (0..num_points).into_par_iter().map(|i|{ + + let vertex_degree = degree_vector[i]; + // store the inner product to all the centers + let mut x_to_c_is = HashMap::new(); + + + let neighbour_indices = adj_mat.col_indices_of_row(i); + let neighbour_edge_weights = adj_mat.values_of_row(i); + // let neighbour_values = adj_mat.values_of_row(i).iter().enumerate().map(|(j,v)|{ + // v/(degree_vector[i]*degree_vector[j]) + // }); + + neighbour_indices.enumerate().for_each(|(j,indx)|{ + if coreset_set.contains(&indx){ + let label = label_map[&indx]; + let 
neighbour_weight = weight_map[&indx]; + let inner_prod_with_vertex = { + if i!=indx{ + neighbour_edge_weights[j]/(vertex_degree*degree_vector[indx]) + }else{ + neighbour_edge_weights[j]/(vertex_degree*degree_vector[indx]) + shift/(vertex_degree) + } + }; + x_to_c_is.entry(label).and_modify(|e|{ + *e += neighbour_weight*inner_prod_with_vertex; + }).or_insert(neighbour_weight*inner_prod_with_vertex); + } + }); + + // normalize the inner products to each cluster by each center denominator + x_to_c_is.iter_mut().for_each(|(k,v)| *v /= center_denoms[**k]); + + let mut best_center = smallest_center_by_norm; + let mut best_center_value = smallest_center_by_norm_value; + + x_to_c_is.iter().for_each(|(center,v)|{ + // right now v is just the inner product to each center, not the distance + + // When we compute the (smallest) distance, we can ignore the contribution of the vertex + let distance = center_norms[**center] - 2.0*v; + if distance < best_center_value{ + best_center = **center; + best_center_value = distance; + } + }); + (best_center,best_center_value + adj_mat[(i,i)]/(vertex_degree*vertex_degree) + shift/(vertex_degree)) + }).unzip(); + labels_and_distances2 +} + +pub fn compute_conductances( + adj_mat: SparseRowMatRef, + degrees: ColRef, + labels: &[usize], + num_clusters: usize, +) -> Vec{ + let mut volumes = vec![0.0; num_clusters]; + let mut cuts = vec![0.0; num_clusters]; + let mut grouped_labels = vec![Vec::new(); num_clusters]; + labels.iter().enumerate().for_each(|(i,&label)|{ + volumes[label] += degrees[i]; + grouped_labels[label].push(i); + }); + grouped_labels.par_iter().zip(cuts.par_iter_mut()) + .enumerate().for_each(|(_,(cluster_indices, cut))|{ + cluster_indices.iter().for_each(|&i|{ + let neighbour_indices = adj_mat.col_indices_of_row(i); + let neighbour_values = adj_mat.values_of_row(i); + neighbour_indices.zip(neighbour_values).for_each(|(j,&v)|{ + if labels[i] != labels[j] && j>i{ + *cut += v; + } + }); + }); + }); + cuts.iter().zip(volumes.iter()).map(|(&cut,&vol)|{ + if vol > 0.0{ + cut/vol + }else{ + 0.0 + } + }).collect() +} + + +#[allow(dead_code)] +#[allow(unused_variables)] #[cfg(test)] mod tests { - use super::*; + + use faer::Col; + use rand::rngs::StdRng; + use rand::prelude::*; + + // use super::*; + use crate::{aggregate_coreset_weights, coreset::common::*}; + + use super::{default_coreset_sampler, gen_sbm_with_self_loops}; + + // use rand::Rng; + + fn compute_actual_total_contribution(data: &[Datapoint], min_self_affinity: SelfAffinity) -> Contribution{ + data.iter().map(|datapoint| { + let delta = datapoint.weight.0*(datapoint.self_affinity.0 + min_self_affinity.0); + datapoint.weight.0 * delta + }).sum::().into() + } + + fn compute_total_smoothed_contribution(data: &[Datapoint], min_self_affinity: SelfAffinity, coreset_star_weight: Weight) -> SmoothedContribution{ + let contribution = compute_actual_total_contribution(data, min_self_affinity); + + todo!() + + } #[test] - fn test_rust_sum_as_string() { - assert_eq!(rust_sum_as_string(1, 2), "3"); + fn test_coreset_clustering(){ + + let n = 100; + let num_clusters = 5; + let p = 0.99; + let q = 0.001; + + let num_nodes = n*num_clusters; + + let (adj_mat,labels) = gen_sbm_with_self_loops(n, num_clusters, p, q); + assert!(adj_mat.nrows() == num_nodes); + assert!(adj_mat.ncols() == num_nodes); + + let degree_vector = Col::from_fn(num_nodes, |i|{ + adj_mat.values_of_row(i).iter().sum::() + }); + assert!(degree_vector.nrows() == num_nodes); + + let coreset_size = n*num_clusters/10; + + let rng = 
StdRng::from_entropy(); + + let (coreset_indices, coreset_weights,warning) = default_coreset_sampler(adj_mat.as_ref(), degree_vector.as_ref(), 2*num_clusters, coreset_size,Some(0.0), rng).unwrap(); + let (coreset_indices,coreset_weights) = aggregate_coreset_weights(coreset_indices, coreset_weights); + let coreset_size = coreset_indices.len(); + let mut rng = StdRng::from_entropy(); + + let coreset_embeddings = super::compute_coreset_embeddings(adj_mat.as_ref(), degree_vector.as_ref(), &coreset_indices, &coreset_weights, num_clusters, coreset_size,Some(0.0) ,&mut rng); + + + panic!(); } + + + + } diff --git a/src/sbm.rs b/src/sbm.rs new file mode 100644 index 0000000..0fe0692 --- /dev/null +++ b/src/sbm.rs @@ -0,0 +1,276 @@ +use sampling_tree::SimpleSamplingTree; +use rand_distr::{Binomial,Distribution}; +use faer::sparse::{SparseRowMat, SymbolicSparseRowMat}; +use rand::{rngs::StdRng, seq::IteratorRandom, SeedableRng}; +// use aligned_vec::{AVec,avec}; +use rayon::prelude::*; + + +pub fn largest_triangleq_num_below(x: usize)-> usize{ + // Floor((-1 + sqrt(1+8x))/2) + ((((1+8*x) as f64).sqrt()-1.0)/2.0).floor() as usize +} + +pub fn triangle_number(n: usize)-> usize{ + (n*(n+1))/2 +} + +pub fn extract_edge_indices(idx: usize) -> (usize,usize){ + let tria_num_largest = largest_triangleq_num_below(idx); + let col = tria_num_largest + 1; + let row = idx - triangle_number(tria_num_largest); + (row,col) +} + + +pub fn shift_edge_indices_same_cluster((row,col): (usize,usize),cluster_index:usize,cluster_size:usize) -> (usize,usize){ + (row + cluster_index*cluster_size, col + cluster_index*cluster_size) +} + + +pub fn gen_sbm_with_self_loops( + nodes_per_cluster: usize, + num_clusters: usize, + p: f64, + q: f64, +) -> (SparseRowMat,Vec){ + + let n = nodes_per_cluster; + let k = num_clusters; + + + // First we compute how many intra and inter cluster edges we will sample: + let mut rng = rand::thread_rng(); + // intra: + let intra_bin = Binomial::new(((n*(n-1))/2) as u64,p).unwrap(); + let num_intra_cluster_edges = (0..k).map(|_| intra_bin.sample(&mut rng) as usize).collect::>(); + let total_intra_cluster_edges = num_intra_cluster_edges.iter().sum::(); + + // inter: + let inter_bin = Binomial::new(((k*n*(k-1)*n)/2) as u64,q).unwrap(); + let num_inter_cluster_edges =inter_bin.sample(&mut rand::thread_rng()) as usize; + // double count the inter and intra edges since we want (u,v) and (v,u). Then add self loops (counted once) + let total_edges = 2*total_intra_cluster_edges + 2*num_inter_cluster_edges + n*k; + // Allocate space for the edge list using the average number of edges per node + + + // TODO: Do some fancy maths to properly compute the optimal preallocated size of each edge list: + // Trade off between memory and time. Get it wrong and reallocations will kill multi-threading + // as every thread will be fighting to use the system allocator. + let overhead = 1.1f64; + + let mut edge_list = (0..n*k) + // .into_par_iter() + .map(|_|{ + Vec::::with_capacity(((total_edges/(n*k))as f64 *overhead) as usize) + }).collect::>>(); + + + // Now we can start sampling edges + + // First sample inter-cluster edges: + // Each cluster has a contribution equal to the total number of inter-cluster edges it could have: + // This is n* (k-1)*n (double counts by 2) + // n pointer in the cluster, potentially connected to the (k-1) other clusters of size n. 
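+    // As a concrete example: with k = 3 clusters of n = 4 nodes each, every cluster
+    // leaf starts at 4*4*(3-1) = 32 and every vertex leaf at 4*(3-1) = 8. Each accepted
+    // edge then decrements both endpoints' leaves (and their clusters) by one, and a
+    // vertex's existing neighbours are zeroed out while its partner is drawn, so the
+    // same unordered pair is never sampled twice.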
+ let mut cluster_sampling_tree = SimpleSamplingTree::::from_iterable( + (0..k).map(|_| n*n*(k-1)) + ).unwrap(); + + // Now we create another sampling tree for each cluster, + // where each vertex has a leaf with initial contribution equal to the total number of inter-cluster edges it could have: + // This is n*(k-1) + // 1 point in the cluster, potentially connected to the (k-1) other clusters of size n. + let mut vertex_sampling_trees = (0..k) + .map(|_| SimpleSamplingTree::::from_iterable((0..n).map(|_| n*(k-1))).unwrap() + ).collect::>>(); + + + // Now we sample the number of inter-cluster edges across the whole graph at once: + (0..num_inter_cluster_edges).for_each(|_|{ + // sample first cluster index, store current contribution and temporarily set it to 0: + let cluster_i = cluster_sampling_tree.sample(&mut rand::thread_rng()).unwrap(); + let cluster_i_temp_contrib = cluster_sampling_tree.get_contribution(cluster_i).unwrap(); + cluster_sampling_tree.update(cluster_i, 0).unwrap(); + + // now sample the first vertex index and get its contribution: + let cluster_i_vertex = vertex_sampling_trees[cluster_i.0].sample(&mut rand::thread_rng()).unwrap(); + let cluster_i_vertex_contribution = vertex_sampling_trees[cluster_i.0].get_contribution(cluster_i_vertex).unwrap(); + + // Now get the existing neighbours of i, store their contributions and temporarily set them to 0 + // Reflect this in their cluster contributions as well: + let vertex_i_neighbours = edge_list[cluster_i.0*n + cluster_i_vertex.0].clone(); + let vertex_i_neighbour_contributions_and_indices = vertex_i_neighbours.iter().map(|&v|{ + let cluster_neighbour = v/n; + let vertex_neighbour = v%n; + let contribution = vertex_sampling_trees[cluster_neighbour].get_contribution(vertex_neighbour.into()).unwrap(); + // temporarily set the contribution to 0: + vertex_sampling_trees[cluster_neighbour].update(vertex_neighbour.into(),0).unwrap(); + // reflect this in the cluster contribution: + cluster_sampling_tree.update( + cluster_neighbour.into(), + cluster_sampling_tree.get_contribution(cluster_neighbour.into()).unwrap()-contribution).unwrap(); + + (contribution,v) + }).collect::>(); + + // Sample second cluster index, store current conribution. 
+ let cluster_j = cluster_sampling_tree.sample(&mut rand::thread_rng()).unwrap(); + let cluster_j_temp_contrib = cluster_sampling_tree.get_contribution(cluster_j).unwrap(); + // Now sample the vertex indices and get their contributions: + let cluster_j_vertex = vertex_sampling_trees[cluster_j.0].sample(&mut rand::thread_rng()).unwrap(); + let cluster_j_vertex_contribution = vertex_sampling_trees[cluster_j.0].get_contribution(cluster_j_vertex).unwrap(); + // Now update the vertex tree contributions: + vertex_sampling_trees[cluster_i.0].update(cluster_i_vertex, cluster_i_vertex_contribution-1).unwrap(); + vertex_sampling_trees[cluster_j.0].update(cluster_j_vertex, cluster_j_vertex_contribution-1).unwrap(); + // Update the cluster tree contributions: + cluster_sampling_tree.update(cluster_i, cluster_i_temp_contrib-1).unwrap(); + cluster_sampling_tree.update(cluster_j, cluster_j_temp_contrib-1).unwrap(); + // Now we have the cluster and vertex indices, we can add the edge to the set by shifting the indices: + let (u,v) = (cluster_i.0*n + cluster_i_vertex.0, cluster_j.0*n + cluster_j_vertex.0); + edge_list[u].push(v); + edge_list[v].push(u); + + // Now we need to repair the contributions of the neighbours of i: + vertex_i_neighbour_contributions_and_indices.into_iter().for_each(|(contribution,v)|{ + let cluster_neighbour = v/n; + let vertex_neighbour = v%n; + // repair the contribution: + vertex_sampling_trees[cluster_neighbour].update(vertex_neighbour.into(),contribution).unwrap(); + // repair the cluster contribution: + cluster_sampling_tree.update( + cluster_neighbour.into(), + cluster_sampling_tree.get_contribution(cluster_neighbour.into()).unwrap()+contribution).unwrap(); + }); + }); + + + + edge_list.par_iter_mut().enumerate().for_each(|(i, edges)|{ + edges.push(i); + }); + + // Now we move onto sampling intra-cluster edges: + + + // First we sample the indices of the intra-cluster edges: + let indices_per_cluster = (0..k).into_par_iter().map(|cluster_i|{ + (0..((n*(n-1))/2)).choose_multiple(&mut StdRng::from_entropy(), num_intra_cluster_edges[cluster_i]) + }); + // Now we shift the indices to the correct cluster and convert them to edge pairs. + let shifted_indices_per_cluster = indices_per_cluster.into_par_iter().enumerate().map( + |(cluster_i,indices)|{ + indices.into_iter().map(|i|{ + let (u,v) = extract_edge_indices(i); + let (u,v) = shift_edge_indices_same_cluster((u,v),cluster_i,n); + (u,v) + }).collect::>() + }); + edge_list.par_chunks_exact_mut(n) + .zip(shifted_indices_per_cluster) + .for_each( + |(edge_list_local,indices)|{ + indices.iter().for_each(|&(u,v)|{ + edge_list_local[u%n].push(v); + edge_list_local[v%n].push(u); + }); + } + ); + + + // build the indices and indptr quickly from the edge list: + let mut indptr = vec![0;n*k+1]; + let mut indices = vec![0;total_edges]; + + + + // populate indptr initially with the number of edges in each row. We will cumsum this later. + indptr[1..].par_iter_mut().enumerate().for_each(|(i,p)|{ + *p = edge_list[i].len(); + }); + let nnz = indptr[1..].to_vec(); + + // Now we can split the indices array into n*k mutable slices which we will pass to rayon + // to populate with the edges in parallel. 
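+    // For example, if the per-row counts are nnz = [2, 1, 3], the slices in `splits`
+    // get lengths 2, 1 and 3, and after the cumulative sum further down indptr becomes
+    // [0, 2, 3, 6], so row i's sorted column indices end up in
+    // indices[indptr[i]..indptr[i+1]].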
+ let mut splits = Vec::with_capacity(n*k); + let mut remaining: &mut[usize] = &mut indices; + + for x in &indptr[1..]{ + let (a,b) = remaining.split_at_mut(*x); + splits.push(a); + remaining = b; + } + + // Now copy the (sorted) edges into the indices array: + edge_list.par_iter_mut().zip(splits).for_each(|(edges,indices)|{ + edges.sort_unstable(); // we don't care about sort stability + indices.copy_from_slice(edges); + }); + + // now finish cumsum on indptr: + for i in 1..indptr.len(){ + indptr[i] += indptr[i-1]; + } + + // Now we have the indices and indptr, we can build the adjacency matrix: + let structure = SymbolicSparseRowMat::new_checked( + n*k, + n*k, + indptr, + Some(nnz), + indices, + ); + + let mut data = vec![0.0f64;total_edges]; + data.par_iter_mut().for_each(|x| *x = 1.0); + let sparse_mat = SparseRowMat::::new(structure,data); + + + let labels = (0..n*k).map(|i| i/n).collect::>(); + + (sparse_mat,labels) +} + + + +#[cfg(test)] +mod tests{ + + + + use super::*; + + #[test] + fn test_small_sbm(){ + let adj_mat = gen_sbm_with_self_loops(5,2,0.5,0.1); + + println!("{:?}",adj_mat); + + } + #[test] + fn test_sbm(){ + let n = 1000; + let k = 100; + let p = 0.5; + let q = (1.0/(n as f64))/(k as f64); + let t0 = std::time::Instant::now(); + let (adj_mat,_labels) = gen_sbm_with_self_loops(n,k,p,q); + println!("total time: {:?}",t0.elapsed()); + println!("{:?} shape",adj_mat.shape()); + panic!(); + } + + + + + #[test] + fn splat(){ + + let adjacency: Vec> = vec![ + vec![1,2,3].into_boxed_slice(), + vec![4,6].into_boxed_slice(), + vec![9].into_boxed_slice(), + ]; + + println!("{:?}",adjacency); + } +} diff --git a/tests/test_dummy.py b/tests/test_dummy.py index 8e58190..df4f67b 100644 --- a/tests/test_dummy.py +++ b/tests/test_dummy.py @@ -1,14 +1,278 @@ # test_coreset_sc.py # test +import json +import time +import warnings + +import matplotlib.pyplot as plt +import numpy +from coreset_sc import CoresetSpectralClustering, gen_sbm + +# from maturin_import_hook.settings import MaturinSettings +# import maturin_import_hook +# maturin_import_hook.install( +# enable_project_importer=True, +# enable_rs_file_importer=True, +# settings=MaturinSettings( +# release=True, +# strip=True, +# ), +# show_warnings=False, +# ) +from sklearn.metrics.cluster import adjusted_rand_score +from tqdm import tqdm + +warnings.filterwarnings( + "ignore", message="KMeans is known to have a memory leak on Windows with MKL" +) + + +def test_default_coreset(): + """Test the default coreset sampler""" + resolution = 2 + rounds = 1 + + ks = numpy.linspace(10, 20, resolution, dtype=int) + + construction_times = numpy.zeros((resolution, rounds)) + coreset_time = numpy.zeros((resolution, rounds)) + labelling_time = numpy.zeros((resolution, rounds)) + + coreset_aris = numpy.zeros((resolution, rounds)) + full_aris = numpy.zeros((resolution, rounds)) + + for i, k in tqdm(list(enumerate(ks))): + k = int(k) + for r in range(rounds): + # Generate some data + n = 1000 + p = 0.5 + q_factor = 1.0 + q = (q_factor / n) / k + coreset_fraction = 0.02 + # Generate the data + t0 = time.time() + A, y = gen_sbm(n, k, p, q) + construction_times[i, r] = time.time() - t0 + # Sample, extract, cluster and label the coreset graph + t0 = time.time() + csc = CoresetSpectralClustering( + num_clusters=k, coreset_ratio=coreset_fraction + ) + csc.fit(A) + coreset_time[i, r] = time.time() - t0 + # Get the coreset ARI + coreset_aris[i, r] = adjusted_rand_score( + y[csc.coreset_indices_], csc.coreset_labels_ + ) + # label the full graph + t0 = 
time.time() + csc.label_full_graph() + labelling_time[i, r] = time.time() - t0 + # Get the full ARI + full_aris[i, r] = adjusted_rand_score(y, csc.labels_) + assert full_aris[i, r] > 0.9, f"ARI {full_aris[i, r]} is below 0.9" + + # save the data to a json file + data_dict = { + "construction_times": construction_times.tolist(), + "coreset_time": coreset_time.tolist(), + "labelling_time": labelling_time.tolist(), + "coreset_aris": coreset_aris.tolist(), + "full_aris": full_aris.tolist(), + "ks": ks.tolist(), + "rounds": rounds, + "title": f"SBM with N=1000k, p={p}, q={q_factor}/nk, coreset_fraction={coreset_fraction*100}%", + } + + with open("test_default_coreset.json", "w") as f: + json.dump(data_dict, f) + + +def plot_coreset_test(): + + # Load the data + with open("test_default_coreset.json", "r") as f: + data_dict = json.load(f) + + construction_times = numpy.array(data_dict["construction_times"]) + coreset_time = numpy.array(data_dict["coreset_time"]) + labelling_time = numpy.array(data_dict["labelling_time"]) + coreset_aris = numpy.array(data_dict["coreset_aris"]) + full_aris = numpy.array(data_dict["full_aris"]) + ks = numpy.array(data_dict["ks"]) + rounds = data_dict["rounds"] + title = data_dict["title"] + + # plot results on two subplots. + + # On the first subplot, plot the construction time and stack the corset time under the full labelling time + # On the second subplot, plot the coreset and full ARI + + fontsize = 20 + + # Means: + construction_time_means = construction_times.mean(axis=1) + coreset_time_means = coreset_time.mean(axis=1) + labelling_time_means = labelling_time.mean(axis=1) + coreset_ari_means = coreset_aris.mean(axis=1) + full_ari_means = full_aris.mean(axis=1) + + # Std errors: + construction_time_se = construction_times.std(axis=1) / numpy.sqrt(rounds) + coreset_time_se = coreset_time.std(axis=1) / numpy.sqrt(rounds) + labelling_time_se = labelling_time.std(axis=1) / numpy.sqrt(rounds) + coreset_ari_se = coreset_aris.std(axis=1) / numpy.sqrt(rounds) + full_ari_se = full_aris.std(axis=1) / numpy.sqrt(rounds) + + fig, ax = plt.subplots(1, 2, figsize=(20, 10)) + + # first subplot + ax[0].plot(ks, construction_times.mean(axis=1), label="Construction time") + # Stack the mean coreset time under the mean full labelling time + + ax[0].fill_between( + ks, + 0.0, + coreset_time_means, + label="coreset labelling time", + color="orange", + alpha=0.5, + ) + ax[0].fill_between( + ks, + coreset_time_means, + coreset_time_means + labelling_time_means, + label="Full labelling time", + color="green", + alpha=0.5, + ) + + # error bars: + ax[0].errorbar( + ks, + construction_time_means, + yerr=construction_time_se, + fmt="none", + ecolor="black", + ) + ax[0].errorbar( + ks, + coreset_time_means, + yerr=coreset_time_se, + fmt="none", + ecolor="black", + ) + ax[0].errorbar( + ks, + coreset_time_means + labelling_time_means, + yerr=labelling_time_se, + fmt="none", + ecolor="black", + ) + + ax[0].set_yscale("log") + + ax[0].tick_params(axis="both", which="major", labelsize=fontsize) + + ax[0].set_xlabel("k", fontsize=fontsize) + ax[0].set_ylabel("Time (s)", fontsize=fontsize) + ax[0].legend(fontsize=fontsize) + + # second subplot + + ax[1].plot(ks, coreset_aris.mean(axis=1), label="Coreset ARI") + ax[1].plot(ks, full_aris.mean(axis=1), label="Full ARI") + + # error bars: + ax[1].errorbar( + ks, + coreset_ari_means, + yerr=coreset_ari_se, + fmt="none", + ecolor="black", + ) + + ax[1].errorbar( + ks, + full_ari_means, + yerr=full_ari_se, + fmt="none", + ecolor="black", + ) + + 
ax[1].tick_params(axis="both", which="major", labelsize=fontsize) + + ax[1].set_xlabel("k", fontsize=fontsize) + ax[1].set_ylabel("ARI", fontsize=fontsize) + + ax[1].legend(fontsize=fontsize) + fig.suptitle(title, fontsize=20) + + fig.tight_layout() + plt.savefig("test_default_coreset.png") + + +def test_sbm(): + """ + Test the stochastic block model generator + """ + import time -def test_sum_as_string(): - """Test the sum_as_string function from the coreset_sc module.""" - # Import the Rust-based module import coreset_sc + import networkx as nx + + n = 20 + k = 5 + p = 0.5 + q = (1.0 / n) / k + + print(n, k, p, q) + + t = time.time() + A, labels = coreset_sc.gen_sbm(n, k, p, q) + print(f"Time: {time.time()-t}") + + # convert to networkx graph + G = nx.from_scipy_sparse_array(A) + + # remove all self loops: + G.remove_edges_from(nx.selfloop_edges(G)) + + # draw and save the graph + import matplotlib.pyplot as plt + + plt.figure(figsize=(20, 20)) + nx.draw(G) + plt.savefig("test_sbm.png") + # print pwd: + + +# def test_both_sbms(): + +# import time + +# import coreset_sc + +# n = 5000 +# k = 50 +# p = 0.1 +# q = (1.0 / n) / k + +# t0 = time.time() +# _fast_sbm = coreset_sc.gen_sbm(n, k, p, q) +# fast_time = time.time() - t0 + +# t0 = time.time() +# _slow_sbm = coreset_sc.stag_sbm(n, k, p, q) +# slow_time = time.time() - t0 + +# print(f"fast time: {fast_time:.5f}s\t slow time: {slow_time:.5f}s") + +# raise Exception("stop") - # Call the function - result = coreset_sc.sum_as_string(3, 4) - # Assert that the result is the expected string - assert result == "7", f"Expected '7' but got {result}" +if __name__ == "__main__": + test_default_coreset() + plot_coreset_test()
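+
+
+# A small extra sanity check (a sketch, not part of the benchmark above): it assumes
+# gen_sbm returns a symmetric scipy CSR adjacency matrix with self-loops plus a label
+# array, as the README and test_sbm suggest.
+def test_sbm_output_is_well_formed():
+    import coreset_sc
+    import numpy
+
+    n, k, p = 50, 4, 0.5
+    q = (1.0 / n) / k
+    A, labels = coreset_sc.gen_sbm(n, k, p, q)
+    labels = numpy.asarray(labels)
+
+    # square, with one row/column per node
+    assert A.shape == (n * k, n * k)
+    # symmetric: no entry differs from its transpose
+    assert (abs(A - A.T) > 1e-12).nnz == 0
+    # every node carries a self-loop
+    assert (A.diagonal() > 0).all()
+    # labels cover 0..k-1 with n nodes per cluster
+    assert sorted(set(labels.tolist())) == list(range(k))
+    assert numpy.bincount(labels).tolist() == [n] * k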