diff --git a/nimare/dataset.py b/nimare/dataset.py
index 23b323e3c..122a2d3b5 100755
--- a/nimare/dataset.py
+++ b/nimare/dataset.py
@@ -604,7 +604,8 @@ def get_studies_by_label(self, labels=None, label_threshold=0.001):
             the labels above the threshold, it will be returned.
             Default is None.
         label_threshold : :obj:`float`, optional
-            Default is 0.5.
+            Default is 0.001, which corresponds to terms appearing at least once in abstracts
+            for the commonly-used Neurosynth TF-IDF feature set.
 
         Returns
         -------
@@ -657,27 +658,49 @@ def get_studies_by_mask(self, mask):
         found_ids = list(self.coordinates.loc[distances, "id"].unique())
         return found_ids
 
-    def get_studies_by_coordinate(self, xyz, r=20):
+    def get_studies_by_coordinate(self, xyz, r=None, n=None):
         """Extract list of studies with at least one focus within radius of requested coordinates.
 
+        .. versionchanged:: 0.0.9
+
+            New option (``n``) supported to identify the closest ``n`` studies to a coordinate.
+            Default value for ``r`` changed to None.
+
         Parameters
         ----------
         xyz : (X x 3) array_like
             List of coordinates against which to find studies.
         r : :obj:`float`, optional
-            Radius (in mm) within which to find studies. Default is 20mm.
+            Radius (in mm) within which to find studies. Mutually exclusive with ``n``.
+            Default is None.
+        n : :obj:`int`, optional
+            Number of closest studies to identify. Mutually exclusive with ``r``.
+            Default is None.
 
         Returns
         -------
         found_ids : :obj:`list`
-            A list of IDs from the Dataset with at least one focus within
-            radius r of requested coordinates.
+            A list of IDs from the Dataset matching the search criterion.
         """
+        if n and r:
+            raise ValueError("Only one of 'r' and 'n' may be provided.")
+        elif not n and not r:
+            raise ValueError("Either 'r' or 'n' must be provided.")
+
         from scipy.spatial.distance import cdist
 
-        xyz = np.array(xyz)
+        temp_coords = self.coordinates.copy()
+
+        xyz = np.atleast_2d(xyz)
         assert xyz.shape[1] == 3 and xyz.ndim == 2
-        distances = cdist(xyz, self.coordinates[["x", "y", "z"]].values)
-        distances = np.any(distances <= r, axis=0)
-        found_ids = list(self.coordinates.loc[distances, "id"].unique())
+        distances = cdist(xyz, temp_coords[["x", "y", "z"]].values)
+        distances = np.min(distances, axis=0)
+        temp_coords["distance"] = distances
+        min_dist_ser = temp_coords.groupby("id")["distance"].min()
+
+        if r:
+            found_ids = min_dist_ser.loc[min_dist_ser <= r].index.tolist()
+        elif n:
+            found_ids = min_dist_ser.nsmallest(n).index.tolist()
+
         return found_ids
diff --git a/nimare/parcellate.py b/nimare/parcellate.py
new file mode 100644
index 000000000..b584ab1dc
--- /dev/null
+++ b/nimare/parcellate.py
@@ -0,0 +1,505 @@
+"""Parcellation tools."""
+import datetime
+import inspect
+import logging
+import os
+from tempfile import mkstemp
+
+import numpy as np
+from nilearn._utils import load_niimg
+from scipy.spatial import distance
+from sklearn.cluster import KMeans
+
+from .base import NiMAREBase
+from .extract.utils import _get_dataset_dir
+from .meta.cbma.ale import ALE
+from .meta.cbma.base import CBMAEstimator
+from .results import MetaResult
+from .utils import add_metadata_to_dataframe, check_type, listify, use_memmap, vox2mm
+
+LGR = logging.getLogger(__name__)
+
+
+class CoordCBP(NiMAREBase):
+    """Perform coordinate-based coactivation-based parcellation.
+
+    .. versionadded:: 0.0.10
+
+    Parameters
+    ----------
+    target_mask : :obj:`nibabel.nifti1.Nifti1Image`
+        Mask of the parcellation target.
+        Currently must be in the same space/resolution as the Dataset mask.
+    n_clusters : :obj:`list` of :obj:`int`
+        Number of clusters to evaluate in clustering.
+        Metrics will be calculated for each cluster count in the list, to allow users to select
+        the optimal cluster solution.
+    r : :obj:`float` or :obj:`list` of :obj:`float` or None, optional
+        Radius (in mm) within which to find studies.
+        If a list of values is provided, then MACMs and clustering will be performed across all
+        values, and a selection procedure will be performed to identify the optimal ``r``.
+        Mutually exclusive with ``n``. Default is None.
+    n : :obj:`int` or :obj:`list` of :obj:`int` or None, optional
+        Number of closest studies to identify.
+        If a list of values is provided, then MACMs and clustering will be performed across all
+        values, and a selection procedure will be performed to identify the optimal ``n``.
+        Mutually exclusive with ``r``. Default is None.
+    meta_estimator : :obj:`nimare.meta.cbma.base.CBMAEstimator`, optional
+        CBMA Estimator with which to run the MACMs.
+        Default is :obj:`nimare.meta.cbma.ale.ALE`.
+    target_image : :obj:`str`, optional
+        Name of the meta-analysis results image to use for clustering.
+        Default is "stat".
+
+    Notes
+    -----
+    This approach deviates in a number of respects from the method described in
+    Chase et al. (2020), including the fact that the scikit-learn implementation of the K-means
+    clustering algorithm only supports Euclidean distance, so correlation distance is not used.
+    """
+
+    _required_inputs = {"coordinates": ("coordinates", None)}
+
+    def __init__(
+        self,
+        target_mask,
+        n_clusters,
+        r=None,
+        n=None,
+        meta_estimator=None,
+        target_image="stat",
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+
+        if meta_estimator is None:
+            meta_estimator = ALE()
+        else:
+            meta_estimator = check_type(meta_estimator, CBMAEstimator)
+
+        if r and n:
+            raise ValueError("Only one of 'r' and 'n' may be provided.")
+        elif not r and not n:
+            raise ValueError("Either 'r' or 'n' must be provided.")
+
+        self.target_mask = load_niimg(target_mask)
+        self.meta_estimator = meta_estimator
+        self.target_image = target_image
+        self.n_clusters = listify(n_clusters)
+        self.filter_selection = isinstance(r, list) or isinstance(n, list)
+        self.filter_type = "r" if r else "n"
+        self.alpha = 0.05
+        self.r = listify(r)
+        self.n = listify(n)
+        self.masker = None
+
+    def _preprocess_input(self, dataset):
+        """Mask required input images using either the dataset's mask or the estimator's.
+
+        Also, insert required metadata into the coordinates DataFrame.
+        """
+        super()._preprocess_input(dataset)
+
+        # All extra (non-ijk) parameters for a kernel should be overrideable as
+        # parameters to __init__, so we can access them with get_params()
+        kt_args = list(self.meta_estimator.kernel_transformer.get_params().keys())
+
+        # Integrate "sample_size" from metadata into the DataFrame so that
+        # kernel_transformer can access it.
+        if "sample_size" in kt_args:
+            self.inputs_["coordinates"] = add_metadata_to_dataframe(
+                dataset,
+                self.inputs_["coordinates"],
+                metadata_field="sample_sizes",
+                target_column="sample_size",
+                filter_func=np.mean,
+            )
+
+    @use_memmap(LGR, n_files=1)
+    def _fit(self, dataset):
+        """Perform coordinate-based coactivation-based parcellation on dataset.
+
+        Parameters
+        ----------
+        dataset : :obj:`nimare.dataset.Dataset`
+            Dataset to analyze.
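+
+        Returns
+        -------
+        :obj:`tuple`
+            Provisional return value while this method is under development:
+            ``(labels, silhouettes, ratios, deviant_props, vi_results)``.
+            See the early ``return`` in the body below; the eventual plan appears to be to
+            return an ``images`` dictionary instead.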
+        """
+        self.dataset = dataset
+        self.masker = self.masker or dataset.masker
+
+        # Loop through voxels in target_mask, selecting studies for each and running
+        # MACMs (without multiple comparisons correction)
+        target_ijk = np.vstack(np.where(self.target_mask.get_fdata())).T
+        target_ijk = target_ijk[:20, :]  # FIXME: temporary cap for testing; remove before release
+        target_xyz = vox2mm(target_ijk, self.masker.mask_img.affine)
+        n_target_voxels = target_xyz.shape[0]
+        n_mask_voxels = self.masker.transform(self.masker.mask_img).size
+
+        n_filters = len(getattr(self, self.filter_type))
+        labels = np.zeros((n_filters, len(self.n_clusters), n_target_voxels), dtype=int)
+        silhouettes = np.zeros((n_filters, len(self.n_clusters)), dtype=float)
+        kwargs = {"r": None, "n": None}
+
+        # Use a memmapped 2D array to hold the MACM results
+        start_time = datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
+        dataset_dir = _get_dataset_dir("temporary_files", data_dir=None)
+        _, memmap_filename = mkstemp(
+            prefix=self.__class__.__name__,
+            suffix=start_time,
+            dir=dataset_dir,
+        )
+        data = np.memmap(
+            memmap_filename,
+            dtype=float,
+            mode="w+",
+            shape=(n_target_voxels, n_mask_voxels),
+        )
+        ratios = np.zeros((n_filters, len(self.n_clusters)), dtype=float)
+
+        for i_filter in range(n_filters):
+            kwargs[self.filter_type] = getattr(self, self.filter_type)[i_filter]
+            for j_coord in range(n_target_voxels):
+                xyz = target_xyz[j_coord, :]
+                macm_ids = dataset.get_studies_by_coordinate(xyz, **kwargs)
+                coord_dset = dataset.slice(macm_ids)
+
+                # This seems like a somewhat inelegant solution
+                # Check if the meta method is a pairwise estimator
+                if "dataset2" in inspect.getfullargspec(self.meta_estimator.fit).args:
+                    unselected_ids = sorted(list(set(dataset.ids) - set(macm_ids)))
+                    unselected_dset = dataset.slice(unselected_ids)
+                    self.meta_estimator.fit(coord_dset, unselected_dset)
+                else:
+                    self.meta_estimator.fit(coord_dset)
+
+                data[j_coord, :] = self.meta_estimator.results.get_map(
+                    self.target_image,
+                    return_type="array",
+                )  # data is overwritten across filters
+
+            # Perform clustering
+            for j_cluster, cluster_count in enumerate(self.n_clusters):
+                kmeans = KMeans(
+                    n_clusters=cluster_count,
+                    init="k-means++",
+                    n_init=10,
+                    random_state=0,
+                    algorithm="elkan",
+                ).fit(data)
+                labels[i_filter, j_cluster, :] = kmeans.labels_
+
+                silhouettes[i_filter, j_cluster] = self._average_silhouette(data, kmeans.labels_)
+
+                # Necessary calculations for the ratios metric
+                avg_intercenter_distance = np.mean(
+                    distance.pdist(
+                        kmeans.cluster_centers_,
+                        metric="euclidean",
+                    )
+                )
+                total_voxel_center_distance = 0
+                for k_cluster_val in range(cluster_count):
+                    cluster_val_voxels = np.where(kmeans.labels_ == k_cluster_val)[0]
+                    cluster_val_center = kmeans.cluster_centers_[k_cluster_val, :]
+                    cluster_val_features = data[cluster_val_voxels, :]
+                    cluster_val_distances = distance.cdist(
+                        cluster_val_center[None, :],
+                        cluster_val_features,
+                        metric="euclidean",
+                    )
+                    total_voxel_center_distance += np.sum(cluster_val_distances)
+                avg_intracluster_distance = total_voxel_center_distance / n_target_voxels
+                ratio = avg_intercenter_distance / avg_intracluster_distance
+                ratios[i_filter, j_cluster] = ratio
+
+        # TODO: Implement re-labeling procedure across filters, for each cluster solution
+
+        # return labels, silhouettes, ratios
+        # Identify range of optimal filters
+        # retained_filter_idx = self._filter_selection(labels)
+        # labels = labels[retained_filter_idx, ...]
+        # silhouettes = silhouettes[retained_filter_idx, ...]
+        # ratios = ratios[retained_filter_idx, ...]
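+
+        # A possible sketch for the re-labeling TODO above (an assumption, not the
+        # procedure from Chase et al., 2020): K-means label values are arbitrary, so
+        # solutions could be aligned to a reference filter by maximizing label overlap
+        # with the Hungarian algorithm before any cross-filter comparison:
+        #
+        #     from scipy.optimize import linear_sum_assignment
+        #     for j_cluster, cluster_count in enumerate(self.n_clusters):
+        #         ref = labels[0, j_cluster, :]
+        #         for i_filter in range(1, n_filters):
+        #             cur = labels[i_filter, j_cluster, :]
+        #             contingency = np.array(
+        #                 [
+        #                     [np.sum((ref == a) & (cur == b)) for b in range(cluster_count)]
+        #                     for a in range(cluster_count)
+        #                 ]
+        #             )
+        #             ref_ids, cur_ids = linear_sum_assignment(-contingency)
+        #             remap = np.empty(cluster_count, dtype=int)
+        #             remap[cur_ids] = ref_ids
+        #             labels[i_filter, j_cluster, :] = remap[cur]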
+
+        # Calculate metrics
+        vm_decision_criteria, deviant_props = self._voxel_misclassification(
+            labels,
+            alpha=self.alpha,
+        )  # (n_clusters, 2) criteria and (n_filters, n_clusters) deviant proportions
+        vi_results = self._variation_of_information(labels, self.n_clusters)  # (n_filters, n_clusters, 2)
+
+        # FIXME: Early return for development; everything below is currently unreachable.
+        return labels, silhouettes, ratios, deviant_props, vi_results
+
+        s_results = silhouettes  # float
+        nvp_results = self._nondominant_voxel_percentage()  # ?
+        cdr_results = self._cluster_distance_ratio(ratios)  # float
+        vote = np.nansum(
+            np.dstack((vm_decision_criteria, vi_results, s_results, nvp_results, cdr_results)),
+            axis=2,
+        )
+        assert vote.shape == (n_filters, len(self.n_clusters))
+
+        # Clean up MACM data memmap
+        LGR.info(f"Removing temporary file: {memmap_filename}")
+        os.remove(memmap_filename)
+
+        images = {"labels": labels}
+        return images
+
+    def _filter_selection(self, labels):
+        """Select a range of optimal filter values based on consistency of cluster assignment.
+
+        Parameters
+        ----------
+        labels : :obj:`numpy.ndarray` of shape (n_filters, n_clusters, n_voxels)
+            Labeling results from a range of KMeans clustering runs.
+
+        Notes
+        -----
+        From Chase et al. (2020):
+        We implemented a two-step procedure that involved a decision on those filter sizes to be
+        included in the final analysis and subsequently a decision on the optimal cluster
+        solution.
+        In the first step, we examined the consistency of the cluster assignment for the
+        individual voxels across the cluster solutions of the co-occurrence maps performed at
+        different filter sizes.
+        We selected a filter range with the lowest number of deviants, that is, the number of
+        voxels that were assigned differently compared with the solution from the majority of
+        filters.
+        In other words, we identified those filter sizes which produced solutions most similar
+        to the consensus solution across all filter sizes.
+        """
+        from scipy import stats
+
+        n_filters, n_clusters, n_voxels = labels.shape
+        deviant_proportions = np.zeros((n_filters, n_clusters))
+        for i_cluster in range(n_clusters):
+            cluster_labels = labels[:, i_cluster, :]
+            cluster_labels_mode = stats.mode(cluster_labels, axis=0)[0]
+            is_deviant = cluster_labels != cluster_labels_mode
+            deviant_proportion = is_deviant.mean(axis=1)
+            assert deviant_proportion.size == n_filters
+            deviant_proportions[:, i_cluster] = deviant_proportion
+        # Z-score within each cluster solution
+        deviant_z = stats.zscore(deviant_proportions, axis=0)
+        filter_deviant_z = deviant_z.sum(axis=1)
+        min_deviants_filter = np.where(filter_deviant_z == np.min(filter_deviant_z))[0]
+
+        # NOTE: This is not the end, but I don't know how filters were selected based on
+        # this array
+        return min_deviants_filter
+
+    def _voxel_misclassification(self, selected_filter_labels, alpha=0.05):
+        """Calculate voxel misclassification metric.
+
+        Parameters
+        ----------
+        selected_filter_labels : :obj:`numpy.ndarray` of shape (n_selected_filters, n_clusters, n_voxels)
+            Labeling results from a range of KMeans clustering runs, limited to those filters
+            that survived the filter selection procedure.
+
+        Notes
+        -----
+        From Chase et al. (2020):
+        First, misclassified voxels (deviants) were examined as a topological criterion,
+        with optimal K parcellations being those in which the percentage of deviants was not
+        significantly increased compared to the K − 1 solution, but where the K + 1 solution
+        was associated with significantly increased deviants.
+
+        TS: Deviants are presumably calculated only from the range of filters selected in the
+        filter selection step.
+
+        TS: To measure significance, we could do a series of pairwise proportion z-tests.
+        Left side null hypothesis: proportion from K is <= K - 1
+        Left side alternative: K > K - 1
+        Accept on null
+        z, p = proportions_ztest([k, k-1], [n, n], alternative="larger")
+        p >= alpha --> YAY
+        Right side null hypothesis: proportion from K is >= K + 1
+        Right side alternative: K < K + 1
+        Accept on rejected null
+        z, p = proportions_ztest([k, k+1], [n, n], alternative="smaller")
+        p < alpha --> YAY
+        """
+        from scipy import stats
+        from statsmodels.stats.proportion import proportions_ztest
+
+        n_filters, n_clusters, n_voxels = selected_filter_labels.shape
+        count = n_voxels * n_filters
+        deviant_counts = np.zeros((n_filters, n_clusters))
+        for i_cluster in range(n_clusters):
+            cluster_labels = selected_filter_labels[:, i_cluster, :]
+            cluster_labels_mode = stats.mode(cluster_labels, axis=0)[0]
+            is_deviant = cluster_labels != cluster_labels_mode
+            deviant_count = is_deviant.sum(axis=1)
+            assert deviant_count.size == n_filters
+            deviant_counts[:, i_cluster] = deviant_count
+
+        # Float array, not bool: the boundary cells below are set to NaN, and assigning
+        # NaN to a boolean array would silently coerce it to True.
+        decision_criteria = np.full((n_clusters, 2), np.nan)
+
+        for i_cluster in range(n_clusters):
+            cluster_deviant_count = np.sum(deviant_counts[:, i_cluster])
+
+            # Compare to K - 1
+            if i_cluster > 0:
+                m1_cluster_deviant_count = np.sum(deviant_counts[:, i_cluster - 1])
+                _, p = proportions_ztest(
+                    count=[cluster_deviant_count, m1_cluster_deviant_count],
+                    nobs=[count, count],
+                    alternative="larger",
+                )
+                decision_criteria[i_cluster, 0] = p >= alpha
+            else:
+                decision_criteria[i_cluster, 0] = np.nan
+
+            # Compare to K + 1
+            if i_cluster < (n_clusters - 1):
+                p1_cluster_deviant_count = np.sum(deviant_counts[:, i_cluster + 1])
+                _, p = proportions_ztest(
+                    count=[cluster_deviant_count, p1_cluster_deviant_count],
+                    nobs=[count, count],
+                    alternative="smaller",
+                )
+                decision_criteria[i_cluster, 1] = p < alpha
+            else:
+                decision_criteria[i_cluster, 1] = np.nan
+
+        # NaN (no neighboring solution) is truthy here, so boundary criteria pass
+        decision_results = np.all(decision_criteria, axis=1)
+        good_cluster_idx = np.where(decision_results)[0]
+        if not good_cluster_idx.size:
+            LGR.warning("No cluster solution found according to VM metric.")
+
+        deviant_props = deviant_counts / n_voxels
+
+        return decision_criteria, deviant_props
+
+    def _variation_of_information(self, labels, cluster_counts):
+        """Calculate variation of information metric.
+
+        Parameters
+        ----------
+        labels : :obj:`numpy.ndarray` of shape (n_filters, n_clusters, n_voxels)
+        cluster_counts : :obj:`list` of :obj:`int`
+            The set of K values tested. Must be n_clusters items long.
+
+        Returns
+        -------
+        vi_values : :obj:`numpy.ndarray` of shape (n_filters, n_clusters, 2)
+            Variation of information values for each filter and each cluster, where the last
+            dimension has two values: the VI value for K vs. K - 1 and the value for K vs. K + 1.
+            K values with no K - 1 or K + 1 have a NaN in the associated cell.
+
+        Notes
+        -----
+        From Chase et al. (2020):
+        Second, the variation of information (VI) metric was employed as an information-theoretic
+        criterion to assess the similarity of cluster assignments for each filter size between
+        the current solution and the neighboring (K − 1 and K + 1) solutions.
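+
+        In information-theoretic terms, VI(X; Y) = H(X) + H(Y) - 2 * I(X; Y), where H is the
+        entropy of a partition and I is the mutual information between the two partitions.
+        VI is 0 for identical partitions and grows as they diverge, so lower values relative
+        to the neighboring solutions indicate more stable cluster assignments.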
+        """
+        from .utils import variation_of_information
+
+        n_filters, n_clusters, _ = labels.shape
+        assert len(cluster_counts) == n_clusters
+
+        vi_values = np.empty((n_filters, n_clusters, 2))
+        for i_filter in range(n_filters):
+            filter_labels = labels[i_filter, :, :]
+            for j_cluster, cluster_count in enumerate(cluster_counts):
+                cluster_partition = [
+                    np.where(filter_labels[j_cluster, :] == k_cluster_num)[0]
+                    for k_cluster_num in range(cluster_count)
+                ]
+
+                # Calculate VI between K and K - 1
+                if j_cluster > 0:
+                    cluster_m1_partition = [
+                        np.where(filter_labels[j_cluster - 1, :] == k_cluster_num)[0]
+                        for k_cluster_num in range(cluster_counts[j_cluster - 1])
+                    ]
+
+                    vi_values[i_filter, j_cluster, 0] = variation_of_information(
+                        cluster_partition,
+                        cluster_m1_partition,
+                    )
+                else:
+                    vi_values[i_filter, j_cluster, 0] = np.nan
+
+                # Calculate VI between K and K + 1
+                if j_cluster < (n_clusters - 1):
+                    cluster_p1_partition = [
+                        np.where(filter_labels[j_cluster + 1, :] == k_cluster_num)[0]
+                        for k_cluster_num in range(cluster_counts[j_cluster + 1])
+                    ]
+
+                    vi_values[i_filter, j_cluster, 1] = variation_of_information(
+                        cluster_partition,
+                        cluster_p1_partition,
+                    )
+                else:
+                    vi_values[i_filter, j_cluster, 1] = np.nan
+
+        return vi_values
+
+    def _average_silhouette(self, data, labels):
+        """Calculate average silhouette score.
+
+        Notes
+        -----
+        From Chase et al. (2020):
+        Third, the silhouette value averaged across voxels for each filter size was considered a
+        cluster separation criterion.
+        """
+        from sklearn.metrics import silhouette_score
+
+        # silhouette_score already returns the mean silhouette coefficient across
+        # samples (here, voxels)
+        silhouette = silhouette_score(data, labels, metric="euclidean", random_state=None)
+        return silhouette
+
+    def _nondominant_voxel_percentage(self):
+        """Calculate percentage-of-voxels-not-with-parent metric.
+
+        Notes
+        -----
+        From Chase et al. (2020):
+        Fourth, we assessed the percentage of voxels not related to the dominant parent cluster
+        compared with the K − 1 solution as a second topological criterion.
+        This measure corresponds to the percentage of voxels that are not present in hierarchy,
+        K, compared with the previous K − 1 solution, and is related to the hierarchy index.
+        """
+        pass
+
+    def _cluster_distance_ratio(self, ratios):
+        """Calculate change-in-inter/intra-cluster-distance metric.
+
+        Parameters
+        ----------
+        ratios : :obj:`numpy.ndarray` of shape (n_filters, n_cluster_solutions)
+            Inter- versus intra-cluster distance ratios from the clustering step.
+
+        Notes
+        -----
+        From Chase et al. (2020):
+        Finally, the change in inter- versus intra-cluster distance ratio was computed as a
+        second cluster separation criterion.
+        This ratio is the first derivative of the ratio between the average distance of a given
+        voxel to its own cluster center and the average distance between the cluster centers.
+        """
+        # np.diff gives the first-order difference across cluster solutions,
+        # i.e., a discrete first derivative of the ratios
+        ratio_derivs = np.diff(ratios, n=1, axis=1)
+        return ratio_derivs
+
+    def fit(self, dataset, drop_invalid=True):
+        """Perform coordinate-based coactivation-based parcellation on dataset.
+
+        Parameters
+        ----------
+        dataset : :obj:`nimare.dataset.Dataset`
+            Dataset to analyze.
+        drop_invalid : :obj:`bool`, optional
+            Whether to automatically ignore any studies missing the required data.
+            Default is True.
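+
+        Examples
+        --------
+        Intended usage, as a sketch (the class is still under development, and the argument
+        values here are purely illustrative)::
+
+            >>> cbp = CoordCBP(target_mask=mask_img, n_clusters=[2, 3, 4], r=[6, 8, 10])
+            >>> results = cbp.fit(dset)  # doctest: +SKIP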
+        """
+        # self._validate_input(dataset, drop_invalid=drop_invalid)
+        # self._preprocess_input(dataset)
+        maps = self._fit(dataset)
+
+        if hasattr(self, "masker") and self.masker is not None:
+            masker = self.masker
+        else:
+            masker = dataset.masker
+
+        # FIXME: Early return for development; the MetaResult creation below is
+        # currently unreachable.
+        return maps
+
+        self.results = MetaResult(self, masker, maps)
+        return self.results
diff --git a/nimare/tests/test_dataset.py b/nimare/tests/test_dataset.py
index 3f7007698..31a2d79f1 100644
--- a/nimare/tests/test_dataset.py
+++ b/nimare/tests/test_dataset.py
@@ -31,7 +31,15 @@ def test_dataset_smoke():
     assert isinstance(dset.get_images(imtype="beta"), list)
     assert isinstance(dset.get_metadata(field="sample_sizes"), list)
     assert isinstance(dset.get_studies_by_label("cogat_cognitive_control"), list)
-    assert isinstance(dset.get_studies_by_coordinate(np.array([[20, 20, 20]])), list)
+    assert isinstance(dset.get_studies_by_coordinate([20, 20, 20], r=20), list)
+
+    assert len(dset.get_studies_by_coordinate([20, 20, 20], n=2)) == 2
+    assert len(dset.get_studies_by_coordinate([20, 20, 20], n=3)) == 3
+    with pytest.raises(ValueError):
+        dset.get_studies_by_coordinate([20, 20, 20])
+
+    with pytest.raises(ValueError):
+        dset.get_studies_by_coordinate([20, 20, 20], r=20, n=4)
 
     # If label is not available, raise ValueError
     with pytest.raises(ValueError):
diff --git a/nimare/utils.py b/nimare/utils.py
old mode 100755
new mode 100644
index 1e33eaab7..7c388ced1
--- a/nimare/utils.py
+++ b/nimare/utils.py
@@ -1158,3 +1158,41 @@ def _get_cluster_coms(labeled_cluster_arr):
     )
 
     return cluster_coms
+
+
+def variation_of_information(X, Y):
+    """Calculate variation of information metric.
+
+    Parameters
+    ----------
+    X, Y : :obj:`list` of :obj:`list`
+        Partitions to compare.
+
+    Returns
+    -------
+    :obj:`float`
+        Variation of information between the two partitions.
+
+    Notes
+    -----
+    Implementation adapted from https://gist.github.com/jwcarr/626cbc80e0006b526688.
+
+    Examples
+    --------
+    >>> X = [[1, 2, 3], [4, 5, 6, 7]]
+    >>> Y = [[1, 2, 3, 4], [5, 6, 7]]
+    >>> variation_of_information(X, Y)
+    0.9271749993818661
+    """
+    from math import log
+
+    n = float(sum(len(x) for x in X))
+    sigma = 0.0
+    for x in X:
+        p = len(x) / n
+        for y in Y:
+            q = len(y) / n
+            r = len(set(x) & set(y)) / n
+            if r > 0.0:
+                sigma += r * (log(r / p, 2) + log(r / q, 2))
+    return abs(sigma)
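
Note on the reworked ``Dataset.get_studies_by_coordinate`` API above, as a quick usage sketch
(``dset`` stands in for any existing :obj:`nimare.dataset.Dataset`):

    # Radius search: studies with at least one focus within 10 mm of the coordinate
    ids_within_r = dset.get_studies_by_coordinate([[20, 20, 20]], r=10)

    # Nearest-study search: the 5 studies whose closest foci are nearest the coordinate
    ids_nearest_n = dset.get_studies_by_coordinate([20, 20, 20], n=5)

    # 'r' and 'n' are mutually exclusive; passing both, or neither, raises a ValueError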