Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add coordinate-based coactivation-based parcellation class #533

Draft
wants to merge 27 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
08ae309
Add n option to get_studies_by_coordinate
tsalo Jun 30, 2021
88d5c05
Mock up maybe passable CoordCBP class.
tsalo Jun 30, 2021
3ce90f1
Fix docstrings.
tsalo Jun 30, 2021
40f4585
Don't calculate distances.
tsalo Jul 1, 2021
530048f
Remove extra newline.
tsalo Jul 1, 2021
8382704
Clustering should be fairly good. Still need to implement metrics.
tsalo Jul 1, 2021
c79b251
Add empty methods for the different metrics.
tsalo Jul 1, 2021
3e2d580
Work on filter-selection metric. Far from done.
tsalo Jul 1, 2021
3365d5c
Add a couple of metrics.
tsalo Jul 1, 2021
7081604
Improve metric documentation.
tsalo Jul 2, 2021
12cddec
VI should be fairly good now.
tsalo Jul 2, 2021
7023291
Merge branch 'main' into coord-cbp
tsalo Jul 9, 2021
f37751a
Draft voxel misclassification metric.
tsalo Jul 9, 2021
f555790
Merge branch 'main' into coord-cbp
tsalo Jul 12, 2021
0896745
Draft another metric.
tsalo Jul 14, 2021
f9611db
Little cleanup.
tsalo Jul 14, 2021
909c822
Move ratio calculations to main fit method.
tsalo Jul 14, 2021
65857d1
More cleanup.
tsalo Jul 14, 2021
65b9675
More work and debugging.
tsalo Jul 14, 2021
118e718
Mention re-labeling (not implemented).
tsalo Jul 15, 2021
267a135
Merge branch 'main' into coord-cbp
tsalo Dec 21, 2021
530a4de
Remove added line.
tsalo Dec 21, 2021
e58a9d9
Merge branch 'main' into coord-cbp
tsalo Apr 20, 2022
a1bcf0f
Merge remote-tracking branch 'upstream/main' into jperaza-coord-cbp
JulioAPeraza Jul 25, 2022
1c352f7
Update test_dataset.py
JulioAPeraza Oct 4, 2022
917e943
Merge branch 'main' into coord-cbp
JulioAPeraza Oct 4, 2022
0c60dd5
Update utils.py
JulioAPeraza Oct 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 33 additions & 9 deletions nimare/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,8 @@ def get_studies_by_label(self, labels=None, label_threshold=0.001):
the labels above the threshold, it will be returned.
Default is None.
label_threshold : :obj:`float`, optional
Default is 0.5.
Default is 0.001, which corresponds to terms appearing at least once in abstracts
for the commonly-used Neurosynth TF-IDF feature set.

Returns
-------
Expand Down Expand Up @@ -600,27 +601,50 @@ def get_studies_by_mask(self, mask):
found_ids = list(self.coordinates.loc[distances, "id"].unique())
return found_ids

def get_studies_by_coordinate(self, xyz, r=20):
def get_studies_by_coordinate(self, xyz, r=None, n=None):
"""Extract list of studies with at least one focus within radius of requested coordinates.

.. versionchanged:: 0.0.9

New option (n) supported to identify closest n studies to coordinate.
Default value for r changed to None.

Parameters
----------
xyz : (X x 3) array_like
List of coordinates against which to find studies.
r : :obj:`float`, optional
Radius (in mm) within which to find studies. Default is 20mm.
Radius (in mm) within which to find studies. Mutually exclusive with ``n``.
Default is None.
n : :obj:`int`, optional
Number of closest studies to identify. Mutually exclusive with ``r``.
Default is None.

Returns
-------
found_ids : :obj:`list`
A list of IDs from the Dataset with at least one focus within
radius r of requested coordinates.
A list of IDs from the Dataset matching the search criterion.
"""

if n and r:
raise ValueError("Only one of 'r' and 'n' may be provided.")
elif not n and not r:
raise ValueError("Either 'r' or 'n' must be provided.")

from scipy.spatial.distance import cdist

xyz = np.array(xyz)
temp_coords = self.coordinates.copy()

xyz = np.atleast_2d(xyz)
assert xyz.shape[1] == 3 and xyz.ndim == 2
distances = cdist(xyz, self.coordinates[["x", "y", "z"]].values)
distances = np.any(distances <= r, axis=0)
found_ids = list(self.coordinates.loc[distances, "id"].unique())
distances = cdist(xyz, temp_coords[["x", "y", "z"]].values)
distances = np.min(distances, axis=0)
temp_coords["distance"] = distances
min_dist_ser = temp_coords.groupby("id")["distance"].min()

if r:
found_ids = min_dist_ser.loc[min_dist_ser <= r].index.tolist()
elif n:
found_ids = min_dist_ser.nsmallest(n).index.tolist()

return found_ids
166 changes: 166 additions & 0 deletions nimare/parcellate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""Parcellation tools."""
import inspect
import logging

import numpy as np
from sklearn.cluster import KMeans

from .base import NiMAREBase
from .meta.base import CBMAEstimator
from .meta.cbma.ale import ALE
from .results import MetaResult
from .utils import add_metadata_to_dataframe, check_type, listify, use_memmap, vox2mm

LGR = logging.getLogger(__name__)


class CoordCBP(NiMAREBase):
"""Perform coordinate-based coactivation-based parcellation.

.. versionadded:: 0.0.10

Parameters
----------
target_mask : :obj:`nibabel.Nifti1.Nifti1Image`
Mask of target of parcellation.
Currently must be in same space/resolution as Dataset mask.
n_clusters : :obj:`list` of :obj:`int`
Number of clusters to evaluate in clustering.
r : :obj:`float` or None, optional
Radius (in mm) within which to find studies. Mutually exclusive with ``n``.
Default is None.
n : :obj:`int` or None, optional
Number of closest studies to identify. Mutually exclusive with ``r``.
Default is None.
meta_estimator : :obj:`nimare.meta.base.CBMAEstimator`, optional
CBMA Estimator with which to run the MACMs.
Default is :obj:`nimare.meta.cbma.ale.ALE`.
target_image : :obj:`str`, optional
Name of meta-analysis results image to use for clustering.
Default is "ale", which is specific to the ALE estimator.
"""

_required_inputs = {"coordinates": ("coordinates", None)}

def __init__(
self,
target_mask,
n_clusters,
r=None,
n=None,
meta_estimator=None,
target_image="ale",
*args,
**kwargs,
):
super().__init__(*args, **kwargs)

if meta_estimator is None:
meta_estimator = ALE()
else:
meta_estimator = check_type(meta_estimator, CBMAEstimator)

if r and n:
raise ValueError("Only one of 'r' and 'n' may be provided.")
elif not r and not n:
raise ValueError("Either 'r' or 'n' must be provided.")

self.meta_estimator = meta_estimator
self.target_image = target_image
self.n_clusters = listify(n_clusters)
self.r = r
self.n = n

def _preprocess_input(self, dataset):
"""Mask required input images using either the dataset's mask or the estimator's.

Also, insert required metadata into coordinates DataFrame.
"""
super()._preprocess_input(dataset)

# All extra (non-ijk) parameters for a kernel should be overrideable as
# parameters to __init__, so we can access them with get_params()
kt_args = list(self.kernel_transformer.get_params().keys())

# Integrate "sample_size" from metadata into DataFrame so that
# kernel_transformer can access it.
if "sample_size" in kt_args:
self.inputs_["coordinates"] = add_metadata_to_dataframe(
dataset,
self.inputs_["coordinates"],
metadata_field="sample_sizes",
target_column="sample_size",
filter_func=np.mean,
)

@use_memmap(LGR, n_files=1)
def _fit(self, dataset):
"""Perform coordinate-based coactivation-based parcellation on dataset.

Parameters
----------
dataset : :obj:`nimare.dataset.Dataset`
Dataset to analyze.
"""
self.dataset = dataset
self.masker = self.masker or dataset.masker
self.null_distributions_ = {}

# Loop through voxels in target_mask, selecting studies for each and running MACMs (no MCC)
target_ijk = np.vstack(np.where(self.target_mask.get_fdata()))
target_xyz = vox2mm(target_ijk, self.masker.mask_img.affine)
for i_coord in target_xyz.shape[1]:
xyz = target_xyz[:, i_coord]
macm_ids = dataset.get_studies_by_coordinate(xyz, r=self.r, n=self.n)
coord_dset = dataset.slice(macm_ids)

# This seems like a somewhat inelegant solution
# Check if the meta method is a pairwise estimator
if "dataset2" in inspect.getfullargspec(self.meta_estimator.fit).args:
unselected_ids = sorted(list(set(dataset.ids) - set(macm_ids)))
unselected_dset = dataset.slice(unselected_ids)
self.meta_estimator.fit(coord_dset, unselected_dset)
else:
self.meta_estimator.fit(coord_dset)
macm_data = self.meta_estimator.results.get_map(self.target_image, return_type="array")
if i_coord == 0:
data = np.zeros((target_xyz.shape[1], len(macm_data)))
data[i_coord, :] = macm_data

# Correlate voxel-wise MACM results
data = np.corrcoef(data)

# Convert voxel-wise correlation matrix to distances
data = 1 - data
tsalo marked this conversation as resolved.
Show resolved Hide resolved

# Perform clustering
labels = np.zeros((len(self.n_clusters), len(macm_data)), dtype=int)
for i_cluster, cluster_count in enumerate(self.n_clusters):
kmeans = KMeans(n_clusters=cluster_count, random_state=0).fit(data)
labels[i_cluster, :] = kmeans.labels_

images = {"labels": labels}
return images

def fit(self, dataset, drop_invalid=True):
"""Perform coordinate-based coactivation-based parcellation on dataset.

Parameters
----------
dataset : :obj:`nimare.dataset.Dataset`
Dataset to analyze.
drop_invalid : :obj:`bool`, optional
Whether to automatically ignore any studies without the required data or not.
Default is True.
"""
self._validate_input(dataset, drop_invalid=drop_invalid)
self._preprocess_input(dataset)
maps = self._fit(dataset)

if hasattr(self, "masker") and self.masker is not None:
masker = self.masker
else:
masker = dataset.masker

self.results = MetaResult(self, masker, maps)
return self.results
11 changes: 10 additions & 1 deletion nimare/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import nibabel as nib
import numpy as np
import pytest

import nimare
from nimare import dataset
Expand All @@ -23,7 +24,15 @@ def test_dataset_smoke():
assert isinstance(dset.get_images(imtype="beta"), list)
assert isinstance(dset.get_metadata(field="sample_sizes"), list)
assert isinstance(dset.get_studies_by_label("cogat_cognitive_control"), list)
assert isinstance(dset.get_studies_by_coordinate(np.array([[20, 20, 20]])), list)
assert isinstance(dset.get_studies_by_coordinate([20, 20, 20], r=20), list)
assert len(dset.get_studies_by_coordinate([20, 20, 20], n=2)) == 2
assert len(dset.get_studies_by_coordinate([20, 20, 20], n=3)) == 3
with pytest.raises(ValueError):
dset.get_studies_by_coordinate([20, 20, 20])

with pytest.raises(ValueError):
dset.get_studies_by_coordinate([20, 20, 20], r=20, n=4)

mask_data = np.zeros(dset.masker.mask_img.shape, int)
mask_data[40, 40, 40] = 1
mask_img = nib.Nifti1Image(mask_data, dset.masker.mask_img.affine)
Expand Down