Skip to content

Commit

Permalink
rois: Added parent-rois option.
Browse files Browse the repository at this point in the history
  • Loading branch information
stuarteberg committed Oct 18, 2024
1 parent 471d8dc commit 7eed8fa
Showing 1 changed file with 114 additions and 0 deletions.
114 changes: 114 additions & 0 deletions flyem_snapshot/inputs/rois.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import json
import logging
from graphlib import TopologicalSorter
from itertools import chain
from collections import namedtuple

Expand Down Expand Up @@ -102,6 +103,27 @@
"type": "string",
"default": ""
},
"parent-roiset": {
"description":
"Optional. If you list the name of another roiset here, then unlabeled points\n"
"in the current roiset will not be given label 0 ('<unspecified>').\n"
"Instead, they will be given a name according to the corresponding ROI from\n"
"the 'parent' roiset, such as 'Brain-unspecified'.\n",
"Note: The parent roiset MUST be listed in the config BEFORE the roiset(s) that reference it.\n"
"type": "string",
"default": ""
},
"parent-rois": {
"description":
"If you list a parent-roiset, you should list the particular ROIs from that\n"
"roiset which actually have child ROIs and thus have 'leftover' portions which\n"
"should be identified.\n",
"type": "array",
"items": {
"type": "string"
},
"default": []
}
}
}

Expand Down Expand Up @@ -186,6 +208,9 @@ def load_point_rois(cfg, point_df, roiset_names):
roi_ids = _apply_roi_renames(point_df, roiset_name, roi_ids, roiset_cfg['rename-rois'])
roisets[roiset_name] = roi_ids

_check_duplicate_rois(roisets)

roisets = _replace_unspecified_with_parent_rois(cfg, point_df, roisets)
return point_df, roisets


Expand Down Expand Up @@ -521,6 +546,95 @@ def _apply_roi_renames(point_df, roiset_name, roi_ids, renames):
return roi_ids


def _check_duplicate_rois(roisets):
all_rois = pd.Series([
k
for d in roisets.values()
for k in d.keys()
if k != '<unspecified>'
])

vc = all_rois.value_counts()
duplicate_rois = vc[vc > 1].index.tolist()
if duplicate_rois:
raise RuntimeError(f"ROIs duplicated in multiple roisets: {duplicate_rois}")


def _replace_unspecified_with_parent_rois(cfg, point_df, roisets):
"""
For roisets (columns) which ended up with any <unspecified> points (i.e. label 0),
those point ROIs can be overwritten with a value from the 'parent' roiset column.
For example if we initially loaded this:
x y z shell primary subprimary
1 2 3 Brain ME(R) ME_layer_1_R
4 5 6 Brain <unspecified> <unspecified>
7 8 9 VNC ANm <unspecified>
0 1 2 VNC <unspecified> <unspecified>
and 'shell' is the 'parent-roiset' of the 'primary' roiset, which is in turn
the parent of the 'subprimary' roiset, then the final table will be:
x y z shell primary subprimary
1 2 3 Brain ME(R) ME_layer_1_R
4 5 6 Brain Brain-unspecified <unspecified>
7 8 9 VNC ANm ANm-unspecified
0 1 2 VNC VNC-unspecified <unspecified>
Note that the <unspecified> values are only replaced if the parent column has an actual ROI to offer.
"""
# To ensure correct propagation from upstream roisets to downstream roisets,
# we must process roisets in topological order.
# In the config, we don't require the roisets to be listed in topological order,
# so we must compute that order ourselves.
ts = TopologicalSorter()
for roiset_name in roisets.keys():
parent = cfg['roi-sets'][roiset_name]['parent-roiset']
if parent:
ts.add(roiset_name, parent)

for roiset_name in ts.static_order():
parent = cfg['roi-sets'][roiset_name]['parent-roiset']
roi_ids = roisets[roiset_name]
if not parent or '<unspecified>' not in roi_ids:
continue

parent_rois = cfg['roi-sets'][roiset_name]['parent-rois']
if any('unspecified' in roi for roi in parent_rois):
raise RuntimeError("Parent rois cannot include any ROI with 'unspecified' in the name.")

# Copy the parent ROI of points for which the parent has an ROI to inherit (not '<unspecified>')
replaceable_points = point_df[parent].isin(parent_rois)
parent_rois = point_df.loc[replaceable_points, parent].unique()
point_df[roiset_name] = point_df[roiset_name].cat.add_categories(parent_rois)

child_dtype = point_df[roiset_name].dtype
point_df.loc[replaceable_points, roiset_name] = point_df.loc[replaceable_points, parent].astype(child_dtype)

# Rename the ROIs copied from the parent, for example:
# - Brain -> Brain-unspecified
# - MB(R) -> MB-unspecified(R)
base_rgx, suffix_rgx = r"[^()]+", r"\([LR]\)?"
parent_parts = pd.Series(parent_rois).str.extract(f"({base_rgx})({suffix_rgx})").fillna('')

renames = {}
max_label = max(roi_ids.values())
for parent, (base, suffix) in zip(parent_rois, parent_parts):
new_roi = f"{base}-unspecified{suffix}"
renames[parent] = new_roi

# Introduce a new integer label for the new ROI
roi_ids[new_roi] = max_label = 1 + max_label

point_df[roiset_name] = point_df[roiset_name].cat.rename_categories(renames)

# Replace zeros in {name}_label with the new labels
point_df.loc[replaceable_points, f"{roiset_name}_label"] = point_df.loc[replaceable_points, roiset_name].map(roi_ids)

return roisets


class RoiVolCache:
"""
Utility class for loading/saving a cached ROI volume and its box on disk.
Expand Down

0 comments on commit 7eed8fa

Please sign in to comment.