From b00dd5c4856acff3f6387d048ba0a3c6609ca85f Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Tue, 26 Nov 2024 08:47:33 -0800 Subject: [PATCH 1/4] cleanup interval orphans in nightly job only --- docs/src/ForDevelopers/Management.md | 11 ++++++++--- src/spyglass/utils/dj_mixin.py | 3 --- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/src/ForDevelopers/Management.md b/docs/src/ForDevelopers/Management.md index 5c00d2688..46aef5ab0 100644 --- a/docs/src/ForDevelopers/Management.md +++ b/docs/src/ForDevelopers/Management.md @@ -228,10 +228,14 @@ disk. There are several tables that retain lists of files that have been generated during analyses. If someone deletes analysis entries, files will still be on disk. -To remove orphaned files, we run the following commands in our cron jobs: +Additionally, there are periphery tables such as `IntervalList` which are used +to store entries created by downstream tables. These entries are not +automatically deleted when the downstream entry is removed, + +To remove orphaned files and entries, we run the following commands in our cron jobs: ```python -from spyglass.common import AnalysisNwbfile +from spyglass.common import AnalysisNwbfile, IntervalList from spyglass.spikesorting import SpikeSorting from spyglass.common.common_nwbfile import schema as nwbfile_schema from spyglass.decoding.v1.sorted_spikes import schema as spikes_schema @@ -241,8 +245,9 @@ from spyglass.decoding.v1.clusterless import schema as clusterless_schema def main(): AnalysisNwbfile().nightly_cleanup() SpikeSorting().nightly_cleanup() + IntervalList().nightly_cleanup() nwbfile_schema.external['analysis'].delete(delete_external_files=True)) - nwbfile_schema.external['raw'].delete(delete_external_files=True)) + **nwbfile_schema**.external['raw'].delete(delete_external_files=True)) spikes_schema.external['analysis'].delete(delete_external_files=True)) clusterless_schema.external['analysis'].delete(delete_external_files=True)) ``` diff --git 
a/src/spyglass/utils/dj_mixin.py b/src/spyglass/utils/dj_mixin.py index 72e34c04f..f40957168 100644 --- a/src/spyglass/utils/dj_mixin.py +++ b/src/spyglass/utils/dj_mixin.py @@ -485,9 +485,6 @@ def cautious_delete( delete_external_files=True, display_progress=False ) - if not self._test_mode: - _ = IntervalList().nightly_cleanup(dry_run=False) - def delete(self, *args, **kwargs): """Alias for cautious_delete, overwrites datajoint.table.Table.delete""" self.cautious_delete(*args, **kwargs) From c6a7e0a7757e4b08fd0b8b33609e2ab790cc1a99 Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Tue, 26 Nov 2024 08:55:27 -0800 Subject: [PATCH 2/4] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 86058203b..8e16652e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ dj.FreeTable(dj.conn(), "common_session.session_group").drop() - Remove numpy version restriction #1169 - Merge table delete removes orphaned master entries #1164 - Edit `merge_fetch` to expect positional before keyword arguments #1181 +- Move cleanup of `IntervalList` orphan entries to nightly cleanup #1195 ### Pipelines From cea52c5135aa0f847ce1dbc9c2b12300cd5cea58 Mon Sep 17 00:00:00 2001 From: Samuel Bray Date: Tue, 26 Nov 2024 15:46:31 -0800 Subject: [PATCH 3/4] Update docs/src/ForDevelopers/Management.md Co-authored-by: Chris Broz --- docs/src/ForDevelopers/Management.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/ForDevelopers/Management.md b/docs/src/ForDevelopers/Management.md index 46aef5ab0..afabc1a4e 100644 --- a/docs/src/ForDevelopers/Management.md +++ b/docs/src/ForDevelopers/Management.md @@ -247,7 +247,7 @@ def main(): SpikeSorting().nightly_cleanup() IntervalList().nightly_cleanup() nwbfile_schema.external['analysis'].delete(delete_external_files=True)) - **nwbfile_schema**.external['raw'].delete(delete_external_files=True)) + nwbfile_schema.external['raw'].delete(delete_external_files=True)) 
spikes_schema.external['analysis'].delete(delete_external_files=True)) clusterless_schema.external['analysis'].delete(delete_external_files=True)) ``` From a9261f94c6a4fdf0fe152d04b6fd4e2be6cb33bc Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Wed, 4 Dec 2024 14:41:22 -0800 Subject: [PATCH 4/4] suggest less frequent running of IntervalList cleanup --- docs/src/ForDevelopers/Management.md | 6 ++++-- src/spyglass/common/common_interval.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/src/ForDevelopers/Management.md b/docs/src/ForDevelopers/Management.md index afabc1a4e..df0caae81 100644 --- a/docs/src/ForDevelopers/Management.md +++ b/docs/src/ForDevelopers/Management.md @@ -230,7 +230,9 @@ be on disk. Additionally, there are periphery tables such as `IntervalList` which are used to store entries created by downstream tables. These entries are not -automatically deleted when the downstream entry is removed, +automatically deleted when the downstream entry is removed. To minimize interference +with ongoing user entry creation, we recommend running these cleanups on a less frequent +basis (e.g. weekly). 
To remove orphaned files and entries, we run the following commands in our cron jobs: @@ -245,7 +247,7 @@ from spyglass.decoding.v1.clusterless import schema as clusterless_schema def main(): AnalysisNwbfile().nightly_cleanup() SpikeSorting().nightly_cleanup() - IntervalList().nightly_cleanup() + IntervalList().cleanup(dry_run=False) nwbfile_schema.external['analysis'].delete(delete_external_files=True)) nwbfile_schema.external['raw'].delete(delete_external_files=True)) spikes_schema.external['analysis'].delete(delete_external_files=True)) diff --git a/src/spyglass/common/common_interval.py b/src/spyglass/common/common_interval.py index 25670f03c..2021c5f69 100644 --- a/src/spyglass/common/common_interval.py +++ b/src/spyglass/common/common_interval.py @@ -158,7 +158,7 @@ def plot_epoch_pos_raw_intervals(self, figsize=(20, 5), return_fig=False): if return_fig: return fig - def nightly_cleanup(self, dry_run=True): + def cleanup(self, dry_run=True): """Clean up orphaned IntervalList entries.""" orphans = self - get_child_tables(self) if dry_run: