From c1756e55de601ea4675d6bf19f589323da8b6190 Mon Sep 17 00:00:00 2001 From: David Burke Date: Fri, 13 Dec 2024 14:56:52 -0500 Subject: [PATCH 01/12] Draft django app: Long running migrations --- kobo/apps/long_running_migrations/README.md | 27 ++++++++++ kobo/apps/long_running_migrations/__init__.py | 0 kobo/apps/long_running_migrations/admin.py | 8 +++ .../maintenance_tasks.py | 17 ++++++ .../migrations/0001_initial.py | 29 ++++++++++ .../migrations/__init__.py | 0 kobo/apps/long_running_migrations/models.py | 54 +++++++++++++++++++ .../long_running_migrations/tasks/__init__.py | 0 .../tasks/sample_failure.py | 2 + .../tasks/sample_task.py | 2 + kobo/apps/long_running_migrations/tests.py | 52 ++++++++++++++++++ kobo/settings/base.py | 1 + kpi/tasks.py | 19 +++++-- 13 files changed, 207 insertions(+), 4 deletions(-) create mode 100644 kobo/apps/long_running_migrations/README.md create mode 100644 kobo/apps/long_running_migrations/__init__.py create mode 100644 kobo/apps/long_running_migrations/admin.py create mode 100644 kobo/apps/long_running_migrations/maintenance_tasks.py create mode 100644 kobo/apps/long_running_migrations/migrations/0001_initial.py create mode 100644 kobo/apps/long_running_migrations/migrations/__init__.py create mode 100644 kobo/apps/long_running_migrations/models.py create mode 100644 kobo/apps/long_running_migrations/tasks/__init__.py create mode 100644 kobo/apps/long_running_migrations/tasks/sample_failure.py create mode 100644 kobo/apps/long_running_migrations/tasks/sample_task.py create mode 100644 kobo/apps/long_running_migrations/tests.py diff --git a/kobo/apps/long_running_migrations/README.md b/kobo/apps/long_running_migrations/README.md new file mode 100644 index 0000000000..ee1e1535b7 --- /dev/null +++ b/kobo/apps/long_running_migrations/README.md @@ -0,0 +1,27 @@ +# Long Running Migrations + +This will execute a task in celery. The task will try for 23 hours and then give up. It will retry until it yields an exception or completes. + +1. Create your tasks in tasks and give it a unique name. Django migration style 0001_description is a good idea. It must contain a function called "task". +2. Run `LongRunningMigration.objects.create(task_name="0001_description")`. Consider running this from a real Django migration. +3. Wait for daily maintenance task or manually dispatch the celery "perform_maintenance". + +## Writing a good task. + +Very slow tasks should be written atomically and tolerant to disruption at any time. + +```python +# 2024-10-13 +from django.db import transaction + +def task(): + for foo in Foo.objects.filter(is_barred=False): # Checks actually needs to run still + with transaction.atomic(): # Atomic! + foo.make_it_bar() # Perhap this does multiple things that could succeed or fail +``` + +Notice that if the task is interrupted, it will simply continue in the next run. + +Because tasks are slow, your code should run regardless of when the data migration takes place. + +Timestamp the task to help a future developer know when it can be safely deleted. diff --git a/kobo/apps/long_running_migrations/__init__.py b/kobo/apps/long_running_migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/long_running_migrations/admin.py b/kobo/apps/long_running_migrations/admin.py new file mode 100644 index 0000000000..0b1b5f3154 --- /dev/null +++ b/kobo/apps/long_running_migrations/admin.py @@ -0,0 +1,8 @@ +from django.contrib import admin + +from .models import LongRunningMigration + + +@admin.register(LongRunningMigration) +class LongRunningMigrationAdmin(admin.ModelAdmin): + readonly_fields=('date_created', 'date_modified') diff --git a/kobo/apps/long_running_migrations/maintenance_tasks.py b/kobo/apps/long_running_migrations/maintenance_tasks.py new file mode 100644 index 0000000000..dc4af2c5ce --- /dev/null +++ b/kobo/apps/long_running_migrations/maintenance_tasks.py @@ -0,0 +1,17 @@ +from datetime import timedelta + +from django.db.models import Q +from django.utils import timezone + +from .models import LongRunningMigration, LongRunningMigrationStatus + + +def execute_long_running_migrations(): + yesterday = timezone.now() - timedelta(days=1) + # Run tasks that were just created or are in progress yet older than 1 day + for migration in LongRunningMigration.objects.filter( + Q(status=LongRunningMigrationStatus.CREATED) + | Q(status=LongRunningMigrationStatus.IN_PROGRESS) + & Q(date_modified__lte=yesterday) + ): + migration.execute() diff --git a/kobo/apps/long_running_migrations/migrations/0001_initial.py b/kobo/apps/long_running_migrations/migrations/0001_initial.py new file mode 100644 index 0000000000..1457a6c475 --- /dev/null +++ b/kobo/apps/long_running_migrations/migrations/0001_initial.py @@ -0,0 +1,29 @@ +# Generated by Django 4.2.15 on 2024-12-13 19:53 + +from django.db import migrations, models +import kpi.models.abstract_models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='LongRunningMigration', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('date_created', models.DateTimeField(default=kpi.models.abstract_models._get_default_datetime)), + ('date_modified', models.DateTimeField(default=kpi.models.abstract_models._get_default_datetime)), + ('task_name', models.CharField(max_length=255, unique=True)), + ('status', models.CharField(choices=[('created', 'Created'), ('in_progress', 'In Progress'), ('failed', 'Failed'), ('completed', 'Completed')], default='created', max_length=20)), + ('attempts', models.PositiveSmallIntegerField(default=0)), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/kobo/apps/long_running_migrations/migrations/__init__.py b/kobo/apps/long_running_migrations/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/long_running_migrations/models.py b/kobo/apps/long_running_migrations/models.py new file mode 100644 index 0000000000..505863a76a --- /dev/null +++ b/kobo/apps/long_running_migrations/models.py @@ -0,0 +1,54 @@ +import importlib +import os + +from django.db import models + +from kpi.models.abstract_models import AbstractTimeStampedModel +from kpi.utils.log import logging + + +class LongRunningMigrationStatus(models.TextChoices): + CREATED = 'created' + IN_PROGRESS = 'in_progress' + FAILED = 'failed' + COMPLETED = 'completed' + + +class LongRunningMigration(AbstractTimeStampedModel): + + APP_DIR = os.path.basename(os.path.dirname(__file__)) + + task_name = models.CharField(max_length=255, unique=True) + status = models.CharField( + default=LongRunningMigrationStatus.CREATED, + choices=LongRunningMigrationStatus.choices, + max_length=20, + ) + attempts = models.PositiveSmallIntegerField(default=0) + + def execute(self): + if self.status == LongRunningMigrationStatus.COMPLETED: + return + + module = importlib.import_module( + os.path.join(self.APP_DIR, 'tasks', self.task_name) + ) + self.status = LongRunningMigrationStatus.IN_PROGRESS + self.attempts += self.attempts + self.save(update_fields=['status', 'attempts']) + try: + module.task() + except Exception as e: + logging.error(f'LongRunningMigration.execute(): {str(e)}') + self.status = LongRunningMigrationStatus.FAILED + self.save(update_fields=['status']) + return + self.status = LongRunningMigrationStatus.COMPLETED + self.save(update_fields=['status']) + + def save(self, **kwargs): + if self._state.adding: + file_path = os.path.join(self.APP_DIR, 'tasks', f'{self.task_name}.py') + if not os.path.exists(file_path): + raise ValueError('Task does not exist in tasks directory') + super().save(**kwargs) diff --git a/kobo/apps/long_running_migrations/tasks/__init__.py b/kobo/apps/long_running_migrations/tasks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/long_running_migrations/tasks/sample_failure.py b/kobo/apps/long_running_migrations/tasks/sample_failure.py new file mode 100644 index 0000000000..b2a1b14fcb --- /dev/null +++ b/kobo/apps/long_running_migrations/tasks/sample_failure.py @@ -0,0 +1,2 @@ +def task(): + raise Exception diff --git a/kobo/apps/long_running_migrations/tasks/sample_task.py b/kobo/apps/long_running_migrations/tasks/sample_task.py new file mode 100644 index 0000000000..a1b0d73dd9 --- /dev/null +++ b/kobo/apps/long_running_migrations/tasks/sample_task.py @@ -0,0 +1,2 @@ +def task(): + print('hello from long running migration') diff --git a/kobo/apps/long_running_migrations/tests.py b/kobo/apps/long_running_migrations/tests.py new file mode 100644 index 0000000000..edcbc5fae0 --- /dev/null +++ b/kobo/apps/long_running_migrations/tests.py @@ -0,0 +1,52 @@ +from django.test import TestCase +from freezegun import freeze_time + +from .maintenance_tasks import execute_long_running_migrations +from .models import LongRunningMigration, LongRunningMigrationStatus + + +class LongRunningMigrationTestCase(TestCase): + def test_sample_task(self): + migration = LongRunningMigration.objects.create(task_name='sample_task') + migration.execute() + migration.refresh_from_db() + self.assertEqual(migration.status, LongRunningMigrationStatus.COMPLETED) + + def test_invalid_task_name(self): + with self.assertRaises(ValueError): + LongRunningMigration.objects.create(task_name='foo') + + def test_sample_failure(self): + migration = LongRunningMigration.objects.create(task_name='sample_failure') + migration.execute() + migration.refresh_from_db() + self.assertEqual(migration.status, LongRunningMigrationStatus.FAILED) + + def test_maintenance(self): + # # New task + migration = LongRunningMigration.objects.create(task_name='sample_task') + execute_long_running_migrations() + migration.refresh_from_db() + self.assertEqual(migration.status, LongRunningMigrationStatus.COMPLETED) + + # Ignore failed task + migration.status = LongRunningMigrationStatus.FAILED + migration.save() + execute_long_running_migrations() + migration.refresh_from_db() + self.assertEqual(migration.status, LongRunningMigrationStatus.FAILED) + + # Ignore recently in progress task + migration.status = LongRunningMigrationStatus.IN_PROGRESS + migration.save() + execute_long_running_migrations() + migration.refresh_from_db() + self.assertEqual(migration.status, LongRunningMigrationStatus.IN_PROGRESS) + + # Run old in progress task + with freeze_time('2024-12-10'): + migration.status = LongRunningMigrationStatus.IN_PROGRESS + migration.save() + execute_long_running_migrations() + migration.refresh_from_db() + self.assertEqual(migration.status, LongRunningMigrationStatus.COMPLETED) diff --git a/kobo/settings/base.py b/kobo/settings/base.py index 4a555bbe04..54ed482a28 100644 --- a/kobo/settings/base.py +++ b/kobo/settings/base.py @@ -144,6 +144,7 @@ 'guardian', 'kobo.apps.openrosa.libs', 'kobo.apps.project_ownership.ProjectOwnershipAppConfig', + 'kobo.apps.long_running_migrations', ) MIDDLEWARE = [ diff --git a/kpi/tasks.py b/kpi/tasks.py index 786484fec8..1821d15d36 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -1,9 +1,13 @@ import requests from django.conf import settings from django.core.mail import send_mail +from django.core.cache import cache from django.core.management import call_command from kobo.apps.kobo_auth.shortcuts import User +from kobo.apps.long_running_migrations.maintenance_tasks import ( + execute_long_running_migrations, +) from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files from kobo.celery import celery_app from kpi.constants import LIMIT_HOURS_23 @@ -96,8 +100,15 @@ def enketo_flush_cached_preview(server_url, form_id): @celery_app.task(time_limit=LIMIT_HOURS_23, soft_time_limit=LIMIT_HOURS_23) def perform_maintenance(): """ - Run daily maintenance tasks + Run daily maintenance tasks. Ensure it cannot run multiple times. """ - remove_unused_markdown_files() - remove_old_import_tasks() - remove_old_asset_snapshots() + # Simplistic redis lock with timeout and self removal to prevent duplicate runs + lock_key = 'perform_maintenance_lock' + if cache.add(lock_key, 'true', timeout=LIMIT_HOURS_23): + try: + remove_unused_markdown_files() + remove_old_import_tasks() + remove_old_asset_snapshots() + execute_long_running_migrations() + finally: + cache.delete(lock_key) From b03b2999d166516e8197ef8d8b79980ec62acb13 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Tue, 17 Dec 2024 23:19:02 -0500 Subject: [PATCH 02/12] Refactoring --- .../{tasks => jobs}/__init__.py | 0 .../{tasks => jobs}/sample_failure.py | 0 .../{tasks => jobs}/sample_task.py | 0 .../maintenance_tasks.py | 39 +++++++++++++++---- 4 files changed, 31 insertions(+), 8 deletions(-) rename kobo/apps/long_running_migrations/{tasks => jobs}/__init__.py (100%) rename kobo/apps/long_running_migrations/{tasks => jobs}/sample_failure.py (100%) rename kobo/apps/long_running_migrations/{tasks => jobs}/sample_task.py (100%) diff --git a/kobo/apps/long_running_migrations/tasks/__init__.py b/kobo/apps/long_running_migrations/jobs/__init__.py similarity index 100% rename from kobo/apps/long_running_migrations/tasks/__init__.py rename to kobo/apps/long_running_migrations/jobs/__init__.py diff --git a/kobo/apps/long_running_migrations/tasks/sample_failure.py b/kobo/apps/long_running_migrations/jobs/sample_failure.py similarity index 100% rename from kobo/apps/long_running_migrations/tasks/sample_failure.py rename to kobo/apps/long_running_migrations/jobs/sample_failure.py diff --git a/kobo/apps/long_running_migrations/tasks/sample_task.py b/kobo/apps/long_running_migrations/jobs/sample_task.py similarity index 100% rename from kobo/apps/long_running_migrations/tasks/sample_task.py rename to kobo/apps/long_running_migrations/jobs/sample_task.py diff --git a/kobo/apps/long_running_migrations/maintenance_tasks.py b/kobo/apps/long_running_migrations/maintenance_tasks.py index dc4af2c5ce..e02c039cf0 100644 --- a/kobo/apps/long_running_migrations/maintenance_tasks.py +++ b/kobo/apps/long_running_migrations/maintenance_tasks.py @@ -1,17 +1,40 @@ -from datetime import timedelta +from kobo.celery import celery_app +from django.conf import settings +from django.core.cache import cache from django.db.models import Q from django.utils import timezone +from dateutil.relativedelta import relativedelta from .models import LongRunningMigration, LongRunningMigrationStatus +@celery_app.task( + queue='kpi_low_priority_queue', + soft_time_limit=settings.CELERY_LONG_RUNNING_TASK_SOFT_TIME_LIMIT, + time_limit=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT, +) def execute_long_running_migrations(): - yesterday = timezone.now() - timedelta(days=1) - # Run tasks that were just created or are in progress yet older than 1 day - for migration in LongRunningMigration.objects.filter( - Q(status=LongRunningMigrationStatus.CREATED) - | Q(status=LongRunningMigrationStatus.IN_PROGRESS) - & Q(date_modified__lte=yesterday) + lock_key = 'execute_long_running_migrations' + if cache.add( + lock_key, 'true', timeout=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT ): - migration.execute() + try: + # Adding an offset to account for potential delays in task execution and + # clock drift between the Celery workers and the database, ensuring tasks + # are not prematurely skipped. + offset = 5 * 60 * 1000 + task_expiry_time = timezone.now() - relativedelta( + minutes=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT + offset + ) + # Run tasks that were just created or are in progress but have exceeded + # the maximum runtime allowed for a Celery task, causing Celery to terminate + # them and raise a SoftTimeLimitExceeded exception. + for migration in LongRunningMigration.objects.filter( + Q(status=LongRunningMigrationStatus.CREATED) + | Q(status=LongRunningMigrationStatus.IN_PROGRESS) + & Q(date_modified__lte=task_expiry_time) + ): + migration.execute() + finally: + cache.delete(lock_key) From ff1065c47cfd4b91a9502d45c4cebb52ed68f508 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Wed, 18 Dec 2024 00:32:52 -0500 Subject: [PATCH 03/12] refactor tests and app structure --- .../__init__.py | 0 .../migrations/0001_initial.py | 4 +- kobo/apps/long_running_migrations/models.py | 36 ++++++++++++++--- .../{maintenance_tasks.py => tasks.py} | 4 +- .../long_running_migrations/tests/__init__.py | 0 .../tests/fixtures/__init__.py | 0 .../fixtures}/sample_failure.py | 0 .../{jobs => tests/fixtures}/sample_task.py | 0 .../{tests.py => tests/test_run.py} | 40 ++++++++++++++----- kpi/tasks.py | 18 ++------- 10 files changed, 68 insertions(+), 34 deletions(-) rename kobo/apps/long_running_migrations/{jobs => long_running_migrations}/__init__.py (100%) rename kobo/apps/long_running_migrations/{maintenance_tasks.py => tasks.py} (94%) create mode 100644 kobo/apps/long_running_migrations/tests/__init__.py create mode 100644 kobo/apps/long_running_migrations/tests/fixtures/__init__.py rename kobo/apps/long_running_migrations/{jobs => tests/fixtures}/sample_failure.py (100%) rename kobo/apps/long_running_migrations/{jobs => tests/fixtures}/sample_task.py (100%) rename kobo/apps/long_running_migrations/{tests.py => tests/test_run.py} (57%) diff --git a/kobo/apps/long_running_migrations/jobs/__init__.py b/kobo/apps/long_running_migrations/long_running_migrations/__init__.py similarity index 100% rename from kobo/apps/long_running_migrations/jobs/__init__.py rename to kobo/apps/long_running_migrations/long_running_migrations/__init__.py diff --git a/kobo/apps/long_running_migrations/migrations/0001_initial.py b/kobo/apps/long_running_migrations/migrations/0001_initial.py index 1457a6c475..1a739539cf 100644 --- a/kobo/apps/long_running_migrations/migrations/0001_initial.py +++ b/kobo/apps/long_running_migrations/migrations/0001_initial.py @@ -1,6 +1,6 @@ # Generated by Django 4.2.15 on 2024-12-13 19:53 - from django.db import migrations, models + import kpi.models.abstract_models @@ -18,7 +18,7 @@ class Migration(migrations.Migration): ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('date_created', models.DateTimeField(default=kpi.models.abstract_models._get_default_datetime)), ('date_modified', models.DateTimeField(default=kpi.models.abstract_models._get_default_datetime)), - ('task_name', models.CharField(max_length=255, unique=True)), + ('name', models.CharField(max_length=255, unique=True)), ('status', models.CharField(choices=[('created', 'Created'), ('in_progress', 'In Progress'), ('failed', 'Failed'), ('completed', 'Completed')], default='created', max_length=20)), ('attempts', models.PositiveSmallIntegerField(default=0)), ], diff --git a/kobo/apps/long_running_migrations/models.py b/kobo/apps/long_running_migrations/models.py index 505863a76a..487b387e90 100644 --- a/kobo/apps/long_running_migrations/models.py +++ b/kobo/apps/long_running_migrations/models.py @@ -1,6 +1,8 @@ import importlib import os +from django.conf import settings +from django.core.exceptions import SuspiciousOperation from django.db import models from kpi.models.abstract_models import AbstractTimeStampedModel @@ -16,9 +18,13 @@ class LongRunningMigrationStatus(models.TextChoices): class LongRunningMigration(AbstractTimeStampedModel): - APP_DIR = os.path.basename(os.path.dirname(__file__)) + LONG_RUNNING_MIGRATIONS_DIR = os.path.join( + 'kobo', + 'apps', + 'long_running_migrations' + ) - task_name = models.CharField(max_length=255, unique=True) + name = models.CharField(max_length=255, unique=True) status = models.CharField( default=LongRunningMigrationStatus.CREATED, choices=LongRunningMigrationStatus.choices, @@ -26,29 +32,47 @@ class LongRunningMigration(AbstractTimeStampedModel): ) attempts = models.PositiveSmallIntegerField(default=0) + def clean(self): + super().clean() + if '..' in self.name or '/' in self.name or '\\' in self.name: + raise SuspiciousOperation( + f"Invalid migration name '{self.name}'. " + f"Migration names cannot contain directory traversal characters " + f"such as '..', '/', or '\\'." + ) + def execute(self): + # Skip execution if the migration is already completed if self.status == LongRunningMigrationStatus.COMPLETED: return - module = importlib.import_module( - os.path.join(self.APP_DIR, 'tasks', self.task_name) - ) + base_import = self.LONG_RUNNING_MIGRATIONS_DIR.replace('/', '.') + module = importlib.import_module('.'.join([base_import, self.name])) + self.status = LongRunningMigrationStatus.IN_PROGRESS self.attempts += self.attempts self.save(update_fields=['status', 'attempts']) + try: module.task() except Exception as e: + # Log the error and update the status to 'failed' logging.error(f'LongRunningMigration.execute(): {str(e)}') self.status = LongRunningMigrationStatus.FAILED self.save(update_fields=['status']) return + self.status = LongRunningMigrationStatus.COMPLETED self.save(update_fields=['status']) def save(self, **kwargs): + + self.clean() + if self._state.adding: - file_path = os.path.join(self.APP_DIR, 'tasks', f'{self.task_name}.py') + file_path = os.path.join( + settings.BASE_DIR, self.LONG_RUNNING_MIGRATIONS_DIR, f'{self.name}.py' + ) if not os.path.exists(file_path): raise ValueError('Task does not exist in tasks directory') super().save(**kwargs) diff --git a/kobo/apps/long_running_migrations/maintenance_tasks.py b/kobo/apps/long_running_migrations/tasks.py similarity index 94% rename from kobo/apps/long_running_migrations/maintenance_tasks.py rename to kobo/apps/long_running_migrations/tasks.py index e02c039cf0..ee998ba86b 100644 --- a/kobo/apps/long_running_migrations/maintenance_tasks.py +++ b/kobo/apps/long_running_migrations/tasks.py @@ -23,9 +23,9 @@ def execute_long_running_migrations(): # Adding an offset to account for potential delays in task execution and # clock drift between the Celery workers and the database, ensuring tasks # are not prematurely skipped. - offset = 5 * 60 * 1000 + offset = 5 * 60 task_expiry_time = timezone.now() - relativedelta( - minutes=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT + offset + seconds=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT + offset ) # Run tasks that were just created or are in progress but have exceeded # the maximum runtime allowed for a Celery task, causing Celery to terminate diff --git a/kobo/apps/long_running_migrations/tests/__init__.py b/kobo/apps/long_running_migrations/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/long_running_migrations/tests/fixtures/__init__.py b/kobo/apps/long_running_migrations/tests/fixtures/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/long_running_migrations/jobs/sample_failure.py b/kobo/apps/long_running_migrations/tests/fixtures/sample_failure.py similarity index 100% rename from kobo/apps/long_running_migrations/jobs/sample_failure.py rename to kobo/apps/long_running_migrations/tests/fixtures/sample_failure.py diff --git a/kobo/apps/long_running_migrations/jobs/sample_task.py b/kobo/apps/long_running_migrations/tests/fixtures/sample_task.py similarity index 100% rename from kobo/apps/long_running_migrations/jobs/sample_task.py rename to kobo/apps/long_running_migrations/tests/fixtures/sample_task.py diff --git a/kobo/apps/long_running_migrations/tests.py b/kobo/apps/long_running_migrations/tests/test_run.py similarity index 57% rename from kobo/apps/long_running_migrations/tests.py rename to kobo/apps/long_running_migrations/tests/test_run.py index edcbc5fae0..ea27b0dc56 100644 --- a/kobo/apps/long_running_migrations/tests.py +++ b/kobo/apps/long_running_migrations/tests/test_run.py @@ -1,30 +1,50 @@ -from django.test import TestCase +import os +from unittest.mock import patch + +from django.core.exceptions import SuspiciousOperation +from django.test import TestCase, override_settings from freezegun import freeze_time -from .maintenance_tasks import execute_long_running_migrations -from .models import LongRunningMigration, LongRunningMigrationStatus +from ..models import LongRunningMigration, LongRunningMigrationStatus +from ..tasks import execute_long_running_migrations +@override_settings( + CACHES={ + 'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'} + } +) +@patch.object(LongRunningMigration, 'LONG_RUNNING_MIGRATIONS_DIR', os.path.join( + 'kobo', + 'apps', + 'long_running_migrations', + 'tests', + 'fixtures' +)) class LongRunningMigrationTestCase(TestCase): def test_sample_task(self): - migration = LongRunningMigration.objects.create(task_name='sample_task') + migration = LongRunningMigration.objects.create(name='sample_task') migration.execute() migration.refresh_from_db() self.assertEqual(migration.status, LongRunningMigrationStatus.COMPLETED) def test_invalid_task_name(self): with self.assertRaises(ValueError): - LongRunningMigration.objects.create(task_name='foo') + LongRunningMigration.objects.create(name='foo') + + def test_traversal_characters(self): + with self.assertRaises(SuspiciousOperation): + LongRunningMigration.objects.create(name='../fixtures/sample_task') def test_sample_failure(self): - migration = LongRunningMigration.objects.create(task_name='sample_failure') + migration = LongRunningMigration.objects.create(name='sample_failure') migration.execute() migration.refresh_from_db() self.assertEqual(migration.status, LongRunningMigrationStatus.FAILED) - def test_maintenance(self): + def test_crontab(self): # # New task - migration = LongRunningMigration.objects.create(task_name='sample_task') + migration = LongRunningMigration.objects.create(name='sample_task') execute_long_running_migrations() migration.refresh_from_db() self.assertEqual(migration.status, LongRunningMigrationStatus.COMPLETED) @@ -36,14 +56,14 @@ def test_maintenance(self): migration.refresh_from_db() self.assertEqual(migration.status, LongRunningMigrationStatus.FAILED) - # Ignore recently in progress task + # Ignore recent started tasks migration.status = LongRunningMigrationStatus.IN_PROGRESS migration.save() execute_long_running_migrations() migration.refresh_from_db() self.assertEqual(migration.status, LongRunningMigrationStatus.IN_PROGRESS) - # Run old in progress task + # Run an old in-progress task with freeze_time('2024-12-10'): migration.status = LongRunningMigrationStatus.IN_PROGRESS migration.save() diff --git a/kpi/tasks.py b/kpi/tasks.py index 1821d15d36..4d1ebcdfeb 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -1,13 +1,9 @@ import requests from django.conf import settings from django.core.mail import send_mail -from django.core.cache import cache from django.core.management import call_command from kobo.apps.kobo_auth.shortcuts import User -from kobo.apps.long_running_migrations.maintenance_tasks import ( - execute_long_running_migrations, -) from kobo.apps.markdownx_uploader.tasks import remove_unused_markdown_files from kobo.celery import celery_app from kpi.constants import LIMIT_HOURS_23 @@ -102,13 +98,7 @@ def perform_maintenance(): """ Run daily maintenance tasks. Ensure it cannot run multiple times. """ - # Simplistic redis lock with timeout and self removal to prevent duplicate runs - lock_key = 'perform_maintenance_lock' - if cache.add(lock_key, 'true', timeout=LIMIT_HOURS_23): - try: - remove_unused_markdown_files() - remove_old_import_tasks() - remove_old_asset_snapshots() - execute_long_running_migrations() - finally: - cache.delete(lock_key) + + remove_unused_markdown_files() + remove_old_import_tasks() + remove_old_asset_snapshots() From 7ae205dff0288e618fe335e38920502ec9f4a1d0 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Wed, 18 Dec 2024 12:59:49 -0500 Subject: [PATCH 04/12] Refactoring & rewording --- kobo/apps/long_running_migrations/README.md | 35 +++++++--- .../__init__.py | 0 kobo/apps/long_running_migrations/models.py | 11 +++- kobo/apps/long_running_migrations/tasks.py | 1 + .../tests/fixtures/sample_failure.py | 2 +- .../tests/fixtures/sample_task.py | 2 +- .../tests/{test_run.py => test_case.py} | 64 ++++++++++++++----- 7 files changed, 84 insertions(+), 31 deletions(-) rename kobo/apps/long_running_migrations/{long_running_migrations => jobs}/__init__.py (100%) rename kobo/apps/long_running_migrations/tests/{test_run.py => test_case.py} (50%) diff --git a/kobo/apps/long_running_migrations/README.md b/kobo/apps/long_running_migrations/README.md index ee1e1535b7..bbbda54a51 100644 --- a/kobo/apps/long_running_migrations/README.md +++ b/kobo/apps/long_running_migrations/README.md @@ -1,14 +1,29 @@ # Long Running Migrations -This will execute a task in celery. The task will try for 23 hours and then give up. It will retry until it yields an exception or completes. +This feature allows you to execute long-running tasks using Celery. Each task will attempt to complete within a 23-hour window, after which it will give up and retry until it either raises an exception or successfully completes. -1. Create your tasks in tasks and give it a unique name. Django migration style 0001_description is a good idea. It must contain a function called "task". -2. Run `LongRunningMigration.objects.create(task_name="0001_description")`. Consider running this from a real Django migration. -3. Wait for daily maintenance task or manually dispatch the celery "perform_maintenance". +## How to Use -## Writing a good task. +1. **Create your migration** + Define your migrations in the `jobs` folder. Each migration should have a unique name, following Django's migration naming convention (e.g., `0001_description`). The migration file must contain a function called `run()`. -Very slow tasks should be written atomically and tolerant to disruption at any time. +2. **Register the migration** + Create a `LongRunningMigration` entry by running: + + ```python + LongRunningMigration.objects.create(task_name='0001_description') + ``` + + You can automate this step by adding it to a Django migration with `RunPython` + + +3. **Execute the migration** + Wait for the periodic task `execute_long_running_migrations` to run automatically or trigger it manually (beware of the lock, it can only run one at a time). + + +## Writing a good task + +When writing slow tasks, ensure they are both **atomic** and **tolerant** to interruptions at any point in their execution. ```python # 2024-10-13 @@ -17,11 +32,11 @@ from django.db import transaction def task(): for foo in Foo.objects.filter(is_barred=False): # Checks actually needs to run still with transaction.atomic(): # Atomic! - foo.make_it_bar() # Perhap this does multiple things that could succeed or fail + foo.make_it_bar() # Perhaps this does multiple things that could succeed or fail ``` -Notice that if the task is interrupted, it will simply continue in the next run. +* Notice that if the task is interrupted, it will simply continue in the next run. -Because tasks are slow, your code should run regardless of when the data migration takes place. +* Because tasks are slow, your code should run regardless of when the data migration takes place. -Timestamp the task to help a future developer know when it can be safely deleted. +* Add a timestamp to your migration definition to help future developers identify when it can be safely removed (if needed). diff --git a/kobo/apps/long_running_migrations/long_running_migrations/__init__.py b/kobo/apps/long_running_migrations/jobs/__init__.py similarity index 100% rename from kobo/apps/long_running_migrations/long_running_migrations/__init__.py rename to kobo/apps/long_running_migrations/jobs/__init__.py diff --git a/kobo/apps/long_running_migrations/models.py b/kobo/apps/long_running_migrations/models.py index 487b387e90..2b4e3f29d5 100644 --- a/kobo/apps/long_running_migrations/models.py +++ b/kobo/apps/long_running_migrations/models.py @@ -47,14 +47,21 @@ def execute(self): return base_import = self.LONG_RUNNING_MIGRATIONS_DIR.replace('/', '.') - module = importlib.import_module('.'.join([base_import, self.name])) + try: + module = importlib.import_module('.'.join([base_import, self.name])) + except ModuleNotFoundError as e: + logging.error( + f'LongRunningMigration.execute(), ' + f'failed to import task module: {str(e)}' + ) + return self.status = LongRunningMigrationStatus.IN_PROGRESS self.attempts += self.attempts self.save(update_fields=['status', 'attempts']) try: - module.task() + module.run() except Exception as e: # Log the error and update the status to 'failed' logging.error(f'LongRunningMigration.execute(): {str(e)}') diff --git a/kobo/apps/long_running_migrations/tasks.py b/kobo/apps/long_running_migrations/tasks.py index ee998ba86b..376603cde4 100644 --- a/kobo/apps/long_running_migrations/tasks.py +++ b/kobo/apps/long_running_migrations/tasks.py @@ -16,6 +16,7 @@ ) def execute_long_running_migrations(): lock_key = 'execute_long_running_migrations' + if cache.add( lock_key, 'true', timeout=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT ): diff --git a/kobo/apps/long_running_migrations/tests/fixtures/sample_failure.py b/kobo/apps/long_running_migrations/tests/fixtures/sample_failure.py index b2a1b14fcb..033ba598c9 100644 --- a/kobo/apps/long_running_migrations/tests/fixtures/sample_failure.py +++ b/kobo/apps/long_running_migrations/tests/fixtures/sample_failure.py @@ -1,2 +1,2 @@ -def task(): +def run(): raise Exception diff --git a/kobo/apps/long_running_migrations/tests/fixtures/sample_task.py b/kobo/apps/long_running_migrations/tests/fixtures/sample_task.py index a1b0d73dd9..ff4e27a2cf 100644 --- a/kobo/apps/long_running_migrations/tests/fixtures/sample_task.py +++ b/kobo/apps/long_running_migrations/tests/fixtures/sample_task.py @@ -1,2 +1,2 @@ -def task(): +def run(): print('hello from long running migration') diff --git a/kobo/apps/long_running_migrations/tests/test_run.py b/kobo/apps/long_running_migrations/tests/test_case.py similarity index 50% rename from kobo/apps/long_running_migrations/tests/test_run.py rename to kobo/apps/long_running_migrations/tests/test_case.py index ea27b0dc56..e409412b44 100644 --- a/kobo/apps/long_running_migrations/tests/test_run.py +++ b/kobo/apps/long_running_migrations/tests/test_case.py @@ -42,31 +42,61 @@ def test_sample_failure(self): migration.refresh_from_db() self.assertEqual(migration.status, LongRunningMigrationStatus.FAILED) - def test_crontab(self): - # # New task - migration = LongRunningMigration.objects.create(name='sample_task') - execute_long_running_migrations() + def test_not_updated_worker(self): + migrations = LongRunningMigration.objects.bulk_create( + [LongRunningMigration(name='foo')] + ) + migration = migrations[0] + migration.execute() migration.refresh_from_db() - self.assertEqual(migration.status, LongRunningMigrationStatus.COMPLETED) + self.assertEqual(migration.status, LongRunningMigrationStatus.CREATED) + + +@override_settings( + CACHES={ + 'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'} + } +) +class LongRunningMigrationPeriodicTaskTestCase(TestCase): + + def setUp(self): + self.patcher = patch.object( + LongRunningMigration, + 'LONG_RUNNING_MIGRATIONS_DIR', + os.path.join('kobo', 'apps', 'long_running_migrations', 'tests', 'fixtures') + ) + self.patcher.start() + self.migration = LongRunningMigration.objects.create(name='sample_task') + + def tearDown(self): + self.patcher.stop() + + def test_migration_as_completed(self): + execute_long_running_migrations() + self.migration.refresh_from_db() + self.assertEqual(self.migration.status, LongRunningMigrationStatus.COMPLETED) + def test_failed_migration_is_ignored(self): # Ignore failed task - migration.status = LongRunningMigrationStatus.FAILED - migration.save() + self.migration.status = LongRunningMigrationStatus.FAILED + self.migration.save(update_fields=['status']) execute_long_running_migrations() - migration.refresh_from_db() - self.assertEqual(migration.status, LongRunningMigrationStatus.FAILED) + self.migration.refresh_from_db() + self.assertEqual(self.migration.status, LongRunningMigrationStatus.FAILED) + def test_ignore_recent_started_migration(self): # Ignore recent started tasks - migration.status = LongRunningMigrationStatus.IN_PROGRESS - migration.save() + self.migration.status = LongRunningMigrationStatus.IN_PROGRESS + self.migration.save() execute_long_running_migrations() - migration.refresh_from_db() - self.assertEqual(migration.status, LongRunningMigrationStatus.IN_PROGRESS) + self.migration.refresh_from_db() + self.assertEqual(self.migration.status, LongRunningMigrationStatus.IN_PROGRESS) + def test_resume_stuck_migration(self): # Run an old in-progress task with freeze_time('2024-12-10'): - migration.status = LongRunningMigrationStatus.IN_PROGRESS - migration.save() + self.migration.status = LongRunningMigrationStatus.IN_PROGRESS + self.migration.save() execute_long_running_migrations() - migration.refresh_from_db() - self.assertEqual(migration.status, LongRunningMigrationStatus.COMPLETED) + self.migration.refresh_from_db() + self.assertEqual(self.migration.status, LongRunningMigrationStatus.COMPLETED) From a0d31b69d3e8552cc41a8c5098e907d294ba61a2 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Wed, 18 Dec 2024 13:44:51 -0500 Subject: [PATCH 05/12] Update README --- kobo/apps/long_running_migrations/README.md | 28 ++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/kobo/apps/long_running_migrations/README.md b/kobo/apps/long_running_migrations/README.md index bbbda54a51..bd81d8bfa4 100644 --- a/kobo/apps/long_running_migrations/README.md +++ b/kobo/apps/long_running_migrations/README.md @@ -1,35 +1,35 @@ # Long Running Migrations -This feature allows you to execute long-running tasks using Celery. Each task will attempt to complete within a 23-hour window, after which it will give up and retry until it either raises an exception or successfully completes. +This feature allows you to execute long-running migrations using Celery. Each migration will attempt to complete within the maximum time allowed by Celery (see settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT`). If it does not complete within this time, the periodic task will retry and resume the migration from where it left off, continuing until the long-running migration either successfully completes or raises an exception. ## How to Use -1. **Create your migration** +1. **Create your migration** Define your migrations in the `jobs` folder. Each migration should have a unique name, following Django's migration naming convention (e.g., `0001_description`). The migration file must contain a function called `run()`. -2. **Register the migration** - Create a `LongRunningMigration` entry by running: - +2. **Register the migration** + Create a `LongRunningMigration` entry by running: + ```python - LongRunningMigration.objects.create(task_name='0001_description') + LongRunningMigration.objects.create(name='0001_fix_transfer') ``` - - You can automate this step by adding it to a Django migration with `RunPython` - - + + You can automate this step by adding it to a Django migration with `RunPython` + + 3. **Execute the migration** Wait for the periodic task `execute_long_running_migrations` to run automatically or trigger it manually (beware of the lock, it can only run one at a time). + +## Writing a good long-running migration -## Writing a good task - -When writing slow tasks, ensure they are both **atomic** and **tolerant** to interruptions at any point in their execution. +When writing long-running migrations, ensure they are both **atomic** and **tolerant** to interruptions at any point in their execution. ```python # 2024-10-13 from django.db import transaction -def task(): +def run(): for foo in Foo.objects.filter(is_barred=False): # Checks actually needs to run still with transaction.atomic(): # Atomic! foo.make_it_bar() # Perhaps this does multiple things that could succeed or fail From 8f4194b43ded7d37b034076b75e89747c662d399 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Wed, 18 Dec 2024 14:08:23 -0500 Subject: [PATCH 06/12] Add task to celery beat scheduler --- kobo/settings/base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kobo/settings/base.py b/kobo/settings/base.py index 54ed482a28..736e40959f 100644 --- a/kobo/settings/base.py +++ b/kobo/settings/base.py @@ -1202,6 +1202,12 @@ def dj_stripe_request_callback_method(): 'schedule': crontab(minute=0, hour=0), 'options': {'queue': 'kpi_low_priority_queue'} }, + # Schedule every hour, every day + 'long-running-migrations': { + 'task': 'kobo.apps.long_running_migrations.tasks.execute_long_running_migrations', # noqa + 'schedule': crontab(minute=0), + 'options': {'queue': 'kpi_low_priority_queue'} + }, } From 0c1c221465c9b7cee793cfc0746fcd4f6281ec39 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Wed, 18 Dec 2024 14:58:20 -0500 Subject: [PATCH 07/12] Add missing dependency --- dependencies/pip/dev_requirements.in | 1 + dependencies/pip/dev_requirements.txt | 3 +++ 2 files changed, 4 insertions(+) diff --git a/dependencies/pip/dev_requirements.in b/dependencies/pip/dev_requirements.in index 0e2c9ab027..93a205ef4b 100644 --- a/dependencies/pip/dev_requirements.in +++ b/dependencies/pip/dev_requirements.in @@ -12,6 +12,7 @@ pytest pytest-cov pytest-django pytest-env +freezegun # Kobocat diff --git a/dependencies/pip/dev_requirements.txt b/dependencies/pip/dev_requirements.txt index 2db23fbc3a..5755401f1e 100644 --- a/dependencies/pip/dev_requirements.txt +++ b/dependencies/pip/dev_requirements.txt @@ -254,6 +254,8 @@ fabric==3.2.2 # via -r dependencies/pip/dev_requirements.in flower==2.0.1 # via -r dependencies/pip/requirements.in +freezegun==1.5.1 + # via -r dependencies/pip/dev_requirements.in frozenlist==1.4.1 # via # aiohttp @@ -488,6 +490,7 @@ python-dateutil==2.9.0.post0 # -r dependencies/pip/requirements.in # botocore # celery + # freezegun # pandas # python-crontab python3-openid==3.2.0 From 658d9533214276bfd274fab795dcabfe96ac98d6 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Wed, 18 Dec 2024 16:21:51 -0500 Subject: [PATCH 08/12] Apply requested changes --- kobo/apps/long_running_migrations/models.py | 3 ++- kobo/apps/long_running_migrations/tasks.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/kobo/apps/long_running_migrations/models.py b/kobo/apps/long_running_migrations/models.py index 2b4e3f29d5..346ee29bde 100644 --- a/kobo/apps/long_running_migrations/models.py +++ b/kobo/apps/long_running_migrations/models.py @@ -21,7 +21,8 @@ class LongRunningMigration(AbstractTimeStampedModel): LONG_RUNNING_MIGRATIONS_DIR = os.path.join( 'kobo', 'apps', - 'long_running_migrations' + 'long_running_migrations', + 'jobs' ) name = models.CharField(max_length=255, unique=True) diff --git a/kobo/apps/long_running_migrations/tasks.py b/kobo/apps/long_running_migrations/tasks.py index 376603cde4..0b2510c80c 100644 --- a/kobo/apps/long_running_migrations/tasks.py +++ b/kobo/apps/long_running_migrations/tasks.py @@ -35,7 +35,7 @@ def execute_long_running_migrations(): Q(status=LongRunningMigrationStatus.CREATED) | Q(status=LongRunningMigrationStatus.IN_PROGRESS) & Q(date_modified__lte=task_expiry_time) - ): + ).order_by('date_created'): migration.execute() finally: cache.delete(lock_key) From 7281f272e99d755898f1c85279d6dd6d4be1d273 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Wed, 18 Dec 2024 16:57:13 -0500 Subject: [PATCH 09/12] Update README and add an example --- kobo/apps/long_running_migrations/README.md | 47 +++++++++++++++---- .../jobs/0001_sample.py | 8 ++++ 2 files changed, 46 insertions(+), 9 deletions(-) create mode 100644 kobo/apps/long_running_migrations/jobs/0001_sample.py diff --git a/kobo/apps/long_running_migrations/README.md b/kobo/apps/long_running_migrations/README.md index bd81d8bfa4..b3da45f254 100644 --- a/kobo/apps/long_running_migrations/README.md +++ b/kobo/apps/long_running_migrations/README.md @@ -4,22 +4,51 @@ This feature allows you to execute long-running migrations using Celery. Each mi ## How to Use -1. **Create your migration** +1. **Create your migration** Define your migrations in the `jobs` folder. Each migration should have a unique name, following Django's migration naming convention (e.g., `0001_description`). The migration file must contain a function called `run()`. -2. **Register the migration** - Create a `LongRunningMigration` entry by running: - +2. **Register the migration** + Create a `LongRunningMigration` entry by running: + ```python - LongRunningMigration.objects.create(name='0001_fix_transfer') + LongRunningMigration.objects.create(name='0001_sample') ``` + + You can automate this step by adding it to a Django migration with `RunPython`. - You can automate this step by adding it to a Django migration with `RunPython` - - + ```python + from django.db import migrations + + + def add_long_running_migration(apps, schema_editor): + LongRunningMigration = apps.get_model('long_running_migrations', 'LongRunningMigration') # noqa + LongRunningMigration.objects.create( + name='0001_sample' + ) + + + def noop(*args, **kwargs): + pass + + + class Migration(migrations.Migration): + + dependencies = [ + ('long_running_migrations', '0001_initial'), + ] + + operations = [ + migrations.RunPython(add_long_running_migration, noop), + ] + + + ``` + + + 3. **Execute the migration** Wait for the periodic task `execute_long_running_migrations` to run automatically or trigger it manually (beware of the lock, it can only run one at a time). - + ## Writing a good long-running migration diff --git a/kobo/apps/long_running_migrations/jobs/0001_sample.py b/kobo/apps/long_running_migrations/jobs/0001_sample.py new file mode 100644 index 0000000000..085cbb8389 --- /dev/null +++ b/kobo/apps/long_running_migrations/jobs/0001_sample.py @@ -0,0 +1,8 @@ +# Generated on 2024-12-18 + +def run(): + """ + Describe your long-running migration + """ + + pass From 815cece961ae1662d0d1740d188bdc66af413831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 18 Dec 2024 17:14:50 -0500 Subject: [PATCH 10/12] fix(projectOwnershipTransfer): ensure the path of OpenRosa media files is correct for transferred projects TASK-1352 (#5385) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### 📣 Summary Extended the bug fix from #5365 to address issues in existing projects. ### 📖 Description This update addresses the issue where OpenRosa media file paths were not updated correctly during project ownership transfers for existing projects. This misalignment led to 404 errors when users attempted to access transferred projects in Collect due to missing media files. ### Notes The fix leverages the new long-running migration flow to address the issue in the background. --- ...ect_ownership_transfer_with_media_files.py | 56 +++++++++++++++++++ ...ect_ownership_transfer_with_media_files.py | 25 +++++++++ 2 files changed, 81 insertions(+) create mode 100644 kobo/apps/long_running_migrations/jobs/0002_fix_project_ownership_transfer_with_media_files.py create mode 100644 kobo/apps/long_running_migrations/migrations/0002_fix_failed_project_ownership_transfer_with_media_files.py diff --git a/kobo/apps/long_running_migrations/jobs/0002_fix_project_ownership_transfer_with_media_files.py b/kobo/apps/long_running_migrations/jobs/0002_fix_project_ownership_transfer_with_media_files.py new file mode 100644 index 0000000000..fd627e7b2e --- /dev/null +++ b/kobo/apps/long_running_migrations/jobs/0002_fix_project_ownership_transfer_with_media_files.py @@ -0,0 +1,56 @@ +# Generated on 2024-12-18 +from django.db.models import Q, OuterRef, Subquery + +from kobo.apps.openrosa.apps.logger.models import XForm +from kobo.apps.openrosa.apps.main.models import MetaData +from kpi.models.asset import Asset +from kpi.models.asset_file import AssetFile +from kobo.apps.project_ownership.models import Transfer + + +def run(): + """ + Update OpenRosa MetaData objects that were not updated when project + ownership was transferred to someone else. This fixes a bug introduced + and later addressed in KPI (issue #5365). + """ + + # Step 1: Retrieve all assets that were transferred since the bug was present and + # use media files + asset_uids = Asset.objects.filter( + Q( + pk__in=AssetFile.objects.values_list('asset_id', flat=True).exclude( + file_type=AssetFile.PAIRED_DATA + ) + ) + & Q( + pk__in=Transfer.objects.values_list('asset_id', flat=True).filter( + invite__date_created__date__gte='2024-09-15' + ) + ) + ).values_list('uid', flat=True) + + username_subquery = XForm.objects.filter(pk=OuterRef('xform_id')).values( + 'user__username' + )[:1] + + # Step 2: Iterate through relevant MetaData objects and fix their data_file fields + for metadata in ( + MetaData.objects.filter( + xform_id__in=XForm.objects.filter( + kpi_asset_uid__in=list(asset_uids) + ), + ) + .exclude( + Q(data_file__startswith=Subquery(username_subquery)) + | Q(data_file__isnull=True) + | Q(data_file='') + ) + .select_related('xform', 'xform__user') + .iterator() + ): + data_file = str(metadata.data_file) + old_username, *other_parts = data_file.split('/') + other_parts.insert(0, metadata.xform.user.username) + metadata.data_file = '/'.join(other_parts) + metadata.save(update_fields=['data_file']) diff --git a/kobo/apps/long_running_migrations/migrations/0002_fix_failed_project_ownership_transfer_with_media_files.py b/kobo/apps/long_running_migrations/migrations/0002_fix_failed_project_ownership_transfer_with_media_files.py new file mode 100644 index 0000000000..fa1f74df36 --- /dev/null +++ b/kobo/apps/long_running_migrations/migrations/0002_fix_failed_project_ownership_transfer_with_media_files.py @@ -0,0 +1,25 @@ +# Generated by Django 4.2.15 on 2024-12-18 20:00 + +from django.db import migrations + + +def add_long_running_migration(apps, schema_editor): + LongRunningMigration = apps.get_model('long_running_migrations', 'LongRunningMigration') # noqa + LongRunningMigration.objects.create( + name='0002_fix_project_ownership_transfer_with_media_files' + ) + + +def noop(*args, **kwargs): + pass + + +class Migration(migrations.Migration): + + dependencies = [ + ('long_running_migrations', '0001_initial'), + ] + + operations = [ + migrations.RunPython(add_long_running_migration, noop), + ] From 9a12c0c66977855dc61007c7710cfaad08fb5d07 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Wed, 18 Dec 2024 18:02:26 -0500 Subject: [PATCH 11/12] Support long-running migrations starting with numbers --- kobo/apps/long_running_migrations/models.py | 40 ++++++++++++++----- .../tests/test_case.py | 5 +++ 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/kobo/apps/long_running_migrations/models.py b/kobo/apps/long_running_migrations/models.py index 346ee29bde..9b5d119b6d 100644 --- a/kobo/apps/long_running_migrations/models.py +++ b/kobo/apps/long_running_migrations/models.py @@ -1,5 +1,5 @@ -import importlib import os +from importlib.util import module_from_spec, spec_from_file_location from django.conf import settings from django.core.exceptions import SuspiciousOperation @@ -47,14 +47,7 @@ def execute(self): if self.status == LongRunningMigrationStatus.COMPLETED: return - base_import = self.LONG_RUNNING_MIGRATIONS_DIR.replace('/', '.') - try: - module = importlib.import_module('.'.join([base_import, self.name])) - except ModuleNotFoundError as e: - logging.error( - f'LongRunningMigration.execute(), ' - f'failed to import task module: {str(e)}' - ) + if not (module := self._load_module()): return self.status = LongRunningMigrationStatus.IN_PROGRESS @@ -84,3 +77,32 @@ def save(self, **kwargs): if not os.path.exists(file_path): raise ValueError('Task does not exist in tasks directory') super().save(**kwargs) + + def _load_module(self): + """ + This function allows you to load a Python module from a file path even if + the module's name does not follow Python's standard naming conventions + (e.g., starting with numbers or containing special characters). Normally, + Python identifiers must adhere to specific rules, but this method bypasses + those restrictions by dynamically creating a module from its file. + """ + module_path = f'{self.LONG_RUNNING_MIGRATIONS_DIR}/{self.name}.py' + if not os.path.exists(f'{settings.BASE_DIR}/{module_path}'): + logging.error( + f'LongRunningMigration._load_module():' + f'File not found `{module_path}`' + ) + return + + spec = spec_from_file_location(self.name, module_path) + try: + module = module_from_spec(spec) + except (ModuleNotFoundError, AttributeError): + logging.error( + f'LongRunningMigration._load_module():' + f'Failed to import migration module `{self.name}`' + ) + return + + spec.loader.exec_module(module) + return module diff --git a/kobo/apps/long_running_migrations/tests/test_case.py b/kobo/apps/long_running_migrations/tests/test_case.py index e409412b44..7549b15cf2 100644 --- a/kobo/apps/long_running_migrations/tests/test_case.py +++ b/kobo/apps/long_running_migrations/tests/test_case.py @@ -43,6 +43,7 @@ def test_sample_failure(self): self.assertEqual(migration.status, LongRunningMigrationStatus.FAILED) def test_not_updated_worker(self): + # simulate not updated worker with a wrong name migrations = LongRunningMigration.objects.bulk_create( [LongRunningMigration(name='foo')] ) @@ -68,6 +69,10 @@ def setUp(self): self.patcher.start() self.migration = LongRunningMigration.objects.create(name='sample_task') + # Remove real existing long-running migrations + LongRunningMigration.objects.exclude(pk=self.migration.pk).delete() + assert LongRunningMigration.objects.count() == 1 + def tearDown(self): self.patcher.stop() From 2a5008841d92ba6783a48107289bb33816d64429 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Wed, 18 Dec 2024 18:26:31 -0500 Subject: [PATCH 12/12] fix wrong calculation for attempts, update date_modified in AbstractTimeStampedModel --- kobo/apps/long_running_migrations/models.py | 2 +- kpi/models/abstract_models.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kobo/apps/long_running_migrations/models.py b/kobo/apps/long_running_migrations/models.py index 9b5d119b6d..e18df934e6 100644 --- a/kobo/apps/long_running_migrations/models.py +++ b/kobo/apps/long_running_migrations/models.py @@ -51,7 +51,7 @@ def execute(self): return self.status = LongRunningMigrationStatus.IN_PROGRESS - self.attempts += self.attempts + self.attempts += 1 self.save(update_fields=['status', 'attempts']) try: diff --git a/kpi/models/abstract_models.py b/kpi/models/abstract_models.py index 9f6e29702d..9d42dc6565 100644 --- a/kpi/models/abstract_models.py +++ b/kpi/models/abstract_models.py @@ -25,4 +25,6 @@ def save(self, *args, **kwargs): update_fields = kwargs.get('update_fields', None) if update_fields is None or 'date_modified' not in update_fields: self.date_modified = timezone.now() + if update_fields: + update_fields.append('date_modified') super().save(*args, **kwargs)