From d829f157eace43ca363044349aa08c7baadaa1bc Mon Sep 17 00:00:00 2001 From: Rebecca Graber Date: Tue, 17 Dec 2024 12:14:31 -0500 Subject: [PATCH 1/5] fix(sso): 500 on admin login (#5334) (#5363) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a 500 being thrown when a non-superuser goes to /admin and then tries to log in as a superuser. Upgrade django-allauth to 65.1.0. The simplest solution seemed to be just to upgrade django-allauth. It's unclear which exact commit fixes the bug and we are very behind in versions so upgraded to the latest. Nothing obvious broke except one unit test. Bug template: 1. ℹī¸ Have 2 accounts, one super and one not 2. Log in as the non-superuser 3. Go to /admin 4. You should be redirected to the login page. 5. Log in as the super user 6. 🔴 [on main] 500 error 7. Checkout the PR branch. Make sure to reinstall requirements. 8. Do 1-5 again. 9. đŸŸĸ [on PR] You should be correctly logged in and sent to the admin page --- dependencies/pip/dev_requirements.txt | 17 ++++------------- dependencies/pip/requirements.txt | 17 ++++------------- kobo/apps/accounts/tests/test_backend.py | 8 ++++---- 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/dependencies/pip/dev_requirements.txt b/dependencies/pip/dev_requirements.txt index 13c4b905f7..3a2a9587d1 100644 --- a/dependencies/pip/dev_requirements.txt +++ b/dependencies/pip/dev_requirements.txt @@ -29,6 +29,7 @@ amqp==5.2.0 asgiref==3.8.1 # via # django + # django-allauth # django-cors-headers asttokens==2.4.1 # via stack-data @@ -109,7 +110,6 @@ cryptography==42.0.5 # azure-storage-blob # jwcrypto # paramiko - # pyjwt # pyopenssl cssselect==1.2.0 # via pyquery @@ -131,7 +131,6 @@ defusedxml==0.7.1 # via # -r dependencies/pip/requirements.in # djangorestframework-xml - # python3-openid # pyxform deprecated==1.2.14 # via fabric @@ -171,7 +170,7 @@ django==4.2.15 # djangorestframework # jsonfield # model-bakery -django-allauth==0.61.1 
+django-allauth==65.1.0 # via -r dependencies/pip/requirements.in django-amazon-ses==4.0.1 # via -r dependencies/pip/requirements.in @@ -488,10 +487,8 @@ pygments==2.17.2 # via # -r dependencies/pip/requirements.in # ipython -pyjwt[crypto]==2.8.0 - # via - # django-allauth - # twilio +pyjwt==2.8.0 + # via twilio pymongo==4.6.3 # via -r dependencies/pip/requirements.in pynacl==1.5.0 @@ -531,8 +528,6 @@ python-dateutil==2.9.0.post0 # freezegun # pandas # python-crontab -python3-openid==3.2.0 - # via django-allauth pytz==2024.1 # via # flower @@ -559,19 +554,15 @@ requests==2.31.0 # -r dependencies/pip/requirements.in # azure-core # coveralls - # django-allauth # django-oauth-toolkit # google-api-core # google-cloud-storage # httmock - # requests-oauthlib # responses # smsapi-client # stripe # twilio # yubico-client -requests-oauthlib==2.0.0 - # via django-allauth responses==0.25.0 # via -r dependencies/pip/requirements.in rpds-py==0.18.0 diff --git a/dependencies/pip/requirements.txt b/dependencies/pip/requirements.txt index 41e5731fa0..8cb6eefcf6 100644 --- a/dependencies/pip/requirements.txt +++ b/dependencies/pip/requirements.txt @@ -29,6 +29,7 @@ amqp==5.2.0 asgiref==3.8.1 # via # django + # django-allauth # django-cors-headers async-timeout==4.0.3 # via @@ -92,7 +93,6 @@ cryptography==42.0.5 # via # azure-storage-blob # jwcrypto - # pyjwt # pyopenssl cssselect==1.2.0 # via pyquery @@ -102,7 +102,6 @@ defusedxml==0.7.1 # via # -r dependencies/pip/requirements.in # djangorestframework-xml - # python3-openid # pyxform dict2xml==1.7.5 # via -r dependencies/pip/requirements.in @@ -139,7 +138,7 @@ django==4.2.15 # django-timezone-field # djangorestframework # jsonfield -django-allauth==0.61.1 +django-allauth==65.1.0 # via -r dependencies/pip/requirements.in django-amazon-ses==4.0.1 # via -r dependencies/pip/requirements.in @@ -381,10 +380,8 @@ pycparser==2.22 # via cffi pygments==2.17.2 # via -r dependencies/pip/requirements.in -pyjwt[crypto]==2.8.0 - # via - # 
django-allauth - # twilio +pyjwt==2.8.0 + # via twilio pymongo==4.6.3 # via -r dependencies/pip/requirements.in pyopenssl==24.1.0 @@ -406,8 +403,6 @@ python-dateutil==2.9.0.post0 # celery # pandas # python-crontab -python3-openid==3.2.0 - # via django-allauth pytz==2024.1 # via # flower @@ -433,18 +428,14 @@ requests==2.31.0 # via # -r dependencies/pip/requirements.in # azure-core - # django-allauth # django-oauth-toolkit # google-api-core # google-cloud-storage - # requests-oauthlib # responses # smsapi-client # stripe # twilio # yubico-client -requests-oauthlib==2.0.0 - # via django-allauth responses==0.25.0 # via -r dependencies/pip/requirements.in rpds-py==0.18.0 diff --git a/kobo/apps/accounts/tests/test_backend.py b/kobo/apps/accounts/tests/test_backend.py index c7ef6dc0eb..5b7fcefc5c 100644 --- a/kobo/apps/accounts/tests/test_backend.py +++ b/kobo/apps/accounts/tests/test_backend.py @@ -49,9 +49,9 @@ def setUp(self): @override_settings(SOCIALACCOUNT_PROVIDERS=SOCIALACCOUNT_PROVIDERS) @responses.activate - @patch('allauth.socialaccount.models.SocialLogin.verify_and_unstash_state') - def test_keep_django_auth_backend_with_sso(self, mock_verify_and_unstash_state): - mock_verify_and_unstash_state.return_value = {'process': 'login'} + @patch('allauth.socialaccount.providers.oauth2.views.statekit.unstash_state') + def test_keep_django_auth_backend_with_sso(self, mock_unstash_state): + mock_unstash_state.return_value = {'process': 'login'} # Mock `requests` responses to fool django-allauth responses.add( @@ -91,7 +91,7 @@ def test_keep_django_auth_backend_with_sso(self, mock_verify_and_unstash_state): ) # Simulate GET request to SSO provider - mock_sso_response = {'code': 'foobar'} + mock_sso_response = {'code': 'foobar', 'state': '12345'} response = self.client.get(sso_login_url, data=mock_sso_response) # Ensure user is logged in From edd63520504c1794b1f1e08c98706e3b8f5b4ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 17 Dec 2024 
17:53:18 -0500 Subject: [PATCH 2/5] fix(projectOwnershipTransfer): ensure OpenRosa media files are synchronized when transferring project ownership TASK-1352 (#5365) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### đŸ“Ŗ Summary Fixed an issue where OpenRosa media files were not synced during project ownership transfers causing a 404 error in Collect when opening a project due to missing media files. ### 📖 Description An issue was resolved where OpenRosa media files were not properly synced when transferring project ownership. This fix ensures that all media files are transferred correctly along with the project, preventing errors and maintaining data consistency. Previously, the absence of these media files resulted in a 404 error when users attempted to open the project using Collect. This update addresses the root cause, ensuring that all required files are available post-transfer. --- kobo/apps/project_ownership/constants.py | 1 - kobo/apps/project_ownership/utils.py | 128 +++++++++-------------- 2 files changed, 49 insertions(+), 80 deletions(-) diff --git a/kobo/apps/project_ownership/constants.py b/kobo/apps/project_ownership/constants.py index c7ebca32be..a269f067e3 100644 --- a/kobo/apps/project_ownership/constants.py +++ b/kobo/apps/project_ownership/constants.py @@ -1,2 +1 @@ ASYNC_TASK_HEARTBEAT = 60 * 5 # every 5 minutes -FILE_MOVE_CHUNK_SIZE = 1000 diff --git a/kobo/apps/project_ownership/utils.py b/kobo/apps/project_ownership/utils.py index 93fff540e2..0bc4e2a02f 100644 --- a/kobo/apps/project_ownership/utils.py +++ b/kobo/apps/project_ownership/utils.py @@ -13,7 +13,7 @@ from kpi.models.asset import AssetFile from .models.choices import TransferStatusChoices, TransferStatusTypeChoices from .exceptions import AsyncTaskException -from .constants import ASYNC_TASK_HEARTBEAT, FILE_MOVE_CHUNK_SIZE +from .constants import ASYNC_TASK_HEARTBEAT def get_target_folder( @@ -62,40 +62,25 @@ def 
move_attachments(transfer: 'project_ownership.Transfer'): instance_id__in=submission_ids ).exclude(media_file__startswith=f'{transfer.asset.owner.username}/') - attachments_to_update = [] - try: - heartbeat = int(time.time()) - # Moving files is pretty slow, thus it should run in a celery task. - for attachment in attachments.iterator(): - if not ( - target_folder := get_target_folder( - transfer.invite.sender.username, - transfer.invite.recipient.username, - attachment.media_file.name, - ) - ): - continue - else: - # We want to be sure the path of the file is saved no matter what. - # Thanks to try/finally block, if updates are still pending, they - # should be saved in case of errors. - # It lets us resume when it stopped in case of failure. - attachment.media_file.move(target_folder) - attachments_to_update.append(attachment) - - if len(attachments_to_update) > FILE_MOVE_CHUNK_SIZE: - Attachment.objects.bulk_update( - attachments_to_update, fields=['media_file'] - ) - attachments_to_update = [] - - heartbeat = _update_heartbeat(heartbeat, transfer, async_task_type) - - finally: - if attachments_to_update: - Attachment.objects.bulk_update( - attachments_to_update, fields=['media_file'] + heartbeat = int(time.time()) + # Moving files is pretty slow, thus it should run in a celery task. + for attachment in attachments.iterator(): + if not ( + target_folder := get_target_folder( + transfer.invite.sender.username, + transfer.invite.recipient.username, + attachment.media_file.name, ) + ): + continue + else: + # There is no way to ensure atomicity when moving the file and saving the + # object to the database. Fingers crossed that the process doesn't get + # interrupted between these two operations. 
+ attachment.media_file.move(target_folder) + attachment.save(update_fields=['media_file']) + + heartbeat = _update_heartbeat(heartbeat, transfer, async_task_type) _mark_task_as_successful(transfer, async_task_type) @@ -117,53 +102,38 @@ def move_media_files(transfer: 'project_ownership.Transfer'): ) } - media_files_to_update = [] - metadata_to_update = [] - try: - heartbeat = int(time.time()) - # Moving files is pretty slow, thus it should run in a celery task. - for media_file in media_files: - if not ( - target_folder := get_target_folder( + heartbeat = int(time.time()) + # Moving files is pretty slow, thus it should run in a celery task. + for media_file in media_files: + if not ( + target_folder := get_target_folder( + transfer.invite.sender.username, + transfer.invite.recipient.username, + media_file.content.name, + ) + ): + continue + else: + # There is no way to ensure atomicity when moving the file and saving the + # object to the database. Fingers crossed that the process doesn't get + # interrupted between these two operations. + media_file.content.move(target_folder) + old_md5 = media_file.metadata.pop('hash', None) + media_file.set_md5_hash() + media_file.save(update_fields=['content', 'metadata']) + + if old_md5 in kc_files.keys(): + kc_obj = kc_files[old_md5] + if kc_target_folder := get_target_folder( transfer.invite.sender.username, transfer.invite.recipient.username, - media_file.content.name, - ) - ): - continue - else: - # We want to be sure the path of the file is saved no matter what. - # Thanks to try/finally block, if updates are still pending, they - # should be saved in case of errors. - # It lets us resume when it stopped in case of failure.
- media_file.content.move(target_folder) - old_md5 = media_file.metadata.pop('hash', None) - media_file.set_md5_hash() - if old_md5 in kc_files.keys(): - kc_obj = kc_files[old_md5] - if kc_target_folder := get_target_folder( - transfer.invite.sender.username, - transfer.invite.recipient.username, - kc_obj.data_file.name, - ): - kc_obj.data_file.move(kc_target_folder) - kc_obj.file_hash = media_file.md5_hash - metadata_to_update.append(kc_obj) - - media_files_to_update.append(media_file) - heartbeat = _update_heartbeat(heartbeat, transfer, async_task_type) - - finally: - # No need to use chunk size for media files like we do for attachments, - # because the odds are pretty low that more than 100 media files are - # linked to the project. - if metadata_to_update: - media_files_to_update.append(media_file) - - if media_files_to_update: - AssetFile.objects.bulk_update( - media_files_to_update, fields=['content', 'metadata'] - ) + kc_obj.data_file.name, + ): + kc_obj.data_file.move(kc_target_folder) + kc_obj.file_hash = media_file.md5_hash + kc_obj.save(update_fields=['file_hash', 'data_file']) + + heartbeat = _update_heartbeat(heartbeat, transfer, async_task_type) _mark_task_as_successful(transfer, async_task_type) From c71dde8e6a84db0bee1e85b34228f71bf5d25081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 18 Dec 2024 18:45:55 -0500 Subject: [PATCH 3/5] feat(migrations): introduce long-running migration system using Celery (#5379) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### đŸ“Ŗ Summary Added a new system for handling long-running migrations using Celery. ### 📖 Description A new long-running migration system has been implemented, leveraging Celery to process migrations in the background. 
This system is designed to handle large-scale data updates that would otherwise require significant downtime with regular Django migrations ### 💭 Notes This long-running migration system leverages Celery for asynchronous task processing. However, a similar (and likely more robust) feature is being developed in Django, as detailed in [DEP 14 - Background Workers](https://github.com/django/deps/blob/main/accepted/0014-background-workers.rst). Once Django officially releases its built-in background worker system, this custom solution will be phased out in favor of the native implementation to better align with the Django ecosystem. --- dependencies/pip/dev_requirements.in | 1 + dependencies/pip/dev_requirements.txt | 3 + kobo/apps/long_running_migrations/README.md | 71 ++++++++++ kobo/apps/long_running_migrations/__init__.py | 0 kobo/apps/long_running_migrations/admin.py | 8 ++ .../jobs/0001_sample.py | 8 ++ ...ect_ownership_transfer_with_media_files.py | 56 +++++++++ .../long_running_migrations/jobs/__init__.py | 0 .../migrations/0001_initial.py | 29 +++++ ...ect_ownership_transfer_with_media_files.py | 25 ++++ .../migrations/__init__.py | 0 kobo/apps/long_running_migrations/models.py | 108 ++++++++++++++++++ kobo/apps/long_running_migrations/tasks.py | 41 +++++++ .../long_running_migrations/tests/__init__.py | 0 .../tests/fixtures/__init__.py | 0 .../tests/fixtures/sample_failure.py | 2 + .../tests/fixtures/sample_task.py | 2 + .../tests/test_case.py | 107 +++++++++++++++++ kobo/settings/base.py | 7 ++ kpi/models/abstract_models.py | 2 + kpi/tasks.py | 3 +- 21 files changed, 472 insertions(+), 1 deletion(-) create mode 100644 kobo/apps/long_running_migrations/README.md create mode 100644 kobo/apps/long_running_migrations/__init__.py create mode 100644 kobo/apps/long_running_migrations/admin.py create mode 100644 kobo/apps/long_running_migrations/jobs/0001_sample.py create mode 100644
kobo/apps/long_running_migrations/jobs/0002_fix_project_ownership_transfer_with_media_files.py create mode 100644 kobo/apps/long_running_migrations/jobs/__init__.py create mode 100644 kobo/apps/long_running_migrations/migrations/0001_initial.py create mode 100644 kobo/apps/long_running_migrations/migrations/0002_fix_failed_project_ownership_transfer_with_media_files.py create mode 100644 kobo/apps/long_running_migrations/migrations/__init__.py create mode 100644 kobo/apps/long_running_migrations/models.py create mode 100644 kobo/apps/long_running_migrations/tasks.py create mode 100644 kobo/apps/long_running_migrations/tests/__init__.py create mode 100644 kobo/apps/long_running_migrations/tests/fixtures/__init__.py create mode 100644 kobo/apps/long_running_migrations/tests/fixtures/sample_failure.py create mode 100644 kobo/apps/long_running_migrations/tests/fixtures/sample_task.py create mode 100644 kobo/apps/long_running_migrations/tests/test_case.py diff --git a/dependencies/pip/dev_requirements.in b/dependencies/pip/dev_requirements.in index 0e2c9ab027..93a205ef4b 100644 --- a/dependencies/pip/dev_requirements.in +++ b/dependencies/pip/dev_requirements.in @@ -12,6 +12,7 @@ pytest pytest-cov pytest-django pytest-env +freezegun # Kobocat diff --git a/dependencies/pip/dev_requirements.txt b/dependencies/pip/dev_requirements.txt index 2db23fbc3a..5755401f1e 100644 --- a/dependencies/pip/dev_requirements.txt +++ b/dependencies/pip/dev_requirements.txt @@ -254,6 +254,8 @@ fabric==3.2.2 # via -r dependencies/pip/dev_requirements.in flower==2.0.1 # via -r dependencies/pip/requirements.in +freezegun==1.5.1 + # via -r dependencies/pip/dev_requirements.in frozenlist==1.4.1 # via # aiohttp @@ -488,6 +490,7 @@ python-dateutil==2.9.0.post0 # -r dependencies/pip/requirements.in # botocore # celery + # freezegun # pandas # python-crontab python3-openid==3.2.0 diff --git a/kobo/apps/long_running_migrations/README.md b/kobo/apps/long_running_migrations/README.md new file mode 
100644 index 0000000000..b3da45f254 --- /dev/null +++ b/kobo/apps/long_running_migrations/README.md @@ -0,0 +1,71 @@ +# Long Running Migrations + +This feature allows you to execute long-running migrations using Celery. Each migration will attempt to complete within the maximum time allowed by Celery (see `settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT`). If it does not complete within this time, the periodic task will retry and resume the migration from where it left off, continuing until the long-running migration either successfully completes or raises an exception. + +## How to Use + +1. **Create your migration** + Define your migrations in the `jobs` folder. Each migration should have a unique name, following Django's migration naming convention (e.g., `0001_description`). The migration file must contain a function called `run()`. + +2. **Register the migration** + Create a `LongRunningMigration` entry by running: + + ```python + LongRunningMigration.objects.create(name='0001_sample') + ``` + + You can automate this step by adding it to a Django migration with `RunPython`. + + ```python + from django.db import migrations + + + def add_long_running_migration(apps, schema_editor): + LongRunningMigration = apps.get_model('long_running_migrations', 'LongRunningMigration') # noqa + LongRunningMigration.objects.create( + name='0001_sample' + ) + + + def noop(*args, **kwargs): + pass + + + class Migration(migrations.Migration): + + dependencies = [ + ('long_running_migrations', '0001_initial'), + ] + + operations = [ + migrations.RunPython(add_long_running_migration, noop), + ] + + + ``` + + + +3. **Execute the migration** + Wait for the periodic task `execute_long_running_migrations` to run automatically or trigger it manually (beware of the lock, it can only run one at a time). + + +## Writing a good long-running migration + +When writing long-running migrations, ensure they are both **atomic** and **tolerant** to interruptions at any point in their execution.
+ +```python +# 2024-10-13 +from django.db import transaction + +def run(): + for foo in Foo.objects.filter(is_barred=False): # Checks actually needs to run still + with transaction.atomic(): # Atomic! + foo.make_it_bar() # Perhaps this does multiple things that could succeed or fail +``` + +* Notice that if the task is interrupted, it will simply continue in the next run. + +* Because tasks are slow, your code should run regardless of when the data migration takes place. + +* Add a timestamp to your migration definition to help future developers identify when it can be safely removed (if needed). diff --git a/kobo/apps/long_running_migrations/__init__.py b/kobo/apps/long_running_migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/long_running_migrations/admin.py b/kobo/apps/long_running_migrations/admin.py new file mode 100644 index 0000000000..0b1b5f3154 --- /dev/null +++ b/kobo/apps/long_running_migrations/admin.py @@ -0,0 +1,8 @@ +from django.contrib import admin + +from .models import LongRunningMigration + + +@admin.register(LongRunningMigration) +class LongRunningMigrationAdmin(admin.ModelAdmin): + readonly_fields=('date_created', 'date_modified') diff --git a/kobo/apps/long_running_migrations/jobs/0001_sample.py b/kobo/apps/long_running_migrations/jobs/0001_sample.py new file mode 100644 index 0000000000..085cbb8389 --- /dev/null +++ b/kobo/apps/long_running_migrations/jobs/0001_sample.py @@ -0,0 +1,8 @@ +# Generated on 2024-12-18 + +def run(): + """ + Describe your long-running migration + """ + + pass diff --git a/kobo/apps/long_running_migrations/jobs/0002_fix_project_ownership_transfer_with_media_files.py b/kobo/apps/long_running_migrations/jobs/0002_fix_project_ownership_transfer_with_media_files.py new file mode 100644 index 0000000000..fd627e7b2e --- /dev/null +++ b/kobo/apps/long_running_migrations/jobs/0002_fix_project_ownership_transfer_with_media_files.py @@ -0,0 +1,56 @@ +# Generated on 2024-12-18 
+from django.db.models import Q, OuterRef, Subquery + +from kobo.apps.openrosa.apps.logger.models import XForm +from kobo.apps.openrosa.apps.main.models import MetaData +from kpi.models.asset import Asset +from kpi.models.asset_file import AssetFile +from kobo.apps.project_ownership.models import Transfer + + +def run(): + """ + Update OpenRosa MetaData objects that were not updated when project + ownership was transferred to someone else. This fixes a bug introduced + and later addressed in KPI (issue #5365). + """ + + # Step 1: Retrieve all assets that were transferred since the bug was present and + # use media files + asset_uids = Asset.objects.filter( + Q( + pk__in=AssetFile.objects.values_list('asset_id', flat=True).exclude( + file_type=AssetFile.PAIRED_DATA + ) + ) + & Q( + pk__in=Transfer.objects.values_list('asset_id', flat=True).filter( + invite__date_created__date__gte='2024-09-15' + ) + ) + ).values_list('uid', flat=True) + + username_subquery = XForm.objects.filter(pk=OuterRef('xform_id')).values( + 'user__username' + )[:1] + + # Step 2: Iterate through relevant MetaData objects and fix their data_file fields + for metadata in ( + MetaData.objects.filter( + xform_id__in=XForm.objects.filter( + kpi_asset_uid__in=list(asset_uids) + ), + ) + .exclude( + Q(data_file__startswith=Subquery(username_subquery)) + | Q(data_file__isnull=True) + | Q(data_file='') + ) + .select_related('xform', 'xform__user') + .iterator() + ): + data_file = str(metadata.data_file) + old_username, *other_parts = data_file.split('/') + other_parts.insert(0, metadata.xform.user.username) + metadata.data_file = '/'.join(other_parts) + metadata.save(update_fields=['data_file']) diff --git a/kobo/apps/long_running_migrations/jobs/__init__.py b/kobo/apps/long_running_migrations/jobs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/long_running_migrations/migrations/0001_initial.py b/kobo/apps/long_running_migrations/migrations/0001_initial.py new file 
mode 100644 index 0000000000..1a739539cf --- /dev/null +++ b/kobo/apps/long_running_migrations/migrations/0001_initial.py @@ -0,0 +1,29 @@ +# Generated by Django 4.2.15 on 2024-12-13 19:53 +from django.db import migrations, models + +import kpi.models.abstract_models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='LongRunningMigration', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('date_created', models.DateTimeField(default=kpi.models.abstract_models._get_default_datetime)), + ('date_modified', models.DateTimeField(default=kpi.models.abstract_models._get_default_datetime)), + ('name', models.CharField(max_length=255, unique=True)), + ('status', models.CharField(choices=[('created', 'Created'), ('in_progress', 'In Progress'), ('failed', 'Failed'), ('completed', 'Completed')], default='created', max_length=20)), + ('attempts', models.PositiveSmallIntegerField(default=0)), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/kobo/apps/long_running_migrations/migrations/0002_fix_failed_project_ownership_transfer_with_media_files.py b/kobo/apps/long_running_migrations/migrations/0002_fix_failed_project_ownership_transfer_with_media_files.py new file mode 100644 index 0000000000..fa1f74df36 --- /dev/null +++ b/kobo/apps/long_running_migrations/migrations/0002_fix_failed_project_ownership_transfer_with_media_files.py @@ -0,0 +1,25 @@ +# Generated by Django 4.2.15 on 2024-12-18 20:00 + +from django.db import migrations + + +def add_long_running_migration(apps, schema_editor): + LongRunningMigration = apps.get_model('long_running_migrations', 'LongRunningMigration') # noqa + LongRunningMigration.objects.create( + name='0002_fix_project_ownership_transfer_with_media_files' + ) + + +def noop(*args, **kwargs): + pass + + +class Migration(migrations.Migration): + + dependencies = [ + 
('long_running_migrations', '0001_initial'), + ] + + operations = [ + migrations.RunPython(add_long_running_migration, noop), + ] diff --git a/kobo/apps/long_running_migrations/migrations/__init__.py b/kobo/apps/long_running_migrations/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/long_running_migrations/models.py b/kobo/apps/long_running_migrations/models.py new file mode 100644 index 0000000000..e18df934e6 --- /dev/null +++ b/kobo/apps/long_running_migrations/models.py @@ -0,0 +1,108 @@ +import os +from importlib.util import module_from_spec, spec_from_file_location + +from django.conf import settings +from django.core.exceptions import SuspiciousOperation +from django.db import models + +from kpi.models.abstract_models import AbstractTimeStampedModel +from kpi.utils.log import logging + + +class LongRunningMigrationStatus(models.TextChoices): + CREATED = 'created' + IN_PROGRESS = 'in_progress' + FAILED = 'failed' + COMPLETED = 'completed' + + +class LongRunningMigration(AbstractTimeStampedModel): + + LONG_RUNNING_MIGRATIONS_DIR = os.path.join( + 'kobo', + 'apps', + 'long_running_migrations', + 'jobs' + ) + + name = models.CharField(max_length=255, unique=True) + status = models.CharField( + default=LongRunningMigrationStatus.CREATED, + choices=LongRunningMigrationStatus.choices, + max_length=20, + ) + attempts = models.PositiveSmallIntegerField(default=0) + + def clean(self): + super().clean() + if '..' in self.name or '/' in self.name or '\\' in self.name: + raise SuspiciousOperation( + f"Invalid migration name '{self.name}'. " + f"Migration names cannot contain directory traversal characters " + f"such as '..', '/', or '\\'." 
+ ) + + def execute(self): + # Skip execution if the migration is already completed + if self.status == LongRunningMigrationStatus.COMPLETED: + return + + if not (module := self._load_module()): + return + + self.status = LongRunningMigrationStatus.IN_PROGRESS + self.attempts += 1 + self.save(update_fields=['status', 'attempts']) + + try: + module.run() + except Exception as e: + # Log the error and update the status to 'failed' + logging.error(f'LongRunningMigration.execute(): {str(e)}') + self.status = LongRunningMigrationStatus.FAILED + self.save(update_fields=['status']) + return + + self.status = LongRunningMigrationStatus.COMPLETED + self.save(update_fields=['status']) + + def save(self, **kwargs): + + self.clean() + + if self._state.adding: + file_path = os.path.join( + settings.BASE_DIR, self.LONG_RUNNING_MIGRATIONS_DIR, f'{self.name}.py' + ) + if not os.path.exists(file_path): + raise ValueError('Task does not exist in tasks directory') + super().save(**kwargs) + + def _load_module(self): + """ + This function allows you to load a Python module from a file path even if + the module's name does not follow Python's standard naming conventions + (e.g., starting with numbers or containing special characters). Normally, + Python identifiers must adhere to specific rules, but this method bypasses + those restrictions by dynamically creating a module from its file. 
+ """ + module_path = f'{self.LONG_RUNNING_MIGRATIONS_DIR}/{self.name}.py' + if not os.path.exists(f'{settings.BASE_DIR}/{module_path}'): + logging.error( + f'LongRunningMigration._load_module():' + f'File not found `{module_path}`' + ) + return + + spec = spec_from_file_location(self.name, module_path) + try: + module = module_from_spec(spec) + except (ModuleNotFoundError, AttributeError): + logging.error( + f'LongRunningMigration._load_module():' + f'Failed to import migration module `{self.name}`' + ) + return + + spec.loader.exec_module(module) + return module diff --git a/kobo/apps/long_running_migrations/tasks.py b/kobo/apps/long_running_migrations/tasks.py new file mode 100644 index 0000000000..0b2510c80c --- /dev/null +++ b/kobo/apps/long_running_migrations/tasks.py @@ -0,0 +1,41 @@ +from kobo.celery import celery_app + +from django.conf import settings +from django.core.cache import cache +from django.db.models import Q +from django.utils import timezone +from dateutil.relativedelta import relativedelta + +from .models import LongRunningMigration, LongRunningMigrationStatus + + +@celery_app.task( + queue='kpi_low_priority_queue', + soft_time_limit=settings.CELERY_LONG_RUNNING_TASK_SOFT_TIME_LIMIT, + time_limit=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT, +) +def execute_long_running_migrations(): + lock_key = 'execute_long_running_migrations' + + if cache.add( + lock_key, 'true', timeout=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT + ): + try: + # Adding an offset to account for potential delays in task execution and + # clock drift between the Celery workers and the database, ensuring tasks + # are not prematurely skipped. 
+ offset = 5 * 60 + task_expiry_time = timezone.now() - relativedelta( + seconds=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT + offset + ) + # Run tasks that were just created or are in progress but have exceeded + # the maximum runtime allowed for a Celery task, causing Celery to terminate + # them and raise a SoftTimeLimitExceeded exception. + for migration in LongRunningMigration.objects.filter( + Q(status=LongRunningMigrationStatus.CREATED) + | Q(status=LongRunningMigrationStatus.IN_PROGRESS) + & Q(date_modified__lte=task_expiry_time) + ).order_by('date_created'): + migration.execute() + finally: + cache.delete(lock_key) diff --git a/kobo/apps/long_running_migrations/tests/__init__.py b/kobo/apps/long_running_migrations/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/long_running_migrations/tests/fixtures/__init__.py b/kobo/apps/long_running_migrations/tests/fixtures/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/kobo/apps/long_running_migrations/tests/fixtures/sample_failure.py b/kobo/apps/long_running_migrations/tests/fixtures/sample_failure.py new file mode 100644 index 0000000000..033ba598c9 --- /dev/null +++ b/kobo/apps/long_running_migrations/tests/fixtures/sample_failure.py @@ -0,0 +1,2 @@ +def run(): + raise Exception diff --git a/kobo/apps/long_running_migrations/tests/fixtures/sample_task.py b/kobo/apps/long_running_migrations/tests/fixtures/sample_task.py new file mode 100644 index 0000000000..ff4e27a2cf --- /dev/null +++ b/kobo/apps/long_running_migrations/tests/fixtures/sample_task.py @@ -0,0 +1,2 @@ +def run(): + print('hello from long running migration') diff --git a/kobo/apps/long_running_migrations/tests/test_case.py b/kobo/apps/long_running_migrations/tests/test_case.py new file mode 100644 index 0000000000..7549b15cf2 --- /dev/null +++ b/kobo/apps/long_running_migrations/tests/test_case.py @@ -0,0 +1,107 @@ +import os +from unittest.mock import patch + +from 
django.core.exceptions import SuspiciousOperation +from django.test import TestCase, override_settings +from freezegun import freeze_time + +from ..models import LongRunningMigration, LongRunningMigrationStatus +from ..tasks import execute_long_running_migrations + + +@override_settings( + CACHES={ + 'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'} + } +) +@patch.object(LongRunningMigration, 'LONG_RUNNING_MIGRATIONS_DIR', os.path.join( + 'kobo', + 'apps', + 'long_running_migrations', + 'tests', + 'fixtures' +)) +class LongRunningMigrationTestCase(TestCase): + def test_sample_task(self): + migration = LongRunningMigration.objects.create(name='sample_task') + migration.execute() + migration.refresh_from_db() + self.assertEqual(migration.status, LongRunningMigrationStatus.COMPLETED) + + def test_invalid_task_name(self): + with self.assertRaises(ValueError): + LongRunningMigration.objects.create(name='foo') + + def test_traversal_characters(self): + with self.assertRaises(SuspiciousOperation): + LongRunningMigration.objects.create(name='../fixtures/sample_task') + + def test_sample_failure(self): + migration = LongRunningMigration.objects.create(name='sample_failure') + migration.execute() + migration.refresh_from_db() + self.assertEqual(migration.status, LongRunningMigrationStatus.FAILED) + + def test_not_updated_worker(self): + # simulate not updated worker with a wrong name + migrations = LongRunningMigration.objects.bulk_create( + [LongRunningMigration(name='foo')] + ) + migration = migrations[0] + migration.execute() + migration.refresh_from_db() + self.assertEqual(migration.status, LongRunningMigrationStatus.CREATED) + + +@override_settings( + CACHES={ + 'default': {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'} + } +) +class LongRunningMigrationPeriodicTaskTestCase(TestCase): + + def setUp(self): + self.patcher = patch.object( + LongRunningMigration, + 'LONG_RUNNING_MIGRATIONS_DIR', + os.path.join('kobo', 'apps', 
'long_running_migrations', 'tests', 'fixtures') + ) + self.patcher.start() + self.migration = LongRunningMigration.objects.create(name='sample_task') + + # Remove real existing long-running migrations + LongRunningMigration.objects.exclude(pk=self.migration.pk).delete() + assert LongRunningMigration.objects.count() == 1 + + def tearDown(self): + self.patcher.stop() + + def test_migration_as_completed(self): + execute_long_running_migrations() + self.migration.refresh_from_db() + self.assertEqual(self.migration.status, LongRunningMigrationStatus.COMPLETED) + + def test_failed_migration_is_ignored(self): + # Ignore failed task + self.migration.status = LongRunningMigrationStatus.FAILED + self.migration.save(update_fields=['status']) + execute_long_running_migrations() + self.migration.refresh_from_db() + self.assertEqual(self.migration.status, LongRunningMigrationStatus.FAILED) + + def test_ignore_recent_started_migration(self): + # Ignore recent started tasks + self.migration.status = LongRunningMigrationStatus.IN_PROGRESS + self.migration.save() + execute_long_running_migrations() + self.migration.refresh_from_db() + self.assertEqual(self.migration.status, LongRunningMigrationStatus.IN_PROGRESS) + + def test_resume_stuck_migration(self): + # Run an old in-progress task + with freeze_time('2024-12-10'): + self.migration.status = LongRunningMigrationStatus.IN_PROGRESS + self.migration.save() + execute_long_running_migrations() + self.migration.refresh_from_db() + self.assertEqual(self.migration.status, LongRunningMigrationStatus.COMPLETED) diff --git a/kobo/settings/base.py b/kobo/settings/base.py index 4a555bbe04..736e40959f 100644 --- a/kobo/settings/base.py +++ b/kobo/settings/base.py @@ -144,6 +144,7 @@ 'guardian', 'kobo.apps.openrosa.libs', 'kobo.apps.project_ownership.ProjectOwnershipAppConfig', + 'kobo.apps.long_running_migrations', ) MIDDLEWARE = [ @@ -1201,6 +1202,12 @@ def dj_stripe_request_callback_method(): 'schedule': crontab(minute=0, hour=0), 
'options': {'queue': 'kpi_low_priority_queue'} }, + # Schedule every hour, every day + 'long-running-migrations': { + 'task': 'kobo.apps.long_running_migrations.tasks.execute_long_running_migrations', # noqa + 'schedule': crontab(minute=0), + 'options': {'queue': 'kpi_low_priority_queue'} + }, } diff --git a/kpi/models/abstract_models.py b/kpi/models/abstract_models.py index 9f6e29702d..9d42dc6565 100644 --- a/kpi/models/abstract_models.py +++ b/kpi/models/abstract_models.py @@ -25,4 +25,6 @@ def save(self, *args, **kwargs): update_fields = kwargs.get('update_fields', None) if update_fields is None or 'date_modified' not in update_fields: self.date_modified = timezone.now() + if update_fields: + update_fields.append('date_modified') super().save(*args, **kwargs) diff --git a/kpi/tasks.py b/kpi/tasks.py index 786484fec8..4d1ebcdfeb 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -96,8 +96,9 @@ def enketo_flush_cached_preview(server_url, form_id): @celery_app.task(time_limit=LIMIT_HOURS_23, soft_time_limit=LIMIT_HOURS_23) def perform_maintenance(): """ - Run daily maintenance tasks + Run daily maintenance tasks. Ensure it cannot run multiple times. """ + remove_unused_markdown_files() remove_old_import_tasks() remove_old_asset_snapshots() From 9471b34bb1211e7c0f56bd04fb3811957ab4b74a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 18 Dec 2024 18:54:05 -0500 Subject: [PATCH 4/5] fix(celery): correct crontab syntax for Celery Beat scheduler (#5386) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### đŸ“Ŗ Summary Fixed an issue with incorrect crontab syntax in the Celery Beat scheduler. ### 📖 Description An issue with the crontab syntax in the Celery Beat scheduler prevented periodic tasks from running correctly. This fix updates the syntax to conform to the expected format. 
--- kobo/settings/base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kobo/settings/base.py b/kobo/settings/base.py index 736e40959f..c1c794d0a3 100644 --- a/kobo/settings/base.py +++ b/kobo/settings/base.py @@ -1160,11 +1160,11 @@ def dj_stripe_request_callback_method(): # Schedule every 30 minutes 'trash-bin-garbage-collector': { 'task': 'kobo.apps.trash_bin.tasks.garbage_collector', - 'schedule': crontab(minute=30), + 'schedule': crontab(minute='*/30'), 'options': {'queue': 'kpi_low_priority_queue'}, }, 'perform-maintenance': { - 'task': 'kobo.tasks.perform_maintenance', + 'task': 'kpi.tasks.perform_maintenance', 'schedule': crontab(hour=20, minute=0), 'options': {'queue': 'kpi_low_priority_queue'}, }, @@ -1181,19 +1181,19 @@ def dj_stripe_request_callback_method(): # Schedule every 10 minutes 'project-ownership-task-scheduler': { 'task': 'kobo.apps.project_ownership.tasks.task_rescheduler', - 'schedule': crontab(minute=10), + 'schedule': crontab(minute='*/10'), 'options': {'queue': 'kpi_low_priority_queue'} }, # Schedule every 30 minutes 'project-ownership-mark-stuck-tasks-as-failed': { 'task': 'kobo.apps.project_ownership.tasks.mark_stuck_tasks_as_failed', - 'schedule': crontab(minute=30), + 'schedule': crontab(minute='*/30'), 'options': {'queue': 'kpi_low_priority_queue'} }, # Schedule every 30 minutes 'project-ownership-mark-as-expired': { 'task': 'kobo.apps.project_ownership.tasks.mark_as_expired', - 'schedule': crontab(minute=30), + 'schedule': crontab(minute='*/30'), 'options': {'queue': 'kpi_low_priority_queue'} }, # Schedule every day at midnight UTC From 89a5d6edf15fc4de42d1448ddeaacd17746e3ea9 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Thu, 19 Dec 2024 11:23:42 -0500 Subject: [PATCH 5/5] Update pip dependencies --- dependencies/pip/dev_requirements.txt | 1 - dependencies/pip/requirements.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/dependencies/pip/dev_requirements.txt 
b/dependencies/pip/dev_requirements.txt index 3a2a9587d1..93f2c64a33 100644 --- a/dependencies/pip/dev_requirements.txt +++ b/dependencies/pip/dev_requirements.txt @@ -412,7 +412,6 @@ oauthlib==3.2.2 # via # -r dependencies/pip/requirements.in # django-oauth-toolkit - # requests-oauthlib openpyxl==3.0.9 # via # -r dependencies/pip/requirements.in diff --git a/dependencies/pip/requirements.txt b/dependencies/pip/requirements.txt index 8cb6eefcf6..70dee7d3ea 100644 --- a/dependencies/pip/requirements.txt +++ b/dependencies/pip/requirements.txt @@ -334,7 +334,6 @@ oauthlib==3.2.2 # via # -r dependencies/pip/requirements.in # django-oauth-toolkit - # requests-oauthlib openpyxl==3.0.9 # via # -r dependencies/pip/requirements.in