Skip to content

Commit

Permalink
feat: back-fill submission unique identifier - TASK 1459 (#5434)
Browse files Browse the repository at this point in the history
### 📣 Summary
Added a long-running migration to backfill the `root_uuid` field for old
submissions.


### 📖 Description
Submissions now use a field called `root_uuid`, which serves as a unique
identifier per project. For older submissions, this field is empty. This
PR introduces a long-running migration to backfill the `root_uuid` field
with the current `uuid` for existing submissions.

To ensure uniqueness, the migration includes logic to detect and resolve
conflicts. If a `uuid` is not unique within a project, the
`clean_duplicated_submissions` management command is invoked to address
the duplicates before assigning the `root_uuid`.
  • Loading branch information
noliveleger authored Jan 27, 2025
1 parent e424952 commit 1e23d21
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Generated on 2025-01-16 11:58
from django.conf import settings
from django.core.management import call_command
from django.db import IntegrityError
from more_itertools import chunked
from taggit.models import TaggedItem

from kobo.apps.openrosa.apps.logger.models import XForm, Instance
from kpi.utils.database import use_db


def run():
    """
    Back-fill `root_uuid` on submissions (`Instance` rows) where it is NULL.

    Every XForm that does not already carry a tag containing
    `kobo-root-uuid` is visited, and its instances with a NULL `root_uuid`
    are handed to `_process_instances_batch` in batches of `CHUNK_SIZE`.

    Tags are used as progress markers:

    - a form whose batches all succeeded is tagged `kobo-root-uuid-success`;
    - a form with a failing batch is tagged `kobo-root-uuid-failed` by
      `_process_instances_batch`, and its remaining batches are skipped.

    Because tagged forms are excluded from the initial query, forms that
    were already handled are not processed again if the job is re-run
    before it completes. Once all forms have been visited, the `success`
    tagged-item rows are deleted; the `failed` ones are kept for manual
    review.
    """
    CHUNK_SIZE = 2000

    with use_db(settings.OPENROSA_DB_ALIAS):
        xforms = (
            XForm.objects.only('pk', 'id_string')
            .exclude(tags__name__contains='kobo-root-uuid')
            .iterator()
        )
        for xform_batch in chunked(xforms, CHUNK_SIZE):
            for xform in xform_batch:
                instances = (
                    Instance.objects.only('pk', 'uuid', 'xml', 'root_uuid')
                    .filter(root_uuid__isnull=True, xform_id=xform.pk)
                    .iterator()
                )
                # `all()` stops at the first batch that reports a failure,
                # so later batches of a failing form are not attempted.
                # A form without any matching instance counts as a success.
                succeeded = all(
                    _process_instances_batch(xform, instance_batch)
                    for instance_batch in chunked(instances, CHUNK_SIZE)
                )
                if succeeded:
                    xform.tags.add('kobo-root-uuid-success')

        # Clean up tags while retaining failed entries for future manual
        # review.
        # NOTE(review): kept inside the `use_db` block on the assumption that
        # the cleanup must hit the same database as the tags added above;
        # confirm against the original indentation.
        TaggedItem.objects.filter(tag__name='kobo-root-uuid-success').delete()


def _process_instances_batch(
    xform: XForm, instance_batch: list[Instance], first_try=True
) -> bool:
    """
    Assign a `root_uuid` to every instance of the batch and persist it.

    Each instance is asked to populate its own `root_uuid`; when that fails
    with the "root_uuid should not be empty" assertion, the instance's
    `uuid` is used instead. The whole batch is then saved with a single
    `bulk_update`.

    If saving raises an `IntegrityError` on the first attempt, the
    `clean_duplicated_submissions` management command is run for the form,
    the batch is reloaded from the database and processed one more time.
    If the second attempt also fails, the form is tagged
    `kobo-root-uuid-failed`.

    Args:
        xform: form that owns the instances of the batch.
        instance_batch: instances to update.
        first_try: `False` when called again after the duplicate clean-up,
            which disables any further retry.

    Returns:
        `True` when the batch was saved, `False` when it could not be saved
        even after the clean-up.
    """
    for submission in instance_batch:
        try:
            submission._populate_root_uuid()  # noqa
        except AssertionError as exc:
            if 'root_uuid should not be empty' in str(exc):
                # Fall back on `uuid` to back-fill `root_uuid`
                submission.root_uuid = submission.uuid

    try:
        Instance.objects.bulk_update(instance_batch, fields=['root_uuid'])
    except IntegrityError:
        if not first_try:
            # The clean-up has already been attempted once: give up and flag
            # the form.
            xform.tags.add('kobo-root-uuid-failed')
            return False

        call_command('clean_duplicated_submissions', xform=xform.id_string)

        # The batch must be reloaded to pick up the new uuids
        batch_pks = [submission.pk for submission in instance_batch]
        reloaded_batch = Instance.objects.only(
            'pk', 'uuid', 'xml', 'root_uuid'
        ).filter(pk__in=batch_pks)
        return _process_instances_batch(
            xform, reloaded_batch, first_try=False
        )

    return True
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Generated by Django 4.2.15 on 2025-01-16 11:50

from django.db import migrations


def add_long_running_migration(apps, schema_editor):
    """
    Create the `LongRunningMigration` row for the `root_uuid` back-fill job.

    The model is resolved through `apps.get_model` rather than imported
    directly. `schema_editor` is not used.
    """
    migration_name = '0005_back_fill_logger_instance_root_uuid'
    model_class = apps.get_model(
        'long_running_migrations', 'LongRunningMigration'
    )  # noqa
    model_class.objects.create(name=migration_name)


def noop(*args, **kwargs):
    """Accept any arguments and do nothing (used as the reverse operation)."""
    return None


class Migration(migrations.Migration):
    """
    Register the `0005_back_fill_logger_instance_root_uuid` long-running
    migration. Reversing this migration is a no-op.
    """

    dependencies = [
        (
            'long_running_migrations',
            '0004_back_fill_asset_search_field_for_owner_label',
        ),
    ]

    operations = [
        migrations.RunPython(
            code=add_long_running_migration,
            reverse_code=noop,
        ),
    ]
8 changes: 7 additions & 1 deletion kpi/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ def tag_uid_post_save(sender, instance, created, raw, **kwargs):
""" Make sure we have a TagUid object for each newly-created Tag """
if raw or not created:
return

# We don't want to create KPI things for OpenRosa models
if kwargs.get('using') == settings.OPENROSA_DB_ALIAS: # noqa
return

TagUid.objects.get_or_create(tag=instance)


Expand All @@ -28,7 +33,8 @@ def post_delete_asset(sender, instance, **kwargs):
# Update parent's languages if this object is a child of another asset.
try:
parent = instance.parent
except Asset.DoesNotExist: # `parent` may exists in DJANGO models cache but not in DB
except Asset.DoesNotExist:
# `parent` may exist in DJANGO models cache but not in DB
pass
else:
if parent:
Expand Down

0 comments on commit 1e23d21

Please sign in to comment.