From 6f728bb0f4995ce2cd6234656d7d898f0316cae2 Mon Sep 17 00:00:00 2001 From: Dickson Ukang'a Date: Sun, 10 Sep 2017 04:07:24 +0300 Subject: [PATCH] Compare XML data using its checksum. Store the checksum of a submission XML and use it to compare when checking for changes/edits in the data. --- .../migrations/0039_auto_20170909_2052.py | 25 ++++++++++++++++++ onadata/apps/logger/models/instance.py | 3 +++ onadata/libs/utils/logger_tools.py | 26 ++++++++++++------- 3 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 onadata/apps/logger/migrations/0039_auto_20170909_2052.py diff --git a/onadata/apps/logger/migrations/0039_auto_20170909_2052.py b/onadata/apps/logger/migrations/0039_auto_20170909_2052.py new file mode 100644 index 0000000000..c8f4eabb83 --- /dev/null +++ b/onadata/apps/logger/migrations/0039_auto_20170909_2052.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.1 on 2017-09-10 00:52 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('logger', '0038_auto_20170828_1718'), + ] + + operations = [ + migrations.AddField( + model_name='instance', + name='checksum', + field=models.CharField(blank=True, max_length=32, null=True), + ), + migrations.AddField( + model_name='instancehistory', + name='checksum', + field=models.CharField(blank=True, max_length=32, null=True), + ), + ] diff --git a/onadata/apps/logger/models/instance.py b/onadata/apps/logger/models/instance.py index 18da72bad4..0deafe556e 100644 --- a/onadata/apps/logger/models/instance.py +++ b/onadata/apps/logger/models/instance.py @@ -437,6 +437,7 @@ class Instance(models.Model, InstanceBaseClass): media_count = models.PositiveIntegerField(_("Received Media Attachments"), null=True, default=0) + checksum = models.CharField(max_length=32, null=True, blank=True) tags = TaggableManager() @@ -528,6 +529,8 @@ class Meta: date_modified = 
models.DateTimeField(auto_now=True) submission_date = models.DateTimeField(null=True, default=None) geom = models.GeometryCollectionField(null=True) + checksum = models.CharField(max_length=32, null=True, blank=True) + objects = models.GeoManager() @property diff --git a/onadata/libs/utils/logger_tools.py b/onadata/libs/utils/logger_tools.py index 2335a45939..d17eb329d2 100644 --- a/onadata/libs/utils/logger_tools.py +++ b/onadata/libs/utils/logger_tools.py @@ -14,12 +14,14 @@ ValidationError) from django.core.files.storage import get_storage_class from django.db import IntegrityError, transaction +from django.db.models import Q from django.http import (HttpResponse, HttpResponseNotFound, StreamingHttpResponse, UnreadablePostError) from django.shortcuts import get_object_or_404 from django.utils import timezone from django.utils.encoding import DjangoUnicodeDecodeError from django.utils.translation import ugettext as _ +from hashlib import md5 from modilabs.utils.subprocess_timeout import ProcessTimedOut from multidb.pinning import use_master @@ -52,7 +54,7 @@ re.DOTALL) -def _get_instance(xml, new_uuid, submitted_by, status, xform): +def _get_instance(xml, new_uuid, submitted_by, status, xform, checksum): history = None instance = None # check if its an edit submission @@ -70,6 +72,7 @@ def _get_instance(xml, new_uuid, submitted_by, status, xform): last_edited = timezone.now() InstanceHistory.objects.create( + checksum=instance.checksum, xml=instance.xml, xform_instance=instance, uuid=old_uuid, @@ -79,6 +82,7 @@ def _get_instance(xml, new_uuid, submitted_by, status, xform): instance.xml = xml instance.last_edited = last_edited instance.uuid = new_uuid + instance.checksum = checksum instance.save() # call webhooks @@ -88,7 +92,8 @@ def _get_instance(xml, new_uuid, submitted_by, status, xform): if old_uuid is None or (instance is None and history is None): # new submission instance = Instance.objects.create( - xml=xml, user=submitted_by, status=status, xform=xform) 
+ xml=xml, user=submitted_by, status=status, xform=xform, + checksum=checksum) return instance @@ -218,11 +223,12 @@ def save_attachments(xform, instance, media_files): def save_submission(xform, xml, media_files, new_uuid, submitted_by, status, - date_created_override): + date_created_override, checksum): if not date_created_override: date_created_override = get_submission_date_from_xml(xml) - instance = _get_instance(xml, new_uuid, submitted_by, status, xform) + instance = _get_instance(xml, new_uuid, submitted_by, status, xform, + checksum) save_attachments(xform, instance, media_files) # override date created if required @@ -279,10 +285,11 @@ def create_instance(username, xml = xml_file.read() xform = get_xform_from_submission(xml, username, uuid) check_submission_permissions(request, xform) + checksum = md5(xml).hexdigest() new_uuid = get_uuid_from_xml(xml) filtered_instances = get_filtered_instances( - uuid=new_uuid, xform_id=xform.pk) + Q(checksum=checksum) | Q(uuid=new_uuid), xform_id=xform.pk) existing_instance = filtered_instances.first() if existing_instance and \ (new_uuid or existing_instance.xform.has_start_time): @@ -297,7 +304,7 @@ def create_instance(username, # has already been submitted for that user. return DuplicateInstance() - # get new and depracated uuid's + # get new and deprecated uuid's history = InstanceHistory.objects.filter( xform_instance__xform_id=xform.pk, uuid=new_uuid).only('xform_instance').first() @@ -315,10 +322,11 @@ def create_instance(username, with transaction.atomic(): instance = save_submission(xform, xml, media_files, new_uuid, submitted_by, status, - date_created_override) + date_created_override, checksum) except IntegrityError: - instance = Instance.objects.filter(uuid=new_uuid, - xform__id=xform.pk).first() + instance = Instance.objects.filter( + Q(checksum=checksum) | Q(uuid=new_uuid), + xform_id=xform.pk).first() if instance: attachment_names = [