Skip to content

Commit

Permalink
Compare XML data using its checksum.
Browse files Browse the repository at this point in the history
Store the checksum of a submission XML and use it to compare when
checking for changes/edits in the data.
  • Loading branch information
ukanga committed Sep 10, 2017
1 parent 280de4c commit 6f728bb
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 9 deletions.
25 changes: 25 additions & 0 deletions onadata/apps/logger/migrations/0039_auto_20170909_2052.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.1 on 2017-09-10 00:52
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add a ``checksum`` column to the instance and instance-history models.

    Each column is a ``CharField`` of at most 32 characters that accepts
    blank and NULL values; no default is supplied for existing rows.
    """

    # This schema change is applied on top of the preceding logger migration.
    dependencies = [('logger', '0038_auto_20170828_1718')]

    operations = [
        # Checksum column on the ``instance`` model.
        migrations.AddField(
            model_name='instance',
            name='checksum',
            field=models.CharField(
                max_length=32,
                null=True,
                blank=True,
            ),
        ),
        # Matching checksum column on the ``instancehistory`` model. A
        # separate field object is built for each operation rather than
        # sharing one instance between them.
        migrations.AddField(
            model_name='instancehistory',
            name='checksum',
            field=models.CharField(
                max_length=32,
                null=True,
                blank=True,
            ),
        ),
    ]
3 changes: 3 additions & 0 deletions onadata/apps/logger/models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ class Instance(models.Model, InstanceBaseClass):
media_count = models.PositiveIntegerField(_("Received Media Attachments"),
null=True,
default=0)
checksum = models.CharField(max_length=32, null=True, blank=True)

tags = TaggableManager()

Expand Down Expand Up @@ -528,6 +529,8 @@ class Meta:
date_modified = models.DateTimeField(auto_now=True)
submission_date = models.DateTimeField(null=True, default=None)
geom = models.GeometryCollectionField(null=True)
checksum = models.CharField(max_length=32, null=True, blank=True)

objects = models.GeoManager()

@property
Expand Down
26 changes: 17 additions & 9 deletions onadata/libs/utils/logger_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
ValidationError)
from django.core.files.storage import get_storage_class
from django.db import IntegrityError, transaction
from django.db.models import Q
from django.http import (HttpResponse, HttpResponseNotFound,
StreamingHttpResponse, UnreadablePostError)
from django.shortcuts import get_object_or_404
from django.utils import timezone
from django.utils.encoding import DjangoUnicodeDecodeError
from django.utils.translation import ugettext as _
from hashlib import md5
from modilabs.utils.subprocess_timeout import ProcessTimedOut
from multidb.pinning import use_master

Expand Down Expand Up @@ -52,7 +54,7 @@
re.DOTALL)


def _get_instance(xml, new_uuid, submitted_by, status, xform):
def _get_instance(xml, new_uuid, submitted_by, status, xform, checksum):
history = None
instance = None
# check if it's an edit submission
Expand All @@ -70,6 +72,7 @@ def _get_instance(xml, new_uuid, submitted_by, status, xform):

last_edited = timezone.now()
InstanceHistory.objects.create(
checksum=instance.checksum,
xml=instance.xml,
xform_instance=instance,
uuid=old_uuid,
Expand All @@ -79,6 +82,7 @@ def _get_instance(xml, new_uuid, submitted_by, status, xform):
instance.xml = xml
instance.last_edited = last_edited
instance.uuid = new_uuid
instance.checksum = checksum
instance.save()

# call webhooks
Expand All @@ -88,7 +92,8 @@ def _get_instance(xml, new_uuid, submitted_by, status, xform):
if old_uuid is None or (instance is None and history is None):
# new submission
instance = Instance.objects.create(
xml=xml, user=submitted_by, status=status, xform=xform)
xml=xml, user=submitted_by, status=status, xform=xform,
checksum=checksum)
return instance


Expand Down Expand Up @@ -218,11 +223,12 @@ def save_attachments(xform, instance, media_files):


def save_submission(xform, xml, media_files, new_uuid, submitted_by, status,
date_created_override):
date_created_override, checksum):
if not date_created_override:
date_created_override = get_submission_date_from_xml(xml)

instance = _get_instance(xml, new_uuid, submitted_by, status, xform)
instance = _get_instance(xml, new_uuid, submitted_by, status, xform,
checksum)
save_attachments(xform, instance, media_files)

# override date created if required
Expand Down Expand Up @@ -279,10 +285,11 @@ def create_instance(username,
xml = xml_file.read()
xform = get_xform_from_submission(xml, username, uuid)
check_submission_permissions(request, xform)
checksum = md5(xml).hexdigest()

new_uuid = get_uuid_from_xml(xml)
filtered_instances = get_filtered_instances(
uuid=new_uuid, xform_id=xform.pk)
Q(checksum=checksum) | Q(uuid=new_uuid), xform_id=xform.pk)
existing_instance = filtered_instances.first()
if existing_instance and \
(new_uuid or existing_instance.xform.has_start_time):
Expand All @@ -297,7 +304,7 @@ def create_instance(username,
# has already been submitted for that user.
return DuplicateInstance()

# get new and depracated uuid's
# get new and deprecated uuid's
history = InstanceHistory.objects.filter(
xform_instance__xform_id=xform.pk,
uuid=new_uuid).only('xform_instance').first()
Expand All @@ -315,10 +322,11 @@ def create_instance(username,
with transaction.atomic():
instance = save_submission(xform, xml, media_files, new_uuid,
submitted_by, status,
date_created_override)
date_created_override, checksum)
except IntegrityError:
instance = Instance.objects.filter(uuid=new_uuid,
xform__id=xform.pk).first()
instance = Instance.objects.filter(
Q(checksum=checksum) | Q(uuid=new_uuid),
xform_id=xform.pk).first()

if instance:
attachment_names = [
Expand Down

0 comments on commit 6f728bb

Please sign in to comment.