Skip to content

Commit

Permalink
Merge pull request #5957 from uktrade/bugfix/TET-981-stova-event-inge…
Browse files Browse the repository at this point in the history
…stion

Bugfix/tet 981 stova event ingestion
  • Loading branch information
DeanElliott96 authored Feb 18, 2025
2 parents 170b1a3 + f3210c2 commit 4c93e3c
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 47 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Generated by Django 4.2.17 on 2025-02-18 07:55

from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Alter several StovaEvent columns so they accept blank values.

    `location_name` and `url` are additionally altered to accept NULL.
    """

    dependencies = [('company_activity', '0027_stova_event_allow_empty_fields')]

    operations = [
        # Text columns that may now be left blank.
        migrations.AlterField(
            model_name='stovaevent',
            name='description',
            field=models.TextField(blank=True),
        ),
        migrations.AlterField(
            model_name='stovaevent',
            name='location_address2',
            field=models.CharField(max_length=255, blank=True),
        ),
        migrations.AlterField(
            model_name='stovaevent',
            name='location_address3',
            field=models.CharField(max_length=255, blank=True),
        ),
        # Unlike the other location columns, the name may also be NULL.
        migrations.AlterField(
            model_name='stovaevent',
            name='location_name',
            field=models.CharField(max_length=255, null=True, blank=True),
        ),
        migrations.AlterField(
            model_name='stovaevent',
            name='location_postcode',
            field=models.CharField(max_length=255, blank=True),
        ),
        migrations.AlterField(
            model_name='stovaevent',
            name='location_state',
            field=models.CharField(max_length=255, blank=True),
        ),
        # The URL may be blank or NULL.
        migrations.AlterField(
            model_name='stovaevent',
            name='url',
            field=models.TextField(null=True, blank=True),
        ),
    ]
14 changes: 7 additions & 7 deletions datahub/company_activity/models/stova_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class StovaEvent(models.Model):

stova_event_id = models.IntegerField(unique=True)
name = models.TextField()
description = models.TextField()
description = models.TextField(blank=True)
code = models.CharField(max_length=MAX_LENGTH, blank=True, default='')

created_by = models.CharField(max_length=MAX_LENGTH, blank=True, default='')
Expand All @@ -33,7 +33,7 @@ class StovaEvent(models.Model):
city = models.CharField(max_length=MAX_LENGTH)
state = models.CharField(max_length=MAX_LENGTH)
timezone = models.CharField(max_length=MAX_LENGTH, blank=True, null=True, default='')
url = models.TextField()
url = models.TextField(blank=True, null=True)
max_reg = models.IntegerField(null=True, blank=True)

created_date = models.DateTimeField()
Expand All @@ -43,14 +43,14 @@ class StovaEvent(models.Model):
close_date = models.DateTimeField(null=True, blank=True)
end_date = models.DateTimeField(null=True, blank=True)

location_state = models.CharField(max_length=MAX_LENGTH)
location_state = models.CharField(max_length=MAX_LENGTH, blank=True)
location_country = models.CharField(max_length=MAX_LENGTH)
location_address1 = models.CharField(max_length=MAX_LENGTH)
location_address2 = models.CharField(max_length=MAX_LENGTH)
location_address3 = models.CharField(max_length=MAX_LENGTH)
location_address2 = models.CharField(max_length=MAX_LENGTH, blank=True)
location_address3 = models.CharField(max_length=MAX_LENGTH, blank=True)
location_city = models.CharField(max_length=MAX_LENGTH)
location_name = models.CharField(max_length=MAX_LENGTH)
location_postcode = models.CharField(max_length=MAX_LENGTH)
location_name = models.CharField(max_length=MAX_LENGTH, null=True, blank=True)
location_postcode = models.CharField(max_length=MAX_LENGTH, blank=True)

approval_required = models.BooleanField()
price_type = models.CharField(max_length=MAX_LENGTH)
Expand Down
116 changes: 83 additions & 33 deletions datahub/company_activity/tasks/ingest_stova_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,43 +51,93 @@ def _should_process_record(self, record: dict) -> bool:

return True

@staticmethod
def _required_fields() -> list:
"""
Returns a list of fields required for to make a StovaEvent a Data Hub Event.
Any fields listed here but not provided by Stova will be rejected from ingestion.
:return: Required fields to save a StovaEvent.
"""
return [
'id',
'name',
'location_address1',
'location_city',
]

@staticmethod
def _convert_fields_from_null_to_blank(values: dict) -> dict:
"""
Coverts values from the stova record which could be null into empty strings for saving
as a Data Hub event.
:param values: A single Stova Event record from an S3 bucket.
:return: A single Stova Event record with null/None values replaced with empty strings.
"""
fields_required_as_blank = [
'location_address2',
'location_address3',
'location_state',
'location_postcode',
'description',
]

for field in fields_required_as_blank:
if values[field] is None:
values[field] = ''

return values

def _process_record(self, record: dict) -> None:
"""Saves an event from Stova from the S3 bucket into a `StovaEvent`"""
stova_event_id = record.get('id')

required_fields = self._required_fields()
for field in required_fields:
if record[field] is None or record[field] == '':
logger.info(
f'Stova Event with id {stova_event_id} does not have required field {field}. '
'This stova event will not be processed into Data Hub.',
)
return

cleaned_record = self._convert_fields_from_null_to_blank(record)

values = {
'stova_event_id': record.get('id'),
'url': record.get('url', ''),
'city': record.get('city', ''),
'code': record.get('code', ''),
'name': record.get('name', ''),
'state': record.get('state', ''),
'country': record.get('country', ''),
'max_reg': record.get('max_reg'),
'end_date': record.get('end_date'),
'timezone': record.get('timezone', ''),
'folder_id': record.get('folder_id'),
'live_date': record.get('live_date'),
'close_date': record.get('close_date'),
'created_by': record.get('created_by', ''),
'price_type': record.get('price_type', ''),
'start_date': record.get('start_date'),
'description': record.get('description', ''),
'modified_by': record.get('modified_by', ''),
'contact_info': record.get('contact_info', ''),
'created_date': record.get('created_date'),
'location_city': record.get('location_city', ''),
'location_name': record.get('location_name', ''),
'modified_date': record.get('modified_date'),
'client_contact': record.get('client_contact', ''),
'location_state': record.get('location_state', ''),
'default_language': record.get('default_language', ''),
'location_country': record.get('location_country', ''),
'approval_required': record.get('approval_required'),
'location_address1': record.get('location_address1', ''),
'location_address2': record.get('location_address2', ''),
'location_address3': record.get('location_address3', ''),
'location_postcode': record.get('location_postcode', ''),
'standard_currency': record.get('standard_currency', ''),
'stova_event_id': cleaned_record.get('id'),
'url': cleaned_record.get('url', ''),
'city': cleaned_record.get('city', ''),
'code': cleaned_record.get('code', ''),
'name': cleaned_record.get('name', ''),
'state': cleaned_record.get('state', ''),
'country': cleaned_record.get('country', ''),
'max_reg': cleaned_record.get('max_reg'),
'end_date': cleaned_record.get('end_date'),
'timezone': cleaned_record.get('timezone', ''),
'folder_id': cleaned_record.get('folder_id'),
'live_date': cleaned_record.get('live_date'),
'close_date': cleaned_record.get('close_date'),
'created_by': cleaned_record.get('created_by', ''),
'price_type': cleaned_record.get('price_type', ''),
'start_date': cleaned_record.get('start_date'),
'description': cleaned_record.get('description', ''),
'modified_by': cleaned_record.get('modified_by', ''),
'contact_info': cleaned_record.get('contact_info', ''),
'created_date': cleaned_record.get('created_date'),
'location_city': cleaned_record.get('location_city', ''),
'location_name': cleaned_record.get('location_name', ''),
'modified_date': cleaned_record.get('modified_date'),
'client_contact': cleaned_record.get('client_contact', ''),
'location_state': cleaned_record.get('location_state', ''),
'default_language': cleaned_record.get('default_language', ''),
'location_country': cleaned_record.get('location_country', ''),
'approval_required': cleaned_record.get('approval_required'),
'location_address1': cleaned_record.get('location_address1', ''),
'location_address2': cleaned_record.get('location_address2', ''),
'location_address3': cleaned_record.get('location_address3', ''),
'location_postcode': cleaned_record.get('location_postcode', ''),
'standard_currency': cleaned_record.get('standard_currency', ''),
}

try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ def capture_envelope(self, envelope):
self.events.append(envelope)


@pytest.mark.django_db
class TestStovaIngestionTasks:

@pytest.mark.django_db
@mock_aws
@override_settings(S3_LOCAL_ENDPOINT_URL=None)
def test_stova_data_file_ingestion(self, caplog, test_file, test_file_path):
Expand All @@ -132,7 +132,6 @@ def test_stova_data_file_ingestion(self, caplog, test_file, test_file_path):
assert StovaEvent.objects.count() == initial_stova_activity_count + 27
assert IngestedObject.objects.count() == initial_ingested_count + 1

@pytest.mark.django_db
@mock_aws
@override_settings(S3_LOCAL_ENDPOINT_URL=None)
def test_skip_previously_ingested_records(self, test_file_path, test_base_stova_event):
Expand All @@ -152,7 +151,6 @@ def test_skip_previously_ingested_records(self, test_file_path, test_base_stova_
stova_event_ingestion_task(test_file_path)
assert StovaEvent.objects.filter(stova_event_id=123456789).count() == 1

@pytest.mark.django_db
@mock_aws
@override_settings(S3_LOCAL_ENDPOINT_URL=None)
def test_invalid_file(self, test_file_path):
Expand All @@ -169,7 +167,6 @@ def test_invalid_file(self, test_file_path):
expected = "key: 'data-flow/exports/ExportAventriEvents/" 'stovaEventFake2.jsonl.gz'
assert expected in exception

@pytest.mark.django_db
def test_stova_event_fields_are_saved(self, test_base_stova_event):
"""
Test that the ingested stova event fields are saved to the StovaEvent model.
Expand All @@ -190,7 +187,6 @@ def test_stova_event_fields_are_saved(self, test_base_stova_event):

assert model_value == file_value

@pytest.mark.django_db
def test_stova_event_fields_with_duplicate_attendee_ids_in_db(
self, caplog, test_base_stova_event,
):
Expand All @@ -208,7 +204,6 @@ def test_stova_event_fields_with_duplicate_attendee_ids_in_db(
f'Record already exists for stova_event_id: {existing_stova_event.stova_event_id}'
) in caplog.text

@pytest.mark.django_db
def test_stova_event_fields_with_duplicate_attendee_ids_in_json(
self, caplog, test_base_stova_event,
):
Expand All @@ -229,7 +224,6 @@ def test_stova_event_fields_with_duplicate_attendee_ids_in_json(
"Stova event id already exists.']" in caplog.text
)

@pytest.mark.django_db
def test_stova_event_ingestion_handles_unexpected_fields(self, caplog, test_base_stova_event):
"""
Test that if the rows from stova contain data in an unexpected data type these are handled
Expand All @@ -253,3 +247,64 @@ def test_stova_event_ingestion_handles_unexpected_fields(self, caplog, test_base
) in caplog.text

assert 'approval_required' in caplog.text

@pytest.mark.parametrize(
    'required_field',
    ('id', 'name', 'location_address1', 'location_city'),
)
def test_stova_event_ingestion_rejects_event_if_missing_required_fields(
    self, caplog, test_base_stova_event, required_field,
):
    """
    When a Stova Event has one of its required fields set to None, processing the record
    logs the rejection and no StovaEvent is saved.
    """
    record = test_base_stova_event
    # Blank out the field under test; Data Hub events need it to be populated.
    record[required_field] = None

    ingestion_task = StovaEventIngestionTask('dummy-prefix', mock.Mock())

    with caplog.at_level(logging.INFO):
        ingestion_task._process_record(record)
        rejection_message = (
            f'Stova Event with id {record.get("id")} does not have required field '
            f'{required_field}. This stova event will not be processed into Data Hub.'
        )
        assert rejection_message in caplog.text

    assert StovaEvent.objects.count() == 0

@pytest.mark.parametrize(
    'null_field',
    (
        'location_address2',
        'location_address3',
        'location_state',
        'location_postcode',
        'description',
    ),
)
def test_stova_event_ingestion_converts_null_fields_to_empty_string(
    self, test_base_stova_event, null_field,
):
    """
    A None value in one of these fields is stored as an empty string on the saved
    StovaEvent rather than as null.
    """
    record = test_base_stova_event
    # Start from None so the test proves the value is converted before saving.
    record[null_field] = None

    ingestion_task = StovaEventIngestionTask('dummy-prefix', mock.Mock())
    ingestion_task._process_record(record)

    assert StovaEvent.objects.count() == 1
    saved_event = StovaEvent.objects.first()
    assert getattr(saved_event, null_field) == ''

0 comments on commit 4c93e3c

Please sign in to comment.