From d08dc672e0793b99b68dfb40a4506595abe4f93c Mon Sep 17 00:00:00 2001
From: Bau Nguyen
Date: Fri, 28 Feb 2025 16:11:25 +0000
Subject: [PATCH 1/6] add logging for company merge command

---
Review notes (text between the '---' marker and the diffstat is not part of
the commit message):
  * test_logs_contain_errors ended with `assert str(company_source.id)`,
    which is always true for a non-empty string; it now checks membership
    in caplog.text.
  * The log message 'List of Source Compnies with Global Headqaurters: ' was
    misspelled; it is corrected in the command and in the matching test
    assertion.
  * handle() finished with `self.additional_logging.clear()`, which removes
    the keys that _process_row indexes unconditionally; it now resets every
    list to empty and keeps the keys.
  * Line breaks and indentation in this series were reconstructed from the
    hunk headers and Python syntax. The indentation of the second added
    line in patch 4/6 could not be determined (4 spaces assumed) -- TODO
    confirm against the original commit.
  * Blob ids on the `index` lines are carried over unchanged and no longer
    match the edited file contents.

 .../commands/company_merge_duns_number.py     | 38 +++++++++++
 .../test_company_merge_duns_number.py         | 65 ++++++++++++++++++-
 2 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/datahub/dbmaintenance/management/commands/company_merge_duns_number.py b/datahub/dbmaintenance/management/commands/company_merge_duns_number.py
index 8821ef09f..36e43d61a 100644
--- a/datahub/dbmaintenance/management/commands/company_merge_duns_number.py
+++ b/datahub/dbmaintenance/management/commands/company_merge_duns_number.py
@@ -1,8 +1,12 @@
+from logging import getLogger
+
 from datahub.company.merge_company import merge_companies
 from datahub.company.models import Company
 from datahub.dbmaintenance.management.base import CSVBaseCommand
 from datahub.dbmaintenance.utils import parse_uuid
 
+logger = getLogger(__name__)
+
 
 class Command(CSVBaseCommand):
     """
@@ -15,6 +19,16 @@ class Command(CSVBaseCommand):
     its related models, and find the company with Duns number and merge.
     """
 
+    additional_logging: dict
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.additional_logging = {
+            'companies_with_subsidiaries': [],
+            'target_companies_archived': [],
+            'source_company_global_headquarters': [],
+        }
+
     def _process_row(self, row, simulate=False, **options):
         """Process one single row."""
         source_pk = parse_uuid(row['id'])
@@ -23,4 +37,28 @@ def _process_row(self, row, simulate=False, **options):
         source_company = Company.objects.get(pk=source_pk)
         target_company = Company.objects.get(duns_number=target_duns)
 
+        if source_company.subsidiaries.exists():
+            self.additional_logging['companies_with_subsidiaries'].append(str(source_company.id))
+        if target_company.archived:
+            self.additional_logging['target_companies_archived'].append(str(target_company.id))
+        if source_company.global_headquarters:
+            self.additional_logging['source_company_global_headquarters'].append(
+                str(source_company.id))
         merge_companies(source_company, target_company, None)
+
+    def handle(self, *args, **options):
+        """
+        Process the CSV file, then log the flagged company ids and reset the lists.
+        """
+        super().handle(*args, **options)
+        logger.info(
+            'List of Source Companies with Subsidiaries: '
+            f'{self.additional_logging["companies_with_subsidiaries"]}')
+        logger.info(
+            'List of Target Companies Archived: '
+            f'{self.additional_logging["target_companies_archived"]}')
+        logger.info(
+            'List of Source Companies with Global Headquarters: '
+            f'{self.additional_logging["source_company_global_headquarters"]}')
+
+        self.additional_logging = {key: [] for key in self.additional_logging}
diff --git a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
index 26811b883..d9090004a 100644
--- a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
+++ b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
@@ -5,7 +5,7 @@ from django.core.management import call_command
 
 from datahub.company.test.factories import CompanyFactory
 
-
+from datahub.company.test.factories import SubsidiaryFactory
 pytestmark = pytest.mark.django_db
 
 
@@ -49,3 +49,66 @@ def test_merge_id_duns_number(s3_stubber):
     company_4.refresh_from_db()
     assert company_3.transferred_to == company_1
     assert company_4.transferred_to == company_2
+
+
+def test_logs_contain_errors(s3_stubber, caplog):
+    """Tests errors are captured in the logs"""
+    caplog.set_level('INFO')
+    global_company = CompanyFactory()
+    company_source = CompanyFactory(
+        global_headquarters=global_company
+    )
+    company_with_duns = CompanyFactory(duns_number='12345678', archived=True)
+
+    bucket = 'test_bucket'
+    object_key = 'test_key'
+    csv_content = f"""id,duns
+{company_source.id},{company_with_duns.duns_number}
+"""
+
+    s3_stubber.add_response(
+        'get_object',
+        {
+            'Body': BytesIO(csv_content.encode(encoding='utf-8')),
+        },
+        expected_params={
+            'Bucket': bucket,
+            'Key': object_key,
+        },
+    )
+
+    call_command('company_merge_duns_number', bucket, object_key)
+
+    assert 'List of Target Companies Archived: ' in caplog.text
+    assert str(company_with_duns.id) in caplog.text
+    assert 'List of Source Companies with Global Headquarters: ' in caplog.text
+    assert str(company_source.id) in caplog.text
+
+
+def test_subsidiary_logs(s3_stubber, caplog):
+    """Tests subsidiary errors are captured in the logs"""
+    caplog.set_level('INFO')
+    subsidiary_company = SubsidiaryFactory()
+    company_with_duns = CompanyFactory(duns_number='12345678')
+
+    bucket = 'test_bucket'
+    object_key = 'test_key'
+    csv_content = f"""id,duns
+{subsidiary_company.id},{company_with_duns.duns_number}
+"""
+
+    s3_stubber.add_response(
+        'get_object',
+        {
+            'Body': BytesIO(csv_content.encode(encoding='utf-8')),
+        },
+        expected_params={
+            'Bucket': bucket,
+            'Key': object_key,
+        },
+    )
+
+    call_command('company_merge_duns_number', bucket, object_key)
+
+    assert 'List of Source Companies with Subsidiaries: ' in caplog.text
+    assert str(subsidiary_company.id) in caplog.text

From f65a8d11006dce8cc9cb487a4a72e276e6fa6baf Mon Sep 17 00:00:00 2001
From: Bau Nguyen
Date: Fri, 28 Feb 2025 16:15:01 +0000
Subject: [PATCH 2/6] Update test_company_merge_duns_number.py

---
 .../test/commands/test_company_merge_duns_number.py             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
index d9090004a..604866e32 100644
--- a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
+++ b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
@@ -56,7 +56,7 @@ def test_logs_contain_errors(s3_stubber, caplog):
     caplog.set_level('INFO')
     global_company = CompanyFactory()
     company_source = CompanyFactory(
-        global_headquarters=global_company
+        global_headquarters=global_company,
     )
     company_with_duns = CompanyFactory(duns_number='12345678', archived=True)
 

From d2db5a8d62c95c7425121102577714303a49bd17 Mon Sep 17 00:00:00 2001
From: Bau Nguyen
Date: Mon, 3 Mar 2025 09:55:15 +0000
Subject: [PATCH 3/6] Add test for codecov

---
 .../test_company_merge_duns_number.py         | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
index 604866e32..83918a34a 100644
--- a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
+++ b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
@@ -112,3 +112,31 @@ def test_subsidiary_logs(s3_stubber, caplog):
 
     assert 'List of Source Companies with Subsidiaries: ' in caplog.text
     assert str(subsidiary_company.id) in caplog.text
+
+
+def test_non_subsidiary_logs(s3_stubber, caplog):
+    """Tests subsidiary list in log is empty"""
+    caplog.set_level('INFO')
+    non_subsidiary_company = CompanyFactory()
+    company_with_duns = CompanyFactory(duns_number='12345678')
+
+    bucket = 'test_bucket'
+    object_key = 'test_key'
+    csv_content = f"""id,duns
+{non_subsidiary_company.id},{company_with_duns.duns_number}
+"""
+
+    s3_stubber.add_response(
+        'get_object',
+        {
+            'Body': BytesIO(csv_content.encode(encoding='utf-8')),
+        },
+        expected_params={
+            'Bucket': bucket,
+            'Key': object_key,
+        },
+    )
+
+    call_command('company_merge_duns_number', bucket, object_key)
+
+    assert 'List of Source Companies with Subsidiaries: []' in caplog.text

From 0777d0f0e5f4be30b6d906f195e3fa965404e7a1 Mon Sep 17 00:00:00 2001
From: Bau Nguyen
Date: Mon, 3 Mar 2025 10:20:40 +0000
Subject: [PATCH 4/6] Update test_company_merge_duns_number.py

---
 .../test/commands/test_company_merge_duns_number.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
index 83918a34a..c026738fb 100644
--- a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
+++ b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
@@ -110,8 +110,8 @@ def test_subsidiary_logs(s3_stubber, caplog):
 
     call_command('company_merge_duns_number', bucket, object_key)
 
-    assert 'List of Source Companies with Subsidiaries: ' in caplog.text
-    assert str(subsidiary_company.id) in caplog.text
+    assert 'List of Source Companies with Subsidiaries: '
+    f"{str(subsidiary_company.id)}" in caplog.text
 
 
 def test_non_subsidiary_logs(s3_stubber, caplog):

From b2705b8e8b694bc38415dccada30872a1ed264ce Mon Sep 17 00:00:00 2001
From: Bau Nguyen
Date: Mon, 3 Mar 2025 11:50:07 +0000
Subject: [PATCH 5/6] codecov test

---
 .../management/commands/company_merge_duns_number.py   |  2 +-
 .../test/commands/test_company_merge_duns_number.py    | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/datahub/dbmaintenance/management/commands/company_merge_duns_number.py b/datahub/dbmaintenance/management/commands/company_merge_duns_number.py
index 36e43d61a..d1a633e55 100644
--- a/datahub/dbmaintenance/management/commands/company_merge_duns_number.py
+++ b/datahub/dbmaintenance/management/commands/company_merge_duns_number.py
@@ -37,7 +37,7 @@ def _process_row(self, row, simulate=False, **options):
         source_company = Company.objects.get(pk=source_pk)
         target_company = Company.objects.get(duns_number=target_duns)
 
-        if source_company.subsidiaries.exists():
+        if source_company.subsidiaries.all().exists():
             self.additional_logging['companies_with_subsidiaries'].append(str(source_company.id))
         if target_company.archived:
             self.additional_logging['target_companies_archived'].append(str(target_company.id))
diff --git a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
index c026738fb..689dbf9e6 100644
--- a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
+++ b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
@@ -88,13 +88,13 @@ def test_logs_contain_errors(s3_stubber, caplog):
 def test_subsidiary_logs(s3_stubber, caplog):
     """Tests subsidiary errors are captured in the logs"""
     caplog.set_level('INFO')
-    subsidiary_company = SubsidiaryFactory()
+    company = CompanyFactory()
+    SubsidiaryFactory(global_headquarters=company)
     company_with_duns = CompanyFactory(duns_number='12345678')
-
     bucket = 'test_bucket'
     object_key = 'test_key'
     csv_content = f"""id,duns
-{subsidiary_company.id},{company_with_duns.duns_number}
+{company.id},{company_with_duns.duns_number}
 """
 
     s3_stubber.add_response(
@@ -110,8 +110,8 @@ def test_subsidiary_logs(s3_stubber, caplog):
 
     call_command('company_merge_duns_number', bucket, object_key)
 
-    assert 'List of Source Companies with Subsidiaries: '
-    f"{str(subsidiary_company.id)}" in caplog.text
+    assert 'List of Source Companies with Subsidiaries: ' in caplog.text
+    assert f"{str(company.id)}" in caplog.text
 
 
 def test_non_subsidiary_logs(s3_stubber, caplog):

From 09d2c65b8f7d23405e4710096e02238467dd67aa Mon Sep 17 00:00:00 2001
From: Bau Nguyen
Date: Mon, 3 Mar 2025 11:56:04 +0000
Subject: [PATCH 6/6] Update test_company_merge_duns_number.py

---
 .../test/commands/test_company_merge_duns_number.py             | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
index 689dbf9e6..4b9fa67f8 100644
--- a/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
+++ b/datahub/dbmaintenance/test/commands/test_company_merge_duns_number.py
@@ -111,7 +111,7 @@ def test_subsidiary_logs(s3_stubber, caplog):
     call_command('company_merge_duns_number', bucket, object_key)
 
     assert 'List of Source Companies with Subsidiaries: ' in caplog.text
-    assert f"{str(company.id)}" in caplog.text
+    assert f'{str(company.id)}' in caplog.text
 
 
 def test_non_subsidiary_logs(s3_stubber, caplog):