From 0ca25160d05ac37bedd80fb8634e0307a8a47688 Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 21 Sep 2022 12:45:45 +0530 Subject: [PATCH 1/2] test issue with mock --- .../ingestion/source/tableau_common.py | 1 + .../tableau/tableau_state_mces_golden.json | 10 +-- .../tableau/test_tableau_common.py | 7 ++ .../tableau/test_tableau_ingest.py | 67 ++++++++++++++++++- 4 files changed, 79 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index 353465eb917fd..bb087ce67020d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -581,4 +581,5 @@ def query_metadata(server, main_query, connection_name, first, offset, qry_filte filter=qry_filter, main_query=main_query, ) + print(f"Internal {server.metadata.query.__name__}") return server.metadata.query(query) diff --git a/metadata-ingestion/tests/integration/tableau/tableau_state_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_state_mces_golden.json index 1cbe658f5abdc..c8367a4f42d78 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_state_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_state_mces_golden.json @@ -22236,7 +22236,7 @@ "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.people,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.returns,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.orders,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.people,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.returns,PROD)\", \"type\": \"TRANSFORMED\"}, {\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.orders,PROD)\", \"type\": \"TRANSFORMED\"}]}", "contentType": "application/json" }, "systemMetadata": { @@ -31694,7 +31694,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.people,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.people,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { @@ -31763,7 +31763,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.returns,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.returns,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { @@ -31832,7 +31832,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore.xls.orders,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.orders,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { @@ -32205,4 +32205,4 @@ "runId": "tableau-test" } } -] \ No newline at end of file +] diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_common.py b/metadata-ingestion/tests/integration/tableau/test_tableau_common.py index 5fd4c00399e24..a6ac36b4bf727 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_common.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_common.py @@ -21,6 +21,8 @@ def _read_response(file_name): def define_query_metadata_func(workbook_0: str, workbook_all: str): # type: ignore def side_effect_query_metadata(query): + print(f"workbook0 {workbook_0}") + print(f"workbook0 {workbook_all}") if "workbooksConnection (first:0" in query: return _read_response(workbook_0) @@ -83,6 +85,8 @@ def tableau_ingest_common( mock_client = mock.Mock() mocked_metadata = mock.Mock() mocked_metadata.query.side_effect = side_effect_query_metadata_func + mocked_metadata.query.__name__ = side_effect_query_metadata_func.__name__ + print(f"func name {mocked_metadata.query.side_effect.__name__}") mock_client.metadata = mocked_metadata mock_client.auth = mock.Mock() mock_client.views = mock.Mock() @@ -126,6 +130,9 @@ def tableau_ingest_common( pipeline.run() pipeline.raise_from_status() + print(f"outputPath {tmp_path}/{output_file_name}") + print(f"golden {test_resources_dir} / {golden_file_name}") + mce_helpers.check_golden_file( pytestconfig, output_path=f"{tmp_path}/{output_file_name}", diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index b2667be373fbf..0423f67704173 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -1,5 +1,9 @@ +import functools +import logging + import pytest import test_tableau_common +from test_tableau_common import _read_response from freezegun import freeze_time from datahub.configuration.source_common import DEFAULT_ENV @@ -14,8 +18,16 @@ @freeze_time(FROZEN_TIME) -@pytest.mark.slow_unit +#@pytest.mark.slow_unit def test_tableau_ingest(pytestconfig, tmp_path): + import gc + gc.collect() + objects = [i for i in gc.get_objects() + if isinstance(i, functools._lru_cache_wrapper)] + + for object in objects: + object.cache_clear() + output_file_name: str = "tableau_mces.json" golden_file_name: str = "tableau_mces_golden.json" side_effect_query_metadata = test_tableau_common.define_query_metadata_func( @@ -30,6 +42,58 @@ def test_tableau_ingest(pytestconfig, tmp_path): ) +def side_effect_query_metadata2(query): + + if "workbooksConnection (first:0" in query: + return _read_response("workbooksConnection_0.json") + + if "workbooksConnection (first:3" in query: + return _read_response("workbooksConnection_state_all.json") + + if "embeddedDatasourcesConnection (first:0" in query: + return _read_response("embeddedDatasourcesConnection_0.json") + + if "embeddedDatasourcesConnection (first:8" in query: + return _read_response("embeddedDatasourcesConnection_all.json") + + if "publishedDatasourcesConnection (first:0" in query: + return _read_response("publishedDatasourcesConnection_0.json") + + if "publishedDatasourcesConnection (first:2" in query: + return _read_response("publishedDatasourcesConnection_all.json") + + if "customSQLTablesConnection (first:0" in query: + return _read_response("customSQLTablesConnection_0.json") + + if "customSQLTablesConnection (first:2" in query: + return _read_response("customSQLTablesConnection_all.json") + + +@freeze_time(FROZEN_TIME) +#@pytest.mark.slow_unit +def test_tableau_usage_stat(pytestconfig, tmp_path): + import gc + gc.collect() + objects = [i for i in gc.get_objects() + if isinstance(i, functools._lru_cache_wrapper)] + + for object in objects: + object.cache_clear() + + print("Mohd objects {}".format(len(objects))) + + output_file_name: str = "tableau_stat_mces.json" + golden_file_name: str = "tableau_state_mces_golden.json" + func = side_effect_query_metadata2 + test_tableau_common.tableau_ingest_common( + pytestconfig, + "/tmp", + func, + golden_file_name, + output_file_name, + ) + + def test_lineage_overrides(): # Simple - specify platform instance to presto table assert ( @@ -76,3 +140,4 @@ def test_lineage_overrides(): ) == "urn:li:dataset:(urn:li:dataPlatform:presto,my_presto_instance.presto_catalog.test-schema.test-table,PROD)" ) + From 1e6125ea8e71ef488f1db3d423706a2b12be1b6f Mon Sep 17 00:00:00 2001 From: MohdSiddique Bagwan Date: Wed, 21 Sep 2022 16:11:57 +0530 Subject: [PATCH 2/2] lint fix --- .../ingestion/source/tableau_common.py | 1 - .../tableau/test_tableau_common.py | 141 ------------- .../tableau/test_tableau_ingest.py | 191 +++++++++++++----- 3 files changed, 135 insertions(+), 198 deletions(-) delete mode 100644 metadata-ingestion/tests/integration/tableau/test_tableau_common.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index bb087ce67020d..353465eb917fd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -581,5 +581,4 @@ def query_metadata(server, main_query, connection_name, first, offset, qry_filte filter=qry_filter, main_query=main_query, ) - print(f"Internal {server.metadata.query.__name__}") return server.metadata.query(query) diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_common.py b/metadata-ingestion/tests/integration/tableau/test_tableau_common.py deleted file mode 100644 index a6ac36b4bf727..0000000000000 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_common.py +++ /dev/null @@ -1,141 +0,0 @@ -import json -import pathlib -from unittest import mock - -from tableauserverclient.models import ViewItem - -from datahub.ingestion.run.pipeline import Pipeline -from tests.test_helpers import mce_helpers - -FROZEN_TIME = "2021-12-07 07:00:00" - -test_resources_dir = None - - -def _read_response(file_name): - response_json_path = f"{test_resources_dir}/setup/{file_name}" - with open(response_json_path) as file: - data = json.loads(file.read()) - return data - - -def define_query_metadata_func(workbook_0: str, workbook_all: str): # type: ignore - def side_effect_query_metadata(query): - print(f"workbook0 {workbook_0}") - print(f"workbook0 {workbook_all}") - - if "workbooksConnection (first:0" in query: - return _read_response(workbook_0) - - if "workbooksConnection (first:3" in query: - return _read_response(workbook_all) - - if "embeddedDatasourcesConnection (first:0" in query: - return _read_response("embeddedDatasourcesConnection_0.json") - - if "embeddedDatasourcesConnection (first:8" in query: - return _read_response("embeddedDatasourcesConnection_all.json") - - if "publishedDatasourcesConnection (first:0" in query: - return _read_response("publishedDatasourcesConnection_0.json") - - if "publishedDatasourcesConnection (first:2" in query: - return _read_response("publishedDatasourcesConnection_all.json") - - if "customSQLTablesConnection (first:0" in query: - return _read_response("customSQLTablesConnection_0.json") - - if "customSQLTablesConnection (first:2" in query: - return _read_response("customSQLTablesConnection_all.json") - - return side_effect_query_metadata - - -def side_effect_usage_stat(*arg, **kwargs): - mock_pagination = mock.MagicMock() - mock_pagination.total_available = None - - dashboard_stat: ViewItem = ViewItem() - - # Added as luid of Dashboard in workbooksConnection_state_all.json - dashboard_stat._id = "fc9ea488-f810-4fa8-ac19-aa96018b5d66" - dashboard_stat._total_views = 3 - - # Added as luid of Sheet in workbooksConnection_state_all.json - sheet_stat: ViewItem = ViewItem() - sheet_stat._id = "f0779f9d-6765-47a9-a8f6-c740cfd27783" - sheet_stat._total_views = 5 - - return [dashboard_stat, sheet_stat], mock_pagination - - -def tableau_ingest_common( - pytestconfig, - tmp_path, - side_effect_query_metadata_func, - golden_file_name, - output_file_name, -): - global test_resources_dir - test_resources_dir = pathlib.Path( - pytestconfig.rootpath / "tests/integration/tableau" - ) - - with mock.patch("tableauserverclient.Server") as mock_sdk: - mock_client = mock.Mock() - mocked_metadata = mock.Mock() - mocked_metadata.query.side_effect = side_effect_query_metadata_func - mocked_metadata.query.__name__ = side_effect_query_metadata_func.__name__ - print(f"func name {mocked_metadata.query.side_effect.__name__}") - mock_client.metadata = mocked_metadata - mock_client.auth = mock.Mock() - mock_client.views = mock.Mock() - mock_client.views.get.side_effect = side_effect_usage_stat - mock_client.auth.sign_in.return_value = None - mock_client.auth.sign_out.return_value = None - mock_sdk.return_value = mock_client - mock_sdk._auth_token = "ABC" - - pipeline = Pipeline.create( - { - "run_id": "tableau-test", - "source": { - "type": "tableau", - "config": { - "username": "username", - "password": "pass`", - "connect_uri": "https://do-not-connect", - "site": "acryl", - "projects": ["default", "Project 2"], - "page_size": 10, - "ingest_tags": True, - "ingest_owner": True, - "ingest_tables_external": True, - "default_schema_map": { - "dvdrental": "public", - "someotherdb": "schema", - }, - "platform_instance_map": {"postgres": "demo_postgres_instance"}, - "extract_usage_stats": True, - }, - }, - "sink": { - "type": "file", - "config": { - "filename": f"{tmp_path}/{output_file_name}", - }, - }, - } - ) - pipeline.run() - pipeline.raise_from_status() - - print(f"outputPath {tmp_path}/{output_file_name}") - print(f"golden {test_resources_dir} / {golden_file_name}") - - mce_helpers.check_golden_file( - pytestconfig, - output_path=f"{tmp_path}/{output_file_name}", - golden_path=test_resources_dir / golden_file_name, - ignore_paths=mce_helpers.IGNORE_PATH_TIMESTAMPS, - ) diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 0423f67704173..cb01d9cdaff28 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -1,93 +1,173 @@ -import functools -import logging +import json +import pathlib +from unittest import mock import pytest -import test_tableau_common -from test_tableau_common import _read_response from freezegun import freeze_time +from tableauserverclient.models import ViewItem from datahub.configuration.source_common import DEFAULT_ENV +from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.tableau_common import ( TableauLineageOverrides, make_table_urn, ) +from tests.test_helpers import mce_helpers FROZEN_TIME = "2021-12-07 07:00:00" test_resources_dir = None -@freeze_time(FROZEN_TIME) -#@pytest.mark.slow_unit -def test_tableau_ingest(pytestconfig, tmp_path): - import gc - gc.collect() - objects = [i for i in gc.get_objects() - if isinstance(i, functools._lru_cache_wrapper)] +def _read_response(file_name): + response_json_path = f"{test_resources_dir}/setup/{file_name}" + with open(response_json_path) as file: + data = json.loads(file.read()) + return data - for object in objects: - object.cache_clear() - output_file_name: str = "tableau_mces.json" - golden_file_name: str = "tableau_mces_golden.json" - side_effect_query_metadata = test_tableau_common.define_query_metadata_func( - "workbooksConnection_0.json", "workbooksConnection_all.json" - ) - test_tableau_common.tableau_ingest_common( - pytestconfig, - tmp_path, - side_effect_query_metadata, - golden_file_name, - output_file_name, - ) +def define_query_metadata_func(workbook_0: str, workbook_all: str): # type: ignore + def side_effect_query_metadata(query): + if "workbooksConnection (first:0" in query: + return _read_response(workbook_0) + if "workbooksConnection (first:3" in query: + return _read_response(workbook_all) -def side_effect_query_metadata2(query): + if "embeddedDatasourcesConnection (first:0" in query: + return _read_response("embeddedDatasourcesConnection_0.json") - if "workbooksConnection (first:0" in query: - return _read_response("workbooksConnection_0.json") + if "embeddedDatasourcesConnection (first:8" in query: + return _read_response("embeddedDatasourcesConnection_all.json") - if "workbooksConnection (first:3" in query: - return _read_response("workbooksConnection_state_all.json") + if "publishedDatasourcesConnection (first:0" in query: + return _read_response("publishedDatasourcesConnection_0.json") - if "embeddedDatasourcesConnection (first:0" in query: - return _read_response("embeddedDatasourcesConnection_0.json") + if "publishedDatasourcesConnection (first:2" in query: + return _read_response("publishedDatasourcesConnection_all.json") - if "embeddedDatasourcesConnection (first:8" in query: - return _read_response("embeddedDatasourcesConnection_all.json") + if "customSQLTablesConnection (first:0" in query: + return _read_response("customSQLTablesConnection_0.json") - if "publishedDatasourcesConnection (first:0" in query: - return _read_response("publishedDatasourcesConnection_0.json") + if "customSQLTablesConnection (first:2" in query: + return _read_response("customSQLTablesConnection_all.json") - if "publishedDatasourcesConnection (first:2" in query: - return _read_response("publishedDatasourcesConnection_all.json") + return side_effect_query_metadata - if "customSQLTablesConnection (first:0" in query: - return _read_response("customSQLTablesConnection_0.json") - if "customSQLTablesConnection (first:2" in query: - return _read_response("customSQLTablesConnection_all.json") +def side_effect_usage_stat(*arg, **kwargs): + mock_pagination = mock.MagicMock() + mock_pagination.total_available = None + dashboard_stat: ViewItem = ViewItem() + + # Added as luid of Dashboard in workbooksConnection_state_all.json + dashboard_stat._id = "fc9ea488-f810-4fa8-ac19-aa96018b5d66" + dashboard_stat._total_views = 3 + + # Added as luid of Sheet in workbooksConnection_state_all.json + sheet_stat: ViewItem = ViewItem() + sheet_stat._id = "f0779f9d-6765-47a9-a8f6-c740cfd27783" + sheet_stat._total_views = 5 + + return [dashboard_stat, sheet_stat], mock_pagination + + +def tableau_ingest_common( + pytestconfig, + tmp_path, + side_effect_query_metadata_func, + golden_file_name, + output_file_name, +): + global test_resources_dir + test_resources_dir = pathlib.Path( + pytestconfig.rootpath / "tests/integration/tableau" + ) + + with mock.patch("datahub.ingestion.source.tableau.Server") as mock_sdk: + mock_client = mock.Mock() + mocked_metadata = mock.Mock() + mocked_metadata.query.side_effect = side_effect_query_metadata_func + mock_client.metadata = mocked_metadata + mock_client.auth = mock.Mock() + mock_client.views = mock.Mock() + mock_client.views.get.side_effect = side_effect_usage_stat + mock_client.auth.sign_in.return_value = None + mock_client.auth.sign_out.return_value = None + mock_sdk.return_value = mock_client + mock_sdk._auth_token = "ABC" + + pipeline = Pipeline.create( + { + "run_id": "tableau-test", + "source": { + "type": "tableau", + "config": { + "username": "username", + "password": "pass`", + "connect_uri": "https://do-not-connect", + "site": "acryl", + "projects": ["default", "Project 2"], + "page_size": 10, + "ingest_tags": True, + "ingest_owner": True, + "ingest_tables_external": True, + "default_schema_map": { + "dvdrental": "public", + "someotherdb": "schema", + }, + "platform_instance_map": {"postgres": "demo_postgres_instance"}, + "extract_usage_stats": True, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{tmp_path}/{output_file_name}", + }, + }, + } + ) + pipeline.run() + pipeline.raise_from_status() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=f"{tmp_path}/{output_file_name}", + golden_path=test_resources_dir / golden_file_name, + ignore_paths=mce_helpers.IGNORE_PATH_TIMESTAMPS, + ) -@freeze_time(FROZEN_TIME) -#@pytest.mark.slow_unit -def test_tableau_usage_stat(pytestconfig, tmp_path): - import gc - gc.collect() - objects = [i for i in gc.get_objects() - if isinstance(i, functools._lru_cache_wrapper)] - for object in objects: - object.cache_clear() +@freeze_time(FROZEN_TIME) +@pytest.mark.slow_unit +def test_tableau_ingest(pytestconfig, tmp_path): + output_file_name: str = "tableau_mces.json" + golden_file_name: str = "tableau_mces_golden.json" + side_effect_query_metadata = define_query_metadata_func( + "workbooksConnection_0.json", "workbooksConnection_all.json" + ) + tableau_ingest_common( + pytestconfig, + tmp_path, + side_effect_query_metadata, + golden_file_name, + output_file_name, + ) - print("Mohd objects {}".format(len(objects))) +@freeze_time(FROZEN_TIME) +@pytest.mark.slow_unit +def test_tableau_usage_stat(pytestconfig, tmp_path): output_file_name: str = "tableau_stat_mces.json" golden_file_name: str = "tableau_state_mces_golden.json" - func = side_effect_query_metadata2 - test_tableau_common.tableau_ingest_common( + func = define_query_metadata_func( + "workbooksConnection_0.json", "workbooksConnection_state_all.json" + ) + tableau_ingest_common( pytestconfig, - "/tmp", + tmp_path, func, golden_file_name, output_file_name, @@ -140,4 +220,3 @@ def test_lineage_overrides(): ) == "urn:li:dataset:(urn:li:dataPlatform:presto,my_presto_instance.presto_catalog.test-schema.test-table,PROD)" ) -