From d7df6e902541a5fae8e90c65634ba1cad42ad827 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:48:41 +1000 Subject: [PATCH 01/88] Spelling errors in code --- metadata-ingestion-modules/airflow-plugin/setup.py | 2 +- .../src/datahub_airflow_plugin/datahub_plugin.py | 2 +- .../examples/bootstrap_data/business_glossary.yml | 2 +- .../examples/library/data_quality_mcpw_rest.py | 2 +- .../src/datahub/api/entities/datajob/dataflow.py | 2 +- .../src/datahub/api/entities/datajob/datajob.py | 4 ++-- .../api/entities/dataprocess/dataprocess_instance.py | 10 +++++----- metadata-ingestion/src/datahub/cli/cli_utils.py | 2 +- metadata-ingestion/src/datahub/cli/ingest_cli.py | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 8ca4cad470f67e..85b0a4553d7a52 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -21,7 +21,7 @@ def get_long_description(): base_requirements = { - # Compatability. + # Compatibility. "dataclasses>=0.6; python_version < '3.7'", "typing_extensions>=3.10.0.2", "mypy_extensions>=0.4.3", diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py index c893ff61cb9ec3..6d8bb56791b3e2 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py @@ -215,7 +215,7 @@ def datahub_on_success_callback(context, *args, **kwargs): for inlet in inlets: datajob.inlets.append(inlet.urn) - # We have to use _oulets because outlets is empty + # We have to use _outlets because outlets is empty for outlet in task._outlets: datajob.outlets.append(outlet.urn) diff --git a/metadata-ingestion/examples/bootstrap_data/business_glossary.yml b/metadata-ingestion/examples/bootstrap_data/business_glossary.yml index 6669d393b7211d..71fd59bbccc462 100644 --- a/metadata-ingestion/examples/bootstrap_data/business_glossary.yml +++ b/metadata-ingestion/examples/bootstrap_data/business_glossary.yml @@ -40,7 +40,7 @@ nodes: inherits: - Classification.Sensitive - name: ClientsAndAccounts - description: Provides basic concepts such as account, account holder, account provider, relationship manager that are commonly used by financial services providers to describe customers and to determine counterparty identities + description: Provides basic concepts such as account, account holder, account provider, relationship manager that are commonly used by financial services providers to describe customers and to determine counterparts identities owners: groups: - finance diff --git a/metadata-ingestion/examples/library/data_quality_mcpw_rest.py b/metadata-ingestion/examples/library/data_quality_mcpw_rest.py index 7672d634f58468..077ca550e880eb 100644 --- a/metadata-ingestion/examples/library/data_quality_mcpw_rest.py +++ b/metadata-ingestion/examples/library/data_quality_mcpw_rest.py @@ -47,7 +47,7 @@ def emitAssertionResult(assertionResult: AssertionRunEvent) -> None: aspect=assertionResult, ) - # Emit BatchAssertion Result! (timseries aspect) + # Emit BatchAssertion Result! 
(timeseries aspect) emitter.emit_mcp(dataset_assertionRunEvent_mcp) diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py index c0378d554d5d31..588e66f19a0ef1 100644 --- a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py +++ b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py @@ -142,7 +142,7 @@ def emit( """ Emit the DataFlow entity to Datahub - :param emitter: Datahub Emitter to emit the proccess event + :param emitter: Datahub Emitter to emit the process event :param callback: (Optional[Callable[[Exception, str], None]]) the callback method for KafkaEmitter if it is used """ for mcp in self.generate_mcp(): diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py index 329eca7d9cd44b..ec86ad80226312 100644 --- a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py +++ b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py @@ -52,7 +52,7 @@ class DataJob: properties Dict[str, str]: Custom properties to set for the DataProcessInstance url (Optional[str]): Url which points to the DataJob at the orchestrator inlets (List[str]): List of urns the DataProcessInstance consumes - outlest (List[str]): List of urns the DataProcessInstance produces + outlets (List[str]): List of urns the DataProcessInstance produces input_datajob_urns: List[DataJobUrn] = field(default_factory=list) """ @@ -179,7 +179,7 @@ def emit( """ Emit the DataJob entity to Datahub - :param emitter: Datahub Emitter to emit the proccess event + :param emitter: Datahub Emitter to emit the process event :param callback: (Optional[Callable[[Exception, str], None]]) the callback method for KafkaEmitter if it is used :rtype: None """ diff --git a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py index 859e5700a51c3b..9b107d701ab02d 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py +++ b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py @@ -55,7 +55,7 @@ class DataProcessInstance: template_urn (Optional[Union[DataJobUrn, DataFlowUrn]]): The parent DataJob or DataFlow which was instantiated if applicable parent_instance (Optional[DataProcessInstanceUrn]): The parent execution's urn if applicable properties Dict[str, str]: Custom properties to set for the DataProcessInstance - url (Optional[str]): Url which points to the exection at the orchestrator + url (Optional[str]): Url which points to the execution at the orchestrator inlets (List[str]): List of entities the DataProcessInstance consumes outlets (List[str]): List of entities the DataProcessInstance produces """ @@ -118,10 +118,10 @@ def emit_process_start( """ :rtype: None - :param emitter: Datahub Emitter to emit the proccess event + :param emitter: Datahub Emitter to emit the process event :param start_timestamp_millis: (int) the execution start time in milliseconds :param attempt: the number of attempt of the execution with the same execution id - :param emit_template: (bool) If it is set the template of the execution (datajob, datflow) will be emitted as well. + :param emit_template: (bool) If it is set the template of the execution (datajob, dataflow) will be emitted as well. 
:param callback: (Optional[Callable[[Exception, str], None]]) the callback method for KafkaEmitter if it is used """ if emit_template and self.template_urn is not None: @@ -312,8 +312,8 @@ def from_datajob( :param datajob: (DataJob) the datajob from generate the DataProcessInstance :param id: (str) the id for the DataProcessInstance - :param clone_inlets: (bool) wheather to clone datajob's inlets - :param clone_outlets: (bool) wheather to clone datajob's outlets + :param clone_inlets: (bool) whether to clone datajob's inlets + :param clone_outlets: (bool) whether to clone datajob's outlets :return: DataProcessInstance """ dpi: DataProcessInstance = DataProcessInstance( diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index ffd6da2b1c3399..81dbae79d7354f 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -130,7 +130,7 @@ def get_details_from_config(): gms_token = gms_config.token return gms_host, gms_token except yaml.YAMLError as exc: - click.secho(f"{DATAHUB_CONFIG_PATH} malformatted, error: {exc}", bold=True) + click.secho(f"{DATAHUB_CONFIG_PATH} malformed, error: {exc}", bold=True) return None, None diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 55d3a0cdc04760..264a7bf0fed723 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -79,7 +79,7 @@ def ingest() -> None: type=bool, is_flag=True, default=False, - help="Supress display of variable values in logs by supressing elaborae stacktrace (stackprinter) during ingestion failures", + help="Suppress display of variable values in logs by suppressing elaborate stacktrace (stackprinter) during ingestion failures", ) @click.pass_context @upgrade.check_upgrade From bb26c5c9a3c343c5b788f047b347c3837176ee60 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:48:50 +1000 Subject: [PATCH 02/88] Use pathlib --- metadata-ingestion-modules/airflow-plugin/setup.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 85b0a4553d7a52..6d26cf4498bdf9 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -1,4 +1,5 @@ import os +import pathlib import sys from typing import Dict, Set @@ -14,10 +15,7 @@ def get_long_description(): root = os.path.dirname(__file__) - with open(os.path.join(root, "README.md")) as f: - description = f.read() - - return description + return pathlib.Path(os.path.join(root, "README.md")).read_text() base_requirements = { From 0c72919d7670f54b90ec78a63ebaf7f306ab2c69 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:48:58 +1000 Subject: [PATCH 03/88] Spelling --- metadata-ingestion/src/datahub/ingestion/api/decorators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/api/decorators.py b/metadata-ingestion/src/datahub/ingestion/api/decorators.py index 9b3f35ae9d8116..eafbe14106fd23 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/decorators.py +++ b/metadata-ingestion/src/datahub/ingestion/api/decorators.py @@ -56,7 +56,7 @@ class SupportStatus(Enum): """ INCUBATING = auto() """ - Incubating Sources are ready for DataHub Community adoption but have not been tested for a wide 
variety of edge-cases. We eagerly solicit feedback from the Community to streghten the connector; minor version changes may arise in future releases. + Incubating Sources are ready for DataHub Community adoption but have not been tested for a wide variety of edge-cases. We eagerly solicit feedback from the Community to strengthen the connector; minor version changes may arise in future releases. """ TESTING = auto() """ From 7eb7aa7e41a0062f4c422d638d10339c243d6e04 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:49:10 +1000 Subject: [PATCH 04/88] Found bug in code --- metadata-ingestion/src/datahub/emitter/mcp_builder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 7aed2e29137492..868916fda2c810 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -233,6 +233,7 @@ def gen_containers( def add_dataset_to_container( + # FIXME: Union requires two or more type arguments container_key: KeyType, dataset_urn: str ) -> Iterable[Union[MetadataWorkUnit]]: container_urn = make_container_urn( From fb069eee4ff55f82358693eee5c636e0e24a4962 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:49:28 +1000 Subject: [PATCH 05/88] Spelling and declare as TODO --- metadata-ingestion/src/datahub/configuration/kafka.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/configuration/kafka.py b/metadata-ingestion/src/datahub/configuration/kafka.py index 876db21086e88e..e752285cdde2af 100644 --- a/metadata-ingestion/src/datahub/configuration/kafka.py +++ b/metadata-ingestion/src/datahub/configuration/kafka.py @@ -27,7 +27,7 @@ def bootstrap_host_colon_port_comma(cls, val: str) -> str: else: host = entry assert re.match( - # This regex is quite loose. Many invalid hostnames or IPs will slip through, + # TODO: This regex is quite loose. Many invalid hostname's or IPs will slip through, # but it serves as a good first line of validation. We defer to Kafka for the # remaining validation. 
r"^[\w\-\.\:]+$", From d4c36a096c60006831ca8dfe5a9d0a100d10511d Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:51:53 +1000 Subject: [PATCH 06/88] Test assertions to pass --- .../airflow-plugin/tests/integration/integration_test_dummy.py | 2 +- .../airflow-plugin/tests/unit/test_dummy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py index f4f53619168f89..10cf3ad0a608ae 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/integration_test_dummy.py @@ -1,2 +1,2 @@ def test_dummy(): - assert True + pass diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py index f4f53619168f89..10cf3ad0a608ae 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_dummy.py @@ -1,2 +1,2 @@ def test_dummy(): - assert True + pass From 97617d3c44c982bcde6b1d12c7726445c63a20b4 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:52:02 +1000 Subject: [PATCH 07/88] use contextlib --- .../src/datahub_airflow_plugin/datahub_plugin.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py index 6d8bb56791b3e2..f7b384c1d32390 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py @@ -1,3 +1,4 @@ +import contextlib import traceback from typing import Any, Iterable @@ -389,13 +390,10 @@ def _patch_policy(settings): def _patch_datahub_policy(): - try: + with contextlib.suppress(ImportError): import airflow_local_settings _patch_policy(airflow_local_settings) - except ImportError: - pass - from airflow.models.dagbag import settings _patch_policy(settings) From 2774b73d4dcf54331eb9c9335dd7fbfe90e8b19f Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:52:20 +1000 Subject: [PATCH 08/88] dataset add col if/else simplification --- .../library/dataset_add_column_tag.py | 19 +++++++++---------- .../library/dataset_add_column_term.py | 19 +++++++++---------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/metadata-ingestion/examples/library/dataset_add_column_tag.py b/metadata-ingestion/examples/library/dataset_add_column_tag.py index f5243ce28a5f01..a457d12f493ae0 100644 --- a/metadata-ingestion/examples/library/dataset_add_column_tag.py +++ b/metadata-ingestion/examples/library/dataset_add_column_tag.py @@ -23,18 +23,17 @@ def get_simple_field_path_from_v2_field_path(field_path: str) -> str: """A helper function to extract simple . 
path notation from the v2 field path""" - if field_path.startswith("[version=2.0]"): - # this is a v2 field path - tokens = [ - t - for t in field_path.split(".") - if not (t.startswith("[") or t.endswith("]")) - ] - path = ".".join(tokens) - return path - else: + if not field_path.startswith("[version=2.0]"): # not a v2, we assume this is a simple path return field_path + # this is a v2 field path + tokens = [ + t + for t in field_path.split(".") + if not (t.startswith("[") or t.endswith("]")) + ] + + return ".".join(tokens) # Inputs -> the column, dataset and the tag to set diff --git a/metadata-ingestion/examples/library/dataset_add_column_term.py b/metadata-ingestion/examples/library/dataset_add_column_term.py index ff1cad48a9f0c0..ea5cd1f632f743 100644 --- a/metadata-ingestion/examples/library/dataset_add_column_term.py +++ b/metadata-ingestion/examples/library/dataset_add_column_term.py @@ -23,18 +23,17 @@ def get_simple_field_path_from_v2_field_path(field_path: str) -> str: """A helper function to extract simple . path notation from the v2 field path""" - if field_path.startswith("[version=2.0]"): - # this is a v2 field path - tokens = [ - t - for t in field_path.split(".") - if not (t.startswith("[") or t.endswith("]")) - ] - path = ".".join(tokens) - return path - else: + if not field_path.startswith("[version=2.0]"): # not a v2, we assume this is a simple path return field_path + # this is a v2 field path + tokens = [ + t + for t in field_path.split(".") + if not (t.startswith("[") or t.endswith("]")) + ] + + return ".".join(tokens) # Inputs -> the column, dataset and the term to set From 387b9d3eda977bdd653bb7b36fbdb2547b716464 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:52:40 +1000 Subject: [PATCH 09/88] lineage emitter UTC timezone --- .../lineage_job_dataflow_new_api_simple.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py b/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py index 7212282156d8b9..d339d35110db1d 100644 --- a/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py +++ b/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py @@ -8,6 +8,7 @@ ) from datahub.emitter.rest_emitter import DatahubRestEmitter +from datetime import timezone emitter = DatahubRestEmitter("http://localhost:8080") jobFlow = DataFlow(cluster="prod", orchestrator="airflow", id="flow_api_simple") @@ -36,40 +37,39 @@ jobFlowRun = DataProcessInstance.from_dataflow( dataflow=jobFlow, id=f"{jobFlow.id}-{uuid.uuid4()}" ) -jobFlowRun.emit_process_start(emitter, int(datetime.utcnow().timestamp() * 1000)) +jobFlowRun.emit_process_start(emitter, int(datetime.now(timezone.utc).timestamp() * 1000)) + jobRun = DataProcessInstance.from_datajob( datajob=dataJob, id=f"{jobFlow.id}-{uuid.uuid4()}" ) -jobRun.emit_process_start(emitter, int(datetime.utcnow().timestamp() * 1000)) -jobRun.emit_process_end( - emitter, int(datetime.utcnow().timestamp() * 1000), result=InstanceRunResult.SUCCESS -) +jobRun.emit_process_start(emitter, int(datetime.now(timezone.utc).timestamp() * 1000)) + +jobRun.emit_process_end(emitter, int(datetime.now(timezone.utc).timestamp() * 1000), result=InstanceRunResult.SUCCESS) + job2Run = DataProcessInstance.from_datajob( datajob=dataJob2, id=f"{jobFlow.id}-{uuid.uuid4()}" ) -job2Run.emit_process_start(emitter, int(datetime.utcnow().timestamp() * 1000)) -job2Run.emit_process_end( - emitter, 
int(datetime.utcnow().timestamp() * 1000), result=InstanceRunResult.SUCCESS -) +job2Run.emit_process_start(emitter, int(datetime.now(timezone.utc).timestamp() * 1000)) + +job2Run.emit_process_end(emitter, int(datetime.now(timezone.utc).timestamp() * 1000), result=InstanceRunResult.SUCCESS) + job3Run = DataProcessInstance.from_datajob( datajob=dataJob3, id=f"{jobFlow.id}-{uuid.uuid4()}" ) -job3Run.emit_process_start(emitter, int(datetime.utcnow().timestamp() * 1000)) -job3Run.emit_process_end( - emitter, int(datetime.utcnow().timestamp() * 1000), result=InstanceRunResult.SUCCESS -) +job3Run.emit_process_start(emitter, int(datetime.now(timezone.utc).timestamp() * 1000)) + +job3Run.emit_process_end(emitter, int(datetime.now(timezone.utc).timestamp() * 1000), result=InstanceRunResult.SUCCESS) + job4Run = DataProcessInstance.from_datajob( datajob=dataJob4, id=f"{jobFlow.id}-{uuid.uuid4()}" ) -job4Run.emit_process_start(emitter, int(datetime.utcnow().timestamp() * 1000)) -job4Run.emit_process_end( - emitter, int(datetime.utcnow().timestamp() * 1000), result=InstanceRunResult.SUCCESS -) +job4Run.emit_process_start(emitter, int(datetime.now(timezone.utc).timestamp() * 1000)) -jobFlowRun.emit_process_end( - emitter, int(datetime.utcnow().timestamp() * 1000), result=InstanceRunResult.SUCCESS -) +job4Run.emit_process_end(emitter, int(datetime.now(timezone.utc).timestamp() * 1000), result=InstanceRunResult.SUCCESS) + + +jobFlowRun.emit_process_end(emitter, int(datetime.now(timezone.utc).timestamp() * 1000), result=InstanceRunResult.SUCCESS) From de786c332d547a3ef98ef1ddadae1f4dbf61cb3e Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:52:43 +1000 Subject: [PATCH 10/88] Update lineage_emitter_mcpw_rest.py --- .../examples/library/lineage_emitter_mcpw_rest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py b/metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py index 11f73d36cb29d9..d1c934cba40409 100644 --- a/metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py +++ b/metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py @@ -10,13 +10,11 @@ ) from datahub.metadata.schema_classes import ChangeTypeClass -# Construct upstream tables. 
-upstream_tables: List[UpstreamClass] = [] upstream_table_1 = UpstreamClass( dataset=builder.make_dataset_urn("bigquery", "upstream_table_1", "PROD"), type=DatasetLineageTypeClass.TRANSFORMED, ) -upstream_tables.append(upstream_table_1) +upstream_tables: List[UpstreamClass] = [upstream_table_1] upstream_table_2 = UpstreamClass( dataset=builder.make_dataset_urn("bigquery", "upstream_table_2", "PROD"), type=DatasetLineageTypeClass.TRANSFORMED, From d95e8baac5525886a6ed870c4303a906595263e0 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:52:52 +1000 Subject: [PATCH 11/88] Update custom_transform_example.py --- .../examples/transforms/custom_transform_example.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/examples/transforms/custom_transform_example.py b/metadata-ingestion/examples/transforms/custom_transform_example.py index 85663d971092b5..4a3d16d4a4dd94 100644 --- a/metadata-ingestion/examples/transforms/custom_transform_example.py +++ b/metadata-ingestion/examples/transforms/custom_transform_example.py @@ -61,13 +61,8 @@ def transform_aspect( # type: ignore assert aspect is None or isinstance(aspect, OwnershipClass) if owners_to_add: - ownership = ( - aspect - if aspect - else OwnershipClass( - owners=[], - ) - ) + ownership = aspect or OwnershipClass(owners=[],) + ownership.owners.extend(owners_to_add) return ownership From 20f770ebed5566fbdadbde4bc664b39eb280f958 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:53:22 +1000 Subject: [PATCH 12/88] path read over open --- metadata-ingestion/scripts/avro_codegen.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/scripts/avro_codegen.py b/metadata-ingestion/scripts/avro_codegen.py index 05d8fb1c5804c6..e5758d159bdee5 100644 --- a/metadata-ingestion/scripts/avro_codegen.py +++ b/metadata-ingestion/scripts/avro_codegen.py @@ -10,11 +10,8 @@ def load_schema_file(schema_file: str) -> str: - with open(schema_file) as f: - raw_schema_text = f.read() - - redo_spaces = json.dumps(json.loads(raw_schema_text), indent=2) - return redo_spaces + raw_schema_text = Path(schema_file).read_text() + return json.dumps(json.loads(raw_schema_text), indent=2) def merge_schemas(schemas: List[str]) -> str: From 2b1b6b9d378a28bb7add2a6838e18d553a95839d Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:53:52 +1000 Subject: [PATCH 13/88] spelling --- metadata-ingestion/src/datahub/cli/cli_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 81dbae79d7354f..0e0c1c9c430adb 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -200,7 +200,7 @@ def test_connection(): def test_connectivity_complain_exit(operation_name: str) -> None: - """Test connectivty to metadata-service, log operation name and exit""" + """Test connectivity to metadata-service, log operation name and exit""" # First test connectivity try: test_connection() From 5562bfe680598ee012674ae862941d01d25db0b0 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 08:54:05 +1000 Subject: [PATCH 14/88] escape strings --- metadata-ingestion/src/datahub/cli/cli_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 0e0c1c9c430adb..817ea500e7c093 
100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -194,7 +194,7 @@ def get_session_and_host(): def test_connection(): (session, host) = get_session_and_host() - url = host + "/config" + url = f"{host}/config" response = session.get(url) response.raise_for_status() @@ -556,7 +556,7 @@ def get_entity( endpoint: str = f"/entitiesV2/{encoded_urn}" if aspect and len(aspect): - endpoint = endpoint + "?aspects=List(" + ",".join(aspect) + ")" + endpoint = f"{endpoint}?aspects=List(" + ",".join(aspect) + ")" response = session.get(gms_host + endpoint) response.raise_for_status() From 2952cefb369364b6f22131117c24422b4202982d Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:32:39 +1000 Subject: [PATCH 15/88] Update cli_utils.py --- .../src/datahub/cli/cli_utils.py | 48 ++++--------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 817ea500e7c093..84ee3aa8513a31 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -297,10 +297,7 @@ def post_delete_references_endpoint( path: str, cached_session_host: Optional[Tuple[Session, str]] = None, ) -> Tuple[int, List[Dict]]: - if not cached_session_host: - session, gms_host = get_session_and_host() - else: - session, gms_host = cached_session_host + session, gms_host = cached_session_host or get_session_and_host() url = gms_host + path payload = json.dumps(payload_obj) @@ -316,10 +313,7 @@ def post_delete_endpoint( path: str, cached_session_host: Optional[Tuple[Session, str]] = None, ) -> typing.Tuple[str, int]: - if not cached_session_host: - session, gms_host = get_session_and_host() - else: - session, gms_host = cached_session_host + session, gms_host = cached_session_host or get_session_and_host() url = gms_host + path return post_delete_endpoint_with_session_and_url(session, url, payload_obj) @@ -369,9 +363,7 @@ def get_urns_by_filter( "condition": "EQUAL", } ) - if platform is not None and ( - entity_type_lower == "chart" or entity_type_lower == "dashboard" - ): + if platform is not None and entity_type_lower in {"chart", "dashboard"}: filter_criteria.append( { "field": "tool", @@ -479,10 +471,7 @@ def batch_get_ids( session, gms_host = get_session_and_host() endpoint: str = "/entitiesV2" url = gms_host + endpoint - ids_to_get = [] - for id in ids: - ids_to_get.append(Urn.url_encode(id)) - + ids_to_get = [Urn.url_encode(id) for id in ids] response = session.get( f"{url}?ids=List({','.join(ids_to_get)})", ) @@ -539,11 +528,7 @@ def get_entity( aspect: Optional[List] = None, cached_session_host: Optional[Tuple[Session, str]] = None, ) -> Dict: - if not cached_session_host: - session, gms_host = get_session_and_host() - else: - session, gms_host = cached_session_host - + session, gms_host = cached_session_host or get_session_and_host() if urn.startswith("urn%3A"): # we assume the urn is already encoded encoded_urn: str = urn @@ -570,11 +555,7 @@ def post_entity( aspect_value: Dict, cached_session_host: Optional[Tuple[Session, str]] = None, ) -> Dict: - if not cached_session_host: - session, gms_host = get_session_and_host() - else: - session, gms_host = cached_session_host - + session, gms_host = cached_session_host or get_session_and_host() endpoint: str = "/aspects/?action=ingestProposal" proposal = { @@ -671,10 +652,7 @@ def get_latest_timeseries_aspect_values( timeseries_aspect_name: 
str, cached_session_host: Optional[Tuple[Session, str]], ) -> Dict: - if not cached_session_host: - session, gms_host = get_session_and_host() - else: - session, gms_host = cached_session_host + session, gms_host = cached_session_host or get_session_and_host() query_body = { "urn": entity_urn, "entity": guess_entity_type(entity_urn), @@ -725,14 +703,8 @@ def get_aspects_for_entity( aspect_value["aspect"]["value"] = json.loads( aspect_value["aspect"]["value"] ) - aspect_list.update( - # Follow the convention used for non-timeseries aspects. - { - aspect_cls.RECORD_SCHEMA.fullname.replace( - "pegasus2avro.", "" - ): aspect_value - } - ) + aspect_list[aspect_cls.RECORD_SCHEMA.fullname.replace("pegasus2avro.", "")] = aspect_value + aspect_map: Dict[str, Union[dict, DictWrapper]] = {} for a in aspect_list.values(): @@ -756,4 +728,4 @@ def get_aspects_for_entity( if aspects: return {k: v for (k, v) in aspect_map.items() if k in aspects} else: - return {k: v for (k, v) in aspect_map.items()} + return dict(aspect_map) From ef6b5bc5b18850af8bf807e624dea976115b1f00 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:32:41 +1000 Subject: [PATCH 16/88] Update delete_cli.py --- .../src/datahub/cli/delete_cli.py | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/delete_cli.py b/metadata-ingestion/src/datahub/cli/delete_cli.py index 1ca0ac864693b1..ae8574b89c9695 100644 --- a/metadata-ingestion/src/datahub/cli/delete_cli.py +++ b/metadata-ingestion/src/datahub/cli/delete_cli.py @@ -234,16 +234,16 @@ def delete_with_filters( logger.info(f"datahub configured with {gms_host}") emitter = rest_emitter.DatahubRestEmitter(gms_server=gms_host, token=token) batch_deletion_result = DeletionResult() - urns = [ - u - for u in cli_utils.get_urns_by_filter( + urns = list( + cli_utils.get_urns_by_filter( env=env, platform=platform, search_query=search_query, entity_type=entity_type, include_removed=include_removed, ) - ] + ) + logger.info( f"Filter matched {len(urns)} entities. 
Sample: {choices(urns, k=min(5, len(urns)))}" ) @@ -284,12 +284,12 @@ def _delete_one_urn( if soft: # Add removed aspect - if not cached_emitter: + if cached_emitter: + emitter = cached_emitter + else: _, gms_host = cli_utils.get_session_and_host() token = cli_utils.get_token() emitter = rest_emitter.DatahubRestEmitter(gms_server=gms_host, token=token) - else: - emitter = cached_emitter if not dry_run: emitter.emit_mcp( MetadataChangeProposalWrapper( @@ -305,18 +305,17 @@ def _delete_one_urn( ) else: logger.info(f"[Dry-run] Would soft-delete {urn}") + elif not dry_run: + payload_obj = {"urn": urn} + urn, rows_affected = cli_utils.post_delete_endpoint( + payload_obj, + "/entities?action=delete", + cached_session_host=cached_session_host, + ) + deletion_result.num_records = rows_affected else: - if not dry_run: - payload_obj = {"urn": urn} - urn, rows_affected = cli_utils.post_delete_endpoint( - payload_obj, - "/entities?action=delete", - cached_session_host=cached_session_host, - ) - deletion_result.num_records = rows_affected - else: - logger.info(f"[Dry-run] Would hard-delete {urn}") - deletion_result.num_records = UNKNOWN_NUM_RECORDS # since we don't know how many rows will be affected + logger.info(f"[Dry-run] Would hard-delete {urn}") + deletion_result.num_records = UNKNOWN_NUM_RECORDS # since we don't know how many rows will be affected deletion_result.end() return deletion_result From 0c92e2b64fc2140b5f350c7671129898fee13a74 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:32:45 +1000 Subject: [PATCH 17/88] Update docker_check.py --- metadata-ingestion/src/datahub/cli/docker_check.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/docker_check.py b/metadata-ingestion/src/datahub/cli/docker_check.py index e530f4d19616f3..005651d673df36 100644 --- a/metadata-ingestion/src/datahub/cli/docker_check.py +++ b/metadata-ingestion/src/datahub/cli/docker_check.py @@ -88,10 +88,9 @@ def check_local_docker_containers(preflight_only: bool = False) -> List[str]: if len(containers) == 0: issues.append("quickstart.sh or dev.sh is not running") else: - existing_containers = set(container.name for container in containers) + existing_containers = {container.name for container in containers} missing_containers = set(REQUIRED_CONTAINERS) - existing_containers - for missing in missing_containers: - issues.append(f"{missing} container is not present") + issues.extend(f"{missing} container is not present" for missing in missing_containers) # Check that the containers are running and healthy. 
for container in containers: From 62a6dd87c93b61f5b7bddfdd10cb3a4d26e930b1 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:32:50 +1000 Subject: [PATCH 18/88] Escape --- metadata-ingestion/src/datahub/cli/ingest_cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 264a7bf0fed723..ecb7d80e30fcfa 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -313,14 +313,14 @@ def rollback( current_time = now.strftime("%Y-%m-%d %H:%M:%S") try: - folder_name = report_dir + "/" + current_time + folder_name = f"{report_dir}/{current_time}" - ingestion_config_file_name = folder_name + "/config.json" + ingestion_config_file_name = f"{folder_name}/config.json" os.makedirs(os.path.dirname(ingestion_config_file_name), exist_ok=True) with open(ingestion_config_file_name, "w") as file_handle: json.dump({"run_id": run_id}, file_handle) - csv_file_name = folder_name + "/unsafe_entities.csv" + csv_file_name = f"{folder_name}/unsafe_entities.csv" with open(csv_file_name, "w") as file_handle: writer = csv.writer(file_handle) writer.writerow(["urn"]) @@ -329,4 +329,4 @@ def rollback( except IOError as e: print(e) - sys.exit("Unable to write reports to " + report_dir) + sys.exit(f"Unable to write reports to {report_dir}") From 26f758f66a8459574380816c10359c05f163f353 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:32:54 +1000 Subject: [PATCH 19/88] Update migration_utils.py --- metadata-ingestion/src/datahub/cli/migration_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/migration_utils.py b/metadata-ingestion/src/datahub/cli/migration_utils.py index b383e2849b6b9b..79546e07ac056a 100644 --- a/metadata-ingestion/src/datahub/cli/migration_utils.py +++ b/metadata-ingestion/src/datahub/cli/migration_utils.py @@ -218,8 +218,8 @@ def modify_urn_list_for_aspect( new_urn: str, ) -> DictWrapper: - if hasattr(UrnListModifier, aspect_name + "_modifier"): - modifier = getattr(UrnListModifier, aspect_name + "_modifier") + if hasattr(UrnListModifier, f"{aspect_name}_modifier"): + modifier = getattr(UrnListModifier, f"{aspect_name}_modifier") return modifier( aspect=aspect, relationship_type=relationship_type, From 03c4bf1646f8971f24a4ad35e7c5ac316de757f3 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:32:57 +1000 Subject: [PATCH 20/88] Update timeline_cli.py --- .../src/datahub/cli/timeline_cli.py | 58 +++++++------------ 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/timeline_cli.py b/metadata-ingestion/src/datahub/cli/timeline_cli.py index 40c5af4e1e78a0..eec753a4af2ba0 100644 --- a/metadata-ingestion/src/datahub/cli/timeline_cli.py +++ b/metadata-ingestion/src/datahub/cli/timeline_cli.py @@ -19,46 +19,34 @@ def pretty_field_path(field_path: str) -> str: - if field_path.startswith("[version=2.0]"): + if not field_path.startswith("[version=2.0]"): + return field_path # breakpoint() # parse schema field - tokens = [ - t - for t in field_path.split(".") - if not (t.startswith("[") or t.endswith("]")) - ] - path = ".".join(tokens) - return path - else: - return field_path + tokens = [t for t in field_path.split(".") if not t.startswith("[") and not t.endswith("]")] + + return ".".join(tokens) def pretty_id(id: Optional[str]) -> str: if not id: return "" - else: - # 
breakpoint() - assert id is not None - if id.startswith("urn:li:datasetField:") or id.startswith( + # breakpoint() + assert id is not None + if id.startswith("urn:li:datasetField:") or id.startswith( "urn:li:schemaField:" ): - # parse schema field - schema_field_key = schema_field_urn_to_key( - id.replace("urn:li:datasetField", "urn:li:schemaField") - ) - if schema_field_key: - assert schema_field_key is not None - field_path = schema_field_key.fieldPath - - return f"{colored('field','cyan')}:{colored(pretty_field_path(field_path),'white')}" - if id.startswith("[version=2.0]"): - return f"{colored('field','cyan')}:{colored(pretty_field_path(id),'white')}" - - if id.startswith("urn:li:dataset"): - # parse dataset urn - dataset_key = dataset_urn_to_key(id) - if dataset_key: - return f"{colored('dataset','cyan')}:{colored(dataset_key.platform,'white')}:{colored(dataset_key.name,'white')}" + if schema_field_key := schema_field_urn_to_key(id.replace("urn:li:datasetField", "urn:li:schemaField")): + assert schema_field_key is not None + field_path = schema_field_key.fieldPath + + return f"{colored('field','cyan')}:{colored(pretty_field_path(field_path),'white')}" + if id.startswith("[version=2.0]"): + return f"{colored('field','cyan')}:{colored(pretty_field_path(id),'white')}" + + if id.startswith("urn:li:dataset"): + if dataset_key := dataset_urn_to_key(id): + return f"{colored('dataset','cyan')}:{colored(dataset_key.platform,'white')}:{colored(dataset_key.name,'white')}" # failed to prettify, return original return id @@ -194,12 +182,8 @@ def timeline( change_instant = str( datetime.fromtimestamp(change_txn["timestamp"] // 1000) ) - change_color = ( - "green" - if change_txn.get("semVerChange") == "MINOR" - or change_txn.get("semVerChange") == "PATCH" - else "red" - ) + change_color = "green" if change_txn.get("semVerChange") in ["MINOR", "PATCH"] else "red" + print( f"{colored(change_instant,'cyan')} - {colored(change_txn['semVer'],change_color)}" ) From 5ee83d9f637d00b71ba4ffb225c7eacb4e03877b Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:33:01 +1000 Subject: [PATCH 21/88] Update common.py --- .../src/datahub/configuration/common.py | 44 +++++-------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index 716572babc1e41..b9fce56d5ff60b 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -39,10 +39,7 @@ class OperationalError(PipelineExecutionError): def __init__(self, message: str, info: dict = None): self.message = message - if info: - self.info = info - else: - self.info = {} + self.info = info or {} class ConfigurationError(MetaError): @@ -104,7 +101,7 @@ class OauthConfiguration(ConfigModel): class AllowDenyPattern(ConfigModel): - """A class to store allow deny regexes""" + """A class to store allow deny regex's""" allow: List[str] = Field( default=[".*"], @@ -128,10 +125,7 @@ def alphabet_pattern(self) -> Pattern: @property def regex_flags(self) -> int: - if self.ignoreCase: - return re.IGNORECASE - else: - return 0 + return re.IGNORECASE if self.ignoreCase else 0 @classmethod def allow_all(cls) -> "AllowDenyPattern": @@ -142,23 +136,16 @@ def allowed(self, string: str) -> bool: if re.match(deny_pattern, string, self.regex_flags): return False - for allow_pattern in self.allow: - if re.match(allow_pattern, string, self.regex_flags): - return True - - return 
False + return any(re.match(allow_pattern, string, self.regex_flags) for allow_pattern in self.allow) def is_fully_specified_allow_list(self) -> bool: """ - If the allow patterns are literals and not full regexes, then it is considered + If the allow patterns are literals and not full regex's, then it is considered fully specified. This is useful if you want to convert a 'list + filter' pattern into a 'search for the ones that are allowed' pattern, which can be much more efficient in some cases. """ - for allow_pattern in self.allow: - if not self.alphabet_pattern.match(allow_pattern): - return False - return True + return all(self.alphabet_pattern.match(allow_pattern) for allow_pattern in self.allow) def get_allowed_list(self) -> List[str]: """Return the list of allowed strings as a list, after taking into account deny patterns, if possible""" @@ -167,7 +154,7 @@ def get_allowed_list(self) -> List[str]: class KeyValuePattern(ConfigModel): - """A class to store allow deny regexes""" + """A class to store allow deny regex's""" rules: Dict[str, List[str]] = {".*": []} alphabet: str = "[A-Za-z0-9 _.-]" @@ -181,28 +168,19 @@ def all(cls) -> "KeyValuePattern": return KeyValuePattern() def value(self, string: str) -> List[str]: - for key in self.rules.keys(): - if re.match(key, string): - return self.rules[key] - return [] + return next((self.rules[key] for key in self.rules.keys() if re.match(key, string)), []) def matched(self, string: str) -> bool: - for key in self.rules.keys(): - if re.match(key, string): - return True - return False + return any(re.match(key, string) for key in self.rules.keys()) def is_fully_specified_key(self) -> bool: """ - If the allow patterns are literals and not full regexes, then it is considered + If the allow patterns are literals and not full regex's, then it is considered fully specified. This is useful if you want to convert a 'list + filter' pattern into a 'search for the ones that are allowed' pattern, which can be much more efficient in some cases. 
""" - for key in self.rules.keys(): - if not self.alphabet_pattern.match(key): - return True - return False + return any(not self.alphabet_pattern.match(key) for key in self.rules.keys()) def get(self) -> Dict[str, List[str]]: """Return the list of allowed strings as a list, after taking into account deny patterns, if possible""" From c78fdd87707c9df2482f3947615a1ecafd503c1c Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:33:03 +1000 Subject: [PATCH 22/88] Update import_resolver.py --- .../src/datahub/configuration/import_resolver.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/import_resolver.py b/metadata-ingestion/src/datahub/configuration/import_resolver.py index 56e232d0403241..19627c7b8c9569 100644 --- a/metadata-ingestion/src/datahub/configuration/import_resolver.py +++ b/metadata-ingestion/src/datahub/configuration/import_resolver.py @@ -8,9 +8,7 @@ def _pydantic_resolver(v: Union[T, str]) -> T: - if isinstance(v, str): - return import_path(v) - return v + return import_path(v) if isinstance(v, str) else v def pydantic_resolve_key(field: str) -> classmethod: From 14209324d2c9658f3760206f9172a98dcf79e5f2 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:33:07 +1000 Subject: [PATCH 23/88] Update yaml.py --- metadata-ingestion/src/datahub/configuration/yaml.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/yaml.py b/metadata-ingestion/src/datahub/configuration/yaml.py index ee710b07bab3d2..1f1172836f7448 100644 --- a/metadata-ingestion/src/datahub/configuration/yaml.py +++ b/metadata-ingestion/src/datahub/configuration/yaml.py @@ -9,5 +9,4 @@ class YamlConfigurationMechanism(ConfigurationMechanism): """Ability to load configuration from yaml files""" def load_config(self, config_fp: IO) -> dict: - config = yaml.safe_load(config_fp) - return config + return yaml.safe_load(config_fp) From 16e2c1204e05f2c5fd5c01602c04c3eb2dea69da Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:33:09 +1000 Subject: [PATCH 24/88] Update kafka_emitter.py --- .../src/datahub/emitter/kafka_emitter.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/metadata-ingestion/src/datahub/emitter/kafka_emitter.py b/metadata-ingestion/src/datahub/emitter/kafka_emitter.py index f2dc663cf0677a..001097a2e42f5b 100644 --- a/metadata-ingestion/src/datahub/emitter/kafka_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/kafka_emitter.py @@ -49,12 +49,11 @@ def validate_topic_routes(cls: "KafkaEmitterConfig", values: dict) -> dict: raise ConfigurationError( "Using both topic and topic_routes configuration for Kafka is not supported. Use only topic_routes" ) - else: - logger.warning( - "Looks like you're using the deprecated `topic` configuration. Please migrate to `topic_routes`." - ) - # upgrade topic provided to topic_routes mce entry - values["topic_routes"][MCE_KEY] = values["topic"] + logger.warning( + "Looks like you're using the deprecated `topic` configuration. Please migrate to `topic_routes`." 
+ ) + # upgrade topic provided to topic_routes mce entry + values["topic_routes"][MCE_KEY] = values["topic"] return values @@ -70,8 +69,7 @@ def __init__(self, config: KafkaEmitterConfig): def convert_mce_to_dict( mce: MetadataChangeEvent, ctx: SerializationContext ) -> dict: - tuple_encoding = mce.to_obj(tuples=True) - return tuple_encoding + return mce.to_obj(tuples=True) mce_avro_serializer = AvroSerializer( schema_str=getMetadataChangeEventSchema(), @@ -83,8 +81,7 @@ def convert_mcp_to_dict( mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper], ctx: SerializationContext, ) -> dict: - tuple_encoding = mcp.to_obj(tuples=True) - return tuple_encoding + return mcp.to_obj(tuples=True) mcp_avro_serializer = AvroSerializer( schema_str=getMetadataChangeProposalSchema(), From 2b0612f1ae88c79f72f0b856c2be4461106b6097 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:33:13 +1000 Subject: [PATCH 25/88] Update mce_builder.py --- .../src/datahub/emitter/mce_builder.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index e6203933705cb1..19f2487f043327 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -104,8 +104,8 @@ def schema_field_urn_to_key(schema_field_urn: str) -> Optional[SchemaFieldKeyCla pattern = r"urn:li:schemaField:\((.*),(.*)\)" results = re.search(pattern, schema_field_urn) if results is not None: - dataset_urn: str = results.group(1) - field_path: str = results.group(2) + dataset_urn: str = results[1] + field_path: str = results[2] return SchemaFieldKeyClass(parent=dataset_urn, fieldPath=field_path) return None @@ -114,9 +114,7 @@ def dataset_urn_to_key(dataset_urn: str) -> Optional[DatasetKeyClass]: pattern = r"urn:li:dataset:\(urn:li:dataPlatform:(.*),(.*),(.*)\)" results = re.search(pattern, dataset_urn) if results is not None: - return DatasetKeyClass( - platform=results.group(1), name=results.group(2), origin=results.group(3) - ) + return DatasetKeyClass(platform=results[1], name=results[2], origin=results[3]) return None @@ -128,9 +126,7 @@ def container_new_urn_to_key(dataset_urn: str) -> Optional[ContainerKeyClass]: pattern = r"urn:dh:container:0:\((.*)\)" results = re.search(pattern, dataset_urn) if results is not None: - return ContainerKeyClass( - guid=results.group(1), - ) + return ContainerKeyClass(guid=results[1]) return None @@ -146,9 +142,7 @@ def container_urn_to_key(guid: str) -> Optional[ContainerKeyClass]: pattern = r"urn:li:container:(.*)" results = re.search(pattern, guid) if results is not None: - return ContainerKeyClass( - guid=results.group(1), - ) + return ContainerKeyClass(guid=results[1]) return None @@ -156,8 +150,7 @@ def datahub_guid(obj: dict) -> str: obj_str = json.dumps( pre_json_transform(obj), separators=(",", ":"), sort_keys=True ).encode("utf-8") - datahub_guid = md5(obj_str).hexdigest() - return datahub_guid + return md5(obj_str).hexdigest() def make_assertion_urn(assertion_id: str) -> str: From 85368b5f8b150ceccd93f5c15a51743c826855cc Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:33:15 +1000 Subject: [PATCH 26/88] Update serialization_helper.py --- .../src/datahub/emitter/serialization_helper.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/emitter/serialization_helper.py 
b/metadata-ingestion/src/datahub/emitter/serialization_helper.py index 5a348ce267b10f..958c913698e442 100644 --- a/metadata-ingestion/src/datahub/emitter/serialization_helper.py +++ b/metadata-ingestion/src/datahub/emitter/serialization_helper.py @@ -16,10 +16,11 @@ def _json_transform(obj: Any, from_pattern: str, to_pattern: str) -> Any: field = obj["fieldDiscriminator"] return {field: _json_transform(obj[field], from_pattern, to_pattern)} - new_obj: Any = {} - for key, value in obj.items(): - if value is not None: - new_obj[key] = _json_transform(value, from_pattern, to_pattern) + new_obj: Any = { + key: _json_transform(value, from_pattern, to_pattern) \ + for key, value in obj.items() if value is not None + } + return new_obj elif isinstance(obj, list): new_obj = [_json_transform(item, from_pattern, to_pattern) for item in obj] From af20f707102f0dcc8b84680381ba6f1feef92260 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:33:17 +1000 Subject: [PATCH 27/88] Update committable.py --- metadata-ingestion/src/datahub/ingestion/api/committable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/api/committable.py b/metadata-ingestion/src/datahub/ingestion/api/committable.py index f1aada4477f1ab..e41eb24abc2d96 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/committable.py +++ b/metadata-ingestion/src/datahub/ingestion/api/committable.py @@ -55,7 +55,7 @@ def __init__( super(_CommittableConcrete, self).__init__(state_to_commit=state_to_commit) def has_successfully_committed(self) -> bool: - return True if not self.state_to_commit or self.committed else False + return bool(not self.state_to_commit or self.committed) @abstractmethod def get_previous_states( From 9ab0d3dff3a84dcb943825c576cf45d4a5efaca6 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:33:19 +1000 Subject: [PATCH 28/88] Update common.py --- .../src/datahub/ingestion/api/common.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/api/common.py b/metadata-ingestion/src/datahub/ingestion/api/common.py index 56c21a7f39c627..fd458f9b4fd980 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/common.py +++ b/metadata-ingestion/src/datahub/ingestion/api/common.py @@ -55,8 +55,8 @@ def __init__( self.pipeline_name = pipeline_name self.dry_run_mode = dry_run self.preview_mode = preview_mode - self.reporters: Dict[str, Committable] = dict() - self.checkpointers: Dict[str, Committable] = dict() + self.reporters: Dict[str, Committable] = {} + self.checkpointers: Dict[str, Committable] = {} self._set_dataset_urn_to_lower_if_needed() def _set_dataset_urn_to_lower_if_needed(self) -> None: @@ -81,11 +81,8 @@ def register_reporter(self, committable: Committable) -> None: self.reporters[committable.name] = committable def get_reporters(self) -> Iterable[Committable]: - for committable in self.reporters.values(): - yield committable + yield from self.reporters.values() def get_committables(self) -> Iterable[Tuple[str, Committable]]: - for reporting_item_commitable in self.reporters.items(): - yield reporting_item_commitable - for checkpointing_item_commitable in self.checkpointers.items(): - yield checkpointing_item_commitable + yield from self.reporters.items() + yield from self.checkpointers.items() From daff0e490ceea7573bd4e2b9b0aca4c2458bcd1c Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 16:33:21 +1000 Subject: [PATCH 29/88] Update decorators.py --- 
metadata-ingestion/src/datahub/ingestion/api/decorators.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/api/decorators.py b/metadata-ingestion/src/datahub/ingestion/api/decorators.py index eafbe14106fd23..7666a4f52a2271 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/decorators.py +++ b/metadata-ingestion/src/datahub/ingestion/api/decorators.py @@ -34,11 +34,8 @@ def platform_name( def wrapper(cls: Type) -> Type: setattr(cls, "get_platform_name", lambda: platform_name) - setattr( - cls, - "get_platform_id", - lambda: id if id else platform_name.lower().replace(" ", "-"), - ) + setattr(cls, "get_platform_id", lambda: id or platform_name.lower().replace(" ", "-")) + return cls if id and " " in id: From 6a79b7f53beeca7fdd767a3c723cd55e370f61ae Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:30:32 +1000 Subject: [PATCH 30/88] Update urn.py --- .../src/datahub/utilities/urns/urn.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn.py b/metadata-ingestion/src/datahub/utilities/urns/urn.py index 7498cc1532c66e..7ae6d37472621a 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/urn.py +++ b/metadata-ingestion/src/datahub/utilities/urns/urn.py @@ -21,7 +21,7 @@ class Urn: def __init__( self, entity_type: str, entity_id: List[str], urn_domain: str = LI_DOMAIN ): - if len(entity_id) == 0: + if not entity_id: raise InvalidUrnError("Empty entity id.") self._validate_entity_type(entity_type) self._validate_entity_id(entity_id) @@ -122,9 +122,9 @@ def _get_entity_id_from_str(entity_id: str) -> List[str]: part_start = i + 1 if start_paren_count != 0: - raise InvalidUrnError(f"{entity_id}, mismtached paren nesting") + raise InvalidUrnError(f"{entity_id}, mismatched parent nesting") - parts.append(entity_id[part_start : len(entity_id) - 1]) + parts.append(entity_id[part_start:-1]) return parts @@ -151,11 +151,4 @@ def __hash__(self) -> int: return hash((self._domain, self._entity_type) + tuple(self._entity_id)) def __eq__(self, other: object) -> bool: - if not isinstance(other, Urn): - return False - - return ( - self._entity_id == other._entity_id - and self._domain == other._domain - and self._entity_type == other._entity_type - ) + return (self._entity_id == other._entity_id and self._domain == other._domain and self._entity_type == other._entity_type) if isinstance(other, Urn) else False From 3f48494f63122d1f5b0f38911358de6551bc16d6 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:30:39 +1000 Subject: [PATCH 31/88] Update registry.py --- .../src/datahub/ingestion/api/registry.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/api/registry.py b/metadata-ingestion/src/datahub/ingestion/api/registry.py index f83921639c227e..a8529817e2500f 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/registry.py +++ b/metadata-ingestion/src/datahub/ingestion/api/registry.py @@ -79,16 +79,15 @@ def register_disabled( def _ensure_not_lazy(self, key: str) -> Union[Type[T], Exception]: path = self._mapping[key] - if isinstance(path, str): - try: - plugin_class = import_path(path) - self.register(key, plugin_class, override=True) - return plugin_class - except (AssertionError, ModuleNotFoundError, ImportError) as e: - self.register_disabled(key, e, override=True) - return e - else: + if not isinstance(path, str): return path + try: + 
plugin_class = import_path(path) + self.register(key, plugin_class, override=True) + return plugin_class + except (AssertionError, ImportError) as e: + self.register_disabled(key, e, override=True) + return e def is_enabled(self, key: str) -> bool: tp = self._mapping[key] From d0b37ef5793122c5730b781879dfd72db172e4ef Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:30:42 +1000 Subject: [PATCH 32/88] Update protobuf_util.py --- .../src/datahub/ingestion/extractor/protobuf_util.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/protobuf_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/protobuf_util.py index e5f976ff88dd56..51fdbd8fbdb680 100644 --- a/metadata-ingestion/src/datahub/ingestion/extractor/protobuf_util.py +++ b/metadata-ingestion/src/datahub/ingestion/extractor/protobuf_util.py @@ -365,11 +365,7 @@ def _schema_fields_from_dag( if generations and generations[0]: roots = generations[0] - leafs: List = [] - for node in graph: - if graph.out_degree(node) == 0: - leafs.append(node) - + leafs: List = [node for node in graph if graph.out_degree(node) == 0] type_of_nodes: Dict = nx.get_node_attributes(graph, "node_type") for root in roots: From a58dd7402e1ec086df954eab782c28472472136f Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:30:45 +1000 Subject: [PATCH 33/88] Update datahub_ingestion_reporting_provider.py --- .../ingestion/reporting/datahub_ingestion_reporting_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_reporting_provider.py b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_reporting_provider.py index 568c41aac9dbd9..1bb89236cc51ad 100644 --- a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_reporting_provider.py +++ b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_reporting_provider.py @@ -115,7 +115,7 @@ def get_previous_states( ) -> List[ReportingJobStatesMap]: if not last_only: raise NotImplementedError( - "Currently supports retrieving only the last commited state." + "Currently supports retrieving only the last committed state." 
) if filter_opt is not None: raise NotImplementedError( From 5342aaf31223085d077b663a80b9ea131f48ceaa Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:30:48 +1000 Subject: [PATCH 34/88] Update pipeline.py --- .../src/datahub/ingestion/run/pipeline.py | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index e26470f823a6c7..e429ad097b440d 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -53,11 +53,10 @@ def run_id_should_be_semantic( cls, v: Optional[str], values: Dict[str, Any], **kwargs: Any ) -> str: if v == "__DEFAULT_RUN_ID": - if "source" in values: - if hasattr(values["source"], "type"): - source_type = values["source"].type - current_time = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") - return f"{source_type}-{current_time}" + if "source" in values and hasattr(values["source"], "type"): + source_type = values["source"].type + current_time = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") + return f"{source_type}-{current_time}" return str(uuid.uuid1()) # default run_id if we cannot infer a source type else: @@ -86,12 +85,11 @@ def default_sink_is_datahub_rest(cls, values: Dict[str, Any]) -> Any: def datahub_api_should_use_rest_sink_as_default( cls, v: Optional[DatahubClientConfig], values: Dict[str, Any], **kwargs: Any ) -> Optional[DatahubClientConfig]: - if v is None: - if "sink" in values and hasattr(values["sink"], "type"): - sink_type = values["sink"].type - if sink_type == "datahub-rest": - sink_config = values["sink"].config - v = DatahubClientConfig.parse_obj(sink_config) + if v is None and "sink" in values and hasattr(values["sink"], "type"): + sink_type = values["sink"].type + if sink_type == "datahub-rest": + sink_config = values["sink"].config + v = DatahubClientConfig.parse_obj(sink_config) return v @@ -266,11 +264,8 @@ def process_commits(self) -> None: if self.source.get_report().failures or self.sink.get_report().failures else False ) - has_warnings: bool = ( - True - if self.source.get_report().warnings or self.sink.get_report().warnings - else False - ) + has_warnings: bool = bool(self.source.get_report().warnings or self.sink.get_report().warnings) + for name, committable in self.ctx.get_committables(): commit_policy: CommitPolicy = committable.commit_policy From 759bacd37cd58a895a9ba6da7a122afaafe1a584 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:30:52 +1000 Subject: [PATCH 35/88] Update datahub_kafka.py --- .../src/datahub/ingestion/sink/datahub_kafka.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py index f931b9039303a0..20929e85887a77 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py @@ -77,9 +77,7 @@ def write_record_async( self.report, record_envelope, write_callback ).kafka_callback, ) - elif isinstance(record, MetadataChangeProposalWrapper) or isinstance( - record, MetadataChangeProposalClass - ): + elif isinstance(record, (MetadataChangeProposalWrapper, MetadataChangeProposalClass)): self.emitter.emit_mcp_async( record, callback=_KafkaCallback( From b4275e19f135e545425eb7b6e43a737b4a15379e Mon Sep 17 00:00:00 2001 From: 
Vincent Koc Date: Wed, 8 Jun 2022 22:30:55 +1000 Subject: [PATCH 36/88] Update datahub_rest.py --- .../src/datahub/ingestion/sink/datahub_rest.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index 74e536350457b5..415a7a1c827da8 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -1,4 +1,5 @@ import concurrent.futures +import contextlib import functools import logging from dataclasses import dataclass @@ -111,13 +112,8 @@ def _write_done_callback( else: # trim exception stacktraces when reporting warnings if "stackTrace" in e.info: - try: - e.info["stackTrace"] = "\n".join( - e.info["stackTrace"].split("\n")[0:2] - ) - except Exception: - # ignore failures in trimming - pass + with contextlib.suppress(Exception): + e.info["stackTrace"] = "\n".join(e.info["stackTrace"].split("\n")[:2]) record = record_envelope.record if isinstance(record, MetadataChangeProposalWrapper): # include information about the entity that failed From f12961dad694c29afc1b86ea9cae17f9785ec6ea Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:30:59 +1000 Subject: [PATCH 37/88] Update pulsar.py --- .../src/datahub/ingestion/source_config/pulsar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source_config/pulsar.py index e21c6fc3ea42ba..836960ac50633f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/pulsar.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/pulsar.py @@ -30,7 +30,7 @@ def _is_valid_hostname(hostname: str) -> bool: """ if len(hostname) > 253: return False - # Hostnames ending on a dot are valid, if present strip exactly one + # Hostnames ending with a dot are valid; if present, strip exactly one if hostname[-1] == ".": hostname = hostname[:-1] allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?
Date: Wed, 8 Jun 2022 22:31:04 +1000 Subject: [PATCH 38/88] Update snowflake.py --- .../ingestion/source_config/sql/snowflake.py | 100 +++++++++--------- 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py index b00ac9cdfb41b8..644be7afbe749a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py @@ -90,7 +90,7 @@ class SnowflakeProvisionRoleConfig(ConfigModel): @pydantic.validator("admin_username", always=True) def username_not_empty(cls, v, values, **kwargs): v_str: str = str(v) - if v_str.strip() == "": + if not v_str.strip(): raise ValueError("username is empty") return v @@ -180,60 +180,58 @@ def authenticator_type_is_valid(cls, v, values, field): f"unsupported authenticator type '{v}' was provided," f" use one of {list(VALID_AUTH_TYPES.keys())}" ) - else: - if v == "KEY_PAIR_AUTHENTICATOR": - # If we are using key pair auth, we need the private key path and password to be set - if values.get("private_key_path") is None: - raise ValueError( - f"'private_key_path' was none " - f"but should be set when using {v} authentication" - ) - elif v == "OAUTH_AUTHENTICATOR": - if values.get("oauth_config") is None: - raise ValueError( - f"'oauth_config' is none but should be set when using {v} authentication" - ) - if values.get("oauth_config").provider is None: - raise ValueError( - f"'oauth_config.provider' is none " - f"but should be set when using {v} authentication" - ) - if values.get("oauth_config").client_id is None: - raise ValueError( - f"'oauth_config.client_id' is none " - f"but should be set when using {v} authentication" - ) - if values.get("oauth_config").scopes is None: + if v == "KEY_PAIR_AUTHENTICATOR": + # If we are using key pair auth, we need the private key path and password to be set + if values.get("private_key_path") is None: + raise ValueError( + f"'private_key_path' was none " + f"but should be set when using {v} authentication" + ) + elif v == "OAUTH_AUTHENTICATOR": + if values.get("oauth_config") is None: + raise ValueError( + f"'oauth_config' is none but should be set when using {v} authentication" + ) + if values.get("oauth_config").provider is None: + raise ValueError( + f"'oauth_config.provider' is none " + f"but should be set when using {v} authentication" + ) + if values.get("oauth_config").client_id is None: + raise ValueError( + f"'oauth_config.client_id' is none " + f"but should be set when using {v} authentication" + ) + if values.get("oauth_config").scopes is None: + raise ValueError( + f"'oauth_config.scopes' was none " + f"but should be set when using {v} authentication" + ) + if values.get("oauth_config").authority_url is None: + raise ValueError( + f"'oauth_config.authority_url' was none " + f"but should be set when using {v} authentication" + ) + if values.get("oauth_config").use_certificate is True: + if ( + values.get("oauth_config").base64_encoded_oauth_private_key + is None + ): raise ValueError( - f"'oauth_config.scopes' was none " - f"but should be set when using {v} authentication" + "'base64_encoded_oauth_private_key' was none " + "but should be set when using certificate for oauth_config" ) - if values.get("oauth_config").authority_url is None: + if values.get("oauth").base64_encoded_oauth_public_key is None: raise ValueError( - f"'oauth_config.authority_url' was none " - f"but 
should be set when using {v} authentication" + "'base64_encoded_oauth_public_key' was none" + "but should be set when using use_certificate true for oauth_config" ) - if values.get("oauth_config").use_certificate is True: - if ( - values.get("oauth_config").base64_encoded_oauth_private_key - is None - ): - raise ValueError( - "'base64_encoded_oauth_private_key' was none " - "but should be set when using certificate for oauth_config" - ) - if values.get("oauth").base64_encoded_oauth_public_key is None: - raise ValueError( - "'base64_encoded_oauth_public_key' was none" - "but should be set when using use_certificate true for oauth_config" - ) - else: - if values.get("oauth_config").client_secret is None: - raise ValueError( - "'oauth_config.client_secret' was none " - "but should be set when using use_certificate false for oauth_config" - ) - logger.info(f"using authenticator type '{v}'") + elif values.get("oauth_config").client_secret is None: + raise ValueError( + "'oauth_config.client_secret' was none " + "but should be set when using use_certificate false for oauth_config" + ) + logger.info(f"using authenticator type '{v}'") return v @pydantic.validator("include_view_lineage") From e7dd056f70ad4b0b000ae3b10fec80fec86f2b56 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:10 +1000 Subject: [PATCH 39/88] Update bigquery_usage.py --- .../src/datahub/ingestion/source_config/usage/bigquery_usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py b/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py index 05dc636d312c2e..9abee691ca9bf8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/usage/bigquery_usage.py @@ -114,7 +114,7 @@ class BigQueryUsageConfig(BigQueryBaseConfig, DatasetSourceConfigBase, BaseUsage credential: Optional[BigQueryCredential] = pydantic.Field( default=None, - description="Bigquery credential. Required if GOOGLE_APPLICATION_CREDENTIALS enviroment variable is not set. See this example recipe for details", + description="Bigquery credential. Required if GOOGLE_APPLICATION_CREDENTIALS environment variable is not set. 
See this example recipe for details", ) _credentials_path: Optional[str] = pydantic.PrivateAttr(None) temp_table_dataset_prefix: str = pydantic.Field( From 6bf6136eed0b1e49204e1a37a5687a3f119c3c49 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:13 +1000 Subject: [PATCH 40/88] Update base_transformer.py --- .../src/datahub/ingestion/transformer/base_transformer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py index ecc1dcfc5fd31f..c6f641c8fcd6e5 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py @@ -132,9 +132,7 @@ def _should_process( return True # fall through, no entity type matched return False - elif isinstance(record, MetadataChangeProposalWrapper) or isinstance( - record, MetadataChangeProposalClass - ): + elif isinstance(record, (MetadataChangeProposalWrapper, MetadataChangeProposalClass)): return record.entityType in entity_types # default to process everything that is not caught by above checks From 0a1db396b12d33f589ca5b031d6f906a263542d9 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:21 +1000 Subject: [PATCH 41/88] Update mark_dataset_status.py --- .../src/datahub/ingestion/transformer/mark_dataset_status.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/mark_dataset_status.py b/metadata-ingestion/src/datahub/ingestion/transformer/mark_dataset_status.py index bae8d0e07a80ab..d833e9bcc75a64 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/mark_dataset_status.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/mark_dataset_status.py @@ -40,6 +40,6 @@ def transform_aspect( self, entity_urn: str, aspect_name: str, aspect: Optional[builder.Aspect] ) -> Optional[builder.Aspect]: assert aspect is None or isinstance(aspect, StatusClass) - status_aspect: StatusClass = aspect if aspect else StatusClass(removed=None) + status_aspect: StatusClass = aspect or StatusClass(removed=None) status_aspect.removed = self.config.removed return status_aspect # type: ignore From df463c45f8a4b543718b94161900ef6a82f53b3f Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:23 +1000 Subject: [PATCH 42/88] Update action.py --- .../integrations/great_expectations/action.py | 38 ++++++++----------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py index 98b344c0a06cc7..572ecdf36302d5 100644 --- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py +++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py @@ -190,12 +190,11 @@ def _run( result = "DataHub notification succeeded" except Exception as e: result = "DataHub notification failed" - if self.graceful_exceptions: - logger.error(e) - logger.info("Supressing error because graceful_exceptions is set") - else: + if not self.graceful_exceptions: raise + logger.error(e) + logger.info("Suppressing error because graceful_exceptions is set") return {"datahub_notification_result": result} def get_assertions_with_results( @@ -224,7 +223,7 @@ def get_assertions_with_results( for result in validation_result_suite.results: 
expectation_config = result["expectation_config"] expectation_type = expectation_config["expectation_type"] - success = True if result["success"] else False + success = bool(result["success"]) kwargs = { k: v for k, v in expectation_config["kwargs"].items() if k != "batch_id" } @@ -271,8 +270,6 @@ def get_assertions_with_results( # TODO: Understand why their run time is incorrect. run_time = run_id.run_time.astimezone(timezone.utc) - assertionResults = [] - evaluation_parameters = ( { k: convert_to_string(v) @@ -328,8 +325,7 @@ def get_assertions_with_results( ) if ds.get("partitionSpec") is not None: assertionResult.partitionSpec = ds.get("partitionSpec") - assertionResults.append(assertionResult) - + assertionResults = [assertionResult] assertions_with_results.append( { "assertionUrn": assertionUrn, @@ -629,10 +625,8 @@ def get_dataset_partitions(self, batch_identifier, data_asset): query = data_asset.batches[ batch_identifier ].batch_request.runtime_parameters["query"] - partitionSpec = PartitionSpecClass( - type=PartitionTypeClass.QUERY, - partition="Query_" + builder.datahub_guid(query), - ) + partitionSpec = PartitionSpecClass(type=PartitionTypeClass.QUERY, partition=f"Query_{builder.datahub_guid(query)}") + batchSpec = BatchSpec( nativeBatchId=batch_identifier, query=query, @@ -678,9 +672,9 @@ def get_dataset_partitions(self, batch_identifier, data_asset): return dataset_partitions def get_platform_instance(self, datasource_name): - if self.platform_instance_map and datasource_name in self.platform_instance_map: - return self.platform_instance_map[datasource_name] if self.platform_instance_map: + if datasource_name in self.platform_instance_map: + return self.platform_instance_map[datasource_name] warn( f"Datasource {datasource_name} is not present in platform_instance_map" ) @@ -698,21 +692,21 @@ def make_dataset_urn_from_sqlalchemy_uri( schema_name, table_name = table_name.split(".")[-2:] if data_platform in ["redshift", "postgres"]: - schema_name = schema_name if schema_name else "public" + schema_name = schema_name or "public" if url_instance.database is None: warn( f"DataHubValidationAction failed to locate database name for {data_platform}." ) return None - schema_name = "{}.{}".format(url_instance.database, schema_name) + schema_name = f"{url_instance.database}.{schema_name}" elif data_platform == "mssql": - schema_name = schema_name if schema_name else "dbo" + schema_name = schema_name or "dbo" if url_instance.database is None: warn( f"DataHubValidationAction failed to locate database name for {data_platform}." ) return None - schema_name = "{}.{}".format(url_instance.database, schema_name) + schema_name = f"{url_instance.database}.{schema_name}" elif data_platform in ["trino", "snowflake"]: if schema_name is None or url_instance.database is None: warn( @@ -738,16 +732,16 @@ def make_dataset_urn_from_sqlalchemy_uri( ) ) return None - schema_name = "{}.{}".format(url_instance.host, url_instance.database) + schema_name = f"{url_instance.host}.{url_instance.database}" - schema_name = schema_name if schema_name else url_instance.database + schema_name = schema_name or url_instance.database if schema_name is None: warn( f"DataHubValidationAction failed to locate schema name for {data_platform}." 
) return None - dataset_name = "{}.{}".format(schema_name, table_name) + dataset_name = f"{schema_name}.{table_name}" dataset_urn = builder.make_dataset_urn_with_platform_instance( platform=data_platform, From 2ab2fddba5b0da0c9d7060645570f342a003814b Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:26 +1000 Subject: [PATCH 43/88] Update stats.py --- metadata-ingestion/src/datahub/telemetry/stats.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/telemetry/stats.py b/metadata-ingestion/src/datahub/telemetry/stats.py index ea48aab14c77db..e76580d677588c 100644 --- a/metadata-ingestion/src/datahub/telemetry/stats.py +++ b/metadata-ingestion/src/datahub/telemetry/stats.py @@ -27,9 +27,7 @@ def calculate_percentiles( min(i, size - 1) for i in percentile_indices ] # in case of rounding errors - values = {p: data_sorted[i] for p, i in zip(percentiles, percentile_indices)} - - return values + return {p: data_sorted[i] for p, i in zip(percentiles, percentile_indices)} def discretize(statistic: Union[float, int]) -> int: From d1881718ca0d1971397788e78288e3eaa8b760cc Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:29 +1000 Subject: [PATCH 44/88] Update upgrade.py --- .../src/datahub/upgrade/upgrade.py | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/metadata-ingestion/src/datahub/upgrade/upgrade.py b/metadata-ingestion/src/datahub/upgrade/upgrade.py index 046f32202d83bd..839e23f6536ae6 100644 --- a/metadata-ingestion/src/datahub/upgrade/upgrade.py +++ b/metadata-ingestion/src/datahub/upgrade/upgrade.py @@ -1,3 +1,4 @@ +import contextlib import logging from datetime import datetime, timedelta, timezone from functools import wraps @@ -221,7 +222,7 @@ def maybe_print_upgrade_message( # noqa: C901 encourage_cli_upgrade = False client_server_compat = 0 encourage_quickstart_upgrade = False - try: + with contextlib.suppress(Exception): version_stats = retrieve_versions(server) if not version_stats: return @@ -261,12 +262,9 @@ def maybe_print_upgrade_message( # noqa: C901 ): encourage_quickstart_upgrade = True - except Exception: - pass - # Compute recommendations and print one if client_server_compat < 0: - try: + with contextlib.suppress(Exception): assert version_stats print( colored("❗Client-Server Incompatible❗", "yellow"), @@ -279,10 +277,8 @@ def maybe_print_upgrade_message( # noqa: C901 "cyan", ), ) - except Exception: - pass elif client_server_compat > 0: - try: + with contextlib.suppress(Exception): assert version_stats print( colored("❗Client-Server Incompatible❗", "red"), @@ -295,12 +291,8 @@ def maybe_print_upgrade_message( # noqa: C901 "cyan", ), ) - except Exception: - pass - - # we only encourage upgrades if we think client_server is currently compatible elif client_server_compat == 0 and encourage_cli_upgrade: - try: + with contextlib.suppress(Exception): print( colored("💡 Upgrade cli!", "yellow"), colored( @@ -308,9 +300,6 @@ def maybe_print_upgrade_message( # noqa: C901 "cyan", ), ) - except Exception: - pass - elif encourage_quickstart_upgrade: try: assert version_stats From 5663dc9129b6bacf68f81e4793030e64c9eb43cb Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:32 +1000 Subject: [PATCH 45/88] Update telemetry.py --- metadata-ingestion/src/datahub/telemetry/telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/telemetry/telemetry.py 
b/metadata-ingestion/src/datahub/telemetry/telemetry.py index b95df169414320..0a346d09373850 100644 --- a/metadata-ingestion/src/datahub/telemetry/telemetry.py +++ b/metadata-ingestion/src/datahub/telemetry/telemetry.py @@ -273,7 +273,7 @@ def get_full_class_name(obj): module = obj.__class__.__module__ if module is None or module == str.__class__.__module__: return obj.__class__.__name__ - return module + "." + obj.__class__.__name__ + return f"{module}.{obj.__class__.__name__}" def with_telemetry(func: Callable[..., T]) -> Callable[..., T]: From b57b58b24f8e36c3f026b35f5b12cea03a91fe7e Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:34 +1000 Subject: [PATCH 46/88] Update hive_schema_to_avro.py --- .../datahub/utilities/hive_schema_to_avro.py | 31 +++++++------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py index fc9680ba642d42..c83ec153144f0f 100644 --- a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py +++ b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py @@ -52,10 +52,11 @@ def _parse_datatype_string( raise ValueError("'>' should be the last char, but got: %s" % s) parts = HiveColumnToAvroConverter._ignore_brackets_split(s[4:-1], ",") if len(parts) != 2: - raise ValueError( + raise ValueError(( "The map type string format is: 'map', " - + "but got: %s" % s - ) + + f"but got: {s}" + )) + kt = HiveColumnToAvroConverter._parse_datatype_string(parts[0]) vt = HiveColumnToAvroConverter._parse_datatype_string(parts[1]) # keys are assumed to be strings in avro map @@ -102,10 +103,8 @@ def _parse_struct_fields_string(s: str, **kwargs: Any) -> Dict[str, object]: for part in parts: name_and_type = HiveColumnToAvroConverter._ignore_brackets_split(part, ":") if len(name_and_type) != 2: - raise ValueError( - "The struct field string format is: 'field_name:field_type', " - + "but got: %s" % part - ) + raise ValueError(("The struct field string format is: 'field_name:field_type', " + f"but got: {part}")) + field_name = name_and_type[0].strip() if field_name.startswith("`"): if field_name[-1] != "`": @@ -117,17 +116,11 @@ def _parse_struct_fields_string(s: str, **kwargs: Any) -> Dict[str, object]: fields.append({"name": field_name, "type": field_type}) if kwargs.get("ustruct_seqn") is not None: - struct_name = "__structn_{}_{}".format( - kwargs["ustruct_seqn"], str(uuid.uuid4()).replace("-", "") - ) + struct_name = f'__structn_{kwargs["ustruct_seqn"]}_{str(uuid.uuid4()).replace("-", "")}' + else: - struct_name = "__struct_{}".format(str(uuid.uuid4()).replace("-", "")) - return { - "type": "record", - "name": struct_name, - "fields": fields, - "native_data_type": "struct<{}>".format(s), - } + struct_name = f'__struct_{str(uuid.uuid4()).replace("-", "")}' + return {"type": "record", "name": struct_name, "fields": fields, "native_data_type": f"struct<{s}>"} @staticmethod def _parse_basic_datatype_string(s: str) -> Dict[str, object]: @@ -193,7 +186,7 @@ def _ignore_brackets_split(s: str, separator: str) -> List[str]: buf += c elif c in HiveColumnToAvroConverter._BRACKETS.values(): if level == 0: - raise ValueError("Brackets are not correctly paired: %s" % s) + raise ValueError(f"Brackets are not correctly paired: {s}") level -= 1 buf += c elif c == separator and level > 0: @@ -205,7 +198,7 @@ def _ignore_brackets_split(s: str, separator: str) -> List[str]: buf += c if len(buf) == 0: - raise 
ValueError("The %s cannot be the last char: %s" % (separator, s)) + raise ValueError(f"The {separator} cannot be the last char: {s}") parts.append(buf) return parts From d1dda5e32ce5c2ce314a68f89e5273f46b2d64b9 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:36 +1000 Subject: [PATCH 47/88] Update mapping.py --- metadata-ingestion/src/datahub/utilities/mapping.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 2b6c458db8d1ce..af4916a4055747 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -172,7 +172,7 @@ def get_operation_value( def sanitize_owner_ids(self, owner_id: str) -> str: if owner_id.__contains__("@"): - owner_id = owner_id[0 : owner_id.index("@")] + owner_id = owner_id[:owner_id.index("@")] return owner_id def is_match(self, match_clause: Any, raw_props_value: Any) -> bool: @@ -181,9 +181,8 @@ def is_match(self, match_clause: Any, raw_props_value: Any) -> bool: if type(raw_props_value) not in Constants.OPERAND_DATATYPE_SUPPORTED or type( raw_props_value ) != type(match_clause): - is_matching = False + return False elif type(raw_props_value) == str: - is_matching = True if re.match(match_clause, raw_props_value) else False + return bool(re.match(match_clause, raw_props_value)) else: - is_matching = match_clause == raw_props_value - return is_matching + return match_clause == raw_props_value From a3839e25574b589743ed710d61183ac808b89f9e Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:38 +1000 Subject: [PATCH 48/88] Update memory_leak_detector.py --- .../src/datahub/utilities/memory_leak_detector.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/memory_leak_detector.py b/metadata-ingestion/src/datahub/utilities/memory_leak_detector.py index b5fa3c3a723ea4..ef0db205b72ac9 100644 --- a/metadata-ingestion/src/datahub/utilities/memory_leak_detector.py +++ b/metadata-ingestion/src/datahub/utilities/memory_leak_detector.py @@ -12,7 +12,7 @@ def _trace_has_file(trace: tracemalloc.Traceback, file_pattern: str) -> bool: - for frame_index in range(0, len(trace)): + for frame_index in range(len(trace)): cur_frame = trace[frame_index] if fnmatch.fnmatch(cur_frame.filename, file_pattern): return True @@ -99,8 +99,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: _init_leak_detection() try: - res = func(*args, **kwargs) - return res + return func(*args, **kwargs) finally: if detect_leaks: _perform_leak_detection() From 1d180c15a48e25a80b184ae52db75b5d5f35ed2d Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:40 +1000 Subject: [PATCH 49/88] Update server_config_util.py --- metadata-ingestion/src/datahub/utilities/server_config_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/utilities/server_config_util.py b/metadata-ingestion/src/datahub/utilities/server_config_util.py index c919a1356f2642..40841321ad2778 100644 --- a/metadata-ingestion/src/datahub/utilities/server_config_util.py +++ b/metadata-ingestion/src/datahub/utilities/server_config_util.py @@ -3,7 +3,7 @@ from datahub.telemetry.telemetry import set_telemetry_enable # Only to be written to for logging server related information -global_debug: Dict[str, Any] = dict() +global_debug: Dict[str, Any] = {} def set_gms_config(config: Dict) -> Any: 
From ab48916abae7766657d40dd1ce52bf3c38aed487 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:42 +1000 Subject: [PATCH 50/88] Update sql_lineage_parser_impl.py --- .../utilities/sql_lineage_parser_impl.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py b/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py index 80ea7cc31455cd..63b3edaf8c0556 100644 --- a/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py +++ b/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py @@ -1,3 +1,4 @@ +import contextlib import logging import re import unittest @@ -7,15 +8,12 @@ from sqllineage.core.holders import Column, SQLLineageHolder from sqllineage.exceptions import SQLLineageException -try: +with contextlib.suppress(ImportError): import sqlparse from networkx import DiGraph from sqllineage.core import LineageAnalyzer import datahub.utilities.sqllineage_patch -except ImportError: - pass - logger = logging.getLogger(__name__) @@ -97,7 +95,7 @@ def __init__(self, sql_query: str) -> None: logger.error(f"SQL lineage analyzer error '{e}' for query: '{self._sql}") def get_tables(self) -> List[str]: - result: List[str] = list() + result: List[str] = [] if self._sql_holder is None: logger.error("sql holder not present so cannot get tables") return result @@ -135,12 +133,8 @@ def get_columns(self) -> List[str]: result.add(str(column.raw_name)) # Reverting back all the previously renamed words which confuses the parser - result = set(["date" if c == self._DATE_SWAP_TOKEN else c for c in result]) - result = set( - [ - "timestamp" if c == self._TIMESTAMP_SWAP_TOKEN else c - for c in list(result) - ] - ) + result = {"date" if c == self._DATE_SWAP_TOKEN else c for c in result} + result = {"timestamp" if c == self._TIMESTAMP_SWAP_TOKEN else c for c in list(result)} + # swap back renamed date column return list(result) From bb37d4d8966df05e24e799c3c2c227333bfa7103 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:46 +1000 Subject: [PATCH 51/88] Update sql_parser.py --- metadata-ingestion/src/datahub/utilities/sql_parser.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/sql_parser.py b/metadata-ingestion/src/datahub/utilities/sql_parser.py index eb0bc0ec8262f4..28b5082ccbb3b2 100644 --- a/metadata-ingestion/src/datahub/utilities/sql_parser.py +++ b/metadata-ingestion/src/datahub/utilities/sql_parser.py @@ -1,3 +1,4 @@ +import contextlib import logging import multiprocessing import re @@ -9,11 +10,8 @@ from datahub.utilities.sql_lineage_parser_impl import SqlLineageSQLParserImpl -try: +with contextlib.suppress(ImportError): from sql_metadata import Parser as MetadataSQLParser -except ImportError: - pass - logger = logging.getLogger(__name__) From 15740801cb87cba36d46f2e32d65dad68997ec7f Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:31:48 +1000 Subject: [PATCH 52/88] Update sqlalchemy_query_combiner.py --- .../src/datahub/utilities/sqlalchemy_query_combiner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py b/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py index 0474f4ec7d3d68..947f5e30d62c89 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py +++ 
b/metadata-ingestion/src/datahub/utilities/sqlalchemy_query_combiner.py @@ -108,8 +108,7 @@ def get_query_columns(query: Any) -> List[Any]: try: # inner_columns will be more accurate if the column names are unnamed, # since .columns will remove the "duplicates". - inner_columns = list(query.inner_columns) - return inner_columns + return list(query.inner_columns) except AttributeError: return list(query.columns) From 705050e2e3a548dfd4ee211ca7aaf81ceada5212 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:42:16 +1000 Subject: [PATCH 53/88] Update powerbi.py --- .../src/datahub/ingestion/source/powerbi.py | 57 +++++++++---------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi.py index 5cfba5fa2ec14b..a192840cd5dd68 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi.py @@ -131,7 +131,7 @@ class PowerBiAPIConfig(EnvBasedSourceConfigBase): authority = "https://login.microsoftonline.com/" def get_authority_url(self): - return "{}{}".format(self.authority, self.tenant_id) + return f"{self.authority}{self.tenant_id}" class PowerBiDashboardSourceConfig(PowerBiAPIConfig): @@ -216,7 +216,7 @@ class Table: tables: List[Any] def get_urn_part(self): - return "datasets.{}".format(self.id) + return f"datasets.{self.id}" def __members(self): return (self.id,) @@ -239,7 +239,7 @@ class Report: dataset: Any def get_urn_part(self): - return "reports.{}".format(self.id) + return f"reports.{self.id}" @dataclass class Tile: @@ -257,7 +257,7 @@ class CreatedFrom(Enum): createdFrom: CreatedFrom def get_urn_part(self): - return "charts.{}".format(self.id) + return f"charts.{self.id}" @dataclass class User: @@ -269,7 +269,7 @@ class User: principalType: str def get_urn_part(self): - return "users.{}".format(self.id) + return f"users.{self.id}" def __members(self): return (self.id,) @@ -296,7 +296,7 @@ class Dashboard: users: List[Any] def get_urn_part(self): - return "dashboards.{}".format(self.id) + return f"dashboards.{self.id}" def __members(self): return (self.id,) @@ -322,9 +322,9 @@ def __init__(self, config: PowerBiAPIConfig) -> None: ) # Test connection by generating a access token - LOGGER.info("Trying to connect to {}".format(self.__config.get_authority_url())) + LOGGER.info(f"Trying to connect to {self.__config.get_authority_url()}") self.get_access_token() - LOGGER.info("Able to connect to {}".format(self.__config.get_authority_url())) + LOGGER.info(f"Able to connect to {self.__config.get_authority_url()}") def __get_users(self, workspace_id: str, entity: str, id: str) -> List[User]: """ @@ -338,7 +338,7 @@ def __get_users(self, workspace_id: str, entity: str, id: str) -> List[User]: ENTITY_ID=id, ) # Hit PowerBi - LOGGER.info("Request to URL={}".format(user_list_endpoint)) + LOGGER.info(f"Request to URL={user_list_endpoint}") response = requests.get( url=user_list_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -346,14 +346,11 @@ def __get_users(self, workspace_id: str, entity: str, id: str) -> List[User]: # Check if we got response from PowerBi if response.status_code != 200: - LOGGER.warning( - "Failed to fetch user list from power-bi for, http_status={}, message={}".format( - response.status_code, response.text - ) - ) - LOGGER.info("{}={}".format(Constant.WorkspaceId, workspace_id)) - LOGGER.info("{}={}".format(Constant.ENTITY, entity)) - 
LOGGER.info("{}={}".format(Constant.ID, id)) + LOGGER.warning(f"Failed to fetch user list from power-bi for, http_status={response.status_code}, message={response.text}") + + LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") + LOGGER.info(f"{Constant.ENTITY}={entity}") + LOGGER.info(f"{Constant.ID}={id}") raise ConnectionError("Failed to fetch the user list from the power-bi") users_dict: List[Any] = response.json()[Constant.VALUE] @@ -379,8 +376,8 @@ def __get_report(self, workspace_id: str, report_id: str) -> Any: """ if workspace_id is None or report_id is None: LOGGER.info("Input values are None") - LOGGER.info("{}={}".format(Constant.WorkspaceId, workspace_id)) - LOGGER.info("{}={}".format(Constant.ReportId, report_id)) + LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") + LOGGER.info(f"{Constant.ReportId}={report_id}") return None report_get_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.REPORT_GET] @@ -391,7 +388,7 @@ def __get_report(self, workspace_id: str, report_id: str) -> Any: REPORT_ID=report_id, ) # Hit PowerBi - LOGGER.info("Request to report URL={}".format(report_get_endpoint)) + LOGGER.info(f"Request to report URL={report_get_endpoint}") response = requests.get( url=report_get_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -401,8 +398,8 @@ def __get_report(self, workspace_id: str, report_id: str) -> Any: if response.status_code != 200: message: str = "Failed to fetch report from power-bi for" LOGGER.warning(message) - LOGGER.warning("{}={}".format(Constant.WorkspaceId, workspace_id)) - LOGGER.warning("{}={}".format(Constant.ReportId, report_id)) + LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") + LOGGER.warning(f"{Constant.ReportId}={report_id}") raise ConnectionError(message) response_dict = response.json() @@ -440,7 +437,7 @@ def get_access_token(self): self.__access_token = "Bearer {}".format(auth_response.get("access_token")) - LOGGER.debug("{}={}".format(Constant.PBIAccessToken, self.__access_token)) + LOGGER.debug(f"{Constant.PBIAccessToken}={self.__access_token}") return self.__access_token @@ -464,7 +461,7 @@ def get_dashboards(self, workspace: Workspace) -> List[Dashboard]: POWERBI_BASE_URL=self.__config.base_url, WORKSPACE_ID=workspace.id ) # Hit PowerBi - LOGGER.info("Request to URL={}".format(dashboard_list_endpoint)) + LOGGER.info(f"Request to URL={dashboard_list_endpoint}") response = requests.get( url=dashboard_list_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -473,7 +470,7 @@ def get_dashboards(self, workspace: Workspace) -> List[Dashboard]: # Check if we got response from PowerBi if response.status_code != 200: LOGGER.warning("Failed to fetch dashboard list from power-bi for") - LOGGER.warning("{}={}".format(Constant.WorkspaceId, workspace.id)) + LOGGER.warning(f"{Constant.WorkspaceId}={workspace.id}") raise ConnectionError( "Failed to fetch the dashboard list from the power-bi" ) @@ -505,8 +502,8 @@ def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: """ if workspace_id is None or dataset_id is None: LOGGER.info("Input values are None") - LOGGER.info("{}={}".format(Constant.WorkspaceId, workspace_id)) - LOGGER.info("{}={}".format(Constant.DatasetId, dataset_id)) + LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") + LOGGER.info(f"{Constant.DatasetId}={dataset_id}") return None dataset_get_endpoint: str = PowerBiAPI.API_ENDPOINTS[Constant.DATASET_GET] @@ -517,7 +514,7 @@ def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: DATASET_ID=dataset_id, ) # Hit 
PowerBi - LOGGER.info("Request to dataset URL={}".format(dataset_get_endpoint)) + LOGGER.info(f"Request to dataset URL={dataset_get_endpoint}") response = requests.get( url=dataset_get_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -527,8 +524,8 @@ def get_dataset(self, workspace_id: str, dataset_id: str) -> Any: if response.status_code != 200: message: str = "Failed to fetch dataset from power-bi for" LOGGER.warning(message) - LOGGER.warning("{}={}".format(Constant.WorkspaceId, workspace_id)) - LOGGER.warning("{}={}".format(Constant.DatasetId, dataset_id)) + LOGGER.warning(f"{Constant.WorkspaceId}={workspace_id}") + LOGGER.warning(f"{Constant.DatasetId}={dataset_id}") raise ConnectionError(message) response_dict = response.json() From b7df2f923ea72b8a40c6c4b7f19ab174ee3f650a Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:42:18 +1000 Subject: [PATCH 54/88] Update redash.py --- .../src/datahub/ingestion/source/redash.py | 47 +++++++------------ 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index 2abd61849ac260..7aceafd22bd5aa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -203,29 +203,23 @@ def get_full_qualified_name(self, database_name: str, table_name: str) -> str: def get_full_qualified_name(platform: str, database_name: str, table_name: str) -> str: - if platform == "postgres": - full_qualified_name = PostgresQualifiedNameParser().get_full_qualified_name( - database_name, table_name - ) - elif platform == "mysql": - full_qualified_name = MysqlQualifiedNameParser().get_full_qualified_name( - database_name, table_name - ) - elif platform == "mssql": - full_qualified_name = MssqlQualifiedNameParser().get_full_qualified_name( - database_name, table_name - ) - elif platform == "athena": - full_qualified_name = AthenaQualifiedNameParser().get_full_qualified_name( - database_name, table_name - ) + if platform == "athena": + return AthenaQualifiedNameParser().get_full_qualified_name(database_name, table_name) + elif platform == "bigquery": - full_qualified_name = BigqueryQualifiedNameParser().get_full_qualified_name( - database_name, table_name - ) + return BigqueryQualifiedNameParser().get_full_qualified_name(database_name, table_name) + + elif platform == "mssql": + return MssqlQualifiedNameParser().get_full_qualified_name(database_name, table_name) + + elif platform == "mysql": + return MysqlQualifiedNameParser().get_full_qualified_name(database_name, table_name) + + elif platform == "postgres": + return PostgresQualifiedNameParser().get_full_qualified_name(database_name, table_name) + else: - full_qualified_name = f"{database_name}.{table_name}" - return full_qualified_name + return f"{database_name}.{table_name}" class RedashConfig(ConfigModel): @@ -405,8 +399,7 @@ def _get_platform_based_on_datasource(self, data_source: Dict) -> str: map = REDASH_DATA_SOURCE_TO_DATAHUB_MAP.get( data_source_type, {"platform": DEFAULT_DATA_SOURCE_PLATFORM} ) - platform = map.get("platform", DEFAULT_DATA_SOURCE_PLATFORM) - return platform + return map.get("platform", DEFAULT_DATA_SOURCE_PLATFORM) return DEFAULT_DATA_SOURCE_PLATFORM def _get_database_name_based_on_datasource( @@ -596,9 +589,7 @@ def _process_dashboard_response( # the API is id based not slug based # Tested the same with a Redash instance dashboard_id = 
dashboard_response["id"] - dashboard_data = self.client._get( - "api/dashboards/{}".format(dashboard_id) - ).json() + dashboard_data = self.client._get(f"api/dashboards/{dashboard_id}").json() except Exception: # This does not work in our testing but keeping for now because # people in community are using Redash connector successfully @@ -686,9 +677,7 @@ def _get_chart_snapshot(self, query_data: Dict, viz_data: Dict) -> ChartSnapshot chart_type = self._get_chart_type_from_viz_data(viz_data) query_id = query_data.get("id") chart_url = f"{self.config.connect_uri}/queries/{query_id}#{viz_id}" - description = ( - viz_data.get("description", "") if viz_data.get("description", "") else "" - ) + description = viz_data.get("description", "") or "" data_source_id = query_data.get("data_source_id") data_source = self._get_chart_data_source(data_source_id) data_source_type = data_source.get("type") From 2c51bc386d6f793c2907aba9237360a38a1d6ec5 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:42:21 +1000 Subject: [PATCH 55/88] Update tableau.py --- .../src/datahub/ingestion/source/tableau.py | 42 +++++-------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index fdb57ef4b543cc..2a6696f8823683 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -398,22 +398,17 @@ def _create_upstream_table_lineage( table_path = None if project and datasource.get("name"): - table_name = table.get("name") if table.get("name") else table["id"] + table_name = table.get("name") or table["id"] table_path = f"{project.replace('/', REPLACE_SLASH_CHAR)}/{datasource['name']}/{table_name}" - self.upstream_tables[table_urn] = ( - table.get("columns", []), - table_path, - table.get("isEmbedded") if table.get("isEmbedded") else False, - ) + self.upstream_tables[table_urn] = table.get("columns", []), table_path, table.get("isEmbedded") or False + return upstream_tables def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]: count_on_query = len(self.custom_sql_ids_being_used) - custom_sql_filter = "idWithin: {}".format( - json.dumps(self.custom_sql_ids_being_used) - ) + custom_sql_filter = f"idWithin: {json.dumps(self.custom_sql_ids_being_used)}" custom_sql_connection, total_count, has_next_page = self.get_connection_object( custom_sql_graphql_query, "customSQLTablesConnection", custom_sql_filter ) @@ -491,7 +486,7 @@ def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]: dataset_snapshot.aspects.append(schema_metadata) # Browse path - csql_name = csql.get("name") if csql.get("name") else csql_id + csql_name = csql.get("name") or csql_id if project and datasource_name: browse_paths = BrowsePathsClass( @@ -605,7 +600,6 @@ def _get_schema_metadata_for_datasource( self, datasource_fields: List[dict] ) -> Optional[SchemaMetadata]: fields = [] - schema_metadata = None for field in datasource_fields: # check datasource - custom sql relations from a field being referenced self._track_custom_sql_ids(field) @@ -632,17 +626,7 @@ def _get_schema_metadata_for_datasource( ) fields.append(schema_field) - if fields: - schema_metadata = SchemaMetadata( - schemaName="test", - platform=f"urn:li:dataPlatform:{self.platform}", - version=0, - fields=fields, - hash="", - platformSchema=OtherSchema(rawSchema=""), - ) - - return schema_metadata + return SchemaMetadata(schemaName="test", 
platform=f"urn:li:dataPlatform:{self.platform}", version=0, fields=fields, hash="", platformSchema=OtherSchema(rawSchema=""),) if fields else None def get_metadata_change_event( self, snap_shot: Union["DatasetSnapshot", "DashboardSnapshot", "ChartSnapshot"] @@ -697,9 +681,7 @@ def emit_datasource( aspects=[], ) - datasource_name = ( - datasource.get("name") if datasource.get("name") else datasource_id - ) + datasource_name = datasource.get("name") or datasource_id if is_embedded_ds and workbook and workbook.get("name"): datasource_name = f"{workbook['name']}/{datasource_name}" # Browse path @@ -780,9 +762,7 @@ def emit_datasource( def emit_published_datasources(self) -> Iterable[MetadataWorkUnit]: count_on_query = len(self.datasource_ids_being_used) - datasource_filter = "idWithin: {}".format( - json.dumps(self.datasource_ids_being_used) - ) + datasource_filter = f"idWithin: {json.dumps(self.datasource_ids_being_used)}" ( published_datasource_conn, total_count, @@ -933,7 +913,7 @@ def emit_sheets_as_charts(self, workbook: Dict) -> Iterable[MetadataWorkUnit]: chart_snapshot.aspects.append(chart_info) if workbook.get("projectName") and workbook.get("name"): - sheet_name = sheet.get("name") if sheet.get("name") else sheet["id"] + sheet_name = sheet.get("name") or sheet["id"] # Browse path browse_path = BrowsePathsClass( paths=[ @@ -1050,7 +1030,7 @@ def emit_dashboards(self, workbook: Dict) -> Iterable[MetadataWorkUnit]: dashboard_snapshot.aspects.append(dashboard_info_class) if workbook.get("projectName") and workbook.get("name"): - dashboard_name = title if title else dashboard["id"] + dashboard_name = title or dashboard["id"] # browse path browse_paths = BrowsePathsClass( paths=[ @@ -1104,7 +1084,7 @@ def _get_schema(self, schema_provided: str, database: str, fullName: str) -> str def _extract_schema_from_fullName(self, fullName: str) -> str: # fullName is observed to be in format [schemaName].[tableName] # OR simply tableName OR [tableName] - if fullName.startswith("[") and fullName.find("].[") >= 0: + if fullName.startswith("[") and "].[" in fullName: return fullName[1 : fullName.index("]")] return "" From 4c9705f62c29434d5406a403b1119ae53da8e8e6 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:42:23 +1000 Subject: [PATCH 56/88] Update pulsar.py --- .../src/datahub/ingestion/source/pulsar.py | 47 +++++++------------ 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py index e4d9a505ea7210..e969c2d3aeb3ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py +++ b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py @@ -98,7 +98,7 @@ def __init__(self, config: PulsarSourceConfig, ctx: PipelineContext): self.platform: str = "pulsar" self.config: PulsarSourceConfig = config self.report: PulsarSourceReport = PulsarSourceReport() - self.base_url: str = self.config.web_service_url + "/admin/v2" + self.base_url: str = f"{self.config.web_service_url}/admin/v2" self.tenants: List[str] = config.tenants if ( @@ -119,9 +119,7 @@ def __init__(self, config: PulsarSourceConfig, ctx: PipelineContext): if self._is_oauth_authentication_configured(): # Get OpenId configuration from issuer, e.g. 
token_endpoint - oid_config_url = ( - "%s/.well-known/openid-configuration" % self.config.issuer_url - ) + oid_config_url = f"{self.config.issuer_url}/.well-known/openid-configuration" oid_config_response = requests.get( oid_config_url, verify=False, allow_redirects=False ) @@ -129,10 +127,8 @@ def __init__(self, config: PulsarSourceConfig, ctx: PipelineContext): if oid_config_response: self.config.oid_config.update(oid_config_response.json()) else: - logger.error( - "Unexpected response while getting discovery document using %s : %s" - % (oid_config_url, oid_config_response) - ) + logger.error(f"Unexpected response while getting discovery document using {oid_config_url} : {oid_config_response}") + if "token_endpoint" not in self.config.oid_config: raise Exception( @@ -323,17 +319,13 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: ] # Report the Pulsar broker version we are communicating with - self.report.report_pulsar_version( - self.session.get( - "%s/brokers/version" % self.base_url, - timeout=self.config.timeout, - ).text - ) + self.report.report_pulsar_version(self.session.get(f"{self.base_url}/brokers/version", timeout=self.config.timeout).text) + # If no tenants are provided, request all tenants from cluster using /admin/v2/tenants endpoint. # Requesting cluster tenant information requires superuser privileges if not self.tenants: - self.tenants = self._get_pulsar_metadata(self.base_url + "/tenants") or [] + self.tenants = self._get_pulsar_metadata(f"{self.base_url}/tenants") or [] # Initialize counters self.report.tenants_scanned = 0 @@ -345,10 +337,9 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: if self.config.tenant_patterns.allowed(tenant): # Get namespaces belonging to a tenant, /admin/v2/%s/namespaces # A tenant admin role has sufficient privileges to perform this action - namespaces = ( - self._get_pulsar_metadata(self.base_url + "/namespaces/%s" % tenant) - or [] - ) + namespaces = self._get_pulsar_metadata(f"{self.base_url}/namespaces/{tenant}") or [] + + for namespace in namespaces: self.report.namespaces_scanned += 1 if self.config.namespace_patterns.allowed(namespace): @@ -406,14 +397,10 @@ def _add_topic_to_checkpoint(self, topic: str) -> None: ) def _is_token_authentication_configured(self) -> bool: - if self.config.token is not None: - return True - return False + return self.config.token is not None def _is_oauth_authentication_configured(self) -> bool: - if self.config.issuer_url is not None: - return True - return False + return self.config.issuer_url is not None def _get_schema_and_fields( self, pulsar_topic: PulsarTopic, is_key_schema: bool @@ -421,11 +408,8 @@ def _get_schema_and_fields( pulsar_schema: Optional[PulsarSchema] = None - schema_url = self.base_url + "/schemas/%s/%s/%s/schema" % ( - pulsar_topic.tenant, - pulsar_topic.namespace, - pulsar_topic.topic, - ) + schema_url = self.base_url + f"/schemas/{pulsar_topic.tenant}/{pulsar_topic.namespace}/{pulsar_topic.topic}/schema" + schema_payload = self._get_pulsar_metadata(schema_url) @@ -449,7 +433,7 @@ def _get_schema_fields( ) -> List[SchemaField]: # Parse the schema and convert it to SchemaFields. 
fields: List[SchemaField] = [] - if schema.schema_type == "AVRO" or schema.schema_type == "JSON": + if schema.schema_type in ["AVRO", "JSON"]: # Extract fields from schema and get the FQN for the schema fields = schema_util.avro_schema_to_mce_fields( schema.schema_str, is_key_schema=is_key_schema @@ -465,6 +449,7 @@ def _get_schema_metadata( self, pulsar_topic: PulsarTopic, platform_urn: str ) -> Tuple[Optional[PulsarSchema], Optional[SchemaMetadata]]: + # FIXME: Type annotations are not working for this function. schema, fields = self._get_schema_and_fields( pulsar_topic=pulsar_topic, is_key_schema=False ) # type: Tuple[Optional[PulsarSchema], List[SchemaField]] From d4afcb76f14e32641cca260e31ac2734c686787b Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:42:24 +1000 Subject: [PATCH 57/88] Update tableau_common.py --- .../src/datahub/ingestion/source/tableau_common.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index ddebc6a437ea4b..90a254e1c872bd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -430,8 +430,7 @@ def make_table_urn( # if there are more than 3 tokens, just take the final 3 fully_qualified_table_name = ".".join(fully_qualified_table_name.split(".")[-3:]) - urn = builder.make_dataset_urn(platform, fully_qualified_table_name, env) - return urn + return builder.make_dataset_urn(platform, fully_qualified_table_name, env) def make_description_from_params(description, formula): @@ -448,10 +447,9 @@ def make_description_from_params(description, formula): def get_field_value_in_sheet(field, field_name): if field.get("__typename", "") == "DatasourceField": - field = field.get("remoteField") if field.get("remoteField") else {} + field = field.get("remoteField") or {} - field_value = field.get(field_name, "") - return field_value + return field.get(field_name, "") def get_unique_custom_sql(custom_sql_list: List[dict]) -> List[dict]: @@ -503,6 +501,4 @@ def query_metadata(server, main_query, connection_name, first, offset, qry_filte filter=qry_filter, main_query=main_query, ) - query_result = server.metadata.query(query) - - return query_result + return server.metadata.query(query) From c4b15e95eebd2a45abc95c947d8dba12cc8cce1d Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:49:35 +1000 Subject: [PATCH 58/88] Update powerbi.py --- .../src/datahub/ingestion/source/powerbi.py | 131 ++++++------------ 1 file changed, 44 insertions(+), 87 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi.py index a192840cd5dd68..f9df9a802a3719 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi.py @@ -555,7 +555,7 @@ def get_data_source(self, dataset: Dataset) -> Any: DATASET_ID=dataset.id, ) # Hit PowerBi - LOGGER.info("Request to datasource URL={}".format(datasource_get_endpoint)) + LOGGER.info(f"Request to datasource URL={datasource_get_endpoint}") response = requests.get( url=datasource_get_endpoint, headers={Constant.Authorization: self.get_access_token()}, @@ -565,18 +565,15 @@ def get_data_source(self, dataset: Dataset) -> Any: if response.status_code != 200: message: str = "Failed to fetch datasource from power-bi for" 
LOGGER.warning(message) - LOGGER.warning("{}={}".format(Constant.WorkspaceId, dataset.workspace_id)) - LOGGER.warning("{}={}".format(Constant.DatasetId, dataset.id)) + LOGGER.warning(f"{Constant.WorkspaceId}={dataset.workspace_id}") + LOGGER.warning(f"{Constant.DatasetId}={dataset.id}") raise ConnectionError(message) res = response.json() value = res["value"] if len(value) == 0: - LOGGER.info( - "datasource is not found for dataset {}({})".format( - dataset.name, dataset.id - ) - ) + LOGGER.info(f"datasource is not found for dataset {dataset.name}({dataset.id})") + return None # Consider only zero index datasource datasource_dict = value[0] @@ -642,13 +639,8 @@ def new_dataset_or_report(tile_instance: Any) -> dict: else: report_fields["createdFrom"] = PowerBiAPI.Tile.CreatedFrom.VISUALIZATION - LOGGER.info( - "Tile {}({}) is created from {}".format( - tile_instance.get("title"), - tile_instance.get("id"), - report_fields["createdFrom"], - ) - ) + LOGGER.info(f'Tile {tile_instance.get("title")}({tile_instance.get("id")}) is created from {report_fields["createdFrom"]}') + return report_fields @@ -698,7 +690,7 @@ def get_workspace(self, workspace_id: str) -> Workspace: POWERBI_ADMIN_BASE_URL=self.__config.admin_base_url ) - def create_scan_job(): + def create_scan_job(): # sourcery skip: avoid-builtin-shadow """ Create scan job on PowerBi for the workspace """ @@ -718,9 +710,8 @@ def create_scan_job(): ) if res.status_code not in (200, 202): - message = "API({}) return error code {} for workpace id({})".format( - scan_create_endpoint, res.status_code, workspace_id - ) + message = f"API({scan_create_endpoint}) return error code {res.status_code} for workspace id({workspace_id})" + LOGGER.warning(message) @@ -736,47 +727,40 @@ def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Boolean: """ minimum_sleep = 3 if timeout < minimum_sleep: - LOGGER.info( - "Setting timeout to minimum_sleep time {} seconds".format( - minimum_sleep - ) - ) + LOGGER.info(f"Setting timeout to minimum_sleep time {minimum_sleep} seconds") timeout = minimum_sleep - max_trial = int(timeout / minimum_sleep) - LOGGER.info("Max trial {}".format(max_trial)) + max_trial = timeout // minimum_sleep + LOGGER.info(f"Max trial {max_trial}") scan_get_endpoint = PowerBiAPI.API_ENDPOINTS[Constant.SCAN_GET] scan_get_endpoint = scan_get_endpoint.format( POWERBI_ADMIN_BASE_URL=self.__config.admin_base_url, SCAN_ID=scan_id ) - LOGGER.info("Hitting URL={}".format(scan_get_endpoint)) + LOGGER.info(f"Hitting URL={scan_get_endpoint}") trail = 1 while True: - LOGGER.info("Trial = {}".format(trail)) + LOGGER.info(f"Trial = {trail}") res = requests.get( scan_get_endpoint, headers={Constant.Authorization: self.get_access_token()}, ) if res.status_code != 200: - message = "API({}) return error code {} for scan id({})".format( - scan_get_endpoint, res.status_code, scan_id - ) + message = f"API({scan_get_endpoint}) return error code {res.status_code} for scan id({scan_id})" + LOGGER.warning(message) raise ConnectionError(message) if res.json()["status"].upper() == "Succeeded".upper(): - LOGGER.info( - "Scan result is available for scan id({})".format(scan_id) - ) + LOGGER.info(f"Scan result is available for scan id({scan_id})") return True if trail == max_trial: break - LOGGER.info("Sleeping for {} seconds".format(minimum_sleep)) + LOGGER.info(f"Sleeping for {minimum_sleep} seconds") sleep(minimum_sleep) trail += 1 @@ -785,7 +769,7 @@ def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Boolean: def get_scan_result(scan_id: str) -> 
dict: LOGGER.info("Fetching scan result") - LOGGER.info("{}={}".format(Constant.SCAN_ID, scan_id)) + LOGGER.info(f"{Constant.SCAN_ID}={scan_id}") scan_result_get_endpoint = PowerBiAPI.API_ENDPOINTS[ Constant.SCAN_RESULT_GET ] @@ -793,15 +777,14 @@ def get_scan_result(scan_id: str) -> dict: POWERBI_ADMIN_BASE_URL=self.__config.admin_base_url, SCAN_ID=scan_id ) - LOGGER.info("Hittin URL={}".format(scan_result_get_endpoint)) + LOGGER.info(f"Hitting URL={scan_result_get_endpoint}") res = requests.get( scan_result_get_endpoint, headers={Constant.Authorization: self.get_access_token()}, ) if res.status_code != 200: - message = "API({}) return error code {} for scan id({})".format( - scan_result_get_endpoint, res.status_code, scan_id - ) + message = f"API({scan_result_get_endpoint}) return error code {res.status_code} for scan id({scan_id})" + LOGGER.warning(message) @@ -817,11 +800,8 @@ def json_to_dataset_map(scan_result: dict) -> dict: dataset_map: dict = {} if datasets is None or len(datasets) == 0: - LOGGER.warning( - "Workspace {}({}) does not have datasets".format( - scan_result["name"], scan_result["id"] - ) - ) + LOGGER.warning(f'Workspace {scan_result["name"]}({scan_result["id"]}) does not have datasets') + LOGGER.info("Returning empty datasets") return dataset_map @@ -840,19 +820,13 @@ def json_to_dataset_map(scan_result: dict) -> dict: dataset_instance.datasource and dataset_instance.datasource.metadata.is_relational is True ): - LOGGER.info( - "Processing tables attribute for dataset {}({})".format( - dataset_instance.name, dataset_instance.id - ) - ) + LOGGER.info(f"Processing tables attribute for dataset {dataset_instance.name}({dataset_instance.id})") + for table in dataset_dict["tables"]: if "Value.NativeQuery(" in table["source"][0]["expression"]: - LOGGER.warning( - "Table {} is created from Custom SQL. Ignoring in processing".format( - table["name"] - ) - ) + LOGGER.warning(f'Table {table["name"]} is created from Custom SQL. Ignoring in processing') + continue # PowerBi table name contains schema name and table name. 
Format is @@ -972,29 +946,18 @@ def __to_datahub_dataset( dataset.datasource is None or dataset.datasource.metadata.is_relational is False ): - LOGGER.warning( - "Dataset {}({}) is not created from relational datasource".format( - dataset.name, dataset.id - ) - ) + LOGGER.warning(f"Dataset {dataset.name}({dataset.id}) is not created from relational datasource") + return dataset_mcps - LOGGER.info( - "Converting dataset={}(id={}) to datahub dataset".format( - dataset.name, dataset.id - ) - ) + LOGGER.info(f"Converting dataset={dataset.name}(id={dataset.id}) to datahub dataset") + for table in dataset.tables: # Create an URN for dataset - ds_urn = builder.make_dataset_urn( - platform=self.__config.dataset_type_mapping[dataset.datasource.type], - name="{}.{}.{}".format( - dataset.datasource.database, table.schema_name, table.name - ), - env=self.__config.env, - ) - LOGGER.info("{}={}".format(Constant.Dataset_URN, ds_urn)) + ds_urn = builder.make_dataset_urn(platform=self.__config.dataset_type_mapping[dataset.datasource.type], name=f"{dataset.datasource.database}.{table.schema_name}.{table.name}", env=self.__config.env) + + LOGGER.info(f"{Constant.Dataset_URN}={ds_urn}") # Create datasetProperties mcp ds_properties = DatasetPropertiesClass(description=table.name) @@ -1202,11 +1165,8 @@ def to_datahub_user( Map PowerBi user to datahub user """ - LOGGER.info( - "Converting user {}(id={}) to datahub's user".format( - user.displayName, user.id - ) - ) + LOGGER.info(f"Converting user {user.displayName}(id={user.id}) to datahub's user") + # Create an URN for user user_urn = builder.make_user_urn(user.get_urn_part()) @@ -1263,10 +1223,10 @@ def to_datahub_chart( chart_mcps = [] # Return empty list if input list is empty - if len(tiles) == 0: + if not tiles: return [], [] - LOGGER.info("Converting tiles(count={}) to charts".format(len(tiles))) + LOGGER.info(f"Converting tiles(count={len(tiles)}) to charts") for tile in tiles: if tile is None: @@ -1288,9 +1248,8 @@ def to_datahub_work_units( ) -> Set[EquableMetadataWorkUnit]: mcps = [] - LOGGER.info( - "Converting dashboard={} to datahub dashboard".format(dashboard.displayName) - ) + LOGGER.info(f"Converting dashboard={dashboard.displayName} to datahub dashboard") + # Convert user to CorpUser user_mcps = self.to_datahub_users(dashboard.users) @@ -1388,12 +1347,10 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: self.reporter.report_dashboards_scanned() self.reporter.report_charts_scanned(count=len(dashboard.tiles)) except Exception as e: - message = "Error ({}) occurred while loading dashboard {}(id={}) tiles.".format( - e, dashboard.displayName, dashboard.id - ) + message = f"Error ({e}) occurred while loading dashboard {dashboard.displayName}(id={dashboard.id}) tiles." 
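# Illustrative sketch of the URN construction shown in __to_datahub_dataset
# above, assuming the acryl-datahub package is installed; the platform and the
# database/schema/table values are invented for the example.
import datahub.emitter.mce_builder as builder

database, schema_name, table_name = "SalesDB", "dbo", "orders"
ds_urn = builder.make_dataset_urn(
    platform="mssql",
    name=f"{database}.{schema_name}.{table_name}",
    env="PROD",
)
# ds_urn == "urn:li:dataset:(urn:li:dataPlatform:mssql,SalesDB.dbo.orders,PROD)"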
+ LOGGER.exception(message, e) self.reporter.report_warning(dashboard.id, message) - # Convert PowerBi Dashboard and child entities to Datahub work unit to ingest into Datahub workunits = self.mapper.to_datahub_work_units(dashboard) for workunit in workunits: From 3cfd95a510a61812a941ccb9fccf5b07e9fe3d4c Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:49:37 +1000 Subject: [PATCH 59/88] Update openapi_parser.py --- .../ingestion/source/openapi_parser.py | 52 ++++++++----------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py index 830b6562755eb7..233d920f6877ef 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py @@ -20,9 +20,9 @@ def flatten(d: dict, prefix: str = "") -> Generator: for k, v in d.items(): if isinstance(v, dict): - yield from flatten(v, prefix + "." + k) + yield from flatten(v, f"{prefix}.{k}") else: - yield (prefix + "-" + k).strip(".") + yield f"{prefix}-{k}".strip(".") def flatten2list(d: dict) -> list: @@ -53,15 +53,13 @@ def request_call( headers = {"accept": "application/json"} if username is not None and password is not None: - response = requests.get( - url, headers=headers, auth=HTTPBasicAuth(username, password) - ) + return requests.get(url, headers=headers, auth=HTTPBasicAuth(username, password)) + elif token is not None: - headers["Authorization"] = "Bearer " + token - response = requests.get(url, headers=headers) + headers["Authorization"] = f"Bearer {token}" + return requests.get(url, headers=headers) else: - response = requests.get(url, headers=headers) - return response + return requests.get(url, headers=headers) def get_swag_json( @@ -77,14 +75,13 @@ def get_swag_json( else: response = request_call(url=tot_url, username=username, password=password) - if response.status_code == 200: - try: - dict_data = json.loads(response.content) - except json.JSONDecodeError: # it's not a JSON! - dict_data = yaml.safe_load(response.content) - return dict_data - else: + if response.status_code != 200: raise Exception(f"Unable to retrieve {tot_url}, error {response.status_code}") + try: + dict_data = json.loads(response.content) + except json.JSONDecodeError: # it's not a JSON! 
+ dict_data = yaml.safe_load(response.content) + return dict_data def get_url_basepath(sw_dict: dict) -> str: @@ -95,7 +92,7 @@ def get_url_basepath(sw_dict: dict) -> str: def check_sw_version(sw_dict: dict) -> None: - if "swagger" in sw_dict.keys(): + if "swagger" in sw_dict: v_split = sw_dict["swagger"].split(".") else: v_split = sw_dict["openapi"].split(".") @@ -108,7 +105,7 @@ def check_sw_version(sw_dict: dict) -> None: ) -def get_endpoints(sw_dict: dict) -> dict: # noqa: C901 +def get_endpoints(sw_dict: dict) -> dict: # noqa: C901 """ Get all the URLs accepting the "GET" method, together with their description and the tags """ @@ -176,8 +173,7 @@ def get_endpoints(sw_dict: dict) -> dict: # noqa: C901 if "parameters" in p_o["get"].keys(): url_details[p_k]["parameters"] = p_o["get"]["parameters"] - ord_d = dict(sorted(url_details.items())) # sorting for convenience - return ord_d + return dict(sorted(url_details.items())) def guessing_url_name(url: str, examples: dict) -> str: @@ -187,10 +183,7 @@ def guessing_url_name(url: str, examples: dict) -> str: extr_data = {"advancedcomputersearches": {'id': 202, 'name': '_unmanaged'}} -->> guessed_url = /advancedcomputersearches/name/_unmanaged/id/202' """ - if url[0] == "/": - url2op = url[1:] # operational url does not need the very first / - else: - url2op = url + url2op = url[1:] if url[0] == "/" else url divisions = url2op.split("/") # the very first part of the url should stay the same. @@ -211,14 +204,14 @@ def guessing_url_name(url: str, examples: dict) -> str: if div_pos > 0: root = root[: div_pos - 1] # like "base/field" should become "base" - if root in examples.keys(): + if root in examples: # if our root is contained in our samples examples... ex2use = root - elif root[:-1] in examples.keys(): + elif root[:-1] in examples: ex2use = root[:-1] - elif root.replace("/", ".") in examples.keys(): + elif root.replace("/", ".") in examples: ex2use = root.replace("/", ".") - elif root[:-1].replace("/", ".") in examples.keys(): + elif root[:-1].replace("/", ".") in examples: ex2use = root[:-1].replace("/", ".") else: return url @@ -277,8 +270,7 @@ def try_guessing(url: str, examples: dict) -> str: Any non-guessed name will stay as it was (with parenthesis{}) """ url_guess = guessing_url_name(url, examples) # try to fill with known informations - url_guess_id = maybe_theres_simple_id(url_guess) # try to fill IDs with "1"s... - return url_guess_id + return maybe_theres_simple_id(url_guess) def clean_url(url: str) -> str: From 99823bbfba36599e235091c38f8a9cc6906ae30e Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:49:40 +1000 Subject: [PATCH 60/88] Update openapi.py --- metadata-ingestion/src/datahub/ingestion/source/openapi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi.py b/metadata-ingestion/src/datahub/ingestion/source/openapi.py index b71cb363b96e46..9548677e1cdc11 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi.py @@ -118,7 +118,7 @@ class ApiWorkUnit(MetadataWorkUnit): class APISource(Source, ABC): """ - This plugin is meant to gather dataset-like informations about OpenApi Endpoints. + This plugin is meant to gather dataset-like information about OpenApi Endpoints. 
As example, if by calling GET at the endpoint at `https://test_endpoint.com/api/users/` you obtain as result: ```JSON From 1a85c8fed56ff4351f616198dd01d860a05cd5f9 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:49:42 +1000 Subject: [PATCH 61/88] Update nifi.py --- metadata-ingestion/src/datahub/ingestion/source/nifi.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index bb8ac443555252..20cd1daa6671e2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -337,10 +337,7 @@ def __init__(self, config: NifiSourceConfig, ctx: PipelineContext) -> None: if self.config.site_url_to_site_name is None: self.config.site_url_to_site_name = {} - if ( - not urljoin(self.config.site_url, "/nifi/") - in self.config.site_url_to_site_name - ): + if urljoin(self.config.site_url, "/nifi/") not in self.config.site_url_to_site_name: self.config.site_url_to_site_name[ urljoin(self.config.site_url, "/nifi/") ] = self.config.site_name @@ -774,7 +771,7 @@ def construct_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 rootpg = self.nifi_flow.root_process_group flow_name = rootpg.name # self.config.site_name flow_urn = builder.make_data_flow_urn(NIFI, rootpg.id, self.config.env) - flow_properties = dict() + flow_properties = {} if self.nifi_flow.clustered is not None: flow_properties["clustered"] = str(self.nifi_flow.clustered) if self.nifi_flow.version is not None: From 748d1155e6ab68cca1e51accc0e59374ca3b302d Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:53:02 +1000 Subject: [PATCH 62/88] Update mongodb.py --- metadata-ingestion/src/datahub/ingestion/source/mongodb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 8d6201867dd8b3..9710fe023a2560 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -172,9 +172,9 @@ def construct_schema_pymongo( maximum size of the document that will be considered for generating the schema. """ - doc_size_field = "temporary_doc_size_field" aggregations: List[Dict] = [] if is_version_gte_4_4: + doc_size_field = "temporary_doc_size_field" # create a temporary field to store the size of the document. filter on it and then remove it. 
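# Illustrative sketch of the kind of pipeline the comment above describes,
# assuming pymongo and MongoDB >= 4.4 (required for $bsonSize). The collection
# coordinates, size cut-off and the $match/$unset stages are assumptions for
# the example, not the exact stages used by the connector.
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")
collection = client["test_db"]["test_collection"]

max_document_size = 16 * 1024 * 1024  # hypothetical byte limit
pipeline = [
    {"$addFields": {"temporary_doc_size_field": {"$bsonSize": "$$ROOT"}}},
    {"$match": {"temporary_doc_size_field": {"$lte": max_document_size}}},
    {"$unset": "temporary_doc_size_field"},
]
sample_docs = list(collection.aggregate(pipeline))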
aggregations = [ {"$addFields": {doc_size_field: {"$bsonSize": "$$ROOT"}}}, From 23e43294ae4612cb86d2f8307d8e4872ae6518c8 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 22:53:04 +1000 Subject: [PATCH 63/88] Update metabase.py --- .../src/datahub/ingestion/source/metabase.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index 93308ff93b3226..a4873e1bd08633 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -42,6 +42,7 @@ OwnershipTypeClass, ) from datahub.utilities import config_clean +from datetime import timezone class MetabaseConfig(DatasetLineageProviderConfigBase): @@ -199,7 +200,7 @@ def get_timestamp_millis_from_ts_string(ts_str: str) -> int: try: return int(dp.parse(ts_str).timestamp() * 1000) except (dp.ParserError, OverflowError): - return int(datetime.utcnow().timestamp() * 1000) + return int(datetime.now(timezone.utc).timestamp() * 1000) def construct_dashboard_from_api_data( self, dashboard_info: dict @@ -448,9 +449,7 @@ def get_datasource_urn(self, card_details): if source_table_id is not None: schema_name, table_name = self.get_source_table_from_id(source_table_id) if table_name: - source_paths.add( - f"{schema_name + '.' if schema_name else ''}{table_name}" - ) + source_paths.add(f"{f'{schema_name}.' if schema_name else ''}{table_name}") else: try: raw_query = ( @@ -478,7 +477,7 @@ def get_datasource_urn(self, card_details): # Create dataset URNs dataset_urn = [] - dbname = f"{database_name + '.' if database_name else ''}" + dbname = f"{f'{database_name}.' if database_name else ''}" source_tables = list(map(lambda tbl: f"{dbname}{tbl}", source_paths)) dataset_urn = [ builder.make_dataset_urn_with_platform_instance( From 82509c7504cd109a049ba02c0c288ea4e7230287 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 23:02:24 +1000 Subject: [PATCH 64/88] Update lookml.py --- .../src/datahub/ingestion/source/lookml.py | 57 +++++++------------ 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/lookml.py b/metadata-ingestion/src/datahub/ingestion/source/lookml.py index 2370a76aa28a00..2c8e23709d1ee1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/lookml.py +++ b/metadata-ingestion/src/datahub/ingestion/source/lookml.py @@ -130,18 +130,17 @@ def from_looker_connection( ".*": _get_generic_definition, } - if looker_connection.dialect_name is not None: - for extractor_pattern, extracting_function in extractors.items(): - if re.match(extractor_pattern, looker_connection.dialect_name): - (platform, db, schema) = extracting_function(looker_connection) - return cls(platform=platform, default_db=db, default_schema=schema) - raise ConfigurationError( - f"Could not find an appropriate platform for looker_connection: {looker_connection.name} with dialect: {looker_connection.dialect_name}" - ) - else: + if looker_connection.dialect_name is None: raise ConfigurationError( f"Unable to fetch a fully filled out connection for {looker_connection.name}. Please check your API permissions." 
) + for extractor_pattern, extracting_function in extractors.items(): + if re.match(extractor_pattern, looker_connection.dialect_name): + (platform, db, schema) = extracting_function(looker_connection) + return cls(platform=platform, default_db=db, default_schema=schema) + raise ConfigurationError( + f"Could not find an appropriate platform for looker_connection: {looker_connection.name} with dialect: {looker_connection.dialect_name}" + ) class LookMLSourceConfig(LookerCommonConfig): @@ -591,7 +590,7 @@ def from_looker_dict( if sql_table_name is not None else None ) - derived_table = looker_view.get("derived_table", None) + derived_table = looker_view.get("derived_table") dimensions = cls._get_fields( looker_view.get("dimensions", []), ViewFieldType.DIMENSION @@ -605,7 +604,7 @@ def from_looker_dict( fields: List[ViewField] = dimensions + dimension_groups + measures # also store the view logic and materialization - view_logic = looker_viewfile.raw_file_content[0:max_file_snippet_length] + view_logic = looker_viewfile.raw_file_content[:max_file_snippet_length] # Parse SQL from derived tables to extract dependencies if derived_table is not None: @@ -630,9 +629,7 @@ def from_looker_dict( if k in ["datagroup_trigger", "sql_trigger_value", "persist_for"]: materialized = True if "materialized_view" in derived_table: - materialized = ( - True if derived_table["materialized_view"] == "yes" else False - ) + materialized = derived_table["materialized_view"] == "yes" view_details = ViewProperties( materialized=materialized, viewLogic=view_logic, viewLanguage=view_lang @@ -654,14 +651,7 @@ def from_looker_dict( # If not a derived table, then this view essentially wraps an existing # object in the database. - if sql_table_name is not None: - # If sql_table_name is set, there is a single dependency in the view, on the sql_table_name. - sql_table_names = [sql_table_name] - else: - # Otherwise, default to the view name as per the docs: - # https://docs.looker.com/reference/view-params/sql_table_name-for-view - sql_table_names = [view_name] - + sql_table_names = [view_name] if sql_table_name is None else [sql_table_name] output_looker_view = LookerView( id=LookerViewId( project_name=project_name, model_name=model_name, view_name=view_name @@ -705,7 +695,7 @@ def _extract_metadata_from_sql_query( # Add those in if we detect that it is missing if not re.search(r"SELECT\s", sql_query, flags=re.I): # add a SELECT clause at the beginning - sql_query = "SELECT " + sql_query + sql_query = f"SELECT {sql_query}" if not re.search(r"FROM\s", sql_query, flags=re.I): # add a FROM clause at the end sql_query = f"{sql_query} FROM {sql_table_name if sql_table_name is not None else view_name}" @@ -714,7 +704,7 @@ def _extract_metadata_from_sql_query( sql_info = cls._get_sql_info(sql_query, sql_parser_path) sql_table_names = sql_info.table_names column_names = sql_info.column_names - if fields == []: + if not fields: # it seems like the view is defined purely as sql, let's try using the column names to populate the schema fields = [ # set types to unknown for now as our sql parser doesn't give us column types yet @@ -722,10 +712,7 @@ def _extract_metadata_from_sql_query( for c in column_names ] except Exception as e: - reporter.report_warning( - f"looker-view-{view_name}", - f"Failed to parse sql query, lineage will not be accurate. Exception: {e}", - ) + reporter.report_warning(f"looker-view-{view_name}", f"Failed to parse sql query, lineage will not be accurate. 
Exception: {e}") return fields, sql_table_names @@ -843,10 +830,7 @@ def _load_model(self, path: str) -> LookerModel: return looker_model def _platform_names_have_2_parts(self, platform: str) -> bool: - if platform in ["hive", "mysql", "athena"]: - return True - else: - return False + return platform in {"hive", "mysql", "athena"} def _generate_fully_qualified_name( self, sql_table_name: str, connection_def: LookerConnectionDefinition @@ -998,7 +982,6 @@ def _get_custom_properties(self, looker_view: LookerView) -> DatasetPropertiesCl def _build_dataset_mcps( self, looker_view: LookerView ) -> List[MetadataChangeProposalWrapper]: - events = [] subTypeEvent = MetadataChangeProposalWrapper( entityType="dataset", changeType=ChangeTypeClass.UPSERT, @@ -1006,7 +989,7 @@ def _build_dataset_mcps( aspectName="subTypes", aspect=SubTypesClass(typeNames=["view"]), ) - events.append(subTypeEvent) + events = [subTypeEvent] if looker_view.view_details is not None: viewEvent = MetadataChangeProposalWrapper( entityType="dataset", @@ -1047,9 +1030,7 @@ def _build_dataset_mce(self, looker_view: LookerView) -> MetadataChangeEvent: dataset_snapshot.aspects.append(schema_metadata) dataset_snapshot.aspects.append(self._get_custom_properties(looker_view)) - mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot) - - return mce + return MetadataChangeEvent(proposedSnapshot=dataset_snapshot) def get_project_name(self, model_name: str) -> str: if self.source_config.project_name is not None: @@ -1091,7 +1072,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 for file_path in model_files: self.reporter.report_models_scanned() - model_name = file_path.stem[0:-model_suffix_len] + model_name = file_path.stem[:-model_suffix_len] if not self.source_config.model_pattern.allowed(model_name): self.reporter.report_models_dropped(model_name) From 3245d7b91b5a1ecbf0e086640bd449c1f25c7bb0 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 23:02:26 +1000 Subject: [PATCH 65/88] Update looker_common.py --- .../datahub/ingestion/source/looker_common.py | 40 ++++++------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker_common.py index 7f717d0efc82d0..58cf0674267d76 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker_common.py @@ -110,12 +110,8 @@ class LookerExploreNamingConfig(ConfigModel): def init_naming_pattern(cls, v): if isinstance(v, NamingPattern): return v - else: - assert isinstance(v, str), "pattern must be a string" - naming_pattern = NamingPattern( - allowed_vars=naming_pattern_variables, pattern=v - ) - return naming_pattern + assert isinstance(v, str), "pattern must be a string" + return NamingPattern(allowed_vars=naming_pattern_variables, pattern=v) @validator("explore_naming_pattern", "explore_browse_pattern", always=True) def validate_naming_pattern(cls, v): @@ -143,12 +139,8 @@ class LookerViewNamingConfig(ConfigModel): def init_naming_pattern(cls, v): if isinstance(v, NamingPattern): return v - else: - assert isinstance(v, str), "pattern must be a string" - naming_pattern = NamingPattern( - allowed_vars=naming_pattern_variables, pattern=v - ) - return naming_pattern + assert isinstance(v, str), "pattern must be a string" + return NamingPattern(allowed_vars=naming_pattern_variables, pattern=v) @validator("view_naming_pattern", "view_browse_pattern", 
always=True) def validate_naming_pattern(cls, v): @@ -314,8 +306,7 @@ def _extract_view_from_field(field: str) -> str: assert ( field.count(".") == 1 ), f"Error: A field must be prefixed by a view name, field is: {field}" - view_name = field.split(".")[0] - return view_name + return field.split(".")[0] @staticmethod def _get_field_type( @@ -336,8 +327,7 @@ def _get_field_type( ) type_class = NullTypeClass - data_type = SchemaFieldDataType(type=type_class()) - return data_type + return SchemaFieldDataType(type=type_class()) @staticmethod def _get_schema( @@ -346,7 +336,7 @@ def _get_schema( view_fields: List[ViewField], reporter: SourceReport, ) -> Optional[SchemaMetadataClass]: - if view_fields == []: + if not view_fields: return None fields, primary_keys = LookerUtil._get_fields_and_primary_keys( view_fields=view_fields, reporter=reporter @@ -618,16 +608,10 @@ def from_api( # noqa: C901 source_file=explore.source_file, ) except SDKError as e: - logger.warn( - "Failed to extract explore {} from model {}.".format( - explore_name, model - ) - ) - logger.debug( - "Failed to extract explore {} from model {} with {}".format( - explore_name, model, e - ) - ) + logger.warn(f"Failed to extract explore {explore_name} from model {model}.") + logger.debug(f"Failed to extract explore {explore_name} from model {model} with {e}") + + except AssertionError: reporter.report_warning( key="chart-", @@ -678,7 +662,7 @@ def _get_url(self, base_url): # If the base_url contains a port number (like https://company.looker.com:19999) remove the port number m = re.match("^(.*):([0-9]+)$", base_url) if m is not None: - base_url = m.group(1) + base_url = m[1] return f"{base_url}/explore/{self.model_name}/{self.name}" def _to_metadata_events( # noqa: C901 From 232dbdac550461027b97c204b21b9fd5754008af Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 23:02:29 +1000 Subject: [PATCH 66/88] Update looker.py --- .../src/datahub/ingestion/source/looker.py | 52 ++++++++----------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker.py b/metadata-ingestion/src/datahub/ingestion/source/looker.py index 252d9f553e0572..2afa4ca05cbb36 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker.py @@ -216,11 +216,11 @@ def url(self, base_url: str) -> str: # If the base_url contains a port number (like https://company.looker.com:19999) remove the port number m = re.match("^(.*):([0-9]+)$", base_url) if m is not None: - base_url = m.group(1) + base_url = m[1] if self.look_id is not None: - return base_url + "/looks/" + self.look_id + return f"{base_url}/looks/{self.look_id}" else: - return base_url + "/x/" + self.query_slug + return f"{base_url}/x/{self.query_slug}" def get_urn_element_id(self): # A dashboard element can use a look or just a raw query against an explore @@ -270,23 +270,22 @@ def __init__(self, client: Looker31SDK): def get_by_id( self, id: int, transport_options: Optional[TransportOptions] ) -> Optional[LookerUser]: - logger.debug("Will get user {}".format(id)) + logger.debug(f"Will get user {id}") if id in self.user_map: return self.user_map[id] - else: - try: - raw_user: User = self.client.user( - id, - fields=self.fields, - transport_options=transport_options, - ) - looker_user = LookerUser._from_user(raw_user) - self.user_map[id] = looker_user - return looker_user - except SDKError as e: - logger.warn("Could not find user with id {}".format(id)) - 
logger.warn("Failure was {}".format(e)) - return None + try: + raw_user: User = self.client.user( + id, + fields=self.fields, + transport_options=transport_options, + ) + looker_user = LookerUser._from_user(raw_user) + self.user_map[id] = looker_user + return looker_user + except SDKError as e: + logger.warn(f"Could not find user with id {id}") + logger.warn(f"Failure was {e}") + return None @dataclass @@ -306,8 +305,8 @@ def url(self, base_url): # If the base_url contains a port number (like https://company.looker.com:19999) remove the port number m = re.match("^(.*):([0-9]+)$", base_url) if m is not None: - base_url = m.group(1) - return base_url + "/dashboards/" + self.id + base_url = m[1] + return f"{base_url}/dashboards/{self.id}" def get_urn_dashboard_id(self): return f"dashboards.{self.id}" @@ -350,8 +349,7 @@ def _extract_view_from_field(field: str) -> str: assert ( field.count(".") == 1 ), f"Error: A field must be prefixed by a view name, field is: {field}" - view_name = field.split(".")[0] - return view_name + return field.split(".")[0] def _get_views_from_fields(self, fields: List[str]) -> List[str]: field_set = set(fields) @@ -449,12 +447,8 @@ def _get_looker_dashboard_element( # noqa: C901 raise ValueError("Element ID can't be None") if element.query is not None: - explores = [] fields = self._get_fields_from_query(element.query) - if element.query.view is not None: - # Get the explore from the view directly - explores = [element.query.view] - + explores = [element.query.view] if element.query.view is not None else [] logger.debug( "Element {}: Explores added: {}".format(element.title, explores) ) @@ -474,7 +468,6 @@ def _get_looker_dashboard_element( # noqa: C901 upstream_fields=fields, ) - # Dashboard elements can *alternatively* link to an existing look elif element.look is not None: # we pick from element title by default, falling back to look title. 
title: str = ( @@ -512,7 +505,6 @@ def _get_looker_dashboard_element( # noqa: C901 upstream_fields=fields, ) - # Failing the above two approaches, pick out details from result_maker elif element.result_maker is not None: model: str = "" fields = [] @@ -957,7 +949,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: else False, ) else: - raise Exception("Unexpected type of event {}".format(event)) + raise Exception(f"Unexpected type of event {event}") self.reporter.report_workunit(workunit) yield workunit From bc6e2b03bdc204693901846798ee1c77a8e6e03c Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 23:02:31 +1000 Subject: [PATCH 67/88] Update feast.py --- .../src/datahub/ingestion/source/feast.py | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/feast.py b/metadata-ingestion/src/datahub/ingestion/source/feast.py index c17ae5c14a85fd..0c67e2d036083f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/feast.py +++ b/metadata-ingestion/src/datahub/ingestion/source/feast.py @@ -4,23 +4,22 @@ from pydantic import Field -if sys.version_info >= (3, 7): - from feast import ( - BigQuerySource, - Entity, - Feature, - FeatureStore, - FeatureView, - FileSource, - KafkaSource, - KinesisSource, - OnDemandFeatureView, - ValueType, - ) - from feast.data_source import DataSource, RequestDataSource -else: +if sys.version_info < (3, 7): raise ModuleNotFoundError("The feast plugin requires Python 3.7 or newer.") +from feast import ( + BigQuerySource, + Entity, + Feature, + FeatureStore, + FeatureView, + FileSource, + KafkaSource, + KinesisSource, + OnDemandFeatureView, + ValueType, +) +from feast.data_source import DataSource, RequestDataSource import datahub.emitter.mce_builder as builder from datahub.configuration.common import ConfigModel from datahub.emitter.mce_builder import DEFAULT_ENV @@ -52,6 +51,7 @@ assert sys.version_info >= (3, 7) # needed for mypy +# FIXME: ValueType module cannot be used as a type _field_type_mapping: Dict[ValueType, str] = { ValueType.UNKNOWN: MLFeatureDataType.UNKNOWN, ValueType.BYTES: MLFeatureDataType.BYTE, @@ -218,6 +218,7 @@ def _get_entity_workunit( def _get_feature_workunit( self, + # FIXME: FeatureView and OnDemandFeatureView cannot be used as a type feature_view: Union[FeatureView, OnDemandFeatureView], feature: Feature, ) -> MetadataWorkUnit: From 77609ee991e216d2490d86dd57386c9598b0272b Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 8 Jun 2022 23:02:33 +1000 Subject: [PATCH 68/88] Update elastic_search.py --- .../src/datahub/ingestion/source/elastic_search.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py index c5a6c13bf695df..74fca0d2654519 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py +++ b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py @@ -255,9 +255,7 @@ def host_colon_port_comma(cls, host_val: str) -> str: @property def http_auth(self) -> Optional[Tuple[str, str]]: - if self.username is None: - return None - return self.username, self.password or "" + return None if self.username is None else (self.username, self.password or "") @platform_name("Elastic Search") From 0dedbc881e5a4dc0ace715cb5e93d4b7deb682fa Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 9 Jun 2022 14:39:26 +1000 Subject: [PATCH 69/88] Running black --- 
.../src/datahub/cli/cli_utils.py | 5 +- .../src/datahub/cli/delete_cli.py | 4 +- .../src/datahub/cli/docker_check.py | 4 +- .../src/datahub/cli/timeline_cli.py | 18 ++++-- .../src/datahub/configuration/common.py | 13 ++++- .../src/datahub/emitter/mcp_builder.py | 3 +- .../datahub/emitter/serialization_helper.py | 5 +- .../src/datahub/ingestion/api/decorators.py | 6 +- .../src/datahub/ingestion/run/pipeline.py | 4 +- .../datahub/ingestion/sink/datahub_kafka.py | 4 +- .../datahub/ingestion/sink/datahub_rest.py | 4 +- .../datahub/ingestion/source/looker_common.py | 5 +- .../src/datahub/ingestion/source/lookml.py | 5 +- .../src/datahub/ingestion/source/metabase.py | 4 +- .../src/datahub/ingestion/source/nifi.py | 5 +- .../ingestion/source/openapi_parser.py | 6 +- .../src/datahub/ingestion/source/powerbi.py | 58 ++++++++++++------- .../src/datahub/ingestion/source/pulsar.py | 28 ++++++--- .../src/datahub/ingestion/source/redash.py | 24 ++++++-- .../src/datahub/ingestion/source/tableau.py | 20 ++++++- .../ingestion/source_config/sql/snowflake.py | 5 +- .../ingestion/transformer/base_transformer.py | 4 +- .../integrations/great_expectations/action.py | 5 +- .../datahub/utilities/hive_schema_to_avro.py | 24 ++++++-- .../src/datahub/utilities/mapping.py | 2 +- .../utilities/sql_lineage_parser_impl.py | 4 +- .../src/datahub/utilities/urns/urn.py | 10 +++- 27 files changed, 200 insertions(+), 79 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 84ee3aa8513a31..d234beb2dafbfe 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -703,8 +703,9 @@ def get_aspects_for_entity( aspect_value["aspect"]["value"] = json.loads( aspect_value["aspect"]["value"] ) - aspect_list[aspect_cls.RECORD_SCHEMA.fullname.replace("pegasus2avro.", "")] = aspect_value - + aspect_list[ + aspect_cls.RECORD_SCHEMA.fullname.replace("pegasus2avro.", "") + ] = aspect_value aspect_map: Dict[str, Union[dict, DictWrapper]] = {} for a in aspect_list.values(): diff --git a/metadata-ingestion/src/datahub/cli/delete_cli.py b/metadata-ingestion/src/datahub/cli/delete_cli.py index ae8574b89c9695..c4ff2b5e6f9361 100644 --- a/metadata-ingestion/src/datahub/cli/delete_cli.py +++ b/metadata-ingestion/src/datahub/cli/delete_cli.py @@ -315,7 +315,9 @@ def _delete_one_urn( deletion_result.num_records = rows_affected else: logger.info(f"[Dry-run] Would hard-delete {urn}") - deletion_result.num_records = UNKNOWN_NUM_RECORDS # since we don't know how many rows will be affected + deletion_result.num_records = ( + UNKNOWN_NUM_RECORDS # since we don't know how many rows will be affected + ) deletion_result.end() return deletion_result diff --git a/metadata-ingestion/src/datahub/cli/docker_check.py b/metadata-ingestion/src/datahub/cli/docker_check.py index 005651d673df36..25719cef2334d9 100644 --- a/metadata-ingestion/src/datahub/cli/docker_check.py +++ b/metadata-ingestion/src/datahub/cli/docker_check.py @@ -90,7 +90,9 @@ def check_local_docker_containers(preflight_only: bool = False) -> List[str]: else: existing_containers = {container.name for container in containers} missing_containers = set(REQUIRED_CONTAINERS) - existing_containers - issues.extend(f"{missing} container is not present" for missing in missing_containers) + issues.extend( + f"{missing} container is not present" for missing in missing_containers + ) # Check that the containers are running and healthy. 
for container in containers: diff --git a/metadata-ingestion/src/datahub/cli/timeline_cli.py b/metadata-ingestion/src/datahub/cli/timeline_cli.py index eec753a4af2ba0..516ca7bd7fe7b3 100644 --- a/metadata-ingestion/src/datahub/cli/timeline_cli.py +++ b/metadata-ingestion/src/datahub/cli/timeline_cli.py @@ -23,7 +23,11 @@ def pretty_field_path(field_path: str) -> str: return field_path # breakpoint() # parse schema field - tokens = [t for t in field_path.split(".") if not t.startswith("[") and not t.endswith("]")] + tokens = [ + t + for t in field_path.split(".") + if not t.startswith("[") and not t.endswith("]") + ] return ".".join(tokens) @@ -33,10 +37,10 @@ def pretty_id(id: Optional[str]) -> str: return "" # breakpoint() assert id is not None - if id.startswith("urn:li:datasetField:") or id.startswith( - "urn:li:schemaField:" + if id.startswith("urn:li:datasetField:") or id.startswith("urn:li:schemaField:"): + if schema_field_key := schema_field_urn_to_key( + id.replace("urn:li:datasetField", "urn:li:schemaField") ): - if schema_field_key := schema_field_urn_to_key(id.replace("urn:li:datasetField", "urn:li:schemaField")): assert schema_field_key is not None field_path = schema_field_key.fieldPath @@ -182,7 +186,11 @@ def timeline( change_instant = str( datetime.fromtimestamp(change_txn["timestamp"] // 1000) ) - change_color = "green" if change_txn.get("semVerChange") in ["MINOR", "PATCH"] else "red" + change_color = ( + "green" + if change_txn.get("semVerChange") in ["MINOR", "PATCH"] + else "red" + ) print( f"{colored(change_instant,'cyan')} - {colored(change_txn['semVer'],change_color)}" diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index b9fce56d5ff60b..80f8d717daf7a5 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -136,7 +136,10 @@ def allowed(self, string: str) -> bool: if re.match(deny_pattern, string, self.regex_flags): return False - return any(re.match(allow_pattern, string, self.regex_flags) for allow_pattern in self.allow) + return any( + re.match(allow_pattern, string, self.regex_flags) + for allow_pattern in self.allow + ) def is_fully_specified_allow_list(self) -> bool: """ @@ -145,7 +148,9 @@ def is_fully_specified_allow_list(self) -> bool: pattern into a 'search for the ones that are allowed' pattern, which can be much more efficient in some cases. 
""" - return all(self.alphabet_pattern.match(allow_pattern) for allow_pattern in self.allow) + return all( + self.alphabet_pattern.match(allow_pattern) for allow_pattern in self.allow + ) def get_allowed_list(self) -> List[str]: """Return the list of allowed strings as a list, after taking into account deny patterns, if possible""" @@ -168,7 +173,9 @@ def all(cls) -> "KeyValuePattern": return KeyValuePattern() def value(self, string: str) -> List[str]: - return next((self.rules[key] for key in self.rules.keys() if re.match(key, string)), []) + return next( + (self.rules[key] for key in self.rules.keys() if re.match(key, string)), [] + ) def matched(self, string: str) -> bool: return any(re.match(key, string) for key in self.rules.keys()) diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 868916fda2c810..055db3c6a4ad61 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -234,7 +234,8 @@ def gen_containers( def add_dataset_to_container( # FIXME: Union requires two or more type arguments - container_key: KeyType, dataset_urn: str + container_key: KeyType, + dataset_urn: str, ) -> Iterable[Union[MetadataWorkUnit]]: container_urn = make_container_urn( guid=container_key.guid(), diff --git a/metadata-ingestion/src/datahub/emitter/serialization_helper.py b/metadata-ingestion/src/datahub/emitter/serialization_helper.py index 958c913698e442..cad4e9dd3270fc 100644 --- a/metadata-ingestion/src/datahub/emitter/serialization_helper.py +++ b/metadata-ingestion/src/datahub/emitter/serialization_helper.py @@ -17,8 +17,9 @@ def _json_transform(obj: Any, from_pattern: str, to_pattern: str) -> Any: return {field: _json_transform(obj[field], from_pattern, to_pattern)} new_obj: Any = { - key: _json_transform(value, from_pattern, to_pattern) \ - for key, value in obj.items() if value is not None + key: _json_transform(value, from_pattern, to_pattern) + for key, value in obj.items() + if value is not None } return new_obj diff --git a/metadata-ingestion/src/datahub/ingestion/api/decorators.py b/metadata-ingestion/src/datahub/ingestion/api/decorators.py index 7666a4f52a2271..20867a8571b24c 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/decorators.py +++ b/metadata-ingestion/src/datahub/ingestion/api/decorators.py @@ -34,7 +34,11 @@ def platform_name( def wrapper(cls: Type) -> Type: setattr(cls, "get_platform_name", lambda: platform_name) - setattr(cls, "get_platform_id", lambda: id or platform_name.lower().replace(" ", "-")) + setattr( + cls, + "get_platform_id", + lambda: id or platform_name.lower().replace(" ", "-"), + ) return cls diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index 9c94108ec51309..4d2e02d8022e60 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -264,7 +264,9 @@ def process_commits(self) -> None: if self.source.get_report().failures or self.sink.get_report().failures else False ) - has_warnings: bool = bool(self.source.get_report().warnings or self.sink.get_report().warnings) + has_warnings: bool = bool( + self.source.get_report().warnings or self.sink.get_report().warnings + ) for name, committable in self.ctx.get_committables(): commit_policy: CommitPolicy = committable.commit_policy diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py 
b/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py index 20929e85887a77..93d3aa5f6c85d3 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py @@ -77,7 +77,9 @@ def write_record_async( self.report, record_envelope, write_callback ).kafka_callback, ) - elif isinstance(record, (MetadataChangeProposalWrapper, MetadataChangeProposalClass)): + elif isinstance( + record, (MetadataChangeProposalWrapper, MetadataChangeProposalClass) + ): self.emitter.emit_mcp_async( record, callback=_KafkaCallback( diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index 415a7a1c827da8..d95eb245ccfb50 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -113,7 +113,9 @@ def _write_done_callback( # trim exception stacktraces when reporting warnings if "stackTrace" in e.info: with contextlib.suppress(Exception): - e.info["stackTrace"] = "\n".join(e.info["stackTrace"].split("\n")[:2]) + e.info["stackTrace"] = "\n".join( + e.info["stackTrace"].split("\n")[:2] + ) record = record_envelope.record if isinstance(record, MetadataChangeProposalWrapper): # include information about the entity that failed diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker_common.py index 49637251375b6e..668d3d3e2d898f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker_common.py @@ -624,8 +624,9 @@ def from_api( # noqa: C901 ) except SDKError as e: logger.warn(f"Failed to extract explore {explore_name} from model {model}.") - logger.debug(f"Failed to extract explore {explore_name} from model {model} with {e}") - + logger.debug( + f"Failed to extract explore {explore_name} from model {model} with {e}" + ) except AssertionError: reporter.report_warning( diff --git a/metadata-ingestion/src/datahub/ingestion/source/lookml.py b/metadata-ingestion/src/datahub/ingestion/source/lookml.py index 571ba1e38359c2..203bf199c27ad5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/lookml.py +++ b/metadata-ingestion/src/datahub/ingestion/source/lookml.py @@ -712,7 +712,10 @@ def _extract_metadata_from_sql_query( for c in column_names ] except Exception as e: - reporter.report_warning(f"looker-view-{view_name}", f"Failed to parse sql query, lineage will not be accurate. Exception: {e}") + reporter.report_warning( + f"looker-view-{view_name}", + f"Failed to parse sql query, lineage will not be accurate. Exception: {e}", + ) return fields, sql_table_names diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index a4873e1bd08633..bd930f90ff3aff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -449,7 +449,9 @@ def get_datasource_urn(self, card_details): if source_table_id is not None: schema_name, table_name = self.get_source_table_from_id(source_table_id) if table_name: - source_paths.add(f"{f'{schema_name}.' if schema_name else ''}{table_name}") + source_paths.add( + f"{f'{schema_name}.' 
if schema_name else ''}{table_name}" + ) else: try: raw_query = ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index 20cd1daa6671e2..e9bc5f0b5daba0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -337,7 +337,10 @@ def __init__(self, config: NifiSourceConfig, ctx: PipelineContext) -> None: if self.config.site_url_to_site_name is None: self.config.site_url_to_site_name = {} - if urljoin(self.config.site_url, "/nifi/") not in self.config.site_url_to_site_name: + if ( + urljoin(self.config.site_url, "/nifi/") + not in self.config.site_url_to_site_name + ): self.config.site_url_to_site_name[ urljoin(self.config.site_url, "/nifi/") ] = self.config.site_name diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py index 233d920f6877ef..f33654daa15595 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py @@ -53,7 +53,9 @@ def request_call( headers = {"accept": "application/json"} if username is not None and password is not None: - return requests.get(url, headers=headers, auth=HTTPBasicAuth(username, password)) + return requests.get( + url, headers=headers, auth=HTTPBasicAuth(username, password) + ) elif token is not None: headers["Authorization"] = f"Bearer {token}" @@ -105,7 +107,7 @@ def check_sw_version(sw_dict: dict) -> None: ) -def get_endpoints(sw_dict: dict) -> dict: # noqa: C901 +def get_endpoints(sw_dict: dict) -> dict: # noqa: C901 """ Get all the URLs accepting the "GET" method, together with their description and the tags """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi.py index f9df9a802a3719..a4182ab6b6824f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi.py @@ -346,7 +346,9 @@ def __get_users(self, workspace_id: str, entity: str, id: str) -> List[User]: # Check if we got response from PowerBi if response.status_code != 200: - LOGGER.warning(f"Failed to fetch user list from power-bi for, http_status={response.status_code}, message={response.text}") + LOGGER.warning( + f"Failed to fetch user list from power-bi for, http_status={response.status_code}, message={response.text}" + ) LOGGER.info(f"{Constant.WorkspaceId}={workspace_id}") LOGGER.info(f"{Constant.ENTITY}={entity}") @@ -572,7 +574,9 @@ def get_data_source(self, dataset: Dataset) -> Any: res = response.json() value = res["value"] if len(value) == 0: - LOGGER.info(f"datasource is not found for dataset {dataset.name}({dataset.id})") + LOGGER.info( + f"datasource is not found for dataset {dataset.name}({dataset.id})" + ) return None # Consider only zero index datasource @@ -639,8 +643,9 @@ def new_dataset_or_report(tile_instance: Any) -> dict: else: report_fields["createdFrom"] = PowerBiAPI.Tile.CreatedFrom.VISUALIZATION - LOGGER.info(f'Tile {tile_instance.get("title")}({tile_instance.get("id")}) is created from {report_fields["createdFrom"]}') - + LOGGER.info( + f'Tile {tile_instance.get("title")}({tile_instance.get("id")}) is created from {report_fields["createdFrom"]}' + ) return report_fields @@ -712,7 +717,6 @@ def create_scan_job(): # sourcery skip: avoid-builtin-shadow if res.status_code not in (200, 202): 
message = f"API({scan_create_endpoint}) return error code {res.status_code} for workspace id({workspace_id})" - LOGGER.warning(message) raise ConnectionError(message) @@ -727,7 +731,9 @@ def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Boolean: """ minimum_sleep = 3 if timeout < minimum_sleep: - LOGGER.info(f"Setting timeout to minimum_sleep time {minimum_sleep} seconds") + LOGGER.info( + f"Setting timeout to minimum_sleep time {minimum_sleep} seconds" + ) timeout = minimum_sleep max_trial = timeout // minimum_sleep @@ -749,7 +755,6 @@ def wait_for_scan_to_complete(scan_id: str, timeout: int) -> Boolean: if res.status_code != 200: message = f"API({scan_get_endpoint}) return error code {res.status_code} for scan id({scan_id})" - LOGGER.warning(message) raise ConnectionError(message) @@ -785,7 +790,6 @@ def get_scan_result(scan_id: str) -> dict: if res.status_code != 200: message = f"API({scan_result_get_endpoint}) return error code {res.status_code} for scan id({scan_id})" - LOGGER.warning(message) raise ConnectionError(message) @@ -800,7 +804,9 @@ def json_to_dataset_map(scan_result: dict) -> dict: dataset_map: dict = {} if datasets is None or len(datasets) == 0: - LOGGER.warning(f'Workspace {scan_result["name"]}({scan_result["id"]}) does not have datasets') + LOGGER.warning( + f'Workspace {scan_result["name"]}({scan_result["id"]}) does not have datasets' + ) LOGGER.info("Returning empty datasets") return dataset_map @@ -820,12 +826,15 @@ def json_to_dataset_map(scan_result: dict) -> dict: dataset_instance.datasource and dataset_instance.datasource.metadata.is_relational is True ): - LOGGER.info(f"Processing tables attribute for dataset {dataset_instance.name}({dataset_instance.id})") - + LOGGER.info( + f"Processing tables attribute for dataset {dataset_instance.name}({dataset_instance.id})" + ) for table in dataset_dict["tables"]: if "Value.NativeQuery(" in table["source"][0]["expression"]: - LOGGER.warning(f'Table {table["name"]} is created from Custom SQL. Ignoring in processing') + LOGGER.warning( + f'Table {table["name"]} is created from Custom SQL. 
Ignoring in processing' + ) continue @@ -946,16 +955,23 @@ def __to_datahub_dataset( dataset.datasource is None or dataset.datasource.metadata.is_relational is False ): - LOGGER.warning(f"Dataset {dataset.name}({dataset.id}) is not created from relational datasource") + LOGGER.warning( + f"Dataset {dataset.name}({dataset.id}) is not created from relational datasource" + ) return dataset_mcps - LOGGER.info(f"Converting dataset={dataset.name}(id={dataset.id}) to datahub dataset") - + LOGGER.info( + f"Converting dataset={dataset.name}(id={dataset.id}) to datahub dataset" + ) for table in dataset.tables: # Create an URN for dataset - ds_urn = builder.make_dataset_urn(platform=self.__config.dataset_type_mapping[dataset.datasource.type], name=f"{dataset.datasource.database}.{table.schema_name}.{table.name}", env=self.__config.env) + ds_urn = builder.make_dataset_urn( + platform=self.__config.dataset_type_mapping[dataset.datasource.type], + name=f"{dataset.datasource.database}.{table.schema_name}.{table.name}", + env=self.__config.env, + ) LOGGER.info(f"{Constant.Dataset_URN}={ds_urn}") # Create datasetProperties mcp @@ -1165,8 +1181,9 @@ def to_datahub_user( Map PowerBi user to datahub user """ - LOGGER.info(f"Converting user {user.displayName}(id={user.id}) to datahub's user") - + LOGGER.info( + f"Converting user {user.displayName}(id={user.id}) to datahub's user" + ) # Create an URN for user user_urn = builder.make_user_urn(user.get_urn_part()) @@ -1248,8 +1265,9 @@ def to_datahub_work_units( ) -> Set[EquableMetadataWorkUnit]: mcps = [] - LOGGER.info(f"Converting dashboard={dashboard.displayName} to datahub dashboard") - + LOGGER.info( + f"Converting dashboard={dashboard.displayName} to datahub dashboard" + ) # Convert user to CorpUser user_mcps = self.to_datahub_users(dashboard.users) diff --git a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py index e969c2d3aeb3ea..ffc0253a070b2d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py +++ b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py @@ -119,7 +119,9 @@ def __init__(self, config: PulsarSourceConfig, ctx: PipelineContext): if self._is_oauth_authentication_configured(): # Get OpenId configuration from issuer, e.g. 
token_endpoint - oid_config_url = f"{self.config.issuer_url}/.well-known/openid-configuration" + oid_config_url = ( + f"{self.config.issuer_url}/.well-known/openid-configuration" + ) oid_config_response = requests.get( oid_config_url, verify=False, allow_redirects=False ) @@ -127,8 +129,9 @@ def __init__(self, config: PulsarSourceConfig, ctx: PipelineContext): if oid_config_response: self.config.oid_config.update(oid_config_response.json()) else: - logger.error(f"Unexpected response while getting discovery document using {oid_config_url} : {oid_config_response}") - + logger.error( + f"Unexpected response while getting discovery document using {oid_config_url} : {oid_config_response}" + ) if "token_endpoint" not in self.config.oid_config: raise Exception( @@ -319,8 +322,11 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: ] # Report the Pulsar broker version we are communicating with - self.report.report_pulsar_version(self.session.get(f"{self.base_url}/brokers/version", timeout=self.config.timeout).text) - + self.report.report_pulsar_version( + self.session.get( + f"{self.base_url}/brokers/version", timeout=self.config.timeout + ).text + ) # If no tenants are provided, request all tenants from cluster using /admin/v2/tenants endpoint. # Requesting cluster tenant information requires superuser privileges @@ -337,8 +343,10 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: if self.config.tenant_patterns.allowed(tenant): # Get namespaces belonging to a tenant, /admin/v2/%s/namespaces # A tenant admin role has sufficient privileges to perform this action - namespaces = self._get_pulsar_metadata(f"{self.base_url}/namespaces/{tenant}") or [] - + namespaces = ( + self._get_pulsar_metadata(f"{self.base_url}/namespaces/{tenant}") + or [] + ) for namespace in namespaces: self.report.namespaces_scanned += 1 @@ -408,8 +416,10 @@ def _get_schema_and_fields( pulsar_schema: Optional[PulsarSchema] = None - schema_url = self.base_url + f"/schemas/{pulsar_topic.tenant}/{pulsar_topic.namespace}/{pulsar_topic.topic}/schema" - + schema_url = ( + self.base_url + + f"/schemas/{pulsar_topic.tenant}/{pulsar_topic.namespace}/{pulsar_topic.topic}/schema" + ) schema_payload = self._get_pulsar_metadata(schema_url) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index 7aceafd22bd5aa..aa1d093e02e85f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -204,19 +204,29 @@ def get_full_qualified_name(self, database_name: str, table_name: str) -> str: def get_full_qualified_name(platform: str, database_name: str, table_name: str) -> str: if platform == "athena": - return AthenaQualifiedNameParser().get_full_qualified_name(database_name, table_name) + return AthenaQualifiedNameParser().get_full_qualified_name( + database_name, table_name + ) elif platform == "bigquery": - return BigqueryQualifiedNameParser().get_full_qualified_name(database_name, table_name) + return BigqueryQualifiedNameParser().get_full_qualified_name( + database_name, table_name + ) elif platform == "mssql": - return MssqlQualifiedNameParser().get_full_qualified_name(database_name, table_name) + return MssqlQualifiedNameParser().get_full_qualified_name( + database_name, table_name + ) elif platform == "mysql": - return MysqlQualifiedNameParser().get_full_qualified_name(database_name, table_name) + return MysqlQualifiedNameParser().get_full_qualified_name( + database_name, 
table_name + ) elif platform == "postgres": - return PostgresQualifiedNameParser().get_full_qualified_name(database_name, table_name) + return PostgresQualifiedNameParser().get_full_qualified_name( + database_name, table_name + ) else: return f"{database_name}.{table_name}" @@ -589,7 +599,9 @@ def _process_dashboard_response( # the API is id based not slug based # Tested the same with a Redash instance dashboard_id = dashboard_response["id"] - dashboard_data = self.client._get(f"api/dashboards/{dashboard_id}").json() + dashboard_data = self.client._get( + f"api/dashboards/{dashboard_id}" + ).json() except Exception: # This does not work in our testing but keeping for now because # people in community are using Redash connector successfully diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 2a6696f8823683..5d33c593c85c67 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -401,8 +401,11 @@ def _create_upstream_table_lineage( table_name = table.get("name") or table["id"] table_path = f"{project.replace('/', REPLACE_SLASH_CHAR)}/{datasource['name']}/{table_name}" - self.upstream_tables[table_urn] = table.get("columns", []), table_path, table.get("isEmbedded") or False - + self.upstream_tables[table_urn] = ( + table.get("columns", []), + table_path, + table.get("isEmbedded") or False, + ) return upstream_tables @@ -626,7 +629,18 @@ def _get_schema_metadata_for_datasource( ) fields.append(schema_field) - return SchemaMetadata(schemaName="test", platform=f"urn:li:dataPlatform:{self.platform}", version=0, fields=fields, hash="", platformSchema=OtherSchema(rawSchema=""),) if fields else None + return ( + SchemaMetadata( + schemaName="test", + platform=f"urn:li:dataPlatform:{self.platform}", + version=0, + fields=fields, + hash="", + platformSchema=OtherSchema(rawSchema=""), + ) + if fields + else None + ) def get_metadata_change_event( self, snap_shot: Union["DatasetSnapshot", "DashboardSnapshot", "ChartSnapshot"] diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py index 644be7afbe749a..984b8fa917d647 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py @@ -213,10 +213,7 @@ def authenticator_type_is_valid(cls, v, values, field): f"but should be set when using {v} authentication" ) if values.get("oauth_config").use_certificate is True: - if ( - values.get("oauth_config").base64_encoded_oauth_private_key - is None - ): + if values.get("oauth_config").base64_encoded_oauth_private_key is None: raise ValueError( "'base64_encoded_oauth_private_key' was none " "but should be set when using certificate for oauth_config" diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py index c6f641c8fcd6e5..82cfecbddfed39 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py @@ -132,7 +132,9 @@ def _should_process( return True # fall through, no entity type matched return False - elif isinstance(record, (MetadataChangeProposalWrapper, MetadataChangeProposalClass)): + elif isinstance( + record, 
(MetadataChangeProposalWrapper, MetadataChangeProposalClass) + ): return record.entityType in entity_types # default to process everything that is not caught by above checks diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py index 572ecdf36302d5..5dca3541493156 100644 --- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py +++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py @@ -625,7 +625,10 @@ def get_dataset_partitions(self, batch_identifier, data_asset): query = data_asset.batches[ batch_identifier ].batch_request.runtime_parameters["query"] - partitionSpec = PartitionSpecClass(type=PartitionTypeClass.QUERY, partition=f"Query_{builder.datahub_guid(query)}") + partitionSpec = PartitionSpecClass( + type=PartitionTypeClass.QUERY, + partition=f"Query_{builder.datahub_guid(query)}", + ) batchSpec = BatchSpec( nativeBatchId=batch_identifier, diff --git a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py index c83ec153144f0f..6e8d8da5f3fb82 100644 --- a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py +++ b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py @@ -52,10 +52,12 @@ def _parse_datatype_string( raise ValueError("'>' should be the last char, but got: %s" % s) parts = HiveColumnToAvroConverter._ignore_brackets_split(s[4:-1], ",") if len(parts) != 2: - raise ValueError(( - "The map type string format is: 'map', " - + f"but got: {s}" - )) + raise ValueError( + ( + "The map type string format is: 'map', " + + f"but got: {s}" + ) + ) kt = HiveColumnToAvroConverter._parse_datatype_string(parts[0]) vt = HiveColumnToAvroConverter._parse_datatype_string(parts[1]) @@ -103,7 +105,12 @@ def _parse_struct_fields_string(s: str, **kwargs: Any) -> Dict[str, object]: for part in parts: name_and_type = HiveColumnToAvroConverter._ignore_brackets_split(part, ":") if len(name_and_type) != 2: - raise ValueError(("The struct field string format is: 'field_name:field_type', " + f"but got: {part}")) + raise ValueError( + ( + "The struct field string format is: 'field_name:field_type', " + + f"but got: {part}" + ) + ) field_name = name_and_type[0].strip() if field_name.startswith("`"): @@ -120,7 +127,12 @@ def _parse_struct_fields_string(s: str, **kwargs: Any) -> Dict[str, object]: else: struct_name = f'__struct_{str(uuid.uuid4()).replace("-", "")}' - return {"type": "record", "name": struct_name, "fields": fields, "native_data_type": f"struct<{s}>"} + return { + "type": "record", + "name": struct_name, + "fields": fields, + "native_data_type": f"struct<{s}>", + } @staticmethod def _parse_basic_datatype_string(s: str) -> Dict[str, object]: diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index af4916a4055747..7b0d6d1dd92229 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -172,7 +172,7 @@ def get_operation_value( def sanitize_owner_ids(self, owner_id: str) -> str: if owner_id.__contains__("@"): - owner_id = owner_id[:owner_id.index("@")] + owner_id = owner_id[: owner_id.index("@")] return owner_id def is_match(self, match_clause: Any, raw_props_value: Any) -> bool: diff --git a/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py 
b/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py index 63b3edaf8c0556..6fe57b297d4528 100644 --- a/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py +++ b/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py @@ -134,7 +134,9 @@ def get_columns(self) -> List[str]: # Reverting back all the previously renamed words which confuses the parser result = {"date" if c == self._DATE_SWAP_TOKEN else c for c in result} - result = {"timestamp" if c == self._TIMESTAMP_SWAP_TOKEN else c for c in list(result)} + result = { + "timestamp" if c == self._TIMESTAMP_SWAP_TOKEN else c for c in list(result) + } # swap back renamed date column return list(result) diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn.py b/metadata-ingestion/src/datahub/utilities/urns/urn.py index 7ae6d37472621a..ca664a7848b850 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/urn.py +++ b/metadata-ingestion/src/datahub/utilities/urns/urn.py @@ -151,4 +151,12 @@ def __hash__(self) -> int: return hash((self._domain, self._entity_type) + tuple(self._entity_id)) def __eq__(self, other: object) -> bool: - return (self._entity_id == other._entity_id and self._domain == other._domain and self._entity_type == other._entity_type) if isinstance(other, Urn) else False + return ( + ( + self._entity_id == other._entity_id + and self._domain == other._domain + and self._entity_type == other._entity_type + ) + if isinstance(other, Urn) + else False + ) From 95d47dbce737cb9bab753c10fcacf7105178909b Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 9 Jun 2022 14:39:42 +1000 Subject: [PATCH 70/88] running isort --- metadata-ingestion/src/datahub/ingestion/source/feast.py | 1 + metadata-ingestion/src/datahub/ingestion/source/metabase.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/feast.py b/metadata-ingestion/src/datahub/ingestion/source/feast.py index 0c67e2d036083f..7d4f5360ad4202 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/feast.py +++ b/metadata-ingestion/src/datahub/ingestion/source/feast.py @@ -20,6 +20,7 @@ ValueType, ) from feast.data_source import DataSource, RequestDataSource + import datahub.emitter.mce_builder as builder from datahub.configuration.common import ConfigModel from datahub.emitter.mce_builder import DEFAULT_ENV diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index bd930f90ff3aff..98bcbcba591ebd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone from functools import lru_cache from typing import Dict, Iterable, Optional @@ -42,7 +42,6 @@ OwnershipTypeClass, ) from datahub.utilities import config_clean -from datetime import timezone class MetabaseConfig(DatasetLineageProviderConfigBase): From 4d708b585f686d8930589c2bd8aa4b2e04ffacdc Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 9 Jun 2022 14:41:53 +1000 Subject: [PATCH 71/88] lint fix --- .../library/dataset_add_column_tag.py | 4 +- .../library/dataset_add_column_term.py | 6 +-- .../lineage_job_dataflow_new_api_simple.py | 37 +++++++++++++++---- .../transforms/custom_transform_example.py | 4 +- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/metadata-ingestion/examples/library/dataset_add_column_tag.py 
b/metadata-ingestion/examples/library/dataset_add_column_tag.py index a457d12f493ae0..8a15d33ff78779 100644 --- a/metadata-ingestion/examples/library/dataset_add_column_tag.py +++ b/metadata-ingestion/examples/library/dataset_add_column_tag.py @@ -28,9 +28,7 @@ def get_simple_field_path_from_v2_field_path(field_path: str) -> str: return field_path # this is a v2 field path tokens = [ - t - for t in field_path.split(".") - if not (t.startswith("[") or t.endswith("]")) + t for t in field_path.split(".") if not (t.startswith("[") or t.endswith("]")) ] return ".".join(tokens) diff --git a/metadata-ingestion/examples/library/dataset_add_column_term.py b/metadata-ingestion/examples/library/dataset_add_column_term.py index ea5cd1f632f743..d656b5bd4502e7 100644 --- a/metadata-ingestion/examples/library/dataset_add_column_term.py +++ b/metadata-ingestion/examples/library/dataset_add_column_term.py @@ -28,11 +28,9 @@ def get_simple_field_path_from_v2_field_path(field_path: str) -> str: return field_path # this is a v2 field path tokens = [ - t - for t in field_path.split(".") - if not (t.startswith("[") or t.endswith("]")) + t for t in field_path.split(".") if not (t.startswith("[") or t.endswith("]")) ] - + return ".".join(tokens) diff --git a/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py b/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py index d339d35110db1d..1871a8af09e50c 100644 --- a/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py +++ b/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py @@ -1,5 +1,5 @@ import uuid -from datetime import datetime +from datetime import datetime, timezone from datahub.api.entities.datajob import DataFlow, DataJob from datahub.api.entities.dataprocess.dataprocess_instance import ( @@ -8,7 +8,6 @@ ) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datetime import timezone emitter = DatahubRestEmitter("http://localhost:8080") jobFlow = DataFlow(cluster="prod", orchestrator="airflow", id="flow_api_simple") @@ -37,7 +36,9 @@ jobFlowRun = DataProcessInstance.from_dataflow( dataflow=jobFlow, id=f"{jobFlow.id}-{uuid.uuid4()}" ) -jobFlowRun.emit_process_start(emitter, int(datetime.now(timezone.utc).timestamp() * 1000)) +jobFlowRun.emit_process_start( + emitter, int(datetime.now(timezone.utc).timestamp() * 1000) +) jobRun = DataProcessInstance.from_datajob( @@ -45,7 +46,11 @@ ) jobRun.emit_process_start(emitter, int(datetime.now(timezone.utc).timestamp() * 1000)) -jobRun.emit_process_end(emitter, int(datetime.now(timezone.utc).timestamp() * 1000), result=InstanceRunResult.SUCCESS) +jobRun.emit_process_end( + emitter, + int(datetime.now(timezone.utc).timestamp() * 1000), + result=InstanceRunResult.SUCCESS, +) job2Run = DataProcessInstance.from_datajob( @@ -53,7 +58,11 @@ ) job2Run.emit_process_start(emitter, int(datetime.now(timezone.utc).timestamp() * 1000)) -job2Run.emit_process_end(emitter, int(datetime.now(timezone.utc).timestamp() * 1000), result=InstanceRunResult.SUCCESS) +job2Run.emit_process_end( + emitter, + int(datetime.now(timezone.utc).timestamp() * 1000), + result=InstanceRunResult.SUCCESS, +) job3Run = DataProcessInstance.from_datajob( @@ -61,7 +70,11 @@ ) job3Run.emit_process_start(emitter, int(datetime.now(timezone.utc).timestamp() * 1000)) -job3Run.emit_process_end(emitter, int(datetime.now(timezone.utc).timestamp() * 1000), result=InstanceRunResult.SUCCESS) +job3Run.emit_process_end( + emitter, + 
int(datetime.now(timezone.utc).timestamp() * 1000), + result=InstanceRunResult.SUCCESS, +) job4Run = DataProcessInstance.from_datajob( @@ -69,7 +82,15 @@ ) job4Run.emit_process_start(emitter, int(datetime.now(timezone.utc).timestamp() * 1000)) -job4Run.emit_process_end(emitter, int(datetime.now(timezone.utc).timestamp() * 1000), result=InstanceRunResult.SUCCESS) +job4Run.emit_process_end( + emitter, + int(datetime.now(timezone.utc).timestamp() * 1000), + result=InstanceRunResult.SUCCESS, +) -jobFlowRun.emit_process_end(emitter, int(datetime.now(timezone.utc).timestamp() * 1000), result=InstanceRunResult.SUCCESS) +jobFlowRun.emit_process_end( + emitter, + int(datetime.now(timezone.utc).timestamp() * 1000), + result=InstanceRunResult.SUCCESS, +) diff --git a/metadata-ingestion/examples/transforms/custom_transform_example.py b/metadata-ingestion/examples/transforms/custom_transform_example.py index 4a3d16d4a4dd94..57560e75cf7e92 100644 --- a/metadata-ingestion/examples/transforms/custom_transform_example.py +++ b/metadata-ingestion/examples/transforms/custom_transform_example.py @@ -61,7 +61,9 @@ def transform_aspect( # type: ignore assert aspect is None or isinstance(aspect, OwnershipClass) if owners_to_add: - ownership = aspect or OwnershipClass(owners=[],) + ownership = aspect or OwnershipClass( + owners=[], + ) ownership.owners.extend(owners_to_add) From ee0bc05294d7d0e01e86e49126e83995a07e2b18 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 9 Jun 2022 15:32:58 +1000 Subject: [PATCH 72/88] Update timeline_cli.py --- metadata-ingestion/src/datahub/cli/timeline_cli.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/timeline_cli.py b/metadata-ingestion/src/datahub/cli/timeline_cli.py index 516ca7bd7fe7b3..579dff5425a112 100644 --- a/metadata-ingestion/src/datahub/cli/timeline_cli.py +++ b/metadata-ingestion/src/datahub/cli/timeline_cli.py @@ -38,9 +38,10 @@ def pretty_id(id: Optional[str]) -> str: # breakpoint() assert id is not None if id.startswith("urn:li:datasetField:") or id.startswith("urn:li:schemaField:"): - if schema_field_key := schema_field_urn_to_key( + schema_field_key = schema_field_urn_to_key( id.replace("urn:li:datasetField", "urn:li:schemaField") - ): + ) + if schema_field_key: assert schema_field_key is not None field_path = schema_field_key.fieldPath @@ -49,7 +50,8 @@ def pretty_id(id: Optional[str]) -> str: return f"{colored('field','cyan')}:{colored(pretty_field_path(id),'white')}" if id.startswith("urn:li:dataset"): - if dataset_key := dataset_urn_to_key(id): + dataset_key = dataset_urn_to_key(id) + if dataset_key: return f"{colored('dataset','cyan')}:{colored(dataset_key.platform,'white')}:{colored(dataset_key.name,'white')}" # failed to prettify, return original return id From 8010744cddbb9409dd3d0b3591e4f80d3c1d3329 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 9 Jun 2022 16:32:30 +1000 Subject: [PATCH 73/88] logger warn to logger warning --- metadata-ingestion/src/datahub/cli/delete_cli.py | 2 +- .../src/datahub/ingestion/source/aws/glue.py | 6 +++--- metadata-ingestion/src/datahub/ingestion/source/looker.py | 4 ++-- .../src/datahub/ingestion/source/looker_common.py | 2 +- metadata-ingestion/src/datahub/ingestion/source/nifi.py | 8 ++++---- .../src/datahub/ingestion/source/s3/source.py | 6 +++--- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/delete_cli.py b/metadata-ingestion/src/datahub/cli/delete_cli.py index 
c4ff2b5e6f9361..a11fa3e6703520 100644 --- a/metadata-ingestion/src/datahub/cli/delete_cli.py +++ b/metadata-ingestion/src/datahub/cli/delete_cli.py @@ -182,7 +182,7 @@ def delete( else: # log warn include_removed + hard is the only way to work if include_removed and soft: - logger.warn( + logger.warning( "A filtered delete including soft deleted entities is redundant, because it is a soft delete by default. Please use --include-removed in conjunction with --hard" ) # Filter based delete diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index a7171cfe42caf0..6594da6ddf2063 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -1059,7 +1059,7 @@ def get_s3_tags() -> Optional[GlobalTagsClass]: ] ) except self.s3_client.exceptions.ClientError: - logger.warn(f"No tags found for bucket={bucket_name}") + logger.warning(f"No tags found for bucket={bucket_name}") if self.source_config.use_s3_object_tags: key_prefix = s3_util.get_key_prefix( table["StorageDescriptor"]["Location"] @@ -1078,7 +1078,7 @@ def get_s3_tags() -> Optional[GlobalTagsClass]: else: # Unlike bucket tags, if an object does not have tags, it will just return an empty array # as opposed to an exception. - logger.warn( + logger.warning( f"No tags found for bucket={bucket_name} key={key_prefix}" ) if len(tags_to_add) == 0: @@ -1097,7 +1097,7 @@ def get_s3_tags() -> Optional[GlobalTagsClass]: [current_tag.tag for current_tag in current_tags.tags] ) else: - logger.warn( + logger.warning( "Could not connect to DatahubApi. No current tags to maintain" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker.py b/metadata-ingestion/src/datahub/ingestion/source/looker.py index 2afa4ca05cbb36..a469e16ab34c47 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker.py @@ -283,8 +283,8 @@ def get_by_id( self.user_map[id] = looker_user return looker_user except SDKError as e: - logger.warn(f"Could not find user with id {id}") - logger.warn(f"Failure was {e}") + logger.warning(f"Could not find user with id {id}") + logger.warning(f"Failure was {e}") return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker_common.py index 668d3d3e2d898f..b2348fee9ace40 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker_common.py @@ -623,7 +623,7 @@ def from_api( # noqa: C901 source_file=explore.source_file, ) except SDKError as e: - logger.warn(f"Failed to extract explore {explore_name} from model {model}.") + logger.warning(f"Failed to extract explore {explore_name} from model {model}.") logger.debug( f"Failed to extract explore {explore_name} from model {model} with {e}" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index e9bc5f0b5daba0..9677d5cbd3b5cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -620,7 +620,7 @@ def create_nifi_flow(self): if about_response.ok: nifi_version = about_response.json().get("about", {}).get("version") else: - logger.warn("Failed to fetch version for nifi") + logger.warning("Failed to fetch version for nifi") cluster_response = 
self.session.get( url=urljoin(self.config.site_url, CLUSTER_ENDPOINT) ) @@ -630,7 +630,7 @@ def create_nifi_flow(self): cluster_response.json().get("clusterSummary", {}).get("clustered") ) else: - logger.warn("Failed to fetch cluster summary for flow") + logger.warning("Failed to fetch cluster summary for flow") pg_response = self.session.get( url=urljoin(self.config.site_url, PG_ENDPOINT) + "root" ) @@ -715,7 +715,7 @@ def fetch_provenance_events( attempts = 5 # wait for at most 5 attempts 5*1= 5 seconds while (not provenance.get("finished", False)) and attempts > 0: - logger.warn( + logger.warning( f"Provenance query not completed, attempts left : {attempts}" ) # wait until the uri returns percentcomplete 100 @@ -757,7 +757,7 @@ def fetch_provenance_events( f"provenance events could not be fetched for processor \ {processor.id} of type {processor.name}", ) - logger.warn(provenance_response.text) + logger.warning(provenance_response.text) return def report_warning(self, key: str, reason: str) -> None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index 29f54dc9449e3e..d3943745cbc81f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -689,7 +689,7 @@ def get_s3_tags( ] ) except s3.meta.client.exceptions.ClientError: - logger.warn(f"No tags found for bucket={bucket_name}") + logger.warning(f"No tags found for bucket={bucket_name}") if self.source_config.use_s3_object_tags and key_name is not None: s3_client = self.source_config.aws_config.get_s3_client() @@ -707,7 +707,7 @@ def get_s3_tags( else: # Unlike bucket tags, if an object does not have tags, it will just return an empty array # as opposed to an exception. - logger.warn(f"No tags found for bucket={bucket_name} key={key_name}") + logger.warning(f"No tags found for bucket={bucket_name} key={key_name}") if len(tags_to_add) == 0: return None if self.ctx.graph is not None: @@ -722,7 +722,7 @@ def get_s3_tags( [current_tag.tag for current_tag in current_tags.tags] ) else: - logger.warn("Could not connect to DatahubApi. No current tags to maintain") + logger.warning("Could not connect to DatahubApi. 
No current tags to maintain") # Remove duplicate tags tags_to_add = list(set(tags_to_add)) new_tags = GlobalTagsClass( From b168138f30a8e8755dd4e3e1200a17700e348fc8 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 11 Jun 2022 17:00:40 +1000 Subject: [PATCH 74/88] Update business_glossary.yml --- .../examples/bootstrap_data/business_glossary.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/examples/bootstrap_data/business_glossary.yml b/metadata-ingestion/examples/bootstrap_data/business_glossary.yml index 71fd59bbccc462..6669d393b7211d 100644 --- a/metadata-ingestion/examples/bootstrap_data/business_glossary.yml +++ b/metadata-ingestion/examples/bootstrap_data/business_glossary.yml @@ -40,7 +40,7 @@ nodes: inherits: - Classification.Sensitive - name: ClientsAndAccounts - description: Provides basic concepts such as account, account holder, account provider, relationship manager that are commonly used by financial services providers to describe customers and to determine counterparts identities + description: Provides basic concepts such as account, account holder, account provider, relationship manager that are commonly used by financial services providers to describe customers and to determine counterparty identities owners: groups: - finance From 980826cd1c3dbff831794d527becb1027567c58d Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 11 Jun 2022 20:37:33 +1000 Subject: [PATCH 75/88] Update ldap.py --- metadata-ingestion/src/datahub/ingestion/source/ldap.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/ldap.py b/metadata-ingestion/src/datahub/ingestion/source/ldap.py index 633651c7b171a6..f3b40610d0210a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ldap.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ldap.py @@ -153,7 +153,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: _rtype, rdata, _rmsgid, serverctrls = self.ldap_client.result3(msgid) except ldap.LDAPError as e: self.report.report_failure( - "ldap-control", "LDAP search failed: {}".format(e) + "ldap-control", f"LDAP search failed: {e}" ) break @@ -211,9 +211,8 @@ def handle_user(self, dn: str, attrs: Dict[str, Any]) -> Iterable[MetadataWorkUn manager_ldap = guess_person_ldap(m_attrs) except ldap.LDAPError as e: self.report.report_warning( - dn, "manager LDAP search failed: {}".format(e) + dn, f"manager LDAP search failed: {e}" ) - mce = self.build_corp_user_mce(dn, attrs, manager_ldap) if mce: wu = MetadataWorkUnit(dn, mce) From 1e60f1b3b5f95c301587d4a8bd865b88d580553f Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Tue, 14 Jun 2022 13:18:07 +1000 Subject: [PATCH 76/88] lint --- metadata-ingestion/src/datahub/ingestion/source/ldap.py | 8 ++------ .../src/datahub/ingestion/source/looker_common.py | 4 +++- .../src/datahub/ingestion/source/s3/source.py | 4 +++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/ldap.py b/metadata-ingestion/src/datahub/ingestion/source/ldap.py index f3b40610d0210a..45d1c26e21495f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ldap.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ldap.py @@ -152,9 +152,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: ) _rtype, rdata, _rmsgid, serverctrls = self.ldap_client.result3(msgid) except ldap.LDAPError as e: - self.report.report_failure( - "ldap-control", f"LDAP search failed: {e}" - ) + 
self.report.report_failure("ldap-control", f"LDAP search failed: {e}") break for dn, attrs in rdata: @@ -210,9 +208,7 @@ def handle_user(self, dn: str, attrs: Dict[str, Any]) -> Iterable[MetadataWorkUn _m_dn, m_attrs = self.ldap_client.result3(manager_msgid)[1][0] manager_ldap = guess_person_ldap(m_attrs) except ldap.LDAPError as e: - self.report.report_warning( - dn, f"manager LDAP search failed: {e}" - ) + self.report.report_warning(dn, f"manager LDAP search failed: {e}") mce = self.build_corp_user_mce(dn, attrs, manager_ldap) if mce: wu = MetadataWorkUnit(dn, mce) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker_common.py index b2348fee9ace40..28f2b23c1a258a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker_common.py @@ -623,7 +623,9 @@ def from_api( # noqa: C901 source_file=explore.source_file, ) except SDKError as e: - logger.warning(f"Failed to extract explore {explore_name} from model {model}.") + logger.warning( + f"Failed to extract explore {explore_name} from model {model}." + ) logger.debug( f"Failed to extract explore {explore_name} from model {model} with {e}" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index d3943745cbc81f..0856c58e69ed5d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -722,7 +722,9 @@ def get_s3_tags( [current_tag.tag for current_tag in current_tags.tags] ) else: - logger.warning("Could not connect to DatahubApi. No current tags to maintain") + logger.warning( + "Could not connect to DatahubApi. 
No current tags to maintain" + ) # Remove duplicate tags tags_to_add = list(set(tags_to_add)) new_tags = GlobalTagsClass( From 7c5e98da8706aceb940be1dca996cebd75b5d6f0 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Mon, 20 Jun 2022 17:56:38 +1000 Subject: [PATCH 77/88] Update cli_utils.py --- metadata-ingestion/src/datahub/cli/cli_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index d234beb2dafbfe..ee7478e075d848 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -579,7 +579,7 @@ def post_entity( ) response = session.post(url, payload) response.raise_for_status() - return response.status_code + return int(response.status_code) type_class_to_name_map = { From b59d2edc38b3999018be4d9e1ed7dbf9ef21157b Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Mon, 20 Jun 2022 21:56:36 +1000 Subject: [PATCH 78/88] Update cli_utils.py --- metadata-ingestion/src/datahub/cli/cli_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index ee7478e075d848..b3c20dad16de22 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -554,7 +554,7 @@ def post_entity( aspect_name: str, aspect_value: Dict, cached_session_host: Optional[Tuple[Session, str]] = None, -) -> Dict: +) -> int: session, gms_host = cached_session_host or get_session_and_host() endpoint: str = "/aspects/?action=ingestProposal" @@ -579,7 +579,7 @@ def post_entity( ) response = session.post(url, payload) response.raise_for_status() - return int(response.status_code) + return (response.status_code) type_class_to_name_map = { From 4820f6239d53ef262b208bd4846ece0fd1c8b18d Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Mon, 20 Jun 2022 21:56:45 +1000 Subject: [PATCH 79/88] Update cli_utils.py --- metadata-ingestion/src/datahub/cli/cli_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index b3c20dad16de22..c7fab41b4d8acc 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -579,7 +579,7 @@ def post_entity( ) response = session.post(url, payload) response.raise_for_status() - return (response.status_code) + return response.status_code type_class_to_name_map = { From 978111f172d6bafc8d9f4c552402b981c47ca92a Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 24 Jun 2022 21:47:29 +1000 Subject: [PATCH 80/88] updates as per request --- metadata-ingestion/src/datahub/configuration/common.py | 8 ++++---- metadata-ingestion/src/datahub/ingestion/source/looker.py | 3 +++ metadata-ingestion/src/datahub/ingestion/source/lookml.py | 5 ++++- .../src/datahub/ingestion/source_config/pulsar.py | 2 +- metadata-ingestion/src/datahub/utilities/urns/urn.py | 2 +- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index 80f8d717daf7a5..86b366a008962b 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -101,7 +101,7 @@ class OauthConfiguration(ConfigModel): class AllowDenyPattern(ConfigModel): - """A class to 
store allow deny regex's""" + """A class to store allow deny regexs""" allow: List[str] = Field( default=[".*"], @@ -143,7 +143,7 @@ def allowed(self, string: str) -> bool: def is_fully_specified_allow_list(self) -> bool: """ - If the allow patterns are literals and not full regex's, then it is considered + If the allow patterns are literals and not full regexs, then it is considered fully specified. This is useful if you want to convert a 'list + filter' pattern into a 'search for the ones that are allowed' pattern, which can be much more efficient in some cases. @@ -159,7 +159,7 @@ def get_allowed_list(self) -> List[str]: class KeyValuePattern(ConfigModel): - """A class to store allow deny regex's""" + """A class to store allow deny regexs""" rules: Dict[str, List[str]] = {".*": []} alphabet: str = "[A-Za-z0-9 _.-]" @@ -182,7 +182,7 @@ def matched(self, string: str) -> bool: def is_fully_specified_key(self) -> bool: """ - If the allow patterns are literals and not full regex's, then it is considered + If the allow patterns are literals and not full regexs, then it is considered fully specified. This is useful if you want to convert a 'list + filter' pattern into a 'search for the ones that are allowed' pattern, which can be much more efficient in some cases. diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker.py b/metadata-ingestion/src/datahub/ingestion/source/looker.py index a469e16ab34c47..7c1368f814e4e0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker.py @@ -448,6 +448,7 @@ def _get_looker_dashboard_element( # noqa: C901 if element.query is not None: fields = self._get_fields_from_query(element.query) + # Get the explore from the view directly explores = [element.query.view] if element.query.view is not None else [] logger.debug( "Element {}: Explores added: {}".format(element.title, explores) @@ -468,6 +469,7 @@ def _get_looker_dashboard_element( # noqa: C901 upstream_fields=fields, ) + # Dashboard elements can *alternatively* link to an existing look elif element.look is not None: # we pick from element title by default, falling back to look title. title: str = ( @@ -505,6 +507,7 @@ def _get_looker_dashboard_element( # noqa: C901 upstream_fields=fields, ) + # Failing the above two approaches, pick out details from result_maker elif element.result_maker is not None: model: str = "" fields = [] diff --git a/metadata-ingestion/src/datahub/ingestion/source/lookml.py b/metadata-ingestion/src/datahub/ingestion/source/lookml.py index 203bf199c27ad5..45203f9b3891aa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/lookml.py +++ b/metadata-ingestion/src/datahub/ingestion/source/lookml.py @@ -650,7 +650,10 @@ def from_looker_dict( ) # If not a derived table, then this view essentially wraps an existing - # object in the database. + # object in the database. If sql_table_name is set, there is a single + # dependency in the view, on the sql_table_name. 
+ # Otherwise, default to the view name as per the docs: + # https://docs.looker.com/reference/view-params/sql_table_name-for-view sql_table_names = [view_name] if sql_table_name is None else [sql_table_name] output_looker_view = LookerView( id=LookerViewId( diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source_config/pulsar.py index 836960ac50633f..e1bdf072787cd5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/pulsar.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/pulsar.py @@ -30,7 +30,7 @@ def _is_valid_hostname(hostname: str) -> bool: """ if len(hostname) > 253: return False - # Hostname's ending on a dot are valid, if present strip exactly one + # Hostnames ending on a dot are valid, if present strip exactly one if hostname[-1] == ".": hostname = hostname[:-1] allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(? List[str]: part_start = i + 1 if start_paren_count != 0: - raise InvalidUrnError(f"{entity_id}, mismatched parent nesting") + raise InvalidUrnError(f"{entity_id}, mismatched paren nesting") parts.append(entity_id[part_start:-1]) From 391fa2834a4d2367a2c38d8d2530566977db8c14 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 24 Jun 2022 21:55:24 +1000 Subject: [PATCH 81/88] Update kafka.py --- metadata-ingestion/src/datahub/configuration/kafka.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/configuration/kafka.py b/metadata-ingestion/src/datahub/configuration/kafka.py index e752285cdde2af..197322a2e566e1 100644 --- a/metadata-ingestion/src/datahub/configuration/kafka.py +++ b/metadata-ingestion/src/datahub/configuration/kafka.py @@ -27,7 +27,7 @@ def bootstrap_host_colon_port_comma(cls, val: str) -> str: else: host = entry assert re.match( - # TODO: This regex is quite loose. Many invalid hostname's or IPs will slip through, + # This regex is quite loose. Many invalid hostname's or IPs will slip through, # but it serves as a good first line of validation. We defer to Kafka for the # remaining validation. 
r"^[\w\-\.\:]+$", From e29961885df610aaaa1272719f9aa52930bb7d14 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 24 Jun 2022 22:01:43 +1000 Subject: [PATCH 82/88] Update powerbi.py --- metadata-ingestion/src/datahub/ingestion/source/powerbi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi.py index a4182ab6b6824f..ff0924349c0e91 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi.py @@ -695,7 +695,7 @@ def get_workspace(self, workspace_id: str) -> Workspace: POWERBI_ADMIN_BASE_URL=self.__config.admin_base_url ) - def create_scan_job(): # sourcery skip: avoid-builtin-shadow + def create_scan_job(): """ Create scan job on PowerBi for the workspace """ From 76150f1ca241736a1e8c816342bdc99d4040f141 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 25 Jun 2022 08:45:17 +1000 Subject: [PATCH 83/88] lint --- metadata-ingestion/scripts/docgen.py | 23 ++++++++++++------- .../src/datahub/ingestion/source/lookml.py | 2 +- .../src/datahub/utilities/mapping.py | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/metadata-ingestion/scripts/docgen.py b/metadata-ingestion/scripts/docgen.py index 03434d291d3024..fa59cd20dda1c0 100644 --- a/metadata-ingestion/scripts/docgen.py +++ b/metadata-ingestion/scripts/docgen.py @@ -605,9 +605,10 @@ def generate( os.makedirs(config_dir, exist_ok=True) with open(f"{config_dir}/{plugin_name}_config.json", "w") as f: f.write(source_config_class.schema_json(indent=2)) - - create_or_update(source_documentation, - [platform_id, "plugins", plugin_name, "config_schema"], + + create_or_update( + source_documentation, + [platform_id, "plugins", plugin_name, "config_schema"], source_config_class.schema_json(indent=2) or "", ) @@ -649,7 +650,9 @@ def generate( with open(platform_doc_file, "w") as f: if "name" in platform_docs: - f.write(f"import Tabs from '@theme/Tabs';\nimport TabItem from '@theme/TabItem';\n\n") + f.write( + f"import Tabs from '@theme/Tabs';\nimport TabItem from '@theme/TabItem';\n\n" + ) f.write(f"# {platform_docs['name']}\n") if len(platform_docs["plugins"].keys()) > 1: # More than one plugin used to provide integration with this platform @@ -722,8 +725,10 @@ def generate( f.write("\n```\n") if "config" in plugin_docs: f.write("\n### Config Details\n") - f.write(""" - \n\n""") + f.write( + """ + \n\n""" + ) f.write( "Note that a `.` is used to denote nested fields in the YAML recipe.\n\n" ) @@ -733,7 +738,8 @@ def generate( for doc in plugin_docs["config"]: f.write(doc) f.write("\n\n\n") - f.write(f""" + f.write( + f""" The [JSONSchema](https://json-schema.org/) for this configuration is inlined below.\n\n @@ -741,7 +747,8 @@ def generate( {plugin_docs['config_schema']} ```\n\n -\n\n""") +\n\n""" + ) # insert custom plugin docs after config details f.write(plugin_docs.get("custom_docs", "")) if "classname" in plugin_docs: diff --git a/metadata-ingestion/src/datahub/ingestion/source/lookml.py b/metadata-ingestion/src/datahub/ingestion/source/lookml.py index 45203f9b3891aa..aee876da2f7bac 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/lookml.py +++ b/metadata-ingestion/src/datahub/ingestion/source/lookml.py @@ -650,7 +650,7 @@ def from_looker_dict( ) # If not a derived table, then this view essentially wraps an existing - # object in the database. 
If sql_table_name is set, there is a single + # object in the database. If sql_table_name is set, there is a single # dependency in the view, on the sql_table_name. # Otherwise, default to the view name as per the docs: # https://docs.looker.com/reference/view-params/sql_table_name-for-view diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 1af643c8491876..0debde162ecaf7 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -236,4 +236,4 @@ def get_match(self, match_clause: Any, raw_props_value: Any) -> Optional[Match]: elif type(raw_props_value) == str: return bool(re.match(match_clause, raw_props_value)) else: - return match_clause == raw_props_value \ No newline at end of file + return match_clause == raw_props_value From da217c6ebdb2c2b3b1aacb63d52fa7c79cd1918e Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 25 Jun 2022 08:46:48 +1000 Subject: [PATCH 84/88] Update common.py --- metadata-ingestion/src/datahub/configuration/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index 86b366a008962b..be3c6a13d3599b 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -101,7 +101,7 @@ class OauthConfiguration(ConfigModel): class AllowDenyPattern(ConfigModel): - """A class to store allow deny regexs""" + """A class to store allow deny regexes""" allow: List[str] = Field( default=[".*"], @@ -143,7 +143,7 @@ def allowed(self, string: str) -> bool: def is_fully_specified_allow_list(self) -> bool: """ - If the allow patterns are literals and not full regexs, then it is considered + If the allow patterns are literals and not full regexes, then it is considered fully specified. This is useful if you want to convert a 'list + filter' pattern into a 'search for the ones that are allowed' pattern, which can be much more efficient in some cases. @@ -159,7 +159,7 @@ def get_allowed_list(self) -> List[str]: class KeyValuePattern(ConfigModel): - """A class to store allow deny regexs""" + """A class to store allow deny regexes""" rules: Dict[str, List[str]] = {".*": []} alphabet: str = "[A-Za-z0-9 _.-]" @@ -182,7 +182,7 @@ def matched(self, string: str) -> bool: def is_fully_specified_key(self) -> bool: """ - If the allow patterns are literals and not full regexs, then it is considered + If the allow patterns are literals and not full regexes, then it is considered fully specified. This is useful if you want to convert a 'list + filter' pattern into a 'search for the ones that are allowed' pattern, which can be much more efficient in some cases. 
From 6ab23f24ed8fa9654805e1e663e08781d1fb6b59 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Tue, 28 Jun 2022 08:47:34 +1000 Subject: [PATCH 85/88] Update mapping.py --- metadata-ingestion/src/datahub/utilities/mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 0debde162ecaf7..4d0093f4b2ffcc 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -224,7 +224,7 @@ def _get_best_match(the_match: Match, group_name: str) -> str: def sanitize_owner_ids(self, owner_id: str) -> str: if owner_id.__contains__("@"): - owner_id = owner_id[: owner_id.index("@")] + owner_id = owner_id[0: owner_id.index("@")] return owner_id def get_match(self, match_clause: Any, raw_props_value: Any) -> Optional[Match]: From c7f8106f5e6635cd746b48f49d3b999100fc8b9e Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Tue, 28 Jun 2022 08:51:57 +1000 Subject: [PATCH 86/88] Update mapping.py --- metadata-ingestion/src/datahub/utilities/mapping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 4d0093f4b2ffcc..30d044e25ba46e 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -224,7 +224,7 @@ def _get_best_match(the_match: Match, group_name: str) -> str: def sanitize_owner_ids(self, owner_id: str) -> str: if owner_id.__contains__("@"): - owner_id = owner_id[0: owner_id.index("@")] + owner_id = owner_id[0 : owner_id.index("@")] return owner_id def get_match(self, match_clause: Any, raw_props_value: Any) -> Optional[Match]: From 6fc07859e19527edf4fb0f726ba6635cc7f089e8 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 30 Jun 2022 08:14:39 +1000 Subject: [PATCH 87/88] Update source.py --- .../src/datahub/ingestion/source/s3/source.py | 79 ------------------- 1 file changed, 79 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index c7feb6d19463fa..6e66dcc3d84167 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -593,85 +593,6 @@ def ingest_table( if self.source_config.profiling.enabled: yield from self.get_table_profile(table_data, dataset_urn) - def gen_bucket_key(self, name): - return S3BucketKey( - platform="s3", - instance=self.source_config.env - if self.source_config.platform_instance is None - else self.source_config.platform_instance, - bucket_name=name, - ) - - def get_s3_tags( - self, bucket_name: str, key_name: Optional[str], dataset_urn: str - ) -> Optional[GlobalTagsClass]: - if self.source_config.aws_config is None: - raise ValueError("aws_config not set. 
Cannot browse s3") - new_tags = GlobalTagsClass(tags=[]) - tags_to_add = [] - if self.source_config.use_s3_bucket_tags: - s3 = self.source_config.aws_config.get_s3_resource() - bucket = s3.Bucket(bucket_name) - try: - tags_to_add.extend( - [ - make_tag_urn(f"""{tag["Key"]}:{tag["Value"]}""") - for tag in bucket.Tagging().tag_set - ] - ) - except s3.meta.client.exceptions.ClientError: - logger.warning(f"No tags found for bucket={bucket_name}") - - if self.source_config.use_s3_object_tags and key_name is not None: - s3_client = self.source_config.aws_config.get_s3_client() - object_tagging = s3_client.get_object_tagging( - Bucket=bucket_name, Key=key_name - ) - tag_set = object_tagging["TagSet"] - if tag_set: - tags_to_add.extend( - [ - make_tag_urn(f"""{tag["Key"]}:{tag["Value"]}""") - for tag in tag_set - ] - ) - else: - # Unlike bucket tags, if an object does not have tags, it will just return an empty array - # as opposed to an exception. - logger.warning(f"No tags found for bucket={bucket_name} key={key_name}") - if len(tags_to_add) == 0: - return None - if self.ctx.graph is not None: - logger.debug("Connected to DatahubApi, grabbing current tags to maintain.") - current_tags: Optional[GlobalTagsClass] = self.ctx.graph.get_aspect_v2( - entity_urn=dataset_urn, - aspect="globalTags", - aspect_type=GlobalTagsClass, - ) - if current_tags: - tags_to_add.extend( - [current_tag.tag for current_tag in current_tags.tags] - ) - else: - logger.warning( - "Could not connect to DatahubApi. No current tags to maintain" - ) - # Remove duplicate tags - tags_to_add = list(set(tags_to_add)) - new_tags = GlobalTagsClass( - tags=[TagAssociationClass(tag_to_add) for tag_to_add in tags_to_add] - ) - return new_tags - - def gen_folder_key(self, abs_path): - return FolderKey( - platform=self.source_config.platform, - instance=self.source_config.env - if self.source_config.platform_instance is None - else self.source_config.platform_instance, - folder_abs_path=abs_path, - ) - def get_prefix(self, relative_path: str) -> str: index = re.search(r"[\*|\{]", relative_path) if index: From 2de8aee652194bc0845a48e51fae1aa9f9b7961a Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 6 Jul 2022 14:52:18 +0530 Subject: [PATCH 88/88] revert changes to functionality --- metadata-ingestion/src/datahub/utilities/mapping.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 30d044e25ba46e..6212f1b001e002 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -232,8 +232,8 @@ def get_match(self, match_clause: Any, raw_props_value: Any) -> Optional[Match]: if type(raw_props_value) not in Constants.OPERAND_DATATYPE_SUPPORTED or type( raw_props_value ) != type(match_clause): - return False + return None elif type(raw_props_value) == str: - return bool(re.match(match_clause, raw_props_value)) + return re.match(match_clause, raw_props_value) else: - return match_clause == raw_props_value + return re.match(str(match_clause), str(raw_props_value))