From 4f6d689d7b0191d682ce2e5dec718a4fd7cf8725 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 2 Sep 2022 12:57:13 -0700 Subject: [PATCH] refactor(ingest): prefer `as` imports instead of pegasus2avro files --- metadata-ingestion/pyproject.toml | 1 + metadata-ingestion/scripts/modeldocgen.py | 2 +- metadata-ingestion/src/datahub/emitter/kafka_emitter.py | 6 +++--- metadata-ingestion/src/datahub/emitter/mce_builder.py | 4 ++-- .../src/datahub/ingestion/source/aws/glue.py | 3 +-- .../src/datahub/ingestion/source/looker/looker_common.py | 3 +-- .../src/datahub/ingestion/source/looker/lookml_source.py | 4 +--- metadata-ingestion/src/datahub/ingestion/source/powerbi.py | 3 +-- .../src/datahub/ingestion/source/s3/profiling.py | 2 +- .../datahub/ingestion/source/schema_inference/object.py | 4 +--- .../src/datahub/ingestion/transformer/base_transformer.py | 7 +++---- 11 files changed, 16 insertions(+), 23 deletions(-) diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml index 5871612174e38..60b67ca442922 100644 --- a/metadata-ingestion/pyproject.toml +++ b/metadata-ingestion/pyproject.toml @@ -12,6 +12,7 @@ include = '\.pyi?$' target-version = ['py36', 'py37', 'py38'] [tool.isort] +combine_as_imports = true indent = ' ' profile = 'black' sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' diff --git a/metadata-ingestion/scripts/modeldocgen.py b/metadata-ingestion/scripts/modeldocgen.py index 3265d3570ae96..00f23e601f0a0 100644 --- a/metadata-ingestion/scripts/modeldocgen.py +++ b/metadata-ingestion/scripts/modeldocgen.py @@ -20,7 +20,6 @@ from datahub.ingestion.api.sink import NoopWriteCallback from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields from datahub.ingestion.sink.file import FileSink, FileSinkConfig -from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField from datahub.metadata.schema_classes import ( BrowsePathsClass, ChangeTypeClass, @@ -30,6 +29,7 @@ GlobalTagsClass, MetadataChangeEventClass, OtherSchemaClass, + SchemaFieldClass as SchemaField, SchemaFieldDataTypeClass, SchemaMetadataClass, StringTypeClass, diff --git a/metadata-ingestion/src/datahub/emitter/kafka_emitter.py b/metadata-ingestion/src/datahub/emitter/kafka_emitter.py index 001097a2e42f5..f42365bfb0c5e 100644 --- a/metadata-ingestion/src/datahub/emitter/kafka_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/kafka_emitter.py @@ -10,9 +10,9 @@ from datahub.configuration.common import ConfigModel, ConfigurationError from datahub.configuration.kafka import KafkaProducerConnectionConfig from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( - MetadataChangeEvent, - MetadataChangeProposal, +from datahub.metadata.schema_classes import ( + MetadataChangeEventClass as MetadataChangeEvent, + MetadataChangeProposalClass as MetadataChangeProposal, ) from datahub.metadata.schemas import ( getMetadataChangeEventSchema, diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 3d8208f5088e5..40a053c1de487 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -12,7 +12,6 @@ from datahub.configuration.source_common import DEFAULT_ENV as DEFAULT_ENV_CONFIGURATION from datahub.emitter.serialization_helper import pre_json_transform -from datahub.metadata.com.linkedin.pegasus2avro.common import GlossaryTerms from datahub.metadata.schema_classes import ( AuditStampClass, ContainerKeyClass, @@ -21,6 +20,7 @@ DatasetSnapshotClass, GlobalTagsClass, GlossaryTermAssociationClass, + GlossaryTermsClass as GlossaryTerms, MetadataChangeEventClass, OwnerClass, OwnershipClass, @@ -31,8 +31,8 @@ TagAssociationClass, UpstreamClass, UpstreamLineageClass, + _Aspect as AspectAbstract, ) -from datahub.metadata.schema_classes import _Aspect as AspectAbstract from datahub.utilities.urns.dataset_urn import DatasetUrn logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index d39503dbcbefe..236d4695d60a0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -1,8 +1,7 @@ import logging import typing from collections import defaultdict -from dataclasses import dataclass -from dataclasses import field as dataclass_field +from dataclasses import dataclass, field as dataclass_field from typing import ( Any, Dict, diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 692c571d37d69..0811292266f9c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -1,8 +1,7 @@ import datetime import logging import re -from dataclasses import dataclass -from dataclasses import field as dataclasses_field +from dataclasses import dataclass, field as dataclasses_field from enum import Enum from typing import Dict, Iterable, List, Optional, Tuple, Union diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 3701ac7621052..ec74769d98631 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -4,9 +4,7 @@ import pathlib import re import sys -from dataclasses import dataclass -from dataclasses import field as dataclass_field -from dataclasses import replace +from dataclasses import dataclass, field as dataclass_field, replace from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type import pydantic diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi.py index b33281e4d27d5..993e74a76f9ab 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi.py @@ -5,8 +5,7 @@ ######################################################### import logging -from dataclasses import dataclass -from dataclasses import field as dataclass_field +from dataclasses import dataclass, field as dataclass_field from enum import Enum from time import sleep from typing import Any, Dict, Iterable, List, Optional, Tuple diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/profiling.py b/metadata-ingestion/src/datahub/ingestion/source/s3/profiling.py index d1f0b9c625c1c..af2cb718e3a98 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/profiling.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/profiling.py @@ -19,8 +19,8 @@ ) from pyspark.sql import SparkSession from pyspark.sql.functions import col, count, isnan, when -from pyspark.sql.types import DataType as SparkDataType from pyspark.sql.types import ( + DataType as SparkDataType, DateType, DecimalType, DoubleType, diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py index d12e9f26efa7e..5797d66aa4d19 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py +++ b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py @@ -1,7 +1,5 @@ from collections import Counter -from typing import Any -from typing import Counter as CounterType -from typing import Dict, Sequence, Tuple, Union +from typing import Any, Counter as CounterType, Dict, Sequence, Tuple, Union from mypy_extensions import TypedDict diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py index a2b1c13502065..34116a506afbb 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py @@ -1,6 +1,6 @@ import logging from abc import ABCMeta, abstractmethod -from typing import Any, Dict, Iterable, List, Optional, Type, Union, cast +from typing import Any, Dict, Iterable, List, Optional, Type, Union import datahub.emitter.mce_builder from datahub.emitter.mce_builder import Aspect @@ -30,7 +30,6 @@ StatusClass, UpstreamLineageClass, ViewPropertiesClass, - _Aspect, ) from datahub.utilities.urns.urn import Urn @@ -213,12 +212,12 @@ def _transform_or_record_mcp( # remember stuff assert envelope.record.entityUrn assert isinstance(self, SingleAspectTransformer) - if envelope.record.aspectName == self.aspect_name(): + if envelope.record.aspectName == self.aspect_name() and envelope.record.aspect: # we have a match on the aspect name, call the specific transform function transformed_aspect = self.transform_aspect( entity_urn=envelope.record.entityUrn, aspect_name=envelope.record.aspectName, - aspect=cast(_Aspect, envelope.record.aspect), + aspect=envelope.record.aspect, ) self._mark_processed(envelope.record.entityUrn) if transformed_aspect is None: