From 860d475c5edb68baf868c2a78db5c2ae7aedbc89 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Mon, 11 Jul 2022 09:37:38 -0700 Subject: [PATCH 01/22] feat(ingest): improve domain ingestion usability (#5366) --- docs/domains.md | 44 +++++++- .../src/datahub/configuration/common.py | 4 +- .../src/datahub/emitter/rest_emitter.py | 10 ++ .../src/datahub/ingestion/api/sink.py | 4 + .../src/datahub/ingestion/graph/client.py | 83 +++++++++++++- .../src/datahub/ingestion/run/pipeline.py | 101 +++++++++++++----- .../datahub/ingestion/sink/datahub_rest.py | 6 ++ .../src/datahub/ingestion/source/kafka.py | 10 +- .../ingestion/source/sql/sql_common.py | 31 ++++-- .../datahub/utilities/registries/__init__.py | 0 .../utilities/registries/domain_registry.py | 43 ++++++++ .../tests/integration/hana/hana_to_file.yml | 2 +- .../tests/integration/kafka/kafka_to_file.yml | 2 +- .../tests/integration/mysql/mysql_to_file.yml | 2 +- .../mysql/mysql_to_file_dbalias.yml | 2 +- 15 files changed, 303 insertions(+), 41 deletions(-) create mode 100644 metadata-ingestion/src/datahub/utilities/registries/__init__.py create mode 100644 metadata-ingestion/src/datahub/utilities/registries/domain_registry.py diff --git a/docs/domains.md b/docs/domains.md index a2a6f9b262440c..3f409ead1495b9 100644 --- a/docs/domains.md +++ b/docs/domains.md @@ -47,8 +47,11 @@ By default, you don't need to worry about this. DataHub will auto-generate an un Once you've chosen a name and a description, click 'Create' to create the new Domain. -## Assigning an Asset to a Domain +## Assigning an Asset to a Domain +You can assign assets to Domain using the UI or programmatically using the API or during ingestion. + +### UI-Based Assignment To assign an asset to a Domain, simply navigate to the asset's profile page. At the bottom left-side menu bar, you'll see a 'Domain' section. Click 'Set Domain', and then search for the Domain you'd like to add to. When you're done, click 'Add'. @@ -59,6 +62,45 @@ To remove an asset from a Domain, click the 'x' icon on the Domain tag. > Notice: Adding or removing an asset from a Domain requires the `Edit Domain` Metadata Privilege, which can be granted > by a [Policy](authorization/policies.md). +### Ingestion-time Assignment +All SQL-based ingestion sources support assigning domains during ingestion using the `domain` configuration. Consult your source's configuration details page (e.g. [Snowflake](./generated/ingestion/sources/snowflake.md)), to verify that it supports the Domain capability. + +:::note + +Assignment of domains during ingestion will overwrite domains that you have assigned in the UI. A single table can only belong to one domain. + +::: + + +Here is a quick example of a snowflake ingestion recipe that has been enhanced to attach the **Analytics** domain to all tables in the **long_tail_companions** database in the **analytics** schema, and the **Finance** domain to all tables in the **long_tail_companions** database in the **ecommerce** schema. 
+ +```yaml +source: + type: snowflake + config: + username: ${SNOW_USER} + password: ${SNOW_PASS} + account_id: + warehouse: COMPUTE_WH + role: accountadmin + database_pattern: + allow: + - "long_tail_companions" + schema_pattern: + deny: + - information_schema + profiling: + enabled: False + domain: + Analytics: + allow: + - "long_tail_companions.analytics.*" + Finance: + allow: + - "long_tail_companions.ecommerce.*" +``` + + ## Searching by Domain diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index be3c6a13d3599b..c342a7cd33424b 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -105,11 +105,11 @@ class AllowDenyPattern(ConfigModel): allow: List[str] = Field( default=[".*"], - description="List of regex patterns for process groups to include in ingestion", + description="List of regex patterns to include in ingestion", ) deny: List[str] = Field( default=[], - description="List of regex patterns for process groups to exclude from ingestion.", + description="List of regex patterns to exclude from ingestion.", ) ignoreCase: Optional[bool] = Field( default=True, diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py index ab96fea8c2c983..a20489163c0de3 100644 --- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py @@ -249,6 +249,16 @@ def _emit_generic(self, url: str, payload: str) -> None: "Unable to emit metadata to DataHub GMS", {"message": str(e)} ) from e + def __repr__(self) -> str: + token_str = ( + f" with token: {self._token[:4]}**********{self._token[-4:]}" + if self._token + else "" + ) + return ( + f"DataHubRestEmitter: configured to talk to {self._gms_server}{token_str}" + ) + class DatahubRestEmitter(DataHubRestEmitter): """This class exists as a pass-through for backwards compatibility""" diff --git a/metadata-ingestion/src/datahub/ingestion/api/sink.py b/metadata-ingestion/src/datahub/ingestion/api/sink.py index 56030987bce5da..1fa961dd42836a 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/sink.py +++ b/metadata-ingestion/src/datahub/ingestion/api/sink.py @@ -109,3 +109,7 @@ def get_report(self) -> SinkReport: @abstractmethod def close(self) -> None: pass + + def configured(self) -> str: + """Override this method to output a human-readable and scrubbed version of the configured sink""" + return "" diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 04f792f7244714..2e876bfa8d15bc 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -2,7 +2,7 @@ import logging import os from json.decoder import JSONDecodeError -from typing import Any, Dict, List, Optional, Type +from typing import Any, Dict, Iterable, List, Optional, Type from avro.schema import RecordSchema from deprecated import deprecated @@ -287,3 +287,84 @@ def get_latest_timeseries_value( f"Failed to find {aspect_type} in response {aspect_json}" ) return None + + def _get_search_endpoint(self): + return f"{self.config.server}/entities?action=search" + + def get_domain_urn_by_name(self, domain_name: str) -> Optional[str]: + """Retrieve a domain urn based on its name. 
Returns None if there is no match found""" + + filters = [] + filter_criteria = [ + { + "field": "name", + "value": domain_name, + "condition": "EQUAL", + } + ] + + filters.append({"and": filter_criteria}) + search_body = { + "input": "*", + "entity": "domain", + "start": 0, + "count": 10, + "filter": {"or": filters}, + } + results: Dict = self._post_generic(self._get_search_endpoint(), search_body) + num_entities = results.get("value", {}).get("numEntities", 0) + if num_entities > 1: + logger.warning( + f"Got {num_entities} results for domain name {domain_name}. Will return the first match." + ) + entities_yielded: int = 0 + entities = [] + for x in results["value"]["entities"]: + entities_yielded += 1 + logger.debug(f"yielding {x['entity']}") + entities.append(x["entity"]) + return entities[0] if entities_yielded else None + + def get_container_urns_by_filter( + self, + env: Optional[str] = None, + search_query: str = "*", + ) -> Iterable[str]: + """Return container urns that match based on query""" + url = self._get_search_endpoint() + + container_filters = [] + for container_subtype in ["Database", "Schema", "Project", "Dataset"]: + filter_criteria = [] + + filter_criteria.append( + { + "field": "customProperties", + "value": f"instance={env}", + "condition": "EQUAL", + } + ) + + filter_criteria.append( + { + "field": "typeNames", + "value": container_subtype, + "condition": "EQUAL", + } + ) + container_filters.append({"and": filter_criteria}) + search_body = { + "input": search_query, + "entity": "container", + "start": 0, + "count": 10000, + "filter": {"or": container_filters}, + } + results: Dict = self._post_generic(url, search_body) + num_entities = results["value"]["numEntities"] + logger.debug(f"Matched {num_entities} containers") + entities_yielded: int = 0 + for x in results["value"]["entities"]: + entities_yielded += 1 + logger.debug(f"yielding {x['entity']}") + yield x["entity"] diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index d0b1e300b9184b..0c2b4082f68251 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -120,6 +120,11 @@ class Pipeline: sink: Sink transformers: List[Transformer] + def _record_initialization_failure(self, e: Exception, msg: str) -> None: + self.pipeline_init_exception: Optional[Exception] = e + self.pipeline_init_failures: Optional[str] = f"{msg} due to {e}" + logger.error(e) + def __init__( self, config: PipelineConfig, @@ -138,23 +143,59 @@ def __init__( dry_run=dry_run, preview_mode=preview_mode, ) + self.pipeline_init_failures = None + self.pipeline_init_exception = None sink_type = self.config.sink.type - sink_class = sink_registry.get(sink_type) - sink_config = self.config.sink.dict().get("config") or {} - self.sink: Sink = sink_class.create(sink_config, self.ctx) - logger.debug(f"Sink type:{self.config.sink.type},{sink_class} configured") - - source_type = self.config.source.type - source_class = source_registry.get(source_type) - self.source: Source = source_class.create( - self.config.source.dict().get("config", {}), self.ctx - ) - logger.debug(f"Source type:{source_type},{source_class} configured") + try: + sink_class = sink_registry.get(sink_type) + except Exception as e: + self._record_initialization_failure(e, "Failed to create a sink") + return + + try: + sink_config = self.config.sink.dict().get("config") or {} + self.sink: Sink = sink_class.create(sink_config, self.ctx) + 
logger.debug(f"Sink type:{self.config.sink.type},{sink_class} configured") + logger.info(f"Sink configured successfully. {self.sink.configured()}") + except Exception as e: + self._record_initialization_failure( + e, f"Failed to configure sink ({sink_type})" + ) + return + + try: + source_type = self.config.source.type + source_class = source_registry.get(source_type) + except Exception as e: + self._record_initialization_failure(e, "Failed to create source") + return + + try: + self.source: Source = source_class.create( + self.config.source.dict().get("config", {}), self.ctx + ) + logger.debug(f"Source type:{source_type},{source_class} configured") + except Exception as e: + self._record_initialization_failure( + e, f"Failed to configure source ({source_type})" + ) + return - self.extractor_class = extractor_registry.get(self.config.source.extractor) + try: + self.extractor_class = extractor_registry.get(self.config.source.extractor) + except Exception as e: + self._record_initialization_failure( + e, f"Failed to configure extractor ({self.config.source.extractor})" + ) + return + + try: + self._configure_transforms() + except ValueError as e: + self._record_initialization_failure(e, "Failed to configure transformers") + return - self._configure_transforms() self._configure_reporting() def _configure_transforms(self) -> None: @@ -209,6 +250,10 @@ def create( def run(self) -> None: callback = LoggingCallback() + if self.pipeline_init_failures: + # no point continuing, return early + return + extractor: Extractor = self.extractor_class() for wu in itertools.islice( self.source.get_workunits(), @@ -296,6 +341,9 @@ def process_commits(self) -> None: logger.info(f"Successfully committed changes for {name}.") def raise_from_status(self, raise_warnings: bool = False) -> None: + if self.pipeline_init_exception: + raise self.pipeline_init_exception + if self.source.get_report().failures: raise PipelineExecutionError( "Source reported errors", self.source.get_report() @@ -310,18 +358,18 @@ def raise_from_status(self, raise_warnings: bool = False) -> None: ) def log_ingestion_stats(self) -> None: - - telemetry.telemetry_instance.ping( - "ingest_stats", - { - "source_type": self.config.source.type, - "sink_type": self.config.sink.type, - "records_written": stats.discretize( - self.sink.get_report().records_written - ), - }, - self.ctx.graph, - ) + if not self.pipeline_init_failures: + telemetry.telemetry_instance.ping( + "ingest_stats", + { + "source_type": self.config.source.type, + "sink_type": self.config.sink.type, + "records_written": stats.discretize( + self.sink.get_report().records_written + ), + }, + self.ctx.graph, + ) def _count_all_vals(self, d: Dict[str, List]) -> int: result = 0 @@ -331,6 +379,9 @@ def _count_all_vals(self, d: Dict[str, List]) -> int: def pretty_print_summary(self, warnings_as_failure: bool = False) -> int: click.echo() + if self.pipeline_init_failures: + click.secho(f"{self.pipeline_init_failures}", fg="red") + return 1 click.secho(f"Source ({self.config.source.type}) report:", bold=True) click.echo(self.source.get_report().as_string()) click.secho(f"Sink ({self.config.sink.type}) report:", bold=True) diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index d95eb245ccfb50..45e9a28c763a9f 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -157,3 +157,9 @@ def get_report(self) -> SinkReport: 
def close(self): self.executor.shutdown(wait=True) + + def __repr__(self) -> str: + return self.emitter.__repr__() + + def configured(self) -> str: + return self.__repr__() diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index 98551bd66e73cb..97db75c525f754 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -47,6 +47,7 @@ JobStatusClass, SubTypesClass, ) +from datahub.utilities.registries.domain_registry import DomainRegistry logger = logging.getLogger(__name__) @@ -150,6 +151,11 @@ def __init__(self, config: KafkaSourceConfig, ctx: PipelineContext): self.schema_registry_client: KafkaSchemaRegistryBase = ( KafkaSource.create_schema_registry(config, self.report) ) + if self.source_config.domain: + self.domain_registry = DomainRegistry( + cached_domains=[k for k in self.source_config.domain], + graph=self.ctx.graph, + ) def is_checkpointing_enabled(self, job_id: JobId) -> bool: if ( @@ -333,7 +339,9 @@ def _extract_record(self, topic: str) -> Iterable[MetadataWorkUnit]: # 6. Emit domains aspect MCPW for domain, pattern in self.source_config.domain.items(): if pattern.allowed(dataset_name): - domain_urn = make_domain_urn(domain) + domain_urn = make_domain_urn( + self.domain_registry.get_domain_urn(domain) + ) if domain_urn: wus = add_domain_to_entity_wu( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 034445bd853c55..56b82adbbab156 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -92,6 +92,7 @@ ViewPropertiesClass, ) from datahub.telemetry import telemetry +from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport if TYPE_CHECKING: @@ -241,23 +242,23 @@ class SQLAlchemyConfig(StatefulIngestionConfigBase): # them out afterwards via the table_pattern. schema_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="regex patterns for schemas to filter in ingestion.", + description="Regex patterns for schemas to filter in ingestion. Specify regex to only match the schema name. e.g. to match all tables in schema analytics, use the regex 'analytics'", ) table_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="regex patterns for tables to filter in ingestion.", + description="Regex patterns for tables to filter in ingestion. Specify regex to match the entire table name in database.schema.table format. e.g. to match all tables starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'", ) view_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="regex patterns for views to filter in ingestion.", + description="Regex patterns for views to filter in ingestion. Note: Defaults to table_pattern if not specified. Specify regex to match the entire view name in database.schema.view format. e.g. 
to match all views starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'", ) profile_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="regex patterns for profiles to filter in ingestion, allowed by the `table_pattern`.", + description="Regex patterns to filter tables for profiling during ingestion. Allowed by the `table_pattern`.", ) domain: Dict[str, AllowDenyPattern] = Field( default=dict(), - description=' regex patterns for tables/schemas to descide domain_key domain key (domain_key can be any string like "sales".) There can be multiple domain key specified.', + description='Attach domains to databases, schemas or tables during ingestion using regex patterns. Domain key can be a guid like *urn:li:domain:ec428203-ce86-4db3-985d-5a8ee6df32ba* or a string like "Marketing".) If you provide strings, then datahub will attempt to resolve this name to a guid, and will error out if this fails. There can be multiple domain keys specified.', ) include_views: Optional[bool] = Field( @@ -273,6 +274,17 @@ class SQLAlchemyConfig(StatefulIngestionConfigBase): # Custom Stateful Ingestion settings stateful_ingestion: Optional[SQLAlchemyStatefulIngestionConfig] = None + @pydantic.root_validator(pre=True) + def view_pattern_is_table_pattern_unless_specified( + cls, values: Dict[str, Any] + ) -> Dict[str, Any]: + view_pattern = values.get("view_pattern") + table_pattern = values.get("table_pattern") + if table_pattern and not view_pattern: + logger.info(f"Applying table_pattern {table_pattern} to view_pattern.") + values["view_pattern"] = table_pattern + return values + @pydantic.root_validator() def ensure_profiling_pattern_is_passed_to_profiling( cls, values: Dict[str, Any] @@ -497,6 +509,10 @@ def __init__(self, config: SQLAlchemyConfig, ctx: PipelineContext, platform: str for config_flag in profiling_flags_to_report }, ) + if self.config.domain: + self.domain_registry = DomainRegistry( + cached_domains=[k for k in self.config.domain], graph=self.ctx.graph + ) def warn(self, log: logging.Logger, key: str, reason: str) -> None: self.report.report_warning(key, reason) @@ -809,7 +825,9 @@ def _gen_domain_urn(self, dataset_name: str) -> Optional[str]: for domain, pattern in self.config.domain.items(): if pattern.allowed(dataset_name): - domain_urn = make_domain_urn(domain) + domain_urn = make_domain_urn( + self.domain_registry.get_domain_urn(domain) + ) return domain_urn @@ -857,7 +875,6 @@ def loop_tables( # noqa: C901 continue self.report.report_entity_scanned(dataset_name, ent_type="table") - if not sql_config.table_pattern.allowed(dataset_name): self.report.report_dropped(dataset_name) continue diff --git a/metadata-ingestion/src/datahub/utilities/registries/__init__.py b/metadata-ingestion/src/datahub/utilities/registries/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py b/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py new file mode 100644 index 00000000000000..6ae55dee509677 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py @@ -0,0 +1,43 @@ +import logging +from typing import List, Optional + +from datahub.ingestion.graph.client import DataHubGraph + +logger = logging.getLogger(__name__) + + +class DomainRegistry: + """A class that makes it easy to resolve domains using DataHub""" + + def __init__( + self, + cached_domains: Optional[List[str]] = 
[], + graph: Optional[DataHubGraph] = None, + ): + self.domain_registry = {} + if cached_domains: + # isolate the domains that don't seem fully specified + domains_needing_resolution = [ + d + for d in cached_domains + if (not d.startswith("urn:li:domain") and d.count("-") != 4) + ] + if domains_needing_resolution and not graph: + raise ValueError( + f"Following domains need server-side resolution {domains_needing_resolution} but a DataHub server wasn't provided. Either use fully qualified domain ids (e.g. urn:li:domain:ec428203-ce86-4db3-985d-5a8ee6df32ba) or provide a datahub_api config in your recipe." + ) + for domain_identifier in domains_needing_resolution: + assert graph + domain_urn = graph.get_domain_urn_by_name(domain_identifier) + if domain_urn: + self.domain_registry[domain_identifier] = domain_urn + else: + logger.error( + f"Failed to retrieve domain id for domain {domain_identifier}" + ) + raise ValueError( + f"domain {domain_identifier} doesn't seem to be provisioned on DataHub. Either provision it first and re-run ingestion, or provide a fully qualified domain id (e.g. urn:li:domain:ec428203-ce86-4db3-985d-5a8ee6df32ba) to skip this check." + ) + + def get_domain_urn(self, domain_identifier: str) -> str: + return self.domain_registry.get(domain_identifier) or domain_identifier diff --git a/metadata-ingestion/tests/integration/hana/hana_to_file.yml b/metadata-ingestion/tests/integration/hana/hana_to_file.yml index c37f37b884bc6b..8900a5a3bfbadf 100644 --- a/metadata-ingestion/tests/integration/hana/hana_to_file.yml +++ b/metadata-ingestion/tests/integration/hana/hana_to_file.yml @@ -32,7 +32,7 @@ source: include_field_histogram: true include_field_sample_values: true domain: - sales: + "urn:li:domain:sales": allow: - "HOTEL" sink: diff --git a/metadata-ingestion/tests/integration/kafka/kafka_to_file.yml b/metadata-ingestion/tests/integration/kafka/kafka_to_file.yml index 69c43c57aa44bd..b345ba86799371 100644 --- a/metadata-ingestion/tests/integration/kafka/kafka_to_file.yml +++ b/metadata-ingestion/tests/integration/kafka/kafka_to_file.yml @@ -7,7 +7,7 @@ source: bootstrap: "localhost:59092" schema_registry_url: "http://localhost:58081" domain: - sales: + "urn:li:domain:sales": allow: - "key_value_topic" sink: diff --git a/metadata-ingestion/tests/integration/mysql/mysql_to_file.yml b/metadata-ingestion/tests/integration/mysql/mysql_to_file.yml index 988dbca18ff366..8d4a5b84b91d20 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_to_file.yml +++ b/metadata-ingestion/tests/integration/mysql/mysql_to_file.yml @@ -31,7 +31,7 @@ source: include_field_histogram: true include_field_sample_values: true domain: - sales: + "urn:li:domain:sales": allow: - "^metagalaxy" sink: diff --git a/metadata-ingestion/tests/integration/mysql/mysql_to_file_dbalias.yml b/metadata-ingestion/tests/integration/mysql/mysql_to_file_dbalias.yml index 86e5915a0d1d25..1c324641fe1583 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_to_file_dbalias.yml +++ b/metadata-ingestion/tests/integration/mysql/mysql_to_file_dbalias.yml @@ -32,7 +32,7 @@ source: include_field_histogram: true include_field_sample_values: true domain: - sales: + "urn:li:domain:sales": allow: - "^metagalaxy" sink: From 070dfa0eaf1afa6d23350514d498497104d4277f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Mon, 11 Jul 2022 18:57:20 +0200 Subject: [PATCH 02/22] fix(config): fixes config key in DataHubAuthorizerFactory (#5371) --- 
 .../com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java
index 997709b2c90f52..e5e377b5777c1e 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubAuthorizerFactory.java
@@ -32,7 +32,7 @@ public class DataHubAuthorizerFactory {
   @Value("${authorization.defaultAuthorizer.cacheRefreshIntervalSecs}")
   private Integer policyCacheRefreshIntervalSeconds;
 
-  @Value("${authorization.defaultAuthorizer..enabled:true}")
+  @Value("${authorization.defaultAuthorizer.enabled:true}")
   private Boolean policiesEnabled;
 
   @Bean(name = "dataHubAuthorizer")

From 489b5bb5b450570de92a1fa44c1a76befb286ee2 Mon Sep 17 00:00:00 2001
From: Shirshanka Das
Date: Mon, 11 Jul 2022 15:08:26 -0700
Subject: [PATCH 03/22] fix(ingest): domains - check whether urn based domain
 exists during resolution (#5373)

---
 docs/domains.md                               | 37 ++++++++++++++++++-
 .../utilities/registries/domain_registry.py   | 29 ++++++++++-----
 2 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/docs/domains.md b/docs/domains.md
index 3f409ead1495b9..436f7ba1f332a5 100644
--- a/docs/domains.md
+++ b/docs/domains.md
@@ -100,6 +100,41 @@ source:
           - "long_tail_companions.ecommerce.*"
 ```
 
+:::note
+
+When bare domain names like `Analytics` are used, the ingestion system will first check whether a domain like `urn:li:domain:Analytics` is provisioned; failing that, it will check for a provisioned domain that has the same name. If we are unable to resolve bare domain names to provisioned domains, then ingestion will refuse to proceed until the domain is provisioned on DataHub.
+
+:::
+
+You can also provide fully-qualified domain names to ensure that no ingestion-time domain resolution is needed. For example, the following recipe uses fully qualified domain names:
+
+```yaml
+source:
+  type: snowflake
+  config:
+    username: ${SNOW_USER}
+    password: ${SNOW_PASS}
+    account_id:
+    warehouse: COMPUTE_WH
+    role: accountadmin
+    database_pattern:
+      allow:
+        - "long_tail_companions"
+    schema_pattern:
+      deny:
+        - information_schema
+    profiling:
+      enabled: False
+    domain:
+      "urn:li:domain:6289fccc-4af2-4cbb-96ed-051e7d1de93c":
+        allow:
+          - "long_tail_companions.analytics.*"
+      "urn:li:domain:07155b15-cee6-4fda-b1c1-5a19a6b74c3a":
+        allow:
+          - "long_tail_companions.ecommerce.*"
+```
+
+
 ## Searching by Domain
 
@@ -179,4 +214,4 @@ Click [here](https://www.loom.com/share/72b3bcc2729b4df0982fa63ae3a8cb21) to see
 
 ## Feedback / Questions / Concerns
 
-We want to hear from you! For any inquiries, including Feedback, Questions, or Concerns, reach out on Slack!
\ No newline at end of file
+We want to hear from you! For any inquiries, including Feedback, Questions, or Concerns, reach out on Slack!
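For reference, a minimal sketch of how the name-to-urn resolution described above can be exercised from Python using the `DomainRegistry` and `DataHubGraph` classes involved in this change; the GMS address and the `Analytics` domain name are illustrative assumptions rather than values taken from the patch:

```python
from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig
from datahub.utilities.registries.domain_registry import DomainRegistry

# Assumes a DataHub GMS reachable at localhost:8080 and a domain named "Analytics"
# that has already been provisioned on the server.
graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))

# Bare names are resolved server-side (first as an urn, then by name); fully qualified
# ids such as urn:li:domain:ec428203-ce86-4db3-985d-5a8ee6df32ba are used as-is.
registry = DomainRegistry(cached_domains=["Analytics"], graph=graph)
print(registry.get_domain_urn("Analytics"))
```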
diff --git a/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py b/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py index 6ae55dee509677..4e719c939b6f2b 100644 --- a/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py +++ b/metadata-ingestion/src/datahub/utilities/registries/domain_registry.py @@ -28,16 +28,27 @@ def __init__( ) for domain_identifier in domains_needing_resolution: assert graph - domain_urn = graph.get_domain_urn_by_name(domain_identifier) - if domain_urn: - self.domain_registry[domain_identifier] = domain_urn + # first try to check if this domain exists by urn + maybe_domain_urn = f"urn:li:domain:{domain_identifier}" + from datahub.metadata.schema_classes import DomainPropertiesClass + + maybe_domain_properties = graph.get_aspect_v2( + maybe_domain_urn, DomainPropertiesClass, "domainProperties" + ) + if maybe_domain_properties: + self.domain_registry[domain_identifier] = maybe_domain_urn else: - logger.error( - f"Failed to retrieve domain id for domain {domain_identifier}" - ) - raise ValueError( - f"domain {domain_identifier} doesn't seem to be provisioned on DataHub. Either provision it first and re-run ingestion, or provide a fully qualified domain id (e.g. urn:li:domain:ec428203-ce86-4db3-985d-5a8ee6df32ba) to skip this check." - ) + # try to get this domain by name + domain_urn = graph.get_domain_urn_by_name(domain_identifier) + if domain_urn: + self.domain_registry[domain_identifier] = domain_urn + else: + logger.error( + f"Failed to retrieve domain id for domain {domain_identifier}" + ) + raise ValueError( + f"domain {domain_identifier} doesn't seem to be provisioned on DataHub. Either provision it first and re-run ingestion, or provide a fully qualified domain id (e.g. urn:li:domain:ec428203-ce86-4db3-985d-5a8ee6df32ba) to skip this check." 
+ ) def get_domain_urn(self, domain_identifier: str) -> str: return self.domain_registry.get(domain_identifier) or domain_identifier From f3e5afdba957e3943c28d9a01ebea0967175b010 Mon Sep 17 00:00:00 2001 From: Navin Sharma <103643430+NavinSharma13@users.noreply.github.com> Date: Tue, 12 Jul 2022 11:03:24 +0530 Subject: [PATCH 04/22] feat(quickstart): Adding env variables and cli options for customizing mapped ports in quickstart (#5353) Co-authored-by: Shirshanka Das --- docker/docker-compose.override.yml | 2 +- docker/docker-compose.yml | 17 +- ...er-compose-without-neo4j-m1.quickstart.yml | 15 +- .../quickstart/docker-compose.quickstart.yml | 19 ++- docs/quickstart.md | 110 +++++++++---- metadata-ingestion/setup.py | 2 +- metadata-ingestion/src/datahub/cli/docker.py | 149 +++++++++++++++++- metadata-ingestion/src/datahub/entrypoints.py | 2 +- 8 files changed, 248 insertions(+), 68 deletions(-) diff --git a/docker/docker-compose.override.yml b/docker/docker-compose.override.yml index b6a095d8836222..e8336d8349b2ab 100644 --- a/docker/docker-compose.override.yml +++ b/docker/docker-compose.override.yml @@ -9,7 +9,7 @@ services: env_file: mysql/env/docker.env command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin ports: - - "3306:3306" + - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 volumes: - ./mysql/init.sql:/docker-entrypoint-initdb.d/init.sql - mysqldata:/var/lib/mysql diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 6063626ab04b29..402916fd717ccd 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -12,7 +12,7 @@ services: hostname: zookeeper container_name: zookeeper ports: - - "2181:2181" + - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 volumes: - zkdata:/var/opt/zookeeper @@ -24,8 +24,7 @@ services: depends_on: - zookeeper ports: - - "29092:29092" - - "9092:9092" + - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 volumes: - broker:/var/lib/kafka/data/ @@ -50,7 +49,7 @@ services: - zookeeper - broker ports: - - "8081:8081" + - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 elasticsearch: image: elasticsearch:7.9.3 @@ -58,7 +57,7 @@ services: container_name: elasticsearch hostname: elasticsearch ports: - - "9200:9200" + - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 environment: - discovery.type=single-node - xpack.security.enabled=false @@ -75,8 +74,8 @@ services: hostname: neo4j container_name: neo4j ports: - - "7474:7474" - - "7687:7687" + - ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474 + - ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687 volumes: - neo4jdata:/data @@ -100,7 +99,7 @@ services: hostname: datahub-gms container_name: datahub-gms ports: - - "8080:8080" + - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 depends_on: - elasticsearch-setup - kafka-setup @@ -116,7 +115,7 @@ services: hostname: datahub-frontend-react container_name: datahub-frontend-react ports: - - "9002:9002" + - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 depends_on: - datahub-gms volumes: diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index 76c0fc823c3503..49dbbc5b9f3086 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -17,8 +17,7 @@ services: hostname: broker image: kymeric/cp-kafka:latest ports: - - 29092:29092 - - 9092:9092 + - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 datahub-actions: depends_on: - datahub-gms @@ -57,7 +56,7 @@ services: hostname: 
datahub-frontend-react image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} ports: - - 9002:9002 + - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins datahub-gms: @@ -86,7 +85,7 @@ services: hostname: datahub-gms image: linkedin/datahub-gms:${DATAHUB_VERSION:-head} ports: - - 8080:8080 + - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins elasticsearch: @@ -106,7 +105,7 @@ services: image: elasticsearch:7.9.3 mem_limit: 1g ports: - - 9200:9200 + - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 volumes: - esdata:/usr/share/elasticsearch/data elasticsearch-setup: @@ -140,7 +139,7 @@ services: hostname: mysql image: mariadb:10.5.8 ports: - - 3306:3306 + - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 volumes: - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql - mysqldata:/var/lib/mysql @@ -167,7 +166,7 @@ services: hostname: schema-registry image: eugenetea/schema-registry-arm64:latest ports: - - 8081:8081 + - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 zookeeper: container_name: zookeeper environment: @@ -176,7 +175,7 @@ services: hostname: zookeeper image: kymeric/cp-zookeeper:latest ports: - - 2181:2181 + - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 volumes: - zkdata:/var/opt/zookeeper version: '2.3' diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index 7cdee0e7084a35..6e46f3f16ab208 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -17,8 +17,7 @@ services: hostname: broker image: confluentinc/cp-kafka:5.4.0 ports: - - 29092:29092 - - 9092:9092 + - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 volumes: - broker:/var/lib/kafka/data/ datahub-actions: @@ -59,7 +58,7 @@ services: hostname: datahub-frontend-react image: linkedin/datahub-frontend-react:${DATAHUB_VERSION:-head} ports: - - 9002:9002 + - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002 volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins datahub-gms: @@ -94,7 +93,7 @@ services: hostname: datahub-gms image: linkedin/datahub-gms:${DATAHUB_VERSION:-head} ports: - - 8080:8080 + - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 volumes: - ${HOME}/.datahub/plugins/:/etc/datahub/plugins elasticsearch: @@ -114,7 +113,7 @@ services: image: elasticsearch:7.9.3 mem_limit: 1g ports: - - 9200:9200 + - ${DATAHUB_MAPPED_ELASTIC_PORT:-9200}:9200 volumes: - esdata:/usr/share/elasticsearch/data elasticsearch-setup: @@ -148,7 +147,7 @@ services: hostname: mysql image: mysql:5.7 ports: - - 3306:3306 + - ${DATAHUB_MAPPED_MYSQL_PORT:-3306}:3306 volumes: - ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql - mysqldata:/var/lib/mysql @@ -173,8 +172,8 @@ services: hostname: neo4j image: neo4j:4.0.6 ports: - - 7474:7474 - - 7687:7687 + - ${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474}:7474 + - ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687 volumes: - neo4jdata:/data schema-registry: @@ -188,7 +187,7 @@ services: hostname: schema-registry image: confluentinc/cp-schema-registry:5.4.0 ports: - - 8081:8081 + - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 zookeeper: container_name: zookeeper environment: @@ -197,7 +196,7 @@ services: hostname: zookeeper image: confluentinc/cp-zookeeper:5.4.0 ports: - - 2181:2181 + - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 volumes: - zkdata:/var/opt/zookeeper version: '2.3' diff --git a/docs/quickstart.md b/docs/quickstart.md index 5e83efe3e2aab8..4118314a099240 100644 --- a/docs/quickstart.md +++ 
b/docs/quickstart.md @@ -42,10 +42,6 @@ To deploy a new instance of DataHub, perform the following steps. at [http://localhost:9002](http://localhost:9002) in your browser. You can sign in using `datahub` as both the username and password. - If you would like to modify/configure the DataHub installation in some way, please download the [docker-compose.yaml](https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml) used by the cli tool, modify it as necessary and deploy DataHub by passing the downloaded docker-compose file: - ``` - datahub docker quickstart --quickstart-compose-file - ``` 5. To ingest the sample metadata, run the following CLI command from your terminal @@ -62,11 +58,73 @@ using the `--token ` parameter in the command. That's it! Now feel free to play around with DataHub! +## Troubleshooting Issues + +
+Command not found: datahub
+
+
+If running the datahub cli produces "command not found" errors inside your terminal, your system may be defaulting to an
+older version of Python. Try prefixing your `datahub` commands with `python3 -m`:
+
+```
+python3 -m datahub docker quickstart
+```
+
+Another possibility is that your system PATH does not include pip's `$HOME/.local/bin` directory. On linux, you can add this to your `~/.bashrc`:
+
+```
+if [ -d "$HOME/.local/bin" ] ; then
+    PATH="$HOME/.local/bin:$PATH"
+fi
+```
+
+ +
+
+Port Conflicts
+
+
+By default the quickstart deploy will require the following ports to be free on your local machine:
+  - 3306 for MySQL
+  - 9200 for Elasticsearch
+  - 9092 for the Kafka broker
+  - 8081 for Schema Registry
+  - 2181 for ZooKeeper
+  - 9002 for the DataHub Web Application (datahub-frontend)
+  - 8080 for the DataHub Metadata Service (datahub-gms)
+
+ In case the default ports conflict with software you are already running on your machine, you can override these ports by passing additional flags to the `datahub docker quickstart` command.
+ e.g. To override the MySQL port with 53306 (instead of the default 3306), you can say: `datahub docker quickstart --mysql-port 53306`. Use `datahub docker quickstart --help` to see all the supported options.
+
+
+ +
+
+Miscellaneous Docker issues
+
+There can be misc issues with Docker, like conflicting containers and dangling volumes, that can often be resolved by
+pruning your Docker state with the following command. Note that this command removes all unused containers, networks,
+images (both dangling and unreferenced), and optionally, volumes.
+
+```
+docker system prune
+```
+
+
+ +
+
+Still stuck?
+
+Hop over to our [Slack community](https://slack.datahubproject.io) and ask for help in the [#troubleshoot](https://datahubspace.slack.com/archives/C029A3M079U) channel!
+ ## Next Steps ### Ingest Metadata -To start pushing your company's metadata into DataHub, take a look at the [Metadata Ingestion Framework](../metadata-ingestion/README.md). +To start pushing your company's metadata into DataHub, take a look at [UI-based Ingestion Guide](./ui-ingestion.md), or to run ingestion using the cli, look at the [Metadata Ingestion Guide](../metadata-ingestion/README.md). ### Invite Users @@ -82,50 +140,36 @@ To enable backend Authentication, check out [authentication in DataHub's backend We recommend deploying DataHub to production using Kubernetes. We provide helpful [Helm Charts](https://artifacthub.io/packages/helm/datahub/datahub) to help you quickly get up and running. Check out [Deploying DataHub to Kubernetes](./deploy/kubernetes.md) for a step-by-step walkthrough. -## Resetting DataHub +## Other Common Operations + +### Stopping DataHub -To cleanse DataHub of all of it's state (e.g. before ingesting your own), you can use the CLI `nuke` command. +To stop DataHub's quickstart, you can issue the following command. ``` -datahub docker nuke +datahub docker quickstart --stop ``` -## Updating DataHub locally +### Resetting DataHub -If you have been testing DataHub locally, a new version of DataHub got released and you want to try the new version then you can use below commands. +To cleanse DataHub of all of its state (e.g. before ingesting your own), you can use the CLI `nuke` command. ``` -datahub docker nuke --keep-data -datahub docker quickstart +datahub docker nuke ``` -This will keep the data that you have ingested so far in DataHub and start a new quickstart with the latest version of DataHub. +### Upgrading your local DataHub -## Troubleshooting - -### Command not found: datahub - -If running the datahub cli produces "command not found" errors inside your terminal, your system may be defaulting to an -older version of Python. Try prefixing your `datahub` commands with `python3 -m`: +If you have been testing DataHub locally, a new version of DataHub got released and you want to try the new version then you can just issue the quickstart command again. It will pull down newer images and restart your instance without losing any data. ``` -python3 -m datahub docker quickstart +datahub docker quickstart ``` -Another possibility is that your system PATH does not include pip's `$HOME/.local/bin` directory. On linux, you can add this to your `~/.bashrc`: +### Customization +If you would like to customize the DataHub installation further, please download the [docker-compose.yaml](https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml) used by the cli tool, modify it as necessary and deploy DataHub by passing the downloaded docker-compose file: ``` -if [ -d "$HOME/.local/bin" ] ; then - PATH="$HOME/.local/bin:$PATH" -fi +datahub docker quickstart --quickstart-compose-file ``` -### Miscellaneous Docker issues - -There can be misc issues with Docker, like conflicting containers and dangling volumes, that can often be resolved by -pruning your Docker state with the following command. Note that this command removes all unused containers, networks, -images (both dangling and unreferenced), and optionally, volumes. 
- -``` -docker system prune -``` diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index b07650b1ba5055..929208eaa8891e 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -68,7 +68,7 @@ def get_long_description(): # At the same time, we use Kafka's AvroSerializer, which internally relies on # fastavro for serialization. We do not use confluent_kafka[avro], since it # is incompatible with its own dep on avro-python3. - "confluent_kafka>=1.5.0,<1.9.0", + "confluent_kafka>=1.5.0", "fastavro>=1.2.0", } diff --git a/metadata-ingestion/src/datahub/cli/docker.py b/metadata-ingestion/src/datahub/cli/docker.py index 6e42b4e5adb020..f2e580a2122bcc 100644 --- a/metadata-ingestion/src/datahub/cli/docker.py +++ b/metadata-ingestion/src/datahub/cli/docker.py @@ -11,6 +11,7 @@ from typing import List, NoReturn, Optional import click +import pydantic import requests from datahub.cli.docker_check import ( @@ -131,6 +132,80 @@ def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> return False +def _set_environment_variables( + version: Optional[str], + mysql_port: Optional[pydantic.PositiveInt], + zk_port: Optional[pydantic.PositiveInt], + kafka_broker_port: Optional[pydantic.PositiveInt], + schema_registry_port: Optional[pydantic.PositiveInt], + elastic_port: Optional[pydantic.PositiveInt], +) -> None: + if version is not None: + os.environ["DATAHUB_VERSION"] = version + if mysql_port is not None: + os.environ["DATAHUB_MAPPED_MYSQL_PORT"] = str(mysql_port) + + if zk_port is not None: + os.environ["DATAHUB_MAPPED_ZK_PORT"] = str(zk_port) + + if kafka_broker_port is not None: + os.environ["DATAHUB_MAPPED_KAFKA_BROKER_PORT"] = str(kafka_broker_port) + + if schema_registry_port is not None: + os.environ["DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT"] = str(schema_registry_port) + + if elastic_port is not None: + os.environ["DATAHUB_MAPPED_ELASTIC_PORT"] = str(elastic_port) + + +def _get_default_quickstart_compose_file() -> Optional[str]: + home = os.environ["HOME"] + if home: + try: + os.makedirs(f"{home}/.datahub/quickstart", exist_ok=True) + return f"{home}/.datahub/quickstart/docker-compose.yml" + except Exception as e: + logger.debug( + f"Failed to identify a default quickstart compose file due to {e}" + ) + + return None + + +def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None: + default_quickstart_compose_file = _get_default_quickstart_compose_file() + compose_files_for_stopping = ( + quickstart_compose_file + if quickstart_compose_file + else [pathlib.Path(default_quickstart_compose_file)] + if default_quickstart_compose_file + else None + ) + if compose_files_for_stopping: + # docker-compose stop + base_command: List[str] = [ + "docker-compose", + *itertools.chain.from_iterable( + ("-f", f"{path}") for path in compose_files_for_stopping + ), + "-p", + "datahub", + ] + try: + logger.debug(f"Executing {base_command} stop") + subprocess.run( + [*base_command, "stop"], + check=True, + ) + click.secho("Stopped datahub successfully.", fg="green") + except subprocess.CalledProcessError: + click.secho( + "Error while stopping.", + fg="red", + ) + return + + @docker.command() @click.option( "--version", @@ -166,6 +241,48 @@ def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> default=None, help="If set, forces docker-compose to use that graph service implementation", ) +@click.option( + "--mysql-port", + type=pydantic.PositiveInt, + is_flag=False, + default=None, + help="If there is an existing mysql 
instance running on port 3306, set this to a free port to avoid port conflicts on startup",
+)
+@click.option(
+    "--zk-port",
+    type=pydantic.PositiveInt,
+    is_flag=False,
+    default=None,
+    help="If there is an existing zookeeper instance running on port 2181, set this to a free port to avoid port conflicts on startup",
+)
+@click.option(
+    "--kafka-broker-port",
+    type=pydantic.PositiveInt,
+    is_flag=False,
+    default=None,
+    help="If there is an existing Kafka broker running on port 9092, set this to a free port to avoid port conflicts on startup",
+)
+@click.option(
+    "--schema-registry-port",
+    type=pydantic.PositiveInt,
+    is_flag=False,
+    default=None,
+    help="If there is an existing process running on port 8081, set this to a free port to avoid port conflicts with Kafka schema registry on startup",
+)
+@click.option(
+    "--elastic-port",
+    type=pydantic.PositiveInt,
+    is_flag=False,
+    default=None,
+    help="If there is an existing Elasticsearch instance running on port 9200, set this to a free port to avoid port conflicts on startup",
+)
+@click.option(
+    "--stop",
+    type=bool,
+    is_flag=True,
+    default=False,
+    help="Use this flag to stop the running containers",
+)
 @upgrade.check_upgrade
 @telemetry.with_telemetry
 def quickstart(
@@ -174,6 +291,12 @@ def quickstart(
     quickstart_compose_file: List[pathlib.Path],
     dump_logs_on_failure: bool,
     graph_service_impl: Optional[str],
+    mysql_port: Optional[pydantic.PositiveInt],
+    zk_port: Optional[pydantic.PositiveInt],
+    kafka_broker_port: Optional[pydantic.PositiveInt],
+    schema_registry_port: Optional[pydantic.PositiveInt],
+    elastic_port: Optional[pydantic.PositiveInt],
+    stop: bool,
 ) -> None:
     """Start an instance of DataHub locally using docker-compose.
 
@@ -185,7 +308,7 @@ def quickstart(
     running_on_m1 = is_m1()
 
     if running_on_m1:
-        click.echo("Detected M1 machine")
+        click.secho("Detected M1 machine", fg="yellow")
 
     # Run pre-flight checks.
issues = check_local_docker_containers(preflight_only=True) @@ -195,7 +318,13 @@ def quickstart( quickstart_compose_file = list( quickstart_compose_file ) # convert to list from tuple - if not quickstart_compose_file: + + default_quickstart_compose_file = _get_default_quickstart_compose_file() + if stop: + _attempt_stop(quickstart_compose_file) + return + elif not quickstart_compose_file: + # download appropriate quickstart file should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl) if should_use_neo4j and running_on_m1: click.secho( @@ -210,7 +339,11 @@ def quickstart( else GITHUB_M1_QUICKSTART_COMPOSE_URL ) - with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp_file: + with open( + default_quickstart_compose_file, "wb" + ) if default_quickstart_compose_file else tempfile.NamedTemporaryFile( + suffix=".yml", delete=False + ) as tmp_file: path = pathlib.Path(tmp_file.name) quickstart_compose_file.append(path) click.echo(f"Fetching docker-compose file {github_file} from GitHub") @@ -221,8 +354,14 @@ def quickstart( logger.debug(f"Copied to {path}") # set version - if version is not None: - os.environ["DATAHUB_VERSION"] = version + _set_environment_variables( + version=version, + mysql_port=mysql_port, + zk_port=zk_port, + kafka_broker_port=kafka_broker_port, + schema_registry_port=schema_registry_port, + elastic_port=elastic_port, + ) base_command: List[str] = [ "docker-compose", diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index 81cc375c15bc07..41404122979a11 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) # Configure some loggers. -logging.getLogger("urllib3").setLevel(logging.WARNING) +logging.getLogger("urllib3").setLevel(logging.ERROR) logging.getLogger("snowflake").setLevel(level=logging.WARNING) # logging.getLogger("botocore").setLevel(logging.INFO) # logging.getLogger("google").setLevel(logging.INFO) From 9fca5277dc6d603e619c1a7c46f0d7990318a3d6 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 12 Jul 2022 12:37:47 +0530 Subject: [PATCH 05/22] fix(build): tweak ingestion build (#5374) --- .github/workflows/docker-ingestion-base.yml | 2 + .github/workflows/docker-ingestion-smoke.yml | 42 +++++++++++++++++++ docker/datahub-ingestion/Dockerfile | 3 +- .../datahub-ingestion/base-requirements.txt | 1 + docker/datahub-ingestion/base.Dockerfile | 3 +- docker/datahub-ingestion/smoke.Dockerfile | 18 ++++++++ 6 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/docker-ingestion-smoke.yml create mode 100644 docker/datahub-ingestion/smoke.Dockerfile diff --git a/.github/workflows/docker-ingestion-base.yml b/.github/workflows/docker-ingestion-base.yml index 9616d211cd1610..adcf8b14ab40bc 100644 --- a/.github/workflows/docker-ingestion-base.yml +++ b/.github/workflows/docker-ingestion-base.yml @@ -1,5 +1,7 @@ name: ingestion base on: + release: + types: [published, edited] push: branches: - master diff --git a/.github/workflows/docker-ingestion-smoke.yml b/.github/workflows/docker-ingestion-smoke.yml new file mode 100644 index 00000000000000..66762b78ccd902 --- /dev/null +++ b/.github/workflows/docker-ingestion-smoke.yml @@ -0,0 +1,42 @@ +name: ingestion smoke +on: + release: + types: [published, edited] + push: + branches: + - master + paths: + - "docker/datahub-ingestion/**" + workflow_dispatch: + +concurrency: + group: ${{ github.workflow 
}}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + + build-smoke: + name: Build and Push Docker Image to Docker Hub + runs-on: ubuntu-latest + steps: + - name: Check out the repo + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.ACRYL_DOCKER_USERNAME }} + password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} + - name: Build and Push image + uses: docker/build-push-action@v2 + with: + context: ./docker/datahub-ingestion + file: ./docker/datahub-ingestion/smoke.Dockerfile + platforms: linux/amd64,linux/arm64 + tags: acryldata/datahub-ingestion-base:smoke + push: true diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index 899919b2978c41..aaea8b04f38362 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -25,7 +25,8 @@ ARG RELEASE_VERSION RUN cd /datahub-ingestion && \ sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ cat src/datahub/__init__.py && \ - pip install ".[all]" + pip install ".[all]" && \ + pip freeze FROM base as dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. diff --git a/docker/datahub-ingestion/base-requirements.txt b/docker/datahub-ingestion/base-requirements.txt index ea0fc24498e58c..788baab9de5338 100644 --- a/docker/datahub-ingestion/base-requirements.txt +++ b/docker/datahub-ingestion/base-requirements.txt @@ -1,3 +1,4 @@ +acryl-datahub absl-py==1.1.0 acryl-iceberg-legacy==0.0.4 acryl-PyHive==0.6.13 diff --git a/docker/datahub-ingestion/base.Dockerfile b/docker/datahub-ingestion/base.Dockerfile index e104b9b34f8adb..dfffa64ea3a090 100644 --- a/docker/datahub-ingestion/base.Dockerfile +++ b/docker/datahub-ingestion/base.Dockerfile @@ -12,4 +12,5 @@ RUN apt-get update && apt-get install -y \ COPY ./base-requirements.txt requirements.txt -RUN pip install -r requirements.txt \ No newline at end of file +RUN pip install -r requirements.txt && \ + pip uninstall -y acryl-datahub \ No newline at end of file diff --git a/docker/datahub-ingestion/smoke.Dockerfile b/docker/datahub-ingestion/smoke.Dockerfile new file mode 100644 index 00000000000000..3bfdc9ccd0d770 --- /dev/null +++ b/docker/datahub-ingestion/smoke.Dockerfile @@ -0,0 +1,18 @@ +FROM acryldata/datahub-ingestion-base as base + +RUN apt-get update && apt-get install -y \ + sudo \ + python3-dev \ + libgtk2.0-0 \ + libgtk-3-0 \ + libgbm-dev \ + libnotify-dev \ + libgconf-2-4 \ + libnss3 \ + libxss1 \ + libasound2 \ + libxtst6 \ + xauth \ + xvfb + +RUN DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-11-jdk \ No newline at end of file From ff0aa3f24b4496c6e4c26f95c332882ee296a55a Mon Sep 17 00:00:00 2001 From: Aezo <45879156+aezomz@users.noreply.github.com> Date: Wed, 13 Jul 2022 01:10:07 +0800 Subject: [PATCH 06/22] feat(sdk): python - add get_aspects_for_entity (#5255) Co-authored-by: Shirshanka Das --- .../library/dataset_query_entity_v2.py | 33 ++++++++++++ .../src/datahub/ingestion/graph/client.py | 53 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 metadata-ingestion/examples/library/dataset_query_entity_v2.py diff --git a/metadata-ingestion/examples/library/dataset_query_entity_v2.py 
b/metadata-ingestion/examples/library/dataset_query_entity_v2.py new file mode 100644 index 00000000000000..a8b8439079f22c --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_query_entity_v2.py @@ -0,0 +1,33 @@ +import logging + +from datahub.emitter.mce_builder import make_dataset_urn + +# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough) +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + DataPlatformInstanceClass, + DatasetKeyClass, + StatusClass, +) + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") + +gms_endpoint = "http://localhost:8080" +graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint)) + +# Query multiple aspects from entity +result = graph.get_aspects_for_entity( + entity_urn=dataset_urn, + aspects=["status", "dataPlatformInstance", "datasetKey"], + aspect_types=[StatusClass, DataPlatformInstanceClass, DatasetKeyClass], +) + +# result are typed according to their class if exist +if result is not None: + if result["datasetKey"]: + log.info(result["datasetKey"].name) diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 2e876bfa8d15bc..9b069f148f7f94 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -288,6 +288,59 @@ def get_latest_timeseries_value( ) return None + def get_aspects_for_entity( + self, + entity_urn: str, + aspects: List[str], + aspect_types: List[Type[Aspect]], + ) -> Optional[Dict[str, Optional[Aspect]]]: + """ + Get multiple aspects for an entity. To get a single aspect for an entity, use the `get_aspect_v2` method. + Warning: Do not use this method to determine if an entity exists! + This method will always return an entity, even if it doesn't exist. This is an issue with how DataHub server + responds to these calls, and will be fixed automatically when the server-side issue is fixed. + + :param str entity_urn: The urn of the entity + :param List[Type[Aspect]] aspect_type_list: List of aspect type classes being requested (e.g. [datahub.metadata.schema_classes.DatasetProperties]) + :param List[str] aspects_list: List of aspect names being requested (e.g. [schemaMetadata, datasetProperties]) + :return: Optionally, a map of aspect_name to aspect_value as a dictionary if present, aspect_value will be set to None if that aspect was not found. Returns None on HTTP status 404. 
+ :rtype: Optional[Dict[str, Optional[Aspect]]] + :raises HttpError: if the HTTP response is not a 200 or a 404 + """ + assert len(aspects) == len( + aspect_types + ), f"number of aspects requested ({len(aspects)}) should be the same as number of aspect types provided ({len(aspect_types)})" + aspects_list = ",".join(aspects) + url: str = f"{self._gms_server}/entitiesV2/{Urn.url_encode(entity_urn)}?aspects=List({aspects_list})" + + response = self._session.get(url) + if response.status_code == 404: + # not found + return None + response.raise_for_status() + response_json = response.json() + + result: Dict[str, Optional[Aspect]] = {} + for aspect_type in aspect_types: + record_schema: RecordSchema = aspect_type.__getattribute__( + aspect_type, "RECORD_SCHEMA" + ) + if not record_schema: + logger.warning( + f"Failed to infer type name of the aspect from the aspect type class {aspect_type}. Continuing, but this will fail." + ) + else: + aspect_type_name = record_schema.props["Aspect"]["name"] + aspect_json = response_json.get("aspects", {}).get(aspect_type_name) + if aspect_json: + # need to apply a transform to the response to match rest.li and avro serialization + post_json_obj = post_json_transform(aspect_json) + result[aspect_type_name] = aspect_type.from_obj(post_json_obj["value"]) + else: + result[aspect_type_name] = None + + return result + def _get_search_endpoint(self): return f"{self.config.server}/entities?action=search" From f0281f32abe8ce3163fe6ab6427a03af192c0fd5 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Tue, 12 Jul 2022 19:20:27 +0200 Subject: [PATCH 07/22] fix(airflow): fix for failing serialisation when Param was specified + support for external task sensor (#5368) fixes #4546 --- .../client/airflow_generator.py | 58 +++++++++++-------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/metadata-ingestion/src/datahub_provider/client/airflow_generator.py b/metadata-ingestion/src/datahub_provider/client/airflow_generator.py index b5c389d298969e..b7864ddb71ea60 100644 --- a/metadata-ingestion/src/datahub_provider/client/airflow_generator.py +++ b/metadata-ingestion/src/datahub_provider/client/airflow_generator.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Union, cast from airflow.configuration import conf @@ -87,6 +87,27 @@ def _get_dependencies( if subdag_task_id in upstream_task._downstream_task_ids: upstream_subdag_triggers.append(upstream_task_urn) + # If the operator is an ExternalTaskSensor then we set the remote task as upstream. + # It is possible to tie an external sensor to DAG if external_task_id is omitted but currently we can't tie + # jobflow to anothet jobflow. 
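For context, a minimal sketch (not part of this patch) of the DAG-side construct the branch below handles; the DAG id, task ids, and resulting URN are illustrative:

```python
# A sensor in this DAG waits on a task in another DAG; with the change below, that remote
# task is recorded as an upstream DataJob for lineage.
import pendulum
from airflow.models import DAG
from airflow.sensors.external_task_sensor import ExternalTaskSensor

dag = DAG(
    dag_id="marketing-send_emails",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    schedule_interval="0 0 * * *",
    catchup=False,
)

wait_for_load = ExternalTaskSensor(
    task_id="wait_for_pet_profiles_load",
    external_dag_id="snowflake_load",                   # upstream DAG
    external_task_id="load_s3_adoption_pet_profiles",   # upstream task in that DAG
    dag=dag,
)
# The DataJob emitted for this sensor would then list an upstream urn shaped roughly like
# urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_load,prod),load_s3_adoption_pet_profiles)
```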
+ external_task_upstreams = [] + if task.task_type == "ExternalTaskSensor": + from airflow.sensors.external_task_sensor import ExternalTaskSensor + + task = cast(ExternalTaskSensor, task) + if hasattr(task, "external_task_id") and task.external_task_id is not None: + external_task_upstreams = [ + DataJobUrn.create_from_ids( + job_id=task.external_task_id, + data_flow_urn=str( + DataFlowUrn.create_from_ids( + orchestrator=flow_urn.get_orchestrator_name(), + flow_id=task.external_dag_id, + env=flow_urn.get_env(), + ) + ), + ) + ] # exclude subdag operator tasks since these are not emitted, resulting in empty metadata upstream_tasks = ( [ @@ -96,6 +117,7 @@ def _get_dependencies( ] + upstream_subdag_task_urns + upstream_subdag_triggers + + external_task_upstreams ) return upstream_tasks @@ -114,8 +136,6 @@ def generate_dataflow( :param capture_owner: :return: DataFlow - Data generated dataflow """ - from airflow.serialization.serialized_objects import SerializedDAG - id = dag.dag_id orchestrator = "airflow" description = f"{dag.description}\n\n{dag.doc_md or ''}" @@ -123,13 +143,7 @@ def generate_dataflow( cluster=cluster, id=id, orchestrator=orchestrator, description=description ) - flow_property_bag: Dict[str, str] = { - key: repr(value) - for (key, value) in SerializedDAG.serialize_dag(dag).items() - } - for key in dag.get_serialized_fields(): - if key not in flow_property_bag: - flow_property_bag[key] = repr(getattr(dag, key)) + flow_property_bag: Dict[str, str] = {} allowed_flow_keys = [ "_access_control", @@ -142,9 +156,10 @@ def generate_dataflow( "tags", "timezone", ] - flow_property_bag = { - k: v for (k, v) in flow_property_bag.items() if k in allowed_flow_keys - } + + for key in allowed_flow_keys: + if hasattr(dag, key): + flow_property_bag[key] = repr(getattr(dag, key)) data_flow.properties = flow_property_bag base_url = conf.get("webserver", "base_url") @@ -191,21 +206,13 @@ def generate_datajob( :param capture_tags: bool - whether to set tags automatically from airflow task :return: DataJob - returns the generated DataJob object """ - from airflow.serialization.serialized_objects import SerializedBaseOperator - dataflow_urn = DataFlowUrn.create_from_ids( orchestrator="airflow", env=cluster, flow_id=dag.dag_id ) datajob = DataJob(id=task.task_id, flow_urn=dataflow_urn) datajob.description = AirflowGenerator._get_description(task) - job_property_bag: Dict[str, str] = { - key: repr(value) - for (key, value) in SerializedBaseOperator.serialize_operator(task).items() - } - for key in task.get_serialized_fields(): - if key not in job_property_bag: - job_property_bag[key] = repr(getattr(task, key)) + job_property_bag: Dict[str, str] = {} allowed_task_keys = [ "_downstream_task_ids", @@ -223,9 +230,10 @@ def generate_datajob( "trigger_rule", "wait_for_downstream", ] - job_property_bag = { - k: v for (k, v) in job_property_bag.items() if k in allowed_task_keys - } + + for key in allowed_task_keys: + if hasattr(task, key): + job_property_bag[key] = repr(getattr(task, key)) datajob.properties = job_property_bag base_url = conf.get("webserver", "base_url") From e2a0fddcaa8c30babd6be07a1941793a775f5e19 Mon Sep 17 00:00:00 2001 From: Aditya Radhakrishnan Date: Tue, 12 Jul 2022 10:20:35 -0700 Subject: [PATCH 08/22] fix(users): fix to not get invite token unless the invite token modal is visible (#5380) --- .../src/app/identity/user/ViewInviteTokenModal.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/identity/user/ViewInviteTokenModal.tsx 
b/datahub-web-react/src/app/identity/user/ViewInviteTokenModal.tsx index a353af4677f026..d8d2d9ccc38ecb 100644 --- a/datahub-web-react/src/app/identity/user/ViewInviteTokenModal.tsx +++ b/datahub-web-react/src/app/identity/user/ViewInviteTokenModal.tsx @@ -42,7 +42,7 @@ type Props = { export default function ViewInviteTokenModal({ visible, onClose }: Props) { const baseUrl = window.location.origin; - const { data: getNativeUserInviteTokenData } = useGetNativeUserInviteTokenQuery({}); + const { data: getNativeUserInviteTokenData } = useGetNativeUserInviteTokenQuery({ skip: !visible }); const [createNativeUserInviteToken, { data: createNativeUserInviteTokenData }] = useCreateNativeUserInviteTokenMutation({}); From 5011c2f6db565c95eee9f1bb0c90528f104aebc3 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Wed, 13 Jul 2022 00:02:28 +0100 Subject: [PATCH 09/22] fix(gms) Propagate cache exception upstream (#5381) --- .../datahub/authentication/token/StatefulTokenService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java index b35ad32ee88abf..19efd4f6139f5d 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java @@ -161,7 +161,7 @@ public TokenClaims validateAccessToken(@Nonnull String accessToken) throws Token this.revokeAccessToken(hash(accessToken)); throw e; } catch (final ExecutionException e) { - throw new TokenException("Failed to validate DataHub token: Unable to load token information from store"); + throw new TokenException("Failed to validate DataHub token: Unable to load token information from store", e); } } @@ -174,7 +174,7 @@ public void revokeAccessToken(@Nonnull String hashedToken) throws TokenException return; } } catch (ExecutionException e) { - throw new TokenException("Failed to validate DataHub token from cache"); + throw new TokenException("Failed to validate DataHub token from cache", e); } throw new TokenException("Access token no longer exists"); } From 3a2fec341aa2dac2d59eb51290a4f3e5756ca092 Mon Sep 17 00:00:00 2001 From: Aditya Radhakrishnan Date: Tue, 12 Jul 2022 17:23:41 -0700 Subject: [PATCH 10/22] fix(bootstrap): skip ingesting data platforms that already exist (#5382) --- .../boot/steps/IngestDataPlatformsStep.java | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java index 56910cf24baea0..7d460419adf7b2 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.boot.steps; +import com.datahub.util.RecordUtils; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.AuditStamp; @@ -7,12 +8,9 @@ import com.linkedin.dataplatform.DataPlatformInfo; import com.linkedin.metadata.Constants; import com.linkedin.metadata.boot.BootstrapStep; -import com.datahub.util.RecordUtils; 
import com.linkedin.metadata.entity.EntityService; - import java.io.IOException; import java.net.URISyntaxException; - import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.core.io.ClassPathResource; @@ -46,13 +44,24 @@ public void execute() throws IOException, URISyntaxException { // 2. For each JSON object, cast into a DataPlatformSnapshot object. for (final JsonNode dataPlatform : dataPlatforms) { + final String urnString; final Urn urn; try { - urn = Urn.createFromString(dataPlatform.get("urn").asText()); + urnString = dataPlatform.get("urn").asText(); + urn = Urn.createFromString(urnString); } catch (URISyntaxException e) { log.error("Malformed urn: {}", dataPlatform.get("urn").asText()); throw new RuntimeException("Malformed urn", e); } + + final DataPlatformInfo existingInfo = + (DataPlatformInfo) _entityService.getLatestAspect(urn, PLATFORM_ASPECT_NAME); + // Skip ingesting for this JSON object if info already exists. + if (existingInfo != null) { + log.debug(String.format("%s already exists for %s. Skipping...", PLATFORM_ASPECT_NAME, urnString)); + continue; + } + final DataPlatformInfo info = RecordUtils.toRecordTemplate(DataPlatformInfo.class, dataPlatform.get("aspect").toString()); From 4c6d42780005bf9da313d57c05b3a8e140f3799f Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 13 Jul 2022 11:24:18 +0200 Subject: [PATCH 11/22] fix(cli): respect server telemetry settings correctly (#5384) Co-authored-by: Shirshanka Das --- metadata-ingestion/src/datahub/telemetry/telemetry.py | 10 +++++----- .../src/datahub/utilities/server_config_util.py | 7 ++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/telemetry/telemetry.py b/metadata-ingestion/src/datahub/telemetry/telemetry.py index 0a346d09373850..c7c0e8e51526f5 100644 --- a/metadata-ingestion/src/datahub/telemetry/telemetry.py +++ b/metadata-ingestion/src/datahub/telemetry/telemetry.py @@ -262,11 +262,11 @@ def _server_props(self, server: Optional[DataHubGraph]) -> Dict[str, str]: T = TypeVar("T") -def set_telemetry_enable(enable: bool) -> Any: - telemetry_instance.enabled = enable - if not enable: - logger.info("Disabling Telemetry locally due to server config") - telemetry_instance.update_config() +def suppress_telemetry() -> Any: + """disables telemetry for this invocation, doesn't affect persistent client settings""" + if telemetry_instance.enabled: + logger.debug("Disabling telemetry locally due to server config") + telemetry_instance.enabled = False def get_full_class_name(obj): diff --git a/metadata-ingestion/src/datahub/utilities/server_config_util.py b/metadata-ingestion/src/datahub/utilities/server_config_util.py index 40841321ad2778..1b8c05b6091347 100644 --- a/metadata-ingestion/src/datahub/utilities/server_config_util.py +++ b/metadata-ingestion/src/datahub/utilities/server_config_util.py @@ -1,6 +1,6 @@ from typing import Any, Dict, Optional -from datahub.telemetry.telemetry import set_telemetry_enable +from datahub.telemetry.telemetry import suppress_telemetry # Only to be written to for logging server related information global_debug: Dict[str, Any] = {} @@ -10,8 +10,9 @@ def set_gms_config(config: Dict) -> Any: global_debug["gms_config"] = config cli_telemtry_enabled = is_cli_telemetry_enabled() - if cli_telemtry_enabled is not None: - set_telemetry_enable(cli_telemtry_enabled) + if cli_telemtry_enabled is not None and not cli_telemtry_enabled: + # server requires telemetry to be disabled on client + 
suppress_telemetry() def get_gms_config() -> Dict: From 9ec4fbae866f15a515ee7702e46905b49ef3246a Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 13 Jul 2022 13:21:45 +0200 Subject: [PATCH 12/22] fix(ingest): bigquery - Graceful bq partition id date parsing failure (#5386) --- .../datahub/ingestion/source/sql/bigquery.py | 26 ++++++++++++++----- .../ingestion/source/sql/sql_common.py | 3 ++- .../ingestion/source_report/sql/bigquery.py | 1 + 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py index 947f9b92718471..fcb4d63477f7b4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py @@ -868,17 +868,29 @@ def generate_partition_profiler_query( partitioned table. See more about partitioned tables at https://cloud.google.com/bigquery/docs/partitioned-tables """ - + logger.debug( + f"generate partition profiler query for schema: {schema} and table {table}, partition_datetime: {partition_datetime}" + ) partition = self.get_latest_partition(schema, table) if partition: partition_where_clause: str logger.debug(f"{table} is partitioned and partition column is {partition}") - ( - partition_datetime, - upper_bound_partition_datetime, - ) = get_partition_range_from_partition_id( - partition.partition_id, partition_datetime - ) + try: + ( + partition_datetime, + upper_bound_partition_datetime, + ) = get_partition_range_from_partition_id( + partition.partition_id, partition_datetime + ) + except ValueError as e: + logger.error( + f"Unable to get partition range for partition id: {partition.partition_id} it failed with exception {e}" + ) + self.report.invalid_partition_ids[ + f"{schema}.{table}" + ] = partition.partition_id + return None, None + if partition.data_type in ("TIMESTAMP", "DATETIME"): partition_where_clause = "{column_name} BETWEEN '{partition_id}' AND '{upper_bound_partition_id}'".format( column_name=partition.column_name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 56b82adbbab156..3a5d91b101ca07 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -1442,7 +1442,8 @@ def loop_profiler_requests( database=None, schema=schema, table=table ): self.report.report_warning( - "profile skipped as partitioned table empty", dataset_name + "profile skipped as partitioned table is empty or partition id was invalid", + dataset_name, ) continue diff --git a/metadata-ingestion/src/datahub/ingestion/source_report/sql/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source_report/sql/bigquery.py index e8577164640b95..066c0ee0c080d7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_report/sql/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source_report/sql/bigquery.py @@ -36,3 +36,4 @@ class BigQueryReport(SQLSourceReport): table_metadata: Dict[str, List[str]] = field(default_factory=dict) profile_table_selection_criteria: Dict[str, str] = field(default_factory=dict) selected_profile_tables: Dict[str, List[str]] = field(default_factory=dict) + invalid_partition_ids: Dict[str, str] = field(default_factory=dict) From 433424895319b5adcfa13affe0614553da0e7a5c Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Wed, 13 Jul 2022 19:17:38 +0200 
Subject: [PATCH 13/22] feat(airflow): Circuit breaker and python api for Assertion and Operation (#5196) --- .../ecommerce/01_snowflake_load.py | 38 +++ .../01-operation/marketing/01_send_emails.py | 36 +++ .../ecommerce/02_snowflake_load.py | 42 +++ .../02-assertion/marketing/02_send_emails.py | 52 ++++ metadata-ingestion/setup.py | 5 + .../datahub/api/circuit_breaker/__init__.py | 8 + .../assertion_circuit_breaker.py | 137 ++++++++++ .../api/circuit_breaker/circuit_breaker.py | 50 ++++ .../operation_circuit_breaker.py | 81 ++++++ .../src/datahub/api/graphql/__init__.py | 2 + .../src/datahub/api/graphql/assertion.py | 91 +++++++ .../src/datahub/api/graphql/base.py | 52 ++++ .../src/datahub/api/graphql/operation.py | 140 ++++++++++ .../operators/datahub_assertion_operator.py | 78 ++++++ .../operators/datahub_assertion_sensor.py | 78 ++++++ .../operators/datahub_operation_operator.py | 98 +++++++ .../operators/datahub_operation_sensor.py | 100 +++++++ .../integration/circuit_breaker/__init__.py | 0 .../assertion_gql_empty_response.json | 9 + .../assertion_gql_response.json | 244 ++++++++++++++++++ .../assertion_gql_response_with_no_error.json | 198 ++++++++++++++ .../operation_gql_empty_response.json | 6 + .../operation_gql_response.json | 79 ++++++ .../circuit_breaker/test_circuit_breaker.py | 155 +++++++++++ 24 files changed, 1779 insertions(+) create mode 100644 metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/01-operation/ecommerce/01_snowflake_load.py create mode 100644 metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/01-operation/marketing/01_send_emails.py create mode 100644 metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/02-assertion/ecommerce/02_snowflake_load.py create mode 100644 metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/02-assertion/marketing/02_send_emails.py create mode 100644 metadata-ingestion/src/datahub/api/circuit_breaker/__init__.py create mode 100644 metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py create mode 100644 metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py create mode 100644 metadata-ingestion/src/datahub/api/circuit_breaker/operation_circuit_breaker.py create mode 100644 metadata-ingestion/src/datahub/api/graphql/__init__.py create mode 100644 metadata-ingestion/src/datahub/api/graphql/assertion.py create mode 100644 metadata-ingestion/src/datahub/api/graphql/base.py create mode 100644 metadata-ingestion/src/datahub/api/graphql/operation.py create mode 100644 metadata-ingestion/src/datahub_provider/operators/datahub_assertion_operator.py create mode 100644 metadata-ingestion/src/datahub_provider/operators/datahub_assertion_sensor.py create mode 100644 metadata-ingestion/src/datahub_provider/operators/datahub_operation_operator.py create mode 100644 metadata-ingestion/src/datahub_provider/operators/datahub_operation_sensor.py create mode 100644 metadata-ingestion/tests/integration/circuit_breaker/__init__.py create mode 100644 metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_empty_response.json create mode 100644 metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_response.json create mode 100644 metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_response_with_no_error.json create mode 100644 metadata-ingestion/tests/integration/circuit_breaker/operation_gql_empty_response.json create mode 100644 
metadata-ingestion/tests/integration/circuit_breaker/operation_gql_response.json create mode 100644 metadata-ingestion/tests/integration/circuit_breaker/test_circuit_breaker.py diff --git a/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/01-operation/ecommerce/01_snowflake_load.py b/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/01-operation/ecommerce/01_snowflake_load.py new file mode 100644 index 00000000000000..582002f8d80f16 --- /dev/null +++ b/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/01-operation/ecommerce/01_snowflake_load.py @@ -0,0 +1,38 @@ +import pendulum +from airflow.models import DAG +from airflow.operators.bash import BashOperator + +from datahub.api.graphql.operation import Operation +from datahub_provider.entities import Dataset +from datahub_provider.hooks.datahub import DatahubRestHook + +dag = DAG( + dag_id="snowflake_load", + start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), + schedule_interval="0 0 * * *", + catchup=False, +) + + +# Operation push +# The number of rows is hardcoded in this example but this shouldn't in normal operation +def report_operation(context): + hook: DatahubRestHook = DatahubRestHook("datahub_longtail") + host, password, timeout_sec = hook._get_config() + reporter = Operation(datahub_host=host, datahub_token=password, timeout=timeout_sec) + task = context["ti"].task + for outlet in task._outlets: + print(f"Reporting insert operation for {outlet.urn}") + reporter.report_operation( + urn=outlet.urn, operation_type="INSERT", num_affected_rows=123 + ) + + +pet_profiles_load = BashOperator( + task_id="load_s3_adoption_pet_profiles", + dag=dag, + inlets=[Dataset("s3", "longtail-core-data/mongo/adoption/pet_profiles")], + outlets=[Dataset("snowflake", "long_tail_companions.adoption.pet_profiles")], + bash_command="echo Dummy Task", + on_success_callback=report_operation, +) diff --git a/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/01-operation/marketing/01_send_emails.py b/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/01-operation/marketing/01_send_emails.py new file mode 100644 index 00000000000000..62bf8962bfa721 --- /dev/null +++ b/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/01-operation/marketing/01_send_emails.py @@ -0,0 +1,36 @@ +import datetime + +import pendulum +from airflow.models import DAG +from airflow.operators.bash import BashOperator + +from datahub_provider.entities import Dataset +from datahub_provider.operators.datahub_operation_sensor import ( + DataHubOperationCircuitBreakerSensor, +) + +dag = DAG( + dag_id="marketing-send_emails", + start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), + schedule_interval="0 0 * * *", + catchup=False, +) + +# New DataHub Operation Circuit Breaker Sensor +pet_profiles_operation_sensor = DataHubOperationCircuitBreakerSensor( + task_id="pet_profiles_operation_sensor", + datahub_rest_conn_id="datahub_longtail", + urn=[ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD)" + ], + time_delta=datetime.timedelta(minutes=10), +) + +send_email = BashOperator( + task_id="send_emails", + dag=dag, + inlets=[Dataset("snowflake", "long_tail_companions.adoption.pet_profiles")], + bash_command="echo Dummy Task", +) + +pet_profiles_operation_sensor.set_downstream(send_email) diff --git a/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/02-assertion/ecommerce/02_snowflake_load.py 
b/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/02-assertion/ecommerce/02_snowflake_load.py new file mode 100644 index 00000000000000..1adcee76178a45 --- /dev/null +++ b/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/02-assertion/ecommerce/02_snowflake_load.py @@ -0,0 +1,42 @@ +import pendulum +from airflow.models import DAG +from airflow.operators.bash import BashOperator + +from datahub.api.graphql.operation import Operation +from datahub_provider.entities import Dataset +from datahub_provider.hooks.datahub import DatahubRestHook + +dag = DAG( + dag_id="snowflake_load", + start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), + schedule_interval="0 0 * * *", + catchup=False, +) + + +def report_operation(context): + hook: DatahubRestHook = DatahubRestHook("datahub_longtail") + host, password, timeout_sec = hook._get_config() + reporter = Operation(datahub_host=host, datahub_token=password, timeout=timeout_sec) + task = context["ti"].task + for inlet in task._outlets: + reporter.report_operation(urn=inlet.urn, operation_type="INSERT") + + +pet_profiles_load = BashOperator( + task_id="load_s3_adoption_pet_profiles", + dag=dag, + inlets=[Dataset("s3", "longtail-core-data/mongo/adoption/pet_profiles")], + outlets=[Dataset("snowflake", "long_tail_companions.adoption.pet_profiles")], + bash_command="echo Dummy Task", + on_success_callback=report_operation, +) + +# Simple bash command as example to load great expectation tests +run_ge_tests = BashOperator( + task_id="pet_profiles_ge_tests_run", + inlets=[Dataset("snowflake", "long_tail_companions.adoption.pet_profiles")], + bash_command="echo /usr/local/airflow/.local/bin/great_expectations checkpoint run pet_profiles", +) + +pet_profiles_load.set_downstream(run_ge_tests) diff --git a/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/02-assertion/marketing/02_send_emails.py b/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/02-assertion/marketing/02_send_emails.py new file mode 100644 index 00000000000000..8f6464206e62a9 --- /dev/null +++ b/metadata-ingestion/examples/airflow/circuit_breaker/long_tail_companion/02-assertion/marketing/02_send_emails.py @@ -0,0 +1,52 @@ +import datetime + +import pendulum +from airflow.models import DAG +from airflow.operators.bash import BashOperator + +from datahub_provider.entities import Dataset +from datahub_provider.operators.datahub_assertion_operator import ( + DataHubAssertionOperator, +) +from datahub_provider.operators.datahub_operation_sensor import ( + DataHubOperationCircuitBreakerSensor, +) + +dag = DAG( + dag_id="marketing-send_emails", + start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), + schedule_interval="0 0 * * *", + catchup=False, +) + +items_operation_sensor = DataHubOperationCircuitBreakerSensor( + dag=dag, + task_id="pet_profiles_operation_sensor", + datahub_rest_conn_id="datahub_longtail", + urn=[ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD)" + ], + time_delta=datetime.timedelta(days=1), +) + +# Assertion circuit breaker to check if there are assertions for the urns specified. +# verify_after_last_update is enabled which means it will get from the latest operation the timeframe +# it accepts assertions. 
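+# Note: the DataHub circuit-breaker operators and sensors are bypassed whenever the
+# "datahub_silence_circuit_breakers" key is present in the dag_run conf, e.g.
+#   airflow dags trigger marketing-send_emails --conf '{"datahub_silence_circuit_breakers": true}'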
+assertion_circuit_breaker = DataHubAssertionOperator( + task_id="pet_profiles_assertion_circuit_breaker", + datahub_rest_conn_id="datahub_longtail", + urn=[ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD)" + ], + check_last_assertion_time=True, +) + +send_email = BashOperator( + task_id="send_emails", + dag=dag, + inlets=[Dataset("snowflake", "long_tail_companions.adoption.pet_profiles")], + bash_command="echo Dummy Task", +) + +items_operation_sensor.set_downstream(assertion_circuit_breaker) +assertion_circuit_breaker.set_downstream(send_email) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 929208eaa8891e..6467380cb7f5d6 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -192,6 +192,10 @@ def get_long_description(): "airflow": { "apache-airflow >= 1.10.2", }, + "circuit-breaker": { + "gql>=3.3.0", + "gql[requests]>=3.3.0", + }, "great-expectations": sql_common | {"sqllineage==1.3.5"}, # Source plugins # PyAthena is pinned with exact version because we use private method in PyAthena @@ -422,6 +426,7 @@ def get_long_description(): *list( dependency for plugin in [ + "circuit-breaker", "clickhouse", "druid", "feast-legacy", diff --git a/metadata-ingestion/src/datahub/api/circuit_breaker/__init__.py b/metadata-ingestion/src/datahub/api/circuit_breaker/__init__.py new file mode 100644 index 00000000000000..4dcf40454736b9 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/circuit_breaker/__init__.py @@ -0,0 +1,8 @@ +from datahub.api.circuit_breaker.assertion_circuit_breaker import ( + AssertionCircuitBreaker, + AssertionCircuitBreakerConfig, +) +from datahub.api.circuit_breaker.operation_circuit_breaker import ( + OperationCircuitBreaker, + OperationCircuitBreakerConfig, +) diff --git a/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py b/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py new file mode 100644 index 00000000000000..67a5b3630a455f --- /dev/null +++ b/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py @@ -0,0 +1,137 @@ +import logging +from dataclasses import dataclass +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional + +from pydantic import Field + +from datahub.api.circuit_breaker.circuit_breaker import ( + AbstractCircuitBreaker, + CircuitBreakerConfig, +) +from datahub.api.graphql import Assertion, Operation + +logger: logging.Logger = logging.getLogger(__name__) + + +class AssertionCircuitBreakerConfig(CircuitBreakerConfig): + verify_after_last_update: bool = Field( + default=True, + description="Whether to check if assertion happened after the dataset was last updated.", + ) + time_delta: timedelta = Field( + default=(timedelta(days=1)), + description="In what timeframe should accept an assertion result if updated field is not available for the dataset", + ) + + +class AssertionCircuitBreaker(AbstractCircuitBreaker): + r""" + DataHub Assertion Circuit Breaker + + The circuit breaker checks if there are passing assertion on the Dataset. 
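As an editorial aside, a minimal sketch of driving this breaker directly, outside Airflow; the GMS URL, token, and dataset urn below are placeholders:

```python
from datetime import timedelta

from datahub.api.circuit_breaker import (
    AssertionCircuitBreaker,
    AssertionCircuitBreakerConfig,
)

config = AssertionCircuitBreakerConfig(
    datahub_host="http://localhost:8080",   # placeholder GMS endpoint
    datahub_token=None,                     # or a personal access token
    verify_after_last_update=True,
    time_delta=timedelta(days=1),
)
breaker = AssertionCircuitBreaker(config=config)

urn = "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD)"
if breaker.is_circuit_breaker_active(urn=urn):
    raise RuntimeError(f"{urn} has failing or missing assertions; stopping the pipeline")
```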
+ """ + config: AssertionCircuitBreakerConfig + + def __init__(self, config: AssertionCircuitBreakerConfig): + super().__init__(config.datahub_host, config.datahub_token, config.timeout) + self.config = config + self.assertion_api = Assertion( + datahub_host=config.datahub_host, + datahub_token=config.datahub_token, + timeout=config.timeout, + ) + + def get_last_updated(self, urn: str) -> Optional[datetime]: + operation_api: Operation = Operation(transport=self.assertion_api.transport) + operations = operation_api.query_operations(urn=urn) + if not operations: + return None + else: + return datetime.fromtimestamp(operations[0]["lastUpdatedTimestamp"] / 1000) + + def _check_if_assertion_failed( + self, assertions: List[Dict[str, Any]], last_updated: Optional[datetime] = None + ) -> bool: + @dataclass + class AssertionResult: + time: int + state: str + run_event: Any + + # If last_updated is set we expect to have at least one successfull assertion + if not assertions and last_updated: + return True + + result: bool = True + assertion_last_states: Dict[str, AssertionResult] = {} + for assertion in assertions: + if "runEvents" in assertion and "runEvents" in assertion["runEvents"]: + for run_event in assertion["runEvents"]["runEvents"]: + assertion_time = run_event["timestampMillis"] + assertion_state = run_event["result"]["type"] + assertion_urn = run_event["assertionUrn"] + if ( + assertion_urn not in assertion_last_states + or assertion_last_states[assertion_urn].time < assertion_time + ): + assertion_last_states[assertion_urn] = AssertionResult( + time=assertion_time, + state=assertion_state, + run_event=run_event, + ) + + for assertion_urn, last_assertion in assertion_last_states.items(): + if last_assertion.state == "FAILURE": + logger.debug(f"Runevent: {last_assertion.run_event}") + logger.info( + f"Assertion {assertion_urn} is failed on dataset. Breaking the circuit" + ) + return True + elif last_assertion.state == "SUCCESS": + logger.info(f"Found successful assertion: {assertion_urn}") + result = False + if last_updated is not None: + last_run = datetime.fromtimestamp(last_assertion.time / 1000) + if last_updated > last_run: + logger.error( + f"Missing assertion run for {assertion_urn}. The dataset was updated on {last_updated} but the last assertion run was at {last_run}" + ) + return True + return result + + def is_circuit_breaker_active(self, urn: str) -> bool: + r""" + Checks if the circuit breaker is active + + :param urn: The DataHub dataset unique identifier. + """ + + last_updated: Optional[datetime] = None + + if self.config.verify_after_last_update: + last_updated = self.get_last_updated(urn) + logger.info( + f"The dataset {urn} was last updated at {last_updated}, using this as min assertion date." 
+ ) + + if not last_updated: + last_updated = datetime.now() - self.config.time_delta + logger.info( + f"Dataset {urn} doesn't have last updated or check_last_assertion_time is false, using calculated min assertion date {last_updated}" + ) + + assertions = self.assertion_api.query_assertion( + urn, + start_time_millis=int(last_updated.timestamp() * 1000), + status="COMPLETE", + ) + + if self._check_if_assertion_failed( + assertions, + last_updated if self.config.verify_after_last_update is True else None, + ): + logger.info(f"Dataset {urn} has failed or missing assertion(s).") + return True + + return False diff --git a/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py b/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py new file mode 100644 index 00000000000000..f8554334281d85 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py @@ -0,0 +1,50 @@ +import logging +from abc import abstractmethod +from typing import Optional + +from gql import Client +from gql.transport.requests import RequestsHTTPTransport +from pydantic import Field + +from datahub.configuration import ConfigModel + +logger = logging.getLogger(__name__) + + +class CircuitBreakerConfig(ConfigModel): + datahub_host: str = Field(description="Url of the DataHub instance") + datahub_token: Optional[str] = Field(default=None, description="The datahub token") + timeout: Optional[int] = Field( + default=None, + description="The number of seconds to wait for your client to establish a connection to a remote machine", + ) + + +class AbstractCircuitBreaker: + client: Client + + def __init__( + self, + datahub_host: str, + datahub_token: Optional[str] = None, + timeout: Optional[int] = None, + ): + # logging.basicConfig(level=logging.DEBUG) + + # Select your transport with a defined url endpoint + self.transport = RequestsHTTPTransport( + url=datahub_host + "/api/graphql", + headers={"Authorization": "Bearer " + datahub_token} + if datahub_token is not None + else None, + method="POST", + timeout=timeout, + ) + self.client = Client( + transport=self.transport, + fetch_schema_from_transport=True, + ) + + @abstractmethod + def is_circuit_breaker_active(self, urn: str) -> bool: + pass diff --git a/metadata-ingestion/src/datahub/api/circuit_breaker/operation_circuit_breaker.py b/metadata-ingestion/src/datahub/api/circuit_breaker/operation_circuit_breaker.py new file mode 100644 index 00000000000000..58a4ee37d959b6 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/circuit_breaker/operation_circuit_breaker.py @@ -0,0 +1,81 @@ +import logging +from datetime import datetime, timedelta +from typing import Optional + +from pydantic import Field + +from datahub.api.circuit_breaker.circuit_breaker import ( + AbstractCircuitBreaker, + CircuitBreakerConfig, +) +from datahub.api.graphql import Operation + +logger: logging.Logger = logging.getLogger(__name__) + + +class OperationCircuitBreakerConfig(CircuitBreakerConfig): + time_delta: timedelta = Field( + default=(timedelta(days=1)), + description="In what timeframe should accept an operation result if updated field is not available for the dataset", + ) + + +class OperationCircuitBreaker(AbstractCircuitBreaker): + r""" + DataHub Operation Circuit Breaker + + The circuit breaker checks if there is an operation metadata for the dataset. + If there is no valid Operation metadata then the circuit breaker fails. 
+ """ + + config: OperationCircuitBreakerConfig + operation_api: Operation + + def __init__(self, config: OperationCircuitBreakerConfig): + super().__init__(config.datahub_host, config.datahub_token, config.timeout) + self.config = config + self.operation_api = Operation( + datahub_host=config.datahub_host, + datahub_token=config.datahub_token, + timeout=config.timeout, + ) + + def is_circuit_breaker_active( + self, + urn: str, + partition: Optional[str] = None, + source_type: Optional[str] = None, + operation_type: Optional[str] = None, + ) -> bool: + r""" + Checks if the circuit breaker is active + + :param urn: The Datahub dataset unique identifier. + :param datahub_rest_conn_id: The REST DataHub connection id to communicate with DataHub + which is set as Airflow connection. + :param partition: The partition to check the operation. + :param source_type: The source type to filter on. If not set it will accept any source type. + See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype + :param operation_type: The operation type to filter on. If not set it will accept any source type. + See valid types here: https://datahubproject.io/docs/graphql/enums/#operationtype + """ + + start_time_millis: int = int( + (datetime.now() - self.config.time_delta).timestamp() * 1000 + ) + operations = self.operation_api.query_operations( + urn, + start_time_millis=start_time_millis, + partition=partition, + source_type=source_type, + operation_type=operation_type, + ) + logger.info(f"Operations: {operations}") + for operation in operations: + if ( + operation.get("lastUpdatedTimestamp") + and operation["lastUpdatedTimestamp"] >= start_time_millis + ): + return False + + return True diff --git a/metadata-ingestion/src/datahub/api/graphql/__init__.py b/metadata-ingestion/src/datahub/api/graphql/__init__.py new file mode 100644 index 00000000000000..e8c8d22bbb93df --- /dev/null +++ b/metadata-ingestion/src/datahub/api/graphql/__init__.py @@ -0,0 +1,2 @@ +from datahub.api.graphql.assertion import Assertion +from datahub.api.graphql.operation import Operation diff --git a/metadata-ingestion/src/datahub/api/graphql/assertion.py b/metadata-ingestion/src/datahub/api/graphql/assertion.py new file mode 100644 index 00000000000000..eb342e830e9969 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/graphql/assertion.py @@ -0,0 +1,91 @@ +import logging +from typing import Any, Dict, List, Optional + +from gql import gql + +from datahub.api.graphql.base import BaseApi + +logger = logging.getLogger(__name__) + + +class Assertion(BaseApi): + + ASSERTION_QUERY = """ +query dataset($urn: String!, $start: Int, $count: Int, $status: AssertionRunStatus,$limit: Int, $startTimeMillis:Long, $endTimeMillis:Long, $filter:FilterInput) { + dataset(urn: $urn) { + assertions(start: $start, count: $count){ + __typename + total + assertions{ + __typename + runEvents(status: $status, limit: $limit, startTimeMillis: $startTimeMillis, endTimeMillis: $endTimeMillis, filter: $filter) { + total + failed + succeeded + runEvents { + __typename + timestampMillis + partitionSpec { + __typename + type + partition + timePartition { + startTimeMillis + durationMillis + } + } + result { + __typename + type + rowCount + missingCount + unexpectedCount + actualAggValue + externalUrl + } + assertionUrn + } + } + } + } + } +} +""" + + def query_assertion( + self, + urn: str, + status: Optional[str] = None, + start_time_millis: Optional[int] = None, + end_time_millis: Optional[int] = None, + limit: Optional[int] = None, + 
filter: Optional[Dict[str, Optional[str]]] = None, + ) -> List[Dict[Any, Any]]: + r""" + Query assertions for a dataset. + + :param urn: The DataHub dataset unique identifier. + :param status: The assertion status to filter for. Every status will be accepted if it is not set. + See valid status at https://datahubproject.io/docs/graphql/enums#assertionrunstatus + :param start_time_millis: The start time in milliseconds from the assertions will be queried. + :param end_time_millis: The end time in milliseconds until the assertions will be queried. + :param filter: Additional key value filters which will be applied as AND query + """ + + result = self.client.execute( + gql(Assertion.ASSERTION_QUERY), + variable_values={ + "urn": urn, + "filter": self.gen_filter(filter) if filter else None, + "limit": limit, + "status": status, + "startTimeMillis": start_time_millis, + "endTimeMillis": end_time_millis, + }, + ) + + assertions = [] + if "dataset" in result and "assertions" in result["dataset"]: + assertions = result["dataset"]["assertions"]["assertions"] + + return assertions diff --git a/metadata-ingestion/src/datahub/api/graphql/base.py b/metadata-ingestion/src/datahub/api/graphql/base.py new file mode 100644 index 00000000000000..3654bd38816996 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/graphql/base.py @@ -0,0 +1,52 @@ +from typing import Dict, List, Optional + +from gql import Client +from gql.transport.requests import RequestsHTTPTransport + + +class BaseApi: + client: Client + + def __init__( + self, + datahub_host: Optional[str] = None, + datahub_token: Optional[str] = None, + timeout: Optional[int] = None, + transport: Optional[RequestsHTTPTransport] = None, + ): + # logging.basicConfig(level=logging.DEBUG) + + if transport: + self.transport = transport + else: + assert datahub_host is not None + # Select your transport with a defined url endpoint + self.transport = RequestsHTTPTransport( + url=datahub_host + "/api/graphql", + headers={"Authorization": "Bearer " + datahub_token} + if datahub_token is not None + else None, + method="POST", + timeout=timeout, + ) + + self.client = Client( + transport=self.transport, + fetch_schema_from_transport=True, + ) + + def gen_filter( + self, filters: Dict[str, Optional[str]] + ) -> Optional[Dict[str, List[Dict[str, str]]]]: + filter_expression: Optional[Dict[str, List[Dict[str, str]]]] = None + if not filters: + return None + + filter = [] + for key, value in filters.items(): + if value is None: + continue + filter.append({"field": key, "value": value}) + + filter_expression = {"and": filter} + return filter_expression diff --git a/metadata-ingestion/src/datahub/api/graphql/operation.py b/metadata-ingestion/src/datahub/api/graphql/operation.py new file mode 100644 index 00000000000000..5e1575e6f75dd2 --- /dev/null +++ b/metadata-ingestion/src/datahub/api/graphql/operation.py @@ -0,0 +1,140 @@ +import logging +from typing import Any, Dict, List, Optional + +from gql import gql + +from datahub.api.graphql.base import BaseApi + +logger = logging.getLogger(__name__) + + +class Operation(BaseApi): + REPORT_OPERATION_MUTATION: str = """ +mutation reportOperation($urn: String!, $sourceType: OperationSourceType!, $operationType: OperationType!, $partition: String, $numAffectedRows: Long, $customProperties: [StringMapEntryInput!]) { + reportOperation(input: { + urn: $urn + sourceType: $sourceType + operationType: $operationType + partition: $partition + numAffectedRows: $numAffectedRows + customProperties: $customProperties + }) +}""" + + 
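A brief aside on the `filter` argument these GraphQL documents accept: a sketch of what `BaseApi.gen_filter` (added in `base.py` above) produces, with placeholder values; entries whose value is `None` are dropped:

```python
filters = {"sourceType": "DATA_PROCESS", "operationType": None, "partition": "pt=2022-07-13"}
# gen_filter(filters) returns:
# {"and": [{"field": "sourceType", "value": "DATA_PROCESS"},
#          {"field": "partition", "value": "pt=2022-07-13"}]}
```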
QUERY_OPERATIONS: str = """ + query dataset($urn: String!, $startTimeMillis: Long, $endTimeMillis: Long, $limit: Int, $filter:FilterInput) { + dataset(urn: $urn) { + urn + operations (startTimeMillis: $startTimeMillis, endTimeMillis: $endTimeMillis, limit: $limit, filter: $filter) { + __typename + actor + operationType + sourceType + numAffectedRows + partition + timestampMillis + lastUpdatedTimestamp + customProperties { + key + value + } + } + } +}""" + + def report_operation( + self, + urn: str, + source_type: str = "DATA_PROCESS", + operation_type: str = "UPDATE", + partition: Optional[str] = None, + num_affected_rows: int = 0, + custom_properties: Optional[Dict[str, str]] = None, + ) -> str: + r""" + Report operation metadata for a dataset. + :param source_type: The source type to filter on. If not set it will accept any source type. + Default value: DATA_PROCESS + See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype + :param operation_type: The operation type to filter on. If not set it will accept any source type. + Default value: "UPDATE" + See valid types here: https://datahubproject.io/docs/graphql/enums/#operationtype + :param partition: The partition to set the operation. + :param num_affected_rows: The number of rows affected by this operation. + :param custom_properties: Key/value pair of custom propertis + """ + variable_values = { + "urn": urn, + "sourceType": source_type, + "operationType": operation_type, + "numAffectedRows": num_affected_rows, + } + + if partition is not None: + variable_values["partition"] = partition + + if num_affected_rows is not None: + variable_values["numAffectedRows"] = num_affected_rows + + if custom_properties is not None: + variable_values["customProperties"] = custom_properties + + result = self.client.execute( + gql(Operation.REPORT_OPERATION_MUTATION), variable_values + ) + + return result["reportOperation"] + + def query_operations( + self, + urn: str, + start_time_millis: Optional[int] = None, + end_time_millis: Optional[int] = None, + limit: Optional[int] = None, + source_type: Optional[str] = None, + operation_type: Optional[str] = None, + partition: Optional[str] = None, + ) -> List[Dict[Any, Any]]: + r""" + Query operations for a dataset. + + :param urn: The DataHub dataset unique identifier. + :param start_time_millis: The start time in milliseconds from the operations will be queried. + :param end_time_millis: The end time in milliseconds until the operations will be queried. + :param limit: The maximum number of items to return. + :param source_type: The source type to filter on. If not set it will accept any source type. + See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype + :param operation_type: The operation type to filter on. If not set it will accept any source type. + See valid types here: https://datahubproject.io/docs/graphql/enums#operationsourcetype + :param partition: The partition to check the operation. 
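For completeness, a sketch (not part of this patch) of calling `query_operations` directly to inspect recent operations; the host, token, and urn are placeholders:

```python
from datetime import datetime, timedelta

from datahub.api.graphql import Operation

client = Operation(datahub_host="http://localhost:8080", datahub_token=None)
since_millis = int((datetime.now() - timedelta(days=1)).timestamp() * 1000)

ops = client.query_operations(
    urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.adoption.pet_profiles,PROD)",
    start_time_millis=since_millis,
    source_type="DATA_PROCESS",
)
for op in ops:
    print(op["operationType"], op["lastUpdatedTimestamp"])
```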
+ """ + + result = self.client.execute( + gql(Operation.QUERY_OPERATIONS), + variable_values={ + "urn": urn, + "startTimeMillis": start_time_millis, + "end_time_millis": end_time_millis, + "limit": limit, + "filter": self.gen_filter( + { + "sourceType": source_type, + "operationType": operation_type, + "partition": partition, + } + if filter + else None + ), + }, + ) + if "dataset" in result and "operations" in result["dataset"]: + operations = [] + if source_type is not None: + for operation in result["dataset"]["operations"]: + if operation["sourceType"] == source_type: + operations.append(operation) + else: + operations = result["dataset"]["operations"] + + return operations + return [] diff --git a/metadata-ingestion/src/datahub_provider/operators/datahub_assertion_operator.py b/metadata-ingestion/src/datahub_provider/operators/datahub_assertion_operator.py new file mode 100644 index 00000000000000..89a037324e7cbe --- /dev/null +++ b/metadata-ingestion/src/datahub_provider/operators/datahub_assertion_operator.py @@ -0,0 +1,78 @@ +import datetime +from typing import Any, List, Optional, Sequence, Union + +from airflow.models import BaseOperator + +from datahub.api.circuit_breaker import ( + AssertionCircuitBreaker, + AssertionCircuitBreakerConfig, +) +from datahub_provider.hooks.datahub import DatahubRestHook + + +class DataHubAssertionOperator(BaseOperator): + r""" + DataHub Assertion Circuit Breaker Operator. + + :param urn: The DataHub dataset unique identifier. (templated) + :param datahub_rest_conn_id: The REST datahub connection id to communicate with DataHub + which is set as Airflow connection. + :param check_last_assertion_time: If set it checks assertions after the last operation was set on the dataset. + By default it is True. + :param time_delta: If verify_after_last_update is False it checks for assertion within the time delta. 
+ """ + + template_fields: Sequence[str] = ("urn",) + circuit_breaker: AssertionCircuitBreaker + urn: Union[List[str], str] + + def __init__( # type: ignore[no-untyped-def] + self, + *, + urn: Union[List[str], str], + datahub_rest_conn_id: Optional[str] = None, + check_last_assertion_time: bool = True, + time_delta: Optional[datetime.timedelta] = None, + **kwargs, + ) -> None: + super().__init__(**kwargs) + hook: DatahubRestHook + if datahub_rest_conn_id is not None: + hook = DatahubRestHook(datahub_rest_conn_id=datahub_rest_conn_id) + else: + hook = DatahubRestHook() + + host, password, timeout_sec = hook._get_config() + self.urn = urn + config: AssertionCircuitBreakerConfig = AssertionCircuitBreakerConfig( + datahub_host=host, + datahub_token=password, + timeout=timeout_sec, + verify_after_last_update=check_last_assertion_time, + time_delta=time_delta if time_delta else datetime.timedelta(days=1), + ) + + self.circuit_breaker = AssertionCircuitBreaker(config=config) + + def execute(self, context: Any) -> bool: + if "datahub_silence_circuit_breakers" in context["dag_run"].conf: + self.log.info( + "Circuit breaker is silenced because datahub_silence_circuit_breakers config is set" + ) + return True + + self.log.info(f"Checking if dataset {self.urn} is ready to be consumed") + if type(self.urn) == str: + urns = [self.urn] + elif type(self.urn) == list: + urns = self.urn + else: + raise Exception(f"urn parameter has invalid type {type(self.urn)}") + + for urn in urns: + self.log.info(f"Checking if dataset {self.urn} is ready to be consumed") + ret = self.circuit_breaker.is_circuit_breaker_active(urn=urn) + if ret: + raise Exception(f"Dataset {self.urn} is not in consumable state") + + return True diff --git a/metadata-ingestion/src/datahub_provider/operators/datahub_assertion_sensor.py b/metadata-ingestion/src/datahub_provider/operators/datahub_assertion_sensor.py new file mode 100644 index 00000000000000..55a3492f9c8d64 --- /dev/null +++ b/metadata-ingestion/src/datahub_provider/operators/datahub_assertion_sensor.py @@ -0,0 +1,78 @@ +import datetime +from typing import Any, List, Optional, Sequence, Union + +from airflow.sensors.base import BaseSensorOperator + +from datahub.api.circuit_breaker import ( + AssertionCircuitBreaker, + AssertionCircuitBreakerConfig, +) +from datahub_provider.hooks.datahub import DatahubRestHook + + +class DataHubAssertionSensor(BaseSensorOperator): + r""" + DataHub Assertion Circuit Breaker Sensor. + + :param urn: The DataHub dataset unique identifier. (templated) + :param datahub_rest_conn_id: The REST datahub connection id to communicate with DataHub + which is set as Airflow connection. + :param check_last_assertion_time: If set it checks assertions after the last operation was set on the dataset. + By default it is True. + :param time_delta: If verify_after_last_update is False it checks for assertion within the time delta. 
+ """ + + template_fields: Sequence[str] = ("urn",) + circuit_breaker: AssertionCircuitBreaker + urn: Union[List[str], str] + + def __init__( # type: ignore[no-untyped-def] + self, + *, + urn: Union[List[str], str], + datahub_rest_conn_id: Optional[str] = None, + check_last_assertion_time: bool = True, + time_delta: datetime.timedelta = datetime.timedelta(days=1), + **kwargs, + ) -> None: + super().__init__(**kwargs) + hook: DatahubRestHook + if datahub_rest_conn_id is not None: + hook = DatahubRestHook(datahub_rest_conn_id=datahub_rest_conn_id) + else: + hook = DatahubRestHook() + + host, password, timeout_sec = hook._get_config() + self.urn = urn + config: AssertionCircuitBreakerConfig = AssertionCircuitBreakerConfig( + datahub_host=host, + datahub_token=password, + timeout=timeout_sec, + verify_after_last_update=check_last_assertion_time, + time_delta=time_delta, + ) + self.circuit_breaker = AssertionCircuitBreaker(config=config) + + def poke(self, context: Any) -> bool: + if "datahub_silence_circuit_breakers" in context["dag_run"].conf: + self.log.info( + "Circuit breaker is silenced because datahub_silence_circuit_breakers config is set" + ) + return True + + self.log.info(f"Checking if dataset {self.urn} is ready to be consumed") + if type(self.urn) == str: + urns = [self.urn] + elif type(self.urn) == list: + urns = self.urn + else: + raise Exception(f"urn parameter has invalid type {type(self.urn)}") + + for urn in urns: + self.log.info(f"Checking if dataset {self.urn} is ready to be consumed") + ret = self.circuit_breaker.is_circuit_breaker_active(urn=urn) + if ret: + self.log.info(f"Dataset {self.urn} is not in consumable state") + return False + + return True diff --git a/metadata-ingestion/src/datahub_provider/operators/datahub_operation_operator.py b/metadata-ingestion/src/datahub_provider/operators/datahub_operation_operator.py new file mode 100644 index 00000000000000..905b7f71e23ca5 --- /dev/null +++ b/metadata-ingestion/src/datahub_provider/operators/datahub_operation_operator.py @@ -0,0 +1,98 @@ +import datetime +from typing import Any, List, Optional, Sequence, Union + +from airflow.sensors.base import BaseSensorOperator + +from datahub.api.circuit_breaker import ( + OperationCircuitBreaker, + OperationCircuitBreakerConfig, +) +from datahub_provider.hooks.datahub import DatahubRestHook + + +class DataHubOperationCircuitBreakerOperator(BaseSensorOperator): + r""" + DataHub Operation Circuit Breaker Operator. + + :param urn: The DataHub dataset unique identifier. (templated) + :param datahub_rest_conn_id: The REST datahub connection id to communicate with DataHub + which is set as Airflow connection. + :param partition: The partition to check the operation. + :param source_type: The partition to check the operation. 
:ref:`https://datahubproject.io/docs/graphql/enums#operationsourcetype` + + """ + + template_fields: Sequence[str] = ( + "urn", + "partition", + "source_type", + "operation_type", + ) + circuit_breaker: OperationCircuitBreaker + urn: Union[List[str], str] + partition: Optional[str] + source_type: Optional[str] + operation_type: Optional[str] + + def __init__( # type: ignore[no-untyped-def] + self, + *, + urn: Union[List[str], str], + datahub_rest_conn_id: Optional[str] = None, + time_delta: Optional[datetime.timedelta] = datetime.timedelta(days=1), + partition: Optional[str] = None, + source_type: Optional[str] = None, + operation_type: Optional[str] = None, + **kwargs, + ) -> None: + super().__init__(**kwargs) + hook: DatahubRestHook + if datahub_rest_conn_id is not None: + hook = DatahubRestHook(datahub_rest_conn_id=datahub_rest_conn_id) + else: + hook = DatahubRestHook() + + host, password, timeout_sec = hook._get_config() + + self.urn = urn + self.partition = partition + self.operation_type = operation_type + self.source_type = source_type + + config: OperationCircuitBreakerConfig = OperationCircuitBreakerConfig( + datahub_host=host, + datahub_token=password, + timeout=timeout_sec, + time_delta=time_delta, + ) + + self.circuit_breaker = OperationCircuitBreaker(config=config) + + def execute(self, context: Any) -> bool: + if "datahub_silence_circuit_breakers" in context["dag_run"].conf: + self.log.info( + "Circuit breaker is silenced because datahub_silence_circuit_breakers config is set" + ) + return True + + self.log.info(f"Checking if dataset {self.urn} is ready to be consumed") + if type(self.urn) == str: + urns = [self.urn] + elif type(self.urn) == list: + urns = self.urn + else: + raise Exception(f"urn parameter has invalid type {type(self.urn)}") + + partition: Optional[str] + for urn in urns: + self.log.info(f"Checking if dataset {self.urn} is ready to be consumed") + ret = self.circuit_breaker.is_circuit_breaker_active( + urn=urn, + partition=self.partition, + operation_type=self.operation_type, + source_type=self.source_type, + ) + if ret: + raise Exception(f"Dataset {self.urn} is not in consumable state") + + return True diff --git a/metadata-ingestion/src/datahub_provider/operators/datahub_operation_sensor.py b/metadata-ingestion/src/datahub_provider/operators/datahub_operation_sensor.py new file mode 100644 index 00000000000000..31b387a7e65b05 --- /dev/null +++ b/metadata-ingestion/src/datahub_provider/operators/datahub_operation_sensor.py @@ -0,0 +1,100 @@ +import datetime +from typing import Any, List, Optional, Sequence, Union + +from airflow.sensors.base import BaseSensorOperator + +from datahub.api.circuit_breaker import ( + OperationCircuitBreaker, + OperationCircuitBreakerConfig, +) +from datahub_provider.hooks.datahub import DatahubRestHook + + +class DataHubOperationCircuitBreakerSensor(BaseSensorOperator): + r""" + DataHub Operation Circuit Breaker Sensor. + + :param urn: The DataHub dataset unique identifier. (templated) + :param datahub_rest_conn_id: The REST datahub connection id to communicate with DataHub + which is set as Airflow connection. + :param partition: The partition to check the operation. + :param source_type: The source type to filter on. If not set it will accept any source type. + See valid values at: https://datahubproject.io/docs/graphql/enums#operationsourcetype + :param operation_type: The operation type to filter on. If not set it will accept any source type. 
+ See valid values at: https://datahubproject.io/docs/graphql/enums/#operationtype + """ + + template_fields: Sequence[str] = ( + "urn", + "partition", + "source_type", + "operation_type", + ) + circuit_breaker: OperationCircuitBreaker + urn: Union[List[str], str] + partition: Optional[str] + source_type: Optional[str] + operation_type: Optional[str] + + def __init__( # type: ignore[no-untyped-def] + self, + *, + urn: Union[List[str], str], + datahub_rest_conn_id: Optional[str] = None, + time_delta: Optional[datetime.timedelta] = datetime.timedelta(days=1), + partition: Optional[str] = None, + source_type: Optional[str] = None, + operation_type: Optional[str] = None, + **kwargs, + ) -> None: + super().__init__(**kwargs) + hook: DatahubRestHook + if datahub_rest_conn_id is not None: + hook = DatahubRestHook(datahub_rest_conn_id=datahub_rest_conn_id) + else: + hook = DatahubRestHook() + + host, password, timeout_sec = hook._get_config() + + self.urn = urn + self.partition = partition + self.operation_type = operation_type + self.source_type = source_type + + config: OperationCircuitBreakerConfig = OperationCircuitBreakerConfig( + datahub_host=host, + datahub_token=password, + timeout=timeout_sec, + time_delta=time_delta, + ) + + self.circuit_breaker = OperationCircuitBreaker(config=config) + + def poke(self, context: Any) -> bool: + if "datahub_silence_circuit_breakers" in context["dag_run"].conf: + self.log.info( + "Circuit breaker is silenced because datahub_silence_circuit_breakers config is set" + ) + return True + + self.log.info(f"Checking if dataset {self.urn} is ready to be consumed") + if type(self.urn) == str: + urns = [self.urn] + elif type(self.urn) == list: + urns = self.urn + else: + raise Exception(f"urn parameter has invalid type {type(self.urn)}") + + for urn in urns: + self.log.info(f"Checking if dataset {self.urn} is ready to be consumed") + ret = self.circuit_breaker.is_circuit_breaker_active( + urn=urn, + partition=self.partition, + operation_type=self.operation_type, + source_type=self.source_type, + ) + if ret: + self.log.info(f"Dataset {self.urn} is not in consumable state") + return False + + return True diff --git a/metadata-ingestion/tests/integration/circuit_breaker/__init__.py b/metadata-ingestion/tests/integration/circuit_breaker/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_empty_response.json b/metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_empty_response.json new file mode 100644 index 00000000000000..5f7adc514ef075 --- /dev/null +++ b/metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_empty_response.json @@ -0,0 +1,9 @@ +{ + "dataset": { + "assertions": { + "__typename": "EntityAssertionsResult", + "total": 0, + "assertions": [] + } + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_response.json b/metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_response.json new file mode 100644 index 00000000000000..73dc104f59e609 --- /dev/null +++ b/metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_response.json @@ -0,0 +1,244 @@ +{ + "dataset": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres1.postgres.public.foo1,PROD)", + "operations": [], + "incidents": { + "incidents": [] + }, + "assertions": { + "total": 5, + "assertions": [ + { + "__typename": "Assertion", + "runEvents": { + "total": 2, + "failed": 0, + 
"succeeded": 2, + "runEvents": [ + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catA\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": null, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:3d1699164901675df774ab34fd16f4f3" + }, + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catB\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": null, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:3d1699164901675df774ab34fd16f4f3" + } + ] + } + }, + { + "__typename": "Assertion", + "runEvents": { + "total": 2, + "failed": 2, + "succeeded": 0, + "runEvents": [ + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catA\"}", + "timePartition": null + }, + "result": { + "type": "FAILURE", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": null, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:358c683782c93c2fc2bd4bdd4fdb0153" + }, + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catB\"}", + "timePartition": null + }, + "result": { + "type": "FAILURE", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": null, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:358c683782c93c2fc2bd4bdd4fdb0153" + } + ] + } + }, + { + "__typename": "Assertion", + "runEvents": { + "total": 2, + "failed": 0, + "succeeded": 2, + "runEvents": [ + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catA\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": 3, + "missingCount": null, + "unexpectedCount": 0, + "actualAggValue": null, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:9729dfafea4bb2c2f114bc80e513a7ec" + }, + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catB\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": 2, + "missingCount": null, + "unexpectedCount": 0, + "actualAggValue": null, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:9729dfafea4bb2c2f114bc80e513a7ec" + } + ] + } + }, + { + "__typename": "Assertion", + "runEvents": { + "total": 2, + "failed": 0, + "succeeded": 2, + "runEvents": [ + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catA\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": 1, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:16d6f586b2febda7f2b53faec6bb9035" + }, + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catB\"}", + 
"timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": 3, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:16d6f586b2febda7f2b53faec6bb9035" + } + ] + } + }, + { + "__typename": "Assertion", + "runEvents": { + "total": 2, + "failed": 0, + "succeeded": 2, + "runEvents": [ + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catA\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": 3, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:4cf76385ccf614cc6cbb9daa551c3c74" + }, + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catB\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": 2, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:4cf76385ccf614cc6cbb9daa551c3c74" + } + ] + } + } + ] + } + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_response_with_no_error.json b/metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_response_with_no_error.json new file mode 100644 index 00000000000000..1d7099c9321605 --- /dev/null +++ b/metadata-ingestion/tests/integration/circuit_breaker/assertion_gql_response_with_no_error.json @@ -0,0 +1,198 @@ +{ + "dataset": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres1.postgres.public.foo1,PROD)", + "operations": [], + "incidents": { + "incidents": [] + }, + "assertions": { + "total": 4, + "assertions": [ + { + "__typename": "Assertion", + "runEvents": { + "total": 2, + "failed": 0, + "succeeded": 2, + "runEvents": [ + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catA\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": null, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:3d1699164901675df774ab34fd16f4f3" + }, + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catB\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": null, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:3d1699164901675df774ab34fd16f4f3" + } + ] + } + }, + { + "__typename": "Assertion", + "runEvents": { + "total": 2, + "failed": 0, + "succeeded": 2, + "runEvents": [ + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catA\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": 3, + "missingCount": null, + "unexpectedCount": 0, + "actualAggValue": null, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:9729dfafea4bb2c2f114bc80e513a7ec" + }, + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": 
"{\"category\": \"catB\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": 2, + "missingCount": null, + "unexpectedCount": 0, + "actualAggValue": null, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:9729dfafea4bb2c2f114bc80e513a7ec" + } + ] + } + }, + { + "__typename": "Assertion", + "runEvents": { + "total": 2, + "failed": 0, + "succeeded": 2, + "runEvents": [ + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catA\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": 1, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:16d6f586b2febda7f2b53faec6bb9035" + }, + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catB\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": 3, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:16d6f586b2febda7f2b53faec6bb9035" + } + ] + } + }, + { + "__typename": "Assertion", + "runEvents": { + "total": 2, + "failed": 0, + "succeeded": 2, + "runEvents": [ + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catA\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": 3, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:4cf76385ccf614cc6cbb9daa551c3c74" + }, + { + "__typename": "AssertionRunEvent", + "timestampMillis": 1640692800000, + "partitionSpec": { + "type": "PARTITION", + "partition": "{\"category\": \"catB\"}", + "timePartition": null + }, + "result": { + "type": "SUCCESS", + "rowCount": null, + "missingCount": null, + "unexpectedCount": null, + "actualAggValue": 2, + "externalUrl": null + }, + "assertionUrn": "urn:li:assertion:4cf76385ccf614cc6cbb9daa551c3c74" + } + ] + } + } + ] + } + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/circuit_breaker/operation_gql_empty_response.json b/metadata-ingestion/tests/integration/circuit_breaker/operation_gql_empty_response.json new file mode 100644 index 00000000000000..0f99316f9ec933 --- /dev/null +++ b/metadata-ingestion/tests/integration/circuit_breaker/operation_gql_empty_response.json @@ -0,0 +1,6 @@ +{ + "dataset": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "operations": [] + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/circuit_breaker/operation_gql_response.json b/metadata-ingestion/tests/integration/circuit_breaker/operation_gql_response.json new file mode 100644 index 00000000000000..6f52f95b115a36 --- /dev/null +++ b/metadata-ingestion/tests/integration/circuit_breaker/operation_gql_response.json @@ -0,0 +1,79 @@ +{ + "dataset": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_project.jaffle_shop.customers,PROD)", + "operations": [ + { + "__typename": "Operation", + "actor": "urn:li:corpuser:bq-usage", + "operationType": "CUSTOM", + "sourceType": null, + "numAffectedRows": 1, + "partition": "FULL_TABLE_SNAPSHOT", + "timestampMillis": 1655769674092, + "lastUpdatedTimestamp": 1655700504432, + 
"customProperties": [ + { + "key": "millisecondsTaken", + "value": "222" + }, + { + "key": "text", + "value": "/* {\"app\": \"dbt\", \"dbt_version\": \"1.1.0\", \"profile_name\": \"jaffle_shop\", \"target_name\": \"dev\", \"node_id\": \"test.jaffle_shop.unique_customers_customer_id.c5af1ff4b1\"} */\nselect\n count(*) as failures,\n count(*) != 0 as should_warn,\n count(*) != 0 as should_error\n from (\n \n \n \n\nwith dbt_test__target as (\n\n select customer_id as unique_field\n from `my_project`.`jaffle_shop`.`customers`\n where customer_id is not null\n\n)\n\nselect\n unique_field,\n count(*) as n_records\n\nfrom dbt_test__target\ngroup by unique_field\nhaving count(*) > 1\n\n\n\n \n ) dbt_internal_test" + }, + { + "key": "sessionId", + "value": "projects/my_project/jobs/b68487dc-61db-4f01-abd7-c5f7d931a46c" + }, + { + "key": "fieldsRead", + "value": "customer_id" + }, + { + "key": "readReason", + "value": "JOB" + }, + { + "key": "bytesProcessed", + "value": "10485760" + } + ] + }, + { + "__typename": "Operation", + "actor": "urn:li:corpuser:bq-usage", + "operationType": "CUSTOM", + "sourceType": null, + "numAffectedRows": 1, + "partition": "FULL_TABLE_SNAPSHOT", + "timestampMillis": 1655769674090, + "lastUpdatedTimestamp": 1655700503898, + "customProperties": [ + { + "key": "millisecondsTaken", + "value": "234" + }, + { + "key": "text", + "value": "/* {\"app\": \"dbt\", \"dbt_version\": \"1.1.0\", \"profile_name\": \"jaffle_shop\", \"target_name\": \"dev\", \"node_id\": \"test.jaffle_shop.relationships_orders_customer_id__customer_id__ref_customers_.c6ec7f58f2\"} */\nselect\n count(*) as failures,\n count(*) != 0 as should_warn,\n count(*) != 0 as should_error\n from (\n \n \n \n\nwith child as (\n select customer_id as from_field\n from `my_project`.`jaffle_shop`.`orders`\n where customer_id is not null\n),\n\nparent as (\n select customer_id as to_field\n from `my_project`.`jaffle_shop`.`customers`\n)\n\nselect\n from_field\n\nfrom child\nleft join parent\n on child.from_field = parent.to_field\n\nwhere parent.to_field is null\n\n\n\n \n ) dbt_internal_test" + }, + { + "key": "sessionId", + "value": "projects/my_project/jobs/4b6ae0b9-b7d3-43d4-aaae-1baf91be3553" + }, + { + "key": "fieldsRead", + "value": "customer_id" + }, + { + "key": "readReason", + "value": "JOB" + }, + { + "key": "bytesProcessed", + "value": "20971520" + } + ] + } + ] + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/circuit_breaker/test_circuit_breaker.py b/metadata-ingestion/tests/integration/circuit_breaker/test_circuit_breaker.py new file mode 100644 index 00000000000000..b9c661935c5e06 --- /dev/null +++ b/metadata-ingestion/tests/integration/circuit_breaker/test_circuit_breaker.py @@ -0,0 +1,155 @@ +import json +from unittest.mock import patch + +import pytest +from freezegun import freeze_time + +try: + from datahub.api.circuit_breaker import ( + AssertionCircuitBreaker, + AssertionCircuitBreakerConfig, + OperationCircuitBreaker, + OperationCircuitBreakerConfig, + ) +# Imports are only available if we are running integrations tests +except ImportError: + pass +lastUpdatedResponseBeforeLastAssertion = { + "dataset": {"operations": [{"lastUpdatedTimestamp": 1640685600000}]} +} + +lastUpdatedResponseAfterLastAssertion = { + "dataset": {"operations": [{"lastUpdatedTimestamp": 1652450039000}]} +} + + +@pytest.mark.integration +def test_operation_circuit_breaker_with_empty_response(pytestconfig): + with patch("gql.client.Client.execute") as mock_gql_client: + test_resources_dir 
= pytestconfig.rootpath / "tests/integration/circuit_breaker" + f = open( + f"{test_resources_dir}/operation_gql_empty_response.json", + ) + data = json.load(f) + mock_gql_client.side_effect = [data] + + config = OperationCircuitBreakerConfig(datahub_host="dummy") + cb = OperationCircuitBreaker(config) + + result = cb.is_circuit_breaker_active( + urn="urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD))" + ) + assert result is True + + +@freeze_time("2022-06-20 05:00:00") +@pytest.mark.integration +def test_operation_circuit_breaker_with_valid_response(pytestconfig): + with patch("gql.client.Client.execute") as mock_gql_client: + test_resources_dir = pytestconfig.rootpath / "tests/integration/circuit_breaker" + f = open( + f"{test_resources_dir}/operation_gql_response.json", + ) + data = json.load(f) + mock_gql_client.side_effect = [data] + + config = OperationCircuitBreakerConfig(datahub_host="dummy") + cb = OperationCircuitBreaker(config) + + result = cb.is_circuit_breaker_active( + urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,my_project.jaffle_shop.customers,PROD)" + ) + assert result is False + + +@freeze_time("2022-06-21 07:00:00") +@pytest.mark.integration +def test_operation_circuit_breaker_with_not_recent_operation(pytestconfig): + with patch("gql.client.Client.execute") as mock_gql_client: + test_resources_dir = pytestconfig.rootpath / "tests/integration/circuit_breaker" + f = open( + f"{test_resources_dir}/operation_gql_response.json", + ) + data = json.load(f) + mock_gql_client.side_effect = [data] + + config = OperationCircuitBreakerConfig(datahub_host="dummy") + cb = OperationCircuitBreaker(config) + + result = cb.is_circuit_breaker_active( + urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,my_project.jaffle_shop.customers,PROD)" + ) + assert result is True + + +@pytest.mark.integration +def test_assertion_circuit_breaker_with_empty_response(pytestconfig): + with patch("gql.client.Client.execute") as mock_gql_client: + test_resources_dir = pytestconfig.rootpath / "tests/integration/circuit_breaker" + f = open( + f"{test_resources_dir}/assertion_gql_empty_response.json", + ) + data = json.load(f) + mock_gql_client.side_effect = [lastUpdatedResponseBeforeLastAssertion, data] + + config = AssertionCircuitBreakerConfig(datahub_host="dummy") + cb = AssertionCircuitBreaker(config) + + result = cb.is_circuit_breaker_active( + urn="urn:li:dataset:(urn:li:dataPlatform:postgres,postgres1.postgres.public.foo1,PROD)" + ) + assert result is True + + +@pytest.mark.integration +def test_assertion_circuit_breaker_with_no_error(pytestconfig): + with patch("gql.client.Client.execute") as mock_gql_client: + test_resources_dir = pytestconfig.rootpath / "tests/integration/circuit_breaker" + f = open( + f"{test_resources_dir}/assertion_gql_response_with_no_error.json", + ) + data = json.load(f) + mock_gql_client.side_effect = [lastUpdatedResponseBeforeLastAssertion, data] + + config = AssertionCircuitBreakerConfig(datahub_host="dummy") + cb = AssertionCircuitBreaker(config) + + result = cb.is_circuit_breaker_active( + urn="urn:li:dataset:(urn:li:dataPlatform:postgres,postgres1.postgres.public.foo1,PROD)" + ) + assert result is False + + +@pytest.mark.integration +def test_assertion_circuit_breaker_updated_at_after_last_assertion(pytestconfig): + with patch("gql.client.Client.execute") as mock_gql_client: + test_resources_dir = pytestconfig.rootpath / "tests/integration/circuit_breaker" + f = open( + f"{test_resources_dir}/assertion_gql_response_with_no_error.json", + ) + data = 
json.load(f) + mock_gql_client.side_effect = [lastUpdatedResponseAfterLastAssertion, data] + + config = AssertionCircuitBreakerConfig(datahub_host="dummy") + cb = AssertionCircuitBreaker(config) + result = cb.is_circuit_breaker_active( + urn="urn:li:dataset:(urn:li:dataPlatform:postgres,postgres1.postgres.public.foo1,PROD)" + ) + assert result is True + + +@pytest.mark.integration +def test_assertion_circuit_breaker_assertion_with_active_assertion(pytestconfig): + test_resources_dir = pytestconfig.rootpath / "tests/integration/circuit_breaker" + with patch("gql.client.Client.execute") as mock_gql_client: + f = open( + f"{test_resources_dir}/assertion_gql_response.json", + ) + data = json.load(f) + mock_gql_client.side_effect = [lastUpdatedResponseBeforeLastAssertion, data] + config = AssertionCircuitBreakerConfig(datahub_host="dummy") + cb = AssertionCircuitBreaker(config) + result = cb.is_circuit_breaker_active( + urn="urn:li:dataset:(urn:li:dataPlatform:postgres,postgres1.postgres.public.foo1,PROD)" + ) + assert result is True # add assertion here From 7bbac5ef4d7242e63dc7433dd09a8349b39bf188 Mon Sep 17 00:00:00 2001 From: abiwill Date: Thu, 14 Jul 2022 02:17:23 +0530 Subject: [PATCH 14/22] feat(kafka-setup): add options for sasl_plaintext (#5385) allow sasl_plaintext options using environment variables --- docker/kafka-setup/kafka-setup.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker/kafka-setup/kafka-setup.sh b/docker/kafka-setup/kafka-setup.sh index 6bda831d2e258b..27593876a48df5 100755 --- a/docker/kafka-setup/kafka-setup.sh +++ b/docker/kafka-setup/kafka-setup.sh @@ -11,6 +11,12 @@ CONNECTION_PROPERTIES_PATH=/tmp/connection.properties echo "bootstrap.servers=$KAFKA_BOOTSTRAP_SERVER" > $CONNECTION_PROPERTIES_PATH echo "security.protocol=$KAFKA_PROPERTIES_SECURITY_PROTOCOL" >> $CONNECTION_PROPERTIES_PATH +## Add support for SASL_PLAINTEXT +if [[ $KAFKA_PROPERTIES_SECURITY_PROTOCOL == "SASL_PLAINTEXT" ]]; then + echo "sasl.jaas.config=$KAFKA_PROPERTIES_SASL_JAAS_CONFIG" >> $CONNECTION_PROPERTIES_PATH + echo "sasl.kerberos.service.name=$KAFKA_PROPERTIES_SASL_KERBEROS_SERVICE_NAME" >> $CONNECTION_PROPERTIES_PATH +fi + if [[ $KAFKA_PROPERTIES_SECURITY_PROTOCOL == "SSL" ]]; then if [[ -n $KAFKA_PROPERTIES_SSL_KEYSTORE_LOCATION ]]; then echo "ssl.keystore.location=$KAFKA_PROPERTIES_SSL_KEYSTORE_LOCATION" >> $CONNECTION_PROPERTIES_PATH From 60714df08ffbc76c8bc1115f0159a01712ce819a Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Thu, 14 Jul 2022 16:03:01 +0530 Subject: [PATCH 15/22] fix(bigquery): multi-project GCP setup run query through correct project (#5393) --- .../src/datahub/ingestion/source/sql/bigquery.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py index fcb4d63477f7b4..b24755dcf1d31a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py @@ -342,6 +342,10 @@ def __init__(self, config, ctx): def get_multiproject_project_id( self, inspector: Optional[Inspector] = None, run_on_compute: bool = False ) -> Optional[str]: + """ + Use run_on_compute = true when running queries on storage project + where you don't have job create rights + """ if self.config.storage_project_id and (not run_on_compute): return self.config.storage_project_id elif self.config.project_id: @@ -353,6 +357,11 @@ def get_multiproject_project_id( 
return None def get_db_name(self, inspector: Inspector) -> str: + """ + DO NOT USE this to get project name when running queries. + That can cause problems with multi-project setups. + Use get_multiproject_project_id with run_on_compute = True + """ db_name = self.get_multiproject_project_id(inspector) # db name can't be empty here as we pass in inpector to get_multiproject_project_id assert db_name @@ -458,7 +467,7 @@ def generate_profile_candidates( profile_clause = c if c == "" else f" WHERE {c}"[:-4] if profile_clause == "": return None - project_id = self.get_db_name(inspector) + project_id = self.get_multiproject_project_id(inspector, run_on_compute=True) _client: BigQueryClient = BigQueryClient(project=project_id) # Reading all tables' metadata to report base_query = ( From 45315ef1d4fd29c6c0c6f3cbe913dde552390e95 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Thu, 14 Jul 2022 19:52:38 +0530 Subject: [PATCH 16/22] fix(bigquery): add storage project name (#5395) --- .../src/datahub/ingestion/source/sql/bigquery.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py index b24755dcf1d31a..3687160ee0ebec 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py @@ -467,8 +467,11 @@ def generate_profile_candidates( profile_clause = c if c == "" else f" WHERE {c}"[:-4] if profile_clause == "": return None - project_id = self.get_multiproject_project_id(inspector, run_on_compute=True) - _client: BigQueryClient = BigQueryClient(project=project_id) + storage_project_id = self.get_multiproject_project_id(inspector) + exec_project_id = self.get_multiproject_project_id( + inspector, run_on_compute=True + ) + _client: BigQueryClient = BigQueryClient(project=exec_project_id) # Reading all tables' metadata to report base_query = ( f"SELECT " @@ -476,7 +479,7 @@ def generate_profile_candidates( f"size_bytes, " f"last_modified_time, " f"row_count, " - f"FROM {schema}.__TABLES__" + f"FROM {storage_project_id}.{schema}.__TABLES__" ) all_tables = _client.query(base_query) report_tables: List[str] = [ @@ -499,7 +502,7 @@ def generate_profile_candidates( f"size_bytes, " f"last_modified_time, " f"row_count, " - f"FROM {schema}.__TABLES__" + f"FROM {storage_project_id}.{schema}.__TABLES__" f"{profile_clause}" ) logger.debug(f"Profiling via {query}") From ee7c5f75a28f9370753b48775edc45554c9d7313 Mon Sep 17 00:00:00 2001 From: Navin Sharma <103643430+NavinSharma13@users.noreply.github.com> Date: Thu, 14 Jul 2022 22:04:06 +0530 Subject: [PATCH 17/22] Add Changes to support smoke test on Datahub deployed on kubernetes Cluster (#5334) Co-authored-by: Aseem Bansal --- docker/elasticsearch-setup/create-indices.sh | 2 +- settings.gradle | 1 + smoke-test/.gitignore | 1 + smoke-test/build.gradle | 40 ++++++ smoke-test/requirements.txt | 1 + smoke-test/smoke.sh | 4 +- smoke-test/test_e2e.py | 135 +++++++----------- smoke-test/test_rapid.py | 6 +- .../tests/assertions/assertions_test.py | 7 +- smoke-test/tests/cli/datahub_graph_test.py | 5 +- smoke-test/tests/conftest.py | 6 +- smoke-test/tests/cypress/integration_test.py | 4 +- smoke-test/tests/delete/delete_test.py | 15 +- smoke-test/tests/domains/domains_test.py | 53 ++++--- .../managed_ingestion_test.py | 19 ++- smoke-test/tests/policies/test_policies.py | 7 +- .../tags-and-terms/tags_and_terms_test.py | 38 +++-- 
smoke-test/tests/test_stateful_ingestion.py | 7 +- .../tokens/revokable_access_token_test.py | 7 +- smoke-test/tests/utils.py | 50 ++++++- 20 files changed, 248 insertions(+), 160 deletions(-) create mode 100644 smoke-test/build.gradle diff --git a/docker/elasticsearch-setup/create-indices.sh b/docker/elasticsearch-setup/create-indices.sh index 4ada7a7cb69626..94fe86e722014c 100755 --- a/docker/elasticsearch-setup/create-indices.sh +++ b/docker/elasticsearch-setup/create-indices.sh @@ -63,7 +63,7 @@ function create_datahub_usage_event_aws_elasticsearch() { fi if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_template/${PREFIX}datahub_usage_event_index_template") -eq 404 ] then - echo -e "\ncreating datahub_usagAe_event_index_template" + echo -e "\ncreating datahub_usage_event_index_template" sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/aws_es_index_template.json | tee -a /tmp/aws_es_index_template.json curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_template/${PREFIX}datahub_usage_event_index_template" -H 'Content-Type: application/json' --data @/tmp/aws_es_index_template.json curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/${PREFIX}datahub_usage_event-000001" -H 'Content-Type: application/json' --data "{\"aliases\":{\"${PREFIX}datahub_usage_event\":{\"is_write_index\":true}}}" diff --git a/settings.gradle b/settings.gradle index 99f6991184db8f..4f5b5dc44cd885 100644 --- a/settings.gradle +++ b/settings.gradle @@ -46,3 +46,4 @@ include 'metadata-integration:java:datahub-client' include 'metadata-integration:java:datahub-protobuf' include 'metadata-ingestion-modules:airflow-plugin' include 'ingestion-scheduler' +include 'smoke-test' \ No newline at end of file diff --git a/smoke-test/.gitignore b/smoke-test/.gitignore index bbad99b9f63857..55142a4a3630f3 100644 --- a/smoke-test/.gitignore +++ b/smoke-test/.gitignore @@ -130,3 +130,4 @@ dmypy.json # Pyre type checker .pyre/ +junit* \ No newline at end of file diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle new file mode 100644 index 00000000000000..ee0ea3c7be384f --- /dev/null +++ b/smoke-test/build.gradle @@ -0,0 +1,40 @@ +apply plugin: 'com.github.node-gradle.node' + +node { + + // If true, it will download node using above parameters. + // If false, it will try to use globally installed node. + if (project.hasProperty('useSystemNode') && project.getProperty('useSystemNode').toBoolean()) { + download = false + } else { + download = true + } + + // Version of node to use. + version = '16.8.0' + + // Version of Yarn to use. + yarnVersion = '1.22.0' + + // Base URL for fetching node distributions (set nodeDistBaseUrl if you have a mirror). 
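+    // e.g. (illustrative invocation; substitute your own mirror URL): ./gradlew :smoke-test:yarnInstall -PnodeDistBaseUrl=https://nodejs-mirror.example.com/dist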
+ if (project.hasProperty('nodeDistBaseUrl')) { + distBaseUrl = project.getProperty('nodeDistBaseUrl') + } else { + distBaseUrl = 'https://nodejs.org/dist' + } + + // Set the work directory for unpacking node + workDir = file("${project.projectDir}/.gradle/nodejs") + + // Set the work directory for NPM + yarnWorkDir = file("${project.projectDir}/.gradle/yarn") + + // Set the work directory where node_modules should be located + nodeModulesDir = file("${project.projectDir}") + +} + +task yarnInstall(type: YarnTask) { + println "Root directory: ${project.rootDir}"; + args = ['install', '--cwd', "${project.rootDir}/smoke-test/tests/cypress"] +} \ No newline at end of file diff --git a/smoke-test/requirements.txt b/smoke-test/requirements.txt index 111b876e4e92a4..1ca876f8bc5e42 100644 --- a/smoke-test/requirements.txt +++ b/smoke-test/requirements.txt @@ -1,4 +1,5 @@ pytest>=6.2 pytest-dependency>=0.5.1 psutil +tenacity -e ../metadata-ingestion[datahub-rest,datahub-kafka,mysql] diff --git a/smoke-test/smoke.sh b/smoke-test/smoke.sh index 379d7465af06b5..141f0c70e17f09 100755 --- a/smoke-test/smoke.sh +++ b/smoke-test/smoke.sh @@ -22,6 +22,6 @@ pip install -r requirements.txt echo "DATAHUB_VERSION = $DATAHUB_VERSION" DATAHUB_TELEMETRY_ENABLED=false datahub docker quickstart --quickstart-compose-file ../docker/quickstart/docker-compose-without-neo4j.quickstart.yml --dump-logs-on-failure -(cd tests/cypress ; yarn install) +(cd ..; ./gradlew :smoke-test:yarnInstall) -pytest -vv --continue-on-collection-errors --junit-xml=junit.smoke.xml +pytest -rP --durations=20 -vv --continue-on-collection-errors --junit-xml=junit.smoke.xml diff --git a/smoke-test/test_e2e.py b/smoke-test/test_e2e.py index b3311884307437..8b22fff08e17db 100644 --- a/smoke-test/test_e2e.py +++ b/smoke-test/test_e2e.py @@ -1,19 +1,20 @@ import time import urllib -from contextlib import contextmanager -from typing import Optional +from typing import Any, Optional import pytest import requests -from datahub.cli.docker import check_local_docker_containers +import tenacity from datahub.ingestion.run.pipeline import Pipeline from tests.utils import ( get_frontend_url, get_gms_url, get_kafka_broker_url, + get_kafka_schema_registry, get_sleep_info, ingest_file_via_rest, + wait_for_healthcheck_util, ) bootstrap_sample_data = "../metadata-ingestion/examples/mce_files/bootstrap_mce.json" @@ -26,11 +27,12 @@ } kafka_post_ingestion_wait_sec = 60 +sleep_sec, sleep_times = get_sleep_info() + @pytest.fixture(scope="session") def wait_for_healthchecks(): - # Simply assert that everything is healthy, but don't wait. - assert not check_local_docker_containers() + wait_for_healthcheck_util() yield @@ -54,71 +56,52 @@ def frontend_session(wait_for_healthchecks): yield session -@contextmanager -def with_sleep_times( - sleep_between: Optional[int] = None, sleep_times: Optional[int] = None -): - _sleep_between, _sleep_times = get_sleep_info() - if sleep_times is None: - sleep_times = _sleep_times - while True: - try: - yield - except Exception as e: - if sleep_times > 0: - sleep_time = sleep_between or _sleep_between - sleep_times -= 1 - print( - f"Sleeping for {sleep_time}. 
Will sleep for {sleep_times} more if needed" - ) - time.sleep(sleep_time) - else: - raise e - finally: - break - - -def _ensure_user_present( - urn: str, sleep_between: Optional[int] = None, sleep_times: Optional[int] = None -): - with with_sleep_times(sleep_between, sleep_times): - response = requests.get( - f"{get_gms_url()}/entities/{urllib.parse.quote(urn)}", - headers={ - **restli_default_headers, - }, - ) - response.raise_for_status() - data = response.json() +@tenacity.retry( + stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) +) +def _ensure_user_present(urn: str): + response = requests.get( + f"{get_gms_url()}/entities/{urllib.parse.quote(urn)}", + headers={ + **restli_default_headers, + }, + ) + response.raise_for_status() + data = response.json() - user_key = "com.linkedin.metadata.snapshot.CorpUserSnapshot" - assert data["value"] - assert data["value"][user_key] - assert data["value"][user_key]["urn"] == urn + user_key = "com.linkedin.metadata.snapshot.CorpUserSnapshot" + assert data["value"] + assert data["value"][user_key] + assert data["value"][user_key]["urn"] == urn + return data +@tenacity.retry( + stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) +) def _ensure_dataset_present( - urn: str, sleep_between: Optional[int] = None, sleep_times: Optional[int] = None -): - with with_sleep_times(sleep_between, sleep_times): - response = requests.get( - f"{get_gms_url()}/entitiesV2?ids=List({urllib.parse.quote(urn)})&aspects=List(datasetProperties)", - headers={ - **restli_default_headers, - "X-RestLi-Method": "batch_get", - }, - ) - response.raise_for_status() - res_data = response.json() - assert res_data["results"] - assert res_data["results"][urn] - assert res_data["results"][urn]["aspects"]["datasetProperties"] + urn: str, + aspects: Optional[str] = "datasetProperties", +) -> Any: + response = requests.get( + f"{get_gms_url()}/entitiesV2?ids=List({urllib.parse.quote(urn)})&aspects=List({aspects})", + headers={ + **restli_default_headers, + "X-RestLi-Method": "batch_get", + }, + ) + response.raise_for_status() + res_data = response.json() + assert res_data["results"] + assert res_data["results"][urn] + assert res_data["results"][urn]["aspects"]["datasetProperties"] + return res_data @pytest.mark.dependency(depends=["test_healthchecks"]) def test_ingestion_via_rest(wait_for_healthchecks): ingest_file_via_rest(bootstrap_sample_data) - _ensure_user_present(urn="urn:li:corpuser:datahub", sleep_between=10, sleep_times=6) + _ensure_user_present(urn="urn:li:corpuser:datahub") @pytest.mark.dependency(depends=["test_healthchecks"]) @@ -139,6 +122,7 @@ def test_ingestion_via_kafka(wait_for_healthchecks): "config": { "connection": { "bootstrap": get_kafka_broker_url(), + "schema_registry_url": get_kafka_schema_registry(), } }, }, @@ -227,25 +211,12 @@ def test_gms_batch_get_v2(): urn1 = f"urn:li:dataset:({platform},{name_1},{env})" urn2 = f"urn:li:dataset:({platform},{name_2},{env})" - response = requests.get( - f"{get_gms_url()}/entitiesV2?ids=List({urllib.parse.quote(urn1)},{urllib.parse.quote(urn2)})&aspects=List(datasetProperties,ownership)", - headers={ - **restli_default_headers, - "X-RestLi-Method": "batch_get", - }, - ) - response.raise_for_status() - res_data = response.json() + resp1 = _ensure_dataset_present(urn1, aspects="datasetProperties,ownership") + assert resp1["results"][urn1]["aspects"]["ownership"] - # Verify both urns exist and have correct aspects - assert res_data["results"] - assert 
res_data["results"][urn1] - assert res_data["results"][urn1]["aspects"]["datasetProperties"] - assert res_data["results"][urn1]["aspects"]["ownership"] - assert res_data["results"][urn2] - assert res_data["results"][urn2]["aspects"]["datasetProperties"] + resp2 = _ensure_dataset_present(urn2, aspects="datasetProperties,ownership") assert ( - "ownership" not in res_data["results"][urn2]["aspects"] + "ownership" not in resp2["results"][urn2]["aspects"] ) # Aspect does not exist. @@ -1171,8 +1142,8 @@ def test_update_corp_group_properties(frontend_session): # Reset the editable properties json = { - "query": """mutation updateCorpGroupProperties($urn: String!, $input: UpdateCorpGroupPropertiesInput!) {\n - updateCorpGroupProperties(urn: $urn, input: $input) }""", + "query": """mutation updateCorpGroupProperties($urn: String!, $input: CorpGroupUpdateInput!) {\n + updateCorpGroupProperties(urn: $urn, input: $input) { urn } }""", "variables": { "urn": group_urn, "input": {"description": "", "slack": "", "email": ""}, @@ -1466,7 +1437,9 @@ def test_generate_personal_access_token(frontend_session): # Test unauthenticated case json = { "query": """query getAccessToken($input: GetAccessTokenInput!) {\n - accessToken\n + getAccessToken(input: $input) {\n + accessToken\n + }\n }""", "variables": { "input": { diff --git a/smoke-test/test_rapid.py b/smoke-test/test_rapid.py index fae575a9bac273..e6cd421e5cc915 100644 --- a/smoke-test/test_rapid.py +++ b/smoke-test/test_rapid.py @@ -2,9 +2,8 @@ import pytest import requests -from datahub.cli.docker import check_local_docker_containers -from tests.utils import get_frontend_url, ingest_file_via_rest +from tests.utils import get_frontend_url, ingest_file_via_rest, wait_for_healthcheck_util bootstrap_small = "test_resources/bootstrap_single.json" bootstrap_small_2 = "test_resources/bootstrap_single2.json" @@ -12,8 +11,7 @@ @pytest.fixture(scope="session") def wait_for_healthchecks(): - # Simply assert that everything is healthy, but don't wait. - assert not check_local_docker_containers() + wait_for_healthcheck_util() yield diff --git a/smoke-test/tests/assertions/assertions_test.py b/smoke-test/tests/assertions/assertions_test.py index 66db987e62ceb5..5e749d2214fd7a 100644 --- a/smoke-test/tests/assertions/assertions_test.py +++ b/smoke-test/tests/assertions/assertions_test.py @@ -4,7 +4,6 @@ import pytest import requests -from datahub.cli.docker import check_local_docker_containers from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext, RecordEnvelope @@ -24,7 +23,7 @@ PartitionSpecClass, PartitionTypeClass, ) -from tests.utils import delete_urns_from_file, get_gms_url, ingest_file_via_rest +from tests.utils import delete_urns_from_file, get_gms_url, ingest_file_via_rest, wait_for_healthcheck_util restli_default_headers = { "X-RestLi-Protocol-Version": "2.0.0", @@ -63,7 +62,6 @@ def create_test_data(test_file): 1643880726874, 1643880726875, ] - msg_ids = [] # The assertion run event attached to the dataset mcp2 = MetadataChangeProposalWrapper( entityType="assertion", @@ -233,8 +231,7 @@ def generate_test_data(tmp_path_factory): @pytest.fixture(scope="session") def wait_for_healthchecks(generate_test_data): - # Simply assert that everything is healthy, but don't wait. 
- assert not check_local_docker_containers() + wait_for_healthcheck_util() yield diff --git a/smoke-test/tests/cli/datahub_graph_test.py b/smoke-test/tests/cli/datahub_graph_test.py index 3728f68f8208fe..371edd66563b4a 100644 --- a/smoke-test/tests/cli/datahub_graph_test.py +++ b/smoke-test/tests/cli/datahub_graph_test.py @@ -1,7 +1,8 @@ import pytest from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.metadata.schema_classes import KafkaSchemaClass, SchemaMetadataClass -from tests.utils import delete_urns_from_file, ingest_file_via_rest +from tests.utils import delete_urns_from_file, ingest_file_via_rest, get_gms_url + @pytest.fixture(scope="module", autouse=False) @@ -21,7 +22,7 @@ def test_healthchecks(wait_for_healthchecks): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_get_aspect_v2(frontend_session, ingest_cleanup_data): - graph: DataHubGraph = DataHubGraph(DatahubClientConfig()) + graph: DataHubGraph = DataHubGraph(DatahubClientConfig(server=get_gms_url())) urn = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-rollback,PROD)" schema_metadata: SchemaMetadataClass = graph.get_aspect_v2( urn, aspect="schemaMetadata", aspect_type=SchemaMetadataClass diff --git a/smoke-test/tests/conftest.py b/smoke-test/tests/conftest.py index 946d282a2b050b..ffb6bdbc632249 100644 --- a/smoke-test/tests/conftest.py +++ b/smoke-test/tests/conftest.py @@ -2,9 +2,8 @@ import pytest import requests -from datahub.cli.docker import check_local_docker_containers -from tests.utils import get_frontend_url +from tests.utils import get_frontend_url, wait_for_healthcheck_util # Disable telemetry os.putenv("DATAHUB_TELEMETRY_ENABLED", "false") @@ -12,8 +11,7 @@ @pytest.fixture(scope="session") def wait_for_healthchecks(): - # Simply assert that everything is healthy, but don't wait. 
- assert not check_local_docker_containers() + wait_for_healthcheck_util() yield diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py index 1c756aab9e680e..2d0ddb3cd14d11 100644 --- a/smoke-test/tests/cypress/integration_test.py +++ b/smoke-test/tests/cypress/integration_test.py @@ -24,10 +24,10 @@ def test_run_cypress(frontend_session, wait_for_healthchecks): record_key = os.getenv("CYPRESS_RECORD_KEY") if record_key: print('Running Cypress tests with recording') - command = f"npx cypress run --record" + command = f"NO_COLOR=1 npx cypress run --record" else: print('Running Cypress tests without recording') - command = f"npx cypress run" + command = f"NO_COLOR=1 npx cypress run" proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd="tests/cypress") stdout = proc.stdout.read() stderr = proc.stderr.read() diff --git a/smoke-test/tests/delete/delete_test.py b/smoke-test/tests/delete/delete_test.py index a7dde49487ebdd..e884da146c4032 100644 --- a/smoke-test/tests/delete/delete_test.py +++ b/smoke-test/tests/delete/delete_test.py @@ -2,27 +2,27 @@ import json import pytest from time import sleep -from datahub.cli import delete_cli, ingest_cli -from datahub.cli.docker import check_local_docker_containers -from datahub.cli.cli_utils import guess_entity_type, post_entity, get_aspects_for_entity +from datahub.cli.cli_utils import get_aspects_for_entity from datahub.cli.ingest_cli import get_session_and_host -from datahub.cli.delete_cli import guess_entity_type, delete_one_urn_cmd, delete_references -from tests.utils import ingest_file_via_rest, delete_urns_from_file +from datahub.cli.delete_cli import delete_references +from tests.utils import ingest_file_via_rest, wait_for_healthcheck_util # Disable telemetry os.putenv("DATAHUB_TELEMETRY_ENABLED", "false") + @pytest.fixture(scope="session") def wait_for_healthchecks(): - # Simply assert that everything is healthy, but don't wait. - assert not check_local_docker_containers() + wait_for_healthcheck_util() yield + @pytest.mark.dependency() def test_healthchecks(wait_for_healthchecks): # Call to wait_for_healthchecks fixture will do the actual functionality. 
pass + @pytest.fixture(autouse=True) def test_setup(): """Fixture to execute asserts before and after a test is run""" @@ -53,6 +53,7 @@ def test_setup(): assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False) assert "editableDatasetProperties" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False) + @pytest.mark.dependency() def test_delete_reference(depends=["test_healthchecks"]): platform = "urn:li:dataPlatform:kafka" diff --git a/smoke-test/tests/domains/domains_test.py b/smoke-test/tests/domains/domains_test.py index 0e935c244ed091..50f447f80a4065 100644 --- a/smoke-test/tests/domains/domains_test.py +++ b/smoke-test/tests/domains/domains_test.py @@ -1,13 +1,17 @@ import time import pytest +import tenacity from tests.utils import ( delete_urns_from_file, get_frontend_url, get_gms_url, ingest_file_via_rest, + get_sleep_info, ) +sleep_sec, sleep_times = get_sleep_info() + @pytest.fixture(scope="module", autouse=False) def ingest_cleanup_data(request): @@ -24,6 +28,30 @@ def test_healthchecks(wait_for_healthchecks): pass +@tenacity.retry( + stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) +) +def _ensure_more_domains(frontend_session, list_domains_json, before_count): + time.sleep(2) + + # Get new count of Domains + response = frontend_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=list_domains_json + ) + response.raise_for_status() + res_data = response.json() + + assert res_data + assert res_data["data"] + assert res_data["data"]["listDomains"]["total"] is not None + assert "errors" not in res_data + + # Assert that there are more domains now. + after_count = res_data["data"]["listDomains"]["total"] + print(f"after_count is {after_count}") + assert after_count == before_count + 1 + + @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_list_get_domain(frontend_session): @@ -55,9 +83,10 @@ def test_create_list_get_domain(frontend_session): assert res_data["data"] assert res_data["data"]["listDomains"]["total"] is not None assert "errors" not in res_data + print(f"domains resp is {res_data}") before_count = res_data["data"]["listDomains"]["total"] - print(before_count) + print(f"before_count is {before_count}") domain_id = "test id" domain_name = "test name" @@ -90,25 +119,11 @@ def test_create_list_get_domain(frontend_session): domain_urn = res_data["data"]["createDomain"] - # Sleep for eventual consistency (not ideal) - time.sleep(2) - - # Get new count of Domains - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=list_domains_json + _ensure_more_domains( + frontend_session=frontend_session, + list_domains_json=list_domains_json, + before_count=before_count, ) - response.raise_for_status() - res_data = response.json() - - assert res_data - assert res_data["data"] - assert res_data["data"]["listDomains"]["total"] is not None - assert "errors" not in res_data - - # Assert that there are more domains now. 
- after_count = res_data["data"]["listDomains"]["total"] - print(after_count) - assert after_count == before_count + 1 # Get the domain value back get_domain_json = { diff --git a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py b/smoke-test/tests/managed-ingestion/managed_ingestion_test.py index 47a0f0e2e5f2e1..837e27f72cd630 100644 --- a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py +++ b/smoke-test/tests/managed-ingestion/managed_ingestion_test.py @@ -1,10 +1,22 @@ import time import pytest -from tests.utils import get_frontend_url +from tests.utils import get_frontend_url, wait_for_healthcheck_util -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) +@pytest.fixture(scope="session") +def wait_for_healthchecks(): + wait_for_healthcheck_util() + yield + + +@pytest.mark.dependency() +def test_healthchecks(wait_for_healthchecks): + # Call to wait_for_healthchecks fixture will do the actual functionality. + pass + + +@pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_list_get_remove_secret(frontend_session): # Get count of existing secrets @@ -155,7 +167,7 @@ def test_create_list_get_remove_secret(frontend_session): assert len(secret_value_arr) == 0 -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) +@pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_list_get_remove_ingestion_source(frontend_session): # Get count of existing ingestion sources @@ -338,7 +350,6 @@ def test_create_list_get_remove_ingestion_source(frontend_session): @pytest.mark.dependency( depends=[ "test_healthchecks", - "test_run_ingestion", "test_create_list_get_remove_ingestion_source", ] ) diff --git a/smoke-test/tests/policies/test_policies.py b/smoke-test/tests/policies/test_policies.py index b94765a75b0a44..175a40d6f159b5 100644 --- a/smoke-test/tests/policies/test_policies.py +++ b/smoke-test/tests/policies/test_policies.py @@ -1,15 +1,14 @@ import time import pytest import requests -from tests.utils import get_frontend_url -from datahub.cli.docker import check_local_docker_containers +from tests.utils import get_frontend_url, wait_for_healthcheck_util TEST_POLICY_NAME = "Updated Platform Policy" + @pytest.fixture(scope="session") def wait_for_healthchecks(): - # Simply assert that everything is healthy, but don't wait. - assert not check_local_docker_containers() + wait_for_healthcheck_util() yield diff --git a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py b/smoke-test/tests/tags-and-terms/tags_and_terms_test.py index a8e315801fdc06..b0ca29b544cfef 100644 --- a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py +++ b/smoke-test/tests/tags-and-terms/tags_and_terms_test.py @@ -1,5 +1,5 @@ import pytest -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest +from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest, wait_for_healthcheck_util @pytest.fixture(scope="module", autouse=True) @@ -11,8 +11,20 @@ def ingest_cleanup_data(request): delete_urns_from_file("tests/tags-and-terms/data.json") -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_add_tag(frontend_session, wait_for_healthchecks): +@pytest.fixture(scope="session") +def wait_for_healthchecks(): + wait_for_healthcheck_util() + yield + + +@pytest.mark.dependency() +def test_healthchecks(wait_for_healthchecks): + # Call to wait_for_healthchecks fixture will do the actual functionality. 
+ pass + + +@pytest.mark.dependency(depends=["test_healthchecks"]) +def test_add_tag(frontend_session): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-tags-terms-sample-kafka" env = "PROD" @@ -45,7 +57,7 @@ def test_add_tag(frontend_session, wait_for_healthchecks): assert res_data assert res_data["data"] assert res_data["data"]["dataset"] - assert res_data["data"]["dataset"]["globalTags"] == None + assert res_data["data"]["dataset"]["globalTags"] is None add_json = { "query": """mutation addTag($input: TagAssociationInput!) {\n @@ -128,8 +140,8 @@ def test_add_tag(frontend_session, wait_for_healthchecks): assert res_data["data"]["dataset"]["globalTags"] == {"tags": []} -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_add_tag_to_chart(frontend_session, wait_for_healthchecks): +@pytest.mark.dependency(depends=["test_healthchecks"]) +def test_add_tag_to_chart(frontend_session): chart_urn = "urn:li:chart:(looker,test-tags-terms-sample-chart)" chart_json = { @@ -159,7 +171,7 @@ def test_add_tag_to_chart(frontend_session, wait_for_healthchecks): assert res_data assert res_data["data"] assert res_data["data"]["chart"] - assert res_data["data"]["chart"]["globalTags"] == None + assert res_data["data"]["chart"]["globalTags"] is None add_json = { "query": """mutation addTag($input: TagAssociationInput!) {\n @@ -240,8 +252,8 @@ def test_add_tag_to_chart(frontend_session, wait_for_healthchecks): assert res_data["data"]["chart"]["globalTags"] == {"tags": []} -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_add_term(frontend_session, wait_for_healthchecks): +@pytest.mark.dependency(depends=["test_healthchecks"]) +def test_add_term(frontend_session): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-tags-terms-sample-kafka" env = "PROD" @@ -273,7 +285,7 @@ def test_add_term(frontend_session, wait_for_healthchecks): assert res_data assert res_data["data"] assert res_data["data"]["dataset"] - assert res_data["data"]["dataset"]["glossaryTerms"] == None + assert res_data["data"]["dataset"]["glossaryTerms"] is None add_json = { "query": """mutation addTerm($input: TermAssociationInput!) {\n @@ -356,8 +368,8 @@ def test_add_term(frontend_session, wait_for_healthchecks): assert res_data["data"]["dataset"]["glossaryTerms"] == {"terms": []} -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_update_schemafield(frontend_session, wait_for_healthchecks): +@pytest.mark.dependency(depends=["test_healthchecks"]) +def test_update_schemafield(frontend_session): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-tags-terms-sample-kafka" env = "PROD" @@ -445,7 +457,7 @@ def test_update_schemafield(frontend_session, wait_for_healthchecks): assert res_data assert res_data["data"] assert res_data["data"]["dataset"] - assert res_data["data"]["dataset"]["editableSchemaMetadata"] == None + assert res_data["data"]["dataset"]["editableSchemaMetadata"] is None add_json = { "query": """mutation addTag($input: TagAssociationInput!) 
{\n diff --git a/smoke-test/tests/test_stateful_ingestion.py b/smoke-test/tests/test_stateful_ingestion.py index 1cf39e59fb7c8c..cfd311578160d1 100644 --- a/smoke-test/tests/test_stateful_ingestion.py +++ b/smoke-test/tests/test_stateful_ingestion.py @@ -8,7 +8,7 @@ from sqlalchemy import create_engine from sqlalchemy.sql import text -from tests.utils import get_gms_url +from tests.utils import get_gms_url, get_mysql_url, get_mysql_username, get_mysql_password def test_stateful_ingestion(wait_for_healthchecks): @@ -50,8 +50,9 @@ def get_current_checkpoint_from_pipeline( ) source_config_dict: Dict[str, Any] = { - "username": "datahub", - "password": "datahub", + "host_port": get_mysql_url(), + "username": get_mysql_username(), + "password": get_mysql_password(), "database": "datahub", "stateful_ingestion": { "enabled": True, diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index 4a20105787d66b..ff958f3e7da484 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -3,9 +3,7 @@ import requests from time import sleep -from datahub.cli.docker import check_local_docker_containers -from datahub.cli.ingest_cli import get_session_and_host -from tests.utils import get_frontend_url +from tests.utils import get_frontend_url, wait_for_healthcheck_util # Disable telemetry os.putenv("DATAHUB_TELEMETRY_ENABLED", "false") @@ -13,8 +11,7 @@ @pytest.fixture(scope="session") def wait_for_healthchecks(): - # Simply assert that everything is healthy, but don't wait. - assert not check_local_docker_containers() + wait_for_healthcheck_util() yield @pytest.mark.dependency() diff --git a/smoke-test/tests/utils.py b/smoke-test/tests/utils.py index 350b3a06c9931c..731f0b3d8ff6c1 100644 --- a/smoke-test/tests/utils.py +++ b/smoke-test/tests/utils.py @@ -1,10 +1,11 @@ import json import os -from typing import Any +from typing import Any, Tuple import requests from datahub.cli import cli_utils from datahub.ingestion.run.pipeline import Pipeline +from datahub.cli.docker import check_local_docker_containers def get_gms_url(): @@ -19,13 +20,54 @@ def get_kafka_broker_url(): return os.getenv("DATAHUB_KAFKA_URL") or "localhost:9092" -def get_sleep_info(): +def get_kafka_schema_registry(): + return os.getenv("DATAHUB_KAFKA_SCHEMA_REGISTRY_URL") or "http://localhost:8081" + + +def get_mysql_url(): + return os.getenv("DATAHUB_MYSQL_URL") or "localhost:3306" + + +def get_mysql_username(): + return os.getenv("DATAHUB_MYSQL_USERNAME") or "datahub" + + +def get_mysql_password(): + return os.getenv("DATAHUB_MYSQL_PASSWORD") or "datahub" + + +def get_sleep_info() -> Tuple[int, int]: return ( - os.environ.get("DATAHUB_TEST_SLEEP_BETWEEN") or 60, - os.environ.get("DATAHUB_TEST_SLEEP_TIMES") or 5, + int(os.getenv("DATAHUB_TEST_SLEEP_BETWEEN", 60)), + int(os.getenv("DATAHUB_TEST_SLEEP_TIMES", 5)), ) +def is_k8s_enabled(): + return os.getenv("K8S_CLUSTER_ENABLED", "false").lower() in ["true", "yes"] + + +def wait_for_healthcheck_util(): + if is_k8s_enabled(): + # Simply assert that kubernetes endpoints are healthy, but don't wait. + assert not check_k8s_endpoint(f"{get_frontend_url()}/admin") + assert not check_k8s_endpoint(f"{get_gms_url()}/health") + else: + # Simply assert that docker is healthy, but don't wait. 
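+        # check_local_docker_containers() reports any problems it finds with the local DataHub containers; an empty (falsy) result means everything looks healthy.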
+ assert not check_local_docker_containers() + + +def check_k8s_endpoint(url): + try: + get = requests.get(url) + if get.status_code == 200: + return + else: + return f"{url}: is Not reachable, status_code: {get.status_code}" + except requests.exceptions.RequestException as e: + raise SystemExit(f"{url}: is Not reachable \nErr: {e}") + + def ingest_file_via_rest(filename: str) -> Any: pipeline = Pipeline.create( { From 8d4b7cf8a12a3f919a42d2b00f045e8c78dcbd45 Mon Sep 17 00:00:00 2001 From: neojunjie Date: Fri, 15 Jul 2022 00:52:51 +0800 Subject: [PATCH 18/22] fix(PlayCookie) PLAY_TOKEN cookie rejected because userprofile exceeds 4096 chars (#5114) --- build.gradle | 1 + datahub-frontend/app/auth/AuthModule.java | 50 ++++++++++++++++------- datahub-frontend/conf/application.conf | 5 +++ datahub-frontend/play.gradle | 1 + docs/debugging.md | 15 +++++++ 5 files changed, 57 insertions(+), 15 deletions(-) diff --git a/build.gradle b/build.gradle index 4db72083a926f7..6914bd1f3606ea 100644 --- a/build.gradle +++ b/build.gradle @@ -116,6 +116,7 @@ project.ext.externalDependency = [ 'parquet': 'org.apache.parquet:parquet-avro:1.12.2', 'picocli': 'info.picocli:picocli:4.5.0', 'playCache': 'com.typesafe.play:play-cache_2.12:2.7.6', + 'playEhcache': 'com.typesafe.play:play-ehcache_2.12:2.7.6', 'playWs': 'com.typesafe.play:play-ahc-ws-standalone_2.12:2.0.8', 'playDocs': 'com.typesafe.play:play-docs_2.12:2.7.6', 'playGuice': 'com.typesafe.play:play-guice_2.12:2.7.6', diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java index 3358998acde2e2..0773d95b45cf58 100644 --- a/datahub-frontend/app/auth/AuthModule.java +++ b/datahub-frontend/app/auth/AuthModule.java @@ -20,6 +20,7 @@ import org.pac4j.core.context.session.SessionStore; import org.pac4j.play.LogoutController; import org.pac4j.play.http.PlayHttpActionAdapter; +import org.pac4j.play.store.PlayCacheSessionStore; import org.pac4j.play.store.PlayCookieSessionStore; import org.pac4j.play.store.PlaySessionStore; import org.pac4j.play.store.ShiroAesDataEncrypter; @@ -32,6 +33,7 @@ import auth.sso.SsoConfigs; import auth.sso.SsoManager; import controllers.SsoCallbackController; +import play.cache.SyncCacheApi; import utils.ConfigUtil; import static auth.AuthUtils.*; @@ -51,6 +53,8 @@ public class AuthModule extends AbstractModule { * We hash this value (SHA1), then take the first 16 bytes as the AES key. */ private static final String PAC4J_AES_KEY_BASE_CONF = "play.http.secret.key"; + private static final String PAC4J_SESSIONSTORE_PROVIDER_CONF = "pac4j.sessionStore.provider"; + private final com.typesafe.config.Config _configs; public AuthModule(final Environment environment, final com.typesafe.config.Config configs) { @@ -59,22 +63,38 @@ public AuthModule(final Environment environment, final com.typesafe.config.Confi @Override protected void configure() { - PlayCookieSessionStore playCacheCookieStore; - try { - // To generate a valid encryption key from an input value, we first - // hash the input to generate a fixed-length string. Then, we convert - // it to hex and slice the first 16 bytes, because AES key length must strictly - // have a specific length. 
- final String aesKeyBase = _configs.getString(PAC4J_AES_KEY_BASE_CONF); - final String aesKeyHash = DigestUtils.sha1Hex(aesKeyBase.getBytes(StandardCharsets.UTF_8)); - final String aesEncryptionKey = aesKeyHash.substring(0, 16); - playCacheCookieStore = new PlayCookieSessionStore( - new ShiroAesDataEncrypter(aesEncryptionKey)); - } catch (Exception e) { - throw new RuntimeException("Failed to instantiate Pac4j cookie session store!", e); + /** + * In Pac4J, you are given the option to store the profiles of authenticated users in either + * (i) PlayCacheSessionStore - saves your data in the Play cache or + * (ii) PlayCookieSessionStore saves your data in the Play session cookie + * However there is problem (https://github.com/datahub-project/datahub/issues/4448) observed when storing the Pac4j profile in cookie. + * Whenever the profile returned by Pac4j is greater than 4096 characters, the response will be rejected by the browser. + * Default to PlayCacheCookieStore so that datahub-frontend container remains as a stateless service + */ + String sessionStoreProvider = _configs.getString(PAC4J_SESSIONSTORE_PROVIDER_CONF); + + if (sessionStoreProvider.equals("PlayCacheSessionStore")) { + final PlayCacheSessionStore playCacheSessionStore = new PlayCacheSessionStore(getProvider(SyncCacheApi.class)); + bind(SessionStore.class).toInstance(playCacheSessionStore); + bind(PlaySessionStore.class).toInstance(playCacheSessionStore); + } else { + PlayCookieSessionStore playCacheCookieStore; + try { + // To generate a valid encryption key from an input value, we first + // hash the input to generate a fixed-length string. Then, we convert + // it to hex and slice the first 16 bytes, because AES key length must strictly + // have a specific length. + final String aesKeyBase = _configs.getString(PAC4J_AES_KEY_BASE_CONF); + final String aesKeyHash = DigestUtils.sha1Hex(aesKeyBase.getBytes(StandardCharsets.UTF_8)); + final String aesEncryptionKey = aesKeyHash.substring(0, 16); + playCacheCookieStore = new PlayCookieSessionStore( + new ShiroAesDataEncrypter(aesEncryptionKey)); + } catch (Exception e) { + throw new RuntimeException("Failed to instantiate Pac4j cookie session store!", e); + } + bind(SessionStore.class).toInstance(playCacheCookieStore); + bind(PlaySessionStore.class).toInstance(playCacheCookieStore); } - bind(SessionStore.class).toInstance(playCacheCookieStore); - bind(PlaySessionStore.class).toInstance(playCacheCookieStore); try { bind(SsoCallbackController.class).toConstructor(SsoCallbackController.class.getConstructor( diff --git a/datahub-frontend/conf/application.conf b/datahub-frontend/conf/application.conf index 254a6f0531e4d6..354e8283e2bae9 100644 --- a/datahub-frontend/conf/application.conf +++ b/datahub-frontend/conf/application.conf @@ -39,6 +39,11 @@ play.http.server.akka.max-header-count = ${?DATAHUB_AKKA_MAX_HEADER_COUNT} play.server.akka.max-header-value-length = 8k play.server.akka.max-header-value-length = ${?DATAHUB_AKKA_MAX_HEADER_VALUE_LENGTH} +# pac4j configuration +# default to PlayCookieSessionStore to keep datahub-frontend's statelessness +pac4j.sessionStore.provider= "PlayCookieSessionStore" +pac4j.sessionStore.provider= ${?PAC4J_SESSIONSTORE_PROVIDER} + # Database configuration # ~~~~~ # You can declare as many datasources as you want. 
diff --git a/datahub-frontend/play.gradle b/datahub-frontend/play.gradle index 27eff34742899e..80892c59e9f4ec 100644 --- a/datahub-frontend/play.gradle +++ b/datahub-frontend/play.gradle @@ -48,6 +48,7 @@ dependencies { implementation externalDependency.playPac4j implementation externalDependency.shiroCore implementation externalDependency.playCache + implementation externalDependency.playEhcache implementation externalDependency.playWs implementation externalDependency.playServer implementation externalDependency.playAkkaHttpServer diff --git a/docs/debugging.md b/docs/debugging.md index 3697ee508d48ed..5396423ce1e3ba 100644 --- a/docs/debugging.md +++ b/docs/debugging.md @@ -205,3 +205,18 @@ You'll need to ingest some metadata of the following form to see it inside the D "proposedDelta": null } ``` + +## I've configured OIDC, but I cannot log in. I get continuously redirected. What do I do? + +Sorry to hear that! + +This phenomenon may be due to the size of a cookie DataHub uses to authenticate its users. If it's too large (> 4096 characters), then you'll see this behavior. The cookie embeds an encoded version of the information returned by your OIDC Identity Provider - if they return a lot of information, this can be the root cause. + +One solution is to use Play Cache to persist this session information for a user. This means the attributes about the user (and their session info) will be stored in an in-memory store in the `datahub-frontend` service, instead of a browser-side cookie. + +To configure the Play Cache session store, you can set the environment variable "PAC4J_SESSIONSTORE_PROVIDER" to "PlayCacheSessionStore" for the `datahub-frontend` container. + +Do note that there are downsides to using the Play Cache. Specifically, it will make `datahub-frontend` a stateful server. If you have multiple instances of `datahub-frontend` deployed, you'll need to ensure that the same user is deterministically routed to the same service container (since the sessions are stored in memory). If you're using a single instance of `datahub-frontend` (the default), then things should "just work".
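To make the container-level setting above concrete, here is a minimal sketch of overriding the session store in a docker-compose deployment. The environment variable name and value come from this patch's `application.conf` change; the service name `datahub-frontend-react` and the compose layout are illustrative assumptions, not something this diff prescribes.

```yaml
# Illustrative docker-compose override (service name is an assumption).
# Setting PAC4J_SESSIONSTORE_PROVIDER switches pac4j from the default
# PlayCookieSessionStore to the in-memory PlayCacheSessionStore.
version: "3.8"
services:
  datahub-frontend-react:
    environment:
      - PAC4J_SESSIONSTORE_PROVIDER=PlayCacheSessionStore
```

Leaving the variable unset keeps the cookie-based default, which preserves the stateless behavior described above.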
+ +For more details, please refer to https://github.com/datahub-project/datahub/pull/5114 + From a0303448bac2c00d91fb3ee8c58663b1ae7265a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20L=C3=BCdin?= <13187726+Masterchen09@users.noreply.github.com> Date: Thu, 14 Jul 2022 18:54:02 +0200 Subject: [PATCH 19/22] feat(dashboards): add datasets field to DashboardInfo aspect (#5188) Co-authored-by: John Joyce --- .../app/entity/dashboard/DashboardEntity.tsx | 13 ++++++++++++- .../tabs/Entity/DashboardDatasetsTab.tsx | 19 +++++++++++++++++++ .../src/graphql/dashboard.graphql | 3 +++ .../integration/looker/expected_output.json | 1 + .../looker/golden_test_allow_ingest.json | 1 + .../looker/golden_test_ingest.json | 1 + .../looker/golden_test_ingest_joins.json | 1 + .../golden_test_ingest_unaliased_joins.json | 1 + .../metabase/metabase_mces_golden.json | 1 + .../integration/mode/mode_mces_golden.json | 1 + .../powerbi/golden_test_ingest.json | 2 +- .../tableau/tableau_mces_golden.json | 4 ++++ .../tests/unit/test_redash_source.py | 1 + .../com/linkedin/dashboard/DashboardInfo.pdl | 13 +++++++++++++ .../com.linkedin.entity.aspects.snapshot.json | 15 +++++++++++++++ ...com.linkedin.entity.entities.snapshot.json | 15 +++++++++++++++ .../com.linkedin.entity.runs.snapshot.json | 15 +++++++++++++++ ...m.linkedin.platform.platform.snapshot.json | 15 +++++++++++++++ 18 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 datahub-web-react/src/app/entity/shared/tabs/Entity/DashboardDatasetsTab.tsx diff --git a/datahub-web-react/src/app/entity/dashboard/DashboardEntity.tsx b/datahub-web-react/src/app/entity/dashboard/DashboardEntity.tsx index 7a90826b711cf1..923263863d8108 100644 --- a/datahub-web-react/src/app/entity/dashboard/DashboardEntity.tsx +++ b/datahub-web-react/src/app/entity/dashboard/DashboardEntity.tsx @@ -13,6 +13,7 @@ import { SidebarAboutSection } from '../shared/containers/profile/sidebar/Sideba import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; import { DocumentationTab } from '../shared/tabs/Documentation/DocumentationTab'; import { DashboardChartsTab } from '../shared/tabs/Entity/DashboardChartsTab'; +import { DashboardDatasetsTab } from '../shared/tabs/Entity/DashboardDatasetsTab'; import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; import { GenericEntityProperties } from '../shared/types'; import { DashboardPreview } from './preview/DashboardPreview'; @@ -100,10 +101,20 @@ export class DashboardEntity implements Entity { name: 'Charts', component: DashboardChartsTab, display: { - visible: (_, _1) => true, + visible: (_, dashboard: GetDashboardQuery) => + (dashboard?.dashboard?.charts?.total || 0) > 0 || + (dashboard?.dashboard?.datasets?.total || 0) === 0, enabled: (_, dashboard: GetDashboardQuery) => (dashboard?.dashboard?.charts?.total || 0) > 0, }, }, + { + name: 'Datasets', + component: DashboardDatasetsTab, + display: { + visible: (_, dashboard: GetDashboardQuery) => (dashboard?.dashboard?.datasets?.total || 0) > 0, + enabled: (_, dashboard: GetDashboardQuery) => (dashboard?.dashboard?.datasets?.total || 0) > 0, + }, + }, ]} sidebarSections={[ { diff --git a/datahub-web-react/src/app/entity/shared/tabs/Entity/DashboardDatasetsTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Entity/DashboardDatasetsTab.tsx new file mode 100644 index 00000000000000..62d612b0df4df2 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Entity/DashboardDatasetsTab.tsx @@ -0,0 +1,19 @@ +import React from 
'react'; +import { useBaseEntity } from '../../EntityContext'; +import { EntityType } from '../../../../../types.generated'; +import { EntityList } from './components/EntityList'; +import { useEntityRegistry } from '../../../../useEntityRegistry'; + +export const DashboardDatasetsTab = () => { + const entity = useBaseEntity() as any; + const dashboard = entity && entity.dashboard; + const datasets = dashboard?.datasets?.relationships.map((relationship) => relationship.entity); + const entityRegistry = useEntityRegistry(); + const totalDatasets = dashboard?.datasets?.total || 0; + const title = `Consumes ${totalDatasets} ${ + totalDatasets === 1 + ? entityRegistry.getEntityName(EntityType.Dataset) + : entityRegistry.getCollectionName(EntityType.Dataset) + }`; + return ; +}; diff --git a/datahub-web-react/src/graphql/dashboard.graphql b/datahub-web-react/src/graphql/dashboard.graphql index e9e1412f5eeab0..6da6264f14df8c 100644 --- a/datahub-web-react/src/graphql/dashboard.graphql +++ b/datahub-web-react/src/graphql/dashboard.graphql @@ -4,6 +4,9 @@ query getDashboard($urn: String!) { charts: relationships(input: { types: ["Contains"], direction: OUTGOING, start: 0, count: 100 }) { ...fullRelationshipResults } + datasets: relationships(input: { types: ["Consumes"], direction: OUTGOING, start: 0, count: 100 }) { + ...fullRelationshipResults + } upstream: lineage(input: { direction: UPSTREAM, start: 0, count: 100 }) { ...partialLineageResults } diff --git a/metadata-ingestion/tests/integration/looker/expected_output.json b/metadata-ingestion/tests/integration/looker/expected_output.json index f4e272e04ceac1..96393ce4b531a3 100644 --- a/metadata-ingestion/tests/integration/looker/expected_output.json +++ b/metadata-ingestion/tests/integration/looker/expected_output.json @@ -56,6 +56,7 @@ "charts": [ "urn:li:chart:(looker,dashboard_elements.2)" ], + "datasets": [], "lastModified": { "created": { "time": 0, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json index 4f031c7010a1db..df0190b8e72984 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json @@ -12,6 +12,7 @@ "title": "foo", "description": "lorem ipsum", "charts": [], + "datasets": [], "lastModified": { "created": { "time": 1586847600000, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json index 4f031c7010a1db..df0190b8e72984 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json @@ -12,6 +12,7 @@ "title": "foo", "description": "lorem ipsum", "charts": [], + "datasets": [], "lastModified": { "created": { "time": 1586847600000, diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json index baaa64100bdb80..35bf69572d2a15 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json @@ -12,6 +12,7 @@ "title": "foo", "description": "lorem ipsum", "charts": [], + "datasets": [], "lastModified": { "created": { "time": 1586847600000, diff --git 
a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json index af3b7ab3eb48ed..4438c07bb0ad46 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json @@ -12,6 +12,7 @@ "title": "foo", "description": "lorem ipsum", "charts": [], + "datasets": [], "lastModified": { "created": { "time": 1586847600000, diff --git a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json index 127988ba381d7b..3029f815e58bb8 100644 --- a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json +++ b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json @@ -15,6 +15,7 @@ "urn:li:chart:(metabase,1)", "urn:li:chart:(metabase,2)" ], + "datasets": [], "lastModified": { "created": { "time": 1639417721742, diff --git a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json index 16e9bd0db56226..003a74ed0a6d12 100644 --- a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json +++ b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json @@ -14,6 +14,7 @@ "charts": [ "urn:li:chart:(mode,f622b9ee725b)" ], + "datasets": [], "lastModified": { "created": { "time": 1639169724316, diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json index f7f8f285c1c4cb..de37604b3ebbe0 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_ingest.json @@ -311,7 +311,7 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"chartCount\": \"1\", \"workspaceName\": \"foo\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\"], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", + "value": "{\"customProperties\": {\"chartCount\": \"1\", \"workspaceName\": \"foo\", \"workspaceId\": \"7D668CAD-7FFC-4505-9215-655BCA5BEBAE\"}, \"title\": \"test_dashboard\", \"description\": \"test_dashboard\", \"charts\": [\"urn:li:chart:(powerbi,charts.B8E293DC-0C83-4AA0-9BB9-0A8738DF24A0)\"], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"https://localhost/dashboards/web/1\"}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json index f19c9432bcf079..db69d5cbdc365e 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json @@ -514,6 +514,7 @@ "urn:li:chart:(tableau,692a2da4-2a82-32c1-f713-63b8e4325d86)", 
"urn:li:chart:(tableau,f4317efd-c3e6-6ace-8fe6-e71b590bbbcc)" ], + "datasets": [], "lastModified": { "created": { "time": 1640200234000, @@ -3283,6 +3284,7 @@ "charts": [ "urn:li:chart:(tableau,8a6a269a-d6de-fae4-5050-513255b40ffc)" ], + "datasets": [], "lastModified": { "created": { "time": 1639773866000, @@ -3368,6 +3370,7 @@ "title": "Story 1", "description": "", "charts": [], + "datasets": [], "lastModified": { "created": { "time": 1639773866000, @@ -7311,6 +7314,7 @@ "urn:li:chart:(tableau,e70a540d-55ed-b9cc-5a3c-01ebe81a1274)", "urn:li:chart:(tableau,f76d3570-23b8-f74b-d85c-cc5484c2079c)" ], + "datasets": [], "lastModified": { "created": { "time": 1639768450000, diff --git a/metadata-ingestion/tests/unit/test_redash_source.py b/metadata-ingestion/tests/unit/test_redash_source.py index 95d368a9498dbf..70e6f816949c4f 100644 --- a/metadata-ingestion/tests/unit/test_redash_source.py +++ b/metadata-ingestion/tests/unit/test_redash_source.py @@ -487,6 +487,7 @@ def test_get_dashboard_snapshot(): "urn:li:chart:(redash,9)", "urn:li:chart:(redash,8)", ], + datasets=[], lastModified=ChangeAuditStamps( created=AuditStamp( time=1628882055288, actor="urn:li:corpuser:unknown" diff --git a/metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardInfo.pdl index 10549227213c42..bf498f5af62445 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardInfo.pdl @@ -5,6 +5,7 @@ import com.linkedin.common.ChangeAuditStamps import com.linkedin.common.ChartUrn import com.linkedin.common.Time import com.linkedin.common.Url +import com.linkedin.common.Urn import com.linkedin.common.CustomProperties import com.linkedin.common.ExternalReference @@ -47,6 +48,18 @@ record DashboardInfo includes CustomProperties, ExternalReference { } charts: array[ChartUrn] = [ ] + /** + * Datasets consumed by a dashboard + */ + @Relationship = { + "/*": { + "name": "Consumes", + "entityTypes": [ "dataset" ], + "isLineage": true + } + } + datasets: array[Urn] = [ ] + /** * Captures information about who created/last modified/deleted this dashboard and when */ diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index d815eaa2d77b1b..927dfabaa12ea5 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -1200,6 +1200,21 @@ "name" : "Contains" } } + }, { + "name" : "datasets", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "Datasets consumed by a dashboard", + "default" : [ ], + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataset" ], + "isLineage" : true, + "name" : "Consumes" + } + } }, { "name" : "lastModified", "type" : "com.linkedin.common.ChangeAuditStamps", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 3a7dee437cfc61..365a1d1a597a20 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -1217,6 +1217,21 @@ "name" : "Contains" } } + 
}, { + "name" : "datasets", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "Datasets consumed by a dashboard", + "default" : [ ], + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataset" ], + "isLineage" : true, + "name" : "Consumes" + } + } }, { "name" : "lastModified", "type" : "com.linkedin.common.ChangeAuditStamps", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index f3d81d99fe4f8a..82c14d3cb3fe63 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -960,6 +960,21 @@ "name" : "Contains" } } + }, { + "name" : "datasets", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "Datasets consumed by a dashboard", + "default" : [ ], + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataset" ], + "isLineage" : true, + "name" : "Consumes" + } + } }, { "name" : "lastModified", "type" : "com.linkedin.common.ChangeAuditStamps", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index e787f5bd4551d6..96cc658d201729 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -1217,6 +1217,21 @@ "name" : "Contains" } } + }, { + "name" : "datasets", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "Datasets consumed by a dashboard", + "default" : [ ], + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataset" ], + "isLineage" : true, + "name" : "Consumes" + } + } }, { "name" : "lastModified", "type" : "com.linkedin.common.ChangeAuditStamps", From cd147685c7c76ca0f4c5babc4064252771e56cf6 Mon Sep 17 00:00:00 2001 From: Gabe Lyons Date: Thu, 14 Jul 2022 14:23:12 -0700 Subject: [PATCH 20/22] feat(siblings): allow viewing siblings separately (#5390) * allow pulling back curtain for siblings * sibling pullback working for lineage + property merge * propagating provinence to ui * fixups from merge & some renames * fix styling & add tooltip * adding cypress tests * fix lint * updating mocks * updating smoke test * fixing domains smoke test * responding to comments --- .../datahub/graphql/GmsGraphQLEngine.java | 83 +---- .../load/EntityLineageResultResolver.java | 11 +- .../graphql/types/aspect/AspectMapper.java | 13 +- .../graphql/types/aspect/AspectType.java | 2 +- .../types/assertion/AssertionMapper.java | 2 +- .../types/chart/mappers/ChartMapper.java | 39 +- .../mappers/CustomPropertiesMapper.java | 36 ++ .../types/common/mappers/OwnerMapper.java | 12 +- .../types/common/mappers/OwnershipMapper.java | 13 +- .../types/common/mappers/StringMapMapper.java | 5 +- .../container/mappers/ContainerMapper.java | 22 +- .../corpgroup/mappers/CorpGroupMapper.java | 9 +- .../corpuser/mappers/CorpUserMapper.java | 5 +- .../dashboard/mappers/DashboardMapper.java | 40 +- .../dataflow/mappers/DataFlowMapper.java | 39 +- .../types/datajob/mappers/DataJobMapper.java | 31 +- .../types/dataset/mappers/DatasetMapper.java | 33 +- .../EditableSchemaFieldInfoMapper.java | 23 +- .../mappers/EditableSchemaMetadataMapper.java | 18 +- 
.../dataset/mappers/SchemaFieldMapper.java | 17 +- .../types/dataset/mappers/SchemaMapper.java | 13 +- .../dataset/mappers/SchemaMetadataMapper.java | 19 +- .../mappers/VersionedDatasetMapper.java | 32 +- .../types/domain/DomainAssociationMapper.java | 36 ++ .../graphql/types/domain/DomainMapper.java | 4 +- .../glossary/mappers/GlossaryNodeMapper.java | 4 +- .../mappers/GlossaryTermInfoMapper.java | 15 +- .../glossary/mappers/GlossaryTermMapper.java | 9 +- .../mappers/GlossaryTermPropertiesMapper.java | 15 +- .../glossary/mappers/GlossaryTermsMapper.java | 26 +- .../mlmodel/mappers/MLFeatureMapper.java | 21 +- .../mlmodel/mappers/MLFeatureTableMapper.java | 29 +- .../MLFeatureTablePropertiesMapper.java | 15 +- .../mlmodel/mappers/MLModelGroupMapper.java | 21 +- .../types/mlmodel/mappers/MLModelMapper.java | 28 +- .../mappers/MLModelPropertiesMapper.java | 14 +- .../mlmodel/mappers/MLPrimaryKeyMapper.java | 21 +- .../notebook/mappers/NotebookMapper.java | 28 +- .../types/tag/mappers/GlobalTagsMapper.java | 22 +- .../graphql/types/tag/mappers/TagMapper.java | 4 +- .../src/main/resources/entity.graphql | 108 ++++-- datahub-web-react/src/Mocks.tsx | 47 ++- .../src/app/entity/EntityRegistry.tsx | 6 +- .../src/app/entity/chart/ChartEntity.tsx | 4 +- .../app/entity/container/ContainerEntity.tsx | 4 +- .../app/entity/dashboard/DashboardEntity.tsx | 4 +- .../app/entity/dataFlow/DataFlowEntity.tsx | 4 +- .../src/app/entity/dataJob/DataJobEntity.tsx | 4 +- .../src/app/entity/dataset/DatasetEntity.tsx | 12 +- .../dataset/profile/stories/sampleSchema.ts | 4 + .../src/app/entity/shared/EntityContext.tsx | 10 +- .../shared/components/legacy/Ownership.tsx | 349 ------------------ .../styled/search/EmbeddedListSearch.tsx | 5 + .../styled/search/downloadAsCsvUtil.ts | 2 +- .../search/navigateToEntitySearchUrl.ts | 3 + .../containers/profile/EntityProfile.tsx | 26 +- .../profile/header/EntityHeader.tsx | 2 +- .../PlatformContent/PlatformContentView.tsx | 12 +- .../sidebar/Domain/SidebarDomainSection.tsx | 17 +- .../sidebar/Ownership/SidebarOwnerSection.tsx | 9 +- .../sidebar/SidebarSiblingsSection.tsx | 53 +++ .../entity/shared/containers/profile/utils.ts | 26 +- .../src/app/entity/shared/siblingUtils.ts | 37 +- .../Validations/DatasetAssertionsList.tsx | 4 +- .../Dataset/Validations/ValidationsTab.tsx | 12 +- .../Dataset/Validations/assertionUtils.tsx | 4 +- .../entity/shared/tabs/Lineage/LineageTab.tsx | 2 +- .../src/app/entity/shared/types.ts | 9 +- .../src/app/entity/shared/utils.ts | 6 + .../src/app/lineage/LineageEntityNode.tsx | 18 +- .../src/app/lineage/LineageExplorer.tsx | 16 +- .../src/app/lineage/LineageVizInsideZoom.tsx | 58 ++- .../app/lineage/utils/navigateToLineageUrl.ts | 11 +- .../component/CompactEntityNameList.tsx | 55 ++- .../renderer/component/EntityNameList.tsx | 2 +- .../renderer/component/EntityPreviewTag.tsx | 21 +- .../src/app/search/SearchBar.tsx | 2 +- .../src/app/search/SearchResults.tsx | 11 +- .../src/app/shared/tags/TagTermGroup.tsx | 37 +- .../fixtures/entity/chartEntity.ts | 1 + .../fixtures/entity/dashboardEntity.ts | 7 + .../fixtures/entity/dataFlowEntity.ts | 1 + .../fixtures/entity/dataJobEntity.ts | 117 +++++- .../fixtures/entity/datasetEntity.ts | 1 + .../src/graphql-mock/fixtures/tag.ts | 1 + .../src/graphql/fragments.graphql | 14 +- datahub-web-react/src/graphql/lineage.graphql | 10 +- .../metadata/graph/SiblingGraphService.java | 12 +- .../cypress/integration/siblings/siblings.js | 78 ++++ smoke-test/tests/domains/domains_test.py | 12 +- 90 files changed, 1158 
insertions(+), 941 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/CustomPropertiesMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainAssociationMapper.java delete mode 100644 datahub-web-react/src/app/entity/shared/components/legacy/Ownership.tsx create mode 100644 datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarSiblingsSection.tsx diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 3eefd597583d4f..ef581fbe142bc1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -544,10 +544,6 @@ private void configureContainerResolvers(final RuntimeWiring.Builder builder) { .type("Container", typeWiring -> typeWiring .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) .dataFetcher("entities", new ContainerEntitiesResolver(entityClient)) - .dataFetcher("domain", new LoadableTypeResolver<>(domainType, (env) -> { - final Container container = env.getSource(); - return container.getDomain() != null ? container.getDomain().getUrn() : null; - })) .dataFetcher("platform", new LoadableTypeResolver<>(dataPlatformType, (env) -> ((Container) env.getSource()).getPlatform().getUrn())) @@ -819,13 +815,6 @@ private void configureDatasetResolvers(final RuntimeWiring.Builder builder) { .type("Dataset", typeWiring -> typeWiring .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) .dataFetcher("lineage", new EntityLineageResultResolver(siblingGraphService)) - .dataFetcher("domain", - new LoadableTypeResolver<>( - domainType, - (env) -> { - final Dataset dataset = env.getSource(); - return dataset.getDomain() != null ? dataset.getDomain().getUrn() : null; - })) .dataFetcher("platform", new LoadableTypeResolver<>(dataPlatformType, (env) -> ((Dataset) env.getSource()).getPlatform().getUrn()) ) @@ -1000,9 +989,7 @@ private void configureNotebookResolvers(final RuntimeWiring.Builder builder) { return notebook.getDataPlatformInstance() != null ? notebook.getDataPlatformInstance().getUrn() : null; }) ) - .dataFetcher("domain", new LoadableTypeResolver<>(domainType, - (env) -> ((Notebook) env.getSource()).getDomain().getUrn()) - )); + ); } /** @@ -1014,14 +1001,6 @@ private void configureDashboardResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("lineage", new EntityLineageResultResolver(siblingGraphService)) .dataFetcher("platform", new LoadableTypeResolver<>(dataPlatformType, (env) -> ((Dashboard) env.getSource()).getPlatform().getUrn())) - .dataFetcher("domain", new LoadableTypeResolver<>( - domainType, - (env) -> { - final Dashboard dashboard = env.getSource(); - return dashboard.getDomain() != null ? 
dashboard.getDomain().getUrn() : null; - } - ) - ) .dataFetcher("dataPlatformInstance", new LoadableTypeResolver<>(dataPlatformInstanceType, (env) -> { @@ -1054,13 +1033,6 @@ private void configureChartResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("lineage", new EntityLineageResultResolver(siblingGraphService)) .dataFetcher("platform", new LoadableTypeResolver<>(dataPlatformType, (env) -> ((Chart) env.getSource()).getPlatform().getUrn())) - .dataFetcher("domain", new LoadableTypeResolver<>( - domainType, - (env) -> { - final Chart chart = env.getSource(); - return chart.getDomain() != null ? chart.getDomain().getUrn() : null; - }) - ) .dataFetcher("dataPlatformInstance", new LoadableTypeResolver<>(dataPlatformInstanceType, (env) -> { @@ -1138,13 +1110,6 @@ private void configureDataJobResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("lineage", new EntityLineageResultResolver(siblingGraphService)) .dataFetcher("dataFlow", new LoadableTypeResolver<>(dataFlowType, (env) -> ((DataJob) env.getSource()).getDataFlow().getUrn())) - .dataFetcher("domain", new LoadableTypeResolver<>( - domainType, - (env) -> { - final DataJob dataJob = env.getSource(); - return dataJob.getDomain() != null ? dataJob.getDomain().getUrn() : null; - }) - ) .dataFetcher("dataPlatformInstance", new LoadableTypeResolver<>(dataPlatformInstanceType, (env) -> { @@ -1180,13 +1145,6 @@ private void configureDataFlowResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("lineage", new EntityLineageResultResolver(siblingGraphService)) .dataFetcher("platform", new LoadableTypeResolver<>(dataPlatformType, (env) -> ((DataFlow) env.getSource()).getPlatform().getUrn())) - .dataFetcher("domain", new LoadableTypeResolver<>( - domainType, - (env) -> { - final DataFlow dataFlow = env.getSource(); - return dataFlow.getDomain() != null ? dataFlow.getDomain().getUrn() : null; - }) - ) .dataFetcher("dataPlatformInstance", new LoadableTypeResolver<>(dataPlatformInstanceType, (env) -> { @@ -1215,12 +1173,6 @@ private void configureMLFeatureTableResolvers(final RuntimeWiring.Builder builde return entity.getDataPlatformInstance() != null ? entity.getDataPlatformInstance().getUrn() : null; }) ) - .dataFetcher("domain", new LoadableTypeResolver<>( - domainType, - (env) -> { - final MLFeatureTable entity = env.getSource(); - return entity.getDomain() != null ? entity.getDomain().getUrn() : null; - })) ) .type("MLFeatureTableProperties", typeWiring -> typeWiring .dataFetcher("mlFeatures", @@ -1264,13 +1216,6 @@ private void configureMLFeatureTableResolvers(final RuntimeWiring.Builder builde return mlModel.getDataPlatformInstance() != null ? mlModel.getDataPlatformInstance().getUrn() : null; }) ) - .dataFetcher("domain", - new LoadableTypeResolver<>( - domainType, - (env) -> { - final MLModel mlModel = env.getSource(); - return mlModel.getDomain() != null ? mlModel.getDomain().getUrn() : null; - })) ) .type("MLModelProperties", typeWiring -> typeWiring .dataFetcher("groups", new LoadableTypeBatchResolver<>(mlModelGroupType, @@ -1298,13 +1243,6 @@ private void configureMLFeatureTableResolvers(final RuntimeWiring.Builder builde return entity.getDataPlatformInstance() != null ? entity.getDataPlatformInstance().getUrn() : null; }) ) - .dataFetcher("domain", - new LoadableTypeResolver<>( - domainType, - (env) -> { - final MLModelGroup entity = env.getSource(); - return entity.getDomain() != null ? 
entity.getDomain().getUrn() : null; - })) ) .type("MLFeature", typeWiring -> typeWiring .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) @@ -1316,13 +1254,6 @@ private void configureMLFeatureTableResolvers(final RuntimeWiring.Builder builde return entity.getDataPlatformInstance() != null ? entity.getDataPlatformInstance().getUrn() : null; }) ) - .dataFetcher("domain", - new LoadableTypeResolver<>( - domainType, - (env) -> { - final MLFeature entity = env.getSource(); - return entity.getDomain() != null ? entity.getDomain().getUrn() : null; - })) ) .type("MLPrimaryKey", typeWiring -> typeWiring .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) @@ -1334,13 +1265,6 @@ private void configureMLFeatureTableResolvers(final RuntimeWiring.Builder builde return entity.getDataPlatformInstance() != null ? entity.getDataPlatformInstance().getUrn() : null; }) ) - .dataFetcher("domain", - new LoadableTypeResolver<>( - domainType, - (env) -> { - final MLPrimaryKey entity = env.getSource(); - return entity.getDomain() != null ? entity.getDomain().getUrn() : null; - })) ); } @@ -1357,6 +1281,11 @@ private void configureDomainResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient) ) ); + builder.type("DomainAssociation", typeWiring -> typeWiring + .dataFetcher("domain", + new LoadableTypeResolver<>(domainType, + (env) -> ((com.linkedin.datahub.graphql.generated.DomainAssociation) env.getSource()).getDomain().getUrn())) + ); } private void configureAssertionResolvers(final RuntimeWiring.Builder builder) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java index cac228fd98eaf4..b6941a91125374 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/EntityLineageResultResolver.java @@ -42,6 +42,8 @@ public CompletableFuture get(DataFetchingEnvironment enviro final Integer start = input.getStart(); // Optional! @Nullable final Integer count = input.getCount(); // Optional! + @Nullable + final Boolean separateSiblings = input.getSeparateSiblings(); // Optional! com.linkedin.metadata.graph.LineageDirection resolvedDirection = com.linkedin.metadata.graph.LineageDirection.valueOf(lineageDirection.toString()); @@ -49,7 +51,14 @@ public CompletableFuture get(DataFetchingEnvironment enviro return CompletableFuture.supplyAsync(() -> { try { return mapEntityRelationships(lineageDirection, - _siblingGraphService.getLineage(Urn.createFromString(urn), resolvedDirection, start != null ? start : 0, count != null ? count : 100, 1)); + _siblingGraphService.getLineage( + Urn.createFromString(urn), + resolvedDirection, + start != null ? start : 0, + count != null ? count : 100, + 1, + separateSiblings != null ? 
input.getSeparateSiblings() : false + )); } catch (URISyntaxException e) { log.error("Failed to fetch lineage for {}", urn); throw new RuntimeException(String.format("Failed to fetch lineage for {}", urn), e); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectMapper.java index 30f154bd2d57a1..c9e2c322ace8df 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectMapper.java @@ -1,25 +1,24 @@ package com.linkedin.datahub.graphql.types.aspect; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.Aspect; import com.linkedin.datahub.graphql.types.dataset.mappers.SchemaMetadataMapper; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.metadata.Constants; import javax.annotation.Nonnull; -public class AspectMapper implements ModelMapper { +public class AspectMapper { public static final AspectMapper INSTANCE = new AspectMapper(); - public static Aspect map(@Nonnull final EnvelopedAspect aspect) { - return INSTANCE.apply(aspect); + public static Aspect map(@Nonnull final EnvelopedAspect aspect, @Nonnull final Urn entityUrn) { + return INSTANCE.apply(aspect, entityUrn); } - @Override - public Aspect apply(@Nonnull final EnvelopedAspect aspect) { + public Aspect apply(@Nonnull final EnvelopedAspect aspect, @Nonnull final Urn entityUrn) { if (Constants.SCHEMA_METADATA_ASPECT_NAME.equals(aspect.getName())) { - return SchemaMetadataMapper.map(aspect); + return SchemaMetadataMapper.map(aspect, entityUrn); } return null; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectType.java index 50762d08ab55a5..f3fdfdaa86f9e3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/aspect/AspectType.java @@ -62,7 +62,7 @@ public List> batchLoad(@Nonnull ListnewResult().data(null).build(); } final EnvelopedAspect aspect = entityResponse.getAspects().get(key.getAspectName()); - return DataFetcherResult.newResult().data(AspectMapper.map(aspect)).build(); + return DataFetcherResult.newResult().data(AspectMapper.map(aspect, entityUrn)).build(); } catch (Exception e) { if (e instanceof RestLiResponseException) { // if no aspect is found, restli will return a 404 rather than null diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java index d88e5cc2c19648..e1d81bb31f4712 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java @@ -142,4 +142,4 @@ private static AssertionStdParameter mapParameter(final com.linkedin.assertion.A private AssertionMapper() { } -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/ChartMapper.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/ChartMapper.java index 46c4806e71e646..5da31b2db66e01 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/ChartMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/ChartMapper.java @@ -8,6 +8,7 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.generated.AccessLevel; import com.linkedin.datahub.graphql.generated.Chart; @@ -20,7 +21,6 @@ import com.linkedin.datahub.graphql.generated.Container; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.Dataset; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.common.mappers.AuditStampMapper; import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; @@ -28,8 +28,9 @@ import com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper; import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -56,24 +57,25 @@ public static Chart map(@Nonnull final EntityResponse entityResponse) { @Override public Chart apply(@Nonnull final EntityResponse entityResponse) { final Chart result = new Chart(); + Urn entityUrn = entityResponse.getUrn(); result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.CHART); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(CHART_KEY_ASPECT_NAME, this::mapChartKey); - mappingHelper.mapToResult(CHART_INFO_ASPECT_NAME, this::mapChartInfo); + mappingHelper.mapToResult(CHART_INFO_ASPECT_NAME, (entity, dataMap) -> this.mapChartInfo(entity, dataMap, entityUrn)); mappingHelper.mapToResult(CHART_QUERY_ASPECT_NAME, this::mapChartQuery); mappingHelper.mapToResult(EDITABLE_CHART_PROPERTIES_ASPECT_NAME, this::mapEditableChartProperties); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (chart, dataMap) -> - chart.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + chart.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); mappingHelper.mapToResult(STATUS_ASPECT_NAME, (chart, dataMap) -> chart.setStatus(StatusMapper.map(new Status(dataMap)))); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, this::mapGlobalTags); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (dataset, dataMap) -> this.mapGlobalTags(dataset, dataMap, entityUrn)); mappingHelper.mapToResult(INSTITUTIONAL_MEMORY_ASPECT_NAME, (chart, dataMap) -> chart.setInstitutionalMemory(InstitutionalMemoryMapper.map(new InstitutionalMemory(dataMap)))); 
mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (chart, dataMap) -> - chart.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + chart.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); mappingHelper.mapToResult(CONTAINER_ASPECT_NAME, this::mapContainers); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, (chart, dataMap) -> @@ -95,16 +97,16 @@ private void mapChartKey(@Nonnull Chart chart, @Nonnull DataMap dataMap) { .setPlatformName(gmsKey.getDashboardTool()), DATA_PLATFORM_ENTITY_NAME).toString()).build()); } - private void mapChartInfo(@Nonnull Chart chart, @Nonnull DataMap dataMap) { + private void mapChartInfo(@Nonnull Chart chart, @Nonnull DataMap dataMap, @Nonnull Urn entityUrn) { final com.linkedin.chart.ChartInfo gmsChartInfo = new com.linkedin.chart.ChartInfo(dataMap); - chart.setInfo(mapInfo(gmsChartInfo)); - chart.setProperties(mapChartInfoToProperties(gmsChartInfo)); + chart.setInfo(mapInfo(gmsChartInfo, entityUrn)); + chart.setProperties(mapChartInfoToProperties(gmsChartInfo, entityUrn)); } /** * Maps GMS {@link com.linkedin.chart.ChartInfo} to deprecated GraphQL {@link ChartInfo} */ - private ChartInfo mapInfo(final com.linkedin.chart.ChartInfo info) { + private ChartInfo mapInfo(final com.linkedin.chart.ChartInfo info, @Nonnull Urn entityUrn) { final ChartInfo result = new ChartInfo(); result.setDescription(info.getDescription()); result.setName(info.getTitle()); @@ -136,7 +138,7 @@ private ChartInfo mapInfo(final com.linkedin.chart.ChartInfo info) { result.setExternalUrl(info.getChartUrl().toString()); } if (info.hasCustomProperties()) { - result.setCustomProperties(StringMapMapper.map(info.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(info.getCustomProperties(), entityUrn)); } return result; } @@ -144,7 +146,7 @@ private ChartInfo mapInfo(final com.linkedin.chart.ChartInfo info) { /** * Maps GMS {@link com.linkedin.chart.ChartInfo} to new GraphQL {@link ChartProperties} */ - private ChartProperties mapChartInfoToProperties(final com.linkedin.chart.ChartInfo info) { + private ChartProperties mapChartInfoToProperties(final com.linkedin.chart.ChartInfo info, @Nonnull Urn entityUrn) { final ChartProperties result = new ChartProperties(); result.setDescription(info.getDescription()); result.setName(info.getTitle()); @@ -168,7 +170,7 @@ private ChartProperties mapChartInfoToProperties(final com.linkedin.chart.ChartI result.setExternalUrl(info.getChartUrl().toString()); } if (info.hasCustomProperties()) { - result.setCustomProperties(StringMapMapper.map(info.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(info.getCustomProperties(), entityUrn)); } return result; } @@ -192,8 +194,8 @@ private void mapEditableChartProperties(@Nonnull Chart chart, @Nonnull DataMap d chart.setEditableProperties(chartEditableProperties); } - private void mapGlobalTags(@Nonnull Chart chart, @Nonnull DataMap dataMap) { - com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(dataMap)); + private void mapGlobalTags(@Nonnull Chart chart, @Nonnull DataMap dataMap, @Nonnull Urn entityUrn) { + com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(dataMap), entityUrn); chart.setGlobalTags(globalTags); chart.setTags(globalTags); } @@ -209,11 +211,6 @@ private void mapContainers(@Nonnull Chart chart, @Nonnull DataMap dataMap) { 
private void mapDomains(@Nonnull Chart chart, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); - // Currently we only take the first domain if it exists. - if (domains.getDomains().size() > 0) { - chart.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + chart.setDomain(DomainAssociationMapper.map(domains, chart.getUrn())); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/CustomPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/CustomPropertiesMapper.java new file mode 100644 index 00000000000000..50e4846611a9b0 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/CustomPropertiesMapper.java @@ -0,0 +1,36 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.CustomPropertiesEntry; + +import javax.annotation.Nonnull; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Maps Pegasus {@link RecordTemplate} objects to objects conforming to the GQL schema. + * + * To be replaced by auto-generated mappers implementations + */ +public class CustomPropertiesMapper { + + public static final CustomPropertiesMapper INSTANCE = new CustomPropertiesMapper(); + + public static List map(@Nonnull final Map input, @Nonnull Urn urn) { + return INSTANCE.apply(input, urn); + } + + public List apply(@Nonnull final Map input, @Nonnull Urn urn) { + List results = new ArrayList<>(); + for (String key : input.keySet()) { + final CustomPropertiesEntry entry = new CustomPropertiesEntry(); + entry.setKey(key); + entry.setValue(input.get(key)); + entry.setAssociatedUrn(urn.toString()); + results.add(entry); + } + return results; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/OwnerMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/OwnerMapper.java index c6fb2044c1773f..d66c5fd09b8f42 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/OwnerMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/OwnerMapper.java @@ -1,10 +1,10 @@ package com.linkedin.datahub.graphql.types.common.mappers; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.CorpUser; import com.linkedin.datahub.graphql.generated.CorpGroup; import com.linkedin.datahub.graphql.generated.Owner; import com.linkedin.datahub.graphql.generated.OwnershipType; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import javax.annotation.Nonnull; @@ -13,16 +13,15 @@ * * To be replaced by auto-generated mappers implementations */ -public class OwnerMapper implements ModelMapper { +public class OwnerMapper { public static final OwnerMapper INSTANCE = new OwnerMapper(); - public static Owner map(@Nonnull final com.linkedin.common.Owner owner) { - return INSTANCE.apply(owner); + public static Owner map(@Nonnull final com.linkedin.common.Owner owner, @Nonnull final Urn entityUrn) { + return INSTANCE.apply(owner, entityUrn); } - @Override - public Owner apply(@Nonnull final com.linkedin.common.Owner owner) { + public Owner apply(@Nonnull final com.linkedin.common.Owner owner, @Nonnull final Urn entityUrn) { final Owner result = new Owner(); 
result.setType(Enum.valueOf(OwnershipType.class, owner.getType().toString())); if (owner.getOwner().getEntityType().equals("corpuser")) { @@ -37,6 +36,7 @@ public Owner apply(@Nonnull final com.linkedin.common.Owner owner) { if (owner.hasSource()) { result.setSource(OwnershipSourceMapper.map(owner.getSource())); } + result.setAssociatedUrn(entityUrn.toString()); return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/OwnershipMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/OwnershipMapper.java index 8c80bac2bb1f58..6614cfb28a4784 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/OwnershipMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/OwnershipMapper.java @@ -1,7 +1,7 @@ package com.linkedin.datahub.graphql.types.common.mappers; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.Ownership; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import javax.annotation.Nonnull; import java.util.stream.Collectors; @@ -11,21 +11,20 @@ * * To be replaced by auto-generated mappers implementations */ -public class OwnershipMapper implements ModelMapper { +public class OwnershipMapper { public static final OwnershipMapper INSTANCE = new OwnershipMapper(); - public static Ownership map(@Nonnull final com.linkedin.common.Ownership ownership) { - return INSTANCE.apply(ownership); + public static Ownership map(@Nonnull final com.linkedin.common.Ownership ownership, @Nonnull final Urn entityUrn) { + return INSTANCE.apply(ownership, entityUrn); } - @Override - public Ownership apply(@Nonnull final com.linkedin.common.Ownership ownership) { + public Ownership apply(@Nonnull final com.linkedin.common.Ownership ownership, @Nonnull final Urn entityUrn) { final Ownership result = new Ownership(); result.setLastModified(AuditStampMapper.map(ownership.getLastModified())); result.setOwners(ownership.getOwners() .stream() - .map(OwnerMapper::map) + .map(owner -> OwnerMapper.map(owner, entityUrn)) .collect(Collectors.toList())); return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/StringMapMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/StringMapMapper.java index 67754ad6aab01d..32c49a20104142 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/StringMapMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/StringMapMapper.java @@ -1,13 +1,12 @@ package com.linkedin.datahub.graphql.types.common.mappers; - import com.linkedin.datahub.graphql.generated.StringMapEntry; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; - -import javax.annotation.Nonnull; import java.util.ArrayList; import java.util.List; import java.util.Map; +import javax.annotation.Nonnull; + /** * Maps Pegasus {@link RecordTemplate} objects to objects conforming to the GQL schema. 
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java index e13acd06e2181f..013f074cdca84c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java @@ -13,13 +13,13 @@ import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.generated.Container; import com.linkedin.datahub.graphql.generated.DataPlatform; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; import com.linkedin.datahub.graphql.types.common.mappers.DeprecationMapper; import com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper; import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; import com.linkedin.domain.Domains; @@ -56,7 +56,7 @@ public static Container map(final EntityResponse entityResponse) { final EnvelopedAspect envelopedContainerProperties = aspects.get(Constants.CONTAINER_PROPERTIES_ASPECT_NAME); if (envelopedContainerProperties != null) { - result.setProperties(mapContainerProperties(new ContainerProperties(envelopedContainerProperties.getValue().data()))); + result.setProperties(mapContainerProperties(new ContainerProperties(envelopedContainerProperties.getValue().data()), entityUrn)); } final EnvelopedAspect envelopedEditableContainerProperties = aspects.get(Constants.CONTAINER_EDITABLE_PROPERTIES_ASPECT_NAME); @@ -66,18 +66,18 @@ public static Container map(final EntityResponse entityResponse) { final EnvelopedAspect envelopedOwnership = aspects.get(Constants.OWNERSHIP_ASPECT_NAME); if (envelopedOwnership != null) { - result.setOwnership(OwnershipMapper.map(new Ownership(envelopedOwnership.getValue().data()))); + result.setOwnership(OwnershipMapper.map(new Ownership(envelopedOwnership.getValue().data()), entityUrn)); } final EnvelopedAspect envelopedTags = aspects.get(Constants.GLOBAL_TAGS_ASPECT_NAME); if (envelopedTags != null) { - com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(envelopedTags.getValue().data())); + com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(envelopedTags.getValue().data()), entityUrn); result.setTags(globalTags); } final EnvelopedAspect envelopedTerms = aspects.get(Constants.GLOSSARY_TERMS_ASPECT_NAME); if (envelopedTerms != null) { - result.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(envelopedTerms.getValue().data()))); + result.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(envelopedTerms.getValue().data()), entityUrn)); } final EnvelopedAspect envelopedInstitutionalMemory = aspects.get(Constants.INSTITUTIONAL_MEMORY_ASPECT_NAME); @@ -104,11 +104,7 @@ public static Container map(final EntityResponse entityResponse) { if 
(envelopedDomains != null) { final Domains domains = new Domains(envelopedDomains.getValue().data()); // Currently we only take the first domain if it exists. - if (domains.getDomains().size() > 0) { - result.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + result.setDomain(DomainAssociationMapper.map(domains, entityUrn.toString())); } final EnvelopedAspect envelopedDeprecation = aspects.get(Constants.DEPRECATION_ASPECT_NAME); @@ -119,7 +115,7 @@ public static Container map(final EntityResponse entityResponse) { return result; } - private static com.linkedin.datahub.graphql.generated.ContainerProperties mapContainerProperties(final ContainerProperties gmsProperties) { + private static com.linkedin.datahub.graphql.generated.ContainerProperties mapContainerProperties(final ContainerProperties gmsProperties, Urn entityUrn) { final com.linkedin.datahub.graphql.generated.ContainerProperties propertiesResult = new com.linkedin.datahub.graphql.generated.ContainerProperties(); propertiesResult.setName(gmsProperties.getName()); propertiesResult.setDescription(gmsProperties.getDescription()); @@ -127,7 +123,7 @@ private static com.linkedin.datahub.graphql.generated.ContainerProperties mapCon propertiesResult.setExternalUrl(gmsProperties.getExternalUrl().toString()); } if (gmsProperties.hasCustomProperties()) { - propertiesResult.setCustomProperties(StringMapMapper.map(gmsProperties.getCustomProperties())); + propertiesResult.setCustomProperties(CustomPropertiesMapper.map(gmsProperties.getCustomProperties(), entityUrn)); } return propertiesResult; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupMapper.java index c9b8bcafd80e21..783c95a3c67a31 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupMapper.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.types.corpgroup.mappers; import com.linkedin.common.Ownership; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.generated.CorpGroup; import com.linkedin.datahub.graphql.generated.EntityType; @@ -33,6 +34,8 @@ public static CorpGroup map(@Nonnull final EntityResponse entityResponse) { @Override public CorpGroup apply(@Nonnull final EntityResponse entityResponse) { final CorpGroup result = new CorpGroup(); + Urn entityUrn = entityResponse.getUrn(); + result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.CORP_GROUP); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); @@ -40,7 +43,7 @@ public CorpGroup apply(@Nonnull final EntityResponse entityResponse) { mappingHelper.mapToResult(CORP_GROUP_KEY_ASPECT_NAME, this::mapCorpGroupKey); mappingHelper.mapToResult(CORP_GROUP_INFO_ASPECT_NAME, this::mapCorpGroupInfo); mappingHelper.mapToResult(CORP_GROUP_EDITABLE_INFO_ASPECT_NAME, this::mapCorpGroupEditableInfo); - mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, this::mapOwnership); + mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (entity, dataMap) -> this.mapOwnership(entity, dataMap, entityUrn)); return mappingHelper.getResult(); } @@ -59,7 +62,7 @@ private void mapCorpGroupEditableInfo(@Nonnull CorpGroup corpGroup, @Nonnull Dat 
corpGroup.setEditableProperties(CorpGroupEditablePropertiesMapper.map(new CorpGroupEditableInfo(dataMap))); } - private void mapOwnership(@Nonnull CorpGroup corpGroup, @Nonnull DataMap dataMap) { - corpGroup.setOwnership(OwnershipMapper.map(new Ownership(dataMap))); + private void mapOwnership(@Nonnull CorpGroup corpGroup, @Nonnull DataMap dataMap, @Nonnull Urn entityUrn) { + corpGroup.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn)); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/mappers/CorpUserMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/mappers/CorpUserMapper.java index c2859d0d312d99..ac91970f62847d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/mappers/CorpUserMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/mappers/CorpUserMapper.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.types.corpuser.mappers; import com.linkedin.common.GlobalTags; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.datahub.graphql.generated.CorpUser; @@ -36,6 +37,8 @@ public static CorpUser map(@Nonnull final EntityResponse entityResponse) { @Override public CorpUser apply(@Nonnull final EntityResponse entityResponse) { final CorpUser result = new CorpUser(); + Urn entityUrn = entityResponse.getUrn(); + result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.CORP_USER); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); @@ -45,7 +48,7 @@ public CorpUser apply(@Nonnull final EntityResponse entityResponse) { mappingHelper.mapToResult(CORP_USER_EDITABLE_INFO_ASPECT_NAME, (corpUser, dataMap) -> corpUser.setEditableProperties(CorpUserEditableInfoMapper.map(new CorpUserEditableInfo(dataMap)))); mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (corpUser, dataMap) -> - corpUser.setGlobalTags(GlobalTagsMapper.map(new GlobalTags(dataMap)))); + corpUser.setGlobalTags(GlobalTagsMapper.map(new GlobalTags(dataMap), entityUrn))); mappingHelper.mapToResult(CORP_USER_STATUS_ASPECT_NAME, (corpUser, dataMap) -> corpUser.setStatus(CorpUserStatusMapper.map(new CorpUserStatus(dataMap)))); mappingHelper.mapToResult(CORP_USER_CREDENTIALS_ASPECT_NAME, this::mapIsNativeUser); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/mappers/DashboardMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/mappers/DashboardMapper.java index 2f4dc0ff5f97b8..d4a39d1c92263c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/mappers/DashboardMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/mappers/DashboardMapper.java @@ -7,6 +7,7 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.dashboard.EditableDashboardProperties; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.generated.AccessLevel; @@ -17,7 +18,6 @@ import com.linkedin.datahub.graphql.generated.DashboardInfo; import com.linkedin.datahub.graphql.generated.DashboardProperties; import com.linkedin.datahub.graphql.generated.DataPlatform; -import com.linkedin.datahub.graphql.generated.Domain; import 
com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.common.mappers.AuditStampMapper; import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; @@ -25,8 +25,9 @@ import com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper; import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -53,26 +54,28 @@ public static Dashboard map(@Nonnull final EntityResponse entityResponse) { @Override public Dashboard apply(@Nonnull final EntityResponse entityResponse) { final Dashboard result = new Dashboard(); + Urn entityUrn = entityResponse.getUrn(); + result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.DASHBOARD); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(DASHBOARD_KEY_ASPECT_NAME, this::mapDashboardKey); - mappingHelper.mapToResult(DASHBOARD_INFO_ASPECT_NAME, this::mapDashboardInfo); + mappingHelper.mapToResult(DASHBOARD_INFO_ASPECT_NAME, (entity, dataMap) -> this.mapDashboardInfo(entity, dataMap, entityUrn)); mappingHelper.mapToResult(EDITABLE_DASHBOARD_PROPERTIES_ASPECT_NAME, this::mapEditableDashboardProperties); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (dashboard, dataMap) -> - dashboard.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + dashboard.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); mappingHelper.mapToResult(STATUS_ASPECT_NAME, (dashboard, dataMap) -> dashboard.setStatus(StatusMapper.map(new Status(dataMap)))); mappingHelper.mapToResult(INSTITUTIONAL_MEMORY_ASPECT_NAME, (dashboard, dataMap) -> dashboard.setInstitutionalMemory(InstitutionalMemoryMapper.map(new InstitutionalMemory(dataMap)))); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (dashboard, dataMap) -> - dashboard.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + dashboard.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); mappingHelper.mapToResult(CONTAINER_ASPECT_NAME, this::mapContainers); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, (dashboard, dataMap) -> dashboard.setDeprecation(DeprecationMapper.map(new Deprecation(dataMap)))); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, this::mapGlobalTags); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (dataset, dataMap) -> this.mapGlobalTags(dataset, dataMap, entityUrn)); mappingHelper.mapToResult(DATA_PLATFORM_INSTANCE_ASPECT_NAME, (dataset, dataMap) -> dataset.setDataPlatformInstance(DataPlatformInstanceAspectMapper.map(new DataPlatformInstance(dataMap)))); @@ -90,16 +93,16 @@ private void mapDashboardKey(@Nonnull Dashboard dashboard, @Nonnull DataMap data .setPlatformName(gmsKey.getDashboardTool()), DATA_PLATFORM_ENTITY_NAME).toString()).build()); } - private 
void mapDashboardInfo(@Nonnull Dashboard dashboard, @Nonnull DataMap dataMap) { + private void mapDashboardInfo(@Nonnull Dashboard dashboard, @Nonnull DataMap dataMap, Urn entityUrn) { final com.linkedin.dashboard.DashboardInfo gmsDashboardInfo = new com.linkedin.dashboard.DashboardInfo(dataMap); - dashboard.setInfo(mapInfo(gmsDashboardInfo)); - dashboard.setProperties(mapDashboardInfoToProperties(gmsDashboardInfo)); + dashboard.setInfo(mapInfo(gmsDashboardInfo, entityUrn)); + dashboard.setProperties(mapDashboardInfoToProperties(gmsDashboardInfo, entityUrn)); } /** * Maps GMS {@link com.linkedin.dashboard.DashboardInfo} to deprecated GraphQL {@link DashboardInfo} */ - private DashboardInfo mapInfo(final com.linkedin.dashboard.DashboardInfo info) { + private DashboardInfo mapInfo(final com.linkedin.dashboard.DashboardInfo info, Urn entityUrn) { final DashboardInfo result = new DashboardInfo(); result.setDescription(info.getDescription()); result.setName(info.getTitle()); @@ -116,7 +119,7 @@ private DashboardInfo mapInfo(final com.linkedin.dashboard.DashboardInfo info) { result.setExternalUrl(info.getDashboardUrl().toString()); } if (info.hasCustomProperties()) { - result.setCustomProperties(StringMapMapper.map(info.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(info.getCustomProperties(), entityUrn)); } if (info.hasAccess()) { result.setAccess(AccessLevel.valueOf(info.getAccess().toString())); @@ -132,7 +135,7 @@ private DashboardInfo mapInfo(final com.linkedin.dashboard.DashboardInfo info) { /** * Maps GMS {@link com.linkedin.dashboard.DashboardInfo} to new GraphQL {@link DashboardProperties} */ - private DashboardProperties mapDashboardInfoToProperties(final com.linkedin.dashboard.DashboardInfo info) { + private DashboardProperties mapDashboardInfoToProperties(final com.linkedin.dashboard.DashboardInfo info, Urn entityUrn) { final DashboardProperties result = new DashboardProperties(); result.setDescription(info.getDescription()); result.setName(info.getTitle()); @@ -145,7 +148,7 @@ private DashboardProperties mapDashboardInfoToProperties(final com.linkedin.dash result.setExternalUrl(info.getDashboardUrl().toString()); } if (info.hasCustomProperties()) { - result.setCustomProperties(StringMapMapper.map(info.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(info.getCustomProperties(), entityUrn)); } if (info.hasAccess()) { result.setAccess(AccessLevel.valueOf(info.getAccess().toString())); @@ -165,8 +168,8 @@ private void mapEditableDashboardProperties(@Nonnull Dashboard dashboard, @Nonnu dashboard.setEditableProperties(dashboardEditableProperties); } - private void mapGlobalTags(@Nonnull Dashboard dashboard, @Nonnull DataMap dataMap) { - com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(dataMap)); + private void mapGlobalTags(@Nonnull Dashboard dashboard, @Nonnull DataMap dataMap, @Nonnull Urn entityUrn) { + com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(dataMap), entityUrn); dashboard.setGlobalTags(globalTags); dashboard.setTags(globalTags); } @@ -182,11 +185,6 @@ private void mapContainers(@Nonnull Dashboard dashboard, @Nonnull DataMap dataMa private void mapDomains(@Nonnull Dashboard dashboard, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); - // Currently we only take the first domain if it exists. 
- if (domains.getDomains().size() > 0) { - dashboard.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + dashboard.setDomain(DomainAssociationMapper.map(domains, dashboard.getUrn())); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java index 3fabafe8be0de0..1db56d66180285 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java @@ -7,21 +7,22 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.generated.DataFlow; import com.linkedin.datahub.graphql.generated.DataFlowEditableProperties; import com.linkedin.datahub.graphql.generated.DataFlowInfo; import com.linkedin.datahub.graphql.generated.DataFlowProperties; import com.linkedin.datahub.graphql.generated.DataPlatform; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; import com.linkedin.datahub.graphql.types.common.mappers.DeprecationMapper; import com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper; import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -50,20 +51,22 @@ public DataFlow apply(@Nonnull final EntityResponse entityResponse) { final DataFlow result = new DataFlow(); result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.DATA_FLOW); + Urn entityUrn = entityResponse.getUrn(); + EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(DATA_FLOW_KEY_ASPECT_NAME, this::mapKey); - mappingHelper.mapToResult(DATA_FLOW_INFO_ASPECT_NAME, this::mapInfo); + mappingHelper.mapToResult(DATA_FLOW_INFO_ASPECT_NAME, (entity, dataMap) -> this.mapInfo(entity, dataMap, entityUrn)); mappingHelper.mapToResult(EDITABLE_DATA_FLOW_PROPERTIES_ASPECT_NAME, this::mapEditableProperties); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (dataFlow, dataMap) -> - dataFlow.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + dataFlow.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); mappingHelper.mapToResult(STATUS_ASPECT_NAME, (dataFlow, dataMap) -> dataFlow.setStatus(StatusMapper.map(new Status(dataMap)))); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, this::mapGlobalTags); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, 
(dataFlow, dataMap) -> this.mapGlobalTags(dataFlow, dataMap, entityUrn)); mappingHelper.mapToResult(INSTITUTIONAL_MEMORY_ASPECT_NAME, (dataFlow, dataMap) -> dataFlow.setInstitutionalMemory(InstitutionalMemoryMapper.map(new InstitutionalMemory(dataMap)))); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (dataFlow, dataMap) -> - dataFlow.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + dataFlow.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, (dataFlow, dataMap) -> dataFlow.setDeprecation(DeprecationMapper.map(new Deprecation(dataMap)))); @@ -85,16 +88,16 @@ private void mapKey(@Nonnull DataFlow dataFlow, @Nonnull DataMap dataMap) { .setPlatformName(gmsKey.getOrchestrator()), DATA_PLATFORM_ENTITY_NAME).toString()).build()); } - private void mapInfo(@Nonnull DataFlow dataFlow, @Nonnull DataMap dataMap) { + private void mapInfo(@Nonnull DataFlow dataFlow, @Nonnull DataMap dataMap, Urn entityUrn) { final com.linkedin.datajob.DataFlowInfo gmsDataFlowInfo = new com.linkedin.datajob.DataFlowInfo(dataMap); - dataFlow.setInfo(mapDataFlowInfo(gmsDataFlowInfo)); - dataFlow.setProperties(mapDataFlowInfoToProperties(gmsDataFlowInfo)); + dataFlow.setInfo(mapDataFlowInfo(gmsDataFlowInfo, entityUrn)); + dataFlow.setProperties(mapDataFlowInfoToProperties(gmsDataFlowInfo, entityUrn)); } /** * Maps GMS {@link com.linkedin.datajob.DataFlowInfo} to deprecated GraphQL {@link DataFlowInfo} */ - private DataFlowInfo mapDataFlowInfo(final com.linkedin.datajob.DataFlowInfo info) { + private DataFlowInfo mapDataFlowInfo(final com.linkedin.datajob.DataFlowInfo info, Urn entityUrn) { final DataFlowInfo result = new DataFlowInfo(); result.setName(info.getName()); result.setDescription(info.getDescription()); @@ -103,7 +106,7 @@ private DataFlowInfo mapDataFlowInfo(final com.linkedin.datajob.DataFlowInfo inf result.setExternalUrl(info.getExternalUrl().toString()); } if (info.hasCustomProperties()) { - result.setCustomProperties(StringMapMapper.map(info.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(info.getCustomProperties(), entityUrn)); } return result; } @@ -111,7 +114,7 @@ private DataFlowInfo mapDataFlowInfo(final com.linkedin.datajob.DataFlowInfo inf /** * Maps GMS {@link com.linkedin.datajob.DataFlowInfo} to new GraphQL {@link DataFlowProperties} */ - private DataFlowProperties mapDataFlowInfoToProperties(final com.linkedin.datajob.DataFlowInfo info) { + private DataFlowProperties mapDataFlowInfoToProperties(final com.linkedin.datajob.DataFlowInfo info, Urn entityUrn) { final DataFlowProperties result = new DataFlowProperties(); result.setName(info.getName()); result.setDescription(info.getDescription()); @@ -120,7 +123,7 @@ private DataFlowProperties mapDataFlowInfoToProperties(final com.linkedin.datajo result.setExternalUrl(info.getExternalUrl().toString()); } if (info.hasCustomProperties()) { - result.setCustomProperties(StringMapMapper.map(info.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(info.getCustomProperties(), entityUrn)); } return result; } @@ -132,8 +135,8 @@ private void mapEditableProperties(@Nonnull DataFlow dataFlow, @Nonnull DataMap dataFlow.setEditableProperties(dataFlowEditableProperties); } - private void mapGlobalTags(@Nonnull DataFlow dataFlow, @Nonnull DataMap dataMap) { - com.linkedin.datahub.graphql.generated.GlobalTags globalTags = 
GlobalTagsMapper.map(new GlobalTags(dataMap)); + private void mapGlobalTags(@Nonnull DataFlow dataFlow, @Nonnull DataMap dataMap, @Nonnull Urn entityUrn) { + com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(dataMap), entityUrn); dataFlow.setGlobalTags(globalTags); dataFlow.setTags(globalTags); } @@ -141,10 +144,6 @@ private void mapGlobalTags(@Nonnull DataFlow dataFlow, @Nonnull DataMap dataMap) private void mapDomains(@Nonnull DataFlow dataFlow, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); // Currently we only take the first domain if it exists. - if (domains.getDomains().size() > 0) { - dataFlow.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + dataFlow.setDomain(DomainAssociationMapper.map(domains, dataFlow.getUrn())); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java index 7b32da0ecc245d..50788f748fbcdf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java @@ -8,6 +8,7 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.generated.DataFlow; import com.linkedin.datahub.graphql.generated.DataJob; @@ -16,14 +17,14 @@ import com.linkedin.datahub.graphql.generated.DataJobInputOutput; import com.linkedin.datahub.graphql.generated.DataJobProperties; import com.linkedin.datahub.graphql.generated.Dataset; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; import com.linkedin.datahub.graphql.types.common.mappers.DeprecationMapper; import com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper; import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -48,6 +49,8 @@ public static DataJob map(@Nonnull final EntityResponse entityResponse) { @Override public DataJob apply(@Nonnull final EntityResponse entityResponse) { final DataJob result = new DataJob(); + Urn entityUrn = entityResponse.getUrn(); + result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.DATA_JOB); @@ -59,8 +62,8 @@ public DataJob apply(@Nonnull final EntityResponse entityResponse) { result.setJobId(gmsKey.getJobId()); } else if (DATA_JOB_INFO_ASPECT_NAME.equals(name)) { final com.linkedin.datajob.DataJobInfo gmsDataJobInfo = new com.linkedin.datajob.DataJobInfo(data); - result.setInfo(mapDataJobInfo(gmsDataJobInfo)); - 
result.setProperties(mapDataJobInfoToProperties(gmsDataJobInfo)); + result.setInfo(mapDataJobInfo(gmsDataJobInfo, entityUrn)); + result.setProperties(mapDataJobInfoToProperties(gmsDataJobInfo, entityUrn)); } else if (DATA_JOB_INPUT_OUTPUT_ASPECT_NAME.equals(name)) { final com.linkedin.datajob.DataJobInputOutput gmsDataJobInputOutput = new com.linkedin.datajob.DataJobInputOutput(data); result.setInputOutput(mapDataJobInputOutput(gmsDataJobInputOutput)); @@ -70,25 +73,21 @@ public DataJob apply(@Nonnull final EntityResponse entityResponse) { dataJobEditableProperties.setDescription(editableDataJobProperties.getDescription()); result.setEditableProperties(dataJobEditableProperties); } else if (OWNERSHIP_ASPECT_NAME.equals(name)) { - result.setOwnership(OwnershipMapper.map(new Ownership(data))); + result.setOwnership(OwnershipMapper.map(new Ownership(data), entityUrn)); } else if (STATUS_ASPECT_NAME.equals(name)) { result.setStatus(StatusMapper.map(new Status(data))); } else if (GLOBAL_TAGS_ASPECT_NAME.equals(name)) { - com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(data)); + com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(data), entityUrn); result.setGlobalTags(globalTags); result.setTags(globalTags); } else if (INSTITUTIONAL_MEMORY_ASPECT_NAME.equals(name)) { result.setInstitutionalMemory(InstitutionalMemoryMapper.map(new InstitutionalMemory(data))); } else if (GLOSSARY_TERMS_ASPECT_NAME.equals(name)) { - result.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(data))); + result.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(data), entityUrn)); } else if (DOMAINS_ASPECT_NAME.equals(name)) { final Domains domains = new Domains(data); // Currently we only take the first domain if it exists. 
- if (domains.getDomains().size() > 0) { - result.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + result.setDomain(DomainAssociationMapper.map(domains, entityUrn.toString())); } else if (DEPRECATION_ASPECT_NAME.equals(name)) { result.setDeprecation(DeprecationMapper.map(new Deprecation(data))); } else if (DATA_PLATFORM_INSTANCE_ASPECT_NAME.equals(name)) { @@ -102,7 +101,7 @@ public DataJob apply(@Nonnull final EntityResponse entityResponse) { /** * Maps GMS {@link com.linkedin.datajob.DataJobInfo} to deprecated GraphQL {@link DataJobInfo} */ - private DataJobInfo mapDataJobInfo(final com.linkedin.datajob.DataJobInfo info) { + private DataJobInfo mapDataJobInfo(final com.linkedin.datajob.DataJobInfo info, Urn entityUrn) { final DataJobInfo result = new DataJobInfo(); result.setName(info.getName()); result.setDescription(info.getDescription()); @@ -110,7 +109,7 @@ private DataJobInfo mapDataJobInfo(final com.linkedin.datajob.DataJobInfo info) result.setExternalUrl(info.getExternalUrl().toString()); } if (info.hasCustomProperties()) { - result.setCustomProperties(StringMapMapper.map(info.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(info.getCustomProperties(), entityUrn)); } return result; } @@ -118,7 +117,7 @@ private DataJobInfo mapDataJobInfo(final com.linkedin.datajob.DataJobInfo info) /** * Maps GMS {@link com.linkedin.datajob.DataJobInfo} to new GraphQL {@link DataJobProperties} */ - private DataJobProperties mapDataJobInfoToProperties(final com.linkedin.datajob.DataJobInfo info) { + private DataJobProperties mapDataJobInfoToProperties(final com.linkedin.datajob.DataJobInfo info, Urn entityUrn) { final DataJobProperties result = new DataJobProperties(); result.setName(info.getName()); result.setDescription(info.getDescription()); @@ -126,7 +125,7 @@ private DataJobProperties mapDataJobInfoToProperties(final com.linkedin.datajob. 
result.setExternalUrl(info.getExternalUrl().toString()); } if (info.hasCustomProperties()) { - result.setCustomProperties(StringMapMapper.map(info.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(info.getCustomProperties(), entityUrn)); } return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java index 5862bd160d605b..6b300a9e2bb8b8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java @@ -8,12 +8,12 @@ import com.linkedin.common.Ownership; import com.linkedin.common.Siblings; import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.generated.Container; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.DatasetEditableProperties; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FabricType; import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; @@ -22,8 +22,9 @@ import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.SiblingsMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -60,30 +61,31 @@ public static Dataset map(@Nonnull final EntityResponse dataset) { @Override public Dataset apply(@Nonnull final EntityResponse entityResponse) { Dataset result = new Dataset(); + Urn entityUrn = entityResponse.getUrn(); result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.DATASET); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(DATASET_KEY_ASPECT_NAME, this::mapDatasetKey); - mappingHelper.mapToResult(DATASET_PROPERTIES_ASPECT_NAME, this::mapDatasetProperties); + mappingHelper.mapToResult(DATASET_PROPERTIES_ASPECT_NAME, (entity, dataMap) -> this.mapDatasetProperties(entity, dataMap, entityUrn)); mappingHelper.mapToResult(DATASET_DEPRECATION_ASPECT_NAME, (dataset, dataMap) -> dataset.setDeprecation(DatasetDeprecationMapper.map(new DatasetDeprecation(dataMap)))); mappingHelper.mapToResult(SCHEMA_METADATA_ASPECT_NAME, (dataset, dataMap) -> - dataset.setSchema(SchemaMapper.map(new SchemaMetadata(dataMap)))); + dataset.setSchema(SchemaMapper.map(new SchemaMetadata(dataMap), entityUrn))); mappingHelper.mapToResult(EDITABLE_DATASET_PROPERTIES_ASPECT_NAME, this::mapEditableDatasetProperties); 
mappingHelper.mapToResult(VIEW_PROPERTIES_ASPECT_NAME, this::mapViewProperties); mappingHelper.mapToResult(INSTITUTIONAL_MEMORY_ASPECT_NAME, (dataset, dataMap) -> dataset.setInstitutionalMemory(InstitutionalMemoryMapper.map(new InstitutionalMemory(dataMap)))); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (dataset, dataMap) -> - dataset.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + dataset.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); mappingHelper.mapToResult(STATUS_ASPECT_NAME, (dataset, dataMap) -> dataset.setStatus(StatusMapper.map(new Status(dataMap)))); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, this::mapGlobalTags); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (dataset, dataMap) -> this.mapGlobalTags(dataset, dataMap, entityUrn)); mappingHelper.mapToResult(EDITABLE_SCHEMA_METADATA_ASPECT_NAME, (dataset, dataMap) -> - dataset.setEditableSchemaMetadata(EditableSchemaMetadataMapper.map(new EditableSchemaMetadata(dataMap)))); + dataset.setEditableSchemaMetadata(EditableSchemaMetadataMapper.map(new EditableSchemaMetadata(dataMap), entityUrn))); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (dataset, dataMap) -> - dataset.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + dataset.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); mappingHelper.mapToResult(CONTAINER_ASPECT_NAME, this::mapContainers); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, (dataset, dataMap) -> @@ -105,7 +107,7 @@ private void mapDatasetKey(@Nonnull Dataset dataset, @Nonnull DataMap dataMap) { .setUrn(gmsKey.getPlatform().toString()).build()); } - private void mapDatasetProperties(@Nonnull Dataset dataset, @Nonnull DataMap dataMap) { + private void mapDatasetProperties(@Nonnull Dataset dataset, @Nonnull DataMap dataMap, @Nonnull Urn entityUrn) { final DatasetProperties gmsProperties = new DatasetProperties(dataMap); final com.linkedin.datahub.graphql.generated.DatasetProperties properties = new com.linkedin.datahub.graphql.generated.DatasetProperties(); @@ -115,7 +117,7 @@ private void mapDatasetProperties(@Nonnull Dataset dataset, @Nonnull DataMap dat if (gmsProperties.getExternalUrl() != null) { properties.setExternalUrl(gmsProperties.getExternalUrl().toString()); } - properties.setCustomProperties(StringMapMapper.map(gmsProperties.getCustomProperties())); + properties.setCustomProperties(CustomPropertiesMapper.map(gmsProperties.getCustomProperties(), entityUrn)); if (gmsProperties.getName() != null) { properties.setName(gmsProperties.getName()); } else { @@ -146,8 +148,8 @@ private void mapViewProperties(@Nonnull Dataset dataset, @Nonnull DataMap dataMa dataset.setViewProperties(graphqlProperties); } - private void mapGlobalTags(@Nonnull Dataset dataset, @Nonnull DataMap dataMap) { - com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(dataMap)); + private void mapGlobalTags(@Nonnull Dataset dataset, @Nonnull DataMap dataMap, @Nonnull final Urn entityUrn) { + com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(dataMap), entityUrn); dataset.setGlobalTags(globalTags); dataset.setTags(globalTags); } @@ -163,11 +165,6 @@ private void mapContainers(@Nonnull Dataset dataset, @Nonnull DataMap dataMap) { private void mapDomains(@Nonnull Dataset dataset, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); - // 
Currently we only take the first domain if it exists. - if (domains.getDomains().size() > 0) { - dataset.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + dataset.setDomain(DomainAssociationMapper.map(domains, dataset.getUrn())); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/EditableSchemaFieldInfoMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/EditableSchemaFieldInfoMapper.java index e78295dc375476..922574d5051d30 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/EditableSchemaFieldInfoMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/EditableSchemaFieldInfoMapper.java @@ -1,23 +1,28 @@ package com.linkedin.datahub.graphql.types.dataset.mappers; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; import com.linkedin.schema.EditableSchemaFieldInfo; import javax.annotation.Nonnull; -public class EditableSchemaFieldInfoMapper implements ModelMapper { +public class EditableSchemaFieldInfoMapper { public static final EditableSchemaFieldInfoMapper INSTANCE = new EditableSchemaFieldInfoMapper(); - public static com.linkedin.datahub.graphql.generated.EditableSchemaFieldInfo map(@Nonnull final EditableSchemaFieldInfo fieldInfo) { - return INSTANCE.apply(fieldInfo); + public static com.linkedin.datahub.graphql.generated.EditableSchemaFieldInfo map( + @Nonnull final EditableSchemaFieldInfo fieldInfo, + @Nonnull final Urn entityUrn + ) { + return INSTANCE.apply(fieldInfo, entityUrn); } - @Override - public com.linkedin.datahub.graphql.generated.EditableSchemaFieldInfo apply(@Nonnull final EditableSchemaFieldInfo input) { + public com.linkedin.datahub.graphql.generated.EditableSchemaFieldInfo apply( + @Nonnull final EditableSchemaFieldInfo input, + @Nonnull final Urn entityUrn + ) { final com.linkedin.datahub.graphql.generated.EditableSchemaFieldInfo result = new com.linkedin.datahub.graphql.generated.EditableSchemaFieldInfo(); if (input.hasDescription()) { result.setDescription((input.getDescription())); @@ -26,11 +31,11 @@ public com.linkedin.datahub.graphql.generated.EditableSchemaFieldInfo apply(@Non result.setFieldPath((input.getFieldPath())); } if (input.hasGlobalTags()) { - result.setGlobalTags(GlobalTagsMapper.map(input.getGlobalTags())); - result.setTags(GlobalTagsMapper.map(input.getGlobalTags())); + result.setGlobalTags(GlobalTagsMapper.map(input.getGlobalTags(), entityUrn)); + result.setTags(GlobalTagsMapper.map(input.getGlobalTags(), entityUrn)); } if (input.hasGlossaryTerms()) { - result.setGlossaryTerms(GlossaryTermsMapper.map(input.getGlossaryTerms())); + result.setGlossaryTerms(GlossaryTermsMapper.map(input.getGlossaryTerms(), entityUrn)); } return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/EditableSchemaMetadataMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/EditableSchemaMetadataMapper.java index 84803ed40a9220..376558d2fd18cb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/EditableSchemaMetadataMapper.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/EditableSchemaMetadataMapper.java @@ -1,23 +1,27 @@ package com.linkedin.datahub.graphql.types.dataset.mappers; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.schema.EditableSchemaMetadata; +import com.linkedin.common.urn.Urn; import javax.annotation.Nonnull; import java.util.stream.Collectors; -public class EditableSchemaMetadataMapper implements ModelMapper { +public class EditableSchemaMetadataMapper { public static final EditableSchemaMetadataMapper INSTANCE = new EditableSchemaMetadataMapper(); - public static com.linkedin.datahub.graphql.generated.EditableSchemaMetadata map(@Nonnull final EditableSchemaMetadata metadata) { - return INSTANCE.apply(metadata); + public static com.linkedin.datahub.graphql.generated.EditableSchemaMetadata map( + @Nonnull final EditableSchemaMetadata metadata, + @Nonnull final Urn entityUrn + ) { + return INSTANCE.apply(metadata, entityUrn); } - @Override - public com.linkedin.datahub.graphql.generated.EditableSchemaMetadata apply(@Nonnull final EditableSchemaMetadata input) { + public com.linkedin.datahub.graphql.generated.EditableSchemaMetadata apply(@Nonnull final EditableSchemaMetadata input, @Nonnull final Urn entityUrn) { final com.linkedin.datahub.graphql.generated.EditableSchemaMetadata result = new com.linkedin.datahub.graphql.generated.EditableSchemaMetadata(); - result.setEditableSchemaFieldInfo(input.getEditableSchemaFieldInfo().stream().map(EditableSchemaFieldInfoMapper::map).collect(Collectors.toList())); + result.setEditableSchemaFieldInfo(input.getEditableSchemaFieldInfo().stream().map(schemaField -> + EditableSchemaFieldInfoMapper.map(schemaField, entityUrn) + ).collect(Collectors.toList())); return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java index 7d05d4c26a50ac..69b3aac6f05125 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java @@ -1,23 +1,22 @@ package com.linkedin.datahub.graphql.types.dataset.mappers; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.SchemaField; import com.linkedin.datahub.graphql.generated.SchemaFieldDataType; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import javax.annotation.Nonnull; -public class SchemaFieldMapper implements ModelMapper { +public class SchemaFieldMapper { public static final SchemaFieldMapper INSTANCE = new SchemaFieldMapper(); - public static SchemaField map(@Nonnull final com.linkedin.schema.SchemaField metadata) { - return INSTANCE.apply(metadata); + public static SchemaField map(@Nonnull final com.linkedin.schema.SchemaField metadata, @Nonnull Urn entityUrn) { + return INSTANCE.apply(metadata, entityUrn); } - @Override - public SchemaField apply(@Nonnull final com.linkedin.schema.SchemaField input) { + public SchemaField apply(@Nonnull final com.linkedin.schema.SchemaField input, @Nonnull Urn entityUrn) { final SchemaField result = new SchemaField(); 
result.setDescription(input.getDescription()); result.setFieldPath(input.getFieldPath()); @@ -27,11 +26,11 @@ public SchemaField apply(@Nonnull final com.linkedin.schema.SchemaField input) { result.setNativeDataType(input.getNativeDataType()); result.setType(mapSchemaFieldDataType(input.getType())); if (input.hasGlobalTags()) { - result.setGlobalTags(GlobalTagsMapper.map(input.getGlobalTags())); - result.setTags(GlobalTagsMapper.map(input.getGlobalTags())); + result.setGlobalTags(GlobalTagsMapper.map(input.getGlobalTags(), entityUrn)); + result.setTags(GlobalTagsMapper.map(input.getGlobalTags(), entityUrn)); } if (input.hasGlossaryTerms()) { - result.setGlossaryTerms(GlossaryTermsMapper.map(input.getGlossaryTerms())); + result.setGlossaryTerms(GlossaryTermsMapper.map(input.getGlossaryTerms(), entityUrn)); } result.setIsPartOfKey(input.isIsPartOfKey()); return result; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMapper.java index ca6b30b36f4cd3..eb793cc17efb6b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMapper.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.types.dataset.mappers; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.Schema; import com.linkedin.mxe.SystemMetadata; import com.linkedin.schema.SchemaMetadata; @@ -12,15 +13,15 @@ public class SchemaMapper { public static final SchemaMapper INSTANCE = new SchemaMapper(); - public static Schema map(@Nonnull final SchemaMetadata metadata) { - return INSTANCE.apply(metadata, null); + public static Schema map(@Nonnull final SchemaMetadata metadata, @Nonnull final Urn entityUrn) { + return INSTANCE.apply(metadata, null, entityUrn); } - public static Schema map(@Nonnull final SchemaMetadata metadata, @Nullable final SystemMetadata systemMetadata) { - return INSTANCE.apply(metadata, systemMetadata); + public static Schema map(@Nonnull final SchemaMetadata metadata, @Nullable final SystemMetadata systemMetadata, @Nonnull final Urn entityUrn) { + return INSTANCE.apply(metadata, systemMetadata, entityUrn); } - public Schema apply(@Nonnull final com.linkedin.schema.SchemaMetadata input, @Nullable final SystemMetadata systemMetadata) { + public Schema apply(@Nonnull final com.linkedin.schema.SchemaMetadata input, @Nullable final SystemMetadata systemMetadata, @Nonnull final Urn entityUrn) { final Schema result = new Schema(); if (input.getDataset() != null) { result.setDatasetUrn(input.getDataset().toString()); @@ -34,7 +35,7 @@ public Schema apply(@Nonnull final com.linkedin.schema.SchemaMetadata input, @Nu result.setCluster(input.getCluster()); result.setHash(input.getHash()); result.setPrimaryKeys(input.getPrimaryKeys()); - result.setFields(input.getFields().stream().map(SchemaFieldMapper::map).collect(Collectors.toList())); + result.setFields(input.getFields().stream().map(field -> SchemaFieldMapper.map(field, entityUrn)).collect(Collectors.toList())); result.setPlatformSchema(PlatformSchemaMapper.map(input.getPlatformSchema())); if (input.getForeignKeys() != null) { result.setForeignKeys(input.getForeignKeys().stream() diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMetadataMapper.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMetadataMapper.java index c89de8249a10fd..00cb91bed8abb2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMetadataMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaMetadataMapper.java @@ -1,22 +1,27 @@ package com.linkedin.datahub.graphql.types.dataset.mappers; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.common.urn.Urn; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.schema.SchemaMetadata; import java.util.stream.Collectors; import javax.annotation.Nonnull; -public class SchemaMetadataMapper implements ModelMapper { +public class SchemaMetadataMapper { public static final SchemaMetadataMapper INSTANCE = new SchemaMetadataMapper(); - public static com.linkedin.datahub.graphql.generated.SchemaMetadata map(@Nonnull final EnvelopedAspect aspect) { - return INSTANCE.apply(aspect); + public static com.linkedin.datahub.graphql.generated.SchemaMetadata map( + @Nonnull final EnvelopedAspect aspect, + @Nonnull final Urn entityUrn + ) { + return INSTANCE.apply(aspect, entityUrn); } - @Override - public com.linkedin.datahub.graphql.generated.SchemaMetadata apply(@Nonnull final EnvelopedAspect aspect) { + public com.linkedin.datahub.graphql.generated.SchemaMetadata apply( + @Nonnull final EnvelopedAspect aspect, + @Nonnull final Urn entityUrn + ) { final SchemaMetadata input = new SchemaMetadata(aspect.getValue().data()); final com.linkedin.datahub.graphql.generated.SchemaMetadata result = new com.linkedin.datahub.graphql.generated.SchemaMetadata(); @@ -30,7 +35,7 @@ public com.linkedin.datahub.graphql.generated.SchemaMetadata apply(@Nonnull fina result.setCluster(input.getCluster()); result.setHash(input.getHash()); result.setPrimaryKeys(input.getPrimaryKeys()); - result.setFields(input.getFields().stream().map(SchemaFieldMapper::map).collect(Collectors.toList())); + result.setFields(input.getFields().stream().map(field -> SchemaFieldMapper.map(field, entityUrn)).collect(Collectors.toList())); result.setPlatformSchema(PlatformSchemaMapper.map(input.getPlatformSchema())); result.setAspectVersion(aspect.getVersion()); if (input.hasForeignKeys()) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/VersionedDatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/VersionedDatasetMapper.java index 1b0fb5c73326b7..5d9d40970ac434 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/VersionedDatasetMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/VersionedDatasetMapper.java @@ -6,11 +6,11 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.generated.Container; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DatasetEditableProperties; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FabricType; import com.linkedin.datahub.graphql.generated.VersionedDataset; @@ -18,8 +18,9 @@ import 
com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper; import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -57,6 +58,7 @@ public static VersionedDataset map(@Nonnull final EntityResponse dataset) { @Override public VersionedDataset apply(@Nonnull final EntityResponse entityResponse) { VersionedDataset result = new VersionedDataset(); + Urn entityUrn = entityResponse.getUrn(); result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.DATASET); @@ -65,24 +67,24 @@ public VersionedDataset apply(@Nonnull final EntityResponse entityResponse) { SystemMetadata schemaSystemMetadata = getSystemMetadata(aspectMap, SCHEMA_METADATA_ASPECT_NAME); mappingHelper.mapToResult(DATASET_KEY_ASPECT_NAME, this::mapDatasetKey); - mappingHelper.mapToResult(DATASET_PROPERTIES_ASPECT_NAME, this::mapDatasetProperties); + mappingHelper.mapToResult(DATASET_PROPERTIES_ASPECT_NAME, (entity, dataMap) -> this.mapDatasetProperties(entity, dataMap, entityUrn)); mappingHelper.mapToResult(DATASET_DEPRECATION_ASPECT_NAME, (dataset, dataMap) -> dataset.setDeprecation(DatasetDeprecationMapper.map(new DatasetDeprecation(dataMap)))); mappingHelper.mapToResult(SCHEMA_METADATA_ASPECT_NAME, (dataset, dataMap) -> - dataset.setSchema(SchemaMapper.map(new SchemaMetadata(dataMap), schemaSystemMetadata))); + dataset.setSchema(SchemaMapper.map(new SchemaMetadata(dataMap), schemaSystemMetadata, entityUrn))); mappingHelper.mapToResult(EDITABLE_DATASET_PROPERTIES_ASPECT_NAME, this::mapEditableDatasetProperties); mappingHelper.mapToResult(VIEW_PROPERTIES_ASPECT_NAME, this::mapViewProperties); mappingHelper.mapToResult(INSTITUTIONAL_MEMORY_ASPECT_NAME, (dataset, dataMap) -> dataset.setInstitutionalMemory(InstitutionalMemoryMapper.map(new InstitutionalMemory(dataMap)))); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (dataset, dataMap) -> - dataset.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + dataset.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); mappingHelper.mapToResult(STATUS_ASPECT_NAME, (dataset, dataMap) -> dataset.setStatus(StatusMapper.map(new Status(dataMap)))); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, this::mapGlobalTags); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (dataset, dataMap) -> this.mapGlobalTags(dataset, dataMap, entityUrn)); mappingHelper.mapToResult(EDITABLE_SCHEMA_METADATA_ASPECT_NAME, (dataset, dataMap) -> - dataset.setEditableSchemaMetadata(EditableSchemaMetadataMapper.map(new EditableSchemaMetadata(dataMap)))); + dataset.setEditableSchemaMetadata(EditableSchemaMetadataMapper.map(new EditableSchemaMetadata(dataMap), entityUrn))); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (dataset, dataMap) -> - dataset.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + dataset.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); 
mappingHelper.mapToResult(CONTAINER_ASPECT_NAME, this::mapContainers); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, (dataset, dataMap) -> @@ -107,7 +109,7 @@ private void mapDatasetKey(@Nonnull VersionedDataset dataset, @Nonnull DataMap d .setUrn(gmsKey.getPlatform().toString()).build()); } - private void mapDatasetProperties(@Nonnull VersionedDataset dataset, @Nonnull DataMap dataMap) { + private void mapDatasetProperties(@Nonnull VersionedDataset dataset, @Nonnull DataMap dataMap, Urn entityUrn) { final DatasetProperties gmsProperties = new DatasetProperties(dataMap); final com.linkedin.datahub.graphql.generated.DatasetProperties properties = new com.linkedin.datahub.graphql.generated.DatasetProperties(); @@ -116,7 +118,7 @@ private void mapDatasetProperties(@Nonnull VersionedDataset dataset, @Nonnull Da if (gmsProperties.getExternalUrl() != null) { properties.setExternalUrl(gmsProperties.getExternalUrl().toString()); } - properties.setCustomProperties(StringMapMapper.map(gmsProperties.getCustomProperties())); + properties.setCustomProperties(CustomPropertiesMapper.map(gmsProperties.getCustomProperties(), entityUrn)); if (gmsProperties.getName() != null) { properties.setName(gmsProperties.getName()); } else { @@ -143,8 +145,8 @@ private void mapViewProperties(@Nonnull VersionedDataset dataset, @Nonnull DataM dataset.setViewProperties(graphqlProperties); } - private void mapGlobalTags(@Nonnull VersionedDataset dataset, @Nonnull DataMap dataMap) { - com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(dataMap)); + private void mapGlobalTags(@Nonnull VersionedDataset dataset, @Nonnull DataMap dataMap, @Nonnull Urn entityUrn) { + com.linkedin.datahub.graphql.generated.GlobalTags globalTags = GlobalTagsMapper.map(new GlobalTags(dataMap), entityUrn); dataset.setTags(globalTags); } @@ -160,10 +162,6 @@ private void mapContainers(@Nonnull VersionedDataset dataset, @Nonnull DataMap d private void mapDomains(@Nonnull VersionedDataset dataset, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); // Currently we only take the first domain if it exists. - if (domains.getDomains().size() > 0) { - dataset.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + dataset.setDomain(DomainAssociationMapper.map(domains, dataset.getUrn())); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainAssociationMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainAssociationMapper.java new file mode 100644 index 00000000000000..df8de87ff69ff1 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainAssociationMapper.java @@ -0,0 +1,36 @@ +package com.linkedin.datahub.graphql.types.domain; + +import com.linkedin.datahub.graphql.generated.Domain; +import com.linkedin.datahub.graphql.generated.DomainAssociation; +import com.linkedin.datahub.graphql.generated.EntityType; +import javax.annotation.Nonnull; + + +/** + * Maps Pegasus {@link RecordTemplate} objects to objects conforming to the GQL schema. 
+ * + * To be replaced by auto-generated mappers implementations + */ +public class DomainAssociationMapper { + + public static final DomainAssociationMapper INSTANCE = new DomainAssociationMapper(); + + public static DomainAssociation map( + @Nonnull final com.linkedin.domain.Domains domains, + @Nonnull final String entityUrn + ) { + return INSTANCE.apply(domains, entityUrn); + } + + public DomainAssociation apply(@Nonnull final com.linkedin.domain.Domains domains, @Nonnull final String entityUrn) { + if (domains.getDomains().size() > 0) { + DomainAssociation association = new DomainAssociation(); + association.setDomain(Domain.builder() + .setType(EntityType.DOMAIN) + .setUrn(domains.getDomains().get(0).toString()).build()); + association.setAssociatedUrn(entityUrn); + return association; + } + return null; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainMapper.java index 9f107aeb0dece7..98919ff1f44303 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/domain/DomainMapper.java @@ -40,7 +40,7 @@ public static Domain map(final EntityResponse entityResponse) { final EnvelopedAspect envelopedOwnership = aspects.get(Constants.OWNERSHIP_ASPECT_NAME); if (envelopedOwnership != null) { - result.setOwnership(OwnershipMapper.map(new Ownership(envelopedOwnership.getValue().data()))); + result.setOwnership(OwnershipMapper.map(new Ownership(envelopedOwnership.getValue().data()), entityUrn)); } final EnvelopedAspect envelopedInstitutionalMemory = aspects.get(Constants.INSTITUTIONAL_MEMORY_ASPECT_NAME); @@ -59,4 +59,4 @@ private static com.linkedin.datahub.graphql.generated.DomainProperties mapDomain } private DomainMapper() { } -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryNodeMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryNodeMapper.java index 1f9c048d908163..6a1d849dd23bf5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryNodeMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryNodeMapper.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.types.glossary.mappers; import com.linkedin.common.Ownership; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.GlossaryNode; @@ -30,6 +31,7 @@ public GlossaryNode apply(@Nonnull final EntityResponse entityResponse) { GlossaryNode result = new GlossaryNode(); result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.GLOSSARY_NODE); + Urn entityUrn = entityResponse.getUrn(); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); @@ -37,7 +39,7 @@ public GlossaryNode apply(@Nonnull final EntityResponse entityResponse) { glossaryNode.setProperties(mapGlossaryNodeProperties(dataMap))); mappingHelper.mapToResult(GLOSSARY_NODE_KEY_ASPECT_NAME, this::mapGlossaryNodeKey); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (glossaryNode, dataMap) -> - glossaryNode.setOwnership(OwnershipMapper.map(new 
Ownership(dataMap)))); + glossaryNode.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); return mappingHelper.getResult(); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermInfoMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermInfoMapper.java index 9885cce7176a3c..2f99700bc30a14 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermInfoMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermInfoMapper.java @@ -1,26 +1,25 @@ package com.linkedin.datahub.graphql.types.glossary.mappers; +import com.linkedin.common.urn.Urn; import javax.annotation.Nonnull; import com.linkedin.datahub.graphql.generated.GlossaryTermInfo; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; /** * Maps Pegasus {@link RecordTemplate} objects to objects conforming to the GQL schema. * * To be replaced by auto-generated mappers implementations */ -public class GlossaryTermInfoMapper implements ModelMapper { +public class GlossaryTermInfoMapper { public static final GlossaryTermInfoMapper INSTANCE = new GlossaryTermInfoMapper(); - public static GlossaryTermInfo map(@Nonnull final com.linkedin.glossary.GlossaryTermInfo glossaryTermInfo) { - return INSTANCE.apply(glossaryTermInfo); + public static GlossaryTermInfo map(@Nonnull final com.linkedin.glossary.GlossaryTermInfo glossaryTermInfo, Urn entityUrn) { + return INSTANCE.apply(glossaryTermInfo, entityUrn); } - @Override - public GlossaryTermInfo apply(@Nonnull final com.linkedin.glossary.GlossaryTermInfo glossaryTermInfo) { + public GlossaryTermInfo apply(@Nonnull final com.linkedin.glossary.GlossaryTermInfo glossaryTermInfo, Urn entityUrn) { com.linkedin.datahub.graphql.generated.GlossaryTermInfo glossaryTermInfoResult = new com.linkedin.datahub.graphql.generated.GlossaryTermInfo(); glossaryTermInfoResult.setDefinition(glossaryTermInfo.getDefinition()); glossaryTermInfoResult.setDescription(glossaryTermInfo.getDefinition()); @@ -35,7 +34,7 @@ public GlossaryTermInfo apply(@Nonnull final com.linkedin.glossary.GlossaryTermI glossaryTermInfoResult.setSourceUrl(glossaryTermInfo.getSourceUrl().toString()); } if (glossaryTermInfo.hasCustomProperties()) { - glossaryTermInfoResult.setCustomProperties(StringMapMapper.map(glossaryTermInfo.getCustomProperties())); + glossaryTermInfoResult.setCustomProperties(CustomPropertiesMapper.map(glossaryTermInfo.getCustomProperties(), entityUrn)); } return glossaryTermInfoResult; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermMapper.java index e7406673c0b1d2..99cbb7d04a9745 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermMapper.java @@ -3,6 +3,7 @@ import com.linkedin.common.Deprecation; import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import 
com.linkedin.data.template.RecordTemplate; import com.linkedin.datahub.graphql.generated.EntityType; @@ -38,6 +39,8 @@ public static GlossaryTerm map(@Nonnull final EntityResponse entityResponse) { @Override public GlossaryTerm apply(@Nonnull final EntityResponse entityResponse) { GlossaryTerm result = new GlossaryTerm(); + Urn entityUrn = entityResponse.getUrn(); + result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.GLOSSARY_TERM); final String legacyName = GlossaryTermUtils.getGlossaryTermName(entityResponse.getUrn().getId()); @@ -46,11 +49,11 @@ public GlossaryTerm apply(@Nonnull final EntityResponse entityResponse) { MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(GLOSSARY_TERM_KEY_ASPECT_NAME, this::mapGlossaryTermKey); mappingHelper.mapToResult(GLOSSARY_TERM_INFO_ASPECT_NAME, (glossaryTerm, dataMap) -> - glossaryTerm.setGlossaryTermInfo(GlossaryTermInfoMapper.map(new GlossaryTermInfo(dataMap)))); + glossaryTerm.setGlossaryTermInfo(GlossaryTermInfoMapper.map(new GlossaryTermInfo(dataMap), entityUrn))); mappingHelper.mapToResult(GLOSSARY_TERM_INFO_ASPECT_NAME, (glossaryTerm, dataMap) -> - glossaryTerm.setProperties(GlossaryTermPropertiesMapper.map(new GlossaryTermInfo(dataMap)))); + glossaryTerm.setProperties(GlossaryTermPropertiesMapper.map(new GlossaryTermInfo(dataMap), entityUrn))); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (glossaryTerm, dataMap) -> - glossaryTerm.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + glossaryTerm.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, (glossaryTerm, dataMap) -> glossaryTerm.setDeprecation(DeprecationMapper.map(new Deprecation(dataMap)))); mappingHelper.mapToResult(INSTITUTIONAL_MEMORY_ASPECT_NAME, (dataset, dataMap) -> diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermPropertiesMapper.java index d56f63ac8854cf..6b358331833937 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermPropertiesMapper.java @@ -1,26 +1,25 @@ package com.linkedin.datahub.graphql.types.glossary.mappers; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.GlossaryTermProperties; import javax.annotation.Nonnull; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; /** * Maps Pegasus {@link RecordTemplate} objects to objects conforming to the GQL schema. 
* * To be replaced by auto-generated mappers implementations */ -public class GlossaryTermPropertiesMapper implements ModelMapper { +public class GlossaryTermPropertiesMapper { public static final GlossaryTermPropertiesMapper INSTANCE = new GlossaryTermPropertiesMapper(); - public static GlossaryTermProperties map(@Nonnull final com.linkedin.glossary.GlossaryTermInfo glossaryTermInfo) { - return INSTANCE.apply(glossaryTermInfo); + public static GlossaryTermProperties map(@Nonnull final com.linkedin.glossary.GlossaryTermInfo glossaryTermInfo, Urn entityUrn) { + return INSTANCE.apply(glossaryTermInfo, entityUrn); } - @Override - public GlossaryTermProperties apply(@Nonnull final com.linkedin.glossary.GlossaryTermInfo glossaryTermInfo) { + public GlossaryTermProperties apply(@Nonnull final com.linkedin.glossary.GlossaryTermInfo glossaryTermInfo, Urn entityUrn) { com.linkedin.datahub.graphql.generated.GlossaryTermProperties result = new com.linkedin.datahub.graphql.generated.GlossaryTermProperties(); result.setDefinition(glossaryTermInfo.getDefinition()); result.setDescription(glossaryTermInfo.getDefinition()); @@ -35,7 +34,7 @@ public GlossaryTermProperties apply(@Nonnull final com.linkedin.glossary.Glossar result.setSourceUrl(glossaryTermInfo.getSourceUrl().toString()); } if (glossaryTermInfo.hasCustomProperties()) { - result.setCustomProperties(StringMapMapper.map(glossaryTermInfo.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(glossaryTermInfo.getCustomProperties(), entityUrn)); } return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermsMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermsMapper.java index 9e88f12e355b13..a64b0f7dc64fbe 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermsMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/glossary/mappers/GlossaryTermsMapper.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.types.glossary.mappers; +import com.linkedin.common.urn.Urn; import javax.annotation.Nonnull; import java.util.stream.Collectors; @@ -7,7 +8,6 @@ import com.linkedin.datahub.graphql.generated.GlossaryTerms; import com.linkedin.common.GlossaryTermAssociation; import com.linkedin.datahub.graphql.generated.GlossaryTerm; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.glossary.GlossaryTermUtils; /** @@ -15,29 +15,37 @@ * * To be replaced by auto-generated mappers implementations */ -public class GlossaryTermsMapper implements ModelMapper { +public class GlossaryTermsMapper { public static final GlossaryTermsMapper INSTANCE = new GlossaryTermsMapper(); - public static GlossaryTerms map(@Nonnull final com.linkedin.common.GlossaryTerms glossaryTerms) { - return INSTANCE.apply(glossaryTerms); + public static GlossaryTerms map( + @Nonnull final com.linkedin.common.GlossaryTerms glossaryTerms, + @Nonnull final Urn entityUrn + ) { + return INSTANCE.apply(glossaryTerms, entityUrn); } - @Override - public GlossaryTerms apply(@Nonnull final com.linkedin.common.GlossaryTerms glossaryTerms) { + public GlossaryTerms apply(@Nonnull final com.linkedin.common.GlossaryTerms glossaryTerms, @Nonnull final Urn entityUrn) { com.linkedin.datahub.graphql.generated.GlossaryTerms result = new com.linkedin.datahub.graphql.generated.GlossaryTerms(); - 
result.setTerms(glossaryTerms.getTerms().stream().map(this::mapGlossaryTermAssociation).collect(Collectors.toList())); + result.setTerms(glossaryTerms.getTerms().stream().map( + association -> this.mapGlossaryTermAssociation(association, entityUrn) + ).collect(Collectors.toList())); return result; } - private com.linkedin.datahub.graphql.generated.GlossaryTermAssociation mapGlossaryTermAssociation(@Nonnull final GlossaryTermAssociation input) { + private com.linkedin.datahub.graphql.generated.GlossaryTermAssociation mapGlossaryTermAssociation( + @Nonnull final GlossaryTermAssociation input, + @Nonnull final Urn entityUrn + ) { final com.linkedin.datahub.graphql.generated.GlossaryTermAssociation result = new com.linkedin.datahub.graphql.generated.GlossaryTermAssociation(); final GlossaryTerm resultGlossaryTerm = new GlossaryTerm(); resultGlossaryTerm.setType(EntityType.GLOSSARY_TERM); resultGlossaryTerm.setUrn(input.getUrn().toString()); resultGlossaryTerm.setName(GlossaryTermUtils.getGlossaryTermName(input.getUrn().getNameEntity())); result.setTerm(resultGlossaryTerm); + result.setAssociatedUrn(entityUrn.toString()); return result; } -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java index db2ba676234656..fdb151af321496 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java @@ -9,9 +9,9 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.MLFeature; import com.linkedin.datahub.graphql.generated.MLFeatureDataType; @@ -22,6 +22,7 @@ import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -51,13 +52,15 @@ public static MLFeature map(@Nonnull final EntityResponse entityResponse) { @Override public MLFeature apply(@Nonnull final EntityResponse entityResponse) { final MLFeature result = new MLFeature(); + Urn entityUrn = entityResponse.getUrn(); + result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.MLFEATURE); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(ML_FEATURE_KEY_ASPECT_NAME, this::mapMLFeatureKey); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (mlFeature, dataMap) -> - mlFeature.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + mlFeature.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); 
mappingHelper.mapToResult(ML_FEATURE_PROPERTIES_ASPECT_NAME, this::mapMLFeatureProperties); mappingHelper.mapToResult(INSTITUTIONAL_MEMORY_ASPECT_NAME, (mlFeature, dataMap) -> mlFeature.setInstitutionalMemory(InstitutionalMemoryMapper.map(new InstitutionalMemory(dataMap)))); @@ -66,9 +69,9 @@ public MLFeature apply(@Nonnull final EntityResponse entityResponse) { mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, (mlFeature, dataMap) -> mlFeature.setDeprecation(DeprecationMapper.map(new Deprecation(dataMap)))); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, this::mapGlobalTags); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (entity, dataMap) -> this.mapGlobalTags(entity, dataMap, entityUrn)); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (entity, dataMap) -> - entity.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + entity.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(ML_FEATURE_EDITABLE_PROPERTIES_ASPECT_NAME, this::mapEditableProperties); mappingHelper.mapToResult(DATA_PLATFORM_INSTANCE_ASPECT_NAME, (dataset, dataMap) -> @@ -93,20 +96,16 @@ private void mapMLFeatureProperties(@Nonnull MLFeature mlFeature, @Nonnull DataM } } - private void mapGlobalTags(MLFeature entity, DataMap dataMap) { + private void mapGlobalTags(MLFeature entity, DataMap dataMap, Urn entityUrn) { GlobalTags globalTags = new GlobalTags(dataMap); - com.linkedin.datahub.graphql.generated.GlobalTags graphQlGlobalTags = GlobalTagsMapper.map(globalTags); + com.linkedin.datahub.graphql.generated.GlobalTags graphQlGlobalTags = GlobalTagsMapper.map(globalTags, entityUrn); entity.setTags(graphQlGlobalTags); } private void mapDomains(@Nonnull MLFeature entity, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); // Currently we only take the first domain if it exists. 
- if (domains.getDomains().size() > 0) { - entity.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + entity.setDomain(DomainAssociationMapper.map(domains, entity.getUrn())); } private void mapEditableProperties(MLFeature entity, DataMap dataMap) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java index f700e44b4f564e..a881cb9313274d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java @@ -8,10 +8,10 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.datahub.graphql.generated.DataPlatform; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.MLFeatureTable; import com.linkedin.datahub.graphql.generated.MLFeatureTableEditableProperties; @@ -21,6 +21,7 @@ import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -50,14 +51,16 @@ public static MLFeatureTable map(@Nonnull final EntityResponse entityResponse) { @Override public MLFeatureTable apply(@Nonnull final EntityResponse entityResponse) { final MLFeatureTable result = new MLFeatureTable(); + Urn entityUrn = entityResponse.getUrn(); + result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.MLFEATURE_TABLE); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (mlFeatureTable, dataMap) -> - mlFeatureTable.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + mlFeatureTable.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); mappingHelper.mapToResult(ML_FEATURE_TABLE_KEY_ASPECT_NAME, this::mapMLFeatureTableKey); - mappingHelper.mapToResult(ML_FEATURE_TABLE_PROPERTIES_ASPECT_NAME, this::mapMLFeatureTableProperties); + mappingHelper.mapToResult(ML_FEATURE_TABLE_PROPERTIES_ASPECT_NAME, (entity, dataMap) -> this.mapMLFeatureTableProperties(entity, dataMap, entityUrn)); mappingHelper.mapToResult(INSTITUTIONAL_MEMORY_ASPECT_NAME, (mlFeatureTable, dataMap) -> mlFeatureTable.setInstitutionalMemory(InstitutionalMemoryMapper.map(new InstitutionalMemory(dataMap)))); mappingHelper.mapToResult(STATUS_ASPECT_NAME, (mlFeatureTable, dataMap) -> @@ -65,9 +68,9 @@ public MLFeatureTable apply(@Nonnull final EntityResponse entityResponse) { mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, (mlFeatureTable, dataMap) -> mlFeatureTable.setDeprecation(DeprecationMapper.map(new 
Deprecation(dataMap)))); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, this::mapGlobalTags); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (entity, dataMap) -> this.mapGlobalTags(entity, dataMap, entityUrn)); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (entity, dataMap) -> - entity.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + entity.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(ML_FEATURE_TABLE_EDITABLE_PROPERTIES_ASPECT_NAME, this::mapEditableProperties); mappingHelper.mapToResult(DATA_PLATFORM_INSTANCE_ASPECT_NAME, (dataset, dataMap) -> @@ -84,27 +87,23 @@ private void mapMLFeatureTableKey(@Nonnull MLFeatureTable mlFeatureTable, @Nonnu mlFeatureTable.setPlatform(partialPlatform); } - private void mapMLFeatureTableProperties(@Nonnull MLFeatureTable mlFeatureTable, @Nonnull DataMap dataMap) { + private void mapMLFeatureTableProperties(@Nonnull MLFeatureTable mlFeatureTable, @Nonnull DataMap dataMap, Urn entityUrn) { MLFeatureTableProperties featureTableProperties = new MLFeatureTableProperties(dataMap); - mlFeatureTable.setFeatureTableProperties(MLFeatureTablePropertiesMapper.map(featureTableProperties)); - mlFeatureTable.setProperties(MLFeatureTablePropertiesMapper.map(featureTableProperties)); + mlFeatureTable.setFeatureTableProperties(MLFeatureTablePropertiesMapper.map(featureTableProperties, entityUrn)); + mlFeatureTable.setProperties(MLFeatureTablePropertiesMapper.map(featureTableProperties, entityUrn)); mlFeatureTable.setDescription(featureTableProperties.getDescription()); } - private void mapGlobalTags(MLFeatureTable entity, DataMap dataMap) { + private void mapGlobalTags(MLFeatureTable entity, DataMap dataMap, Urn entityUrn) { GlobalTags globalTags = new GlobalTags(dataMap); - com.linkedin.datahub.graphql.generated.GlobalTags graphQlGlobalTags = GlobalTagsMapper.map(globalTags); + com.linkedin.datahub.graphql.generated.GlobalTags graphQlGlobalTags = GlobalTagsMapper.map(globalTags, entityUrn); entity.setTags(graphQlGlobalTags); } private void mapDomains(@Nonnull MLFeatureTable entity, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); // Currently we only take the first domain if it exists. 
- if (domains.getDomains().size() > 0) { - entity.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + entity.setDomain(DomainAssociationMapper.map(domains, entity.getUrn())); } private void mapEditableProperties(MLFeatureTable entity, DataMap dataMap) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTablePropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTablePropertiesMapper.java index 578b7d0731f4a4..13e3c795997250 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTablePropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTablePropertiesMapper.java @@ -1,24 +1,23 @@ package com.linkedin.datahub.graphql.types.mlmodel.mappers; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.MLFeature; import com.linkedin.datahub.graphql.generated.MLFeatureTableProperties; import com.linkedin.datahub.graphql.generated.MLPrimaryKey; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import lombok.NonNull; import java.util.stream.Collectors; -public class MLFeatureTablePropertiesMapper implements ModelMapper { +public class MLFeatureTablePropertiesMapper { public static final MLFeatureTablePropertiesMapper INSTANCE = new MLFeatureTablePropertiesMapper(); - public static MLFeatureTableProperties map(@NonNull final com.linkedin.ml.metadata.MLFeatureTableProperties mlFeatureTableProperties) { - return INSTANCE.apply(mlFeatureTableProperties); + public static MLFeatureTableProperties map(@NonNull final com.linkedin.ml.metadata.MLFeatureTableProperties mlFeatureTableProperties, Urn entityUrn) { + return INSTANCE.apply(mlFeatureTableProperties, entityUrn); } - @Override - public MLFeatureTableProperties apply(@NonNull final com.linkedin.ml.metadata.MLFeatureTableProperties mlFeatureTableProperties) { + public MLFeatureTableProperties apply(@NonNull final com.linkedin.ml.metadata.MLFeatureTableProperties mlFeatureTableProperties, Urn entityUrn) { final MLFeatureTableProperties result = new MLFeatureTableProperties(); result.setDescription(mlFeatureTableProperties.getDescription()); @@ -43,7 +42,7 @@ public MLFeatureTableProperties apply(@NonNull final com.linkedin.ml.metadata.ML } if (mlFeatureTableProperties.hasCustomProperties()) { - result.setCustomProperties(StringMapMapper.map(mlFeatureTableProperties.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(mlFeatureTableProperties.getCustomProperties(), entityUrn)); } return result; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java index 36d2918bf4f041..a4ae4fc968707f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java @@ -6,10 +6,10 @@ import com.linkedin.common.GlossaryTerms; import com.linkedin.common.Ownership; import com.linkedin.common.Status; 
+import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.datahub.graphql.generated.DataPlatform; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FabricType; import com.linkedin.datahub.graphql.generated.MLModelGroup; @@ -19,6 +19,7 @@ import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -48,12 +49,14 @@ public static MLModelGroup map(@Nonnull final EntityResponse entityResponse) { @Override public MLModelGroup apply(@Nonnull final EntityResponse entityResponse) { final MLModelGroup result = new MLModelGroup(); + Urn entityUrn = entityResponse.getUrn(); + result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.MLMODEL_GROUP); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (mlModelGroup, dataMap) -> - mlModelGroup.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + mlModelGroup.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); mappingHelper.mapToResult(ML_MODEL_GROUP_KEY_ASPECT_NAME, this::mapToMLModelGroupKey); mappingHelper.mapToResult(ML_MODEL_GROUP_PROPERTIES_ASPECT_NAME, this::mapToMLModelGroupProperties); mappingHelper.mapToResult(STATUS_ASPECT_NAME, (mlModelGroup, dataMap) -> @@ -61,9 +64,9 @@ public MLModelGroup apply(@Nonnull final EntityResponse entityResponse) { mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, (mlModelGroup, dataMap) -> mlModelGroup.setDeprecation(DeprecationMapper.map(new Deprecation(dataMap)))); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, this::mapGlobalTags); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (entity, dataMap) -> this.mapGlobalTags(entity, dataMap, entityUrn)); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (entity, dataMap) -> - entity.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + entity.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(ML_MODEL_GROUP_EDITABLE_PROPERTIES_ASPECT_NAME, this::mapEditableProperties); mappingHelper.mapToResult(DATA_PLATFORM_INSTANCE_ASPECT_NAME, (dataset, dataMap) -> @@ -89,20 +92,16 @@ private void mapToMLModelGroupProperties(MLModelGroup mlModelGroup, DataMap data } } - private void mapGlobalTags(MLModelGroup entity, DataMap dataMap) { + private void mapGlobalTags(MLModelGroup entity, DataMap dataMap, Urn entityUrn) { GlobalTags globalTags = new GlobalTags(dataMap); - com.linkedin.datahub.graphql.generated.GlobalTags graphQlGlobalTags = GlobalTagsMapper.map(globalTags); + com.linkedin.datahub.graphql.generated.GlobalTags graphQlGlobalTags = GlobalTagsMapper.map(globalTags, entityUrn); entity.setTags(graphQlGlobalTags); } private void mapDomains(@Nonnull MLModelGroup entity, @Nonnull DataMap dataMap) { final Domains 
domains = new Domains(dataMap); // Currently we only take the first domain if it exists. - if (domains.getDomains().size() > 0) { - entity.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + entity.setDomain(DomainAssociationMapper.map(domains, entity.getUrn())); } private void mapEditableProperties(MLModelGroup entity, DataMap dataMap) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java index 5df8e86a310dd0..71a19cd69d8072 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java @@ -8,10 +8,10 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.datahub.graphql.generated.DataPlatform; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FabricType; import com.linkedin.datahub.graphql.generated.MLModel; @@ -23,6 +23,7 @@ import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -62,15 +63,17 @@ public static MLModel map(@Nonnull final EntityResponse entityResponse) { @Override public MLModel apply(@Nonnull final EntityResponse entityResponse) { final MLModel result = new MLModel(); + Urn entityUrn = entityResponse.getUrn(); + result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.MLMODEL); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(ML_MODEL_KEY_ASPECT_NAME, this::mapMLModelKey); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (mlModel, dataMap) -> - mlModel.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); - mappingHelper.mapToResult(ML_MODEL_PROPERTIES_ASPECT_NAME, this::mapMLModelProperties); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, this::mapGlobalTags); + mlModel.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); + mappingHelper.mapToResult(ML_MODEL_PROPERTIES_ASPECT_NAME, (entity, dataMap) -> this.mapMLModelProperties(entity, dataMap, entityUrn)); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (mlModel, dataMap) -> this.mapGlobalTags(mlModel, dataMap, entityUrn)); mappingHelper.mapToResult(INTENDED_USE_ASPECT_NAME, (mlModel, dataMap) -> mlModel.setIntendedUse(IntendedUseMapper.map(new IntendedUse(dataMap)))); mappingHelper.mapToResult(ML_MODEL_FACTOR_PROMPTS_ASPECT_NAME, (mlModel, dataMap) -> @@ -101,7 +104,7 @@ public MLModel apply(@Nonnull final EntityResponse entityResponse) { mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, 
(mlModel, dataMap) -> mlModel.setDeprecation(DeprecationMapper.map(new Deprecation(dataMap)))); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (entity, dataMap) -> - entity.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + entity.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(ML_MODEL_EDITABLE_PROPERTIES_ASPECT_NAME, this::mapEditableProperties); mappingHelper.mapToResult(DATA_PLATFORM_INSTANCE_ASPECT_NAME, (dataset, dataMap) -> @@ -119,17 +122,17 @@ private void mapMLModelKey(MLModel mlModel, DataMap dataMap) { mlModel.setPlatform(partialPlatform); } - private void mapMLModelProperties(MLModel mlModel, DataMap dataMap) { + private void mapMLModelProperties(MLModel mlModel, DataMap dataMap, Urn entityUrn) { MLModelProperties modelProperties = new MLModelProperties(dataMap); - mlModel.setProperties(MLModelPropertiesMapper.map(modelProperties)); + mlModel.setProperties(MLModelPropertiesMapper.map(modelProperties, entityUrn)); if (modelProperties.getDescription() != null) { mlModel.setDescription(modelProperties.getDescription()); } } - private void mapGlobalTags(MLModel mlModel, DataMap dataMap) { + private void mapGlobalTags(MLModel mlModel, DataMap dataMap, Urn entityUrn) { GlobalTags globalTags = new GlobalTags(dataMap); - com.linkedin.datahub.graphql.generated.GlobalTags graphQlGlobalTags = GlobalTagsMapper.map(globalTags); + com.linkedin.datahub.graphql.generated.GlobalTags graphQlGlobalTags = GlobalTagsMapper.map(globalTags, entityUrn); mlModel.setGlobalTags(graphQlGlobalTags); mlModel.setTags(graphQlGlobalTags); } @@ -143,15 +146,10 @@ private void mapSourceCode(MLModel mlModel, DataMap dataMap) { mlModel.setSourceCode(graphQlSourceCode); } - private void mapDomains(@Nonnull MLModel entity, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); // Currently we only take the first domain if it exists. 
- if (domains.getDomains().size() > 0) { - entity.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + entity.setDomain(DomainAssociationMapper.map(domains, entity.getUrn())); } private void mapEditableProperties(MLModel entity, DataMap dataMap) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java index e19bb9b2738602..950cfcc0689667 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java @@ -2,25 +2,23 @@ import com.linkedin.datahub.graphql.generated.MLModelGroup; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import java.util.stream.Collectors; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.MLModelProperties; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import lombok.NonNull; -public class MLModelPropertiesMapper implements ModelMapper { +public class MLModelPropertiesMapper { public static final MLModelPropertiesMapper INSTANCE = new MLModelPropertiesMapper(); - public static MLModelProperties map(@NonNull final com.linkedin.ml.metadata.MLModelProperties mlModelProperties) { - return INSTANCE.apply(mlModelProperties); + public static MLModelProperties map(@NonNull final com.linkedin.ml.metadata.MLModelProperties mlModelProperties, Urn entityUrn) { + return INSTANCE.apply(mlModelProperties, entityUrn); } - @Override - public MLModelProperties apply(@NonNull final com.linkedin.ml.metadata.MLModelProperties mlModelProperties) { + public MLModelProperties apply(@NonNull final com.linkedin.ml.metadata.MLModelProperties mlModelProperties, Urn entityUrn) { final MLModelProperties result = new MLModelProperties(); result.setDate(mlModelProperties.getDate()); @@ -34,7 +32,7 @@ public MLModelProperties apply(@NonNull final com.linkedin.ml.metadata.MLModelPr param -> MLHyperParamMapper.map(param)).collect(Collectors.toList())); } - result.setCustomProperties(StringMapMapper.map(mlModelProperties.getCustomProperties())); + result.setCustomProperties(CustomPropertiesMapper.map(mlModelProperties.getCustomProperties(), entityUrn)); if (mlModelProperties.getTrainingMetrics() != null) { result.setTrainingMetrics(mlModelProperties.getTrainingMetrics().stream().map(metric -> diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java index e33921fce14ec6..b7d83a6111d9f4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java @@ -7,9 +7,9 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.datahub.graphql.generated.Domain; import 
com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.MLFeatureDataType; import com.linkedin.datahub.graphql.generated.MLPrimaryKey; @@ -20,6 +20,7 @@ import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -49,12 +50,14 @@ public static MLPrimaryKey map(@Nonnull final EntityResponse entityResponse) { @Override public MLPrimaryKey apply(@Nonnull final EntityResponse entityResponse) { final MLPrimaryKey result = new MLPrimaryKey(); + Urn entityUrn = entityResponse.getUrn(); + result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.MLPRIMARY_KEY); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (mlPrimaryKey, dataMap) -> - mlPrimaryKey.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + mlPrimaryKey.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn))); mappingHelper.mapToResult(ML_PRIMARY_KEY_KEY_ASPECT_NAME, this::mapMLPrimaryKeyKey); mappingHelper.mapToResult(ML_PRIMARY_KEY_PROPERTIES_ASPECT_NAME, this::mapMLPrimaryKeyProperties); mappingHelper.mapToResult(INSTITUTIONAL_MEMORY_ASPECT_NAME, (mlPrimaryKey, dataMap) -> @@ -64,9 +67,9 @@ public MLPrimaryKey apply(@Nonnull final EntityResponse entityResponse) { mappingHelper.mapToResult(DEPRECATION_ASPECT_NAME, (mlPrimaryKey, dataMap) -> mlPrimaryKey.setDeprecation(DeprecationMapper.map(new Deprecation(dataMap)))); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, this::mapGlobalTags); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (entity, dataMap) -> this.mapGlobalTags(entity, dataMap, entityUrn)); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (entity, dataMap) -> - entity.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + entity.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(ML_PRIMARY_KEY_EDITABLE_PROPERTIES_ASPECT_NAME, this::mapEditableProperties); mappingHelper.mapToResult(DATA_PLATFORM_INSTANCE_ASPECT_NAME, (dataset, dataMap) -> @@ -90,20 +93,16 @@ private void mapMLPrimaryKeyProperties(MLPrimaryKey mlPrimaryKey, DataMap dataMa } } - private void mapGlobalTags(MLPrimaryKey entity, DataMap dataMap) { + private void mapGlobalTags(MLPrimaryKey entity, DataMap dataMap, Urn entityUrn) { GlobalTags globalTags = new GlobalTags(dataMap); - com.linkedin.datahub.graphql.generated.GlobalTags graphQlGlobalTags = GlobalTagsMapper.map(globalTags); + com.linkedin.datahub.graphql.generated.GlobalTags graphQlGlobalTags = GlobalTagsMapper.map(globalTags, entityUrn); entity.setTags(graphQlGlobalTags); } private void mapDomains(@Nonnull MLPrimaryKey entity, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); // Currently we only take the first domain if it exists. 
- if (domains.getDomains().size() > 0) { - entity.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + entity.setDomain(DomainAssociationMapper.map(domains, entity.getUrn())); } private void mapEditableProperties(MLPrimaryKey entity, DataMap dataMap) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/mappers/NotebookMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/mappers/NotebookMapper.java index 793fa7bcadb5cc..610bdb309114dd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/mappers/NotebookMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/mappers/NotebookMapper.java @@ -7,12 +7,12 @@ import com.linkedin.common.Ownership; import com.linkedin.common.Status; import com.linkedin.common.SubTypes; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; import com.linkedin.datahub.graphql.generated.ChartCell; import com.linkedin.datahub.graphql.generated.DataPlatform; -import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.Notebook; import com.linkedin.datahub.graphql.generated.NotebookCell; @@ -28,8 +28,9 @@ import com.linkedin.datahub.graphql.types.common.mappers.InstitutionalMemoryMapper; import com.linkedin.datahub.graphql.types.common.mappers.OwnershipMapper; import com.linkedin.datahub.graphql.types.common.mappers.StatusMapper; -import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.domain.DomainAssociationMapper; import com.linkedin.datahub.graphql.types.glossary.mappers.GlossaryTermsMapper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; @@ -54,23 +55,28 @@ public static Notebook map(EntityResponse response) { @Override public Notebook apply(EntityResponse response) { final Notebook convertedNotebook = new Notebook(); + Urn entityUrn = response.getUrn(); + convertedNotebook.setUrn(response.getUrn().toString()); convertedNotebook.setType(EntityType.NOTEBOOK); EnvelopedAspectMap aspectMap = response.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, convertedNotebook); mappingHelper.mapToResult(NOTEBOOK_KEY_ASPECT_NAME, this::mapNotebookKey); - mappingHelper.mapToResult(NOTEBOOK_INFO_ASPECT_NAME, this::mapNotebookInfo); + mappingHelper.mapToResult(NOTEBOOK_INFO_ASPECT_NAME, (entity, dataMap) -> this.mapNotebookInfo(entity, dataMap, entityUrn)); mappingHelper.mapToResult(NOTEBOOK_CONTENT_ASPECT_NAME, this::mapNotebookContent); mappingHelper.mapToResult(EDITABLE_NOTEBOOK_PROPERTIES_ASPECT_NAME, this::mapEditableNotebookProperties); - mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (notebook, dataMap) -> notebook.setOwnership(OwnershipMapper.map(new Ownership(dataMap)))); + mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (notebook, dataMap) -> notebook.setOwnership( + OwnershipMapper.map(new Ownership(dataMap), entityUrn) + )); 
mappingHelper.mapToResult(STATUS_ASPECT_NAME, (notebook, dataMap) -> notebook.setStatus(StatusMapper.map(new Status(dataMap)))); - mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (notebook, dataMap) -> notebook.setTags(GlobalTagsMapper.map(new GlobalTags(dataMap)))); + mappingHelper.mapToResult(GLOBAL_TAGS_ASPECT_NAME, (notebook, dataMap) -> + notebook.setTags(GlobalTagsMapper.map(new GlobalTags(dataMap), entityUrn))); mappingHelper.mapToResult(INSTITUTIONAL_MEMORY_ASPECT_NAME, (notebook, dataMap) -> notebook.setInstitutionalMemory(InstitutionalMemoryMapper.map(new InstitutionalMemory(dataMap)))); mappingHelper.mapToResult(DOMAINS_ASPECT_NAME, this::mapDomains); mappingHelper.mapToResult(SUB_TYPES_ASPECT_NAME, this::mapSubTypes); mappingHelper.mapToResult(GLOSSARY_TERMS_ASPECT_NAME, (notebook, dataMap) -> - notebook.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap)))); + notebook.setGlossaryTerms(GlossaryTermsMapper.map(new GlossaryTerms(dataMap), entityUrn))); mappingHelper.mapToResult(DATA_PLATFORM_INSTANCE_ASPECT_NAME, this::mapDataPlatformInstance); return mappingHelper.getResult(); } @@ -100,7 +106,7 @@ private void mapNotebookKey(@Nonnull Notebook notebook, @Nonnull DataMap dataMap notebook.setTool(notebookKey.getNotebookTool()); } - private void mapNotebookInfo(@Nonnull Notebook notebook, @Nonnull DataMap dataMap) { + private void mapNotebookInfo(@Nonnull Notebook notebook, @Nonnull DataMap dataMap, Urn entityUrn) { final com.linkedin.notebook.NotebookInfo gmsNotebookInfo = new com.linkedin.notebook.NotebookInfo(dataMap); final NotebookInfo notebookInfo = new NotebookInfo(); notebookInfo.setTitle(gmsNotebookInfo.getTitle()); @@ -112,7 +118,7 @@ private void mapNotebookInfo(@Nonnull Notebook notebook, @Nonnull DataMap dataMa } if (gmsNotebookInfo.hasCustomProperties()) { - notebookInfo.setCustomProperties(StringMapMapper.map(gmsNotebookInfo.getCustomProperties())); + notebookInfo.setCustomProperties(CustomPropertiesMapper.map(gmsNotebookInfo.getCustomProperties(), entityUrn)); } notebook.setInfo(notebookInfo); } @@ -188,10 +194,6 @@ private void mapEditableNotebookProperties(@Nonnull Notebook notebook, @Nonnull private void mapDomains(@Nonnull Notebook notebook, @Nonnull DataMap dataMap) { final Domains domains = new Domains(dataMap); // Currently we only take the first domain if it exists. 
- if (domains.getDomains().size() > 0) { - notebook.setDomain(Domain.builder() - .setType(EntityType.DOMAIN) - .setUrn(domains.getDomains().get(0).toString()).build()); - } + notebook.setDomain(DomainAssociationMapper.map(domains, notebook.getUrn())); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/GlobalTagsMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/GlobalTagsMapper.java index 8acc40f23f98ec..f4d5f0a549a0ed 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/GlobalTagsMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/GlobalTagsMapper.java @@ -2,31 +2,37 @@ import com.linkedin.common.GlobalTags; import com.linkedin.common.TagAssociation; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.Tag; -import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import javax.annotation.Nonnull; import java.util.stream.Collectors; -public class GlobalTagsMapper implements ModelMapper { +public class GlobalTagsMapper { public static final GlobalTagsMapper INSTANCE = new GlobalTagsMapper(); - public static com.linkedin.datahub.graphql.generated.GlobalTags map(@Nonnull final GlobalTags standardTags) { - return INSTANCE.apply(standardTags); + public static com.linkedin.datahub.graphql.generated.GlobalTags map( + @Nonnull final GlobalTags standardTags, + @Nonnull final Urn entityUrn + ) { + return INSTANCE.apply(standardTags, entityUrn); } - @Override - public com.linkedin.datahub.graphql.generated.GlobalTags apply(@Nonnull final GlobalTags input) { + public com.linkedin.datahub.graphql.generated.GlobalTags apply(@Nonnull final GlobalTags input, @Nonnull final Urn entityUrn) { final com.linkedin.datahub.graphql.generated.GlobalTags result = new com.linkedin.datahub.graphql.generated.GlobalTags(); - result.setTags(input.getTags().stream().map(this::mapTagAssociation).collect(Collectors.toList())); + result.setTags(input.getTags().stream().map(tag -> this.mapTagAssociation(tag, entityUrn)).collect(Collectors.toList())); return result; } - private com.linkedin.datahub.graphql.generated.TagAssociation mapTagAssociation(@Nonnull final TagAssociation input) { + private com.linkedin.datahub.graphql.generated.TagAssociation mapTagAssociation( + @Nonnull final TagAssociation input, + @Nonnull final Urn entityUrn + ) { final com.linkedin.datahub.graphql.generated.TagAssociation result = new com.linkedin.datahub.graphql.generated.TagAssociation(); final Tag resultTag = new Tag(); resultTag.setUrn(input.getTag().toString()); result.setTag(resultTag); + result.setAssociatedUrn(entityUrn.toString()); return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/TagMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/TagMapper.java index 214a63b7df065c..43736b412b0045 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/TagMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/mappers/TagMapper.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.types.tag.mappers; import com.linkedin.common.Ownership; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.GetMode; import com.linkedin.data.template.RecordTemplate; @@ -34,6 +35,7 @@ public static Tag map(@Nonnull final 
EntityResponse entityResponse) {
   @Override
   public Tag apply(@Nonnull final EntityResponse entityResponse) {
     final Tag result = new Tag();
+    Urn entityUrn = entityResponse.getUrn();
     result.setUrn(entityResponse.getUrn().toString());
     result.setType(EntityType.TAG);
 
@@ -45,7 +47,7 @@ public Tag apply(@Nonnull final EntityResponse entityResponse) {
     mappingHelper.mapToResult(TAG_KEY_ASPECT_NAME, this::mapTagKey);
     mappingHelper.mapToResult(TAG_PROPERTIES_ASPECT_NAME, this::mapTagProperties);
     mappingHelper.mapToResult(OWNERSHIP_ASPECT_NAME, (tag, dataMap) ->
-        tag.setOwnership(OwnershipMapper.map(new Ownership(dataMap))));
+        tag.setOwnership(OwnershipMapper.map(new Ownership(dataMap), entityUrn)));
 
     if (result.getProperties() != null && result.getProperties().getName() == null) {
       result.getProperties().setName(legacyName);
diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql
index b0c7a9bd7b1282..cb35bc72ffc234 100644
--- a/datahub-graphql-core/src/main/resources/entity.graphql
+++ b/datahub-graphql-core/src/main/resources/entity.graphql
@@ -625,6 +625,11 @@ input LineageInput {
   The number of results to be returned
   """
   count: Int
+
+  """
+  Optional flag to not merge siblings in the response. They are merged by default.
+  """
+  separateSiblings: Boolean
 }
 
 """
@@ -916,7 +921,7 @@ type Dataset implements EntityWithRelationships & Entity {
   """
   The Domain associated with the Dataset
   """
-  domain: Domain
+  domain: DomainAssociation
 
   """
   Statistics about how this Dataset is used
@@ -1137,7 +1142,7 @@ type VersionedDataset implements Entity {
   """
   The Domain associated with the Dataset
   """
-  domain: Domain
+  domain: DomainAssociation
 
   """
   Experimental! The resolved health status of the Dataset
@@ -1278,7 +1283,7 @@ type DatasetProperties {
   """
   Custom properties of the Dataset
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 
   """
   External URL associated with the Dataset
@@ -1392,7 +1397,7 @@ type GlossaryTermInfo {
   """
   Properties of the glossary term
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 
   """
   Schema definition of glossary term
@@ -1437,7 +1442,7 @@ type GlossaryTermProperties {
   """
   Properties of the glossary term
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 
   """
   Schema definition of glossary term
@@ -1808,7 +1813,7 @@ type Container implements Entity {
   """
   The Domain associated with the Dataset
   """
-  domain: Domain
+  domain: DomainAssociation
 
   """
   The deprecation status of the container
@@ -1848,7 +1853,7 @@ type ContainerProperties {
   """
   Custom properties of the Container
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 
   """
   Native platform URL of the Container
@@ -2562,6 +2567,26 @@ type StringMapEntry {
   value: String
 }
 
+"""
+An entry in a custom properties map represented as a tuple
+"""
+type CustomPropertiesEntry {
+  """
+  The key of the map entry
+  """
+  key: String!
+
+  """
+  The value of the map entry
+  """
+  value: String
+
+  """
+  The urn of the entity this property came from for tracking purposes e.g. when sibling nodes are merged together
+  """
+  associatedUrn: String!
+}
+
 """
 The origin of Ownership metadata associated with a Metadata Entity
 """
@@ -3188,6 +3213,11 @@ type Owner {
   Source information for the ownership
   """
   source: OwnershipSource
+
+  """
+  Reference back to the owned urn for tracking purposes e.g. when sibling nodes are merged together
+  """
+  associatedUrn: String!
 }
 
 """
@@ -3299,6 +3329,11 @@ type TagAssociation {
   The tag itself
   """
   tag: Tag!
+
+  """
+  Reference back to the tagged urn for tracking purposes e.g. when sibling nodes are merged together
+  """
+  associatedUrn: String!
 }
 
 """
@@ -3338,6 +3373,11 @@ type GlossaryTermAssociation {
   The glossary term itself
   """
   term: GlossaryTerm!
+
+  """
+  Reference back to the associated urn for tracking purposes e.g. when sibling nodes are merged together
+  """
+  associatedUrn: String!
 }
 
 """
@@ -3807,7 +3847,7 @@ type Notebook implements Entity {
   """
   The Domain associated with the Notebook
   """
-  domain: Domain
+  domain: DomainAssociation
 
   """
   The specific instance of the data platform that this entity belongs to
@@ -4064,7 +4104,7 @@ type Dashboard implements EntityWithRelationships & Entity {
   """
   The Domain associated with the Dashboard
   """
-  domain: Domain
+  domain: DomainAssociation
 
   """
   The specific instance of the data platform that this entity belongs to
@@ -4140,7 +4180,7 @@ type DashboardInfo {
   """
   A list of platform specific metadata tuples
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 
   """
   The time when this dashboard last refreshed
@@ -4185,7 +4225,7 @@ type NotebookInfo {
   """
   A list of platform specific metadata tuples
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 
   """
   Captures information about who created/last modified/deleted this Notebook and when
@@ -4221,7 +4261,7 @@ type DashboardProperties {
   """
   A list of platform specific metadata tuples
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 
   """
   The time when this dashboard last refreshed
@@ -4327,7 +4367,7 @@ type Chart implements EntityWithRelationships & Entity {
   """
   The Domain associated with the Chart
   """
-  domain: Domain
+  domain: DomainAssociation
 
   """
   The specific instance of the data platform that this entity belongs to
@@ -4407,7 +4447,7 @@ type ChartInfo {
   """
   A list of platform specific metadata tuples
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 
   """
   The time when this chart last refreshed
@@ -4462,7 +4502,7 @@ type ChartProperties {
   """
   A list of platform specific metadata tuples
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 
   """
   The time when this chart last refreshed
@@ -4664,7 +4704,7 @@ type DataFlow implements EntityWithRelationships & Entity {
   """
   The Domain associated with the DataFlow
   """
-  domain: Domain
+  domain: DomainAssociation
 
   """
   The specific instance of the data platform that this entity belongs to
@@ -4733,7 +4773,7 @@ type DataFlowInfo {
   """
   A list of platform specific metadata tuples
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 }
 
 """
@@ -4763,7 +4803,7 @@ type DataFlowProperties {
   """
   A list of platform specific metadata tuples
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
 }
 
 """
@@ -4840,7 +4880,7 @@ type DataJob implements EntityWithRelationships & Entity {
   """
   The Domain associated with the Data Job
   """
-  domain: Domain
+  domain: DomainAssociation
 
   """
   Granular API for querying edges extending from this entity
@@ -5023,7 +5063,7 @@ type DataJobInfo {
   """
   A list of platform specific metadata tuples
   """
-  customProperties: [StringMapEntry!]
+  customProperties: [CustomPropertiesEntry!]
} """ @@ -5048,7 +5088,7 @@ type DataJobProperties { """ A list of platform specific metadata tuples """ - customProperties: [StringMapEntry!] + customProperties: [CustomPropertiesEntry!] } """ @@ -7204,7 +7244,7 @@ type MLModel implements EntityWithRelationships & Entity { """ The Domain associated with the entity """ - domain: Domain + domain: DomainAssociation """ An additional set of of read write properties @@ -7295,7 +7335,7 @@ type MLModelGroup implements EntityWithRelationships & Entity { """ The Domain associated with the entity """ - domain: Domain + domain: DomainAssociation """ An additional set of of read write properties @@ -7404,7 +7444,7 @@ type MLFeature implements EntityWithRelationships & Entity { """ The Domain associated with the entity """ - domain: Domain + domain: DomainAssociation """ An additional set of of read write properties @@ -7454,7 +7494,7 @@ type MLModelProperties { groups: [MLModelGroup] - customProperties: [StringMapEntry!] + customProperties: [CustomPropertiesEntry!] } type MLFeatureProperties { @@ -7561,7 +7601,7 @@ type MLPrimaryKey implements EntityWithRelationships & Entity { """ The Domain associated with the entity """ - domain: Domain + domain: DomainAssociation """ An additional set of of read write properties @@ -7668,7 +7708,7 @@ type MLFeatureTable implements EntityWithRelationships & Entity { """ The Domain associated with the entity """ - domain: Domain + domain: DomainAssociation """ An additional set of of read write properties @@ -7719,7 +7759,7 @@ description: String mlPrimaryKeys: [MLPrimaryKey] - customProperties: [StringMapEntry!] + customProperties: [CustomPropertiesEntry!] } type HyperParameterMap { @@ -7989,6 +8029,18 @@ type SubTypes { typeNames: [String!] } +type DomainAssociation { + """ + The domain related to the assocaited urn + """ + domain: Domain! + + """ + Reference back to the tagged urn for tracking purposes e.g. when sibling nodes are merged together + """ + associatedUrn: String! +} + """ A domain, or a logical grouping of Metadata Entities """ diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index 18535e905f812e..7e1aca061850ec 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -67,6 +67,7 @@ const user1 = { colorHex: 'sample tag color', }, }, + associatedUrn: 'urn:li:corpuser:1', }, ], }, @@ -109,6 +110,7 @@ const user2 = { colorHex: 'sample tag color', }, }, + associatedUrn: 'urn:li:corpuser:3', }, ], }, @@ -152,9 +154,11 @@ export const dataset1 = { customProperties: [ { key: 'TestProperty', + associatedUrn: 'urn:li:dataset:1', value: 'My property value.', }, { + associatedUrn: 'urn:li:dataset:1', key: 'AnotherTestProperty', value: 'My other property value.', }, @@ -173,12 +177,14 @@ export const dataset1 = { owner: { ...user1, }, + associatedUrn: 'urn:li:dataset:1', type: 'DATAOWNER', }, { owner: { ...user2, }, + associatedUrn: 'urn:li:dataset:1', type: 'DELEGATE', }, ], @@ -260,6 +266,7 @@ export const dataset2 = { owner: { ...user1, }, + associatedUrn: 'urn:li:dataset:2', type: 'DATAOWNER', }, { @@ -267,6 +274,7 @@ export const dataset2 = { ...user2, }, type: 'DELEGATE', + associatedUrn: 'urn:li:dataset:2', }, ], lastModified: { @@ -327,7 +335,7 @@ export const dataset3 = { name: 'Yet Another Dataset', description: 'This and here we have yet another Dataset (YAN). 
Are there more?', origin: 'PROD', - customProperties: [{ key: 'propertyAKey', value: 'propertyAValue' }], + customProperties: [{ key: 'propertyAKey', value: 'propertyAValue', associatedUrn: 'urn:li:dataset:3' }], externalUrl: 'https://data.hub', }, parentContainers: { @@ -348,12 +356,14 @@ export const dataset3 = { ...user1, }, type: 'DATAOWNER', + associatedUrn: 'urn:li:dataset:3', }, { owner: { ...user2, }, type: 'DELEGATE', + associatedUrn: 'urn:li:dataset:3', }, ], lastModified: { @@ -375,6 +385,7 @@ export const dataset3 = { colorHex: 'sample tag color', }, }, + associatedUrn: 'urn:li:dataset:3', }, ], }, @@ -393,6 +404,7 @@ export const dataset3 = { termSource: 'sample term source', }, }, + associatedUrn: 'urn:li:dataset:3', }, ], }, @@ -535,7 +547,7 @@ export const dataset5 = { name: 'Fifth Test Dataset', description: 'This and here we have yet another Dataset (YAN). Are there more?', origin: 'PROD', - customProperties: [{ key: 'propertyAKey', value: 'propertyAValue' }], + customProperties: [{ key: 'propertyAKey', value: 'propertyAValue', associatedUrn: 'urn:li:dataset:5' }], externalUrl: 'https://data.hub', }, }; @@ -549,7 +561,7 @@ export const dataset6 = { qualifiedName: 'Fully Qualified Name of Sixth Test Dataset', description: 'This and here we have yet another Dataset (YAN). Are there more?', origin: 'PROD', - customProperties: [{ key: 'propertyAKey', value: 'propertyAValue' }], + customProperties: [{ key: 'propertyAKey', value: 'propertyAValue', associatedUrn: 'urn:li:dataset:6' }], externalUrl: 'https://data.hub', }, }; @@ -809,12 +821,14 @@ const glossaryTerm1 = { owner: { ...user1, }, + associatedUrn: 'urn:li:glossaryTerm:1', type: 'DATAOWNER', }, { owner: { ...user2, }, + associatedUrn: 'urn:li:glossaryTerm:1', type: 'DELEGATE', }, ], @@ -859,7 +873,7 @@ const glossaryTerm2 = { { key: 'keyProperty', value: 'valueProperty', - __typename: 'StringMapEntry', + __typename: 'CustomPropertiesEntry', }, ], __typename: 'GlossaryTermInfo', @@ -876,7 +890,7 @@ const glossaryTerm2 = { { key: 'keyProperty', value: 'valueProperty', - __typename: 'StringMapEntry', + __typename: 'CustomPropertiesEntry', }, ], __typename: 'GlossaryTermProperties', @@ -931,7 +945,8 @@ const glossaryTerm3 = { { key: 'keyProperty', value: 'valueProperty', - __typename: 'StringMapEntry', + associatedUrn: 'urn:li:glossaryTerm:example.glossaryterm2', + __typename: 'CustomPropertiesEntry', }, ], __typename: 'GlossaryTermInfo', @@ -948,7 +963,8 @@ const glossaryTerm3 = { { key: 'keyProperty', value: 'valueProperty', - __typename: 'StringMapEntry', + associatedUrn: 'urn:li:glossaryTerm:example.glossaryterm2', + __typename: 'CustomPropertiesEntry', }, ], __typename: 'GlossaryTermProperties', @@ -1094,12 +1110,14 @@ export const dataFlow1 = { ...user1, }, type: 'DATAOWNER', + associatedUrn: 'urn:li:dataFlow:1', }, { owner: { ...user2, }, type: 'DELEGATE', + associatedUrn: 'urn:li:dataFlow:1', }, ], lastModified: { @@ -1120,6 +1138,7 @@ export const dataFlow1 = { colorHex: 'sample tag color', }, }, + associatedUrn: 'urn:li:dataFlow:1', }, ], }, @@ -1151,12 +1170,14 @@ export const dataJob1 = { owner: { ...user1, }, + associatedUrn: 'urn:li:dataJob:1', type: 'DATAOWNER', }, { owner: { ...user2, }, + associatedUrn: 'urn:li:dataJob:1', type: 'DELEGATE', }, ], @@ -1191,6 +1212,7 @@ export const dataJob1 = { colorHex: 'sample tag color', }, }, + associatedUrn: 'urn:li:dataJob:1', }, ], }, @@ -1228,12 +1250,14 @@ export const dataJob2 = { owner: { ...user1, }, + associatedUrn: 'urn:li:dataJob:2', type: 'DATAOWNER', }, { 
owner: { ...user2, }, + associatedUrn: 'urn:li:dataJob:2', type: 'DELEGATE', }, ], @@ -1268,6 +1292,7 @@ export const dataJob2 = { colorHex: 'sample tag color', }, }, + associatedUrn: 'urn:li:dataJob:2', }, ], }, @@ -1290,12 +1315,14 @@ export const dataJob3 = { owner: { ...user1, }, + associatedUrn: 'urn:li:dataJob:3', type: 'DATAOWNER', }, { owner: { ...user2, }, + associatedUrn: 'urn:li:dataJob:3', type: 'DELEGATE', }, ], @@ -1330,6 +1357,7 @@ export const dataJob3 = { colorHex: 'sample tag color', }, }, + associatedUrn: 'urn:li:dataJob:3', }, ], }, @@ -1377,12 +1405,14 @@ export const mlModel = { ...user1, }, type: 'DATAOWNER', + associatedUrn: 'urn:li:mlModel:(urn:li:dataPlatform:sagemaker,trustmodel,PROD)', }, { owner: { ...user2, }, type: 'DELEGATE', + associatedUrn: 'urn:li:mlModel:(urn:li:dataPlatform:sagemaker,trustmodel,PROD)', }, ], lastModified: { @@ -1405,6 +1435,7 @@ export const mlModel = { colorHex: 'sample tag color', }, }, + associatedUrn: 'urn:li:mlModel:(urn:li:dataPlatform:sagemaker,trustmodel,PROD)', }, ], }, @@ -1462,12 +1493,14 @@ export const mlModelGroup = { owner: { ...user1, }, + associatedUrn: 'urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,another-group,PROD)', type: 'DATAOWNER', }, { owner: { ...user2, }, + associatedUrn: 'urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,another-group,PROD)', type: 'DELEGATE', }, ], diff --git a/datahub-web-react/src/app/entity/EntityRegistry.tsx b/datahub-web-react/src/app/entity/EntityRegistry.tsx index 2bf32cc833178e..f87e6e534cfbe8 100644 --- a/datahub-web-react/src/app/entity/EntityRegistry.tsx +++ b/datahub-web-react/src/app/entity/EntityRegistry.tsx @@ -2,7 +2,7 @@ import { Entity as EntityInterface, EntityType, SearchResult } from '../../types import { FetchedEntity } from '../lineage/types'; import { Entity, IconStyleType, PreviewType } from './Entity'; import { GenericEntityProperties } from './shared/types'; -import { urlEncodeUrn } from './shared/utils'; +import { dictToQueryStringParams, urlEncodeUrn } from './shared/utils'; function validatedGet(key: K, map: Map): V { if (map.has(key)) { @@ -78,8 +78,8 @@ export default class EntityRegistry { return entity.getPathName(); } - getEntityUrl(type: EntityType, urn: string): string { - return `/${this.getPathName(type)}/${urlEncodeUrn(urn)}`; + getEntityUrl(type: EntityType, urn: string, params?: Record): string { + return `/${this.getPathName(type)}/${urlEncodeUrn(urn)}${params ? 
`?${dictToQueryStringParams(params)}` : ''}`; } getTypeFromPathName(pathName: string): EntityType { diff --git a/datahub-web-react/src/app/entity/chart/ChartEntity.tsx b/datahub-web-react/src/app/entity/chart/ChartEntity.tsx index 797a598ebf7f58..5d98ed7d1b2529 100644 --- a/datahub-web-react/src/app/entity/chart/ChartEntity.tsx +++ b/datahub-web-react/src/app/entity/chart/ChartEntity.tsx @@ -153,7 +153,7 @@ export class ChartEntity implements Entity { tags={data?.globalTags || undefined} glossaryTerms={data?.glossaryTerms} logoUrl={data?.platform?.properties?.logoUrl} - domain={data.domain} + domain={data.domain?.domain} parentContainers={data.parentContainers} /> ); @@ -174,7 +174,7 @@ export class ChartEntity implements Entity { glossaryTerms={data?.glossaryTerms} insights={result.insights} logoUrl={data?.platform?.properties?.logoUrl || ''} - domain={data.domain} + domain={data.domain?.domain} /> ); }; diff --git a/datahub-web-react/src/app/entity/container/ContainerEntity.tsx b/datahub-web-react/src/app/entity/container/ContainerEntity.tsx index 532e7464e73f2e..16b37ca659eafc 100644 --- a/datahub-web-react/src/app/entity/container/ContainerEntity.tsx +++ b/datahub-web-react/src/app/entity/container/ContainerEntity.tsx @@ -119,7 +119,7 @@ export class ContainerEntity implements Entity { subTypes={data.subTypes} container={data.container} entityCount={data.entities?.total} - domain={data.domain} + domain={data.domain?.domain} tags={data.tags} /> ); @@ -139,7 +139,7 @@ export class ContainerEntity implements Entity { subTypes={data.subTypes} container={data.container} entityCount={data.entities?.total} - domain={data.domain} + domain={data.domain?.domain} parentContainers={data.parentContainers} /> ); diff --git a/datahub-web-react/src/app/entity/dashboard/DashboardEntity.tsx b/datahub-web-react/src/app/entity/dashboard/DashboardEntity.tsx index 923263863d8108..d3d5bd3f56287e 100644 --- a/datahub-web-react/src/app/entity/dashboard/DashboardEntity.tsx +++ b/datahub-web-react/src/app/entity/dashboard/DashboardEntity.tsx @@ -162,7 +162,7 @@ export class DashboardEntity implements Entity { owners={data.ownership?.owners} glossaryTerms={data?.glossaryTerms} logoUrl={data?.platform?.properties?.logoUrl} - domain={data.domain} + domain={data.domain?.domain} container={data.container} /> ); @@ -183,7 +183,7 @@ export class DashboardEntity implements Entity { glossaryTerms={data?.glossaryTerms} insights={result.insights} logoUrl={data?.platform?.properties?.logoUrl || ''} - domain={data.domain} + domain={data.domain?.domain} container={data.container} parentContainers={data.parentContainers} /> diff --git a/datahub-web-react/src/app/entity/dataFlow/DataFlowEntity.tsx b/datahub-web-react/src/app/entity/dataFlow/DataFlowEntity.tsx index e687880093a66f..1fd559a1c4d520 100644 --- a/datahub-web-react/src/app/entity/dataFlow/DataFlowEntity.tsx +++ b/datahub-web-react/src/app/entity/dataFlow/DataFlowEntity.tsx @@ -121,7 +121,7 @@ export class DataFlowEntity implements Entity { platformLogo={data?.platform?.properties?.logoUrl || ''} owners={data.ownership?.owners} globalTags={data.globalTags} - domain={data.domain} + domain={data.domain?.domain} /> ); }; @@ -139,7 +139,7 @@ export class DataFlowEntity implements Entity { owners={data.ownership?.owners} globalTags={data.globalTags} insights={result.insights} - domain={data.domain} + domain={data.domain?.domain} /> ); }; diff --git a/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx b/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx 
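As an aside, here is a minimal sketch of how the new optional `params` argument on `getEntityUrl` composes with `dictToQueryStringParams` (illustrative only, not part of this patch; the path name and urn below are made-up examples, and the real method resolves the path through the entity registry and URL-encodes the urn):

```typescript
// Sketch only: simplified signatures for illustration.
function dictToQueryStringParams(params: Record<string, string | boolean>): string {
    return Object.keys(params)
        .map((key) => `${key}=${params[key]}`)
        .join('&');
}

function getEntityUrl(pathName: string, encodedUrn: string, params?: Record<string, string | boolean>): string {
    return `/${pathName}/${encodedUrn}${params ? `?${dictToQueryStringParams(params)}` : ''}`;
}

// Example: linking to a dataset profile while keeping siblings un-merged.
console.log(getEntityUrl('dataset', 'urn%3Ali%3Adataset%3A1', { separate_siblings: true }));
// -> /dataset/urn%3Ali%3Adataset%3A1?separate_siblings=true
```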
index 270d52c1f1162c..038c50b29c42fd 100644 --- a/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx +++ b/datahub-web-react/src/app/entity/dataJob/DataJobEntity.tsx @@ -146,7 +146,7 @@ export class DataJobEntity implements Entity { platformLogo={data?.dataFlow?.platform?.properties?.logoUrl || ''} owners={data.ownership?.owners} globalTags={data.globalTags || null} - domain={data.domain} + domain={data.domain?.domain} /> ); }; @@ -163,7 +163,7 @@ export class DataJobEntity implements Entity { platformInstanceId={data.dataPlatformInstance?.instanceId} owners={data.ownership?.owners} globalTags={data.globalTags} - domain={data.domain} + domain={data.domain?.domain} insights={result.insights} /> ); diff --git a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx index 324335e05376de..810e89e28ef8ef 100644 --- a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx +++ b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx @@ -27,6 +27,7 @@ import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domai import { ValidationsTab } from '../shared/tabs/Dataset/Validations/ValidationsTab'; import { OperationsTab } from './profile/OperationsTab'; import { EntityMenuItems } from '../shared/EntityDropdown/EntityDropdown'; +import { SidebarSiblingsSection } from '../shared/containers/profile/sidebar/SidebarSiblingsSection'; const SUBTYPES = { VIEW: 'view', @@ -174,6 +175,13 @@ export class DatasetEntity implements Entity { { component: SidebarAboutSection, }, + { + component: SidebarSiblingsSection, + display: { + visible: (_, dataset: GetDatasetQuery) => + (dataset?.dataset?.siblings?.siblings?.length || 0) > 0, + }, + }, { component: SidebarViewDefinitionSection, display: { @@ -241,7 +249,7 @@ export class DatasetEntity implements Entity { owners={data.ownership?.owners} globalTags={data.globalTags} glossaryTerms={data.glossaryTerms} - domain={data.domain} + domain={data.domain?.domain} container={data.container} /> ); @@ -265,7 +273,7 @@ export class DatasetEntity implements Entity { platformLogos={genericProperties?.siblingPlatforms?.map((platform) => platform.properties?.logoUrl)} owners={data.ownership?.owners} globalTags={data.globalTags} - domain={data.domain} + domain={data.domain?.domain} glossaryTerms={data.glossaryTerms} subtype={data.subTypes?.typeNames?.[0]} container={data.container} diff --git a/datahub-web-react/src/app/entity/dataset/profile/stories/sampleSchema.ts b/datahub-web-react/src/app/entity/dataset/profile/stories/sampleSchema.ts index 71c27ab7bd2ce0..819ee2f9b58f1b 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/stories/sampleSchema.ts +++ b/datahub-web-react/src/app/entity/dataset/profile/stories/sampleSchema.ts @@ -113,6 +113,7 @@ export const sampleSchemaWithTags: Schema = { description: 'this is a legacy dataset', type: EntityType.Tag, }, + associatedUrn: 'mock:urn', }, ], }, @@ -131,6 +132,7 @@ export const sampleSchemaWithTags: Schema = { termSource: 'sample term source', }, }, + associatedUrn: 'mock:urn', }, ], }, @@ -237,6 +239,7 @@ export const sampleSchemaWithPkFk: SchemaMetadata = { description: 'this is a legacy dataset', type: EntityType.Tag, }, + associatedUrn: 'mock:urn', }, ], }, @@ -255,6 +258,7 @@ export const sampleSchemaWithPkFk: SchemaMetadata = { termSource: 'sample term source', }, }, + associatedUrn: 'mock:urn', }, ], }, diff --git a/datahub-web-react/src/app/entity/shared/EntityContext.tsx 
b/datahub-web-react/src/app/entity/shared/EntityContext.tsx index 3ff43f1dc6dc80..c6b31c1b7781ba 100644 --- a/datahub-web-react/src/app/entity/shared/EntityContext.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityContext.tsx @@ -1,5 +1,6 @@ import React, { useContext } from 'react'; import { EntityType } from '../../../types.generated'; +import { useIsSeparateSiblingsMode } from './siblingUtils'; import { EntityContextType, UpdateEntityType } from './types'; const EntityContext = React.createContext({ @@ -11,6 +12,7 @@ const EntityContext = React.createContext({ routeToTab: () => {}, refetch: () => Promise.resolve({}), lineage: undefined, + dataNotCombinedWithSiblings: null, }); export default EntityContext; @@ -20,6 +22,11 @@ export const useBaseEntity = (): T => { return baseEntity as T; }; +export const useDataNotCombinedWithSiblings = (): T => { + const { dataNotCombinedWithSiblings } = useContext(EntityContext); + return dataNotCombinedWithSiblings as T; +}; + export const useEntityUpdate = (): UpdateEntityType | null | undefined => { const { updateEntity } = useContext(EntityContext); return updateEntity; @@ -47,7 +54,8 @@ export const useLineageData = () => { export const useMutationUrn = () => { const { urn, entityData } = useContext(EntityContext); - if (!entityData?.siblings || entityData?.siblings?.isPrimary) { + const isHideSiblingMode = useIsSeparateSiblingsMode(); + if (!entityData?.siblings || entityData?.siblings?.isPrimary || isHideSiblingMode) { return urn; } return entityData?.siblings?.siblings?.[0]?.urn || urn; diff --git a/datahub-web-react/src/app/entity/shared/components/legacy/Ownership.tsx b/datahub-web-react/src/app/entity/shared/components/legacy/Ownership.tsx deleted file mode 100644 index b5abf6e306df24..00000000000000 --- a/datahub-web-react/src/app/entity/shared/components/legacy/Ownership.tsx +++ /dev/null @@ -1,349 +0,0 @@ -import { AutoComplete, Button, Form, Select, Space, Table, Tag, Typography } from 'antd'; -import React, { useEffect, useMemo, useState } from 'react'; -import styled from 'styled-components'; -import { - CorpUser, - EntityType, - Owner, - OwnershipSourceType, - OwnershipType, - OwnershipUpdate, -} from '../../../../../types.generated'; -import CustomAvatar from '../../../../shared/avatar/CustomAvatar'; -import { useGetAutoCompleteResultsLazyQuery } from '../../../../../graphql/search.generated'; -import { useEntityRegistry } from '../../../../useEntityRegistry'; - -const UpdatedText = styled(Typography.Text)` - position: absolute; - right: 0; - margin: 0; -`; - -const OWNER_SEARCH_PLACEHOLDER = 'Search an LDAP'; -const NUMBER_OWNERS_REQUIRED = 2; - -interface Props { - owners: Array; - lastModifiedAt: number; - updateOwnership?: (update: OwnershipUpdate) => void; -} - -/** - * Displays an array of owners! Work-in-progress. - * - * TODO: Add mutations to change ownership on explicit save. 
- */ -export const Ownership: React.FC = ({ owners, lastModifiedAt, updateOwnership }: Props): JSX.Element => { - const entityRegistry = useEntityRegistry(); - - const [form] = Form.useForm(); - const [editingIndex, setEditingIndex] = useState(-1); - const [stagedOwners, setStagedOwners] = useState(owners); - const [ownerQuery, setOwnerQuery] = useState(''); - const [getOwnerAutoCompleteResults, { data: searchOwnerSuggestionsData }] = useGetAutoCompleteResultsLazyQuery(); - - useEffect(() => { - setStagedOwners(owners); - }, [owners]); - - const ownerTableData = useMemo( - () => - // eslint-disable-next-line consistent-return, array-callback-return - stagedOwners.map((owner, index) => { - if (owner.owner.__typename === 'CorpUser') { - return { - key: index, - urn: owner.owner.urn, - ldap: owner.owner.username, - fullName: owner.owner.info?.fullName || owner.owner.username, - role: owner.type, - pictureLink: owner.owner.editableInfo?.pictureLink, - type: EntityType.CorpUser, - }; - } - if (owner.owner.__typename === 'CorpGroup') { - return { - key: index, - urn: owner.owner.urn, - ldap: owner.owner.name, - fullName: owner.owner.name, - role: owner.type, - type: EntityType.CorpGroup, - }; - } - return { - key: index, - urn: owner.owner.urn, - ldap: (owner.owner as CorpUser).username, - fullName: (owner.owner as CorpUser).info?.fullName || (owner.owner as CorpUser).username, - role: owner.type, - pictureLink: (owner.owner as CorpUser).editableInfo?.pictureLink, - type: EntityType.CorpUser, - }; - }), - [stagedOwners], - ); - - const isEditing = (record: { key: number }) => record.key === editingIndex; - - const onAdd = () => { - setEditingIndex(stagedOwners.length); - - form.setFieldsValue({ - ldap: '', - role: OwnershipType.Stakeholder, - type: EntityType.CorpUser, - }); - - const newOwner = { - owner: { - type: EntityType.CorpUser, - urn: '', - username: '', - __typename: 'CorpUser' as const, - }, - type: OwnershipType.Stakeholder, - source: { - type: OwnershipSourceType.Manual, - }, - }; - - const newStagedOwners = [...stagedOwners, newOwner]; - setStagedOwners(newStagedOwners); - }; - - const onDelete = (urn: string, role: OwnershipType) => { - if (updateOwnership) { - const updatedOwners = owners - .filter((owner) => !(owner.owner.urn === urn && owner.type === role)) - .map((owner) => ({ - owner: owner.owner.urn, - type: owner.type, - })); - - updateOwnership({ owners: updatedOwners }); - } - }; - - const onChangeOwnerQuery = async (query: string) => { - if (query && query !== '') { - const row = await form.validateFields(); - getOwnerAutoCompleteResults({ - variables: { - input: { - type: row.type, - query, - field: row.type === EntityType.CorpUser ? 'ldap' : 'name', - }, - }, - }); - } - setOwnerQuery(query); - }; - - const onSave = async (record: any) => { - if (updateOwnership) { - const row = await form.validateFields(); - const updatedOwners = stagedOwners.map((owner, index) => { - if (record.key === index) { - return { - owner: `urn:li:${row.type === EntityType.CorpGroup ? 
'corpGroup' : 'corpuser'}:${row.ldap}`, - type: row.role, - }; - } - return { - owner: owner.owner.urn, - type: owner.type, - }; - }); - updateOwnership({ owners: updatedOwners }); - } - setEditingIndex(-1); - }; - - const onCancel = () => { - const newStagedOwners = stagedOwners.filter((_, index) => index !== editingIndex); - setStagedOwners(newStagedOwners); - setEditingIndex(-1); - }; - - const onSelectSuggestion = (ldap: string) => { - setOwnerQuery(ldap); - }; - - const ownerTableColumns = [ - { - title: 'LDAP', - dataIndex: 'ldap', - render: (text: string, record: any) => { - return isEditing(record) ? ( - - ({ - value: suggestion, - }))) || - [] - } - value={ownerQuery} - onSelect={onSelectSuggestion} - onSearch={onChangeOwnerQuery} - placeholder={OWNER_SEARCH_PLACEHOLDER} - /> - - ) : ( - - ); - }, - }, - { - title: 'Full Name', - dataIndex: 'fullName', - }, - { - title: 'Role', - dataIndex: 'role', - render: (role: OwnershipType, record: any) => { - return isEditing(record) ? ( - - - - ) : ( - {role} - ); - }, - }, - { - title: 'Type', - dataIndex: 'type', - render: (type: EntityType, record: any) => { - return isEditing(record) ? ( - - - - ) : ( - {type} - ); - }, - }, - { - title: '', - key: 'action', - render: (_: string, record: any) => { - return ( - - {isEditing(record) ? ( - <> - - - - ) : ( - - )} - - ); - }, - }, - ]; - - return ( - <> - {!!lastModifiedAt && ( - - Last updated {new Date(lastModifiedAt).toLocaleDateString('en-US')} - - )} - - Ownership - - Please maintain at least {NUMBER_OWNERS_REQUIRED} owners. - -
- - - {editingIndex < 0 && ( - - )} - - - ); -}; diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx index cd9b08fefda6f9..17c58a2e7e21fe 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx @@ -15,6 +15,7 @@ import { EmbeddedListSearchResults } from './EmbeddedListSearchResults'; import EmbeddedListSearchHeader from './EmbeddedListSearchHeader'; import { useGetSearchResultsForMultipleQuery } from '../../../../../../graphql/search.generated'; import { GetSearchResultsParams, SearchResultsInterface } from './types'; +import { useEntityQueryParams } from '../../../containers/profile/utils'; const Container = styled.div` display: flex; @@ -79,6 +80,7 @@ export const EmbeddedListSearch = ({ const history = useHistory(); const location = useLocation(); const entityRegistry = useEntityRegistry(); + const baseParams = useEntityQueryParams(); const params = QueryString.parse(location.search, { arrayFormat: 'comma' }); const query: string = addFixedQuery(params?.query as string, fixedQuery as string, emptySearchQuery as string); @@ -128,6 +130,7 @@ export const EmbeddedListSearch = ({ const finalQuery = addFixedQuery(q as string, fixedQuery as string, emptySearchQuery as string); navigateToEntitySearchUrl({ baseUrl: location.pathname, + baseParams, type: activeType, query: finalQuery, page: 1, @@ -138,6 +141,7 @@ export const EmbeddedListSearch = ({ const onChangeFilters = (newFilters: Array) => { navigateToEntitySearchUrl({ baseUrl: location.pathname, + baseParams, type: activeType, query, page: 1, @@ -149,6 +153,7 @@ export const EmbeddedListSearch = ({ const onChangePage = (newPage: number) => { navigateToEntitySearchUrl({ baseUrl: location.pathname, + baseParams, type: activeType, query, page: newPage, diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/downloadAsCsvUtil.ts b/datahub-web-react/src/app/entity/shared/components/styled/search/downloadAsCsvUtil.ts index dc16c9e07a1e11..23d6569de2138a 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/downloadAsCsvUtil.ts +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/downloadAsCsvUtil.ts @@ -78,7 +78,7 @@ export const transformGenericEntityPropertiesToCsvRow = ( // terms properties?.glossaryTerms?.terms?.map((term) => term.term.name).join(',') || '', // domain - properties?.domain?.properties?.name || '', + properties?.domain?.domain?.properties?.name || '', // properties properties?.platform?.properties?.displayName || '', // container diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/navigateToEntitySearchUrl.ts b/datahub-web-react/src/app/entity/shared/components/styled/search/navigateToEntitySearchUrl.ts index 3b719ae61f984a..e0f59c4f7fa2ac 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/navigateToEntitySearchUrl.ts +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/navigateToEntitySearchUrl.ts @@ -5,6 +5,7 @@ import filtersToQueryStringParams from '../../../../../search/utils/filtersToQue export const navigateToEntitySearchUrl = ({ baseUrl, + baseParams, type: newType, query: newQuery, page: newPage = 1, @@ -12,6 +13,7 @@ export const navigateToEntitySearchUrl = ({ history, }: { baseUrl: string; + baseParams: 
Record; type?: EntityType; query?: string; page?: number; @@ -28,6 +30,7 @@ export const navigateToEntitySearchUrl = ({ ...filtersToQueryStringParams(constructedFilters), query: newQuery, page: newPage, + ...baseParams, }, { arrayFormat: 'comma' }, ); diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx index 5e078a482ed718..c77f9a26f75104 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx @@ -24,7 +24,7 @@ import { EntityMenuItems } from '../../EntityDropdown/EntityDropdown'; import GlossaryBrowser from '../../../../glossary/GlossaryBrowser/GlossaryBrowser'; import GlossarySearch from '../../../../glossary/GlossarySearch'; import { BrowserWrapper, MAX_BROWSER_WIDTH, MIN_BROWSWER_WIDTH } from '../../../../glossary/BusinessGlossaryPage'; -import { combineEntityDataWithSiblings } from '../../siblingUtils'; +import { combineEntityDataWithSiblings, useIsSeparateSiblingsMode } from '../../siblingUtils'; type Props = { urn: string; @@ -138,6 +138,7 @@ export const EntityProfile = ({ isNameEditable, }: Props): JSX.Element => { const isLineageMode = useIsLineageMode(); + const isHideSiblingMode = useIsSeparateSiblingsMode(); const entityRegistry = useEntityRegistry(); const history = useHistory(); const isCompact = React.useContext(CompactContext); @@ -172,9 +173,11 @@ export const EntityProfile = ({ entityUrn: urn, section: tabName.toLowerCase(), }); - history[method](getEntityPath(entityType, urn, entityRegistry, false, tabName, tabParams)); + history[method]( + getEntityPath(entityType, urn, entityRegistry, false, isHideSiblingMode, tabName, tabParams), + ); }, - [history, entityType, urn, entityRegistry], + [history, entityType, urn, entityRegistry, isHideSiblingMode], ); const { @@ -186,7 +189,9 @@ export const EntityProfile = ({ variables: { urn }, }); - const dataCombinedWithSiblings = combineEntityDataWithSiblings(dataNotCombinedWithSiblings); + const dataPossiblyCombinedWithSiblings = isHideSiblingMode + ? 
dataNotCombinedWithSiblings + : combineEntityDataWithSiblings(dataNotCombinedWithSiblings); const maybeUpdateEntity = useUpdateQuery?.({ onCompleted: () => refetch(), @@ -197,12 +202,13 @@ export const EntityProfile = ({ } const entityData = - (dataCombinedWithSiblings && - Object.keys(dataCombinedWithSiblings).length > 0 && + (dataPossiblyCombinedWithSiblings && + Object.keys(dataPossiblyCombinedWithSiblings).length > 0 && getDataForEntityType({ - data: dataCombinedWithSiblings[Object.keys(dataCombinedWithSiblings)[0]], + data: dataPossiblyCombinedWithSiblings[Object.keys(dataPossiblyCombinedWithSiblings)[0]], entityType, getOverrideProperties, + isHideSiblingMode, })) || null; @@ -233,7 +239,8 @@ export const EntityProfile = ({ urn, entityType, entityData, - baseEntity: dataCombinedWithSiblings, + baseEntity: dataPossiblyCombinedWithSiblings, + dataNotCombinedWithSiblings, updateEntity, routeToTab, refetch, @@ -267,7 +274,8 @@ export const EntityProfile = ({ urn, entityType, entityData, - baseEntity: dataCombinedWithSiblings, + baseEntity: dataPossiblyCombinedWithSiblings, + dataNotCombinedWithSiblings, updateEntity, routeToTab, refetch, diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx index 7bd977d3e36c3e..499adb18e81eb8 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityHeader.tsx @@ -157,7 +157,7 @@ export const EntityHeader = ({ refreshBrowser, headerDropdownItems, isNameEditab const canEditName = isNameEditable && getCanEditName(entityType, me?.platformPrivileges as PlatformPrivileges); return ( - + diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/PlatformContent/PlatformContentView.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/PlatformContent/PlatformContentView.tsx index e7ba5ff775ee04..24483d0fd9d716 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/header/PlatformContent/PlatformContentView.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/PlatformContent/PlatformContentView.tsx @@ -128,12 +128,12 @@ function PlatformContentView(props: Props) { {!!platformLogoUrl && !platformLogoUrls && ( )} - {!!platformLogoUrls && ( - <> - - - - )} + {!!platformLogoUrls && + platformLogoUrls.slice(0, 2).map((platformLogoUrlsEntry) => ( + <> + + + ))} )} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SidebarDomainSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SidebarDomainSection.tsx index f8e574dd025f4b..446d9e6aa242f8 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SidebarDomainSection.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Domain/SidebarDomainSection.tsx @@ -2,7 +2,7 @@ import { Typography, Button, Modal, message } from 'antd'; import React, { useState } from 'react'; import { EditOutlined } from '@ant-design/icons'; import { EMPTY_MESSAGES } from '../../../../constants'; -import { useEntityData, useMutationUrn, useRefetch } from '../../../../EntityContext'; +import { useEntityData, useRefetch } from '../../../../EntityContext'; import { SidebarHeader } from '../SidebarHeader'; import { SetDomainModal } from './SetDomainModal'; import { useEntityRegistry } from 
'../../../../../../useEntityRegistry'; @@ -11,16 +11,15 @@ import { useUnsetDomainMutation } from '../../../../../../../graphql/mutations.g import { DomainLink } from '../../../../../../shared/tags/DomainLink'; export const SidebarDomainSection = () => { - const mutationUrn = useMutationUrn(); const { entityData } = useEntityData(); const entityRegistry = useEntityRegistry(); const refetch = useRefetch(); const [unsetDomainMutation] = useUnsetDomainMutation(); const [showModal, setShowModal] = useState(false); - const domain = entityData?.domain; + const domain = entityData?.domain?.domain; - const removeDomain = () => { - unsetDomainMutation({ variables: { entityUrn: mutationUrn } }) + const removeDomain = (urnToRemoveFrom) => { + unsetDomainMutation({ variables: { entityUrn: urnToRemoveFrom } }) .then(() => { message.success({ content: 'Removed Domain.', duration: 2 }); refetch?.(); @@ -33,12 +32,12 @@ export const SidebarDomainSection = () => { }); }; - const onRemoveDomain = () => { + const onRemoveDomain = (urnToRemoveFrom) => { Modal.confirm({ title: `Confirm Domain Removal`, content: `Are you sure you want to remove this domain?`, onOk() { - removeDomain(); + removeDomain(urnToRemoveFrom); }, onCancel() {}, okText: 'Yes', @@ -53,12 +52,12 @@ export const SidebarDomainSection = () => {
{domain && ( { e.preventDefault(); - onRemoveDomain(); + onRemoveDomain(entityData?.domain?.associatedUrn); }} /> )} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx index 37a9f74d69a1cc..2bbaf0551c4178 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/SidebarOwnerSection.tsx @@ -8,7 +8,7 @@ import { SidebarHeader } from '../SidebarHeader'; import { AddOwnersModal } from './AddOwnersModal'; export const SidebarOwnerSection = ({ properties }: { properties?: any }) => { - const { urn, entityType, entityData } = useEntityData(); + const { entityType, entityData } = useEntityData(); const mutationUrn = useMutationUrn(); const refetch = useRefetch(); @@ -20,7 +20,12 @@ export const SidebarOwnerSection = ({ properties }: { properties?: any }) => {
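A minimal sketch of what the switch from `Domain` to `DomainAssociation` means for consumers (illustrative only, not part of this patch; the types are simplified stand-ins and the urns are invented): display code now reads the nested `domain`, while unset/remove mutations target `associatedUrn`, which may point at a sibling rather than the entity currently being viewed.

```typescript
// Sketch only: simplified local stand-ins for the generated GraphQL types.
type Domain = { urn: string; properties?: { name?: string } };
type DomainAssociation = { domain: Domain; associatedUrn: string };
type EntityData = { urn: string; domain?: DomainAssociation | null };

function describeDomain(entityData: EntityData): string {
    const domain = entityData.domain?.domain; // nested read, as in data.domain?.domain
    if (!domain) return 'No domain set';
    // Mutations (e.g. unsetDomain) should use associatedUrn, not necessarily entityData.urn.
    return `${domain.properties?.name ?? domain.urn} (unset via ${entityData.domain?.associatedUrn})`;
}

console.log(
    describeDomain({
        urn: 'urn:li:dataset:primary',
        domain: {
            domain: { urn: 'urn:li:domain:analytics', properties: { name: 'Analytics' } },
            associatedUrn: 'urn:li:dataset:primary',
        },
    }),
);
```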
{entityData?.ownership?.owners?.map((owner) => ( - + ))} {ownersEmpty && ( diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarSiblingsSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarSiblingsSection.tsx new file mode 100644 index 00000000000000..4b0089e6b9214a --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/SidebarSiblingsSection.tsx @@ -0,0 +1,53 @@ +import React from 'react'; +import styled from 'styled-components'; + +import { useDataNotCombinedWithSiblings, useEntityData } from '../../../EntityContext'; +import { SidebarHeader } from './SidebarHeader'; +import { CompactEntityNameList } from '../../../../../recommendations/renderer/component/CompactEntityNameList'; +import { Entity } from '../../../../../../types.generated'; +import { SEPARATE_SIBLINGS_URL_PARAM, stripSiblingsFromEntity, useIsSeparateSiblingsMode } from '../../../siblingUtils'; +import { GetDatasetQuery } from '../../../../../../graphql/dataset.generated'; + +const EntityListContainer = styled.div` + margin-left: -8px; +`; + +export const SidebarSiblingsSection = () => { + const { entityData } = useEntityData(); + const dataNotCombinedWithSiblings = useDataNotCombinedWithSiblings(); + + const isHideSiblingMode = useIsSeparateSiblingsMode(); + + if (!entityData) { + return <>; + } + + if (isHideSiblingMode) { + return ( +
+ + + + +
+ ); + } + + const siblingEntities = entityData?.siblings?.siblings || []; + const entityDataWithoutSiblings = stripSiblingsFromEntity(dataNotCombinedWithSiblings.dataset); + + const allSiblingsInGroup = [...siblingEntities, entityDataWithoutSiblings] as Entity[]; + + return ( +
+ + + + +
+ ); +}; diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts b/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts index c9c1b483db28dd..d467a944e5d71d 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts +++ b/datahub-web-react/src/app/entity/shared/containers/profile/utils.ts @@ -5,14 +5,17 @@ import useIsLineageMode from '../../../../lineage/utils/useIsLineageMode'; import { useEntityRegistry } from '../../../../useEntityRegistry'; import EntityRegistry from '../../../EntityRegistry'; import { EntityTab, GenericEntityProperties } from '../../types'; +import { useIsSeparateSiblingsMode, SEPARATE_SIBLINGS_URL_PARAM } from '../../siblingUtils'; export function getDataForEntityType({ data: entityData, getOverrideProperties, + isHideSiblingMode, }: { data: T; entityType?: EntityType; getOverrideProperties: (T) => GenericEntityProperties; + isHideSiblingMode?: boolean; }): GenericEntityProperties | null { if (!entityData) { return null; @@ -34,7 +37,7 @@ export function getDataForEntityType({ }; } - if (anyEntityData?.siblings?.siblings?.length > 0) { + if (anyEntityData?.siblings?.siblings?.length > 0 && !isHideSiblingMode) { const genericSiblingProperties: GenericEntityProperties[] = anyEntityData?.siblings?.siblings?.map((sibling) => getDataForEntityType({ data: sibling, getOverrideProperties: () => ({}) }), ); @@ -60,6 +63,7 @@ export function getEntityPath( urn: string, entityRegistry: EntityRegistry, isLineageMode: boolean, + isHideSiblingMode: boolean, tabName?: string, tabParams?: Record, ) { @@ -68,16 +72,16 @@ export function getEntityPath( if (!tabName) { return `${entityRegistry.getEntityUrl(entityType, urn)}?is_lineage_mode=${isLineageMode}${tabParamsString}`; } - return `${entityRegistry.getEntityUrl( - entityType, - urn, - )}/${tabName}?is_lineage_mode=${isLineageMode}${tabParamsString}`; + return `${entityRegistry.getEntityUrl(entityType, urn)}/${tabName}?is_lineage_mode=${isLineageMode}${ + isHideSiblingMode ? 
`&${SEPARATE_SIBLINGS_URL_PARAM}=${isHideSiblingMode}` : '' + }${tabParamsString}`; } export function useEntityPath(entityType: EntityType, urn: string, tabName?: string, tabParams?: Record) { const isLineageMode = useIsLineageMode(); + const isHideSiblingMode = useIsSeparateSiblingsMode(); const entityRegistry = useEntityRegistry(); - return getEntityPath(entityType, urn, entityRegistry, isLineageMode, tabName, tabParams); + return getEntityPath(entityType, urn, entityRegistry, isLineageMode, isHideSiblingMode, tabName, tabParams); } export function useRoutedTab(tabs: EntityTab[]): EntityTab | undefined { @@ -93,3 +97,13 @@ export function formatDateString(time: number) { const date = new Date(time); return date.toLocaleDateString('en-US'); } + +export function useEntityQueryParams() { + const isHideSiblingMode = useIsSeparateSiblingsMode(); + const response = {}; + if (isHideSiblingMode) { + response[SEPARATE_SIBLINGS_URL_PARAM] = true; + } + + return response; +} diff --git a/datahub-web-react/src/app/entity/shared/siblingUtils.ts b/datahub-web-react/src/app/entity/shared/siblingUtils.ts index d94f3fbda6e352..c0076a1829ce06 100644 --- a/datahub-web-react/src/app/entity/shared/siblingUtils.ts +++ b/datahub-web-react/src/app/entity/shared/siblingUtils.ts @@ -1,7 +1,16 @@ import merge from 'deepmerge'; import { unionBy } from 'lodash'; +import { useLocation } from 'react-router-dom'; +import * as QueryString from 'query-string'; import { Entity, MatchedField, Maybe, SiblingProperties } from '../../../types.generated'; +export function stripSiblingsFromEntity(entity: any) { + return { + ...entity, + siblings: null, + siblingPlatforms: null, + }; +} function cleanHelper(obj, visited) { if (visited.has(obj)) return obj; visited.add(obj); @@ -54,6 +63,14 @@ const mergeAssertions = (destinationArray, sourceArray, _options) => { return unionBy(destinationArray, sourceArray, 'urn'); }; +const mergeProperties = (destinationArray, sourceArray, _options) => { + return unionBy(destinationArray, sourceArray, 'key'); +}; + +const mergeOwners = (destinationArray, sourceArray, _options) => { + return unionBy(destinationArray, sourceArray, 'owner.urn'); +}; + function getArrayMergeFunction(key) { switch (key) { case 'tags': @@ -62,6 +79,10 @@ function getArrayMergeFunction(key) { return mergeTerms; case 'assertions': return mergeAssertions; + case 'customProperties': + return mergeProperties; + case 'owners': + return mergeOwners; default: return undefined; } @@ -74,7 +95,7 @@ const customMerge = (isPrimary, key) => { if (key === 'platform') { return (secondary, primary) => (isPrimary ? primary : secondary); } - if (key === 'tags' || key === 'terms' || key === 'assertions') { + if (key === 'tags' || key === 'terms' || key === 'assertions' || key === 'customProperties' || key === 'owners') { return (secondary, primary) => { return merge(secondary, primary, { arrayMerge: getArrayMergeFunction(key), @@ -164,8 +185,8 @@ export function combineSiblingsInSearchResults( const siblingUrns = entity?.siblings?.siblings?.map((sibling) => sibling.urn) || []; if (siblingUrns.length > 0) { combinedResult.matchedEntities = entity.siblings.isPrimary - ? [entity, ...entity.siblings.siblings] - : [...entity.siblings.siblings, entity]; + ? 
[stripSiblingsFromEntity(entity), ...entity.siblings.siblings] + : [...entity.siblings.siblings, stripSiblingsFromEntity(entity)]; siblingUrns.forEach((urn) => { siblingsToPair[urn] = combinedResult; }); @@ -175,3 +196,13 @@ export function combineSiblingsInSearchResults( return combinedResults; } +// used to determine whether sibling entities should be shown merged or not +export const SEPARATE_SIBLINGS_URL_PARAM = 'separate_siblings'; + +// used to determine whether sibling entities should be shown merged or not +export function useIsSeparateSiblingsMode() { + const location = useLocation(); + const params = QueryString.parse(location.search, { arrayFormat: 'comma' }); + + return params[SEPARATE_SIBLINGS_URL_PARAM] === 'true'; +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionsList.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionsList.tsx index f43fbf07aacdb2..13dcfd34b8219e 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionsList.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionsList.tsx @@ -77,9 +77,9 @@ export const DatasetAssertionsList = ({ assertions, onDelete }: Props) => { type: assertion.info?.type, platform: assertion.platform, datasetAssertionInfo: assertion.info?.datasetAssertion, - lastExecTime: assertion.runEvents?.runEvents.length && assertion.runEvents.runEvents[0].timestampMillis, + lastExecTime: assertion.runEvents?.runEvents?.length && assertion.runEvents.runEvents[0].timestampMillis, lastExecResult: - assertion.runEvents?.runEvents.length && + assertion.runEvents?.runEvents?.length && assertion.runEvents.runEvents[0].status === AssertionRunStatus.Complete && assertion.runEvents.runEvents[0].result?.type, })); diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/ValidationsTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/ValidationsTab.tsx index 8b747c2a776804..ccfb6a3fdfc772 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/ValidationsTab.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/ValidationsTab.tsx @@ -9,7 +9,7 @@ import { DatasetAssertionsList } from './DatasetAssertionsList'; import { DatasetAssertionsSummary } from './DatasetAssertionsSummary'; import { sortAssertions } from './assertionUtils'; import { TestResults } from './TestResults'; -import { combineEntityDataWithSiblings } from '../../../siblingUtils'; +import { combineEntityDataWithSiblings, useIsSeparateSiblingsMode } from '../../../siblingUtils'; /** * Returns a status summary for the assertions associated with a Dataset. 
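A standalone sketch of the check `useIsSeparateSiblingsMode` performs (illustrative only, not part of this patch; `URLSearchParams` stands in for the query-string package used by the hook, and the URLs are made-up examples):

```typescript
// Sketch only: mirrors the hook's logic without React Router.
const SEPARATE_SIBLINGS_URL_PARAM = 'separate_siblings';

function isSeparateSiblingsMode(search: string): boolean {
    return new URLSearchParams(search).get(SEPARATE_SIBLINGS_URL_PARAM) === 'true';
}

// When the flag is set, callers skip sibling merging, e.g.:
//   const combined = isSeparateSiblingsMode(location.search) ? data : combineEntityDataWithSiblings(data);
console.log(isSeparateSiblingsMode('?is_lineage_mode=false&separate_siblings=true')); // true
console.log(isSeparateSiblingsMode('?is_lineage_mode=true')); // false
```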
@@ -22,9 +22,9 @@ const getAssertionsStatusSummary = (assertions: Array) => { totalAssertions: assertions.length, }; assertions.forEach((assertion) => { - if (assertion.runEvents?.runEvents.length) { - const mostRecentRun = assertion.runEvents?.runEvents[0]; - const resultType = mostRecentRun.result?.type; + if ((assertion.runEvents?.runEvents?.length || 0) > 0) { + const mostRecentRun = assertion.runEvents?.runEvents?.[0]; + const resultType = mostRecentRun?.result?.type; if (AssertionResultType.Success === resultType) { summary.succeededRuns++; } @@ -48,7 +48,9 @@ enum ViewType { export const ValidationsTab = () => { const { urn, entityData } = useEntityData(); const { data, refetch } = useGetDatasetAssertionsQuery({ variables: { urn } }); - const combinedData = combineEntityDataWithSiblings(data); + const isHideSiblingMode = useIsSeparateSiblingsMode(); + + const combinedData = isHideSiblingMode ? data : combineEntityDataWithSiblings(data); const [removedUrns, setRemovedUrns] = useState([]); /** * Determines which view should be visible: assertions or tests. diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/assertionUtils.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/assertionUtils.tsx index a91ad6f0e80d21..1eaacb36515a1b 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/assertionUtils.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/assertionUtils.tsx @@ -12,10 +12,10 @@ import { * Utility methods */ export const sortAssertions = (a, b) => { - if (!a.runEvents?.runEvents.length) { + if (!a.runEvents?.runEvents?.length) { return 1; } - if (!b.runEvents?.runEvents.length) { + if (!b.runEvents?.runEvents?.length) { return -1; } return b.runEvents.runEvents[0].timestampMillis - a.runEvents.runEvents[0].timestampMillis; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Lineage/LineageTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Lineage/LineageTab.tsx index fc454a549db26f..9bf15153e57254 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Lineage/LineageTab.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Lineage/LineageTab.tsx @@ -33,7 +33,7 @@ export const LineageTab = () => { const [lineageDirection, setLineageDirection] = useState(LineageDirection.Downstream); const routeToLineage = useCallback(() => { - history.push(getEntityPath(entityType, urn, entityRegistry, true)); + history.push(getEntityPath(entityType, urn, entityRegistry, true, false)); }, [history, entityType, urn, entityRegistry]); return ( diff --git a/datahub-web-react/src/app/entity/shared/types.ts b/datahub-web-react/src/app/entity/shared/types.ts index cf94fee7dfdd26..bbb9101f3472d6 100644 --- a/datahub-web-react/src/app/entity/shared/types.ts +++ b/datahub-web-react/src/app/entity/shared/types.ts @@ -17,9 +17,7 @@ import { Ownership, OwnershipUpdate, SchemaMetadata, - StringMapEntry, EntityLineageResult, - Domain, SubTypes, Container, Health, @@ -30,6 +28,8 @@ import { EntityRelationshipsResult, ParentNodesResult, SiblingProperties, + CustomPropertiesEntry, + DomainAssociation, } from '../../../types.generated'; import { FetchedEntity } from '../../lineage/types'; @@ -63,10 +63,10 @@ export type GenericEntityProperties = { globalTags?: Maybe; glossaryTerms?: Maybe; ownership?: Maybe; - domain?: Maybe; + domain?: Maybe; platform?: Maybe; dataPlatformInstance?: Maybe; - customProperties?: Maybe; + customProperties?: Maybe; institutionalMemory?: Maybe; schemaMetadata?: Maybe; 
externalUrl?: Maybe; @@ -116,6 +116,7 @@ export type UpdateEntityType = ( export type EntityContextType = { urn: string; entityType: EntityType; + dataNotCombinedWithSiblings: any; entityData: GenericEntityProperties | null; baseEntity: any; updateEntity?: UpdateEntityType | null; diff --git a/datahub-web-react/src/app/entity/shared/utils.ts b/datahub-web-react/src/app/entity/shared/utils.ts index 3c849aac3e261e..d7026091949ba6 100644 --- a/datahub-web-react/src/app/entity/shared/utils.ts +++ b/datahub-web-react/src/app/entity/shared/utils.ts @@ -1,5 +1,11 @@ import { GenericEntityProperties } from './types'; +export function dictToQueryStringParams(params: Record) { + return Object.keys(params) + .map((key) => `${key}=${params[key]}`) + .join('&'); +} + export function urlEncodeUrn(urn: string) { return ( urn && diff --git a/datahub-web-react/src/app/lineage/LineageEntityNode.tsx b/datahub-web-react/src/app/lineage/LineageEntityNode.tsx index 5c8110f0704f1a..19887f831081fc 100644 --- a/datahub-web-react/src/app/lineage/LineageEntityNode.tsx +++ b/datahub-web-react/src/app/lineage/LineageEntityNode.tsx @@ -11,6 +11,7 @@ import { capitalizeFirstLetter } from '../shared/textUtil'; import { nodeHeightFromTitleLength } from './utils/nodeHeightFromTitleLength'; import { LineageExplorerContext } from './utils/LineageExplorerContext'; import { useGetEntityLineageLazyQuery } from '../../graphql/lineage.generated'; +import { useIsSeparateSiblingsMode } from '../entity/shared/siblingUtils'; const CLICK_DELAY_THRESHOLD = 1000; const DRAG_DISTANCE_THRESHOLD = 20; @@ -90,6 +91,7 @@ export default function LineageEntityNode({ const [isExpanding, setIsExpanding] = useState(false); const [expandHover, setExpandHover] = useState(false); const [getAsyncEntityLineage, { data: asyncLineageData }] = useGetEntityLineageLazyQuery(); + const isHideSiblingMode = useIsSeparateSiblingsMode(); useEffect(() => { if (asyncLineageData && asyncLineageData.entity) { @@ -117,7 +119,7 @@ export default function LineageEntityNode({ ); let platformDisplayText = capitalizeFirstLetter(node.data.platform); - if (node.data.siblingPlatforms) { + if (node.data.siblingPlatforms && !isHideSiblingMode) { platformDisplayText = node.data.siblingPlatforms .map((platform) => platform.properties?.displayName || platform.name) .join(' & '); @@ -160,7 +162,9 @@ export default function LineageEntityNode({ setIsExpanding(true); if (node.data.urn && node.data.type) { // getAsyncEntity(node.data.urn, node.data.type); - getAsyncEntityLineage({ variables: { urn: node.data.urn } }); + getAsyncEntityLineage({ + variables: { urn: node.data.urn, separateSiblings: isHideSiblingMode }, + }); } }} onMouseOver={() => { @@ -249,14 +253,14 @@ export default function LineageEntityNode({ // eslint-disable-next-line react/style-prop-object style={{ filter: isSelected ? 
'url(#shadow1-selected)' : 'url(#shadow1)' }} /> - {node.data.siblingPlatforms && ( + {node.data.siblingPlatforms && !isHideSiblingMode && ( )} - {!node.data.siblingPlatforms && node.data.icon && ( + {(!node.data.siblingPlatforms || isHideSiblingMode) && node.data.icon && ( )} - {!node.data.icon && !node.data.siblingPlatforms && node.data.type && ( + {!node.data.icon && (!node.data.siblingPlatforms || isHideSiblingMode) && node.data.type && ( getEntityAndType(data), [data]); const [isDrawerVisible, setIsDrawVisible] = useState(false); const [selectedEntity, setSelectedEntity] = useState(undefined); const [asyncEntities, setAsyncEntities] = useState({}); + // in the case that sibling mode changes, we want to clear out our cache of entities + useEffect(() => { + setAsyncEntities({}); + }, [isHideSiblingMode]); + const drawerRef: React.MutableRefObject = useRef(null); const maybeAddAsyncLoadedEntity = useCallback( @@ -100,10 +110,10 @@ export default function LineageExplorer({ urn, type }: Props) { }; useEffect(() => { - if (type && entityData) { + if (type && entityData && !loading) { maybeAddAsyncLoadedEntity(entityData); } - }, [entityData, asyncEntities, setAsyncEntities, maybeAddAsyncLoadedEntity, urn, previousUrn, type]); + }, [entityData, setAsyncEntities, maybeAddAsyncLoadedEntity, urn, previousUrn, type, loading]); if (error || (!loading && !error && !data)) { return ; diff --git a/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx b/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx index ca8987b7e6497e..ffbc6bf39d1775 100644 --- a/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx +++ b/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx @@ -1,8 +1,9 @@ import React, { SVGProps, useEffect, useMemo, useState } from 'react'; -import { PlusOutlined, MinusOutlined } from '@ant-design/icons'; +import { PlusOutlined, MinusOutlined, QuestionCircleOutlined } from '@ant-design/icons'; import styled from 'styled-components'; -import { Button, Switch } from 'antd'; +import { Button, Switch, Tooltip } from 'antd'; import { ProvidedZoom, TransformMatrix } from '@vx/zoom/lib/types'; +import { useHistory, useLocation } from 'react-router-dom'; import LineageTree from './LineageTree'; import constructTree from './utils/constructTree'; @@ -10,6 +11,8 @@ import { Direction, EntityAndType, EntitySelectParams, FetchedEntity } from './t import { useEntityRegistry } from '../useEntityRegistry'; import { ANTD_GRAY } from '../entity/shared/constants'; import { LineageExplorerContext } from './utils/LineageExplorerContext'; +import { useIsSeparateSiblingsMode } from '../entity/shared/siblingUtils'; +import { navigateToLineageUrl } from './utils/navigateToLineageUrl'; const ZoomContainer = styled.div` position: relative; @@ -32,11 +35,8 @@ const DisplayControls = styled.div` box-shadow: 0px 0px 4px 0px #0000001a; `; -const ControlsTitle = styled.div` - margin-bottom: 12px; -`; - const ControlsSwitch = styled(Switch)` + margin-top: 12px; margin-right: 8px; `; @@ -62,6 +62,10 @@ const RootSvg = styled.svg<{ isDragging: boolean } & SVGProps>` } `; +const ControlLabel = styled.span` + vertical-align: sub; +`; + type Props = { margin: { top: number; right: number; bottom: number; left: number }; entityAndType?: EntityAndType | null; @@ -78,6 +82,11 @@ type Props = { height: number; }; +const HelpIcon = styled(QuestionCircleOutlined)` + color: ${ANTD_GRAY[7]}; + padding-left: 4px; +`; + export default function LineageVizInsideZoom({ zoom, margin, @@ -91,10 +100,13 @@ export default 
function LineageVizInsideZoom({ height, }: Props) { const [draggedNodes, setDraggedNodes] = useState>({}); + const history = useHistory(); + const location = useLocation(); const [hoveredEntity, setHoveredEntity] = useState(undefined); const [isDraggingNode, setIsDraggingNode] = useState(false); const [showExpandedTitles, setShowExpandedTitles] = useState(false); + const isHideSiblingMode = useIsSeparateSiblingsMode(); const entityRegistry = useEntityRegistry(); @@ -131,12 +143,34 @@ export default function LineageVizInsideZoom({ - Controls - setShowExpandedTitles(checked)} - />{' '} - Show Full Titles +
Controls
+
+ setShowExpandedTitles(checked)} + />{' '} + Show Full Titles +
+
+ { + navigateToLineageUrl({ + location, + history, + isLineageMode: true, + isHideSiblingMode: !checked, + }); + }} + />{' '} + + Compress Lineage{' '} + + + + +
{ const parsedSearch = QueryString.parse(location.search, { arrayFormat: 'comma' }); - const newSearch = { + let newSearch: any = { ...parsedSearch, is_lineage_mode: isLineageMode, }; + if (isHideSiblingMode !== undefined) { + newSearch = { + ...newSearch, + [SEPARATE_SIBLINGS_URL_PARAM]: isHideSiblingMode, + }; + } const newSearchStringified = QueryString.stringify(newSearch, { arrayFormat: 'comma' }); history.push({ diff --git a/datahub-web-react/src/app/recommendations/renderer/component/CompactEntityNameList.tsx b/datahub-web-react/src/app/recommendations/renderer/component/CompactEntityNameList.tsx index 176a9b4126d919..c3a116f6f4960a 100644 --- a/datahub-web-react/src/app/recommendations/renderer/component/CompactEntityNameList.tsx +++ b/datahub-web-react/src/app/recommendations/renderer/component/CompactEntityNameList.tsx @@ -1,5 +1,7 @@ +import { Tooltip } from 'antd'; import React from 'react'; -import { Entity } from '../../../../types.generated'; +import { useHistory } from 'react-router'; +import { Entity, SearchResult } from '../../../../types.generated'; import { IconStyleType } from '../../../entity/Entity'; import { useEntityRegistry } from '../../../useEntityRegistry'; import { EntityPreviewTag } from './EntityPreviewTag'; @@ -7,9 +9,13 @@ import { EntityPreviewTag } from './EntityPreviewTag'; type Props = { entities: Array; onClick?: (index: number) => void; + linkUrlParams?: Record; + showTooltips?: boolean; }; -export const CompactEntityNameList = ({ entities, onClick }: Props) => { +export const CompactEntityNameList = ({ entities, onClick, linkUrlParams, showTooltips = true }: Props) => { const entityRegistry = useEntityRegistry(); + const history = useHistory(); + return ( <> {entities.map((entity, index) => { @@ -17,15 +23,44 @@ export const CompactEntityNameList = ({ entities, onClick }: Props) => { const platformLogoUrl = genericProps?.platform?.properties?.logoUrl; const displayName = entityRegistry.getDisplayName(entity.type, entity); const fallbackIcon = entityRegistry.getIcon(entity.type, 12, IconStyleType.ACCENT); - const url = entityRegistry.getEntityUrl(entity.type, entity.urn); + const url = entityRegistry.getEntityUrl(entity.type, entity.urn, linkUrlParams); return ( - onClick?.(index)} - /> + { + // prevents the search links from taking over + e.preventDefault(); + history.push(url); + }} + > + + {entityRegistry.renderSearchResult(entity.type, { + entity, + matchedFields: [], + } as SearchResult)} + + } + > + + platform.properties?.logoUrl, + )} + logoComponent={fallbackIcon} + onClick={() => onClick?.(index)} + /> + + + ); })} diff --git a/datahub-web-react/src/app/recommendations/renderer/component/EntityNameList.tsx b/datahub-web-react/src/app/recommendations/renderer/component/EntityNameList.tsx index bbbcd26da82b5c..cdeb6932fc7e5a 100644 --- a/datahub-web-react/src/app/recommendations/renderer/component/EntityNameList.tsx +++ b/datahub-web-react/src/app/recommendations/renderer/component/EntityNameList.tsx @@ -104,7 +104,7 @@ export const EntityNameList = ({ additionalPropertiesList, entities, onClick }: titleSizePx={14} tags={genericProps?.globalTags || undefined} glossaryTerms={genericProps?.glossaryTerms || undefined} - domain={genericProps?.domain} + domain={genericProps?.domain?.domain} onClick={() => onClick?.(index)} entityCount={entityCount} degree={additionalProperties?.degree} diff --git a/datahub-web-react/src/app/recommendations/renderer/component/EntityPreviewTag.tsx 
b/datahub-web-react/src/app/recommendations/renderer/component/EntityPreviewTag.tsx index 181c2c0801e292..7f104c0e0cad77 100644 --- a/datahub-web-react/src/app/recommendations/renderer/component/EntityPreviewTag.tsx +++ b/datahub-web-react/src/app/recommendations/renderer/component/EntityPreviewTag.tsx @@ -2,6 +2,7 @@ import React from 'react'; import { Image, Tag } from 'antd'; import styled from 'styled-components'; import { Link } from 'react-router-dom'; +import { Maybe } from 'graphql/jsutils/Maybe'; const EntityTag = styled(Tag)` margin: 4px; @@ -37,17 +38,33 @@ type Props = { displayName: string; url: string; platformLogoUrl?: string; + platformLogoUrls?: Maybe[]; logoComponent?: React.ReactNode; onClick?: () => void; }; -export const EntityPreviewTag = ({ displayName, url, platformLogoUrl, logoComponent, onClick }: Props) => { +export const EntityPreviewTag = ({ + displayName, + url, + platformLogoUrl, + platformLogoUrls, + logoComponent, + onClick, +}: Props) => { return ( - {(platformLogoUrl && ) || + {(!!platformLogoUrl && !platformLogoUrls && ( + + )) || + (!!platformLogoUrls && + platformLogoUrls.slice(0, 2).map((platformLogoUrlsEntry) => ( + <> + + + ))) || logoComponent} diff --git a/datahub-web-react/src/app/search/SearchBar.tsx b/datahub-web-react/src/app/search/SearchBar.tsx index fda6d7db772e5c..1d9162a84344ba 100644 --- a/datahub-web-react/src/app/search/SearchBar.tsx +++ b/datahub-web-react/src/app/search/SearchBar.tsx @@ -308,7 +308,7 @@ export const SearchBar = ({ ); } else { // Navigate directly to the entity profile. - history.push(getEntityPath(option.type, value, entityRegistry, false)); + history.push(getEntityPath(option.type, value, entityRegistry, false, false)); } }} onSearch={(value: string) => onQueryChange(value)} diff --git a/datahub-web-react/src/app/search/SearchResults.tsx b/datahub-web-react/src/app/search/SearchResults.tsx index 199389ea59f2b6..ff8209f5da6ad4 100644 --- a/datahub-web-react/src/app/search/SearchResults.tsx +++ b/datahub-web-react/src/app/search/SearchResults.tsx @@ -24,7 +24,11 @@ import { SearchResultsRecommendations } from './SearchResultsRecommendations'; import { useGetAuthenticatedUser } from '../useGetAuthenticatedUser'; import { SearchResultsInterface } from '../entity/shared/components/styled/search/types'; import SearchExtendedMenu from '../entity/shared/components/styled/search/SearchExtendedMenu'; -import { CombinedSearchResult, combineSiblingsInSearchResults } from '../entity/shared/siblingUtils'; +import { + CombinedSearchResult, + combineSiblingsInSearchResults, + SEPARATE_SIBLINGS_URL_PARAM, +} from '../entity/shared/siblingUtils'; import { CompactEntityNameList } from '../recommendations/renderer/component/CompactEntityNameList'; const ResultList = styled(List)` @@ -255,7 +259,10 @@ export const SearchResults = ({ {item.matchedEntities && item.matchedEntities.length > 0 && ( - + )} diff --git a/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx b/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx index c8de58acd77772..307340f91db607 100644 --- a/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx +++ b/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx @@ -5,7 +5,15 @@ import styled from 'styled-components'; import { BookOutlined, PlusOutlined } from '@ant-design/icons'; import { useEntityRegistry } from '../../useEntityRegistry'; -import { Domain, EntityType, GlobalTags, GlossaryTerms, SubResourceType } from '../../../types.generated'; +import { + Domain, + EntityType, + GlobalTags, + 
GlossaryTermAssociation, + GlossaryTerms, + SubResourceType, + TagAssociation, +} from '../../../types.generated'; import { StyledTag } from '../../entity/shared/components/styled/StyledTag'; import { EMPTY_MESSAGES, ANTD_GRAY } from '../../entity/shared/constants'; import { useRemoveTagMutation, useRemoveTermMutation } from '../../../graphql/mutations.generated'; @@ -84,19 +92,19 @@ export default function TagTermGroup({ const [tagProfileDrawerVisible, setTagProfileDrawerVisible] = useState(false); const [addTagUrn, setAddTagUrn] = useState(''); - const removeTag = (urnToRemove: string) => { + const removeTag = (tagAssociationToRemove: TagAssociation) => { + const tagToRemove = tagAssociationToRemove.tag; onOpenModal?.(); - const tagToRemove = editableTags?.tags?.find((tag) => tag.tag.urn === urnToRemove); Modal.confirm({ - title: `Do you want to remove ${tagToRemove?.tag.name} tag?`, - content: `Are you sure you want to remove the ${tagToRemove?.tag.name} tag?`, + title: `Do you want to remove ${tagToRemove?.name} tag?`, + content: `Are you sure you want to remove the ${tagToRemove?.name} tag?`, onOk() { - if (entityUrn) { + if (tagAssociationToRemove.associatedUrn || entityUrn) { removeTagMutation({ variables: { input: { - tagUrn: urnToRemove, - resourceUrn: entityUrn, + tagUrn: tagToRemove.urn, + resourceUrn: tagAssociationToRemove.associatedUrn || entityUrn || '', subResource: entitySubresource, subResourceType: entitySubresource ? SubResourceType.DatasetField : null, }, @@ -121,20 +129,19 @@ export default function TagTermGroup({ }); }; - const removeTerm = (urnToRemove: string) => { + const removeTerm = (termToRemove: GlossaryTermAssociation) => { onOpenModal?.(); - const termToRemove = editableGlossaryTerms?.terms?.find((term) => term.term.urn === urnToRemove); const termName = termToRemove && entityRegistry.getDisplayName(termToRemove.term.type, termToRemove.term); Modal.confirm({ title: `Do you want to remove ${termName} term?`, content: `Are you sure you want to remove the ${termName} term?`, onOk() { - if (entityUrn) { + if (termToRemove.associatedUrn || entityUrn) { removeTermMutation({ variables: { input: { - termUrn: urnToRemove, - resourceUrn: entityUrn, + termUrn: termToRemove.term.urn, + resourceUrn: termToRemove.associatedUrn || entityUrn || '', subResource: entitySubresource, subResourceType: entitySubresource ? 
SubResourceType.DatasetField : null, }, @@ -205,7 +212,7 @@ export default function TagTermGroup({ closable={canRemove} onClose={(e) => { e.preventDefault(); - removeTerm(term.term.urn); + removeTerm(term); }} > @@ -249,7 +256,7 @@ export default function TagTermGroup({ closable={canRemove} onClose={(e) => { e.preventDefault(); - removeTag(tag?.tag?.urn); + removeTag(tag); }} > {tag?.tag?.name} diff --git a/datahub-web-react/src/graphql-mock/fixtures/entity/chartEntity.ts b/datahub-web-react/src/graphql-mock/fixtures/entity/chartEntity.ts index 75e6eab07864d3..3fa3f2b2bef4a0 100644 --- a/datahub-web-react/src/graphql-mock/fixtures/entity/chartEntity.ts +++ b/datahub-web-react/src/graphql-mock/fixtures/entity/chartEntity.ts @@ -60,6 +60,7 @@ export const chartEntity = (tool): Chart => { owner: datahubUser, type: OwnershipType.Stakeholder, __typename: 'Owner', + associatedUrn: `urn:li:chart:(${tool},${name})`, }, ], lastModified: { time: 1619717962718, __typename: 'AuditStamp' }, diff --git a/datahub-web-react/src/graphql-mock/fixtures/entity/dashboardEntity.ts b/datahub-web-react/src/graphql-mock/fixtures/entity/dashboardEntity.ts index 6fc703dd549b6f..73e08118739cc1 100644 --- a/datahub-web-react/src/graphql-mock/fixtures/entity/dashboardEntity.ts +++ b/datahub-web-react/src/graphql-mock/fixtures/entity/dashboardEntity.ts @@ -42,6 +42,7 @@ export const dashboardEntity = (tool): Dashboard => { owner: datahubUser, type: OwnershipType.Stakeholder, __typename: 'Owner', + associatedUrn: `urn:li:dashboard:(${tool},${name})`, }, ], lastModified: { time: 1619993818664, __typename: 'AuditStamp' }, @@ -53,6 +54,7 @@ export const dashboardEntity = (tool): Dashboard => { owner: kafkaUser, type: OwnershipType.Stakeholder, __typename: 'Owner', + associatedUrn: `urn:li:dashboard:(${tool},${name})`, }, ], lastModified: { time: 1619993818664, __typename: 'AuditStamp' }, @@ -83,16 +85,19 @@ export const dashboardEntity = (tool): Dashboard => { { owner: datahubUser, type: OwnershipType.Stakeholder, + associatedUrn: `urn:li:dashboard:(${tool},${name})`, __typename: 'Owner', }, { owner: kafkaUser, type: OwnershipType.Developer, + associatedUrn: `urn:li:dashboard:(${tool},${name})`, __typename: 'Owner', }, { owner: lookerUser, type: OwnershipType.Developer, + associatedUrn: `urn:li:dashboard:(${tool},${name})`, __typename: 'Owner', }, ], @@ -103,10 +108,12 @@ export const dashboardEntity = (tool): Dashboard => { tags: [ { tag: generateTag(datahubOwnership), + associatedUrn: `urn:li:dashboard:(${tool},${name})`, __typename: 'TagAssociation', }, { tag: generateTag(kafkaOwnership), + associatedUrn: `urn:li:dashboard:(${tool},${name})`, __typename: 'TagAssociation', }, ], diff --git a/datahub-web-react/src/graphql-mock/fixtures/entity/dataFlowEntity.ts b/datahub-web-react/src/graphql-mock/fixtures/entity/dataFlowEntity.ts index 497ac1d2c24b18..1f5e3d6daa1df6 100644 --- a/datahub-web-react/src/graphql-mock/fixtures/entity/dataFlowEntity.ts +++ b/datahub-web-react/src/graphql-mock/fixtures/entity/dataFlowEntity.ts @@ -60,6 +60,7 @@ export const dataFlowEntity = ({ orchestrator, cluster }: DataFlowEntityArg): Da { owner: datahubUser, type: OwnershipType.Stakeholder, + associatedUrn: `urn:li:dataFlow:(${orchestrator},${flowId},${cluster})`, __typename: 'Owner', }, ], diff --git a/datahub-web-react/src/graphql-mock/fixtures/entity/dataJobEntity.ts b/datahub-web-react/src/graphql-mock/fixtures/entity/dataJobEntity.ts index c241f414fe29f0..8f740d4e2d64f2 100644 --- 
a/datahub-web-react/src/graphql-mock/fixtures/entity/dataJobEntity.ts +++ b/datahub-web-react/src/graphql-mock/fixtures/entity/dataJobEntity.ts @@ -55,50 +55,126 @@ export const dataJobEntity = (): DataJob => { project: null, externalUrl: 'https://airflow.demo.datahubproject.io/tree?dag_id=datahub_analytics_refresh', customProperties: [ - { key: 'end_date', value: 'None', __typename: 'StringMapEntry' }, - { key: 'orientation', value: "'LR'", __typename: 'StringMapEntry' }, - { key: 'max_active_runs', value: '16', __typename: 'StringMapEntry' }, - { key: 'is_paused_upon_creation', value: 'None', __typename: 'StringMapEntry' }, - { key: 'timezone', value: "'UTC'", __typename: 'StringMapEntry' }, - { key: 'params', value: '{}', __typename: 'StringMapEntry' }, + { + key: 'end_date', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + value: 'None', + __typename: 'CustomPropertiesEntry', + }, + { + key: 'orientation', + value: "'LR'", + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: 'max_active_runs', + value: '16', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: 'is_paused_upon_creation', + value: 'None', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: 'timezone', + value: "'UTC'", + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: 'params', + value: '{}', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, { key: 'fileloc', value: "'/opt/airflow/dags/repo/airflow/dags/datahub_analytics_refresh.py'", - __typename: 'StringMapEntry', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', }, { key: 'default_args', value: "{: {'owner': 'harshal', 'depends_on_past': False, 'email': ['harshal@acryl.io'], 'email_on_failure': False, 'execution_timeout': {: 300.0, : }}, : }", - __typename: 'StringMapEntry', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: 'tags', + value: 'None', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: '_access_control', + value: 'None', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: 'doc_md', + value: 'None', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: 'dagrun_timeout', + value: 'None', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', }, - { key: 'tags', value: 'None', __typename: 'StringMapEntry' }, - { key: '_access_control', value: 'None', __typename: 'StringMapEntry' }, - { key: 'doc_md', value: 'None', __typename: 'StringMapEntry' }, - { key: 'dagrun_timeout', value: 'None', __typename: 'StringMapEntry' }, { key: '_dag_id', value: "'datahub_analytics_refresh'", - __typename: 'StringMapEntry', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: 'catchup', + value: 'False', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', }, - { key: 'catchup', value: 'False', __typename: 'StringMapEntry' }, { key: 'schedule_interval', value: "{: 86400.0, : }", - __typename: 'StringMapEntry', + 
associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: '_default_view', + value: 'None', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', }, - { key: '_default_view', value: 'None', __typename: 'StringMapEntry' }, { key: '_description', value: "'Refresh snowflake tables for analytics purposes'", - __typename: 'StringMapEntry', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: '_concurrency', + value: '16', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', }, - { key: '_concurrency', value: '16', __typename: 'StringMapEntry' }, { key: 'tasks', value: "[{'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': []}, 'ui_color': '#f0ede4', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'run_data_task', '_inlets': {'auto': False, 'task_ids': [], 'datasets': []}, 'template_fields': ['bash_command', 'env'], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['split_s3_task'], '_task_type': 'BashOperator', '_task_module': 'airflow.operators.bash_operator', 'bash_command': \"echo 'This is where we might run the backup job'\"}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.all_entities', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.user_basic_info', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.user_extra_info', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_ownerships', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_properties', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_schemas', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_schema_extras', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_tags', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_lineages', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_statuses', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.tag_ownerships', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.tag_properties', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataflow_ownerships', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataflow_info', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataflow_tags', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.datajob_ownerships', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.datajob_info', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.datajob_lineages', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.datajob_tags', env='PROD')\", 
\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.chart_info', env='PROD')\", \"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dashboard_info', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'split_s3_task', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahub-demo-backup.demo.aspects', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['load_all_entities_to_snowflake', 'load_tag_ownerships_to_snowflake', 'load_dataset_tags_to_snowflake', 'load_dataset_properties_to_snowflake', 'load_dataset_schema_extras_to_snowflake', 'load_dataset_schemas_to_snowflake', 'load_datajob_lineages_to_snowflake', 'load_dataflow_ownerships_to_snowflake', 'load_dashboard_info_to_snowflake', 'load_datajob_tags_to_snowflake', 'load_dataset_statuses_to_snowflake', 'load_chart_info_to_snowflake', 'load_user_basic_info_to_snowflake', 'load_user_extra_info_to_snowflake', 'load_datajob_info_to_snowflake', 'load_dataset_lineages_to_snowflake', 'load_dataset_ownerships_to_snowflake', 'load_datajob_ownerships_to_snowflake', 'load_tag_properties_to_snowflake', 'load_dataflow_info_to_snowflake', 'load_dataflow_tags_to_snowflake'], '_task_type': 'S3FileToDirectoryTransform', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': []}, 'ui_color': '#e8f7e4', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'wait_for_load_finish', '_inlets': {'auto': False, 'task_ids': [], 'datasets': []}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['update_generated_latest_dataset_owners', 'update_generated_latest_dataset_info', 'update_generated_dataset_platforms'], '_task_type': 'DummyOperator', '_task_module': 'airflow.operators.dummy_operator'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.all_entities', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_all_entities_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.all_entities', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.user_basic_info', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_user_basic_info_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.user_basic_info', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', 
name='demo_pipeline.public.user_extra_info', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_user_extra_info_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.user_extra_info', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_ownerships', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dataset_ownerships_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_ownerships', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_properties', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dataset_properties_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_properties', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_schemas', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dataset_schemas_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_schemas', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_schema_extras', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dataset_schema_extras_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_schema_extras', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': 
[\"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_tags', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dataset_tags_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_tags', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_lineages', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dataset_lineages_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_lineages', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_statuses', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dataset_statuses_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataset_statuses', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.tag_ownerships', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_tag_ownerships_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.tag_ownerships', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.tag_properties', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_tag_properties_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.tag_properties', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': 
[\"Dataset(platform='snowflake', name='demo_pipeline.public.dataflow_ownerships', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dataflow_ownerships_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataflow_ownerships', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.dataflow_info', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dataflow_info_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataflow_info', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.dataflow_tags', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dataflow_tags_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dataflow_tags', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.datajob_ownerships', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_datajob_ownerships_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.datajob_ownerships', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.datajob_info', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_datajob_info_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.datajob_info', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': 
[\"Dataset(platform='snowflake', name='demo_pipeline.public.datajob_lineages', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_datajob_lineages_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.datajob_lineages', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.datajob_tags', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_datajob_tags_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.datajob_tags', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.chart_info', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_chart_info_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.chart_info', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.dashboard_info', env='PROD')\"]}, 'ui_color': '#fff', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'load_dashboard_info_to_snowflake', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='s3', name='datahubproject-demo-pipelines.entity_aspect_splits.dashboard_info', env='PROD')\"]}, 'template_fields': [], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['wait_for_load_finish'], '_task_type': 'LoadS3IntoSnowflakeOperator', '_task_module': 'unusual_prefix_436311363ce00ecbd709f747db697044ba2913d4_datahub_analytics_refresh'}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.generated_dataset_platforms', env='PROD')\"]}, 'ui_color': '#ededed', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'update_generated_dataset_platforms', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.all_entities', env='PROD')\"]}, 'template_fields': ['sql'], 'email': ['harshal@acryl.io'], '_downstream_task_ids': ['update_generated_latest_dataset_owners', 'update_generated_latest_dataset_info'], '_task_type': 'SnowflakeOperator', '_task_module': 'airflow.providers.snowflake.operators.snowflake', 'sql': \"\\nCREATE OR REPLACE TABLE generated_dataset_platforms AS (\\n SELECT urn, 
split(split(urn, ',')[0], ':')[6]::string as platform\\n FROM all_entities\\n WHERE all_entities.entity = 'dataset'\\n);\\n \"}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.generated_latest_dataset_owners', env='PROD')\"]}, 'ui_color': '#ededed', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'update_generated_latest_dataset_owners', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.all_entities', env='PROD')\", \"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_ownerships', env='PROD')\", \"Dataset(platform='snowflake', name='demo_pipeline.public.generated_dataset_platforms', env='PROD')\"]}, 'template_fields': ['sql'], 'email': ['harshal@acryl.io'], '_downstream_task_ids': [], '_task_type': 'SnowflakeOperator', '_task_module': 'airflow.providers.snowflake.operators.snowflake', 'sql': \"\\nCREATE OR REPLACE TABLE generated_latest_dataset_owners AS (\\n WITH latest_dataset_owners AS (SELECT * FROM dataset_ownerships WHERE version = 0)\\n SELECT all_entities.urn, generated_dataset_platforms.platform, metadata:owners as owners, ARRAY_SIZE(COALESCE(metadata:owners, array_construct())) as owner_count\\n FROM all_entities\\n LEFT JOIN latest_dataset_owners ON all_entities.urn = latest_dataset_owners.urn\\n LEFT JOIN generated_dataset_platforms ON all_entities.urn = generated_dataset_platforms.urn\\n WHERE all_entities.entity = 'dataset'\\n);\\n \"}, {'execution_timeout': 300.0, 'ui_fgcolor': '#000', '_outlets': {'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.generated_latest_dataset_info', env='PROD')\"]}, 'ui_color': '#ededed', 'email_on_failure': False, 'owner': 'harshal', 'task_id': 'update_generated_latest_dataset_info', '_inlets': {'auto': False, 'task_ids': [], 'datasets': [\"Dataset(platform='snowflake', name='demo_pipeline.public.all_entities', env='PROD')\", \"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_properties', env='PROD')\", \"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_tags', env='PROD')\", \"Dataset(platform='snowflake', name='demo_pipeline.public.dataset_lineages', env='PROD')\", \"Dataset(platform='snowflake', name='demo_pipeline.public.generated_dataset_platforms', env='PROD')\"]}, 'template_fields': ['sql'], 'email': ['harshal@acryl.io'], '_downstream_task_ids': [], '_task_type': 'SnowflakeOperator', '_task_module': 'airflow.providers.snowflake.operators.snowflake', 'sql': \"\\nCREATE OR REPLACE TABLE generated_latest_dataset_info AS (\\n WITH\\n latest_dataset_info AS (SELECT * FROM dataset_properties WHERE version = 0),\\n latest_dataset_tags AS (SELECT * FROM dataset_tags WHERE version = 0),\\n latest_dataset_lineages AS (SELECT * FROM dataset_lineages WHERE version = 0)\\n SELECT\\n all_entities.urn,\\n generated_dataset_platforms.platform,\\n latest_dataset_info.metadata:description::string as description,\\n COALESCE(IS_NULL_VALUE(latest_dataset_info.metadata:description), TRUE) as is_missing_docs,\\n latest_dataset_info.metadata:customProperties as properties,\\n ARRAY_CAT(COALESCE(latest_dataset_tags.metadata:tags, array_construct()), COALESCE(latest_dataset_info.metadata:tags, array_construct())) as tags,\\n latest_dataset_lineages.metadata:upstreams as upstreams,\\n COALESCE(ARRAY_SIZE(latest_dataset_lineages.metadata:upstreams), 0) as upstream_count,\\n COALESCE(ARRAY_SIZE(latest_dataset_lineages.metadata:upstreams), 
0) > 0 as has_upstreams\\n FROM all_entities\\n LEFT JOIN latest_dataset_info ON all_entities.urn = latest_dataset_info.urn\\n LEFT JOIN latest_dataset_tags ON all_entities.urn = latest_dataset_tags.urn\\n LEFT JOIN latest_dataset_lineages ON all_entities.urn = latest_dataset_lineages.urn\\n LEFT JOIN generated_dataset_platforms ON all_entities.urn = generated_dataset_platforms.urn\\n WHERE all_entities.entity = 'dataset'\\n);\\n \"}]", - __typename: 'StringMapEntry', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', + }, + { + key: 'start_date', + value: '1619913600.0', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, + __typename: 'CustomPropertiesEntry', }, - { key: 'start_date', value: '1619913600.0', __typename: 'StringMapEntry' }, ], __typename: 'DataFlowInfo', }, @@ -118,6 +194,7 @@ export const dataJobEntity = (): DataJob => { owner: kafkaUser, type: OwnershipType.Developer, __typename: 'Owner', + associatedUrn: `urn:li:dataJob:${dataFlowURN},${jobId})`, }, ], lastModified: { time: 1620079975489, __typename: 'AuditStamp' }, diff --git a/datahub-web-react/src/graphql-mock/fixtures/entity/datasetEntity.ts b/datahub-web-react/src/graphql-mock/fixtures/entity/datasetEntity.ts index 8361f354c1ad98..d7476f6c44a162 100644 --- a/datahub-web-react/src/graphql-mock/fixtures/entity/datasetEntity.ts +++ b/datahub-web-react/src/graphql-mock/fixtures/entity/datasetEntity.ts @@ -59,6 +59,7 @@ export const datasetEntity = ({ platform, origin, path }: DatasetEntityArg): Dat { owner: datahubUser, type: OwnershipType.Dataowner, + associatedUrn: `urn:li:dataset:(${platformURN},${name},${origin.toUpperCase()})`, __typename: 'Owner', }, ], diff --git a/datahub-web-react/src/graphql-mock/fixtures/tag.ts b/datahub-web-react/src/graphql-mock/fixtures/tag.ts index 1c5e01d26521c4..a681e421b1b8a5 100644 --- a/datahub-web-react/src/graphql-mock/fixtures/tag.ts +++ b/datahub-web-react/src/graphql-mock/fixtures/tag.ts @@ -34,6 +34,7 @@ export const createTag = ({ name, urn, description }: TagUpdateInput): Tag => { { owner: user, type: OwnershipType.Dataowner, + associatedUrn: urn, __typename: 'Owner', }, ], diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 92e83b260293e8..51cbc29d4f9708 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -8,6 +8,7 @@ fragment globalTagsFields on GlobalTags { colorHex } } + associatedUrn } } @@ -39,6 +40,7 @@ fragment glossaryTerms on GlossaryTerms { term { ...glossaryTerm } + associatedUrn } } @@ -147,6 +149,7 @@ fragment ownershipFields on Ownership { } } type + associatedUrn } lastModified { time @@ -805,9 +808,12 @@ fragment entityContainer on Container { } } -fragment entityDomain on Domain { - urn - properties { - name +fragment entityDomain on DomainAssociation { + domain { + urn + properties { + name + } } + associatedUrn } diff --git a/datahub-web-react/src/graphql/lineage.graphql b/datahub-web-react/src/graphql/lineage.graphql index 2e7a376cb49bbf..6c7e438fd2f76e 100644 --- a/datahub-web-react/src/graphql/lineage.graphql +++ b/datahub-web-react/src/graphql/lineage.graphql @@ -263,7 +263,7 @@ fragment partialLineageResults on EntityLineageResult { total } -query getEntityLineage($urn: String!) { +query getEntityLineage($urn: String!, $separateSiblings: Boolean) { entity(urn: $urn) { urn type @@ -279,10 +279,14 @@ query getEntityLineage($urn: String!) { } } ... 
on EntityWithRelationships { - upstream: lineage(input: { direction: UPSTREAM, start: 0, count: 100 }) { + upstream: lineage( + input: { direction: UPSTREAM, start: 0, count: 100, separateSiblings: $separateSiblings } + ) { ...fullLineageResults } - downstream: lineage(input: { direction: DOWNSTREAM, start: 0, count: 100 }) { + downstream: lineage( + input: { direction: DOWNSTREAM, start: 0, count: 100, separateSiblings: $separateSiblings } + ) { ...fullLineageResults } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java index 401dcededbe79d..9cf08072878dd9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java @@ -27,6 +27,12 @@ public class SiblingGraphService { private final EntityService _entityService; private final GraphService _graphService; + @Nonnull + public EntityLineageResult getLineage(@Nonnull Urn entityUrn, @Nonnull LineageDirection direction, int offset, + int count, int maxHops) { + return getLineage(entityUrn, direction, offset, count, maxHops, false); + } + /** * Traverse from the entityUrn towards the input direction up to maxHops number of hops * Abstracts away the concept of relationship types @@ -35,7 +41,11 @@ public class SiblingGraphService { */ @Nonnull public EntityLineageResult getLineage(@Nonnull Urn entityUrn, @Nonnull LineageDirection direction, int offset, - int count, int maxHops) { + int count, int maxHops, boolean separateSiblings) { + if (separateSiblings) { + return _graphService.getLineage(entityUrn, direction, offset, count, maxHops); + } + if (maxHops > 1) { throw new UnsupportedOperationException( String.format("More than 1 hop is not supported for %s", this.getClass().getSimpleName())); diff --git a/smoke-test/tests/cypress/cypress/integration/siblings/siblings.js b/smoke-test/tests/cypress/cypress/integration/siblings/siblings.js index ad04670eecdcf7..a0fa90e97e3d11 100644 --- a/smoke-test/tests/cypress/cypress/integration/siblings/siblings.js +++ b/smoke-test/tests/cypress/cypress/integration/siblings/siblings.js @@ -29,6 +29,62 @@ describe('siblings', () => { cy.get('[data-testid="table-stats-rowcount"]').contains("100"); }); + it('can view individual nodes', () => { + cy.login(); + cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false'); + + // navigate to the bq entity + cy.get('[data-testid="compact-entity-link-urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)"]').click({ force: true }); + + // check merged platforms is not shown + cy.get('[data-testid="entity-header-test-id"]').contains('dbt & BigQuery').should('not.exist'); + cy.get('[data-testid="entity-header-test-id"]').contains('BigQuery'); + + // check dbt schema descriptions not shown + cy.contains('This is a unique identifier for a customer').should('not.exist'); + + // check merged profile still there (from bigquery) + cy.contains('Stats').click({ force: true }); + cy.get('[data-testid="table-stats-rowcount"]').contains("100"); + }); + + it('can mutate at individual node or combined node level', () => { + cy.login(); + cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false'); + + // navigate to the bq entity + 
cy.get('[data-testid="compact-entity-link-urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)"]').click({ force: true }); + + cy.contains('Add Term').click(); + + cy.focused().type('CypressTerm'); + + cy.get('.ant-select-item-option-content').within(() => cy.contains('CypressTerm').click({force: true})); + + cy.get('[data-testid="add-tag-term-from-modal-btn"]').click({force: true}); + + cy.wait(2000); + + cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false'); + + cy.get('a[href="/glossaryTerm/urn:li:glossaryTerm:CypressNode.CypressTerm"]').within(() => cy.get('span[aria-label=close]').click()); + cy.contains('Yes').click(); + + cy.contains('CypressTerm').should('not.exist'); + }); + + it('will combine results in search', () => { + cy.login(); + cy.visit('/search?page=1&query=%2522raw_orders%2522'); + + cy.contains('Showing 1 - 2 of 2 results'); + + cy.get('.test-search-result').should('have.length', 1); + cy.get('.test-search-result-sibling-section').should('have.length', 1); + + cy.get('.test-search-result-sibling-section').get('.test-mini-preview-class:contains(raw_orders)').should('have.length', 2); + }); + it('will combine results in search', () => { cy.login(); cy.visit('/search?page=1&query=%2522raw_orders%2522'); @@ -58,4 +114,26 @@ describe('siblings', () => { // check the platform cy.get('svg').get('text:contains(dbt & BigQuery)').should('have.length', 5); }); + + it('can separate results in lineage if flag is set', () => { + cy.login(); + cy.visit('dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)/?is_lineage_mode=true'); + + cy.get('[data-testid="compress-lineage-toggle"]').click({ force: true }); + + // check the subtypes + cy.get('text:contains(View)').should('have.length', 2); + cy.get('text:contains(Table)').should('have.length', 0); + cy.get('text:contains(Seed)').should('have.length', 1); + + // check the names + cy.get('text:contains(raw_orders)').should('have.length', 1); + // center counts twice since we secretely render two center nodes, plus the downstream bigquery + cy.get('text:contains(stg_orders)').should('have.length', 3); + + // check the platform + cy.get('svg').get('text:contains(dbt & BigQuery)').should('have.length', 0); + cy.get('svg').get('text:contains(Dbt)').should('have.length', 3); + cy.get('svg').get('text:contains(Bigquery)').should('have.length', 1); + }); }); diff --git a/smoke-test/tests/domains/domains_test.py b/smoke-test/tests/domains/domains_test.py index 50f447f80a4065..a53ef71c9e4143 100644 --- a/smoke-test/tests/domains/domains_test.py +++ b/smoke-test/tests/domains/domains_test.py @@ -218,9 +218,11 @@ def test_set_unset_domain(frontend_session, ingest_cleanup_data): dataset(urn: $urn) {\n urn\n domain {\n - urn\n - properties{\n - name\n + domain {\n + urn\n + properties{\n + name\n + }\n }\n }\n }\n @@ -235,5 +237,5 @@ def test_set_unset_domain(frontend_session, ingest_cleanup_data): res_data = response.json() assert res_data - assert res_data["data"]["dataset"]["domain"]["urn"] == domain_urn - assert res_data["data"]["dataset"]["domain"]["properties"]["name"] == "Engineering" + assert res_data["data"]["dataset"]["domain"]["domain"]["urn"] == domain_urn + assert res_data["data"]["dataset"]["domain"]["domain"]["properties"]["name"] == "Engineering" From ab549257b440668b696d92edd5f913c87fa7735b Mon Sep 17 00:00:00 2001 From: Ankit keshari 
 <86347578+Ankit-Keshari-Vituity@users.noreply.github.com>
Date: Fri, 15 Jul 2022 06:59:54 +0530
Subject: [PATCH 21/22] refactor(ui): Added Cursor pointer to tags (#5389)

---
 datahub-web-react/src/app/shared/tags/TagTermGroup.tsx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx b/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx
index 307340f91db607..0f74072739d80d 100644
--- a/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx
+++ b/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx
@@ -199,7 +199,7 @@ export default function TagTermGroup({
                     to={entityRegistry.getEntityUrl(EntityType.GlossaryTerm, term.term.urn)}
                     key={term.term.urn}
                 >
-
+
                         {entityRegistry.getDisplayName(EntityType.GlossaryTerm, term.term)}
@@ -209,6 +209,7 @@ export default function TagTermGroup({
                 {editableGlossaryTerms?.terms?.map((term) => (
                         {
                             e.preventDefault();
@@ -232,6 +233,7 @@
                     return (
                             showTagProfileDrawer(tag?.tag?.urn)}
                             $colorHash={tag?.tag?.urn}
                             $color={tag?.tag?.properties?.colorHex}

From 4857af57c1899ee542c11f5ef3efe776d83e741b Mon Sep 17 00:00:00 2001
From: John Joyce
Date: Thu, 14 Jul 2022 23:47:46 -0700
Subject: [PATCH 22/22] feat(GMS): Adding Dashboard Usage Models (#5399)

---
 .../dashboard/DashboardUsageStatistics.pdl   | 54 +++++++++++++++++++
 .../dashboard/DashboardUserUsageCounts.pdl   | 32 +++++++++++
 .../src/main/resources/entity-registry.yml   |  1 +
 3 files changed, 87 insertions(+)
 create mode 100644 metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardUsageStatistics.pdl
 create mode 100644 metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardUserUsageCounts.pdl

diff --git a/metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardUsageStatistics.pdl b/metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardUsageStatistics.pdl
new file mode 100644
index 00000000000000..468afd34ca5269
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardUsageStatistics.pdl
@@ -0,0 +1,54 @@
+namespace com.linkedin.dashboard
+
+import com.linkedin.timeseries.TimeseriesAspectBase
+
+/**
+ * Stats corresponding to dashboard's usage.
+ *
+ * If this aspect represents the latest snapshot of the statistics about a Dashboard, the eventGranularity field should be null.
+ * If this aspect represents a bucketed window of usage statistics (e.g. over a day), then the eventGranularity field should be set accordingly.
+ */
+@Aspect = {
+  "name": "dashboardUsageStatistics",
+  "type": "timeseries",
+}
+record DashboardUsageStatistics includes TimeseriesAspectBase {
+  /**
+   * The total number of times dashboard has been viewed
+   */
+  @TimeseriesField = {}
+  viewsCount: optional int
+
+  /**
+   * The total number of dashboard executions (refreshes / syncs)
+   */
+  @TimeseriesField = {}
+  executionsCount: optional int
+
+  /**
+   * Unique user count
+   */
+  @TimeseriesField = {}
+  uniqueUserCount: optional int
+
+  /**
+   * Users within this bucket, with frequency counts
+   */
+  @TimeseriesFieldCollection = {"key":"user"}
+  userCounts: optional array[DashboardUserUsageCounts]
+
+  /**
+   * The total number of times that the dashboard has been favorited
+   */
+  @TimeseriesField = {}
+  favoritesCount: optional int
+
+  /**
+   * Last viewed at
+   *
+   * This should not be set in cases where statistics are windowed.
+   */
+  @TimeseriesField = {}
+  lastViewedAt: optional long
+
+}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardUserUsageCounts.pdl b/metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardUserUsageCounts.pdl
new file mode 100644
index 00000000000000..da56af6472196a
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/dashboard/DashboardUserUsageCounts.pdl
@@ -0,0 +1,32 @@
+namespace com.linkedin.dashboard
+
+import com.linkedin.common.Urn
+
+/**
+ * Records a single user's usage counts for a given resource
+ */
+record DashboardUserUsageCounts {
+  /**
+   * The unique id of the user.
+   */
+  user: Urn
+
+  /**
+   * The number of times the user has viewed the dashboard
+   */
+  @TimeseriesField = {}
+  viewsCount: optional int
+
+  /**
+   * The number of times the user has executed (refreshed) the dashboard
+   */
+  @TimeseriesField = {}
+  executionsCount: optional int
+
+  /**
+   * Normalized numeric metric representing user's dashboard usage -- the number of times the user executed or viewed the dashboard.
+   */
+  @TimeseriesField = {}
+  usageCount: optional int
+
+}
diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml
index fd174c9fbacd7b..75e72ee5eec811 100644
--- a/metadata-models/src/main/resources/entity-registry.yml
+++ b/metadata-models/src/main/resources/entity-registry.yml
@@ -63,6 +63,7 @@ entities:
       - domains
       - container
       - deprecation
+      - dashboardUsageStatistics
   - name: notebook
     doc: Notebook represents a combination of query, text, chart and etc. This is in BETA version
     keyAspect: notebookKey
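To make the shape of the new timeseries aspect concrete, here is a small illustrative TypeScript mirror of the two records above, plus a helper that rolls per-user counts up into bucket-level totals. This is only a sketch: the real types are generated from the PDL, the `TimeseriesAspectBase` fields are assumed to be `timestampMillis` plus an optional `eventGranularity`, and the `summarize` helper is not part of this patch.

```typescript
// Illustrative mirror of DashboardUserUsageCounts (real types are generated from the PDL).
interface DashboardUserUsageCounts {
    user: string; // corp user urn
    viewsCount?: number;
    executionsCount?: number;
    usageCount?: number;
}

// Illustrative mirror of DashboardUsageStatistics; eventGranularity is left loosely typed.
interface DashboardUsageStatistics {
    timestampMillis: number; // assumed to come from TimeseriesAspectBase
    eventGranularity?: unknown; // null/absent for latest-snapshot rows, set for bucketed windows
    viewsCount?: number;
    executionsCount?: number;
    uniqueUserCount?: number;
    userCounts?: DashboardUserUsageCounts[];
    favoritesCount?: number;
    lastViewedAt?: number;
}

// Hypothetical helper: derive bucket totals from per-user counts when the
// top-level counters are missing.
function summarize(stat: DashboardUsageStatistics): { views: number; uniqueUsers: number } {
    const users = stat.userCounts ?? [];
    const views = stat.viewsCount ?? users.reduce((sum, u) => sum + (u.viewsCount ?? 0), 0);
    return { views, uniqueUsers: stat.uniqueUserCount ?? users.length };
}
```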