diff --git a/README.md b/README.md index d78295523d6519..fc68777e10101b 100644 --- a/README.md +++ b/README.md @@ -110,8 +110,11 @@ Here are the companies that have officially adopted DataHub. Please feel free to - [Adevinta](https://www.adevinta.com/) - [Banksalad](https://www.banksalad.com) - [Cabify](https://cabify.tech/) +- [ClassDojo](https://www.classdojo.com/) +- [Coursera](https://www.coursera.org/) - [DefinedCrowd](http://www.definedcrowd.com) - [DFDS](https://www.dfds.com/) +- [Digital Turbine](https://www.digitalturbine.com/) - [Expedia Group](http://expedia.com) - [Experius](https://www.experius.nl) - [Geotab](https://www.geotab.com) @@ -122,15 +125,21 @@ Here are the companies that have officially adopted DataHub. Please feel free to - [Klarna](https://www.klarna.com) - [LinkedIn](http://linkedin.com) - [Moloco](https://www.moloco.com/en) +- [N26](https://n26brasil.com/) +- [Optum](https://www.optum.com/) - [Peloton](https://www.onepeloton.com) - [Razer](https://www.razer.com) - [Saxo Bank](https://www.home.saxo) +- [Showroomprive](https://www.showroomprive.com/) +- [SpotHero](https://spothero.com) - [Stash](https://www.stash.com) - [Shanghai HuaRui Bank](https://www.shrbank.com) - [ThoughtWorks](https://www.thoughtworks.com) - [TypeForm](http://typeform.com) +- [Udemy](https://www.udemy.com/) - [Uphold](https://uphold.com) - [Viasat](https://viasat.com) +- [Wikimedia](https://www.wikimedia.org) - [Wolt](https://wolt.com) - [Zynga](https://www.zynga.com) diff --git a/build.gradle b/build.gradle index b57419e285a38c..044699c58e8544 100644 --- a/build.gradle +++ b/build.gradle @@ -10,6 +10,10 @@ buildscript { ext.testContainersVersion = '1.17.4' ext.jacksonVersion = '2.13.4' ext.jettyVersion = '9.4.46.v20220331' + ext.log4jVersion = '2.19.0' + ext.slf4jVersion = '1.7.32' + ext.logbackClassic = '1.2.11' + apply from: './repositories.gradle' buildscript.repositories.addAll(project.repositories) dependencies { @@ -55,6 +59,8 @@ project.ext.externalDependency = [ 'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.10', 'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:1.1.1', 'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.8', + 'awsPostgresIamAuth': 'software.amazon.jdbc:aws-advanced-jdbc-wrapper:1.0.0', + 'awsRds':'software.amazon.awssdk:rds:2.18.24', 'cacheApi' : 'javax.cache:cache-api:1.1.0', 'commonsCli': 'commons-cli:commons-cli:1.5.0', 'commonsIo': 'commons-io:commons-io:2.4', @@ -112,9 +118,11 @@ project.ext.externalDependency = [ 'kafkaAvroSerde': 'io.confluent:kafka-streams-avro-serde:5.5.1', 'kafkaAvroSerializer': 'io.confluent:kafka-avro-serializer:5.1.4', 'kafkaClients': 'org.apache.kafka:kafka-clients:2.3.0', - 'logbackClassic': 'ch.qos.logback:logback-classic:1.2.9', - 'log4jCore': 'org.apache.logging.log4j:log4j-core:2.19.0', - 'log4jApi': 'org.apache.logging.log4j:log4j-api:2.19.0', + 'logbackClassic': "ch.qos.logback:logback-classic:$logbackClassic", + 'slf4jApi': "org.slf4j:slf4j-api:$slf4jVersion", + 'log4jCore': "org.apache.logging.log4j:log4j-core:$log4jVersion", + 'log4jApi': "org.apache.logging.log4j:log4j-api:$log4jVersion", + 'log4j12Api': "org.slf4j:log4j-over-slf4j:$slf4jVersion", 'lombok': 'org.projectlombok:lombok:1.18.12', 'mariadbConnector': 'org.mariadb.jdbc:mariadb-java-client:2.6.0', 'mavenArtifact': "org.apache.maven:maven-artifact:$mavenVersion", @@ -193,15 +201,12 @@ configure(subprojects.findAll {! 
it.name.startsWith('spark-lineage') }) { exclude group: "io.netty", module: "netty" exclude group: "log4j", module: "log4j" exclude group: "org.springframework.boot", module: "spring-boot-starter-logging" - exclude group: "ch.qos.logback", module: "logback-classic" exclude group: "org.apache.logging.log4j", module: "log4j-to-slf4j" exclude group: "com.vaadin.external.google", module: "android-json" exclude group: "org.slf4j", module: "slf4j-reload4j" exclude group: "org.slf4j", module: "slf4j-log4j12" exclude group: "org.slf4j", module: "slf4j-nop" exclude group: "org.slf4j", module: "slf4j-ext" - exclude group: "org.slf4j", module: "jul-to-slf4j" - exclude group: "org.slf4j", module: "jcl-over-toslf4j" } } diff --git a/datahub-frontend/play.gradle b/datahub-frontend/play.gradle index 579449e9e39b16..f6ecd57534dfa6 100644 --- a/datahub-frontend/play.gradle +++ b/datahub-frontend/play.gradle @@ -55,13 +55,14 @@ dependencies { testImplementation externalDependency.playTest testCompile externalDependency.testng + implementation externalDependency.slf4jApi compileOnly externalDependency.lombok runtime externalDependency.guice runtime (externalDependency.playDocs) { exclude group: 'com.typesafe.akka', module: 'akka-http-core_2.12' } runtime externalDependency.playGuice - runtime externalDependency.logbackClassic + implementation externalDependency.logbackClassic annotationProcessor externalDependency.lombok } diff --git a/datahub-graphql-core/build.gradle b/datahub-graphql-core/build.gradle index aa13ce05d7d59e..528054833bb9aa 100644 --- a/datahub-graphql-core/build.gradle +++ b/datahub-graphql-core/build.gradle @@ -15,6 +15,7 @@ dependencies { compile externalDependency.antlr4 compile externalDependency.guava + implementation externalDependency.slf4jApi compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok diff --git a/datahub-ranger-plugin/build.gradle b/datahub-ranger-plugin/build.gradle index b3277a664af22f..a08d3f2b1e4c9c 100644 --- a/datahub-ranger-plugin/build.gradle +++ b/datahub-ranger-plugin/build.gradle @@ -28,7 +28,7 @@ dependencies { exclude group: "org.apache.htrace", module: "htrace-core4" } implementation externalDependency.hadoopCommon3 - implementation externalDependency.log4jApi + implementation externalDependency.log4j12Api constraints { implementation(externalDependency.woodstoxCore) { diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index 49872fa111d514..4d4d2b99390bcf 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -14,7 +14,8 @@ dependencies { exclude group: 'com.nimbusds', module: 'nimbus-jose-jwt' exclude group: "org.apache.htrace", module: "htrace-core4" } - compile externalDependency.lombok + implementation externalDependency.slf4jApi + compileOnly externalDependency.lombok compile externalDependency.picocli compile externalDependency.parquet compile externalDependency.springBeans diff --git a/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx b/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx index 76bd2e34eddeaa..440d16f518fe03 100644 --- a/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx +++ b/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx @@ -10,6 +10,7 @@ import { Message } from '../../shared/Message'; import { useListDomainsQuery } from '../../../graphql/domain.generated'; import filterSearchQuery from '../../search/utils/filterSearchQuery'; import { ANTD_GRAY } from 
'../../entity/shared/constants'; +import { useGetAuthenticatedUser } from '../../useGetAuthenticatedUser'; const HighlightGroup = styled.div` display: flex; @@ -46,6 +47,8 @@ const StyledSearchBar = styled(Input)` `; export const AnalyticsPage = () => { + const me = useGetAuthenticatedUser(); + const canManageDomains = me?.platformPrivileges?.createDomains; const { data: chartData, loading: chartLoading, error: chartError } = useGetAnalyticsChartsQuery(); const { data: highlightData, loading: highlightLoading, error: highlightError } = useGetHighlightsQuery(); const { @@ -53,6 +56,7 @@ export const AnalyticsPage = () => { error: domainError, data: domainData, } = useListDomainsQuery({ + skip: !canManageDomains, variables: { input: { start: 0, @@ -82,12 +86,11 @@ export const AnalyticsPage = () => { skip: domain === '' && query === '', }); + const isLoading = highlightLoading || chartLoading || domainLoading || metadataAnalyticsLoading; return ( <> + {isLoading && } - {highlightLoading && ( - - )} {highlightError && ( )} @@ -96,7 +99,6 @@ export const AnalyticsPage = () => { ))} <> - {chartLoading && } {chartError && ( )} @@ -107,7 +109,6 @@ export const AnalyticsPage = () => { ))} <> - {domainLoading && } {domainError && ( )} @@ -148,9 +149,6 @@ export const AnalyticsPage = () => { )} <> - {metadataAnalyticsLoading && ( - - )} {metadataAnalyticsError && ( )} @@ -165,7 +163,6 @@ export const AnalyticsPage = () => { ))} <> - {chartLoading && } {chartError && } {!chartLoading && chartData?.getAnalyticsCharts diff --git a/datahub-web-react/src/app/entity/mlFeatureTable/profile/features/MlFeatureTableFeatures.tsx b/datahub-web-react/src/app/entity/mlFeatureTable/profile/features/MlFeatureTableFeatures.tsx index 8f908e3a2e486b..7a3a933140e2d3 100644 --- a/datahub-web-react/src/app/entity/mlFeatureTable/profile/features/MlFeatureTableFeatures.tsx +++ b/datahub-web-react/src/app/entity/mlFeatureTable/profile/features/MlFeatureTableFeatures.tsx @@ -1,163 +1,23 @@ -import React, { useState } from 'react'; -import { Table, Typography } from 'antd'; -import { CheckSquareOutlined } from '@ant-design/icons'; -import { AlignType } from 'rc-table/lib/interface'; -import styled from 'styled-components'; -import { Link } from 'react-router-dom'; +import React from 'react'; -import MlFeatureDataTypeIcon from './MlFeatureDataTypeIcon'; -import { MlFeatureDataType, MlPrimaryKey, MlFeature } from '../../../../../types.generated'; +import { MlPrimaryKey, MlFeature } from '../../../../../types.generated'; import { GetMlFeatureTableQuery } from '../../../../../graphql/mlFeatureTable.generated'; -import { useBaseEntity, useRefetch } from '../../../shared/EntityContext'; +import { useBaseEntity } from '../../../shared/EntityContext'; import { notEmpty } from '../../../shared/utils'; -import TagTermGroup from '../../../../shared/tags/TagTermGroup'; -import SchemaDescriptionField from '../../../dataset/profile/schema/components/SchemaDescriptionField'; -import { useUpdateDescriptionMutation } from '../../../../../graphql/mutations.generated'; -import { useEntityRegistry } from '../../../../useEntityRegistry'; - -const FeaturesContainer = styled.div` - margin-bottom: 100px; -`; - -const defaultColumns = [ - { - title: 'Type', - dataIndex: 'dataType', - key: 'dataType', - width: 100, - align: 'left' as AlignType, - render: (dataType: MlFeatureDataType) => { - return ; - }, - }, -]; +import TableOfMlFeatures from './TableOfMlFeatures'; export default function MlFeatureTableFeatures() { const baseEntity = useBaseEntity(); 
- const refetch = useRefetch(); const featureTable = baseEntity?.mlFeatureTable; - const [updateDescription] = useUpdateDescriptionMutation(); - const entityRegistry = useEntityRegistry(); - - const [tagHoveredIndex, setTagHoveredIndex] = useState(undefined); - const features = + const features = ( featureTable?.properties && (featureTable?.properties?.mlFeatures || featureTable?.properties?.mlPrimaryKeys) ? [ ...(featureTable?.properties?.mlPrimaryKeys || []), ...(featureTable?.properties?.mlFeatures || []), ].filter(notEmpty) - : []; - - const onTagTermCell = (record: any, rowIndex: number | undefined) => ({ - onMouseEnter: () => { - setTagHoveredIndex(`${record.urn}-${rowIndex}`); - }, - onMouseLeave: () => { - setTagHoveredIndex(undefined); - }, - }); - - const nameColumn = { - title: 'Name', - dataIndex: 'name', - key: 'name', - width: 100, - render: (name: string, feature: MlFeature | MlPrimaryKey) => ( - - {name} - - ), - }; - - const descriptionColumn = { - title: 'Description', - dataIndex: 'description', - key: 'description', - render: (_, feature: MlFeature | MlPrimaryKey) => ( - - updateDescription({ - variables: { - input: { - description: updatedDescription, - resourceUrn: feature.urn, - }, - }, - }).then(refetch) - } - /> - ), - width: 300, - }; - - const tagColumn = { - width: 125, - title: 'Tags', - dataIndex: 'tags', - key: 'tags', - render: (_, feature: MlFeature | MlPrimaryKey, rowIndex: number) => ( - setTagHoveredIndex(undefined)} - entityUrn={feature.urn} - entityType={feature.type} - refetch={refetch} - /> - ), - onCell: onTagTermCell, - }; - - const termColumn = { - width: 125, - title: 'Terms', - dataIndex: 'glossaryTerms', - key: 'glossarTerms', - render: (_, feature: MlFeature | MlPrimaryKey, rowIndex: number) => ( - setTagHoveredIndex(undefined)} - entityUrn={feature.urn} - entityType={feature.type} - refetch={refetch} - /> - ), - onCell: onTagTermCell, - }; - - const primaryKeyColumn = { - title: 'Primary Key', - dataIndex: 'primaryKey', - key: 'primaryKey', - render: (_: any, record: MlFeature | MlPrimaryKey) => - record.__typename === 'MLPrimaryKey' ? 
: null, - width: 50, - }; - - const allColumns = [...defaultColumns, nameColumn, descriptionColumn, tagColumn, termColumn, primaryKeyColumn]; + : [] + ) as Array; - return ( - - {features && features.length > 0 && ( - `${record.dataType}-${record.name}`} - expandable={{ defaultExpandAllRows: true, expandRowByClick: true }} - pagination={false} - /> - )} - - ); + return ; } diff --git a/datahub-web-react/src/app/entity/mlFeatureTable/profile/features/TableOfMlFeatures.tsx b/datahub-web-react/src/app/entity/mlFeatureTable/profile/features/TableOfMlFeatures.tsx new file mode 100644 index 00000000000000..cf0bb808b3278d --- /dev/null +++ b/datahub-web-react/src/app/entity/mlFeatureTable/profile/features/TableOfMlFeatures.tsx @@ -0,0 +1,155 @@ +import React, { useState } from 'react'; +import { Table, Typography } from 'antd'; +import { CheckSquareOutlined } from '@ant-design/icons'; +import { AlignType } from 'rc-table/lib/interface'; +import styled from 'styled-components'; +import { Link } from 'react-router-dom'; + +import MlFeatureDataTypeIcon from './MlFeatureDataTypeIcon'; +import { MlFeatureDataType, MlPrimaryKey, MlFeature } from '../../../../../types.generated'; +import { useRefetch } from '../../../shared/EntityContext'; +import TagTermGroup from '../../../../shared/tags/TagTermGroup'; +import SchemaDescriptionField from '../../../dataset/profile/schema/components/SchemaDescriptionField'; +import { useUpdateDescriptionMutation } from '../../../../../graphql/mutations.generated'; +import { useEntityRegistry } from '../../../../useEntityRegistry'; + +const FeaturesContainer = styled.div` + margin-bottom: 100px; +`; + +const defaultColumns = [ + { + title: 'Type', + dataIndex: 'dataType', + key: 'dataType', + width: 100, + align: 'left' as AlignType, + render: (dataType: MlFeatureDataType) => { + return ; + }, + }, +]; + +type Props = { + features: Array; +}; + +export default function TableOfMlFeatures({ features }: Props) { + const refetch = useRefetch(); + const [updateDescription] = useUpdateDescriptionMutation(); + const entityRegistry = useEntityRegistry(); + + const [tagHoveredIndex, setTagHoveredIndex] = useState(undefined); + + const onTagTermCell = (record: any, rowIndex: number | undefined) => ({ + onMouseEnter: () => { + setTagHoveredIndex(`${record.urn}-${rowIndex}`); + }, + onMouseLeave: () => { + setTagHoveredIndex(undefined); + }, + }); + + const nameColumn = { + title: 'Name', + dataIndex: 'name', + key: 'name', + width: 100, + render: (name: string, feature: MlFeature | MlPrimaryKey) => ( + + {name} + + ), + }; + + const descriptionColumn = { + title: 'Description', + dataIndex: 'description', + key: 'description', + render: (_, feature: MlFeature | MlPrimaryKey) => ( + + updateDescription({ + variables: { + input: { + description: updatedDescription, + resourceUrn: feature.urn, + }, + }, + }).then(refetch) + } + /> + ), + width: 300, + }; + + const tagColumn = { + width: 125, + title: 'Tags', + dataIndex: 'tags', + key: 'tags', + render: (_, feature: MlFeature | MlPrimaryKey, rowIndex: number) => ( + setTagHoveredIndex(undefined)} + entityUrn={feature.urn} + entityType={feature.type} + refetch={refetch} + /> + ), + onCell: onTagTermCell, + }; + + const termColumn = { + width: 125, + title: 'Terms', + dataIndex: 'glossaryTerms', + key: 'glossaryTerms', + render: (_, feature: MlFeature | MlPrimaryKey, rowIndex: number) => ( + setTagHoveredIndex(undefined)} + entityUrn={feature.urn} + entityType={feature.type} + refetch={refetch} + /> + ), + onCell: onTagTermCell, + }; + 
+ const primaryKeyColumn = { + title: 'Primary Key', + dataIndex: 'primaryKey', + key: 'primaryKey', + render: (_: any, record: MlFeature | MlPrimaryKey) => + record.__typename === 'MLPrimaryKey' ? : null, + width: 50, + }; + + const allColumns = [...defaultColumns, nameColumn, descriptionColumn, tagColumn, termColumn, primaryKeyColumn]; + + return ( + + {features && features.length > 0 && ( +
`${record.dataType}-${record.name}`} + expandable={{ defaultExpandAllRows: true, expandRowByClick: true }} + pagination={false} + /> + )} + + ); +} diff --git a/datahub-web-react/src/app/entity/mlModel/profile/MlModelFeaturesTab.tsx b/datahub-web-react/src/app/entity/mlModel/profile/MlModelFeaturesTab.tsx index 65c6a0c9b84a72..b8dc64793c2256 100644 --- a/datahub-web-react/src/app/entity/mlModel/profile/MlModelFeaturesTab.tsx +++ b/datahub-web-react/src/app/entity/mlModel/profile/MlModelFeaturesTab.tsx @@ -1,15 +1,17 @@ import React from 'react'; -import { EntityType } from '../../../../types.generated'; +import { MlPrimaryKey, MlFeature } from '../../../../types.generated'; import { useBaseEntity } from '../../shared/EntityContext'; import { GetMlModelQuery } from '../../../../graphql/mlModel.generated'; -import { EntityList } from '../../shared/tabs/Entity/components/EntityList'; +import TableOfMlFeatures from '../../mlFeatureTable/profile/features/TableOfMlFeatures'; export default function MlModelFeaturesTab() { const entity = useBaseEntity() as GetMlModelQuery; const model = entity && entity.mlModel; - const features = model?.features?.relationships.map((relationship) => relationship.entity); + const features = model?.features?.relationships.map((relationship) => relationship.entity) as Array< + MlFeature | MlPrimaryKey + >; - return ; + return ; } diff --git a/datahub-web-react/src/app/recommendations/renderer/component/EntityNameList.tsx b/datahub-web-react/src/app/recommendations/renderer/component/EntityNameList.tsx index 6323a8ac74bb51..73819d1f9fcf5d 100644 --- a/datahub-web-react/src/app/recommendations/renderer/component/EntityNameList.tsx +++ b/datahub-web-react/src/app/recommendations/renderer/component/EntityNameList.tsx @@ -13,7 +13,7 @@ const StyledCheckbox = styled(Checkbox)` `; const StyledList = styled(List)` - overflow-y: scroll; + overflow-y: auto; height: 100%; margin-top: -1px; box-shadow: ${(props) => props.theme.styles['box-shadow']}; diff --git a/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx b/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx index f4e70e1b9007d4..ab242b49fed69f 100644 --- a/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx +++ b/datahub-web-react/src/app/search/AdvancedSearchFilters.tsx @@ -11,11 +11,9 @@ import { FIELDS_THAT_USE_CONTAINS_OPERATOR, UnionType } from './utils/constants' import { AdvancedSearchAddFilterSelect } from './AdvancedSearchAddFilterSelect'; export const SearchFilterWrapper = styled.div` - min-height: 100%; + flex: 1; + padding: 6px 12px 10px 12px; overflow: auto; - margin-top: 6px; - margin-left: 12px; - margin-right: 12px; &::-webkit-scrollbar { height: 12px; diff --git a/datahub-web-react/src/app/search/SearchFiltersSection.tsx b/datahub-web-react/src/app/search/SearchFiltersSection.tsx index a2fca0605b4ec3..cca78ae2ae4923 100644 --- a/datahub-web-react/src/app/search/SearchFiltersSection.tsx +++ b/datahub-web-react/src/app/search/SearchFiltersSection.tsx @@ -17,7 +17,8 @@ type Props = { }; const FiltersContainer = styled.div` - display: block; + display: flex; + flex-direction: column; max-width: 260px; min-width: 260px; overflow-wrap: break-word; @@ -45,7 +46,8 @@ const FiltersHeader = styled.div` `; const SearchFilterContainer = styled.div` - padding-top: 10px; + flex: 1; + overflow: auto; `; // This component renders the entire filters section that allows toggling diff --git a/datahub-web-react/src/app/search/SimpleSearchFilters.tsx 
b/datahub-web-react/src/app/search/SimpleSearchFilters.tsx index e6b4da2f455310..654341be7715c5 100644 --- a/datahub-web-react/src/app/search/SimpleSearchFilters.tsx +++ b/datahub-web-react/src/app/search/SimpleSearchFilters.tsx @@ -7,6 +7,7 @@ import { SimpleSearchFilter } from './SimpleSearchFilter'; const TOP_FILTERS = ['degree', 'entity', 'tags', 'glossaryTerms', 'domains', 'owners']; export const SearchFilterWrapper = styled.div` + padding-top: 10px; max-height: 100%; overflow: auto; diff --git a/datahub-web-react/src/graphql/mlModel.graphql b/datahub-web-react/src/graphql/mlModel.graphql index 5d60be86a31c2b..91280f0904c558 100644 --- a/datahub-web-react/src/graphql/mlModel.graphql +++ b/datahub-web-react/src/graphql/mlModel.graphql @@ -8,7 +8,21 @@ query getMLModel($urn: String!) { ...partialLineageResults } features: relationships(input: { types: ["Consumes"], direction: OUTGOING, start: 0, count: 100 }) { - ...fullRelationshipResults + start + count + total + relationships { + type + direction + entity { + ... on MLFeature { + ...nonRecursiveMLFeature + } + ... on MLPrimaryKey { + ...nonRecursiveMLPrimaryKey + } + } + } } } } diff --git a/docker/datahub-actions/env/docker.env b/docker/datahub-actions/env/docker.env index 48fb0b080d2436..363d9bc578b426 100644 --- a/docker/datahub-actions/env/docker.env +++ b/docker/datahub-actions/env/docker.env @@ -20,3 +20,18 @@ KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT # KAFKA_PROPERTIES_SSL_KEYSTORE_PASSWORD=keystore_password # KAFKA_PROPERTIES_SSL_KEY_PASSWORD=keystore_password # KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD=truststore_password + +# The following env vars are meant to be passed through from the host system +# to configure the Slack and Teams Actions +# _ENABLED flags must be set to "true" (case-sensitive) for the action to be enabled +DATAHUB_ACTIONS_SLACK_ENABLED +DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL +DATAHUB_ACTIONS_SLACK_BOT_TOKEN +DATAHUB_ACTIONS_SLACK_SIGNING_SECRET +DATAHUB_ACTIONS_SLACK_CHANNEL +DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY + +DATAHUB_ACTIONS_TEAMS_ENABLED +DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL +DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL +DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY diff --git a/docker/datahub-gms/env/docker.postgres.env b/docker/datahub-gms/env/docker.postgres.env index 0f4f78dccb77b8..f99134ebb02388 100644 --- a/docker/datahub-gms/env/docker.postgres.env +++ b/docker/datahub-gms/env/docker.postgres.env @@ -3,6 +3,9 @@ EBEAN_DATASOURCE_PASSWORD=datahub EBEAN_DATASOURCE_HOST=postgres:5432 EBEAN_DATASOURCE_URL=jdbc:postgresql://postgres:5432/datahub EBEAN_DATASOURCE_DRIVER=org.postgresql.Driver +# Uncomment EBEAN_POSTGRES_USE_AWS_IAM_AUTH below to add support for IAM authentication for Postgres. +# Password is not required when accessing Postgres using IAM auth.
It can be replaced by a dummy password. +# EBEAN_POSTGRES_USE_AWS_IAM_AUTH=true KAFKA_BOOTSTRAP_SERVER=broker:29092 KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 ELASTICSEARCH_HOST=elasticsearch diff --git a/docker/mariadb/init.sql b/docker/mariadb/init.sql index 084fdc93a3717a..c4132575cf442c 100644 --- a/docker/mariadb/init.sql +++ b/docker/mariadb/init.sql @@ -11,6 +11,8 @@ create table metadata_aspect_v2 ( constraint pk_metadata_aspect_v2 primary key (urn,aspect,version) ); +create index timeIndex on metadata_aspect_v2 (createdon); + insert into metadata_aspect_v2 (urn, aspect, version, metadata, createdon, createdby) values( 'urn:li:corpuser:datahub', 'corpUserInfo', diff --git a/docker/mysql-setup/init.sql b/docker/mysql-setup/init.sql index 6bd7133a359a89..78098af4648bce 100644 --- a/docker/mysql-setup/init.sql +++ b/docker/mysql-setup/init.sql @@ -12,7 +12,8 @@ create table if not exists metadata_aspect_v2 ( createdon datetime(6) not null, createdby varchar(255) not null, createdfor varchar(255), - constraint pk_metadata_aspect_v2 primary key (urn,aspect,version) + constraint pk_metadata_aspect_v2 primary key (urn,aspect,version), + INDEX timeIndex (createdon) ); -- create default records for datahub user if not exists diff --git a/docker/mysql/init.sql b/docker/mysql/init.sql index fa9d856f499e4e..97ae3ea1467445 100644 --- a/docker/mysql/init.sql +++ b/docker/mysql/init.sql @@ -8,7 +8,8 @@ CREATE TABLE metadata_aspect_v2 ( createdon datetime(6) NOT NULL, createdby VARCHAR(255) NOT NULL, createdfor VARCHAR(255), - CONSTRAINT pk_metadata_aspect_v2 PRIMARY KEY (urn,aspect,version) + CONSTRAINT pk_metadata_aspect_v2 PRIMARY KEY (urn,aspect,version), + INDEX timeIndex (createdon) ) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; INSERT INTO metadata_aspect_v2 (urn, aspect, version, metadata, createdon, createdby) VALUES( diff --git a/docker/postgres-setup/init.sql b/docker/postgres-setup/init.sql index e7c515e7385acc..12fff7aec7fe6f 100644 --- a/docker/postgres-setup/init.sql +++ b/docker/postgres-setup/init.sql @@ -11,6 +11,8 @@ CREATE TABLE IF NOT EXISTS metadata_aspect_v2 ( CONSTRAINT pk_metadata_aspect_v2 PRIMARY KEY (urn, aspect, version) ); +CREATE INDEX timeIndex ON metadata_aspect_v2 (createdon); + -- create default records for datahub user if not exists CREATE TEMP TABLE temp_metadata_aspect_v2 AS TABLE metadata_aspect_v2; INSERT INTO temp_metadata_aspect_v2 (urn, aspect, version, metadata, createdon, createdby) VALUES( diff --git a/docker/postgres/init.sql b/docker/postgres/init.sql index 72298ed4b6726d..4da8adaf8a6da0 100644 --- a/docker/postgres/init.sql +++ b/docker/postgres/init.sql @@ -11,6 +11,8 @@ create table metadata_aspect_v2 ( constraint pk_metadata_aspect_v2 primary key (urn,aspect,version) ); +create index timeIndex on metadata_aspect_v2 (createdon); + insert into metadata_aspect_v2 (urn, aspect, version, metadata, createdon, createdby) values( 'urn:li:corpuser:datahub', 'corpUserInfo', diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index e87f55c0086c81..942a7b1f4c952c 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -33,6 +33,16 @@ services: - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing - KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT + - DATAHUB_ACTIONS_SLACK_ENABLED + - DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL + -
DATAHUB_ACTIONS_SLACK_BOT_TOKEN + - DATAHUB_ACTIONS_SLACK_SIGNING_SECRET + - DATAHUB_ACTIONS_SLACK_CHANNEL + - DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY + - DATAHUB_ACTIONS_TEAMS_ENABLED + - DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL + - DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL + - DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY hostname: actions image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} restart: on-failure:5 diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index e6f6b73396de39..7917b845c91d54 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -33,6 +33,16 @@ services: - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing - KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT + - DATAHUB_ACTIONS_SLACK_ENABLED + - DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL + - DATAHUB_ACTIONS_SLACK_BOT_TOKEN + - DATAHUB_ACTIONS_SLACK_SIGNING_SECRET + - DATAHUB_ACTIONS_SLACK_CHANNEL + - DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY + - DATAHUB_ACTIONS_TEAMS_ENABLED + - DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL + - DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL + - DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY hostname: actions image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} restart: on-failure:5 diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index 486740bcf418ad..3f6bd5a348c326 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -35,6 +35,16 @@ services: - DATAHUB_SYSTEM_CLIENT_ID=__datahub_system - DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing - KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT + - DATAHUB_ACTIONS_SLACK_ENABLED + - DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL + - DATAHUB_ACTIONS_SLACK_BOT_TOKEN + - DATAHUB_ACTIONS_SLACK_SIGNING_SECRET + - DATAHUB_ACTIONS_SLACK_CHANNEL + - DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY + - DATAHUB_ACTIONS_TEAMS_ENABLED + - DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL + - DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL + - DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY hostname: actions image: acryldata/datahub-actions:${ACTIONS_VERSION:-head} restart: on-failure:5 diff --git a/docker/quickstart/generate_docker_quickstart.py b/docker/quickstart/generate_docker_quickstart.py index 3a54d8c21155dd..4888adda2d0382 100644 --- a/docker/quickstart/generate_docker_quickstart.py +++ b/docker/quickstart/generate_docker_quickstart.py @@ -1,11 +1,11 @@ import os +from collections import OrderedDict +from collections.abc import Mapping + import click import yaml -from collections.abc import Mapping from dotenv import dotenv_values from yaml import Loader -from collections import OrderedDict - # Generates a merged docker-compose file with env variables inlined. # Usage: python3 docker_compose_cli_gen.py ../docker-compose.yml ../docker-compose.override.yml ../docker-compose-gen.yml @@ -54,7 +54,10 @@ def modify_docker_config(base_path, docker_yaml_config): # 5. Append to an "environment" block to YAML for key, value in env_vars.items(): - service["environment"].append(f"{key}={value}") + if value is not None: + service["environment"].append(f"{key}={value}") + else: + service["environment"].append(f"{key}") # 6. 
Delete the "env_file" value del service["env_file"] diff --git a/docs-website/src/pages/_components/Logos/index.js b/docs-website/src/pages/_components/Logos/index.js index da8484fec7eb72..0046cb6094288a 100644 --- a/docs-website/src/pages/_components/Logos/index.js +++ b/docs-website/src/pages/_components/Logos/index.js @@ -151,6 +151,11 @@ const companiesByIndustry = [ { name: "And More", companies: [ + { + name: "Wikimedia Foundation", + imageUrl: "/img/logos/companies/wikimedia-foundation.png", + imageSize: "medium", + }, { name: "Cabify", imageUrl: "/img/logos/companies/cabify.png", diff --git a/docs-website/static/img/logos/companies/wikimedia-foundation.png b/docs-website/static/img/logos/companies/wikimedia-foundation.png new file mode 100644 index 00000000000000..c4119fab23be1d Binary files /dev/null and b/docs-website/static/img/logos/companies/wikimedia-foundation.png differ diff --git a/docs/actions/README.md b/docs/actions/README.md index fa0c6cb4b71efe..23596ec67514e5 100644 --- a/docs/actions/README.md +++ b/docs/actions/README.md @@ -203,6 +203,8 @@ Some pre-included Actions include - [Hello World](actions/hello_world.md) - [Executor](actions/executor.md) +- [Slack](actions/slack.md) +- [Microsoft Teams](actions/teams.md) ## Development diff --git a/docs/actions/actions/slack.md b/docs/actions/actions/slack.md new file mode 100644 index 00000000000000..6416fdb2665538 --- /dev/null +++ b/docs/actions/actions/slack.md @@ -0,0 +1,282 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# Slack + +| | | +| --- | --- | +| **Status** | ![Incubating](https://img.shields.io/badge/support%20status-incubating-blue) | +| **Version Requirements** | ![Minimum Version Requirements](https://img.shields.io/badge/acryl_datahub_actions-v0.0.9+-green.svg) | + + +## Overview + +This Action integrates DataHub with Slack to send notifications to a configured Slack channel in your workspace. + +### Capabilities + +- Sending notifications of important events to a Slack channel + - Adding or Removing a tag from an entity (dataset, dashboard, etc.) + - Updating documentation at the entity or field (column) level. + - Adding or Removing ownership from an entity (dataset, dashboard, etc.) + - Creating a Domain + - and many more. + +### User Experience + +On startup, the action will produce a welcome message that looks like the one below. +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_welcome_message.png) + + +On each event, the action will produce a notification message that looks like the one below. +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_notification_message.png) + +Watch the townhall demo to see this in action: +[![Slack Action Demo](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_demo_image.png)](https://www.youtube.com/watch?v=BlCLhG8lGoY&t=2998s) + +### Supported Events + +- `EntityChangeEvent_v1` +- Currently, the `MetadataChangeLog_v1` event is **not** processed by the Action. + +## Action Quickstart + +### Prerequisites + +Ensure that you have configured the Slack App in your Slack workspace. + +#### Install the DataHub Slack App into your Slack workspace + +The following steps should be performed by a Slack Workspace Admin.
+ +- Navigate to https://api.slack.com/apps/ +- Click Create New App +- Use the “From an app manifest” option +- Select your workspace +- Paste this manifest in YAML. We suggest changing the name and `display_name` to `DataHub App YOUR_TEAM_NAME`, but this is not required. This name will show up in your Slack workspace. +```yml +display_information: + name: DataHub App + description: An app to integrate DataHub with Slack + background_color: "#000000" +features: + bot_user: + display_name: DataHub App + always_online: false +oauth_config: + scopes: + bot: + - channels:history + - channels:read + - chat:write + - commands + - groups:read + - im:read + - mpim:read + - team:read + - users:read + - users:read.email +settings: + org_deploy_enabled: false + socket_mode_enabled: false + token_rotation_enabled: false +``` + +- Confirm you see the Basic Information Tab + +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_basic_info.png) + +- Click **Install to Workspace** +- It will show you the permissions the Slack App is asking for, what they mean, and a default channel to which you want to add the Slack app + - Note that the Slack App will only be able to post in channels that the app has been added to. This is also made clear by Slack’s authentication screen. +- Select the channel you'd like notifications to go to and click **Allow** +- Go to the DataHub App page + - You can find your workspace's list of apps at https://api.slack.com/apps/ + +#### Getting Credentials and Configuration + +Now that you've created your app and installed it in your workspace, you need a few pieces of information before you can activate your Slack action. + +#### 1. The Signing Secret + +On your app's Basic Information page, you will see an App Credentials area. Take note of the Signing Secret; you will need it later. + +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_app_credentials.png) + + +#### 2. The Bot Token + +Navigate to the **OAuth & Permissions** Tab + +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_oauth_and_permissions.png) + +Here you'll find a “Bot User OAuth Token” which DataHub will need to communicate with your Slack workspace through the bot. + +#### 3. The Slack Channel + +Finally, you need to figure out which Slack channel you will send notifications to. Perhaps it should be called #datahub-notifications or #data-notifications, or maybe you already have a channel where important notifications about datasets and pipelines are already routed. Once you have decided what channel to send notifications to, make sure to add the app to the channel. + +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_channel_add_app.png) + +Next, figure out the channel ID for this Slack channel. You can find it in the About section for the channel if you scroll to the very bottom. +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_channel_id.png) + +Alternatively, if you are in the browser, you can figure it out from the URL, e.g.
for the troubleshoot channel in the OSS DataHub Slack workspace + +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_channel_url.png) + +- Notice `TUMKD5EGJ/C029A3M079U` in the URL + - Channel ID = `C029A3M079U` from above + + +In the next steps, we'll show you how to configure the Slack Action based on the credentials and configuration values that you have collected. + +### Installation Instructions (Deployment specific) + +#### Managed DataHub + +Head over to the [Configuring Notifications](../../managed-datahub/saas-slack-setup.md#configuring-notifications) section in the Managed DataHub guide to configure Slack notifications for your Managed DataHub instance. + + +#### Quickstart + +If you are running DataHub using the docker quickstart option, there are no additional software installation steps. The `datahub-actions` container comes pre-installed with the Slack action. + +All you need to do is export a few environment variables to activate and configure the integration. See below for the list of environment variables to export. + +| Env Variable | Required for Integration | Purpose | +| --- | --- | --- | +| DATAHUB_ACTIONS_SLACK_ENABLED | ✅ | Set to "true" to enable the Slack action | +| DATAHUB_ACTIONS_SLACK_SIGNING_SECRET | ✅ | Set to the [Slack Signing Secret](#1-the-signing-secret) that you configured in the pre-requisites step above | +| DATAHUB_ACTIONS_SLACK_BOT_TOKEN | ✅ | Set to the [Bot User OAuth Token](#2-the-bot-token) that you configured in the pre-requisites step above | +| DATAHUB_ACTIONS_SLACK_CHANNEL | ✅ | Set to the [Slack Channel ID](#3-the-slack-channel) that you want the action to send messages to | +| DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL | ❌ | Defaults to "http://localhost:9002". Set to the location where your DataHub UI is running. On a local quickstart this is usually "http://localhost:9002", so you shouldn't need to modify this | + +:::note + +If this is the first time you are setting these environment variables, you will have to restart the `datahub-actions` docker container after exporting them. The simplest way to do it is via the Docker Desktop UI, or by just issuing a `datahub docker quickstart --stop && datahub docker quickstart` command to restart the whole instance. + +::: + + +For example: +```shell +export DATAHUB_ACTIONS_SLACK_ENABLED=true +export DATAHUB_ACTIONS_SLACK_SIGNING_SECRET= +.... +export DATAHUB_ACTIONS_SLACK_CHANNEL= + +datahub docker quickstart --stop && datahub docker quickstart +``` + +#### k8s / helm + +Similar to the quickstart scenario, there are no specific software installation steps. The `datahub-actions` container comes pre-installed with the Slack action. You just need to export a few environment variables and make them available to the `datahub-actions` container to activate and configure the integration. See below for the list of environment variables to export.
+ +| Env Variable | Required for Integration | Purpose | +| --- | --- | --- | +| DATAHUB_ACTIONS_SLACK_ENABLED | ✅ | Set to "true" to enable the Slack action | +| DATAHUB_ACTIONS_SLACK_SIGNING_SECRET | ✅ | Set to the [Slack Signing Secret](#1-the-signing-secret) that you configured in the pre-requisites step above | +| DATAHUB_ACTIONS_SLACK_BOT_TOKEN | ✅ | Set to the [Bot User OAuth Token](#2-the-bot-token) that you configured in the pre-requisites step above | +| DATAHUB_ACTIONS_SLACK_CHANNEL | ✅ | Set to the [Slack Channel ID](#3-the-slack-channel) that you want the action to send messages to | +| DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL | ✅ | Set to the location where your DataHub UI is running. For example, if your DataHub UI is hosted at "https://datahub.my-company.biz", set this to "https://datahub.my-company.biz" | + + +#### Bare Metal - CLI or Python-based + +If you are using the `datahub-actions` library directly from Python, or the `datahub-actions` CLI directly, then you need to first install the `slack` action plugin in your Python virtualenv. + +``` +pip install "datahub-actions[slack]" +``` + +Then run the action with a configuration file that you have modified to capture your credentials and configuration. + +##### Sample Slack Action Configuration File + +```yml +name: datahub_slack_action +enabled: true +source: + type: "kafka" + config: + connection: + bootstrap: ${KAFKA_BOOTSTRAP_SERVER:-localhost:9092} + schema_registry_url: ${SCHEMA_REGISTRY_URL:-http://localhost:8081} + topic_routes: + mcl: ${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:-MetadataChangeLog_Versioned_v1} + pe: ${PLATFORM_EVENT_TOPIC_NAME:-PlatformEvent_v1} + +## 3a. Optional: Filter to run on events (map) +# filter: +# event_type: +# event: +# # Filter event fields by exact-match +# + +# 3b. Optional: Custom Transformers to run on events (array) +# transform: +# - type: +# config: +# # Transformer-specific configs (map) + +action: + type: slack + config: + # Action-specific configs (map) + base_url: ${DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL:-http://localhost:9002} + bot_token: ${DATAHUB_ACTIONS_SLACK_BOT_TOKEN} + signing_secret: ${DATAHUB_ACTIONS_SLACK_SIGNING_SECRET} + default_channel: ${DATAHUB_ACTIONS_SLACK_CHANNEL} + suppress_system_activity: ${DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY:-true} + +datahub: + server: "http://${DATAHUB_GMS_HOST:-localhost}:${DATAHUB_GMS_PORT:-8080}" + +``` + +##### Slack Action Configuration Parameters + +| Field | Required | Default | Description | +| --- | --- | --- | --- | +| `base_url` | ❌ | `http://localhost:9002` | Set to the location where your DataHub UI is running. | +| `signing_secret` | ✅ | | Set to the [Slack Signing Secret](#1-the-signing-secret) that you configured in the pre-requisites step above | +| `bot_token` | ✅ | | Set to the [Bot User OAuth Token](#2-the-bot-token) that you configured in the pre-requisites step above | +| `default_channel` | ✅ | | Set to the [Slack Channel ID](#3-the-slack-channel) that you want the action to send messages to | +| `suppress_system_activity` | ❌ | `True` | Set to `False` if you want to get low-level system activity events, e.g. when datasets are ingested, etc. Note: this will currently result in a very spammy Slack notification experience, so changing it is not recommended. | + + +## Troubleshooting + +If things are configured correctly, you should see logs from the `datahub-actions` container that indicate success in enabling and running the Slack action. + +```shell +docker logs datahub-datahub-actions-1 + +...
+[2022-12-04 07:07:53,804] INFO {datahub_actions.plugin.action.slack.slack:96} - Slack notification action configured with bot_token=SecretStr('**********') signing_secret=SecretStr('**********') default_channel='C04CZUSSR5X' base_url='http://localhost:9002' suppress_system_activity=True +[2022-12-04 07:07:54,506] WARNING {datahub_actions.cli.actions:103} - Skipping pipeline datahub_teams_action as it is not enabled +[2022-12-04 07:07:54,506] INFO {datahub_actions.cli.actions:119} - Action Pipeline with name 'ingestion_executor' is now running. +[2022-12-04 07:07:54,507] INFO {datahub_actions.cli.actions:119} - Action Pipeline with name 'datahub_slack_action' is now running. +... +``` + + +If the Slack action was not enabled, you would see messages indicating that. +e.g. the logs below show that neither the Slack nor the Teams action was enabled. + +```shell +docker logs datahub-datahub-actions-1 + +.... +No user action configurations found. Not starting user actions. +[2022-12-04 06:45:27,509] INFO {datahub_actions.cli.actions:76} - DataHub Actions version: unavailable (installed editable via git) +[2022-12-04 06:45:27,647] WARNING {datahub_actions.cli.actions:103} - Skipping pipeline datahub_slack_action as it is not enabled +[2022-12-04 06:45:27,649] WARNING {datahub_actions.cli.actions:103} - Skipping pipeline datahub_teams_action as it is not enabled +[2022-12-04 06:45:27,649] INFO {datahub_actions.cli.actions:119} - Action Pipeline with name 'ingestion_executor' is now running. +... + +``` \ No newline at end of file diff --git a/docs/actions/actions/teams.md b/docs/actions/actions/teams.md new file mode 100644 index 00000000000000..55f0c72dff6403 --- /dev/null +++ b/docs/actions/actions/teams.md @@ -0,0 +1,184 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# Microsoft Teams + + + +| | | +| --- | --- | +| **Status** | ![Incubating](https://img.shields.io/badge/support%20status-incubating-blue) | +| **Version Requirements** | ![Minimum Version Requirements](https://img.shields.io/badge/acryl_datahub_actions-v0.0.9+-green.svg) | + +## Overview + +This Action integrates DataHub with Microsoft Teams to send notifications to a configured Teams channel in your workspace. + +### Capabilities + +- Sending notifications of important events to a Teams channel + - Adding or Removing a tag from an entity (dataset, dashboard, etc.) + - Updating documentation at the entity or field (column) level. + - Adding or Removing ownership from an entity (dataset, dashboard, etc.) + - Creating a Domain + - and many more. + +### User Experience + +On startup, the action will produce a welcome message that looks like the one below. +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/teams/teams_welcome_message.png) + + +On each event, the action will produce a notification message that looks like the one below. +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/teams/teams_notification_message.png) + +Watch the townhall demo to see this in action: +[![Teams Action Demo](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/teams/teams_demo_image.png)](https://www.youtube.com/watch?v=BlCLhG8lGoY&t=2998s) + + +### Supported Events + +- `EntityChangeEvent_v1` +- Currently, the `MetadataChangeLog_v1` event is **not** processed by the Action.
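+
+Because only `EntityChangeEvent_v1` is processed, you can optionally narrow which changes actually produce a Teams message using the `filter` block of the Actions framework (shown commented out in the sample configuration file further below). The following is a minimal sketch, assuming the standard Actions filter syntax; it would restrict notifications to tag additions:
+
+```yml
+filter:
+  event_type: "EntityChangeEvent_v1"
+  event:
+    category: "TAG"
+    operation: "ADD"
+```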
+ +## Action Quickstart + +### Prerequisites + +Ensure that you have configured an incoming webhook in your Teams channel. + +Follow the guide [here](https://learn.microsoft.com/en-us/microsoftteams/platform/webhooks-and-connectors/how-to/add-incoming-webhook) to set it up. + +Take note of the incoming webhook URL, as you will need it to configure the Teams action. + +### Installation Instructions (Deployment specific) + +#### Quickstart + +If you are running DataHub using the docker quickstart option, there are no additional software installation steps. The `datahub-actions` container comes pre-installed with the Teams action. + +All you need to do is export a few environment variables to activate and configure the integration. See below for the list of environment variables to export. + +| Env Variable | Required for Integration | Purpose | +| --- | --- | --- | +| DATAHUB_ACTIONS_TEAMS_ENABLED | ✅ | Set to "true" to enable the Teams action | +| DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL | ✅ | Set to the incoming webhook URL that you configured in the [pre-requisites step](#prerequisites) above | +| DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL | ❌ | Defaults to "http://localhost:9002". Set to the location where your DataHub UI is running. On a local quickstart this is usually "http://localhost:9002", so you shouldn't need to modify this | + +:::note + +If this is the first time you are setting these environment variables, you will have to restart the `datahub-actions` docker container after exporting them. The simplest way to do it is via the Docker Desktop UI, or by just issuing a `datahub docker quickstart --stop && datahub docker quickstart` command to restart the whole instance. + +::: + + +For example: +```shell +export DATAHUB_ACTIONS_TEAMS_ENABLED=true +export DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL= + +datahub docker quickstart --stop && datahub docker quickstart +``` + +#### k8s / helm + +Similar to the quickstart scenario, there are no specific software installation steps. The `datahub-actions` container comes pre-installed with the Teams action. You just need to export a few environment variables and make them available to the `datahub-actions` container to activate and configure the integration. See below for the list of environment variables to export. + +| Env Variable | Required for Integration | Purpose | +| --- | --- | --- | +| DATAHUB_ACTIONS_TEAMS_ENABLED | ✅ | Set to "true" to enable the Teams action | +| DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL | ✅ | Set to the incoming webhook URL that you configured in the [pre-requisites step](#prerequisites) above | +| DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL | ✅ | Set to the location where your DataHub UI is running. For example, if your DataHub UI is hosted at "https://datahub.my-company.biz", set this to "https://datahub.my-company.biz" | + + +#### Bare Metal - CLI or Python-based + +If you are using the `datahub-actions` library directly from Python, or the `datahub-actions` CLI directly, then you need to first install the `teams` action plugin in your Python virtualenv. + +``` +pip install "datahub-actions[teams]" +``` + +Then run the action with a configuration file that you have modified to capture your credentials and configuration.
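+
+For example, assuming you have saved the sample configuration below as `teams_action.yaml` (a hypothetical filename), you would launch the action with the `datahub actions` CLI:
+
+```shell
+datahub actions -c teams_action.yaml
+```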
+ +##### Sample Teams Action Configuration File + +```yml +name: datahub_teams_action +enabled: true +source: + type: "kafka" + config: + connection: + bootstrap: ${KAFKA_BOOTSTRAP_SERVER:-localhost:9092} + schema_registry_url: ${SCHEMA_REGISTRY_URL:-http://localhost:8081} + topic_routes: + mcl: ${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:-MetadataChangeLog_Versioned_v1} + pe: ${PLATFORM_EVENT_TOPIC_NAME:-PlatformEvent_v1} + +## 3a. Optional: Filter to run on events (map) +# filter: +# event_type: +# event: +# # Filter event fields by exact-match +# + +# 3b. Optional: Custom Transformers to run on events (array) +# transform: +# - type: +# config: +# # Transformer-specific configs (map) + +action: + type: teams + config: + # Action-specific configs (map) + base_url: ${DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL:-http://localhost:9002} + webhook_url: ${DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL} + suppress_system_activity: ${DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY:-true} + +datahub: + server: "http://${DATAHUB_GMS_HOST:-localhost}:${DATAHUB_GMS_PORT:-8080}" +``` + +##### Teams Action Configuration Parameters + +| Field | Required | Default | Description | +| --- | --- | --- | --- | +| `base_url` | ❌ | `http://localhost:9002` | Set to the location where your DataHub UI is running. | +| `webhook_url` | ✅ | | Set to the incoming webhook URL that you configured in the [pre-requisites step](#prerequisites) above | +| `suppress_system_activity` | ❌ | `True` | Set to `False` if you want to get low-level system activity events, e.g. when datasets are ingested, etc. Note: this will currently result in a very spammy Teams notification experience, so changing it is not recommended. | + + +## Troubleshooting + +If things are configured correctly, you should see logs from the `datahub-actions` container that indicate success in enabling and running the Teams action. + +```shell +docker logs datahub-datahub-actions-1 + +... +[2022-12-04 16:47:44,536] INFO {datahub_actions.cli.actions:76} - DataHub Actions version: unavailable (installed editable via git) +[2022-12-04 16:47:44,565] WARNING {datahub_actions.cli.actions:103} - Skipping pipeline datahub_slack_action as it is not enabled +[2022-12-04 16:47:44,581] INFO {datahub_actions.plugin.action.teams.teams:60} - Teams notification action configured with webhook_url=SecretStr('**********') base_url='http://localhost:9002' suppress_system_activity=True +[2022-12-04 16:47:46,393] INFO {datahub_actions.cli.actions:119} - Action Pipeline with name 'ingestion_executor' is now running. +[2022-12-04 16:47:46,393] INFO {datahub_actions.cli.actions:119} - Action Pipeline with name 'datahub_teams_action' is now running. +... +``` + + +If the Teams action was not enabled, you would see messages indicating that. +e.g. the logs below show that neither the Teams nor the Slack action was enabled. + +```shell +docker logs datahub-datahub-actions-1 + +.... +No user action configurations found. Not starting user actions. +[2022-12-04 06:45:27,509] INFO {datahub_actions.cli.actions:76} - DataHub Actions version: unavailable (installed editable via git) +[2022-12-04 06:45:27,647] WARNING {datahub_actions.cli.actions:103} - Skipping pipeline datahub_slack_action as it is not enabled +[2022-12-04 06:45:27,649] WARNING {datahub_actions.cli.actions:103} - Skipping pipeline datahub_teams_action as it is not enabled +[2022-12-04 06:45:27,649] INFO {datahub_actions.cli.actions:119} - Action Pipeline with name 'ingestion_executor' is now running. +...
+ +``` + diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 5c9f13a9c8879c..5b896bc41cbf51 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -12,6 +12,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Other notable Changes +- #6611 - Snowflake `schema_pattern` now accepts a pattern for the fully qualified schema name in the format `<database_name>.<schema_name>` by setting the config `match_fully_qualified_names: True`. The current default `match_fully_qualified_names: False` is only to maintain backward compatibility. The config option `match_fully_qualified_names` will be deprecated in the future and the default behavior will assume `match_fully_qualified_names: True`. + ## 0.9.3 ### Breaking Changes diff --git a/docs/imgs/slack/slack_add_token.png b/docs/imgs/slack/slack_add_token.png deleted file mode 100644 index 6a12dc545ec62c..00000000000000 Binary files a/docs/imgs/slack/slack_add_token.png and /dev/null differ diff --git a/docs/imgs/slack/slack_basic_info.png b/docs/imgs/slack/slack_basic_info.png deleted file mode 100644 index a01a1af370442d..00000000000000 Binary files a/docs/imgs/slack/slack_basic_info.png and /dev/null differ diff --git a/docs/imgs/slack/slack_channel.png b/docs/imgs/slack/slack_channel.png deleted file mode 100644 index 83645e00d724a4..00000000000000 Binary files a/docs/imgs/slack/slack_channel.png and /dev/null differ diff --git a/docs/imgs/slack/slack_channel_url.png b/docs/imgs/slack/slack_channel_url.png deleted file mode 100644 index 7715bf4a51fbe4..00000000000000 Binary files a/docs/imgs/slack/slack_channel_url.png and /dev/null differ diff --git a/docs/imgs/slack/slack_oauth_and_permissions.png b/docs/imgs/slack/slack_oauth_and_permissions.png deleted file mode 100644 index 87846e7897f6ed..00000000000000 Binary files a/docs/imgs/slack/slack_oauth_and_permissions.png and /dev/null differ diff --git a/docs/imgs/slack/slack_user_id.png b/docs/imgs/slack/slack_user_id.png deleted file mode 100644 index 02d84b326896c8..00000000000000 Binary files a/docs/imgs/slack/slack_user_id.png and /dev/null differ diff --git a/docs/managed-datahub/saas-slack-setup.md b/docs/managed-datahub/saas-slack-setup.md index 430f926c08daae..68f947f1717158 100644 --- a/docs/managed-datahub/saas-slack-setup.md +++ b/docs/managed-datahub/saas-slack-setup.md @@ -41,7 +41,7 @@ settings: Confirm you see the Basic Information Tab -![](../imgs/slack/slack_basic_info.png) +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_basic_info.png) - Click **Install to Workspace** - It will show you permissions the Slack App is asking for, what they mean and a default channel in which you want to add the slack app @@ -54,7 +54,7 @@ Confirm you see the Basic Information Tab - Go to **OAuth & Permissions** Tab -![](../imgs/slack/slack_oauth_and_permissions.png) +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_oauth_and_permissions.png) Here you'll find a “Bot User OAuth Token” which DataHub will need to communicate with your slack through the bot. In the next steps, we'll show you how to configure the Slack Integration inside of Acryl DataHub.
@@ -71,13 +71,13 @@ To enable the integration with slack - Enter a **Default Slack Channel** - this is where all notifications will be routed unless - Click **Update** to save your settings -![](../imgs/slack/slack_add_token.png) +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_add_token.png) To enable and disable specific types of notifications, or configure custom routing for notifications, start by navigating to **Settings > Notifications**. To enable or disable a specific notification type in Slack, simply click the check mark. By default, all notification types are enabled. To customize the channel where notifications are send, click the button to the right of the check box. -![](../imgs/slack/slack_channel.png) +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_channel.png) If provided, a custom channel will be used to route notifications of the given type. If not provided, the default channel will be used. That's it! You should begin to receive notifications on Slack. Note that it may take up to 1 minute for notification settings to take effect after saving. @@ -94,7 +94,7 @@ For now we support sending notifications to - Go to the Slack channel for which you want to get channel ID - Check the URL e.g. for the troubleshoot channel in OSS DataHub slack -![](../imgs/slack/slack_channel_url.png) +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_channel_url.png) - Notice `TUMKD5EGJ/C029A3M079U` in the URL - Team ID = `TUMKD5EGJ` from above @@ -108,4 +108,4 @@ For now we support sending notifications to - Click on “More” - Click on “Copy member ID” -![](../imgs/slack/slack_user_id.png) +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_user_id.png) diff --git a/docs/townhall-history.md b/docs/townhall-history.md index cea81013f14042..51a8770ef09284 100644 --- a/docs/townhall-history.md +++ b/docs/townhall-history.md @@ -2,6 +2,21 @@ A list of previous Town Halls, their planned schedule, and the recording of the meeting. +## 12/01/2022 +[Full YouTube video](https://youtu.be/BlCLhG8lGoY) + +### Agenda + +November Town Hall (in December!) + +- Community Case Study - The Pinterest Team will share how they have integrated DataHub + Thrift and extended the Metadata Model with a Data Element entity to capture semantic types. +- NEW! Ingestion Quickstart Guides - DataHub newbies, this one is for you! We’re rolling out ingestion quickstart guides to help you quickly get up and running with DataHub + Snowflake, BigQuery, and more! +- NEW! In-App Product Tours - We’re making it easier than ever for end-users to get familiar with all that DataHub has to offer - hear all about the in-product onboarding resources we’re rolling out soon! +- DataHub UI Navigation and Performance - Learn all about upcoming changes to our user experience to make it easier (and faster!) for end users to work within DataHub. +- Sneak Peek! Manual Lineage via the UI - The Community asked and we’re delivering! Soon you’ll be able to manually add lineage connections between Entities in DataHub. +- NEW! Slack + Microsoft Teams Integrations - Send automated alerts to Slack and/or Teams to keep track of critical events and changes within DataHub. +- Hacktoberfest Winners Announced - We’ll recap this year’s Hacktoberfest and announce three winners of a $250 Amazon gift card & DataHub Swag. 
+ ## 10/27/2022 [Full YouTube video](https://youtu.be/B74WHxX5EMk) diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index 3594e0440f63d4..af742d240d1e6b 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -4,7 +4,8 @@ dependencies { compile spec.product.pegasus.data compile spec.product.pegasus.generator compile project(path: ':metadata-models') - compile externalDependency.lombok + implementation externalDependency.slf4jApi + compileOnly externalDependency.lombok compile externalDependency.guava compile externalDependency.jacksonDataBind compile externalDependency.jacksonDataFormatYaml diff --git a/ingestion-scheduler/build.gradle b/ingestion-scheduler/build.gradle index 7023ce1208b513..3dec8ee400150a 100644 --- a/ingestion-scheduler/build.gradle +++ b/ingestion-scheduler/build.gradle @@ -4,12 +4,12 @@ dependencies { compile project(path: ':metadata-models') compile project(path: ':metadata-io') compile project(path: ':metadata-service:restli-client') - compile externalDependency.lombok + implementation externalDependency.slf4jApi + compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok testCompile externalDependency.mockito testCompile externalDependency.testng - testAnnotationProcessor externalDependency.lombok constraints { implementation(externalDependency.log4jCore) { diff --git a/li-utils/build.gradle b/li-utils/build.gradle index 6a6971589ae8b0..d11cd86659605c 100644 --- a/li-utils/build.gradle +++ b/li-utils/build.gradle @@ -20,6 +20,7 @@ dependencies { } compile externalDependency.guava + implementation externalDependency.slf4jApi compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok diff --git a/metadata-dao-impl/kafka-producer/build.gradle b/metadata-dao-impl/kafka-producer/build.gradle index 18b129297f19f6..5b40eb5f322321 100644 --- a/metadata-dao-impl/kafka-producer/build.gradle +++ b/metadata-dao-impl/kafka-producer/build.gradle @@ -9,6 +9,7 @@ dependencies { compile externalDependency.kafkaClients + implementation externalDependency.slf4jApi compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok diff --git a/metadata-ingestion/schedule_docs/airflow.md b/metadata-ingestion/schedule_docs/airflow.md index 03a5930fea1368..e48710964b01c7 100644 --- a/metadata-ingestion/schedule_docs/airflow.md +++ b/metadata-ingestion/schedule_docs/airflow.md @@ -2,11 +2,41 @@ If you are using Apache Airflow for your scheduling then you might want to also use it for scheduling your ingestion recipes. For any Airflow specific questions you can go through [Airflow docs](https://airflow.apache.org/docs/apache-airflow/stable/) for more details. -To schedule your recipe through Airflow you can follow these steps -- Create a recipe file e.g. `recipe.yml` -- Ensure the receipe file is in a folder accessible to your airflow workers. You can either specify absolute path on the machines where Airflow is installed or a path relative to `AIRFLOW_HOME`. -- Ensure [DataHub CLI](../../docs/cli.md) is installed in your airflow environment -- Create a sample DAG file like [`generic_recipe_sample_dag.py`](../src/datahub_provider/example_dags/generic_recipe_sample_dag.py). This will read your DataHub ingestion recipe file and run it. +We've provided a few examples of how to configure your DAG: + +- [`mysql_sample_dag`](../src/datahub_provider/example_dags/mysql_sample_dag.py) embeds the full MySQL ingestion configuration inside the DAG. 
+
+- [`snowflake_sample_dag`](../src/datahub_provider/example_dags/snowflake_sample_dag.py) avoids embedding credentials inside the recipe, and instead fetches them from Airflow's [Connections](https://airflow.apache.org/docs/apache-airflow/stable/howto/connection/index.html) feature. You must configure your connections in Airflow to use this approach.
+
+:::tip
+
+These example DAGs use the `PythonVirtualenvOperator` to run the ingestion. This is the recommended approach, since it guarantees that there will not be any conflicts between DataHub and the rest of your Airflow environment.
+
+When configuring the task, it's important to specify the requirements for your source and set the `system_site_packages` option to `False`.
+
+```py
+ingestion_task = PythonVirtualenvOperator(
+    task_id="ingestion_task",
+    requirements=[
+        "acryl-datahub[<your-source>]",
+    ],
+    system_site_packages=False,
+    python_callable=your_callable,
+)
+```
+
+:::
+
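+Here, `your_callable` is a placeholder for the function that runs the actual ingestion. Because the operator executes it inside a fresh virtualenv, the callable must import DataHub inside its own body. A minimal sketch, with placeholder MySQL credentials and DataHub server address:
+
+```py
+def your_callable():
+    # Imported here because the function runs inside the isolated virtualenv.
+    from datahub.ingestion.run.pipeline import Pipeline
+
+    pipeline = Pipeline.create(
+        {
+            "source": {
+                "type": "mysql",
+                "config": {
+                    "username": "user",
+                    "password": "pass",
+                    "host_port": "localhost:3306",
+                },
+            },
+            "sink": {
+                "type": "datahub-rest",
+                "config": {"server": "http://localhost:8080"},
+            },
+        }
+    )
+    pipeline.run()
+    pipeline.raise_from_status()
+```
+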
+### Advanced: loading a recipe file
+
+In more advanced cases, you might want to store your ingestion recipe in a file and load it from your task.
+
+- Ensure the recipe file is in a folder accessible to your Airflow workers. You can either specify an absolute path on the machines where Airflow is installed or a path relative to `AIRFLOW_HOME`.
+- Ensure [DataHub CLI](../../docs/cli.md) is installed in your Airflow environment.
+- Create a DAG task to read your DataHub ingestion recipe file and run it. See the example below for reference.
- Deploy the DAG file into airflow for scheduling. Typically this involves checking in the DAG file into your dags folder which is accessible to your Airflow instance.

-Alternatively you can have an inline recipe as given in [`mysql_sample_dag.py`](../src/datahub_provider/example_dags/mysql_sample_dag.py). This runs a MySQL metadata ingestion pipeline using an inlined configuration.
+Example: [`generic_recipe_sample_dag`](../src/datahub_provider/example_dags/generic_recipe_sample_dag.py)
+
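+The core of such a task, mirroring `generic_recipe_sample_dag`, might look like this (the recipe path is a placeholder):
+
+```py
+from datahub.configuration.config_loader import load_config_file
+from datahub.ingestion.run.pipeline import Pipeline
+
+
+def datahub_recipe():
+    # load_config_file also resolves environment variables referenced in the recipe.
+    config = load_config_file("path/to/recipe.yml")
+
+    pipeline = Pipeline.create(config)
+    pipeline.run()
+    pipeline.raise_from_status()
+```
+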
diff --git a/metadata-ingestion/setup.cfg b/metadata-ingestion/setup.cfg index b6d4f55a09e3a5..3f0e8ab611b054 100644 --- a/metadata-ingestion/setup.cfg +++ b/metadata-ingestion/setup.cfg @@ -27,6 +27,7 @@ plugins = exclude = ^(venv|build|dist)/ ignore_missing_imports = yes namespace_packages = no +implicit_optional = no strict_optional = yes check_untyped_defs = yes disallow_incomplete_defs = yes @@ -38,8 +39,16 @@ disallow_untyped_defs = no # try to be a bit more strict in certain areas of the codebase [mypy-datahub.*] ignore_missing_imports = no +[mypy-datahub_provider.*] +ignore_missing_imports = no [mypy-tests.*] ignore_missing_imports = no +[mypy-google.protobuf.*] +# mypy sometimes ignores the above ignore_missing_imports = yes +# See https://github.com/python/mypy/issues/10632 and +# https://github.com/python/mypy/issues/10619#issuecomment-1174208395 +# for a discussion of why this happens. +ignore_missing_imports = yes [mypy-datahub.configuration.*] disallow_untyped_defs = yes [mypy-datahub.emitter.*] diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 1a744a6fe328a6..5547914236de69 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -221,7 +221,7 @@ def get_long_description(): delta_lake = { *s3_base, - "deltalake>=0.6.3", + "deltalake>=0.6.3, != 0.6.4", } powerbi_report_server = {"requests", "requests_ntlm"} @@ -385,8 +385,7 @@ def get_long_description(): "types-ujson>=5.2.0", "types-termcolor>=1.0.0", "types-Deprecated", - # Mypy complains with 4.21.0.0 => error: Library stubs not installed for "google.protobuf.descriptor" - "types-protobuf<4.21.0.0", + "types-protobuf>=4.21.0.1", } base_dev_requirements = { @@ -399,10 +398,7 @@ def get_long_description(): "flake8>=3.8.3", "flake8-tidy-imports>=4.3.0", "isort>=5.7.0", - # mypy 0.990 enables namespace packages by default and sets - # no implicit optional to True. - # FIXME: Enable mypy 0.990 when our codebase is fixed. - "mypy>=0.981,<0.990", + "mypy==0.991", # pydantic 1.8.2 is incompatible with mypy 0.910. # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. # Restricting top version to <1.10 until we can fix our types. @@ -465,8 +461,9 @@ def get_long_description(): dev_requirements = { *base_dev_requirements, + # Extra requirements for Airflow. 
"apache-airflow[snowflake]>=2.0.2", # snowflake is used in example dags - "snowflake-sqlalchemy<=1.2.4", # make constraint consistent with extras + "virtualenv", # needed by PythonVirtualenvOperator } full_test_dev_requirements = { diff --git a/metadata-ingestion/src/datahub/api/graphql/operation.py b/metadata-ingestion/src/datahub/api/graphql/operation.py index 5e1575e6f75dd2..9cb40ce5815a56 100644 --- a/metadata-ingestion/src/datahub/api/graphql/operation.py +++ b/metadata-ingestion/src/datahub/api/graphql/operation.py @@ -122,8 +122,6 @@ def query_operations( "operationType": operation_type, "partition": partition, } - if filter - else None ), }, ) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 5bd8841e6755a8..d6ece814c9c47a 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -581,6 +581,11 @@ def post_entity( curl_command, ) response = session.post(url, payload) + if not response.ok: + try: + log.info(response.json()["message"].strip()) + except Exception: + log.info(f"post_entity failed: {response.text}") response.raise_for_status() return response.status_code diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index e134a5a8495b91..95d852bbe7b606 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -80,7 +80,7 @@ class OperationalError(PipelineExecutionError): message: str info: dict - def __init__(self, message: str, info: dict = None): + def __init__(self, message: str, info: Optional[dict] = None): self.message = message self.info = info or {} diff --git a/metadata-ingestion/src/datahub/configuration/pattern_utils.py b/metadata-ingestion/src/datahub/configuration/pattern_utils.py new file mode 100644 index 00000000000000..313e68c41812f0 --- /dev/null +++ b/metadata-ingestion/src/datahub/configuration/pattern_utils.py @@ -0,0 +1,13 @@ +from datahub.configuration.common import AllowDenyPattern + + +def is_schema_allowed( + schema_pattern: AllowDenyPattern, + schema_name: str, + db_name: str, + match_fully_qualified_schema_name: bool, +) -> bool: + if match_fully_qualified_schema_name: + return schema_pattern.allowed(f"{db_name}.{schema_name}") + else: + return schema_pattern.allowed(schema_name) diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 70e5ce7db7e821..9f3740aa9f3eea 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -120,7 +120,12 @@ class Source(Closeable, metaclass=ABCMeta): @classmethod def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source": - pass + # Technically, this method should be abstract. However, the @config_class + # decorator automatically generates a create method at runtime if one is + # not defined. Python still treats the class as abstract because it thinks + # the create method is missing. To avoid the class becoming abstract, we + # can't make this method abstract. 
+ raise NotImplementedError('sources must implement "create"') @abstractmethod def get_workunits(self) -> Iterable[WorkUnit]: diff --git a/metadata-ingestion/src/datahub/ingestion/api/workunit.py b/metadata-ingestion/src/datahub/ingestion/api/workunit.py index 522bcd9fbdbf7e..53a77798f756c8 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/workunit.py +++ b/metadata-ingestion/src/datahub/ingestion/api/workunit.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Iterable, Union, overload +from typing import Iterable, Optional, Union, overload from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.source import WorkUnit @@ -42,9 +42,9 @@ def __init__( def __init__( self, id: str, - mce: MetadataChangeEvent = None, - mcp: MetadataChangeProposalWrapper = None, - mcp_raw: MetadataChangeProposal = None, + mce: Optional[MetadataChangeEvent] = None, + mcp: Optional[MetadataChangeProposalWrapper] = None, + mcp_raw: Optional[MetadataChangeProposal] = None, treat_errors_as_warnings: bool = False, ): super().__init__(id) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index 5de36facb6b98e..fa475f66d01208 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -2,7 +2,7 @@ from collections import defaultdict from dataclasses import dataclass, field from datetime import datetime, timezone -from typing import Dict, List, Optional +from typing import Dict, List, Optional, cast from google.cloud import bigquery from google.cloud.bigquery.table import RowIterator, TableListItem, TimePartitioning @@ -280,6 +280,8 @@ def get_projects(conn: bigquery.Client) -> List[BigqueryProject]: def get_datasets_for_project_id( conn: bigquery.Client, project_id: str, maxResults: Optional[int] = None ) -> List[BigqueryDataset]: + # FIXME: Due to a bug in BigQuery's type annotations, we need to cast here. + maxResults = cast(int, maxResults) datasets = conn.list_datasets(project_id, max_results=maxResults) return [BigqueryDataset(name=d.dataset_id) for d in datasets] diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py index 726b54e58629fb..d58457534977ac 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py @@ -165,6 +165,16 @@ def get_workunits( continue for table in tables[project][dataset]: + for column in table.columns: + # Profiler has issues with complex types (array, struct, geography, json), so we deny those types from profiling + # We also filter columns without data type as it means that column is part of a complex type. 
+ if not column.data_type or any( + word in column.data_type.lower() + for word in ["array", "struct", "geography", "json"] + ): + self.config.profile_pattern.deny.append( + f"^{project}.{dataset}.{table.name}.{column.field_path}$" + ) # Emit the profile work unit profile_request = self.get_bigquery_profile_request( project=project, dataset=dataset, table=table diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index e6782a92e3f452..8860162660a4d7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -833,8 +833,8 @@ def _generate_single_profile( self, query_combiner: SQLAlchemyQueryCombiner, pretty_name: str, - schema: str = None, - table: str = None, + schema: Optional[str] = None, + table: Optional[str] = None, partition: Optional[str] = None, custom_sql: Optional[str] = None, platform: Optional[str] = None, diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index 0e3487eb927a16..431f90643d017d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -123,7 +123,9 @@ def remove_prefix(text: str, prefix: str) -> str: return text -def unquote(string: str, leading_quote: str = '"', trailing_quote: str = None) -> str: +def unquote( + string: str, leading_quote: str = '"', trailing_quote: Optional[str] = None +) -> str: """ If string starts and ends with a quote, unquote it """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 935eeac00b519e..9ba336becab325 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -28,7 +28,6 @@ import datahub.emitter.mce_builder as builder from datahub.configuration import ConfigModel from datahub.configuration.common import ConfigurationError -from datahub.configuration.github import GitHubInfo from datahub.configuration.source_common import DatasetSourceConfigBase from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.report import Report @@ -186,10 +185,6 @@ class LookerCommonConfig(DatasetSourceConfigBase): platform_name: str = Field( "looker", description="Default platform name. Don't change." ) - github_info: Optional[GitHubInfo] = Field( - None, - description="Reference to your github location. If present, supplies handy links to your lookml on the dataset entity page.", - ) extract_column_level_lineage: bool = Field( True, description="When enabled, extracts column-level lineage from Views and Explores", diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 48d19c6d127651..3283ad18d01a1c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -173,6 +173,10 @@ class LookMLSourceConfig(LookerCommonConfig, StatefulIngestionConfigBase): None, description="Required if not providing github configuration and deploy keys. 
A pointer to a local directory (accessible to the ingestion system) where the root of the LookML repo has been checked out (typically via a git clone). This is typically the root folder where the `*.model.lkml` and `*.view.lkml` files are stored. e.g. If you have checked out your LookML repo under `/Users/jdoe/workspace/my-lookml-repo`, then set `base_folder` to `/Users/jdoe/workspace/my-lookml-repo`.", ) + github_info: Optional[GitHubInfo] = Field( + None, + description="Reference to your github location. If present, supplies handy links to your lookml on the dataset entity page.", + ) project_dependencies: Dict[str, Union[pydantic.DirectoryPath, GitHubInfo]] = Field( {}, description="A map of project_name to local directory (accessible to the ingestion system) or Git credentials. " diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py index f33654daa15595..8d0421871694cd 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py @@ -2,7 +2,7 @@ import re import time import warnings -from typing import Any, Dict, Generator, List, Tuple +from typing import Any, Dict, Generator, List, Optional, Tuple import requests import yaml @@ -47,7 +47,10 @@ def flatten2list(d: dict) -> list: def request_call( - url: str, token: str = None, username: str = None, password: str = None + url: str, + token: Optional[str] = None, + username: Optional[str] = None, + password: Optional[str] = None, ) -> requests.Response: headers = {"accept": "application/json"} @@ -66,9 +69,9 @@ def request_call( def get_swag_json( url: str, - token: str = None, - username: str = None, - password: str = None, + token: Optional[str] = None, + username: Optional[str] = None, + password: Optional[str] = None, swagger_file: str = "", ) -> Dict: tot_url = url + swagger_file diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index c196e1a8cf684e..3a35c2190ff809 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -402,7 +402,7 @@ def _get_sql_table_names(cls, sql: str, sql_parser_path: str) -> List[str]: return sql_table_names - def _get_chart_data_source(self, data_source_id: int = None) -> Dict: + def _get_chart_data_source(self, data_source_id: Optional[int] = None) -> Dict: url = f"/api/data_sources/{data_source_id}" resp = self.client._get(url).json() logger.debug(resp) diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index 7b9ab6dd789b9d..70cb7653f414e9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -735,7 +735,7 @@ def get_report(self) -> SourceReport: return self.report -def get_tags(params: List[str] = None) -> GlobalTagsClass: +def get_tags(params: Optional[List[str]] = None) -> GlobalTagsClass: if params is None: params = [] tags = [TagAssociationClass(tag=builder.make_tag_urn(tag)) for tag in params if tag] diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index b6729bd43f647e..643ba4f1db579c 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py
@@ -44,6 +44,16 @@ class SnowflakeV2Config(SnowflakeConfig, SnowflakeUsageConfig):
         description="For details, refer [Classification](../../../../metadata-ingestion/docs/dev_guides/classification.md).",
     )

+    include_external_url: bool = Field(
+        default=True,
+        description="Whether to populate the Snowsight URL for Snowflake objects.",
+    )
+
+    match_fully_qualified_names: bool = Field(
+        default=False,
+        description="Whether `schema_pattern` is matched against the fully qualified schema name `<database_name>.<schema_name>`.",
+    )
+
     @root_validator(pre=False)
     def validate_unsupported_configs(cls, values: Dict) -> Dict:
@@ -65,11 +75,26 @@ def validate_unsupported_configs(cls, values: Dict) -> Dict:
                 "include_read_operational_stats is not supported. Set `include_read_operational_stats` to False.",
             )

+        match_fully_qualified_names = values.get("match_fully_qualified_names")
+
+        schema_pattern: Optional[AllowDenyPattern] = values.get("schema_pattern")
+
+        if (
+            schema_pattern is not None
+            and schema_pattern != AllowDenyPattern.allow_all()
+            and match_fully_qualified_names is not None
+            and not match_fully_qualified_names
+        ):
+            logger.warning(
+                "Please update `schema_pattern` to match against the fully qualified schema name `<database_name>.<schema_name>` and set the config `match_fully_qualified_names: True`. "
+                "The current default `match_fully_qualified_names: False` is kept only for backward compatibility. "
+                "The config option `match_fully_qualified_names` will be deprecated in the future, and the default behavior will then assume `match_fully_qualified_names: True`."
+            )
+
         # Always exclude reporting metadata for INFORMATION_SCHEMA schema
-        schema_pattern = values.get("schema_pattern")
         if schema_pattern is not None and schema_pattern:
             logger.debug("Adding deny for INFORMATION_SCHEMA to schema_pattern.")
-            cast(AllowDenyPattern, schema_pattern).deny.append(r"^INFORMATION_SCHEMA$")
+            cast(AllowDenyPattern, schema_pattern).deny.append(r".*INFORMATION_SCHEMA$")

         include_technical_schema = values.get("include_technical_schema")
         include_profiles = (
@@ -95,7 +120,7 @@ def validate_unsupported_configs(cls, values: Dict) -> Dict:

     def get_sql_alchemy_url(
         self,
-        database: str = None,
+        database: Optional[str] = None,
         username: Optional[str] = None,
         password: Optional[SecretStr] = None,
         role: Optional[str] = None,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py
index 9fe9696a0f2a02..2cc2c9100199c4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py
@@ -7,6 +7,7 @@
 from sqlalchemy import create_engine, inspect
 from sqlalchemy.sql import sqltypes

+from datahub.configuration.pattern_utils import is_schema_allowed
 from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
 from datahub.ingestion.api.common import WorkUnit
 from datahub.ingestion.source.ge_data_profiler import (
@@ -55,7 +56,12 @@ def get_workunits(self, databases: List[SnowflakeDatabase]) -> Iterable[WorkUnit
                 continue
             profile_requests = []
             for schema in db.schemas:
-                if not self.config.schema_pattern.allowed(schema.name):
+                if not is_schema_allowed(
+                    self.config.schema_pattern,
+                    schema.name,
+                    db.name,
+                    self.config.match_fully_qualified_names,
+                ):
                     continue
                 for table in schema.tables:
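For reference, the new `is_schema_allowed` helper composes `schema_pattern` with the `match_fully_qualified_names` flag. A minimal sketch of the two matching modes, using a hypothetical `sales_db.public` schema:

```py
from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.pattern_utils import is_schema_allowed

# Allow only the "public" schema of "sales_db" via a fully qualified pattern.
pattern = AllowDenyPattern(allow=["sales_db\\.public"])

# Legacy mode tests only the bare schema name, so the qualified rule never matches.
assert not is_schema_allowed(pattern, "public", "sales_db", False)

# With match_fully_qualified_names=True, the pattern is tested
# against "<database_name>.<schema_name>".
assert is_schema_allowed(pattern, "public", "sales_db", True)
```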
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py index fe95a36a1e242d..c3b9be555a4f83 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py @@ -1,10 +1,13 @@ import logging +from enum import Enum +from functools import lru_cache from typing import Any, Optional from snowflake.connector import SnowflakeConnection from snowflake.connector.cursor import DictCursor from typing_extensions import Protocol +from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config @@ -12,6 +15,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.events.metadata import ChangeType from datahub.metadata.schema_classes import _Aspect +logger: logging.Logger = logging.getLogger(__name__) + + +class SnowflakeCloudProvider(str, Enum): + AWS = "aws" + GCP = "gcp" + AZURE = "azure" + + +SNOWFLAKE_DEFAULT_CLOUD_REGION_ID = "us-west-2" +SNOWFLAKE_DEFAULT_CLOUD = SnowflakeCloudProvider.AWS + # Required only for mypy, since we are using mixin classes, and not inheritance. # Reference - https://mypy.readthedocs.io/en/latest/more_types.html#mixin-classes @@ -59,6 +74,51 @@ class SnowflakeCommonMixin: platform = "snowflake" + @staticmethod + @lru_cache(maxsize=128) + def create_snowsight_base_url(account_id: str) -> Optional[str]: + cloud: Optional[str] = None + account_locator: Optional[str] = None + cloud_region_id: Optional[str] = None + privatelink: bool = False + + if "." not in account_id: # e.g. xy12345 + account_locator = account_id.lower() + cloud_region_id = SNOWFLAKE_DEFAULT_CLOUD_REGION_ID + else: + parts = account_id.split(".") + if len(parts) == 2: # e.g. xy12345.us-east-1 + account_locator = parts[0].lower() + cloud_region_id = parts[1].lower() + elif len(parts) == 3 and parts[2] in ( + SnowflakeCloudProvider.AWS, + SnowflakeCloudProvider.GCP, + SnowflakeCloudProvider.AZURE, + ): + # e.g. xy12345.ap-south-1.aws or xy12345.us-central1.gcp or xy12345.west-us-2.azure + # NOT xy12345.us-west-2.privatelink or xy12345.eu-central-1.privatelink + account_locator = parts[0].lower() + cloud_region_id = parts[1].lower() + cloud = parts[2].lower() + elif len(parts) == 3 and parts[2] == "privatelink": + account_locator = parts[0].lower() + cloud_region_id = parts[1].lower() + privatelink = True + else: + logger.warning( + f"Could not create Snowsight base url for account {account_id}." 
+ ) + return None + + if not privatelink and (cloud is None or cloud == SNOWFLAKE_DEFAULT_CLOUD): + return f"https://app.snowflake.com/{cloud_region_id}/{account_locator}/" + elif privatelink: + return f"https://app.{account_locator}.{cloud_region_id}.privatelink.snowflakecomputing.com/" + return f"https://app.snowflake.com/{cloud_region_id}.{cloud}/{account_locator}/" + + def get_snowsight_base_url(self: SnowflakeCommonProtocol) -> Optional[str]: + return SnowflakeCommonMixin.create_snowsight_base_url(self.config.get_account()) + def _is_dataset_pattern_allowed( self: SnowflakeCommonProtocol, dataset_name: Optional[str], @@ -77,7 +137,12 @@ def _is_dataset_pattern_allowed( if not self.config.database_pattern.allowed( dataset_params[0].strip('"') - ) or not self.config.schema_pattern.allowed(dataset_params[1].strip('"')): + ) or not is_schema_allowed( + self.config.schema_pattern, + dataset_params[1].strip('"'), + dataset_params[0].strip('"'), + self.config.match_fully_qualified_names, + ): return False if dataset_type.lower() in {"table"} and not self.config.table_pattern.allowed( diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 5cee4cb719dbed..4c96e0fb32edb6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -7,6 +7,7 @@ import pydantic from snowflake.connector import SnowflakeConnection +from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter.mce_builder import ( make_container_urn, make_data_platform_urn, @@ -508,7 +509,12 @@ def _process_database( self.report.report_entity_scanned(snowflake_schema.name, "schema") - if not self.config.schema_pattern.allowed(snowflake_schema.name): + if not is_schema_allowed( + self.config.schema_pattern, + snowflake_schema.name, + db_name, + self.config.match_fully_qualified_names, + ): self.report.report_dropped(f"{db_name}.{snowflake_schema.name}.*") continue @@ -646,6 +652,14 @@ def gen_dataset_workunits( description=table.comment, qualifiedName=dataset_name, customProperties={**upstream_column_props}, + externalUrl=self.get_external_url_for_table( + table.name, + schema_name, + db_name, + "table" if isinstance(table, SnowflakeTable) else "view", + ) + if self.config.include_external_url + else None, ) yield self.wrap_aspect_as_workunit( "dataset", dataset_urn, "datasetProperties", dataset_properties @@ -889,6 +903,9 @@ def gen_database_containers( description=database.comment, sub_types=[SqlContainerSubTypes.DATABASE], domain_urn=domain_urn, + external_url=self.get_external_url_for_database(database.name) + if self.config.include_external_url + else None, ) self.stale_entity_removal_handler.add_entity_to_state( @@ -922,6 +939,9 @@ def gen_schema_containers( description=schema.comment, sub_types=[SqlContainerSubTypes.SCHEMA], parent_container_key=database_container_key, + external_url=self.get_external_url_for_schema(schema.name, db_name) + if self.config.include_external_url + else None, ) for wu in container_workunits: @@ -1077,3 +1097,26 @@ def get_sample_values_for_table(self, conn, table_name, schema_name, db_name): df = pd.DataFrame(dat, columns=[col.name for col in cur.description]) return df + + # domain is either "view" or "table" + def get_external_url_for_table( + self, table_name: str, schema_name: str, db_name: str, domain: str + ) -> Optional[str]: + base_url = 
self.get_snowsight_base_url() + if base_url is not None: + return f"{base_url}#/data/databases/{db_name}/schemas/{schema_name}/{domain}/{table_name}/" + return None + + def get_external_url_for_schema( + self, schema_name: str, db_name: str + ) -> Optional[str]: + base_url = self.get_snowsight_base_url() + if base_url is not None: + return f"{base_url}#/data/databases/{db_name}/schemas/{schema_name}/" + return None + + def get_external_url_for_database(self, db_name: str) -> Optional[str]: + base_url = self.get_snowsight_base_url() + if base_url is not None: + return f"{base_url}#/data/databases/{db_name}/" + return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index 480ab2c46d588e..63712b39696f34 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -103,7 +103,9 @@ def get_schema_names(self) -> List[str]: for row in cursor ] - def get_table_names(self, schema: str = None, order_by: str = None) -> List[str]: + def get_table_names( + self, schema: Optional[str] = None, order_by: Optional[str] = None + ) -> List[str]: """ skip order_by, we are not using order_by """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto.py index 0583520c091e64..7cd022b76b38fd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto.py @@ -1,4 +1,5 @@ from textwrap import dedent +from typing import Optional from pydantic.fields import Field from pyhive.sqlalchemy_presto import PrestoDialect @@ -60,7 +61,7 @@ def get_view_definition(self, connection, view_name, schema=None, **kw): def _get_full_table( # type: ignore - self, table_name: str, schema: str = None, quote: bool = True + self, table_name: str, schema: Optional[str] = None, quote: bool = True ) -> str: table_part = ( self.identifier_preparer.quote_identifier(table_name) if quote else table_name diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py index 9a30f2dc8637a0..19419f8d191cf0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py @@ -691,7 +691,7 @@ def _get_sources_from_query(self, db_name: str, query: str) -> List[LineageDatas return sources - def get_db_name(self, inspector: Inspector = None) -> str: + def get_db_name(self, inspector: Optional[Inspector] = None) -> str: db_name = getattr(self.config, "database") db_alias = getattr(self.config, "database_alias") if db_alias: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 23df9ec521c043..6dffda4e83649a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -419,8 +419,8 @@ def get_schema_metadata( dataset_name: str, platform: str, columns: List[dict], - pk_constraints: dict = None, - foreign_keys: List[ForeignKeyConstraint] = None, + pk_constraints: Optional[dict] = None, + foreign_keys: Optional[List[ForeignKeyConstraint]] = None, canonical_schema: List[SchemaField] = [], ) -> SchemaMetadata: schema_metadata = SchemaMetadata( @@ -985,7 +985,7 @@ def 
get_schema_fields(
         self,
         dataset_name: str,
         columns: List[dict],
-        pk_constraints: dict = None,
+        pk_constraints: Optional[dict] = None,
         tags: Optional[Dict[str, List[str]]] = None,
     ) -> List[SchemaField]:
         canonical_schema = []
@@ -1003,7 +1003,7 @@ def get_schema_fields_for_column(
         self,
         dataset_name: str,
         column: dict,
-        pk_constraints: dict = None,
+        pk_constraints: Optional[dict] = None,
         tags: Optional[List[str]] = None,
     ) -> List[SchemaField]:
         gtc: Optional[GlobalTagsClass] = None
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py
index 296bc7a4935282..277dfe704d10c5 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py
@@ -194,7 +194,7 @@ def get_schema_fields_for_column(
         self,
         dataset_name: str,
         column: dict,
-        pk_constraints: dict = None,
+        pk_constraints: Optional[dict] = None,
         tags: Optional[List[str]] = None,
     ) -> List[SchemaField]:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py
index 9434381ff6e3c3..5929f1fa2ad89a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py
@@ -992,7 +992,10 @@ def get_metadata_change_proposal(
         return mcp_workunit

     def emit_datasource(
-        self, datasource: dict, workbook: dict = None, is_embedded_ds: bool = False
+        self,
+        datasource: dict,
+        workbook: Optional[dict] = None,
+        is_embedded_ds: bool = False,
     ) -> Iterable[MetadataWorkUnit]:
         datasource_info = workbook
         if not is_embedded_ds:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py
index b9052c35d465dd..8e7685b9042cd0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py
@@ -173,7 +173,7 @@ def bigquery_audit_metadata_query_template(
     dataset: str,
     use_date_sharded_tables: bool,
-    table_allow_filter: str = None,
+    table_allow_filter: Optional[str] = None,
 ) -> str:
     """
     Receives a dataset (with project specified) and returns a query template that is used to query exported
diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
index 8202f877d6274a..f5d2b441cb8629 100644
--- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
+++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py
@@ -105,15 +105,21 @@ class BaseSnowflakeConfig(BaseTimeWindowConfig):
     password: Optional[pydantic.SecretStr] = pydantic.Field(
         default=None, exclude=True, description="Snowflake password."
     )
+    private_key: Optional[str] = pydantic.Field(
+        default=None,
+        description="Private key in the form '-----BEGIN PRIVATE KEY-----\\nprivate-key\\n-----END PRIVATE KEY-----\\n' if using key pair authentication. An encrypted private key will be in the form '-----BEGIN ENCRYPTED PRIVATE KEY-----\\nencrypted-private-key\\n-----END ENCRYPTED PRIVATE KEY-----\\n'. See: https://docs.snowflake.com/en/user-guide/key-pair-auth.html",
+    )
+
     private_key_path: Optional[str] = pydantic.Field(
         default=None,
-        description="The path to the private key if using key pair authentication.
See: https://docs.snowflake.com/en/user-guide/key-pair-auth.html",
+        description="The path to the private key if using key pair authentication. Ignored if `private_key` is set. See: https://docs.snowflake.com/en/user-guide/key-pair-auth.html",
     )
     private_key_password: Optional[pydantic.SecretStr] = pydantic.Field(
         default=None,
         exclude=True,
-        description="Password for your private key if using key pair authentication.",
+        description="Password for your private key. Required if using key pair authentication with an encrypted private key.",
     )
+
     oauth_config: Optional[OauthConfiguration] = pydantic.Field(
         default=None,
         description="oauth configuration - https://docs.snowflake.com/en/user-guide/python-connector-example.html#connecting-with-oauth",
@@ -182,10 +188,13 @@ def authenticator_type_is_valid(cls, v, values, field):
             )
         if v == "KEY_PAIR_AUTHENTICATOR":
             # If we are using key pair auth, we need either the private key itself or a path to it
-            if values.get("private_key_path") is None:
+            if (
+                values.get("private_key") is None
+                and values.get("private_key_path") is None
+            ):
                 raise ValueError(
-                    f"'private_key_path' was none "
-                    f"but should be set when using {v} authentication"
+                    f"Both `private_key` and `private_key_path` are None. "
+                    f"At least one should be set when using {v} authentication"
                 )
         elif v == "OAUTH_AUTHENTICATOR":
             if values.get("oauth_config") is None:
@@ -275,16 +284,22 @@ def get_sql_alchemy_connect_args(self) -> dict:
         if self.authentication_type != "KEY_PAIR_AUTHENTICATOR":
             return {}
         if self.connect_args is None:
-            if self.private_key_path is None:
-                raise ValueError("missing required private key path to read key from")
-            if self.private_key_password is None:
-                raise ValueError("missing required private key password")
-            with open(self.private_key_path, "rb") as key:
-                p_key = serialization.load_pem_private_key(
-                    key.read(),
-                    password=self.private_key_password.get_secret_value().encode(),
-                    backend=default_backend(),
-                )
+            if self.private_key is not None:
+                pkey_bytes = self.private_key.replace("\\n", "\n").encode()
+            else:
+                assert (
+                    self.private_key_path
+                ), "missing required private key path to read key from"
+                with open(self.private_key_path, "rb") as key:
+                    pkey_bytes = key.read()
+
+            p_key = serialization.load_pem_private_key(
+                pkey_bytes,
+                password=self.private_key_password.get_secret_value().encode()
+                if self.private_key_password is not None
+                else None,
+                backend=default_backend(),
+            )

             pkb = p_key.private_bytes(
                 encoding=serialization.Encoding.DER,
@@ -306,7 +321,7 @@ class SnowflakeConfig(BaseSnowflakeConfig, SQLAlchemyConfig):

     def get_sql_alchemy_url(
         self,
-        database: str = None,
+        database: Optional[str] = None,
         username: Optional[str] = None,
         password: Optional[pydantic.SecretStr] = None,
         role: Optional[str] = None,
diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py
index 931798f5aa5c59..3b6382865762bb 100644
--- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py
+++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py
@@ -110,9 +110,9 @@ def _run(
             ValidationResultIdentifier, "GXCloudIdentifier"
         ],
         data_asset: Union[Validator, DataAsset, Batch],
-        payload: Any = None,
+        payload: Optional[Any] = None,
         expectation_suite_identifier: Optional[ExpectationSuiteIdentifier] = None,
-        checkpoint_identifier: Any = None,
+        checkpoint_identifier: Optional[Any] = None,
     ) -> Dict:
         datasets = []
         try:
diff --git
a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index f7fb251d3eafc3..13a9fc66dd7972 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -67,7 +67,7 @@ def __init__( self, operation_defs: Dict[str, Dict], tag_prefix: str = "", - owner_source_type: str = None, + owner_source_type: Optional[str] = None, strip_owner_email_id: bool = False, ): self.operation_defs = operation_defs diff --git a/metadata-ingestion/src/datahub_provider/example_dags/.airflowignore b/metadata-ingestion/src/datahub_provider/example_dags/.airflowignore new file mode 100644 index 00000000000000..decb9b559aea89 --- /dev/null +++ b/metadata-ingestion/src/datahub_provider/example_dags/.airflowignore @@ -0,0 +1,2 @@ +# This file uses a connection hook, which fails to load unless configured. +snowflake_sample_dag.py diff --git a/metadata-ingestion/src/datahub_provider/example_dags/generic_recipe_sample_dag.py b/metadata-ingestion/src/datahub_provider/example_dags/generic_recipe_sample_dag.py index 289155a877b55e..d0e4aa944e8401 100644 --- a/metadata-ingestion/src/datahub_provider/example_dags/generic_recipe_sample_dag.py +++ b/metadata-ingestion/src/datahub_provider/example_dags/generic_recipe_sample_dag.py @@ -26,6 +26,7 @@ def datahub_recipe(): + # Note that this will also resolve environment variables in the recipe. config = load_config_file("path/to/recipe.yml") pipeline = Pipeline.create(config) diff --git a/metadata-ingestion/src/datahub_provider/example_dags/mysql_sample_dag.py b/metadata-ingestion/src/datahub_provider/example_dags/mysql_sample_dag.py index 35744ea367d353..2c833e14256342 100644 --- a/metadata-ingestion/src/datahub_provider/example_dags/mysql_sample_dag.py +++ b/metadata-ingestion/src/datahub_provider/example_dags/mysql_sample_dag.py @@ -5,33 +5,22 @@ embedded within the code. """ -from datetime import timedelta +from datetime import datetime, timedelta from airflow import DAG -from airflow.operators.python import PythonOperator -from airflow.utils.dates import days_ago - -from datahub.ingestion.run.pipeline import Pipeline - -default_args = { - "owner": "airflow", - "depends_on_past": False, - "email": ["jdoe@example.com"], - "email_on_failure": False, - "email_on_retry": False, - "retries": 1, - "retry_delay": timedelta(minutes=5), - "execution_timeout": timedelta(minutes=120), -} +from airflow.operators.python import PythonVirtualenvOperator def ingest_from_mysql(): + from datahub.ingestion.run.pipeline import Pipeline + pipeline = Pipeline.create( # This configuration is analogous to a recipe configuration. { "source": { "type": "mysql", "config": { + # If you want to use Airflow connections, take a look at the snowflake_sample_dag.py example. 
"username": "user", "password": "pass", "database": "db_name", @@ -45,18 +34,28 @@ def ingest_from_mysql(): } ) pipeline.run() + pipeline.pretty_print_summary() pipeline.raise_from_status() with DAG( "datahub_mysql_ingest", - default_args=default_args, + default_args={ + "owner": "airflow", + }, description="An example DAG which ingests metadata from MySQL to DataHub", + start_date=datetime(2022, 1, 1), schedule_interval=timedelta(days=1), - start_date=days_ago(2), catchup=False, ) as dag: - ingest_task = PythonOperator( + # While it is also possible to use the PythonOperator, we recommend using + # the PythonVirtualenvOperator to ensure that there are no dependency + # conflicts between DataHub and the rest of your Airflow environment. + ingest_task = PythonVirtualenvOperator( task_id="ingest_from_mysql", + requirements=[ + "acryl-datahub[mysql]", + ], + system_site_packages=False, python_callable=ingest_from_mysql, ) diff --git a/metadata-ingestion/src/datahub_provider/example_dags/snowflake_sample_dag.py b/metadata-ingestion/src/datahub_provider/example_dags/snowflake_sample_dag.py new file mode 100644 index 00000000000000..c107bb479262cd --- /dev/null +++ b/metadata-ingestion/src/datahub_provider/example_dags/snowflake_sample_dag.py @@ -0,0 +1,87 @@ +"""Snowflake DataHub Ingest DAG + +This example demonstrates how to ingest metadata from Snowflake into DataHub +from within an Airflow DAG. In contrast to the MySQL example, this DAG +pulls the DB connection configuration from Airflow's connection store. +""" + +from datetime import datetime, timedelta + +from airflow import DAG +from airflow.hooks.base import BaseHook +from airflow.operators.python import PythonVirtualenvOperator + + +def ingest_from_snowflake(snowflake_credentials, datahub_gms_server): + from datahub.ingestion.run.pipeline import Pipeline + + pipeline = Pipeline.create( + # This configuration is analogous to a recipe configuration. + { + "source": { + "type": "snowflake", + "config": { + **snowflake_credentials, + # Other Snowflake config can be added here. + "profiling": {"enabled": False}, + }, + }, + # Other ingestion features, like transformers, are also supported. + # "transformers": [ + # { + # "type": "simple_add_dataset_ownership", + # "config": { + # "owner_urns": [ + # "urn:li:corpuser:example", + # ] + # }, + # } + # ], + "sink": { + "type": "datahub-rest", + "config": {"server": datahub_gms_server}, + }, + } + ) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status() + + +with DAG( + "datahub_snowflake_ingest", + default_args={ + "owner": "airflow", + }, + description="An example DAG which ingests metadata from Snowflake to DataHub", + start_date=datetime(2022, 1, 1), + schedule_interval=timedelta(days=1), + catchup=False, +) as dag: + # This example pulls credentials from Airflow's connection store. + # For this to work, you must have previously configured these connections in Airflow. + # See the Airflow docs for details: https://airflow.apache.org/docs/apache-airflow/stable/howto/connection.html + snowflake_conn = BaseHook.get_connection("snowflake_admin_default") + datahub_conn = BaseHook.get_connection("datahub_rest_default") + + # While it is also possible to use the PythonOperator, we recommend using + # the PythonVirtualenvOperator to ensure that there are no dependency + # conflicts between DataHub and the rest of your Airflow environment. 
+ ingest_task = PythonVirtualenvOperator( + task_id="ingest_from_snowflake", + requirements=[ + "acryl-datahub[snowflake]", + ], + system_site_packages=False, + python_callable=ingest_from_snowflake, + op_kwargs={ + "snowflake_credentials": { + "username": snowflake_conn.login, + "password": snowflake_conn.password, + "account_id": snowflake_conn.extra_dejson["account"], + "warehouse": snowflake_conn.extra_dejson.get("warehouse"), + "role": snowflake_conn.extra_dejson.get("role"), + }, + "datahub_gms_server": datahub_conn.host, + }, + ) diff --git a/metadata-ingestion/src/datahub_provider/lineage/datahub.py b/metadata-ingestion/src/datahub_provider/lineage/datahub.py index fb3728aa53f471..009ce4bb29a97f 100644 --- a/metadata-ingestion/src/datahub_provider/lineage/datahub.py +++ b/metadata-ingestion/src/datahub_provider/lineage/datahub.py @@ -70,7 +70,7 @@ def send_lineage( operator: "BaseOperator", inlets: Optional[List] = None, # unused outlets: Optional[List] = None, # unused - context: Dict = None, + context: Optional[Dict] = None, ) -> None: config = get_lineage_config() if not config.enabled: diff --git a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json b/metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json index 69ce2d646c9939..68c572788cfdfc 100644 --- a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json +++ b/metadata-ingestion/tests/integration/snowflake-beta/snowflake_beta_golden.json @@ -5,7 +5,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\"}, \"name\": \"TEST_DB\", \"description\": \"Comment for TEST_DB\"}", + "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\"}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/\", \"name\": \"TEST_DB\", \"description\": \"Comment for TEST_DB\"}", "contentType": "application/json" }, "systemMetadata": { @@ -61,7 +61,7 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\", \"schema\": \"test_schema\"}, \"name\": \"TEST_SCHEMA\", \"description\": \"comment for TEST_DB.TEST_SCHEMA\"}", + "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\", \"schema\": \"test_schema\"}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/\", \"name\": \"TEST_SCHEMA\", \"description\": \"comment for TEST_DB.TEST_SCHEMA\"}", "contentType": "application/json" }, "systemMetadata": { @@ -159,7 +159,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"name\": \"TABLE_1\", \"qualifiedName\": \"test_db.test_schema.table_1\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/\", \"name\": \"TABLE_1\", \"qualifiedName\": \"test_db.test_schema.table_1\", \"description\": \"Comment for Table\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -243,7 +243,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": 
"{\"customProperties\": {}, \"name\": \"TABLE_2\", \"qualifiedName\": \"test_db.test_schema.table_2\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/\", \"name\": \"TABLE_2\", \"qualifiedName\": \"test_db.test_schema.table_2\", \"description\": \"Comment for Table\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -327,7 +327,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"name\": \"TABLE_3\", \"qualifiedName\": \"test_db.test_schema.table_3\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/\", \"name\": \"TABLE_3\", \"qualifiedName\": \"test_db.test_schema.table_3\", \"description\": \"Comment for Table\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -411,7 +411,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"name\": \"TABLE_4\", \"qualifiedName\": \"test_db.test_schema.table_4\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/\", \"name\": \"TABLE_4\", \"qualifiedName\": \"test_db.test_schema.table_4\", \"description\": \"Comment for Table\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -495,7 +495,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"name\": \"TABLE_5\", \"qualifiedName\": \"test_db.test_schema.table_5\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/\", \"name\": \"TABLE_5\", \"qualifiedName\": \"test_db.test_schema.table_5\", \"description\": \"Comment for Table\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -579,7 +579,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"name\": \"TABLE_6\", \"qualifiedName\": \"test_db.test_schema.table_6\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/\", \"name\": \"TABLE_6\", \"qualifiedName\": \"test_db.test_schema.table_6\", \"description\": \"Comment for Table\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -663,7 +663,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"name\": \"TABLE_7\", \"qualifiedName\": \"test_db.test_schema.table_7\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/\", \"name\": \"TABLE_7\", \"qualifiedName\": \"test_db.test_schema.table_7\", \"description\": \"Comment for Table\", \"tags\": []}", "contentType": 
"application/json" }, "systemMetadata": { @@ -747,7 +747,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"name\": \"TABLE_8\", \"qualifiedName\": \"test_db.test_schema.table_8\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/\", \"name\": \"TABLE_8\", \"qualifiedName\": \"test_db.test_schema.table_8\", \"description\": \"Comment for Table\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -831,7 +831,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"name\": \"TABLE_9\", \"qualifiedName\": \"test_db.test_schema.table_9\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/\", \"name\": \"TABLE_9\", \"qualifiedName\": \"test_db.test_schema.table_9\", \"description\": \"Comment for Table\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { @@ -915,7 +915,7 @@ "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { - "value": "{\"customProperties\": {}, \"name\": \"TABLE_10\", \"qualifiedName\": \"test_db.test_schema.table_10\", \"description\": \"Comment for Table\", \"tags\": []}", + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.snowflake.com/ap-south-1/abc12345/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/\", \"name\": \"TABLE_10\", \"qualifiedName\": \"test_db.test_schema.table_10\", \"description\": \"Comment for Table\", \"tags\": []}", "contentType": "application/json" }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/snowflake-beta/snowflake_privatelink_beta_golden.json b/metadata-ingestion/tests/integration/snowflake-beta/snowflake_privatelink_beta_golden.json new file mode 100644 index 00000000000000..542d07ebe6e3f3 --- /dev/null +++ b/metadata-ingestion/tests/integration/snowflake-beta/snowflake_privatelink_beta_golden.json @@ -0,0 +1,828 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\"}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/\", \"name\": \"TEST_DB\", \"description\": \"Comment for TEST_DB\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "value": "{\"platform\": \"urn:li:dataPlatform:snowflake\"}", + "contentType": "application/json" + }, + 
"systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"Database\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "value": "{\"customProperties\": {\"platform\": \"snowflake\", \"instance\": \"PROD\", \"database\": \"test_db\", \"schema\": \"test_schema\"}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/\", \"name\": \"TEST_SCHEMA\", \"description\": \"comment for TEST_DB.TEST_SCHEMA\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "value": "{\"platform\": \"urn:li:dataPlatform:snowflake\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"Schema\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:5e359958be02ce647cd9ac196dbd4585\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": \"test_db.test_schema.table_1\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": 
\"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/\", \"name\": \"TABLE_1\", \"qualifiedName\": \"test_db.test_schema.table_1\", \"description\": \"Comment for Table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": 
\"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": \"test_db.test_schema.table_2\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", 
\"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/\", \"name\": \"TABLE_2\", \"qualifiedName\": \"test_db.test_schema.table_2\", \"description\": \"Comment for Table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": \"test_db.test_schema.table_3\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": 
\"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/\", \"name\": \"TABLE_3\", \"qualifiedName\": \"test_db.test_schema.table_3\", \"description\": \"Comment for Table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": \"test_db.test_schema.table_4\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/\", \"name\": \"TABLE_4\", \"qualifiedName\": \"test_db.test_schema.table_4\", \"description\": \"Comment for Table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": \"test_db.test_schema.table_5\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": 
\"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_5/\", \"name\": \"TABLE_5\", \"qualifiedName\": \"test_db.test_schema.table_5\", \"description\": \"Comment for Table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": 
\"test_db.test_schema.table_6\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/\", \"name\": \"TABLE_6\", \"qualifiedName\": \"test_db.test_schema.table_6\", \"description\": \"Comment for Table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + 
} +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": \"test_db.test_schema.table_7\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", 
\"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/\", \"name\": \"TABLE_7\", \"qualifiedName\": \"test_db.test_schema.table_7\", \"description\": \"Comment for Table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": \"test_db.test_schema.table_8\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": 
\"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/\", \"name\": \"TABLE_8\", \"qualifiedName\": \"test_db.test_schema.table_8\", \"description\": \"Comment for Table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": 
"{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": \"test_db.test_schema.table_9\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": 
"application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/\", \"name\": \"TABLE_9\", \"qualifiedName\": \"test_db.test_schema.table_9\", \"description\": \"Comment for Table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "value": "{\"removed\": false}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "value": "{\"schemaName\": \"test_db.test_schema.table_10\", \"platform\": \"urn:li:dataPlatform:snowflake\", \"version\": 0, \"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"hash\": \"\", \"platformSchema\": {\"com.linkedin.schema.MySqlDDL\": {\"tableSchema\": \"\"}}, \"fields\": [{\"fieldPath\": \"col_1\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.NumberType\": {}}}, \"nativeDataType\": \"NUMBER(38,0)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_2\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_3\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_4\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, 
{\"fieldPath\": \"col_5\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_6\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_7\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_8\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_9\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}, {\"fieldPath\": \"col_10\", \"nullable\": false, \"description\": \"Comment for column\", \"type\": {\"type\": {\"com.linkedin.schema.StringType\": {}}}, \"nativeDataType\": \"VARCHAR(255)\", \"recursive\": false, \"isPartOfKey\": false}]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "value": "{\"customProperties\": {}, \"externalUrl\": \"https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/\", \"name\": \"TABLE_10\", \"qualifiedName\": \"test_db.test_schema.table_10\", \"description\": \"Comment for Table\", \"tags\": []}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "value": "{\"container\": \"urn:li:container:94c696a054bab40b73e640a7f82e3b1c\"}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-beta-2022_06_07-17_00_00" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py b/metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py index 00d0c94815fa7c..7945b7f1fa1522 100644 --- a/metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py +++ b/metadata-ingestion/tests/integration/snowflake-beta/test_snowflake_beta.py @@ -55,7 +55,13 @@ def default_query_results(query): 
"CREATED": datetime(2021, 6, 8, 0, 0, 0, 0), "LAST_ALTERED": datetime(2021, 6, 8, 0, 0, 0, 0), "COMMENT": "comment for TEST_DB.TEST_SCHEMA", - } + }, + { + "SCHEMA_NAME": "TEST2_SCHEMA", + "CREATED": datetime(2021, 6, 8, 0, 0, 0, 0), + "LAST_ALTERED": datetime(2021, 6, 8, 0, 0, 0, 0), + "COMMENT": "comment for TEST_DB.TEST_SCHEMA", + }, ] elif query == SnowflakeQuery.tables_for_database("TEST_DB"): return [ @@ -335,11 +341,12 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): source=SourceConfig( type="snowflake", config=SnowflakeV2Config( - account_id="ABC12345", + account_id="ABC12345.ap-south-1.aws", username="TST_USR", password="TST_PWD", include_views=False, - table_pattern=AllowDenyPattern(allow=["test_db.test_schema.*"]), + match_fully_qualified_names=True, + schema_pattern=AllowDenyPattern(allow=["test_db.test_schema"]), include_technical_schema=True, include_table_lineage=True, include_view_lineage=False, @@ -381,3 +388,61 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): golden_path=golden_file, ignore_paths=[], ) + + +@freeze_time(FROZEN_TIME) +def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_graph): + test_resources_dir = pytestconfig.rootpath / "tests/integration/snowflake-beta" + + # Run the metadata ingestion pipeline. + output_file = tmp_path / "snowflake_privatelink_test_events.json" + golden_file = test_resources_dir / "snowflake_privatelink_beta_golden.json" + + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + sf_cursor.execute.side_effect = default_query_results + + pipeline = Pipeline( + config=PipelineConfig( + run_id="snowflake-beta-2022_06_07-17_00_00", + source=SourceConfig( + type="snowflake", + config=SnowflakeV2Config( + account_id="ABC12345.ap-south-1.privatelink", + username="TST_USR", + password="TST_PWD", + include_views=False, + schema_pattern=AllowDenyPattern(allow=["test_schema"]), + include_technical_schema=True, + include_table_lineage=False, + include_view_lineage=False, + include_usage_stats=False, + include_operational_stats=False, + start_time=datetime(2022, 6, 6, 7, 17, 0, 0).replace( + tzinfo=timezone.utc + ), + end_time=datetime(2022, 6, 7, 7, 17, 0, 0).replace( + tzinfo=timezone.utc + ), + ), + ), + sink=DynamicTypedConfig( + type="file", config={"filename": str(output_file)} + ), + ) + ) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status() + + # Verify the output. 
+ + mce_helpers.check_golden_file( + pytestconfig, + output_path=output_file, + golden_path=golden_file, + ignore_paths=[], + ) diff --git a/metadata-ingestion/tests/test_helpers/docker_helpers.py b/metadata-ingestion/tests/test_helpers/docker_helpers.py index 7ba82b25a62008..0cbae4b2db104d 100644 --- a/metadata-ingestion/tests/test_helpers/docker_helpers.py +++ b/metadata-ingestion/tests/test_helpers/docker_helpers.py @@ -24,7 +24,7 @@ def wait_for_port( docker_services: pytest_docker.plugin.Services, container_name: str, container_port: int, - hostname: str = None, + hostname: Optional[str] = None, timeout: float = 30.0, pause: float = 0.5, checker: Optional[Callable[[], bool]] = None, diff --git a/metadata-ingestion/tests/unit/test_airflow.py b/metadata-ingestion/tests/unit/test_airflow.py index 82e76f43b00c72..4d1b737e985fbe 100644 --- a/metadata-ingestion/tests/unit/test_airflow.py +++ b/metadata-ingestion/tests/unit/test_airflow.py @@ -75,21 +75,18 @@ def test_airflow_provider_info(): assert get_provider_info() -@pytest.mark.skipif( - AIRFLOW_VERSION < packaging.version.parse("2.0.0"), - reason="the examples use list-style lineage, which is only supported on Airflow 2.x", -) -def test_dags_load_with_no_errors(pytestconfig): +def test_dags_load_with_no_errors(pytestconfig: pytest.Config) -> None: airflow_examples_folder = ( pytestconfig.rootpath / "src/datahub_provider/example_dags" ) + # Note: the .airflowignore file skips the snowflake DAG. dag_bag = DagBag(dag_folder=str(airflow_examples_folder), include_examples=False) import_errors = dag_bag.import_errors - assert import_errors == {} - assert len(dag_bag.dag_ids) > 0 + assert len(import_errors) == 0 + assert dag_bag.size() > 0 @contextmanager diff --git a/metadata-ingestion/tests/unit/test_sql_common.py b/metadata-ingestion/tests/unit/test_sql_common.py index 7b86b6ee6abcc4..f382705c70803a 100644 --- a/metadata-ingestion/tests/unit/test_sql_common.py +++ b/metadata-ingestion/tests/unit/test_sql_common.py @@ -4,7 +4,6 @@ import pytest from sqlalchemy.engine.reflection import Inspector -from datahub.ingestion.api.source import Source from datahub.ingestion.source.sql.sql_common import ( PipelineContext, SQLAlchemyConfig, @@ -19,9 +18,7 @@ def get_sql_alchemy_url(self): class _TestSQLAlchemySource(SQLAlchemySource): - @classmethod - def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: - pass + pass def test_generate_foreign_key(): diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index a8afb1afc10a9d..ca97472db7d0b9 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -42,7 +42,8 @@ dependencies { implementation(externalDependency.kafkaAvroSerializer) { exclude group: "org.apache.avro" } - + + implementation externalDependency.slf4jApi compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok testCompile externalDependency.mockito diff --git a/metadata-integration/java/datahub-protobuf/build.gradle b/metadata-integration/java/datahub-protobuf/build.gradle index bbd18805292a66..7f3616840b9d8d 100644 --- a/metadata-integration/java/datahub-protobuf/build.gradle +++ b/metadata-integration/java/datahub-protobuf/build.gradle @@ -35,6 +35,7 @@ dependencies { implementation externalDependency.gson implementation externalDependency.commonsCli + implementation externalDependency.slf4jApi compileOnly externalDependency.lombok annotationProcessor 
   testImplementation externalDependency.junitJupiterApi
diff --git a/metadata-integration/java/spark-lineage/build.gradle b/metadata-integration/java/spark-lineage/build.gradle
index ee4f02438d8f63..3b69772af635bc 100644
--- a/metadata-integration/java/spark-lineage/build.gradle
+++ b/metadata-integration/java/spark-lineage/build.gradle
@@ -47,6 +47,7 @@ dependencies {
     force = true
   }
   // required for org.apache.commons.io.Charsets that is used internally
+  implementation externalDependency.slf4jApi
   compileOnly externalDependency.lombok
   annotationProcessor externalDependency.lombok
 
@@ -109,6 +110,7 @@ shadowJar {
   exclude('module-info.class', 'META-INF/versions/**')
 
   relocate 'com.fasterxml.jackson', 'datahub.shaded.jackson'
+  relocate 'org.slf4j','datahub.shaded.org.slf4j'
   relocate 'org.apache.http','datahub.spark2.shaded.http'
   relocate 'org.apache.commons.codec', 'datahub.spark2.shaded.o.a.c.codec'
   relocate 'org.apache.commons.compress', 'datahub.spark2.shaded.o.a.c.compress'
diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle
index 0ebee676567521..f8c27ad93bf974 100644
--- a/metadata-io/build.gradle
+++ b/metadata-io/build.gradle
@@ -19,6 +19,8 @@ dependencies {
   compile spec.product.pegasus.generator
   compile externalDependency.dgraph4j exclude group: 'com.google.guava', module: 'guava'
+  implementation externalDependency.slf4jApi
+  runtime externalDependency.logbackClassic
   compileOnly externalDependency.lombok
   implementation externalDependency.commonsCollections
   compile externalDependency.datastaxOssNativeProtocol
diff --git a/metadata-jobs/mae-consumer/build.gradle b/metadata-jobs/mae-consumer/build.gradle
index 19540b2a4cffcf..610fddde3c2b82 100644
--- a/metadata-jobs/mae-consumer/build.gradle
+++ b/metadata-jobs/mae-consumer/build.gradle
@@ -33,6 +33,7 @@ dependencies {
   compile externalDependency.springKafka
   compile externalDependency.springActuator
 
+  implementation externalDependency.slf4jApi
   compileOnly externalDependency.lombok
   annotationProcessor externalDependency.lombok
 
diff --git a/metadata-jobs/mce-consumer/build.gradle b/metadata-jobs/mce-consumer/build.gradle
index 7423bb3c3807e8..730cd56d47bc56 100644
--- a/metadata-jobs/mce-consumer/build.gradle
+++ b/metadata-jobs/mce-consumer/build.gradle
@@ -30,6 +30,7 @@ dependencies {
   compile externalDependency.springKafka
   compile externalDependency.springActuator
 
+  implementation externalDependency.slf4jApi
   compileOnly externalDependency.lombok
   annotationProcessor externalDependency.lombok
 
diff --git a/metadata-jobs/pe-consumer/build.gradle b/metadata-jobs/pe-consumer/build.gradle
index ff4f1e577a9934..38b451fafc2f9e 100644
--- a/metadata-jobs/pe-consumer/build.gradle
+++ b/metadata-jobs/pe-consumer/build.gradle
@@ -18,6 +18,7 @@ dependencies {
   }
   compile externalDependency.springKafka
   compile externalDependency.springActuator
+  implementation externalDependency.slf4jApi
   compileOnly externalDependency.lombok
   annotationProcessor externalDependency.lombok
   runtime externalDependency.logbackClassic
diff --git a/metadata-models-validator/build.gradle b/metadata-models-validator/build.gradle
index 9a34255513d8a9..bd1ec9449fb194 100644
--- a/metadata-models-validator/build.gradle
+++ b/metadata-models-validator/build.gradle
@@ -9,5 +9,7 @@ dependencies {
   compile externalDependency.findbugsAnnotations
   compile externalDependency.guava
 
+  implementation externalDependency.slf4jApi
+  runtimeOnly externalDependency.logbackClassic
   compileOnly externalDependency.lombok
 }
\ No newline at end of file
diff --git a/metadata-service/auth-api/build.gradle b/metadata-service/auth-api/build.gradle
index e0c563eb79cfde..74d55abd18ba74 100644
--- a/metadata-service/auth-api/build.gradle
+++ b/metadata-service/auth-api/build.gradle
@@ -3,6 +3,7 @@ apply plugin: 'java'
 dependencies {
   compile project(path: ':metadata-models')
   compile externalDependency.guava
-  compile externalDependency.lombok
+  implementation externalDependency.slf4jApi
+  compileOnly externalDependency.lombok
   annotationProcessor externalDependency.lombok
 }
\ No newline at end of file
diff --git a/metadata-service/auth-filter/build.gradle b/metadata-service/auth-filter/build.gradle
index d76297e71a7838..8de29b75e35a61 100644
--- a/metadata-service/auth-filter/build.gradle
+++ b/metadata-service/auth-filter/build.gradle
@@ -5,7 +5,8 @@ dependencies {
 
   compile project(path: ':metadata-service:factories')
   compile externalDependency.servletApi
-  compile externalDependency.lombok
+  implementation externalDependency.slf4jApi
+  compileOnly externalDependency.lombok
   compile externalDependency.springWeb
 
   annotationProcessor externalDependency.lombok
diff --git a/metadata-service/auth-impl/build.gradle b/metadata-service/auth-impl/build.gradle
index 76b18f314ede17..5116c668c7d903 100644
--- a/metadata-service/auth-impl/build.gradle
+++ b/metadata-service/auth-impl/build.gradle
@@ -14,7 +14,8 @@ dependencies {
   runtimeOnly 'io.jsonwebtoken:jjwt-impl:0.11.2',
           'io.jsonwebtoken:jjwt-jackson:0.11.2'
 
-  compile externalDependency.lombok
+  implementation externalDependency.slf4jApi
+  compileOnly externalDependency.lombok
 
   annotationProcessor externalDependency.lombok
 
diff --git a/metadata-service/auth-ranger-impl/build.gradle b/metadata-service/auth-ranger-impl/build.gradle
index 8d13106bc6657d..da4946d3edb2e3 100644
--- a/metadata-service/auth-ranger-impl/build.gradle
+++ b/metadata-service/auth-ranger-impl/build.gradle
@@ -28,7 +28,8 @@ dependencies {
   implementation 'org.apache.logging.log4j:log4j-1.2-api:2.17.1'
   implementation 'rome:rome:1.0'
   runtimeOnly externalDependency.jna
-  compile externalDependency.lombok
+  implementation externalDependency.slf4jApi
+  compileOnly externalDependency.lombok
   annotationProcessor externalDependency.lombok
 
   testCompile externalDependency.mockito
diff --git a/metadata-service/auth-servlet-impl/build.gradle b/metadata-service/auth-servlet-impl/build.gradle
index 9ffaa4f6879174..bb258a800b00e9 100644
--- a/metadata-service/auth-servlet-impl/build.gradle
+++ b/metadata-service/auth-servlet-impl/build.gradle
@@ -10,7 +10,8 @@ dependencies {
   compile externalDependency.graphqlJava
   compile externalDependency.springBeans
   compile externalDependency.springContext
-  compile externalDependency.lombok
+  implementation externalDependency.slf4jApi
+  compileOnly externalDependency.lombok
 
   annotationProcessor externalDependency.lombok
 }
diff --git a/metadata-service/factories/build.gradle b/metadata-service/factories/build.gradle
index 1a82f4d52f210f..7068f530d8e0bb 100644
--- a/metadata-service/factories/build.gradle
+++ b/metadata-service/factories/build.gradle
@@ -15,7 +15,7 @@ dependencies {
   compile externalDependency.gson
   compile externalDependency.kafkaClients
   compile externalDependency.kafkaAvroSerde
-  compile externalDependency.lombok
+  compileOnly externalDependency.lombok
   compile externalDependency.servletApi
   compile externalDependency.springBeans
   compile externalDependency.springBootAutoconfigure
@@ -25,12 +25,14 @@ dependencies {
   compile externalDependency.springKafka
   compile externalDependency.springWeb
   compile project(':metadata-service:auth-ranger-impl')
-
+  implementation externalDependency.awsPostgresIamAuth
+  implementation externalDependency.awsRds
   annotationProcessor externalDependency.lombok
 
   compile spec.product.pegasus.restliSpringBridge
 
   testImplementation externalDependency.springBootTest
+  testCompile externalDependency.mockito
   testCompile externalDependency.testng
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java
index fa2e420e7e16ac..66d917b444e01c 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/LocalEbeanServerConfigFactory.java
@@ -6,6 +6,8 @@
 import io.ebean.datasource.DataSourceConfig;
 import io.ebean.datasource.DataSourcePoolListener;
 import java.sql.Connection;
+import java.util.HashMap;
+import java.util.Map;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.context.annotation.Bean;
@@ -51,6 +53,9 @@ public class LocalEbeanServerConfigFactory {
   @Value("${ebean.autoCreateDdl:false}")
   private Boolean ebeanAutoCreate;
 
+  @Value("${ebean.postgresUseIamAuth:false}")
+  private Boolean postgresUseIamAuth;
+
   private DataSourcePoolListener getListenerToTrackCounts(String metricName) {
     final String counterName = "ebeans_connection_pool_size_" + metricName;
     return new DataSourcePoolListener() {
@@ -79,6 +84,12 @@ private DataSourceConfig buildDataSourceConfig(String dataSourceUrl, String data
     dataSourceConfig.setLeakTimeMinutes(ebeanLeakTimeMinutes);
     dataSourceConfig.setWaitTimeoutMillis(ebeanWaitTimeoutMillis);
     dataSourceConfig.setListener(getListenerToTrackCounts(dataSourceType));
+    // Adding IAM auth access for AWS Postgres
+    if (postgresUseIamAuth) {
+      Map<String, String> custom = new HashMap<>();
+      custom.put("wrapperPlugins", "iam");
+      dataSourceConfig.setCustomProperties(custom);
+    }
     return dataSourceConfig;
   }
 
diff --git a/metadata-service/factories/src/main/resources/application.yml b/metadata-service/factories/src/main/resources/application.yml
index 85a1735d221e32..697c88b2c29918 100644
--- a/metadata-service/factories/src/main/resources/application.yml
+++ b/metadata-service/factories/src/main/resources/application.yml
@@ -110,6 +110,7 @@ ebean:
   leakTimeMinutes: ${EBEAN_LEAK_TIME_MINUTES:15}
   waitTimeoutMillis: ${EBEAN_WAIT_TIMEOUT_MILLIS:1000}
   autoCreateDdl: ${EBEAN_AUTOCREATE:false}
+  postgresUseIamAuth: ${EBEAN_POSTGRES_USE_AWS_IAM_AUTH:false}
 
 # Only required if entityService.impl is cassandra
 cassandra:
diff --git a/metadata-service/graphql-servlet-impl/build.gradle b/metadata-service/graphql-servlet-impl/build.gradle
index 041b484836b981..7c365bcdac26ef 100644
--- a/metadata-service/graphql-servlet-impl/build.gradle
+++ b/metadata-service/graphql-servlet-impl/build.gradle
@@ -11,7 +11,8 @@ dependencies {
   compile externalDependency.graphqlJava
   compile externalDependency.springBeans
   compile externalDependency.springContext
-  compile externalDependency.lombok
+  implementation externalDependency.slf4jApi
+  compileOnly externalDependency.lombok
 
   annotationProcessor externalDependency.lombok
 }
diff --git a/metadata-service/openapi-servlet/build.gradle b/metadata-service/openapi-servlet/build.gradle
index 08f019bed70fb2..3b947fee082727 100644
--- a/metadata-service/openapi-servlet/build.gradle
+++ b/metadata-service/openapi-servlet/build.gradle
@@ -13,8 +13,10 @@ dependencies {
   compile externalDependency.springWebMVC
   compile externalDependency.springBeans
   compile externalDependency.springContext
-  compile externalDependency.lombok
-  compile externalDependency.antlr4
+  implementation externalDependency.slf4jApi
+  compileOnly externalDependency.lombok
+  implementation externalDependency.antlr4Runtime
+  implementation externalDependency.antlr4
 
   annotationProcessor externalDependency.lombok
 
diff --git a/metadata-service/restli-client/build.gradle b/metadata-service/restli-client/build.gradle
index 8f25cf7294bbac..7cc643a626c06c 100644
--- a/metadata-service/restli-client/build.gradle
+++ b/metadata-service/restli-client/build.gradle
@@ -7,7 +7,8 @@ dependencies {
   compile project(':metadata-events:mxe-schemas')
   compile project(':metadata-utils')
 
-  compile externalDependency.lombok
+  implementation externalDependency.slf4jApi
+  compileOnly externalDependency.lombok
   annotationProcessor externalDependency.lombok
 
   compile spec.product.pegasus.restliClient
diff --git a/metadata-service/restli-servlet-impl/build.gradle b/metadata-service/restli-servlet-impl/build.gradle
index 5e95bc6c6450cd..08d3a1c2e30920 100644
--- a/metadata-service/restli-servlet-impl/build.gradle
+++ b/metadata-service/restli-servlet-impl/build.gradle
@@ -40,6 +40,8 @@ dependencies {
   compile project(':metadata-utils')
   compile project(':metadata-io')
   compile spec.product.pegasus.restliServer
+  implementation externalDependency.slf4jApi
+  // This is compile and not compileOnly because of restli
   compile externalDependency.lombok
   compile externalDependency.neo4jJavaDriver
   compile externalDependency.opentelemetryAnnotations
@@ -61,7 +63,6 @@ dependencies {
   modelValidation project(path: ':metadata-models-validator')
   dataModel project(path: ':metadata-models', configuration: 'dataTemplate')
   dataModel project(path: ':li-utils', configuration: 'dataTemplate')
-
 }
 
 task integrationTest(type: Test) {
diff --git a/metadata-service/war/build.gradle b/metadata-service/war/build.gradle
index 3a3fdcb66e6116..cdb2b67b423734 100644
--- a/metadata-service/war/build.gradle
+++ b/metadata-service/war/build.gradle
@@ -13,9 +13,9 @@ dependencies {
   runtime project(':metadata-jobs:mae-consumer')
   runtime project(':metadata-jobs:pe-consumer')
 
+  runtime externalDependency.logbackClassic
   runtime externalDependency.awsSecretsManagerJdbc
   runtime externalDependency.h2
-  runtime externalDependency.logbackClassic
   runtime externalDependency.mariadbConnector
   runtime externalDependency.mysqlConnector
   runtime externalDependency.postgresql
@@ -28,15 +28,15 @@ dependencies {
 }
 
 configurations {
-  jetty8
+  jetty9
 }
 
 dependencies {
-  jetty8 "org.eclipse.jetty:jetty-runner:9.4.46.v20220331"
+  jetty9 "org.eclipse.jetty:jetty-runner:9.4.46.v20220331"
 }
 
 task run(type: JavaExec, dependsOn: build) {
   main = "org.eclipse.jetty.runner.Runner"
   args = [war.archivePath]
-  classpath configurations.jetty8
+  classpath configurations.jetty9
 }
diff --git a/metadata-utils/build.gradle b/metadata-utils/build.gradle
index cb6593a1e581c3..3ab83a97d27dff 100644
--- a/metadata-utils/build.gradle
+++ b/metadata-utils/build.gradle
@@ -18,6 +18,7 @@ dependencies {
   compile project(':metadata-events:mxe-avro-1.7')
   compile project(':metadata-events:mxe-utils-avro-1.7')
 
+  implementation externalDependency.slf4jApi
   compileOnly externalDependency.lombok
   annotationProcessor externalDependency.lombok
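Reviewer note (not part of the patch): the Snowflake test changes above hinge on the new match_fully_qualified_names flag. When it is set, schema_pattern is matched against the fully qualified "db.schema" name rather than the bare schema name, which is why test_snowflake_basic now allows "test_db.test_schema" while test_snowflake_private_link, which leaves the flag unset, allows plain "test_schema". A minimal sketch of that matching behaviour, assuming AllowDenyPattern's regex-based allowed() check from datahub.configuration.common:

from datahub.configuration.common import AllowDenyPattern

# Pattern as used in test_snowflake_basic (match_fully_qualified_names=True):
# the allow regex is applied to the "db.schema" name.
fq_pattern = AllowDenyPattern(allow=["test_db.test_schema"])
assert fq_pattern.allowed("test_db.test_schema")  # fully qualified name matches
assert not fq_pattern.allowed("test_schema")      # bare schema name does not

# Pattern as used in test_snowflake_private_link (flag left at its default):
# the allow regex is applied to the bare schema name.
bare_pattern = AllowDenyPattern(allow=["test_schema"])
assert bare_pattern.allowed("test_schema")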