From b58021ab0a542520aaac6d991dd7b547f1499416 Mon Sep 17 00:00:00 2001
From: John Joyce
Date: Tue, 20 Dec 2022 09:22:03 -0800
Subject: [PATCH] feat(ui): Adding DBT Cloud support for UI ingestion (#6804)

---
 .../source/builder/RecipeForm/constants.ts    |  39 ++-
 .../source/builder/RecipeForm/dbt_cloud.tsx   | 330 ++++++++++++++++++
 .../app/ingest/source/builder/constants.ts    |   3 +
 .../app/ingest/source/builder/sources.json    |   7 +
 4 files changed, 378 insertions(+), 1 deletion(-)
 create mode 100644 datahub-web-react/src/app/ingest/source/builder/RecipeForm/dbt_cloud.tsx

diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts
index 1b0963f1f9db2..e4609c0586d26 100644
--- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts
+++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts
@@ -83,7 +83,7 @@ import {
     PROJECT_NAME,
 } from './lookml';
 import { PRESTO, PRESTO_HOST_PORT, PRESTO_DATABASE, PRESTO_USERNAME, PRESTO_PASSWORD } from './presto';
-import { BIGQUERY_BETA, MYSQL, UNITY_CATALOG } from '../constants';
+import { BIGQUERY_BETA, DBT_CLOUD, MYSQL, UNITY_CATALOG } from '../constants';
 import { BIGQUERY_BETA_PROJECT_ID, DATASET_ALLOW, DATASET_DENY, PROJECT_ALLOW, PROJECT_DENY } from './bigqueryBeta';
 import { MYSQL_HOST_PORT, MYSQL_PASSWORD, MYSQL_USERNAME } from './mysql';
 import { MSSQL, MSSQL_DATABASE, MSSQL_HOST_PORT, MSSQL_PASSWORD, MSSQL_USERNAME } from './mssql';
@@ -100,6 +100,22 @@ import {
     UNITY_TABLE_DENY,
     WORKSPACE_URL,
 } from './unity_catalog';
+import {
+    DBT_CLOUD_ACCOUNT_ID,
+    DBT_CLOUD_JOB_ID,
+    DBT_CLOUD_PROJECT_ID,
+    INCLUDE_MODELS,
+    INCLUDE_SEEDS,
+    INCLUDE_SOURCES,
+    INCLUDE_TEST_DEFINITIONS,
+    INCLUDE_TEST_RESULTS,
+    EXTRACT_OWNERS as DBT_EXTRACT_OWNERS,
+    NODE_ALLOW,
+    NODE_DENY,
+    TARGET_PLATFORM,
+    TARGET_PLATFORM_INSTANCE,
+    DBT_CLOUD_TOKEN,
+} from './dbt_cloud';
 
 export enum RecipeSections {
     Connection = 0,
@@ -364,6 +380,27 @@ export const RECIPE_FIELDS: RecipeFields = {
         advancedFields: [INCLUDE_TABLE_LINEAGE, INCLUDE_COLUMN_LINEAGE, STATEFUL_INGESTION_ENABLED],
         filterSectionTooltip: 'Include or exclude specific Metastores, Catalogs, Schemas, and Tables from ingestion.',
     },
+    [DBT_CLOUD]: {
+        fields: [
+            DBT_CLOUD_ACCOUNT_ID,
+            DBT_CLOUD_PROJECT_ID,
+            DBT_CLOUD_JOB_ID,
+            DBT_CLOUD_TOKEN,
+            TARGET_PLATFORM,
+            TARGET_PLATFORM_INSTANCE,
+        ],
+        filterFields: [NODE_ALLOW, NODE_DENY],
+        advancedFields: [
+            INCLUDE_MODELS,
+            INCLUDE_SOURCES,
+            INCLUDE_SEEDS,
+            INCLUDE_TEST_DEFINITIONS,
+            INCLUDE_TEST_RESULTS,
+            DBT_EXTRACT_OWNERS,
+            STATEFUL_INGESTION_ENABLED,
+        ],
+        filterSectionTooltip: 'Include or exclude specific dbt Node (resources) from ingestion.',
+    },
 };
 
 export const CONNECTORS_WITH_FORM = new Set(Object.keys(RECIPE_FIELDS));
diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/dbt_cloud.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/dbt_cloud.tsx
new file mode 100644
index 0000000000000..684b7f949a48c
--- /dev/null
+++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/dbt_cloud.tsx
@@ -0,0 +1,330 @@
+/**
+ * Recipe form fields for the dbt Cloud ingestion source.
+ *
+ * NOTE(review): the JSX tags inside the `tooltip` values were missing from the reviewed copy of
+ * this patch and have been reconstructed around the surviving text. Confirm the markup, and
+ * restore the lost link target in DBT_CLOUD_TOKEN, against the original commit.
+ */
+import React from 'react';
+import styled from 'styled-components';
+import { get } from 'lodash';
+import { RecipeField, FieldType, setFieldValueOnRecipe } from './common';
+
+/** Wrapper that adds bottom spacing between the paragraphs of a multi-part tooltip. */
+const TipSection = styled.div`
+    margin-bottom: 12px;
+`;
+
+/** Identifier of the dbt Cloud ingestion source. */
+export const DBT_CLOUD = 'dbt-cloud';
+
+/** Required secret holding the dbt Cloud service account API token (`source.config.token`). */
+export const DBT_CLOUD_TOKEN: RecipeField = {
+    name: 'token',
+    label: 'API Token',
+    tooltip: (
+        <>
+            <TipSection>
+                A service account API token for extracting metadata from dbt Cloud APIs. This token must have the
+                privileges required to read metadata (e.g. Metadata Only permissions).
+            </TipSection>
+            <TipSection>
+                {/* TODO(review): "here" was a hyperlink; its target was lost in the reviewed copy. Restore it. */}
+                For more information about dbt service account tokens, check out the docs here
+            </TipSection>
+        </>
+    ),
+    type: FieldType.SECRET,
+    fieldPath: 'source.config.token',
+    placeholder: 'dbts_ndg_m5oCuSRRC80tpx4ysYfN2tOreiHuATAu5VFcdrkIznQgl4VCOs6w==',
+    required: true,
+    rules: null,
+};
+
+/** Required dbt Cloud account ID (`source.config.account_id`). */
+export const DBT_CLOUD_ACCOUNT_ID: RecipeField = {
+    name: 'account_id',
+    label: 'Account ID',
+    tooltip: (
+        <>
+            <TipSection>The ID of the dbt Cloud account to extract metadata for.</TipSection>
+            <TipSection>
+                This can be found in the URL of your dbt instance: https://cloud.getdbt.com/#/accounts/ACCOUNT_ID/.
+            </TipSection>
+        </>
+    ),
+    type: FieldType.TEXT,
+    fieldPath: 'source.config.account_id',
+    placeholder: '123',
+    required: true,
+    rules: null,
+};
+
+/** Required dbt Cloud project ID (`source.config.project_id`). */
+export const DBT_CLOUD_PROJECT_ID: RecipeField = {
+    name: 'project_id',
+    label: 'Project ID',
+    tooltip: (
+        <>
+            <TipSection>The ID of the dbt Cloud project to extract metadata for.</TipSection>
+            <TipSection>
+                This can be found in the URL of your dbt instance:
+                https://cloud.getdbt.com/#/accounts/123/projects/PROJECT_ID.
+            </TipSection>
+        </>
+    ),
+    type: FieldType.TEXT,
+    fieldPath: 'source.config.project_id',
+    placeholder: '456',
+    required: true,
+    rules: null,
+};
+
+/** Required dbt Cloud job ID (`source.config.job_id`). */
+export const DBT_CLOUD_JOB_ID: RecipeField = {
+    name: 'job_id',
+    label: 'Job ID',
+    tooltip: (
+        <>
+            <TipSection>
+                The ID of the dbt Cloud job to extract metadata for. Choose the job that serves as the primary mechanism
+                for updating your production data.
+            </TipSection>
+            <TipSection>The Job ID can be found in the URL on the Jobs tab of dbt Cloud.</TipSection>
+            <TipSection>
+                Ensure that your job enables documentation generation on each run by enabling &apos;Generate Docs&apos;
+                on dbt Cloud.
+            </TipSection>
+        </>
+    ),
+    type: FieldType.TEXT,
+    fieldPath: 'source.config.job_id',
+    placeholder: '789',
+    required: true,
+    rules: null,
+};
+
+const includeModelsPath = 'source.config.entities_enabled.models';
+/** Toggle for Models; written to the recipe as 'YES' / 'NO', read as enabled when unset or 'YES'. */
+export const INCLUDE_MODELS: RecipeField = {
+    name: 'entities_enabled.models',
+    label: 'Include Models',
+    tooltip: 'Whether to include extraction of Models or not.',
+    type: FieldType.BOOLEAN,
+    fieldPath: includeModelsPath,
+    required: false,
+    rules: null,
+    getValueFromRecipeOverride: (recipe: any) => {
+        const includeModels = get(recipe, includeModelsPath);
+        if (!includeModels || includeModels === 'YES') {
+            return true;
+        }
+        return false;
+    },
+    setValueOnRecipeOverride: (recipe: any, value: boolean) => {
+        const includeModels = value === true ? 'YES' : 'NO';
+        return setFieldValueOnRecipe(recipe, includeModels, includeModelsPath);
+    },
+};
+
+const includeSourcesPath = 'source.config.entities_enabled.sources';
+/** Toggle for Sources; written to the recipe as 'YES' / 'NO', read as enabled when unset or 'YES'. */
+export const INCLUDE_SOURCES: RecipeField = {
+    name: 'entities_enabled.sources',
+    label: 'Include Sources',
+    tooltip: 'Whether to include extraction of Sources or not.',
+    type: FieldType.BOOLEAN,
+    fieldPath: includeSourcesPath,
+    required: false,
+    rules: null,
+    getValueFromRecipeOverride: (recipe: any) => {
+        const includeSources = get(recipe, includeSourcesPath);
+        if (includeSources === 'YES' || includeSources === undefined || includeSources === null) {
+            return true;
+        }
+        return false;
+    },
+    setValueOnRecipeOverride: (recipe: any, value: boolean) => {
+        const includeSources = value === true ? 'YES' : 'NO';
+        return setFieldValueOnRecipe(recipe, includeSources, includeSourcesPath);
+    },
+};
+
+const includeSeedsPath = 'source.config.entities_enabled.seeds';
+/** Toggle for Seeds; written to the recipe as 'YES' / 'NO', read as enabled when unset or 'YES'. */
+export const INCLUDE_SEEDS: RecipeField = {
+    name: 'entities_enabled.seeds',
+    label: 'Include Seeds',
+    tooltip: 'Whether to include extraction of Seeds or not.',
+    type: FieldType.BOOLEAN,
+    fieldPath: includeSeedsPath,
+    required: false,
+    rules: null,
+    getValueFromRecipeOverride: (recipe: any) => {
+        const includeSeeds = get(recipe, includeSeedsPath);
+        if (includeSeeds === 'YES' || includeSeeds === undefined || includeSeeds === null) {
+            return true;
+        }
+        return false;
+    },
+    setValueOnRecipeOverride: (recipe: any, value: boolean) => {
+        const includeSeeds = value === true ? 'YES' : 'NO';
+        return setFieldValueOnRecipe(recipe, includeSeeds, includeSeedsPath);
+    },
+};
+
+const includeTestDefinitionsPath = 'source.config.entities_enabled.test_definitions';
+/** Toggle for Test Definitions; written as 'YES' / 'NO', read as enabled when unset or 'YES'. */
+export const INCLUDE_TEST_DEFINITIONS: RecipeField = {
+    name: 'entities_enabled.test_definitions',
+    label: 'Include Test Definitions',
+    tooltip: 'Whether to include extraction of Test Definitions or not.',
+    type: FieldType.BOOLEAN,
+    fieldPath: includeTestDefinitionsPath,
+    required: false,
+    rules: null,
+    getValueFromRecipeOverride: (recipe: any) => {
+        const includeTestDefinitions = get(recipe, includeTestDefinitionsPath);
+        if (
+            includeTestDefinitions === 'YES' ||
+            includeTestDefinitions === undefined ||
+            includeTestDefinitions === null
+        ) {
+            return true;
+        }
+        return false;
+    },
+    setValueOnRecipeOverride: (recipe: any, value: boolean) => {
+        const includeTestDefinitions = value === true ? 'YES' : 'NO';
+        return setFieldValueOnRecipe(recipe, includeTestDefinitions, includeTestDefinitionsPath);
+    },
+};
+
+const includeTestResultsPath = 'source.config.entities_enabled.test_results';
+/** Toggle for Test Results; written as 'YES' / 'NO', read as enabled when unset or 'YES'. */
+export const INCLUDE_TEST_RESULTS: RecipeField = {
+    name: 'entities_enabled.test_results',
+    label: 'Include Test Results',
+    tooltip: 'Whether to include extraction of Test Results or not.',
+    type: FieldType.BOOLEAN,
+    fieldPath: includeTestResultsPath,
+    required: false,
+    rules: null,
+    getValueFromRecipeOverride: (recipe: any) => {
+        const includeTestResults = get(recipe, includeTestResultsPath);
+        if (includeTestResults === 'YES' || includeTestResults === undefined || includeTestResults === null) {
+            return true;
+        }
+        return false;
+    },
+    setValueOnRecipeOverride: (recipe: any, value: boolean) => {
+        const includeTestResults = value === true ? 'YES' : 'NO';
+        return setFieldValueOnRecipe(recipe, includeTestResults, includeTestResultsPath);
+    },
+};
+
+const nodeAllowFieldPath = 'source.config.node_name_pattern.allow';
+/** List of dbt node-name patterns to include (`source.config.node_name_pattern.allow`). */
+export const NODE_ALLOW: RecipeField = {
+    name: 'node_name_pattern.allow',
+    label: 'Allow Patterns',
+    tooltip:
+        'Only include specific dbt Nodes (resources) by providing their name, or a Regular Expression (REGEX). If not provided, all Nodes will be included.',
+    placeholder: 'model_name',
+    type: FieldType.LIST,
+    buttonLabel: 'Add pattern',
+    fieldPath: nodeAllowFieldPath,
+    rules: null,
+    section: 'Nodes',
+};
+
+const nodeDenyFieldPath = 'source.config.node_name_pattern.deny';
+/** List of dbt node-name patterns to exclude (`source.config.node_name_pattern.deny`). */
+export const NODE_DENY: RecipeField = {
+    name: 'node_name_pattern.deny',
+    label: 'Deny Patterns',
+    tooltip:
+        'Exclude specific dbt Nodes (Resources) by providing their name, or a Regular Expression (REGEX). If not provided, all Nodes will be included. Deny patterns always take precedence over Allow patterns.',
+    placeholder: 'node_name',
+    type: FieldType.LIST,
+    buttonLabel: 'Add pattern',
+    fieldPath: nodeDenyFieldPath,
+    rules: null,
+    section: 'Nodes',
+};
+
+/** Optional custom metadata endpoint URL (`source.config.metadata_endpoint`). */
+export const METADATA_ENDPOINT: RecipeField = {
+    name: 'metadata_endpoint',
+    label: 'Custom Metadata Endpoint URL',
+    tooltip:
+        'A custom URL used for extracting Metadata. By default, this metadata is extracted from https://metadata.cloud.getdbt.com/graphql. In most cases, users should NOT need to provide this value.',
+    placeholder: 'https://metadata.cloud.getdbt.com/graphql',
+    type: FieldType.TEXT,
+    fieldPath: 'source.config.metadata_endpoint',
+    rules: null,
+};
+
+const extractOwnersPath = 'source.config.enable_owner_extraction';
+/** Toggle for owner extraction; shows the recipe's value when set, otherwise defaults to true. */
+export const EXTRACT_OWNERS: RecipeField = {
+    name: 'extract_owners',
+    label: 'Extract Owners',
+    tooltip:
+        'Try to extract owners from dbt meta properties. Be careful: This can override Owners added by users of DataHub.',
+    type: FieldType.BOOLEAN,
+    fieldPath: extractOwnersPath,
+    rules: null,
+    getValueFromRecipeOverride: (recipe: any) => {
+        const extractOwners = get(recipe, extractOwnersPath);
+        if (extractOwners !== undefined && extractOwners !== null) {
+            return extractOwners;
+        }
+        return true;
+    },
+};
+
+/** Required select for the data platform dbt is connected to (`source.config.target_platform`). */
+export const TARGET_PLATFORM: RecipeField = {
+    name: 'target_platform',
+    label: 'Data Platform (Connection Type)',
+    tooltip: 'The type of Data Platform that dbt is connected to.',
+    placeholder: 'Select a Data Platform Type...',
+    type: FieldType.SELECT,
+    options: [
+        { label: 'Snowflake', value: 'snowflake' },
+        { label: 'BigQuery', value: 'bigquery' },
+        { label: 'Redshift', value: 'redshift' },
+        { label: 'Postgres', value: 'postgres' },
+        { label: 'Trino (Starburst)', value: 'trino' },
+        { label: 'Databricks', value: 'databricks' },
+    ],
+    fieldPath: 'source.config.target_platform',
+    required: true,
+    rules: null,
+};
+
+/** Optional platform instance of the target platform (`source.config.target_platform_instance`). */
+export const TARGET_PLATFORM_INSTANCE: RecipeField = {
+    name: 'target_platform_instance',
+    label: 'Data Platform Instance',
+    tooltip: (
+        <>
+            <TipSection>
+                The DataHub Platform Instance identifier that should be used for the assets extracted from dbt.
+            </TipSection>
+            <TipSection>
+                This is used to correctly connect the metadata extracted from the Data Platform with that extracted from
+                dbt Cloud.
+            </TipSection>
+            <TipSection>
+                Leave this blank if you have not configured a Data Platform Instance when ingesting from the associated
+                Data Platform.
+            </TipSection>
+        </>
+    ),
+    placeholder: 'redshift_instance_2',
+    type: FieldType.TEXT,
+    fieldPath: 'source.config.target_platform_instance',
+    rules: null,
+};
diff --git a/datahub-web-react/src/app/ingest/source/builder/constants.ts b/datahub-web-react/src/app/ingest/source/builder/constants.ts
index ca925aa3a7821..ea11e48f43964 100644
--- a/datahub-web-react/src/app/ingest/source/builder/constants.ts
+++ b/datahub-web-react/src/app/ingest/source/builder/constants.ts
@@ -94,6 +94,8 @@ export const CUSTOM = 'custom';
 export const CUSTOM_URN = `urn:li:dataPlatform:${CUSTOM}`;
 export const UNITY_CATALOG = 'unity-catalog';
 export const UNITY_CATALOG_URN = `urn:li:dataPlatform:${UNITY_CATALOG}`;
+export const DBT_CLOUD = 'dbt-cloud';
+export const DBT_CLOUD_URN = `urn:li:dataPlatform:dbt`;
 
 export const PLATFORM_URN_TO_LOGO = {
     [ATHENA_URN]: athenaLogo,
@@ -136,4 +138,5 @@ export const SOURCE_TO_PLATFORM_URN = {
     [SNOWFLAKE_BETA]: SNOWFLAKE_URN,
     [SNOWFLAKE_USAGE]: SNOWFLAKE_URN,
     [STARBURST_TRINO_USAGE]: TRINO_URN,
+    [DBT_CLOUD]: DBT_URN,
 };
diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json
index bb0a796e9af5b..835adc193400c 100644
--- a/datahub-web-react/src/app/ingest/source/builder/sources.json
+++ b/datahub-web-react/src/app/ingest/source/builder/sources.json
@@ -48,6 +48,13 @@
         "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/tableau/",
         "recipe": "source:\n    type: tableau\n    config:\n        # Coordinates\n        connect_uri: null\n        stateful_ingestion:\n            enabled: true"
     },
+    {
+        "urn": "urn:li:dataPlatform:dbt",
+        "name": "dbt-cloud",
+        "displayName": "dbt Cloud",
+        "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/dbt/#module-dbt-cloud",
+        "recipe": "source:\n    type: dbt-cloud\n    config:\n        account_id: null\n        project_id: null\n        job_id: null\n        target_platform: null\n        stateful_ingestion:\n            enabled: true"
+    },
     {
         "urn": "urn:li:dataPlatform:mysql",
         "name": "mysql",