diff --git a/metadata-ingestion/docs/sources/bigquery/bigquery-beta_pre.md b/metadata-ingestion/docs/sources/bigquery/bigquery-beta_pre.md index 5a73826f468cc9..6162a99a7a5220 100644 --- a/metadata-ingestion/docs/sources/bigquery/bigquery-beta_pre.md +++ b/metadata-ingestion/docs/sources/bigquery/bigquery-beta_pre.md @@ -83,11 +83,11 @@ Use `profiling.bigquery_temp_table_schema` to restrict to one specific dataset t ```yml credential: - project_id: project-id-1234567 - private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0" - private_key: "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----\n" - client_email: "test@suppproject-id-1234567.iam.gserviceaccount.com" - client_id: "123456678890" + project_id: project-id-1234567 + private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0" + private_key: "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----\n" + client_email: "test@suppproject-id-1234567.iam.gserviceaccount.com" + client_id: "123456678890" ``` ### Lineage Computation Details diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index 6ce2a8e24fa507..2ae5514ce76e02 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -1,7 +1,7 @@ import re from abc import ABC, abstractmethod from enum import auto -from typing import IO, Any, ClassVar, Dict, List, Optional, Pattern, cast +from typing import IO, Any, ClassVar, Dict, List, Optional, Pattern, Type, cast from cached_property import cached_property from pydantic import BaseModel, Extra @@ -18,6 +18,18 @@ class Config: cached_property, ) # needed to allow cached_property to work. See https://github.com/samuelcolvin/pydantic/issues/1241 for more info. + @staticmethod + def schema_extra(schema: Dict[str, Any], model: Type["ConfigModel"]) -> None: + # We use the custom "hidden_from_schema" attribute to hide fields from the + # autogenerated docs. + remove_fields = [] + for key, prop in schema.get("properties", {}).items(): + if prop.get("hidden_from_schema"): + remove_fields.append(key) + + for key in remove_fields: + del schema["properties"][key] + class PermissiveConfigModel(ConfigModel): # A permissive config model that allows extra fields. diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 81107fa1316908..d9241772540590 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -66,7 +66,7 @@ class BigQueryV2Config(BigQueryConfig): default=None, description="[deprecated] Use project_id_pattern instead.", ) - storage_project_id: None = Field(default=None, exclude=True) + storage_project_id: None = Field(default=None, hidden_from_schema=True) lineage_use_sql_parser: bool = Field( default=False, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index f2fa7b95d55c86..d17b297222ad48 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -120,9 +120,9 @@ class ClickHouseConfig( ): # defaults host_port = Field(default="localhost:8123", description="ClickHouse host URL.") - scheme = Field(default="clickhouse", description="", exclude=True) + scheme = Field(default="clickhouse", description="", hidden_from_schema=True) password: pydantic.SecretStr = Field( - default=pydantic.SecretStr(""), exclude=True, description="password" + default=pydantic.SecretStr(""), description="password" ) secure: Optional[bool] = Field(default=None, description="") diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py index 40bb9b07bb6743..632f96963735f4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py @@ -93,14 +93,14 @@ def dbapi_get_columns_patched(self, connection, table_name, schema=None, **kw): class HiveConfig(BasicSQLAlchemyConfig): # defaults - scheme = Field(default="hive", exclude=True) + scheme = Field(default="hive", hidden_from_schema=True) # Hive SQLAlchemy connector returns views as tables. # See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. # Disabling views helps us prevent this duplication. include_views = Field( default=False, - exclude=True, + hidden_from_schema=True, description="Hive SQLAlchemy connector returns views as tables. See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. Disabling views helps us prevent this duplication.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py index ca991c9c4d60c1..c490bb4738b188 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py @@ -34,7 +34,7 @@ class SQLServerConfig(BasicSQLAlchemyConfig): # defaults host_port: str = Field(default="localhost:1433", description="MSSQL host URL.") - scheme: str = Field(default="mssql+pytds", description="", exclude=True) + scheme: str = Field(default="mssql+pytds", description="", hidden_from_schema=True) use_odbc: bool = Field( default=False, description="See https://docs.sqlalchemy.org/en/14/dialects/mssql.html#module-sqlalchemy.dialects.mssql.pyodbc.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py index 874dc72ac2a00f..2350c7f915887c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py @@ -101,7 +101,9 @@ class PrestoOnHiveConfig(BasicSQLAlchemyConfig): default="localhost:3306", description="Host URL and port to connect to. Example: localhost:3306", ) - scheme: str = Field(default="mysql+pymysql", description="", exclude=True) + scheme: str = Field( + default="mysql+pymysql", description="", hidden_from_schema=True + ) metastore_db_name: Optional[str] = Field( default=None, description="Name of the Hive metastore's database (usually: metastore). For backward compatibility, if this field is not provided, the database field will be used. If both the 'database' and 'metastore_db_name' fields are set then the 'database' field will be used to filter the hive/presto/trino database", diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py index c9edeb15559465..5281c5471be938 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py @@ -141,7 +141,7 @@ class RedshiftConfig( scheme = Field( default="redshift+psycopg2", description="", - exclude=True, + hidden_from_schema=True, ) default_schema: str = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index 0b3103938fc9f2..5a64612022644e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -131,7 +131,7 @@ def _get_columns(self, connection, table_name, schema: str = None, **kw): # typ class TrinoConfig(BasicSQLAlchemyConfig): # defaults - scheme = Field(default="trino", description="", exclude=True) + scheme = Field(default="trino", description="", hidden_from_schema=True) def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str: regular = f"{schema}.{table}"