Skip to content

Commit

Permalink
fix(ingest): allow hiding some fields from the schema (#6077)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Oct 11, 2022
1 parent 128e3a8 commit 0427122
Show file tree
Hide file tree
Showing 9 changed files with 29 additions and 15 deletions.
10 changes: 5 additions & 5 deletions metadata-ingestion/docs/sources/bigquery/bigquery-beta_pre.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ Use `profiling.bigquery_temp_table_schema` to restrict to one specific dataset t

```yml
credential:
project_id: project-id-1234567
private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0"
private_key: "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----\n"
client_email: "service-account@project-id-1234567.iam.gserviceaccount.com"
client_id: "123456678890"
project_id: project-id-1234567
private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0"
private_key: "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----\n"
client_email: "service-account@project-id-1234567.iam.gserviceaccount.com"
client_id: "123456678890"
```
### Lineage Computation Details
Expand Down
14 changes: 13 additions & 1 deletion metadata-ingestion/src/datahub/configuration/common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
from abc import ABC, abstractmethod
from enum import auto
from typing import IO, Any, ClassVar, Dict, List, Optional, Pattern, cast
from typing import IO, Any, ClassVar, Dict, List, Optional, Pattern, Type, cast

from cached_property import cached_property
from pydantic import BaseModel, Extra
Expand All @@ -18,6 +18,18 @@ class Config:
cached_property,
) # needed to allow cached_property to work. See https://github.com/samuelcolvin/pydantic/issues/1241 for more info.

@staticmethod
def schema_extra(schema: Dict[str, Any], model: Type["ConfigModel"]) -> None:
    """Strip properties flagged ``hidden_from_schema`` from the generated schema.

    Fields carrying the custom ``hidden_from_schema`` attribute are meant to
    stay out of the autogenerated docs, so their entries are deleted from
    ``schema["properties"]``. The schema dict is modified in place.

    Args:
        schema: The schema dict to prune; left untouched when it has no
            ``"properties"`` entry or no flagged property.
        model: The model class the schema belongs to (not used here).
    """
    properties = schema.get("properties", {})

    # Collect the names first so the mapping is not mutated while iterating.
    hidden_names = [
        name for name, spec in properties.items() if spec.get("hidden_from_schema")
    ]

    for name in hidden_names:
        del schema["properties"][name]


class PermissiveConfigModel(ConfigModel):
# A permissive config model that allows extra fields.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class BigQueryV2Config(BigQueryConfig):
default=None,
description="[deprecated] Use project_id_pattern instead.",
)
storage_project_id: None = Field(default=None, exclude=True)
storage_project_id: None = Field(default=None, hidden_from_schema=True)

lineage_use_sql_parser: bool = Field(
default=False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,9 @@ class ClickHouseConfig(
):
# defaults
host_port = Field(default="localhost:8123", description="ClickHouse host URL.")
scheme = Field(default="clickhouse", description="", exclude=True)
scheme = Field(default="clickhouse", description="", hidden_from_schema=True)
password: pydantic.SecretStr = Field(
default=pydantic.SecretStr(""), exclude=True, description="password"
default=pydantic.SecretStr(""), description="password"
)

secure: Optional[bool] = Field(default=None, description="")
Expand Down
4 changes: 2 additions & 2 deletions metadata-ingestion/src/datahub/ingestion/source/sql/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,14 @@ def dbapi_get_columns_patched(self, connection, table_name, schema=None, **kw):

class HiveConfig(BasicSQLAlchemyConfig):
# defaults
scheme = Field(default="hive", exclude=True)
scheme = Field(default="hive", hidden_from_schema=True)

# Hive SQLAlchemy connector returns views as tables.
# See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273.
# Disabling views helps us prevent this duplication.
include_views = Field(
default=False,
exclude=True,
hidden_from_schema=True,
description="Hive SQLAlchemy connector returns views as tables. See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. Disabling views helps us prevent this duplication.",
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
class SQLServerConfig(BasicSQLAlchemyConfig):
# defaults
host_port: str = Field(default="localhost:1433", description="MSSQL host URL.")
scheme: str = Field(default="mssql+pytds", description="", exclude=True)
scheme: str = Field(default="mssql+pytds", description="", hidden_from_schema=True)
use_odbc: bool = Field(
default=False,
description="See https://docs.sqlalchemy.org/en/14/dialects/mssql.html#module-sqlalchemy.dialects.mssql.pyodbc.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ class PrestoOnHiveConfig(BasicSQLAlchemyConfig):
default="localhost:3306",
description="Host URL and port to connect to. Example: localhost:3306",
)
scheme: str = Field(default="mysql+pymysql", description="", exclude=True)
scheme: str = Field(
default="mysql+pymysql", description="", hidden_from_schema=True
)
metastore_db_name: Optional[str] = Field(
default=None,
description="Name of the Hive metastore's database (usually: metastore). For backward compatibility, if this field is not provided, the database field will be used. If both the 'database' and 'metastore_db_name' fields are set then the 'database' field will be used to filter the hive/presto/trino database",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ class RedshiftConfig(
scheme = Field(
default="redshift+psycopg2",
description="",
exclude=True,
hidden_from_schema=True,
)

default_schema: str = Field(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def _get_columns(self, connection, table_name, schema: str = None, **kw): # typ

class TrinoConfig(BasicSQLAlchemyConfig):
# defaults
scheme = Field(default="trino", description="", exclude=True)
scheme = Field(default="trino", description="", hidden_from_schema=True)

def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str:
regular = f"{schema}.{table}"
Expand Down

0 comments on commit 0427122

Please sign in to comment.