Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/117 new algo module semantic #120

Merged
merged 10 commits into from
Jul 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v3
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

Generate the ERD-as-a-code ([DBML](https://dbdiagram.io/d), [Mermaid](https://mermaid-js.github.io/mermaid-live-editor/), [PlantUML](https://plantuml.com/ie-diagram), [GraphViz](https://graphviz.org/), [D2](https://d2lang.com/)) from dbt artifact files (`dbt Core`) or from dbt metadata (`dbt Cloud`)

How Entity Relationships are detected is configurable ([docs](https://dbterd.datnguyen.de/latest/nav/guide/cli-references.html#dbterd-run-algo-a)). The supported detection methods are:

- [Test Relationships](https://docs.getdbt.com/reference/resource-properties/data-tests#relationships) (default)
- [Semantic Entities](https://docs.getdbt.com/docs/build/entities) (use `-a` option)

[![PyPI version](https://badge.fury.io/py/dbterd.svg)](https://pypi.org/project/dbterd/)
![python-cli](https://img.shields.io/badge/CLI-Python-FFCE3E?labelColor=14354C&logo=python&logoColor=white)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![python](https://img.shields.io/badge/Python-3.9|3.10|3.11-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
[![python](https://img.shields.io/badge/Python-3.9|3.10|3.11|3.12-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
[![codecov](https://codecov.io/gh/datnguye/dbterd/branch/main/graph/badge.svg?token=N7DMQBLH4P)](https://codecov.io/gh/datnguye/dbterd)

```bash
Expand Down
22 changes: 22 additions & 0 deletions dbterd/adapters/algos/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import click

from dbterd.adapters.filter import is_selected_table
from dbterd.adapters.meta import Column, Ref, Table
from dbterd.constants import (
DEFAULT_ALGO_RULE,
Expand Down Expand Up @@ -96,6 +97,27 @@ def get_tables(manifest: Manifest, catalog: Catalog, **kwargs) -> List[Table]:
return tables


def filter_tables_based_on_selection(tables: List[Table], **kwargs) -> List[Table]:
    """Keep only the tables accepted by the Selection Rules

    Every table is checked with `is_selected_table`, using the `select`,
    `resource_type` and `exclude` entries of `kwargs`.

    Args:
        tables (List[Table]): Parsed tables
        **kwargs: Selection options. `select` and `exclude` fall back to an
            empty list when missing or falsy; `resource_type` falls back to
            an empty list only when the key is missing

    Returns:
        List[Table]: Tables accepted by `is_selected_table`, in input order
    """

    def _is_kept(candidate):
        # Rules are looked up per table, exactly as the check is invoked.
        return is_selected_table(
            table=candidate,
            select_rules=kwargs.get("select") or [],
            resource_types=kwargs.get("resource_type", []),
            exclude_rules=kwargs.get("exclude") or [],
        )

    return list(filter(_is_kept, tables))


def enrich_tables_from_relationships(
tables: List[Table], relationships: List[Ref]
) -> List[Table]:
Expand Down
189 changes: 189 additions & 0 deletions dbterd/adapters/algos/semantic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
from typing import List, Tuple, Union

from dbterd.adapters.algos import base
from dbterd.adapters.meta import Ref, SemanticEntity, Table
from dbterd.constants import TEST_META_RELATIONSHIP_TYPE
from dbterd.helpers.log import logger
from dbterd.types import Catalog, Manifest


def parse_metadata(data, **kwargs) -> Tuple[List[Table], List[Ref]]:
    """Placeholder for parsing metadata with this algorithm

    Args:
        data: Metadata payload (unused)

    Raises:
        NotImplementedError: Always, nothing is implemented here
    """
    raise NotImplementedError  # pragma: no cover


def parse(
    manifest: Manifest, catalog: Union[str, Catalog], **kwargs
) -> Tuple[List[Table], List[Ref]]:
    """Parse tables and relationships using the Semantic Entities

    Args:
        manifest (Manifest): Manifest data
        catalog (Union[str, Catalog]): Catalog data, or the literal
            `"metadata"` to delegate to `parse_metadata`
        **kwargs: Options forwarded to the `base` helpers

    Returns:
        Tuple[List[Table], List[Ref]]: Tables sorted by `node_name` and
            relationships sorted by `name`
    """
    # Metadata input is handed over untouched
    if catalog == "metadata":  # pragma: no cover
        return parse_metadata(data=manifest, **kwargs)

    # Tables: collect, then apply the selection rules
    selected_tables = base.filter_tables_based_on_selection(
        tables=base.get_tables(manifest=manifest, catalog=catalog, **kwargs),
        **kwargs,
    )

    # Relationships: derived from the semantic entities, then made up
    # against the selected tables
    refs = base.make_up_relationships(
        relationships=_get_relationships(manifest=manifest, **kwargs),
        tables=selected_tables,
    )

    # Fulfill columns in Tables (due to `select *`)
    enriched_tables = base.enrich_tables_from_relationships(
        tables=selected_tables, relationships=refs
    )

    logger.info(
        f"Collected {len(enriched_tables)} table(s) and {len(refs)} relationship(s)"
    )
    tables_by_name = sorted(enriched_tables, key=lambda tbl: tbl.node_name)
    refs_by_name = sorted(refs, key=lambda rel: rel.name)
    return tables_by_name, refs_by_name


def find_related_nodes_by_id(
    manifest: Union[Manifest, dict], node_unique_id: str, type: str = None, **kwargs
) -> List[str]:
    """Find FK/PK nodes which are linked to the given node

    Args:
        manifest (Union[Manifest, dict]): Manifest data
        node_unique_id (str): Manifest model node unique id
        type (str, optional): Manifest type (local file or metadata). Defaults to None.

    Returns:
        List[str]: Unique IDs of the given node and of every node linked to
            it through a semantic entity; duplicates removed, order not
            guaranteed
    """
    if type == "metadata":  # pragma: no cover
        return [node_unique_id]  # not supported yet, returned input only

    related = {node_unique_id}
    for fk_entity, pk_entity in _get_linked_semantic_entities(manifest=manifest):
        # Follow the link in both directions
        if pk_entity.model == node_unique_id:
            related.add(fk_entity.model)
        if fk_entity.model == node_unique_id:
            related.add(pk_entity.model)

    return list(related)


def _get_relationships(manifest: Manifest, **kwargs) -> List[Ref]:
    """Extract relationships from dbt artifacts based on Semantic Entities

    One `Ref` is built per linked (FK, PK) entity pair, named after the
    semantic model of the PK entity, then passed through
    `base.get_unique_refs`.

    Args:
        manifest (Manifest): Manifest data

    Returns:
        List[Ref]: List of parsed relationship
    """
    refs = []
    for fk_entity, pk_entity in _get_linked_semantic_entities(manifest=manifest):
        refs.append(
            Ref(
                name=pk_entity.semantic_model,
                # Both maps are ordered (PK side, FK side)
                table_map=(pk_entity.model, fk_entity.model),
                column_map=(pk_entity.column_name, fk_entity.column_name),
                type=pk_entity.relationship_type,
            )
        )
    return base.get_unique_refs(refs=refs)


def _get_linked_semantic_entities(
    manifest: Manifest,
) -> List[Tuple[SemanticEntity, SemanticEntity]]:
    """Pair up the Semantic Entities which share the same entity name

    Args:
        manifest (Manifest): Manifest data

    Returns:
        List[Tuple[SemanticEntity, SemanticEntity]]: List of (FK, PK) objects,
            one per foreign/primary combination with an equal `entity_name`
    """
    fk_entities, pk_entities = _get_semantic_entities(manifest=manifest)
    return [
        (fk_entity, pk_entity)
        for fk_entity in fk_entities
        for pk_entity in pk_entities
        if fk_entity.entity_name == pk_entity.entity_name
    ]


def _get_semantic_entities(
    manifest: Manifest,
) -> Tuple[List[SemanticEntity], List[SemanticEntity]]:
    """Collect the foreign and primary Semantic Entities

    For every semantic model returned by `_get_semantic_nodes`, each entity
    typed `primary` or `foreign` is parsed; a model-level `primary_entity`,
    when set, is added as one more primary entity.

    Args:
        manifest (Manifest): Manifest data

    Returns:
        Tuple[List[SemanticEntity], List[SemanticEntity]]: FK list and PK list
    """
    FK = "foreign"
    PK = "primary"

    def _build(model_id, node, name, kind, column):
        # The first dependency of the semantic model is used as its model
        return SemanticEntity(
            semantic_model=model_id,
            model=node.depends_on.nodes[0],
            entity_name=name,
            entity_type=kind,
            column_name=column,
            relationship_type=node.config.meta.get(TEST_META_RELATIONSHIP_TYPE, ""),
        )

    foreigns = []
    primaries = []
    for model_id in _get_semantic_nodes(manifest=manifest):
        node = manifest.semantic_models[model_id]

        for entity in node.entities:
            kind = entity.type.value
            if kind not in (PK, FK):
                continue
            # The column is the entity expression, or its name when no
            # expression is given
            parsed = _build(model_id, node, entity.name, kind, entity.expr or entity.name)
            (primaries if kind == PK else foreigns).append(parsed)

        if node.primary_entity:
            primaries.append(
                _build(model_id, node, node.primary_entity, PK, node.primary_entity)
            )

    return foreigns, primaries


def _get_semantic_nodes(manifest: Manifest) -> List:
"""Extract the Semantic Models

Args:
manifest (Manifest): Manifest data

Returns:
List: List of Semantic Models
"""
if not hasattr(manifest, "semantic_models"):
logger.warning(
"No relationships will be captured"
"since dbt version is NOT supported for the Semantic Models"
)
return []

return [
x
for x in manifest.semantic_models
if len(manifest.semantic_models[x].depends_on.nodes)
]
30 changes: 2 additions & 28 deletions dbterd/adapters/algos/test_relationship.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from typing import List, Tuple, Union

from dbterd.adapters.algos import base
from dbterd.adapters.filter import is_selected_table
from dbterd.adapters.meta import Ref, Table
from dbterd.helpers.log import logger
from dbterd.types import Catalog, Manifest
Expand All @@ -22,18 +21,7 @@ def parse_metadata(data, **kwargs) -> Tuple[List[Table], List[Ref]]:

# Parse Table
tables = base.get_tables_from_metadata(data=data, **kwargs)

# Apply selection
tables = [
table
for table in tables
if is_selected_table(
table=table,
select_rules=kwargs.get("select") or [],
resource_types=kwargs.get("resource_type", []),
exclude_rules=kwargs.get("exclude") or [],
)
]
tables = base.filter_tables_based_on_selection(tables=tables, **kwargs)

# Parse Ref
relationships = base.get_relationships_from_metadata(data=data, **kwargs)
Expand Down Expand Up @@ -68,18 +56,7 @@ def parse(

# Parse Table
tables = base.get_tables(manifest=manifest, catalog=catalog, **kwargs)

# Apply selection
tables = [
table
for table in tables
if is_selected_table(
table=table,
select_rules=kwargs.get("select") or [],
resource_types=kwargs.get("resource_type", []),
exclude_rules=kwargs.get("exclude") or [],
)
]
tables = base.filter_tables_based_on_selection(tables=tables, **kwargs)

# Parse Ref
relationships = base.get_relationships(manifest=manifest, **kwargs)
Expand Down Expand Up @@ -113,9 +90,6 @@ def find_related_nodes_by_id(
node_unique_id (str): Manifest node unique ID
type (str, optional): Manifest type (local file or metadata). Defaults to None.

Raises:
click.BadParameter: Not Supported manifest type

Returns:
List[str]: Manifest nodes' unique ID
"""
Expand Down
12 changes: 12 additions & 0 deletions dbterd/adapters/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@ class Ref:
type: str = "n1"


@dataclass
class SemanticEntity:
    """Parsed Semantic Model's Entity object

    Attributes:
        semantic_model (str): Identifier of the semantic model owning the entity
        model (str): Identifier of the model the semantic model depends on
        entity_name (str): Name of the entity
        entity_type (str): Type of the entity (e.g. `primary`, `foreign`)
        column_name (str): Column representing the entity
        relationship_type (str): Relationship type attached to the semantic model
    """

    # Where the entity comes from
    semantic_model: str
    model: str

    # What the entity is
    entity_name: str
    entity_type: str
    column_name: str
    relationship_type: str


class SelectionType(Enum):
START_WITH_NAME = ""
EXACT_NAME = "exact"
Expand Down
2 changes: 1 addition & 1 deletion dbterd/adapters/targets/mermaid.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def parse(manifest: Manifest, catalog: Catalog, **kwargs) -> str:
key_to = f'"{rel.table_map[0]}"'
reference_text = replace_column_name(rel.column_map[0])
if rel.column_map[0] != rel.column_map[1]:
reference_text += f"--{ replace_column_name(rel.column_map[1])}"
reference_text += f"--{replace_column_name(rel.column_map[1])}"
mermaid += f" {key_from.upper()} {get_rel_symbol(rel.type)} {key_to.upper()}: {reference_text}\n"

return mermaid
Expand Down
3 changes: 2 additions & 1 deletion dbterd/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import click

from dbterd import default
from dbterd.adapters.base import Executor
from dbterd.cli import params
from dbterd.helpers import jsonify
Expand Down Expand Up @@ -51,7 +52,7 @@ def invoke(self, args: List[str]):
@click.pass_context
def dbterd(ctx, **kwargs):
"""Tools for producing diagram-as-code"""
logger.info(f"Run with dbterd=={__version__}")
logger.info(f"Run with dbterd=={__version__} [{default.default_algo()}]")


# dbterd run
Expand Down
13 changes: 7 additions & 6 deletions dbterd/default.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
import os
from pathlib import Path
from typing import List


def default_artifact_path() -> str:
    """Return the default location of the dbt artifact files

    Returns:
        str: Value of the `DBTERD_ARTIFACT_PATH` environment variable, or the
            `target` directory under the current working directory when unset
    """
    fallback = str(Path.cwd() / "target")
    return os.environ.get("DBTERD_ARTIFACT_PATH", fallback)


def default_output_path() -> str:
    """Return the default location where the output is written

    Returns:
        str: Value of the `DBTERD_OUTPUT_PATH` environment variable, or the
            `target` directory under the current working directory when unset
    """
    fallback = str(Path.cwd() / "target")
    return os.environ.get("DBTERD_OUTPUT_PATH", fallback)


def default_target() -> str:
    """Return the default output target

    Returns:
        str: Value of the `DBTERD_TARGET` environment variable, or `dbml`
            when unset
    """
    fallback = "dbml"
    return os.environ.get("DBTERD_TARGET", fallback)


def default_algo() -> str:
    """Return the default relationship detection algorithm

    Returns:
        str: Value of the `DBTERD_ALGO` environment variable, or
            `test_relationship` when unset
    """
    fallback = "test_relationship"
    return os.environ.get("DBTERD_ALGO", fallback)


def default_resource_types() -> List[str]:
    """Return the default dbt resource types

    The `DBTERD_RESOURCE_TYPES` environment variable, when set, is read as a
    comma-separated list (e.g. `model,source`); surrounding whitespace and
    empty items are dropped.

    Returns:
        List[str]: Resource types from the environment variable, or
            `["model"]` when it is unset or holds no item
    """
    raw_value = os.environ.get("DBTERD_RESOURCE_TYPES")
    if raw_value is None:
        return ["model"]

    # Environment values are always plain strings: returning one as-is would
    # break the declared List[str] contract (callers would iterate over
    # single characters), so it is split into items here.
    resource_types = [item.strip() for item in raw_value.split(",") if item.strip()]
    return resource_types or ["model"]


def default_entity_name_format() -> str:
    """Return the default entity name format

    Returns:
        str: Value of the `DBTERD_ENTITY_NAME_FORMAT` environment variable,
            or `resource.package.model` when unset
    """
    fallback = "resource.package.model"
    return os.environ.get("DBTERD_ENTITY_NAME_FORMAT", fallback)
Loading