Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SavedQuery nodes #8798

Merged
merged 32 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
bc52f61
Bump to dbt-semantic-interfaces 0.3.0b1
QMalcolm Oct 6, 2023
7f1dc68
Update import path of `WhereFilterParser` from `dbt-semantic-interfaces`
QMalcolm Oct 6, 2023
2546b6a
Create basic `SavedQuery` node type based on `SavedQuery` protocol fr…
QMalcolm Oct 9, 2023
8e75ff5
Add ability to add SavedQueries to the manifest
QMalcolm Oct 9, 2023
5ae7886
Define unparsed SavedQuery node
QMalcolm Oct 9, 2023
8444841
Begin parsing saved_query objects to manifest
QMalcolm Oct 9, 2023
6fc4ce1
Skip jinja rendering of `SavedQuery.where` property
QMalcolm Oct 9, 2023
1a01946
Begin propagating `SavedQueries` on the manifest to the semantic mani…
QMalcolm Oct 9, 2023
35134fd
Add tests for basic saved query parsing
QMalcolm Oct 9, 2023
6d16c05
Add custom pluralization handling of SavedQuery node type
QMalcolm Oct 10, 2023
048d085
Add a config subclass to SavedQuery node
QMalcolm Oct 10, 2023
1c4d6c2
Move the SavedQuery node to nodes.py
QMalcolm Oct 10, 2023
2f100d5
Add basic plumbing of saved query configs to projects
QMalcolm Oct 10, 2023
2417209
Add basic lookup utility for saved queries, SavedQueryLookup
QMalcolm Oct 10, 2023
4867e65
Handle disabled SavedQuery nodes in parsing and lookups
QMalcolm Oct 10, 2023
251e678
Add SavedQuery nodes to grouping process
QMalcolm Oct 10, 2023
ff63ab4
Plumb through saved query in a lot more places
QMalcolm Oct 10, 2023
6844cb7
Add `depends_on` to `SavedQuery` nodes and populate from `metrics` pr…
QMalcolm Oct 10, 2023
728a3cb
Add partial parsing support to SavedQuery nodes
QMalcolm Oct 10, 2023
3cb4468
Add `docs` support for SavedQuery descriptions
QMalcolm Oct 10, 2023
32c90fa
Support selector methods for SavedQuery nodes
QMalcolm Oct 10, 2023
c237bde
Add `refs` property to SavedQuery node
QMalcolm Oct 10, 2023
496ef47
Support `saved_queries` when upgrading from manifests <= v10 (and reg…
QMalcolm Oct 10, 2023
df343cf
Add changie doc for saved query node support
QMalcolm Oct 10, 2023
d74f2c8
Pin to dbt-semantic-interfaces 0.3.0b1 for saved query work
QMalcolm Oct 11, 2023
d4fb6a2
Bump supported DSI version to 0.3.x
QMalcolm Oct 11, 2023
df687ea
Switch metric filters and saved query where to use new WhereFilterInte…
QMalcolm Oct 11, 2023
40e7d94
Update schema yaml readers to create WhereFilterInterfaces
QMalcolm Oct 11, 2023
84c6f28
Expand metric filters and saved query where property to handle both s…
QMalcolm Oct 11, 2023
588068e
Update tests which were broken by where filter changes
QMalcolm Oct 11, 2023
091503a
Regenerate v11 manifest
QMalcolm Oct 11, 2023
5c337b5
Fixup: Update `SavedQueryLookup.perform_lookup` to operate on saved q…
QMalcolm Oct 11, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20231010-154217.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add support for the semantic layer SavedQuery node type
time: 2023-10-10T15:42:17.796231-07:00
custom:
Author: QMalcolm
Issue: "8594"
5 changes: 5 additions & 0 deletions core/dbt/config/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ def create_project(self, rendered: RenderComponents) -> "Project":
tests: Dict[str, Any]
metrics: Dict[str, Any]
semantic_models: Dict[str, Any]
saved_queries: Dict[str, Any]
exposures: Dict[str, Any]
vars_value: VarProvider
dbt_cloud: Dict[str, Any]
Expand All @@ -439,6 +440,7 @@ def create_project(self, rendered: RenderComponents) -> "Project":
tests = cfg.tests
metrics = cfg.metrics
semantic_models = cfg.semantic_models
saved_queries = cfg.saved_queries
exposures = cfg.exposures
if cfg.vars is None:
vars_dict: Dict[str, Any] = {}
Expand Down Expand Up @@ -498,6 +500,7 @@ def create_project(self, rendered: RenderComponents) -> "Project":
tests=tests,
metrics=metrics,
semantic_models=semantic_models,
saved_queries=saved_queries,
exposures=exposures,
vars=vars_value,
config_version=cfg.config_version,
Expand Down Expand Up @@ -606,6 +609,7 @@ class Project:
tests: Dict[str, Any]
metrics: Dict[str, Any]
semantic_models: Dict[str, Any]
saved_queries: Dict[str, Any]
exposures: Dict[str, Any]
vars: VarProvider
dbt_version: List[VersionSpecifier]
Expand Down Expand Up @@ -683,6 +687,7 @@ def to_project_config(self, with_packages=False):
"tests": self.tests,
"metrics": self.metrics,
"semantic-models": self.semantic_models,
"saved-queries": self.saved_queries,
"exposures": self.exposures,
"vars": self.vars.to_dict(),
"require-dbt-version": [v.to_version_string() for v in self.dbt_version],
Expand Down
2 changes: 2 additions & 0 deletions core/dbt/config/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ def from_parts(
tests=project.tests,
metrics=project.metrics,
semantic_models=project.semantic_models,
saved_queries=project.saved_queries,
exposures=project.exposures,
vars=project.vars,
config_version=project.config_version,
Expand Down Expand Up @@ -325,6 +326,7 @@ def get_resource_config_paths(self) -> Dict[str, PathSet]:
"tests": self._get_config_paths(self.tests),
"metrics": self._get_config_paths(self.metrics),
"semantic_models": self._get_config_paths(self.semantic_models),
"saved_queries": self._get_config_paths(self.saved_queries),
"exposures": self._get_config_paths(self.exposures),
}

Expand Down
4 changes: 4 additions & 0 deletions core/dbt/context/context_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def get_config_dict(self, resource_type: NodeType) -> Dict[str, Any]:
model_configs = unrendered.get("metrics")
elif resource_type == NodeType.SemanticModel:
model_configs = unrendered.get("semantic_models")
elif resource_type == NodeType.SavedQuery:
model_configs = unrendered.get("saved_queries")
elif resource_type == NodeType.Exposure:
model_configs = unrendered.get("exposures")
else:
Expand Down Expand Up @@ -74,6 +76,8 @@ def get_config_dict(self, resource_type: NodeType) -> Dict[str, Any]:
model_configs = self.project.metrics
elif resource_type == NodeType.SemanticModel:
model_configs = self.project.semantic_models
elif resource_type == NodeType.SavedQuery:
model_configs = self.project.saved_queries
elif resource_type == NodeType.Exposure:
model_configs = self.project.exposures
else:
Expand Down
1 change: 1 addition & 0 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ class SchemaSourceFile(BaseSourceFile):
# node patches contain models, seeds, snapshots, analyses
ndp: List[str] = field(default_factory=list)
semantic_models: List[str] = field(default_factory=list)
saved_queries: List[str] = field(default_factory=list)
# any macro patches in this file by macro unique_id.
mcp: Dict[str, str] = field(default_factory=dict)
# any source patches in this file. The entries are package, name pairs
Expand Down
100 changes: 100 additions & 0 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
ModelNode,
DeferRelation,
ResultNode,
SavedQuery,
SemanticModel,
SourceDefinition,
UnpatchedSourceDefinition,
Expand Down Expand Up @@ -299,6 +300,41 @@ def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> Metric:
return manifest.metrics[unique_id]


class SavedQueryLookup(dbtClassMixin):
    """Lookup utility for finding SavedQuery nodes.

    Keeps an index of ``search_name -> package_name -> unique_id`` that is
    built from ``manifest.saved_queries`` when the lookup is constructed.
    """

    def __init__(self, manifest: "Manifest") -> None:
        self.storage: Dict[str, Dict[PackageName, UniqueID]] = {}
        self.populate(manifest)

    def get_unique_id(self, search_name, package: Optional[PackageName]):
        """Resolve ``search_name``/``package`` to a unique id via ``find_unique_id_for_package``."""
        return find_unique_id_for_package(self.storage, search_name, package)

    def find(self, search_name, package: Optional[PackageName], manifest: "Manifest"):
        """Return the matching SavedQuery from ``manifest``, or None if no unique id is indexed."""
        unique_id = self.get_unique_id(search_name, package)
        if unique_id is not None:
            return self.perform_lookup(unique_id, manifest)
        return None

    def add_saved_query(self, saved_query: SavedQuery):
        """Index ``saved_query`` by its search name and package name."""
        by_package = self.storage.setdefault(saved_query.search_name, {})
        by_package[saved_query.package_name] = saved_query.unique_id

    def populate(self, manifest):
        """Index every saved query on ``manifest`` that has a ``name`` attribute."""
        for saved_query in manifest.saved_queries.values():
            if hasattr(saved_query, "name"):
                self.add_saved_query(saved_query)

    def perform_lookup(self, unique_id: UniqueID, manifest: "Manifest") -> SavedQuery:
        """Fetch the SavedQuery for ``unique_id`` from ``manifest.saved_queries``.

        Raises:
            DbtInternalError: if the id is indexed here but absent from the manifest.
        """
        # Check the same collection we read from; checking manifest.metrics here
        # would reject every valid saved query id.
        if unique_id not in manifest.saved_queries:
            raise dbt.exceptions.DbtInternalError(
                f"SavedQuery {unique_id} found in cache but not found in manifest"
            )
        return manifest.saved_queries[unique_id]


class SemanticModelByMeasureLookup(dbtClassMixin):
"""Lookup utility for finding SemanticModel by measure

Expand Down Expand Up @@ -609,6 +645,9 @@ class Disabled(Generic[D]):
MaybeMetricNode = Optional[Union[Metric, Disabled[Metric]]]


MaybeSavedQueryNode = Optional[Union[SavedQuery, Disabled[SavedQuery]]]


MaybeDocumentation = Optional[Documentation]


Expand Down Expand Up @@ -753,6 +792,7 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
disabled: MutableMapping[str, List[GraphMemberNode]] = field(default_factory=dict)
env_vars: MutableMapping[str, str] = field(default_factory=dict)
semantic_models: MutableMapping[str, SemanticModel] = field(default_factory=dict)
saved_queries: MutableMapping[str, SavedQuery] = field(default_factory=dict)

_doc_lookup: Optional[DocLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
Expand All @@ -766,6 +806,9 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin):
_metric_lookup: Optional[MetricLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_saved_query_lookup: Optional[SavedQueryLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
_semantic_model_by_measure_lookup: Optional[SemanticModelByMeasureLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
)
Expand Down Expand Up @@ -810,6 +853,9 @@ def build_flat_graph(self):
"semantic_models": {
k: v.to_dict(omit_none=False) for k, v in self.semantic_models.items()
},
"saved_queries": {
k: v.to_dict(omit_none=False) for k, v in self.saved_queries.items()
},
}

def build_disabled_by_file_id(self):
Expand Down Expand Up @@ -871,6 +917,7 @@ def get_resource_fqns(self) -> Mapping[str, PathSet]:
self.sources.values(),
self.metrics.values(),
self.semantic_models.values(),
self.saved_queries.values(),
)
for resource in all_resources:
resource_type_plural = resource.resource_type.pluralize()
Expand Down Expand Up @@ -906,6 +953,7 @@ def deepcopy(self):
files={k: _deepcopy(v) for k, v in self.files.items()},
state_check=_deepcopy(self.state_check),
semantic_models={k: _deepcopy(v) for k, v in self.semantic_models.items()},
saved_queries={k: _deepcopy(v) for k, v in self.saved_queries.items()},
)
copy.build_flat_graph()
return copy
Expand All @@ -918,6 +966,7 @@ def build_parent_and_child_maps(self):
self.exposures.values(),
self.metrics.values(),
self.semantic_models.values(),
self.saved_queries.values(),
)
)
forward_edges, backward_edges = build_node_edges(edge_members)
Expand All @@ -938,6 +987,7 @@ def build_group_map(self):
groupable_nodes = list(
chain(
self.nodes.values(),
self.saved_queries.values(),
self.semantic_models.values(),
self.metrics.values(),
)
Expand Down Expand Up @@ -966,6 +1016,7 @@ def writable_manifest(self) -> "WritableManifest":
parent_map=self.parent_map,
group_map=self.group_map,
semantic_models=self.semantic_models,
saved_queries=self.saved_queries,
)

def write(self, path):
Expand All @@ -984,6 +1035,8 @@ def expect(self, unique_id: str) -> GraphMemberNode:
return self.metrics[unique_id]
elif unique_id in self.semantic_models:
return self.semantic_models[unique_id]
elif unique_id in self.saved_queries:
return self.saved_queries[unique_id]
else:
# something terrible has happened
raise dbt.exceptions.DbtInternalError(
Expand Down Expand Up @@ -1020,6 +1073,13 @@ def metric_lookup(self) -> MetricLookup:
self._metric_lookup = MetricLookup(self)
return self._metric_lookup

@property
def saved_query_lookup(self) -> SavedQueryLookup:
    """Return the cached SavedQueryLookup, building it from this manifest on first access."""
    lookup = self._saved_query_lookup
    if lookup is None:
        lookup = SavedQueryLookup(self)
        self._saved_query_lookup = lookup
    return lookup

@property
def semantic_model_by_measure_lookup(self) -> SemanticModelByMeasureLookup:
"""Gets (and creates if necessary) the lookup utility for getting SemanticModels by measures"""
Expand Down Expand Up @@ -1153,6 +1213,35 @@ def resolve_metric(
return Disabled(disabled[0])
return None

def resolve_saved_query(
    self,
    target_saved_query_name: str,
    target_saved_query_package: Optional[str],
    current_project: str,
    node_package: str,
) -> MaybeSavedQueryNode:
    """Try to find a SavedQuery by name within the available project and packages.

    Packages are searched in the order produced by ``_packages_to_search``.
    The first enabled SavedQuery matching the name is returned immediately.
    If no enabled match exists, the first entry of the first disabled result
    found during the search is returned wrapped in ``Disabled``. Otherwise
    returns None.
    """
    disabled: Optional[List[SavedQuery]] = None
    candidates = _packages_to_search(current_project, node_package, target_saved_query_package)
    for pkg in candidates:
        saved_query = self.saved_query_lookup.find(target_saved_query_name, pkg, self)

        if saved_query is not None and saved_query.config.enabled:
            return saved_query

        # It's possible that the node is disabled. Only query the disabled
        # lookup until it yields a non-None result; that first result is kept.
        if disabled is None:
            disabled = self.disabled_lookup.find(target_saved_query_name, pkg)

    if disabled:
        return Disabled(disabled[0])

    return None

def resolve_semantic_model_for_measure(
self,
target_measure_name: str,
Expand Down Expand Up @@ -1371,6 +1460,8 @@ def add_disabled(self, source_file: AnySourceFile, node: ResultNode, test_from=N
source_file.add_test(node.unique_id, test_from)
if isinstance(node, Metric):
source_file.metrics.append(node.unique_id)
if isinstance(node, SavedQuery):
source_file.saved_queries.append(node.unique_id)
if isinstance(node, SemanticModel):
source_file.semantic_models.append(node.unique_id)
if isinstance(node, Exposure):
Expand All @@ -1388,6 +1479,11 @@ def add_semantic_model(self, source_file: SchemaSourceFile, semantic_model: Sema
self.semantic_models[semantic_model.unique_id] = semantic_model
source_file.semantic_models.append(semantic_model.unique_id)

def add_saved_query(self, source_file: SchemaSourceFile, saved_query: SavedQuery) -> None:
    """Store ``saved_query`` on the manifest and record its unique id on ``source_file``.

    ``_check_duplicates`` runs first, against the saved queries already stored.
    """
    _check_duplicates(saved_query, self.saved_queries)
    unique_id = saved_query.unique_id
    self.saved_queries[unique_id] = saved_query
    source_file.saved_queries.append(unique_id)

# end of methods formerly in ParseResult

# Provide support for copy.deepcopy() - we just need to avoid the lock!
Expand Down Expand Up @@ -1415,6 +1511,7 @@ def __reduce_ex__(self, protocol):
self.disabled,
self.env_vars,
self.semantic_models,
self.saved_queries,
self._doc_lookup,
self._source_lookup,
self._ref_lookup,
Expand Down Expand Up @@ -1485,6 +1582,9 @@ class WritableManifest(ArtifactMixin):
description="A mapping from group names to their nodes",
)
)
saved_queries: Mapping[UniqueID, SavedQuery] = field(
metadata=dict(description=("The saved queries defined in the dbt project"))
)
semantic_models: Mapping[UniqueID, SemanticModel] = field(
metadata=dict(description=("The semantic models defined in the dbt project"))
)
Expand Down
2 changes: 2 additions & 0 deletions core/dbt/contracts/graph/manifest_upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,6 @@ def upgrade_manifest_json(manifest: dict, manifest_schema_version: int) -> dict:
doc_content["resource_type"] = "doc"
if "semantic_models" not in manifest:
manifest["semantic_models"] = {}
if "saved_queries" not in manifest:
manifest["saved_queries"] = {}
return manifest
22 changes: 22 additions & 0 deletions core/dbt/contracts/graph/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,27 @@ class SemanticModelConfig(BaseConfig):
)


@dataclass
class SavedQueryConfig(BaseConfig):
    """Holds the config options for SavedQuery nodes.

    Similar in shape to the other node config classes. It will likely grow
    toward what the `NodeAndTestConfig` class contains, and it might make
    sense to consolidate the various *Config classes into one at some point.
    """

    enabled: bool = True
    # NOTE(review): CompareBehavior.Exclude presumably keeps `group` out of
    # config comparisons; confirm against CompareBehavior.
    group: Optional[str] = field(default=None, metadata=CompareBehavior.Exclude.meta())
    # NOTE(review): MergeBehavior.Update presumably merges `meta` dicts
    # instead of replacing them; confirm against MergeBehavior.
    meta: Dict[str, Any] = field(default_factory=dict, metadata=MergeBehavior.Update.meta())


@dataclass
class MetricConfig(BaseConfig):
enabled: bool = True
Expand Down Expand Up @@ -655,6 +676,7 @@ def finalize_and_validate(self):
RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = {
NodeType.Metric: MetricConfig,
NodeType.SemanticModel: SemanticModelConfig,
NodeType.SavedQuery: SavedQueryConfig,
NodeType.Exposure: ExposureConfig,
NodeType.Source: SourceConfig,
NodeType.Seed: SeedConfig,
Expand Down
Loading