-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
Copy pathbase.py
447 lines (381 loc) · 17.2 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
import abc
import itertools
import os
from typing import List, Dict, Any, Generic, Optional, TypeVar
from dbt.dataclass_schema import ValidationError
from dbt import utils
from dbt.clients.jinja import MacroGenerator
from dbt.context.providers import (
generate_parser_model_context,
generate_generate_name_macro_context,
)
from dbt.adapters.factory import get_adapter # noqa: F401
from dbt.clients.jinja import get_rendered
from dbt.config import Project, RuntimeConfig
from dbt.context.context_config import ContextConfig
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.nodes import ManifestNode, BaseNode
from dbt.contracts.graph.unparsed import UnparsedNode, Docs
from dbt.exceptions import DbtInternalError, ConfigUpdateError, DictParseError, ParsingError
from dbt import hooks
from dbt.node_types import NodeType, ModelLanguage
from dbt.parser.search import FileBlock
# Type variables shared by the generic parser classes in this module.
#
# Internally, the parser may store a less-restrictive type that will be
# transformed into the final type. But it will have to be derived from
# ParsedNode to be operable.
# NOTE(review): the bounds below are BaseNode / ManifestNode / Any rather than
# ParsedNode -- the sentence above may predate a rename; confirm.

# Type parameter of BaseParser and Parser.
FinalValue = TypeVar("FinalValue", bound=BaseNode)
# Not referenced by any class visible in this module.
IntermediateValue = TypeVar("IntermediateValue", bound=BaseNode)
# Node type produced by parse_from_dict() and consumed by transform().
IntermediateNode = TypeVar("IntermediateNode", bound=Any)
# Node type returned by transform() / parse_node().
FinalNode = TypeVar("FinalNode", bound=ManifestNode)
# Block type accepted by get_compiled_path() / parse_node().
ConfiguredBlockType = TypeVar("ConfiguredBlockType", bound=FileBlock)
class BaseParser(Generic[FinalValue]):
    """Common base for parsers: stores the project being parsed and the manifest.

    Subclasses are expected to provide ``parse_file`` and ``resource_type``.
    NOTE: this class does not use ``abc.ABCMeta``, so the abstract markers
    below document intent and are not enforced at instantiation time.
    """

    def __init__(self, project: Project, manifest: Manifest) -> None:
        self.project = project
        self.manifest = manifest

    @abc.abstractmethod
    def parse_file(self, block: FileBlock) -> None:
        """Parse the given file block."""

    # ``abc.abstractproperty`` is deprecated since Python 3.3; stacking
    # ``property`` on ``abstractmethod`` is the supported spelling.
    @property
    @abc.abstractmethod
    def resource_type(self) -> NodeType:
        """The node type of the resources this parser handles."""

    def generate_unique_id(self, resource_name: str, hash: Optional[str] = None) -> str:
        """Returns a unique identifier for a resource
        An optional hash may be passed in to ensure uniqueness for edge cases

        The id is the dot-joined sequence of resource type, project name,
        resource name and hash; falsy parts (e.g. a missing hash) are skipped.
        """
        parts = [self.resource_type, self.project.project_name, resource_name, hash]
        return ".".join(filter(None, parts))
class Parser(BaseParser[FinalValue], Generic[FinalValue]):
    """A ``BaseParser`` that additionally remembers the root project's runtime config."""

    def __init__(self, project: Project, manifest: Manifest, root_project: RuntimeConfig) -> None:
        super().__init__(project, manifest)
        # Kept separately from ``project``: the two are passed independently.
        self.root_project = root_project
class RelationUpdate:
    """Callable that sets one attribute of a node from a ``generate_<component>_name`` macro.

    ``component`` is the attribute name (this module uses "database",
    "schema" and "alias").
    """

    def __init__(self, config: RuntimeConfig, manifest: Manifest, component: str) -> None:
        """Look up the generate-name macro for ``component`` and wrap it for calling.

        Raises:
            DbtInternalError: if the manifest has no such macro.
        """
        generate_macro = manifest.find_generate_macro_by_name(
            component=component,
            root_project_name=config.project_name,
        )
        if generate_macro is None:
            raise DbtInternalError(f"No macro with name generate_{component}_name found")

        macro_context = generate_generate_name_macro_context(generate_macro, config, manifest)
        self.updater = MacroGenerator(generate_macro, macro_context)
        self.component = component

    def __call__(self, parsed_node: Any, config_dict: Dict[str, Any]) -> None:
        """Run the macro with the configured override and store the result on the node.

        String results are stripped of surrounding whitespace; any other
        result is stored as returned.
        """
        rendered = self.updater(config_dict.get(self.component), parsed_node)
        value = rendered.strip() if isinstance(rendered, str) else rendered
        setattr(parsed_node, self.component, value)
class ConfiguredParser(
    Parser[FinalNode],
    Generic[ConfiguredBlockType, IntermediateNode, FinalNode],
):
    """Parser for resources that carry a config and database/schema/alias names.

    ``parse_node`` drives the flow: build a parse-time node from a block,
    render it to capture config()/ref()/source() calls, fold the resulting
    config back into the node, ``transform`` it, and add it to the manifest.

    Subclasses supply ``get_compiled_path``, ``parse_from_dict``,
    ``resource_type``, ``parse_file`` and ``transform``.
    """

    def __init__(
        self,
        project: Project,
        manifest: Manifest,
        root_project: RuntimeConfig,
    ) -> None:
        super().__init__(project, manifest, root_project)

        # One updater per relation component, each built from the root project.
        self._update_node_database = RelationUpdate(
            manifest=manifest, config=root_project, component="database"
        )
        self._update_node_schema = RelationUpdate(
            manifest=manifest, config=root_project, component="schema"
        )
        self._update_node_alias = RelationUpdate(
            manifest=manifest, config=root_project, component="alias"
        )

    @classmethod
    @abc.abstractmethod
    def get_compiled_path(cls, block: ConfiguredBlockType) -> str:
        """Return the path string used for the node built from ``block``."""

    @abc.abstractmethod
    def parse_from_dict(self, dict, validate=True) -> IntermediateNode:
        """Build an intermediate node from its dictionary representation."""

    # ``abc.abstractproperty`` is deprecated since Python 3.3; stacking
    # ``property`` on ``abstractmethod`` is the supported spelling.
    @property
    @abc.abstractmethod
    def resource_type(self) -> NodeType:
        """The node type of the resources this parser handles."""

    @property
    def default_schema(self):
        """Schema taken from the root project's credentials."""
        return self.root_project.credentials.schema

    @property
    def default_database(self):
        """Database taken from the root project's credentials."""
        return self.root_project.credentials.database

    def get_fqn_prefix(self, path: str) -> List[str]:
        """Return the project name followed by the directory parts of ``path``.

        The file extension and the final path component are dropped.
        """
        no_ext = os.path.splitext(path)[0]
        fqn = [self.project.project_name]
        fqn.extend(utils.split_path(no_ext)[:-1])
        return fqn

    def get_fqn(self, path: str, name: str) -> List[str]:
        """Get the FQN for the node. This impacts node selection and config
        application.
        """
        fqn = self.get_fqn_prefix(path)
        fqn.append(name)
        return fqn

    def _mangle_hooks(self, config):
        """Given a config dict that may have `pre-hook`/`post-hook` keys,
        convert it from the yucky maybe-a-string, maybe-a-dict to a dict.

        ``config`` is modified in place.
        """
        # Like most of parsing, this is a horrible hack :(
        for key in hooks.ModelHookType:
            if key in config:
                config[key] = [hooks.get_hook_dict(h) for h in config[key]]

    def _create_error_node(
        self, name: str, path: str, original_file_path: str, raw_code: str, language: str = "sql"
    ) -> UnparsedNode:
        """If we hit an error before we've actually parsed a node, provide some
        level of useful information by attaching this to the exception.
        """
        # this is a bit silly, but build an UnparsedNode just for error
        # message reasons
        return UnparsedNode(
            name=name,
            resource_type=self.resource_type,
            path=path,
            original_file_path=original_file_path,
            package_name=self.project.project_name,
            raw_code=raw_code,
            language=language,
        )

    def _create_parsetime_node(
        self,
        block: ConfiguredBlockType,
        path: str,
        config: ContextConfig,
        fqn: List[str],
        name=None,
        **kwargs,
    ) -> IntermediateNode:
        """Create the node that will be passed in to the parser context for
        "rendering". Some information may be partial, as it'll be updated by
        config() and any ref()/source() calls discovered during rendering.

        ``name`` defaults to ``block.name``. Extra ``kwargs`` are merged over
        the generated node dictionary before it is parsed.

        Raises:
            DictParseError: if the node dictionary fails validation.
        """
        if name is None:
            name = block.name
        if block.path.relative_path.endswith(".py"):
            language = ModelLanguage.python
            # Python files get a "table" materialization config call added.
            config.add_config_call({"materialized": "table"})
        else:
            # this is not ideal but we have a lot of tests to adjust if don't do it
            language = ModelLanguage.sql

        dct = {
            "alias": name,
            "schema": self.default_schema,
            "database": self.default_database,
            "fqn": fqn,
            "name": name,
            "resource_type": self.resource_type,
            "path": path,
            "original_file_path": block.path.original_file_path,
            "package_name": self.project.project_name,
            "raw_code": block.contents,
            "language": language,
            "unique_id": self.generate_unique_id(name),
            "config": self.config_dict(config),
            "checksum": block.file.checksum.to_dict(omit_none=True),
        }
        dct.update(kwargs)
        try:
            return self.parse_from_dict(dct, validate=True)
        except ValidationError as exc:
            # this is a bit silly, but build an UnparsedNode just for error
            # message reasons
            node = self._create_error_node(
                name=block.name,
                path=path,
                original_file_path=block.path.original_file_path,
                raw_code=block.contents,
                # Report the detected language instead of the "sql" default.
                language=language,
            )
            # Chain the cause explicitly, as render_update does.
            raise DictParseError(exc, node=node) from exc

    def _context_for(self, parsed_node: IntermediateNode, config: ContextConfig) -> Dict[str, Any]:
        """Build the parser model context for ``parsed_node``."""
        return generate_parser_model_context(parsed_node, self.root_project, self.manifest, config)

    def render_with_context(self, parsed_node: IntermediateNode, config: ContextConfig):
        """Render the node's raw code with macro capture enabled and return the context.

        Note: this mutates the config object when config calls are rendered.
        """
        context = self._context_for(parsed_node, config)

        # this goes through the process of rendering, but just throws away
        # the rendered result. The "macro capture" is the point?
        get_rendered(parsed_node.raw_code, context, parsed_node, capture_macros=True)
        return context

    def update_parsed_node_config_dict(
        self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
    ) -> None:
        """Merge ``config_dict`` over the node's current config and rebuild the config.

        This is taking the original config for the node, converting it to a
        dict, updating the config with new config passed in, then re-creating
        the config from the dict in the node. Keys are stripped of
        surrounding whitespace before merging.
        """
        # Overwrite node config
        final_config_dict = parsed_node.config.to_dict(omit_none=True)
        final_config_dict.update({k.strip(): v for (k, v) in config_dict.items()})
        # re-mangle hooks, in case we got new ones
        self._mangle_hooks(final_config_dict)
        parsed_node.config = parsed_node.config.from_dict(final_config_dict)

    def update_parsed_node_relation_names(
        self, parsed_node: IntermediateNode, config_dict: Dict[str, Any]
    ) -> None:
        """Set database, schema and alias on the node, then its relation name."""
        self._update_node_database(parsed_node, config_dict)
        self._update_node_schema(parsed_node, config_dict)
        self._update_node_alias(parsed_node, config_dict)
        self._update_node_relation_name(parsed_node)

    def update_parsed_node_config(
        self,
        parsed_node: IntermediateNode,
        config: ContextConfig,
        context=None,
        patch_config_dict=None,
    ) -> None:
        """Given the ContextConfig used for parsing and the parsed node,
        generate and set the true values to use, overriding the temporary parse
        values set in _create_parsetime_node.

        Args:
            parsed_node: the node to update in place.
            config: the ContextConfig used while parsing the node.
            context: rendering context to reuse for hooks; when falsy one is
                built with ``_context_for`` (only if there are hooks).
            patch_config_dict: forwarded to ``config.build_config_dict``.

        Raises:
            ParsingError: if ``contract`` is set in the config and this parser's
                class is named ``ModelParser``.
        """
        # build_config_dict takes the config_call_dict in the ContextConfig object
        # and calls calculate_node_config to combine dbt_project configs and
        # config calls from SQL files, plus patch configs (from schema files)
        config_dict = config.build_config_dict(patch_config_dict=patch_config_dict)

        # Set tags on node provided in config blocks. Tags are additive, so even if
        # config has been built before, we don't have to reset tags in the parsed_node.
        model_tags = config_dict.get("tags", [])
        for tag in model_tags:
            if tag not in parsed_node.tags:
                parsed_node.tags.append(tag)

        # If we have meta in the config, copy to node level, for backwards
        # compatibility with earlier node-only config.
        if "meta" in config_dict and config_dict["meta"]:
            parsed_node.meta = config_dict["meta"]

        # If we have group in the config, copy to node level
        if "group" in config_dict and config_dict["group"]:
            parsed_node.group = config_dict["group"]

        # If we have docs in the config, merge with the node level, for backwards
        # compatibility with earlier node-only config.
        if "docs" in config_dict and config_dict["docs"]:
            docs_config = config_dict["docs"]
            # we set show at the value of the config if it is set, otherwise, inherit the value
            docs_show = docs_config["show"] if "show" in docs_config else parsed_node.docs.show
            if "node_color" in docs_config:
                parsed_node.docs = Docs(show=docs_show, node_color=docs_config["node_color"])
            else:
                parsed_node.docs = Docs(show=docs_show)

        # If we have "contract" in the config, copy to node level, for backwards
        # compatibility with earlier node-only config.
        if config_dict.get("contract", False):
            parsed_node.contract = True

            # Compared by class name; the ModelParser class is not imported here.
            parser_name = type(self).__name__
            if parser_name == "ModelParser":
                original_file_path = parsed_node.original_file_path
                error_message = "\n `contract=true` can only be configured within `schema.yml` files\n NOT within a model file(ex: .sql, .py) or `dbt_project.yml`."
                raise ParsingError(
                    f"Original File Path: ({original_file_path})\nConstraints must be defined in a `yml` schema configuration file like `schema.yml`.\nOnly the SQL table materialization is supported for constraints. \n`data_type` values must be defined for all columns and NOT be null or blank.{error_message}"
                )

        # unrendered_config is used to compare the original database/schema/alias
        # values and to handle 'same_config' and 'same_contents' calls
        parsed_node.unrendered_config = config.build_config_dict(
            rendered=False, patch_config_dict=patch_config_dict
        )

        parsed_node.config_call_dict = config._config_call_dict

        # do this once before we parse the node database/schema/alias, so
        # parsed_node.config is what it would be if they did nothing
        self.update_parsed_node_config_dict(parsed_node, config_dict)
        # This updates the node database/schema/alias
        self.update_parsed_node_relation_names(parsed_node, config_dict)

        # tests don't have hooks
        if parsed_node.resource_type == NodeType.Test:
            return

        # at this point, we've collected our hooks. Use the node context to
        # render each hook and collect refs/sources.
        # Named ``node_hooks`` so the imported ``hooks`` module is not shadowed.
        node_hooks = list(
            itertools.chain(parsed_node.config.pre_hook, parsed_node.config.post_hook)
        )
        # skip context rebuilding if there aren't any hooks
        if not node_hooks:
            return
        if not context:
            context = self._context_for(parsed_node, config)
        for hook in node_hooks:
            get_rendered(hook.sql, context, parsed_node, capture_macros=True)

    def initial_config(self, fqn: List[str]) -> ContextConfig:
        """Create the ContextConfig for a node with the given FQN.

        Raises:
            DbtInternalError: if the lower of the project's and root project's
                config versions is not 2.
        """
        config_version = min(self.project.config_version, self.root_project.config_version)
        if config_version != 2:
            raise DbtInternalError(
                f"Got an unexpected project version={config_version}, expected 2"
            )
        return ContextConfig(
            self.root_project,
            fqn,
            self.resource_type,
            self.project.project_name,
        )

    def config_dict(
        self,
        config: ContextConfig,
    ) -> Dict[str, Any]:
        """Build the base config dictionary for ``config`` with hooks normalized."""
        config_dict = config.build_config_dict(base=True)
        self._mangle_hooks(config_dict)
        return config_dict

    def render_update(self, node: IntermediateNode, config: ContextConfig) -> None:
        """Render the node, then apply the resulting config to it.

        Raises:
            ConfigUpdateError: if rendering or the config update raises a
                ValidationError.
        """
        try:
            context = self.render_with_context(node, config)
            self.update_parsed_node_config(node, config, context=context)
        except ValidationError as exc:
            # we got a ValidationError - probably bad types in config()
            raise ConfigUpdateError(exc, node=node) from exc

    def add_result_node(self, block: FileBlock, node: ManifestNode):
        """Register the node with the manifest as enabled or disabled per its config."""
        if node.config.enabled:
            self.manifest.add_node(block.file, node)
        else:
            self.manifest.add_disabled(block.file, node)

    def parse_node(self, block: ConfiguredBlockType) -> FinalNode:
        """Parse ``block`` into a final node, add it to the manifest and return it."""
        compiled_path: str = self.get_compiled_path(block)
        fqn = self.get_fqn(compiled_path, block.name)

        config: ContextConfig = self.initial_config(fqn)

        node = self._create_parsetime_node(
            block=block,
            path=compiled_path,
            config=config,
            fqn=fqn,
        )
        self.render_update(node, config)
        result = self.transform(node)
        self.add_result_node(block, result)
        return result

    def _update_node_relation_name(self, node: ManifestNode):
        """Set ``node.relation_name`` from the adapter's Relation class, or clear it."""
        # Seed and Snapshot nodes and Models that are not ephemeral,
        # and TestNodes that store_failures.
        # TestNodes do not get a relation_name without store failures
        # because no schema is created.
        if node.is_relational and not node.is_ephemeral_model:
            adapter = get_adapter(self.root_project)
            relation_cls = adapter.Relation
            node.relation_name = str(relation_cls.create_from(self.root_project, node))
        else:
            # Set it to None in case it changed with a config update
            node.relation_name = None

    @abc.abstractmethod
    def parse_file(self, file_block: FileBlock) -> None:
        """Parse the given file block."""

    @abc.abstractmethod
    def transform(self, node: IntermediateNode) -> FinalNode:
        """Convert the intermediate node into its final node type."""
class SimpleParser(
    ConfiguredParser[ConfiguredBlockType, FinalNode, FinalNode],
    Generic[ConfiguredBlockType, FinalNode],
):
    """ConfiguredParser whose intermediate and final node types are the same."""

    def transform(self, node):
        """Return ``node`` as-is; no conversion step is needed."""
        return node
class SQLParser(
    ConfiguredParser[FileBlock, IntermediateNode, FinalNode],
    Generic[IntermediateNode, FinalNode],
):
    """ConfiguredParser over ``FileBlock`` inputs, one node per file block."""

    def parse_file(self, file_block: FileBlock) -> None:
        """Parse the block as a single node; the resulting node is not returned."""
        self.parse_node(file_block)
class SimpleSQLParser(SQLParser[FinalNode, FinalNode]):
    """SQLParser whose intermediate and final node types are the same."""

    def transform(self, node):
        """Return ``node`` as-is; no conversion step is needed."""
        return node