From a195e282a696fe05d4e638200ac1696827a72dff Mon Sep 17 00:00:00 2001 From: Kyle Wigley Date: Fri, 25 Jun 2021 08:20:46 -0400 Subject: [PATCH] don't use `meta`! need better separation between dbt internal objects and external facing data. hacked an internal field on the manifest to save off this parsing info for the time being --- core/dbt/contracts/graph/manifest.py | 10 ++++++++++ core/dbt/contracts/graph/parsed.py | 1 - core/dbt/parser/manifest.py | 22 ++++++++++------------ core/dbt/parser/models.py | 4 ++-- test/unit/test_parser.py | 1 - 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index 38f1f5e1d6b..9107e9bc32c 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -525,6 +525,12 @@ def _find_macros_by_name( return candidates +@dataclass +class ParsingInfo: + static_analysis_parsed_path_count: int = 0 + static_analysis_path_count: int = 0 + + @dataclass class ManifestStateCheck(dbtClassMixin): vars_hash: FileHash = field(default_factory=FileHash.empty) @@ -566,6 +572,10 @@ class Manifest(MacroMethods, DataClassMessagePackMixin, dbtClassMixin): _analysis_lookup: Optional[AnalysisLookup] = field( default=None, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None} ) + _parsing_info: ParsingInfo = field( + default_factory=ParsingInfo, + metadata={'serialize': lambda x: None, 'deserialize': lambda x: None} + ) _lock: Lock = field( default_factory=flags.MP_CONTEXT.Lock, metadata={'serialize': lambda x: None, 'deserialize': lambda x: None} diff --git a/core/dbt/contracts/graph/parsed.py b/core/dbt/contracts/graph/parsed.py index 26f75c9722f..62d8b3b3086 100644 --- a/core/dbt/contracts/graph/parsed.py +++ b/core/dbt/contracts/graph/parsed.py @@ -201,7 +201,6 @@ class ParsedNodeDefaults(ParsedNodeMandatory): compiled_path: Optional[str] = None build_path: Optional[str] = None deferred: bool = False - unrendered_config: Dict[str, Any] = field(default_factory=dict) created_at: int = field(default_factory=lambda: int(time.time())) def write_node(self, target_path: str, subdirectory: str, payload: str): diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 0868c224506..642f30bd953 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -91,6 +91,7 @@ class ManifestLoaderInfo(dbtClassMixin, Writable): static_analysis_path_count: int = 0 static_analysis_parsed_path_count: int = 0 is_partial_parse_enabled: Optional[bool] = None + is_static_analysis_enabled: Optional[bool] = None read_files_elapsed: Optional[float] = None load_macros_elapsed: Optional[float] = None parse_project_elapsed: Optional[float] = None @@ -307,16 +308,12 @@ def load(self): time.perf_counter() - start_process ) # update tracking data with static parsing info - statically_extracted_nodes = [ - node for node in self.manifest.nodes.values() - if node.meta.pop('is_statically_extracted', False) is True - ] - statically_extractable_nodes = [ - node for node in self.manifest.nodes.values() - if node.meta.pop('is_statically_extractable', False) is True - ] - self._perf_info.static_analysis_parsed_path_count = len(statically_extracted_nodes) - self._perf_info.static_analysis_path_count = len(statically_extractable_nodes) + self._perf_info.static_analysis_parsed_path_count = ( + self.manifest._parsing_info.static_analysis_parsed_path_count + ) + self._perf_info.static_analysis_path_count = ( + self.manifest._parsing_info.static_analysis_path_count + ) # write out the fully parsed manifest self.write_manifest_for_partial_parse() @@ -516,7 +513,8 @@ def read_manifest_for_partial_parse(self) -> Optional[Manifest]: def build_perf_info(self): mli = ManifestLoaderInfo( - is_partial_parse_enabled=self._partial_parse_enabled() + is_partial_parse_enabled=self._partial_parse_enabled(), + is_static_analysis_enabled=flags.USE_EXPERIMENTAL_PARSER ) for project in self.all_projects.values(): project_info = ProjectLoaderInfo( @@ -629,7 +627,7 @@ def track_project_load(self): "is_partial_parse_enabled": ( self._perf_info.is_partial_parse_enabled ), - "is_static_analysis_enabled": flags.USE_EXPERIMENTAL_PARSER, + "is_static_analysis_enabled": self._perf_info.is_static_analysis_enabled, "static_analysis_path_count": self._perf_info.static_analysis_path_count, "static_analysis_parsed_path_count": self._perf_info.static_analysis_parsed_path_count, }) diff --git a/core/dbt/parser/models.py b/core/dbt/parser/models.py index 6dbc64f9188..cc72cd13588 100644 --- a/core/dbt/parser/models.py +++ b/core/dbt/parser/models.py @@ -24,7 +24,7 @@ def get_compiled_path(cls, block: FileBlock): def render_update( self, node: ParsedModelNode, config: ContextConfig ) -> None: - node.meta["is_statically_extractable"] = True + self.manifest._parsing_info.static_analysis_path_count += 1 # normal dbt run if not flags.USE_EXPERIMENTAL_PARSER: @@ -64,7 +64,7 @@ def render_update( for configv in res['configs']: node.config[configv[0]] = configv[1] - node.meta['is_statically_extracted'] = True + self.manifest._parsing_info.static_analysis_parsed_path_count += 1 else: super().render_update(node, config) diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index 2c53f147a8c..ea843aae455 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -501,7 +501,6 @@ def test_basic(self): original_file_path=normalize('models/nested/model_1.sql'), root_path=get_abs_os_path('./dbt_modules/snowplow'), config=NodeConfig(materialized='table'), - meta={'is_statically_extractable': True}, path=normalize('nested/model_1.sql'), raw_sql=raw_sql, checksum=block.file.checksum,