From 9cb8958b06f83e969cf4006647bc287945431d6e Mon Sep 17 00:00:00 2001 From: Alekhya Kommasani Date: Mon, 28 Aug 2023 15:43:56 -0400 Subject: [PATCH] Initial draft for adding deprecation support to bqetl --- bigquery_etl/cli/__init__.py | 2 + bigquery_etl/cli/metadata.py | 68 +++++++++---------- tests/cli/test_cli_metadata.py | 67 ++++++++++++++++++ .../test/dataset_metadata.yaml | 2 +- .../metadata.yaml | 9 +++ .../metadata.yaml | 6 ++ .../clients_daily_v6/metadata.yaml | 6 ++ .../telemetry_derived/dataset_metadata.yaml | 13 ++++ 8 files changed, 138 insertions(+), 35 deletions(-) create mode 100644 tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_keyed_scalar_aggregates_v1/metadata.yaml create mode 100644 tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1/metadata.yaml create mode 100644 tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml create mode 100644 tests/sql/moz-fx-data-shared-prod/telemetry_derived/dataset_metadata.yaml diff --git a/bigquery_etl/cli/__init__.py b/bigquery_etl/cli/__init__.py index 730a2adab6e..9d83a5424be 100644 --- a/bigquery_etl/cli/__init__.py +++ b/bigquery_etl/cli/__init__.py @@ -16,6 +16,7 @@ from ..cli.dryrun import dryrun from ..cli.format import format from ..cli.generate import generate +from ..cli.metadata import metadata from ..cli.query import query from ..cli.routine import mozfun, routine from ..cli.stage import stage @@ -52,6 +53,7 @@ def cli(prog_name=None): "stage": stage, "backfill": backfill, "check": check, + "metadata": metadata, } @click.group(commands=commands) diff --git a/bigquery_etl/cli/metadata.py b/bigquery_etl/cli/metadata.py index 1395c326ece..55413437562 100644 --- a/bigquery_etl/cli/metadata.py +++ b/bigquery_etl/cli/metadata.py @@ -1,30 +1,17 @@ """bigquery-etl CLI metadata command.""" -import re -import subprocess -import sys -import tempfile from pathlib import Path -from subprocess import CalledProcessError -from typing import List, Optional, Union +from typing import Optional import click -import sqlparse -from bigquery_etl.format_sql.formatter import reformat - -from ..cli.utils import ( - is_authenticated, - paths_matching_checks_pattern, - project_id_option, - sql_dir_option, -) -from ..util.common import render as render_template +from bigquery_etl.metadata.parse_metadata import DatasetMetadata, Metadata +from ..cli.utils import paths_matching_name_pattern, project_id_option, sql_dir_option @click.group( help=""" - Commands for managing metadata. + Commands for managing bqetl metadata. \b UNDER ACTIVE DEVELOPMENT See https://mozilla-hub.atlassian.net/browse/DENG-1381 @@ -35,9 +22,10 @@ def metadata(ctx): """Create the CLI group for the metadata command.""" pass -@check.command( + +@metadata.command( help=""" - update metadata yaml files. + Update table level metadata yaml files. \b Example: @@ -51,24 +39,36 @@ def metadata(ctx): @click.argument("name") @project_id_option() @sql_dir_option -@click.pass_context -def update( - ctx: click.Context, name: str, project_id: Optional[str], sql_dir: Optional[str] -) -> None: - """Update metadata yaml file """ +def update(name: str, sql_dir: Optional[str], project_id: Optional[str]) -> None: + """Update metadata yaml file.""" table_metadata_files = paths_matching_name_pattern( name, sql_dir, project_id=project_id, files=["metadata.yaml"] ) for table_metadata_file in table_metadata_files: - dataset_metadata_path = Path(table_metadata_file).parent.parent / "dataset_metadata.yaml" - dataset_metadata = DatasetMetadata.from_file(dataset_metadata_path) - table_metadata = Metadata.from_file(table_metadata_file) - if table_metadata.deprecated: - table_metadata.workgroup_access = [dict(role="roles/bigquery.metadataViewer", members=["workgroup:deprecated"])] - else: - if dataset_metadata.default_table_workgroup_access and (table_metadata.workgroup_access == [] or table_metadata.workgroup_access is None): - table_metadata.workgroup_access = dataset_metadata.default_table_workgroup_access - table_metadata.write(table_metadata_file) + dataset_metadata_path = ( + Path(table_metadata_file).parent.parent / "dataset_metadata.yaml" + ) + if os.path.exists(dataset_metadata_path): + dataset_metadata = DatasetMetadata.from_file(dataset_metadata_path) + table_metadata = Metadata.from_file(table_metadata_file) + if table_metadata.deprecated: + table_metadata.workgroup_access = [ + dict( + role="roles/bigquery.metadataViewer", + members=["workgroup:deprecated"], + ) + ] + else: + if dataset_metadata.default_table_workgroup_access and ( + table_metadata.workgroup_access == [] + or table_metadata.workgroup_access is None + ): + table_metadata.workgroup_access = ( + dataset_metadata.default_table_workgroup_access + ) + table_metadata.write(table_metadata_file) - click.echo(f"Updated {table_metadata_file}") + click.echo(f"Updated {table_metadata_file}") + else: + click.echo(f"Dataset metadata yaml does not exist") return None diff --git a/tests/cli/test_cli_metadata.py b/tests/cli/test_cli_metadata.py index c4f123efef3..1c0d9284609 100644 --- a/tests/cli/test_cli_metadata.py +++ b/tests/cli/test_cli_metadata.py @@ -1,12 +1,18 @@ +import distutils import os +import tempfile +from pathlib import Path import pytest import yaml from click.testing import CliRunner +from bigquery_etl.cli.metadata import update from bigquery_etl.metadata.parse_metadata import Metadata from bigquery_etl.metadata.validate_metadata import validate_change_control +TEST_DIR = Path(__file__).parent.parent + class TestMetadata: test_path = "sql/moz-fx-data-shared-prod/telemetry_derived/query_v1" @@ -175,3 +181,64 @@ def test_validate_change_control_commented_line(self, runner): codeowners_conf=codeowners, expected_result=False, ) + + def test_metadata_update_with_no_deprecation(self, runner): + with tempfile.TemporaryDirectory() as tmpdirname: + distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname)) + name = [ + str(tmpdirname) + + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/" + ] + runner.invoke(update, name, "--sql_dir=" + str(tmpdirname) + "/sql") + with open( + tmpdirname + + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml", + "r", + ) as stream: + metadata = yaml.safe_load(stream) + assert metadata["workgroup_access"][0]["role"] == "roles/bigquery.dataViewer" + assert metadata["workgroup_access"][0]["members"] == [ + "workgroup:mozilla-confidential" + ] + assert not metadata["deprecated"] + + def test_metadata_update_with_deprecation(self, runner): + with tempfile.TemporaryDirectory() as tmpdirname: + distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname)) + name = [ + str(tmpdirname) + + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1/" + ] + runner.invoke(update, name, "--sql_dir=" + str(tmpdirname) + "/sql") + with open( + tmpdirname + + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1/metadata.yaml", + "r", + ) as stream: + metadata = yaml.safe_load(stream) + print(metadata) + assert ( + metadata["workgroup_access"][0]["role"] == "roles/bigquery.metadataViewer" + ) + assert metadata["workgroup_access"][0]["members"] == ["workgroup:deprecated"] + assert metadata["deprecated"] + # tmpdirname.clean + + def test_metadata_update_do_not_update(self, runner): + with tempfile.TemporaryDirectory() as tmpdirname: + distutils.dir_util.copy_tree(str(TEST_DIR), str(tmpdirname)) + name = [ + str(tmpdirname) + + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_keyed_scalar_aggregates_v1/" + ] + runner.invoke(update, name, "--sql_dir=" + str(tmpdirname) + "/sql") + with open( + tmpdirname + + "/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_keyed_scalar_aggregates_v1/metadata.yaml", + "r", + ) as stream: + metadata = yaml.safe_load(stream) + print(metadata) + assert metadata["workgroup_access"][0]["role"] == "roles/bigquery.dataViewer" + assert metadata["workgroup_access"][0]["members"] == ["workgroup:revenue/cat4"] + assert not metadata["deprecated"] diff --git a/tests/data/test_sql/moz-fx-data-test-project/test/dataset_metadata.yaml b/tests/data/test_sql/moz-fx-data-test-project/test/dataset_metadata.yaml index daf2ef957e4..0c78e5eea14 100644 --- a/tests/data/test_sql/moz-fx-data-test-project/test/dataset_metadata.yaml +++ b/tests/data/test_sql/moz-fx-data-test-project/test/dataset_metadata.yaml @@ -11,4 +11,4 @@ default_table_workgroup_access: workgroup_access: - role: roles/bigquery.dataViewer members: - - test_member \ No newline at end of file + - test_member diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_keyed_scalar_aggregates_v1/metadata.yaml b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_keyed_scalar_aggregates_v1/metadata.yaml new file mode 100644 index 00000000000..1aa621bff18 --- /dev/null +++ b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_keyed_scalar_aggregates_v1/metadata.yaml @@ -0,0 +1,9 @@ +friendly_name: Test metadata.yaml +description: |- + Clustering fields: `column1` +owners: + - test@mozilla.com +workgroup_access: + - role: roles/bigquery.dataViewer + members: + - workgroup:revenue/cat4 diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1/metadata.yaml b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1/metadata.yaml new file mode 100644 index 00000000000..40a5f48d52b --- /dev/null +++ b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_scalar_aggregates_v1/metadata.yaml @@ -0,0 +1,6 @@ +friendly_name: Test metadata.yaml +description: |- + Clustering fields: `column1` +owners: + - test@mozilla.com +deprecated: true diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml new file mode 100644 index 00000000000..9ca10b4d68b --- /dev/null +++ b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v6/metadata.yaml @@ -0,0 +1,6 @@ +friendly_name: Test metadata.yaml +description: |- + + Clustering fields: `column1` +owners: + - test@mozilla.com diff --git a/tests/sql/moz-fx-data-shared-prod/telemetry_derived/dataset_metadata.yaml b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/dataset_metadata.yaml new file mode 100644 index 00000000000..1cecaa8f219 --- /dev/null +++ b/tests/sql/moz-fx-data-shared-prod/telemetry_derived/dataset_metadata.yaml @@ -0,0 +1,13 @@ +friendly_name: User-Defined Functions +description: |- + Persistent user-defined functions +dataset_base_acl: derived +user_facing: false +workgroup_access: +- role: roles/bigquery.metadataViewer + members: + - workgroup:mozilla-confidential +default_table_workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential