Skip to content

Commit

Permalink
Make base tables configurable in glean_usage generator (#4534)
Browse files Browse the repository at this point in the history
* Make base tables configurable in glean_usage generator

* Fix event extras unnesting in event monitoring
  • Loading branch information
scholtzan authored Nov 9, 2023
1 parent 7dc6014 commit 3cfaf48
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 24 deletions.
32 changes: 17 additions & 15 deletions sql_generators/glean_usage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
metrics_clients_daily,
metrics_clients_last_seen,
)
from sql_generators.glean_usage.common import get_app_info, list_baseline_tables
from sql_generators.glean_usage.common import get_app_info, list_tables

# list of methods for generating queries
GLEAN_TABLES = [
Expand Down Expand Up @@ -97,19 +97,21 @@ def generate(
elif exclude:
table_filter = partial(table_matches_patterns, exclude, True)

baseline_tables = list_baseline_tables(
project_id=target_project,
only_tables=[only] if only else None,
table_filter=table_filter,
)

# filter out skipped apps
baseline_tables = [
baseline_table
for baseline_table in baseline_tables
if baseline_table.split(".")[1]
not in [f"{skipped_app}_stable" for skipped_app in SKIP_APPS]
]
def get_tables(table_name="baseline_v1"):
    """Return fully-qualified stable tables named *table_name*, minus skipped apps.

    Uses the enclosing scope's ``target_project``, ``only`` and
    ``table_filter``, plus the module-level ``SKIP_APPS`` list.
    """
    # Datasets belonging to apps that are explicitly excluded from generation.
    skipped_datasets = {f"{app}_stable" for app in SKIP_APPS}

    candidate_tables = list_tables(
        project_id=target_project,
        only_tables=[only] if only else None,
        table_filter=table_filter,
        table_name=table_name,
    )

    # Keep only tables whose dataset (the middle segment of
    # project.dataset.table) is not a skipped app's stable dataset.
    return [
        table for table in candidate_tables if table.split(".")[1] not in skipped_datasets
    ]

output_dir = Path(output_dir) / target_project

Expand All @@ -134,8 +136,8 @@ def generate(
),
baseline_table,
)
for baseline_table in baseline_tables
for table in GLEAN_TABLES
for baseline_table in get_tables(table_name=table.base_table_name)
]

# Parameters to generate per-app datasets consist of the function to be called
Expand Down
9 changes: 5 additions & 4 deletions sql_generators/glean_usage/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,13 @@ def write_dataset_metadata(output_dir, full_table_id, derived_dataset_metadata=F
target.write_text(rendered)


def list_baseline_tables(project_id, only_tables, table_filter):
def list_tables(project_id, only_tables, table_filter, table_name="baseline_v1"):
"""Return names of all matching baseline tables in shared-prod."""
prod_baseline_tables = [
s.stable_table
for s in get_stable_table_schemas()
if s.schema_id == "moz://mozilla.org/schemas/glean/ping/1"
and s.bq_table == "baseline_v1"
and s.bq_table == table_name
]
prod_datasets_with_baseline = [t.split(".")[0] for t in prod_baseline_tables]
stable_datasets = prod_datasets_with_baseline
Expand All @@ -78,9 +78,9 @@ def list_baseline_tables(project_id, only_tables, table_filter):
if d.endswith("_stable") and d in prod_datasets_with_baseline
}
return [
f"{project_id}.{d}.baseline_v1"
f"{project_id}.{d}.{table_name}"
for d in stable_datasets
if table_filter(f"{d}.baseline_v1")
if table_filter(f"{d}.{table_name}")
]


Expand Down Expand Up @@ -163,6 +163,7 @@ def __init__(self):
self.per_app_enabled = True
self.across_apps_enabled = True
self.cross_channel_template = "cross_channel.view.sql"
self.base_table_name = "baseline_v1"

def skip_existing(self, output_dir="sql/", project_id="moz-fx-data-shared-prod"):
"""Existing files configured not to be overridden during generation."""
Expand Down
7 changes: 4 additions & 3 deletions sql_generators/glean_usage/event_monitoring_live.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(self) -> None:
self.prefix = PREFIX
self.target_table_id = TARGET_TABLE_ID
self.custom_render_kwargs = {}
self.base_table_name = "events_v1"

def generate_per_app_id(
self, project_id, baseline_table, output_dir=None, use_cloud_function=True
Expand Down Expand Up @@ -106,11 +107,11 @@ def generate_across_apps(
if not self.across_apps_enabled:
return

prod_datasets_with_baseline = [
prod_datasets_with_event = [
s.bq_dataset_family
for s in get_stable_table_schemas()
if s.schema_id == "moz://mozilla.org/schemas/glean/ping/1"
and s.bq_table == "baseline_v1"
and s.bq_table == "events_v1"
]

aggregate_table = "event_monitoring_aggregates_v1"
Expand All @@ -124,7 +125,7 @@ def generate_across_apps(
table=target_view_name,
target_table=f"{TARGET_DATASET_CROSS_APP}_derived.{aggregate_table}",
apps=apps,
prod_datasets=prod_datasets_with_baseline,
prod_datasets=prod_datasets_with_event,
)
render_kwargs.update(self.custom_render_kwargs)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,11 @@
`{{ project_id }}.{{ dataset['bq_dataset_family'] }}_stable.events_v1`
CROSS JOIN
UNNEST(events) AS event,
UNNEST(event.extra) AS event_extra,
-- Iterator for accessing experiments.
-- Add one more for aggregating events across all experiments
UNNEST(GENERATE_ARRAY(0, ARRAY_LENGTH(ping_info.experiments))) AS experiment_index
LEFT JOIN
UNNEST(event.extra) AS event_extra
WHERE
DATE(submission_timestamp) = @submission_date
GROUP BY
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,11 @@ IF
`{{ project_id }}.{{ dataset }}_live.events_v1`
CROSS JOIN
UNNEST(events) AS event,
UNNEST(event.extra) AS event_extra,
-- Iterator for accessing experiments.
-- Add one more for aggregating events across all experiments
UNNEST(GENERATE_ARRAY(0, ARRAY_LENGTH(ping_info.experiments))) AS experiment_index
LEFT JOIN
UNNEST(event.extra) AS event_extra
{% elif dataset_id in ["accounts_frontend", "accounts_backend"] %}
-- FxA uses custom pings to send events without a category and extras.
SELECT
Expand Down

0 comments on commit 3cfaf48

Please sign in to comment.