Skip to content

Commit

Permalink
DENG-6883 & DENG-6884 - Create desktop_acquisition_funnel_aggregates …
Browse files Browse the repository at this point in the history
…table (#6780)

* Create desktop_acquisition_funnel_aggregates_v1

* Add minimum row count check

* Fix YAML format, remove trailing space
  • Loading branch information
kwindau authored Jan 9, 2025
1 parent 851e010 commit ae8150f
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 0 deletions.
19 changes: 19 additions & 0 deletions dags.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2075,3 +2075,22 @@ bqetl_default_browser_aggregates:
schedule_interval: 0 22 * * *
tags:
- impact/tier_3

# DAG that builds the Desktop Acquisition Funnel aggregate table.
# Scheduled daily with the cron expression "30 11 * * *".
# NOTE(review): the owner/email values below look redacted in this copy
# ("[email protected]"); confirm the real addresses before applying.
bqetl_dsktp_acqstn_fnnl:
  description: |
    This DAG builds the Desktop Acquisition Funnel aggregate table
  default_args:
    depends_on_past: false
    owner: [email protected]
    email:
      - [email protected]
      - [email protected]
    email_on_failure: true
    email_on_retry: false
    start_date: "2025-01-08"
    # Up to two retries, five minutes apart.
    retries: 2
    retry_delay: 5m
  tags:
    - impact/tier_2
  repo: bigquery-etl
  schedule_interval: 30 11 * * *
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- View over the derived desktop_acquisition_funnel_aggregates_v1 table.
-- It selects every column unchanged, so the view's columns follow the table's.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.telemetry.desktop_acquisition_funnel_aggregates`
AS
SELECT
  *
FROM
  `moz-fx-data-shared-prod.telemetry_derived.desktop_acquisition_funnel_aggregates_v1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#fail
{# Minimum row count check on the rows where first_seen_date = @fsd; presumably fails when fewer than 1 row matches (min_row_count is defined outside this file, confirm). #}
{{ min_row_count(1, "first_seen_date = @fsd") }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Metadata for telemetry_derived.desktop_acquisition_funnel_aggregates_v1.
friendly_name: Desktop Acquisition Funnel Aggregates
description: |-
  Aggregate table used to track desktop installation and retention 28 days later
owners:
- [email protected]
labels:
  incremental: true
  owner1: kwindau
  table_type: aggregate
  shredder_mitigation: true
scheduling:
  dag_name: bqetl_dsktp_acqstn_fnnl
  # Each run processes the partition 29 days before the run date; that date is
  # handed to the query as the @fsd parameter.
  date_partition_offset: -29
  date_partition_parameter: fsd
  parameters:
  # NOTE(review): query.sql only references @fsd; confirm whether
  # submission_date is still needed here.
  - submission_date:DATE:{{ds}}
bigquery:
  time_partitioning:
    type: day
    field: first_seen_date
    require_partition_filter: false
    expiration_days: null
  range_partitioning: null
references: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
-- Aggregates the desktop acquisition funnel for a single first_seen_date
-- (the @fsd query parameter): one output row per combination of the
-- dimension columns, with client counts for each funnel step.
--
-- "%2528not%2Bset%2529" is "(not set)" URL-encoded twice: decoding once gives
-- "%28not+set%29", decoding again gives "(not set)" ("+" encodes a space).
-- The six attribution fields below map that value back to the readable literal;
-- any other value, including NULL, passes through unchanged.
SELECT
  first_seen_date,
  country_code,
  channel,
  os,
  os_version,
  distribution_id,
  attribution_ua,
  CASE attribution_source
    WHEN "%2528not%2Bset%2529"
      THEN "(not set)"
    ELSE attribution_source
  END AS attribution_source,
  CASE attribution_medium
    WHEN "%2528not%2Bset%2529"
      THEN "(not set)"
    ELSE attribution_medium
  END AS attribution_medium,
  CASE attribution_campaign
    WHEN "%2528not%2Bset%2529"
      THEN "(not set)"
    ELSE attribution_campaign
  END AS attribution_campaign,
  CASE attribution_content
    WHEN "%2528not%2Bset%2529"
      THEN "(not set)"
    ELSE attribution_content
  END AS attribution_content,
  CASE attribution_experiment
    WHEN "%2528not%2Bset%2529"
      THEN "(not set)"
    ELSE attribution_experiment
  END AS attribution_experiment,
  CASE attribution_dlsource
    WHEN "%2528not%2Bset%2529"
      THEN "(not set)"
    ELSE attribution_dlsource
  END AS attribution_dlsource,
  startup_profile_selection_reason,
  -- COUNT(client_id) counts rows whose client_id is not NULL.
  COUNT(client_id) AS cohort,
  COUNTIF(activated) AS activated,
  COUNTIF(returned_second_day) AS returned_second_day,
  COUNTIF(qualified_second_day) AS qualified_second_day,
  COUNTIF(retained_week4) AS retained_week4,
  COUNTIF(qualified_week4) AS qualified_week4
FROM
  `moz-fx-data-shared-prod.telemetry_derived.clients_first_seen_28_days_later_v1`
WHERE
  first_seen_date = @fsd
-- Grouping keys are listed in SELECT order.
-- NOTE(review): the attribution_* names match both a source column and a
-- SELECT alias; presumably they resolve to the cleaned alias. Confirm.
GROUP BY
  first_seen_date,
  country_code,
  channel,
  os,
  os_version,
  distribution_id,
  attribution_ua,
  attribution_source,
  attribution_medium,
  attribution_campaign,
  attribution_content,
  attribution_experiment,
  attribution_dlsource,
  startup_profile_selection_reason
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Schema for telemetry_derived.desktop_acquisition_funnel_aggregates_v1.
# Dimension columns come first, followed by the per-group client counts.
fields:
- mode: NULLABLE
  name: first_seen_date
  type: DATE
  description: First Seen Date
- name: country_code
  type: STRING
  mode: NULLABLE
  description: Country Code
- name: channel
  type: STRING
  mode: NULLABLE
  description: Channel
- name: os
  type: STRING
  mode: NULLABLE
  description: Operating System
# NOTE(review): declared NUMERIC rather than STRING; confirm this matches the
# os_version column of the source table.
- name: os_version
  type: NUMERIC
  mode: NULLABLE
  description: Operating System Version
- name: distribution_id
  type: STRING
  mode: NULLABLE
  description: Distribution ID
- name: attribution_ua
  type: STRING
  mode: NULLABLE
  description: Attribution User Agent
- name: attribution_source
  type: STRING
  mode: NULLABLE
  description: Attribution Source
- name: attribution_medium
  type: STRING
  mode: NULLABLE
  description: Attribution Medium
- name: attribution_campaign
  type: STRING
  mode: NULLABLE
  description: Attribution Campaign
- name: attribution_content
  type: STRING
  mode: NULLABLE
  description: Attribution Content
- name: attribution_experiment
  type: STRING
  mode: NULLABLE
  description: Attribution Experiment
- name: attribution_dlsource
  type: STRING
  mode: NULLABLE
  description: Attribution Download Source
- name: startup_profile_selection_reason
  type: STRING
  mode: NULLABLE
  description: Startup Profile Selection Reason
# Count columns: one integer per funnel step for the group.
- name: cohort
  type: INTEGER
  mode: NULLABLE
  description: Cohort
- name: activated
  type: INTEGER
  mode: NULLABLE
  description: Activated
- name: returned_second_day
  type: INTEGER
  mode: NULLABLE
  description: Returned Second Day
- name: qualified_second_day
  type: INTEGER
  mode: NULLABLE
  description: Qualified Second Day
- name: retained_week4
  type: INTEGER
  mode: NULLABLE
  description: Retained Week 4
- name: qualified_week4
  type: INTEGER
  mode: NULLABLE
  description: Qualified Week 4

1 comment on commit ae8150f

@dataops-ci-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Integration report for "DENG-6883 & DENG-6884 - Create desktop_acquisition_funnel_aggregates table (#6780)"

sql.diff

Click to expand!
Only in /tmp/workspace/generated-sql/dags/: bqetl_dsktp_acqstn_fnnl.py
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py	2025-01-09 14:09:03.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py	2025-01-09 14:09:38.000000000 +0000
@@ -722,6 +722,20 @@
         parameters=["submission_date:DATE:{{ds}}"],
     )
 
+    with TaskGroup(
+        "telemetry_derived__clients_first_seen_28_days_later__v1_external",
+    ) as telemetry_derived__clients_first_seen_28_days_later__v1_external:
+        ExternalTaskMarker(
+            task_id="bqetl_dsktp_acqstn_fnnl__wait_for_telemetry_derived__clients_first_seen_28_days_later__v1",
+            external_dag_id="bqetl_dsktp_acqstn_fnnl",
+            external_task_id="wait_for_telemetry_derived__clients_first_seen_28_days_later__v1",
+            execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=52200)).isoformat() }}",
+        )
+
+        telemetry_derived__clients_first_seen_28_days_later__v1_external.set_upstream(
+            telemetry_derived__clients_first_seen_28_days_later__v1
+        )
+
     telemetry_derived__clients_first_seen_28_days_later__v3 = bigquery_etl_query(
         task_id="telemetry_derived__clients_first_seen_28_days_later__v3",
         destination_table='clients_first_seen_28_days_later_v3${{ macros.ds_format(macros.ds_add(ds, -27), "%Y-%m-%d", "%Y%m%d") }}',
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_dsktp_acqstn_fnnl.py /tmp/workspace/generated-sql/dags/bqetl_dsktp_acqstn_fnnl.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_dsktp_acqstn_fnnl.py	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_dsktp_acqstn_fnnl.py	2025-01-09 14:09:41.000000000 +0000
@@ -0,0 +1,102 @@
+# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py
+
+from airflow import DAG
+from airflow.sensors.external_task import ExternalTaskMarker
+from airflow.sensors.external_task import ExternalTaskSensor
+from airflow.utils.task_group import TaskGroup
+import datetime
+from operators.gcp_container_operator import GKEPodOperator
+from utils.constants import ALLOWED_STATES, FAILED_STATES
+from utils.gcp import bigquery_etl_query, bigquery_dq_check, bigquery_bigeye_check
+
+docs = """
+### bqetl_dsktp_acqstn_fnnl
+
+Built from bigquery-etl repo, [`dags/bqetl_dsktp_acqstn_fnnl.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_dsktp_acqstn_fnnl.py)
+
+#### Description
+
+This DAG builds the Desktop Acquisiton Funnel aggregate table
+
+#### Owner
+
+[email protected]
+
+#### Tags
+
+* impact/tier_2
+* repo/bigquery-etl
+"""
+
+
+default_args = {
+    "owner": "[email protected]",
+    "start_date": datetime.datetime(2025, 1, 8, 0, 0),
+    "end_date": None,
+    "email": ["[email protected]", "[email protected]"],
+    "depends_on_past": False,
+    "retry_delay": datetime.timedelta(seconds=300),
+    "email_on_failure": True,
+    "email_on_retry": False,
+    "retries": 2,
+}
+
+tags = ["impact/tier_2", "repo/bigquery-etl"]
+
+with DAG(
+    "bqetl_dsktp_acqstn_fnnl",
+    default_args=default_args,
+    schedule_interval="30 11 * * *",
+    doc_md=docs,
+    tags=tags,
+) as dag:
+
+    wait_for_telemetry_derived__clients_first_seen_28_days_later__v1 = (
+        ExternalTaskSensor(
+            task_id="wait_for_telemetry_derived__clients_first_seen_28_days_later__v1",
+            external_dag_id="bqetl_analytics_tables",
+            external_task_id="telemetry_derived__clients_first_seen_28_days_later__v1",
+            execution_delta=datetime.timedelta(seconds=34200),
+            check_existence=True,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=5),
+            allowed_states=ALLOWED_STATES,
+            failed_states=FAILED_STATES,
+            pool="DATA_ENG_EXTERNALTASKSENSOR",
+        )
+    )
+
+    checks__fail_telemetry_derived__desktop_acquisition_funnel_aggregates__v1 = bigquery_dq_check(
+        task_id="checks__fail_telemetry_derived__desktop_acquisition_funnel_aggregates__v1",
+        source_table='desktop_acquisition_funnel_aggregates_v1${{ macros.ds_format(macros.ds_add(ds, -29), "%Y-%m-%d", "%Y%m%d") }}',
+        dataset_id="telemetry_derived",
+        project_id="moz-fx-data-shared-prod",
+        is_dq_check_fail=True,
+        owner="[email protected]",
+        email=["[email protected]", "[email protected]"],
+        depends_on_past=False,
+        parameters=["fsd:DATE:{{macros.ds_add(ds, -29)}}"]
+        + ["submission_date:DATE:{{ds}}"],
+        retries=0,
+    )
+
+    telemetry_derived__desktop_acquisition_funnel_aggregates__v1 = bigquery_etl_query(
+        task_id="telemetry_derived__desktop_acquisition_funnel_aggregates__v1",
+        destination_table='desktop_acquisition_funnel_aggregates_v1${{ macros.ds_format(macros.ds_add(ds, -29), "%Y-%m-%d", "%Y%m%d") }}',
+        dataset_id="telemetry_derived",
+        project_id="moz-fx-data-shared-prod",
+        owner="[email protected]",
+        email=["[email protected]", "[email protected]"],
+        date_partition_parameter=None,
+        depends_on_past=False,
+        parameters=["fsd:DATE:{{macros.ds_add(ds, -29)}}"]
+        + ["submission_date:DATE:{{ds}}"],
+    )
+
+    checks__fail_telemetry_derived__desktop_acquisition_funnel_aggregates__v1.set_upstream(
+        telemetry_derived__desktop_acquisition_funnel_aggregates__v1
+    )
+
+    telemetry_derived__desktop_acquisition_funnel_aggregates__v1.set_upstream(
+        wait_for_telemetry_derived__clients_first_seen_28_days_later__v1
+    )
Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry: desktop_acquisition_funnel_aggregates
Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived: desktop_acquisition_funnel_aggregates_v1
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry/desktop_acquisition_funnel_aggregates/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry/desktop_acquisition_funnel_aggregates/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry/desktop_acquisition_funnel_aggregates/metadata.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry/desktop_acquisition_funnel_aggregates/metadata.yaml	2025-01-09 14:05:03.000000000 +0000
@@ -0,0 +1,14 @@
+friendly_name: Desktop Acquisition Funnel Aggregates
+description: |-
+  Please provide a description for the query
+owners: []
+labels: {}
+bigquery: null
+workgroup_access:
+- role: roles/bigquery.dataViewer
+  members:
+  - workgroup:dataops-managed/taar
+  - workgroup:mozilla-confidential
+references:
+  view.sql:
+  - moz-fx-data-shared-prod.telemetry_derived.desktop_acquisition_funnel_aggregates_v1
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry/desktop_acquisition_funnel_aggregates/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry/desktop_acquisition_funnel_aggregates/view.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry/desktop_acquisition_funnel_aggregates/view.sql	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry/desktop_acquisition_funnel_aggregates/view.sql	2025-01-09 14:02:58.000000000 +0000
@@ -0,0 +1,7 @@
+CREATE OR REPLACE VIEW
+  `moz-fx-data-shared-prod.telemetry.desktop_acquisition_funnel_aggregates`
+AS
+SELECT
+  *
+FROM
+  `moz-fx-data-shared-prod.telemetry_derived.desktop_acquisition_funnel_aggregates_v1`
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/checks.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/checks.sql	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/checks.sql	2025-01-09 14:02:58.000000000 +0000
@@ -0,0 +1,2 @@
+#fail
+{{ min_row_count(1, "first_seen_date = @fsd") }}
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/metadata.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/metadata.yaml	2025-01-09 14:05:01.000000000 +0000
@@ -0,0 +1,30 @@
+friendly_name: Desktop Acquisition Funnel Aggregates
+description: |-
+  Aggregate table used to track desktop installation and retention 28 days later
+owners:
+- [email protected]
+labels:
+  incremental: true
+  owner1: kwindau
+  table_type: aggregate
+  shredder_mitigation: true
+  dag: bqetl_dsktp_acqstn_fnnl
+scheduling:
+  dag_name: bqetl_dsktp_acqstn_fnnl
+  date_partition_offset: -29
+  date_partition_parameter: fsd
+  parameters:
+  - submission_date:DATE:{{ds}}
+bigquery:
+  time_partitioning:
+    type: day
+    field: first_seen_date
+    require_partition_filter: false
+    expiration_days: null
+  range_partitioning: null
+  clustering: null
+workgroup_access:
+- role: roles/bigquery.dataViewer
+  members:
+  - workgroup:mozilla-confidential
+references: {}
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/query.sql	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/query.sql	2025-01-09 14:02:58.000000000 +0000
@@ -0,0 +1,64 @@
+SELECT
+  first_seen_date,
+  country_code,
+  channel,
+  os,
+  os_version,
+  distribution_id,
+  attribution_ua,
+  CASE
+    WHEN attribution_source = "%2528not%2Bset%2529"
+      THEN "(not set)"
+    ELSE attribution_source
+  END AS attribution_source,
+  CASE
+    WHEN attribution_medium = "%2528not%2Bset%2529"
+      THEN "(not set)"
+    ELSE attribution_medium
+  END AS attribution_medium,
+  CASE
+    WHEN attribution_campaign = "%2528not%2Bset%2529"
+      THEN "(not set)"
+    ELSE attribution_campaign
+  END AS attribution_campaign,
+  CASE
+    WHEN attribution_content = "%2528not%2Bset%2529"
+      THEN "(not set)"
+    ELSE attribution_content
+  END AS attribution_content,
+  CASE
+    WHEN attribution_experiment = "%2528not%2Bset%2529"
+      THEN "(not set)"
+    ELSE attribution_experiment
+  END AS attribution_experiment,
+  CASE
+    WHEN attribution_dlsource = "%2528not%2Bset%2529"
+      THEN "(not set)"
+    ELSE attribution_dlsource
+  END AS attribution_dlsource,
+  startup_profile_selection_reason,
+  COUNT(client_id) AS cohort,
+  COUNTIF(activated) AS activated,
+  COUNTIF(returned_second_day) AS returned_second_day,
+  COUNTIF(qualified_second_day) AS qualified_second_day,
+  COUNTIF(retained_week4) AS retained_week4,
+  COUNTIF(qualified_week4) AS qualified_week4
+FROM
+  `moz-fx-data-shared-prod.telemetry_derived.clients_first_seen_28_days_later_v1`
+WHERE
+  first_seen_date = @fsd
+GROUP BY
+  first_seen_date,
+  country_code,
+  channel,
+  os,
+  os_version,
+  distribution_id,
+  attribution_source,
+  attribution_ua,
+  attribution_medium,
+  attribution_campaign,
+  attribution_content,
+  attribution_experiment,
+  attribution_dlsource,
+  startup_profile_selection_reason
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/schema.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/desktop_acquisition_funnel_aggregates_v1/schema.yaml	2025-01-09 14:02:58.000000000 +0000
@@ -0,0 +1,81 @@
+fields:
+- mode: NULLABLE
+  name: first_seen_date
+  type: DATE
+  description: First Seen Date
+- name: country_code
+  type: STRING
+  mode: NULLABLE
+  description: Country Code
+- name: channel
+  type: STRING
+  mode: NULLABLE
+  description: Channel
+- name: os
+  type: STRING
+  mode: NULLABLE
+  description: Operating System
+- name: os_version
+  type: NUMERIC
+  mode: NULLABLE
+  description: Operating System Version
+- name: distribution_id
+  type: STRING
+  mode: NULLABLE
+  description: Distribution ID
+- name: attribution_ua
+  type: STRING
+  mode: NULLABLE
+  description: Attribution User Agent
+- name: attribution_source
+  type: STRING
+  mode: NULLABLE
+  description: Attribution Source
+- name: attribution_medium
+  type: STRING
+  mode: NULLABLE
+  description: Attribution Medium
+- name: attribution_campaign
+  type: STRING
+  mode: NULLABLE
+  description: Attribution Campaign
+- name: attribution_content
+  type: STRING
+  mode: NULLABLE
+  description: Attribution Content
+- name: attribution_experiment
+  type: STRING
+  mode: NULLABLE
+  description: Attribution Experiment
+- name: attribution_dlsource
+  type: STRING
+  mode: NULLABLE
+  description: Attribution Download Source
+- name: startup_profile_selection_reason
+  type: STRING
+  mode: NULLABLE
+  description: Startup Profile Selection Reason
+- name: cohort
+  type: INTEGER
+  mode: NULLABLE
+  description: Cohort
+- name: activated
+  type: INTEGER
+  mode: NULLABLE
+  description: Activated
+- name: returned_second_day
+  type: INTEGER
+  mode: NULLABLE
+  description: Returned Second Day
+- name: qualified_second_day
+  type: INTEGER
+  mode: NULLABLE
+  description: Qualified Second Day
+- name: retained_week4
+  type: INTEGER
+  mode: NULLABLE
+  description: Retained Week 4
+- name: qualified_week4
+  type: INTEGER
+  mode: NULLABLE
+  description: Qualified Week 4

Link to full diff

Please sign in to comment.