diff --git a/bigquery_etl/shredder/config.py b/bigquery_etl/shredder/config.py index 4d4d873fb0c..33ae0999592 100755 --- a/bigquery_etl/shredder/config.py +++ b/bigquery_etl/shredder/config.py @@ -157,7 +157,6 @@ def fields(self) -> tuple[str, ...]: DELETE_TARGETS: DeleteIndex = { client_id_target(table="fenix_derived.new_profile_activation_v1"): FENIX_SRC, client_id_target(table="fenix_derived.firefox_android_clients_v1"): FENIX_SRC, - client_id_target(table="fenix_derived.firefox_android_clients_v2"): FENIX_SRC, client_id_target(table="search_derived.acer_cohort_v1"): DESKTOP_SRC, client_id_target( table="search_derived.mobile_search_clients_daily_v1" diff --git a/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/checks.sql b/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/checks.sql deleted file mode 100644 index c9cc5cae7db..00000000000 --- a/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/checks.sql +++ /dev/null @@ -1,38 +0,0 @@ --- TODO: convert is_unique to fail after duplication is resolved. -#warn -{{ is_unique(columns=["client_id"]) }} -#fail -{{ not_null(columns=["client_id", "sample_id"]) }} -#fail -{{ min_row_count(1, "first_seen_date = @submission_date") }} -#warn -WITH base AS ( - SELECT - COUNTIF(is_activated) - FROM - `{{ project_id }}.{{ dataset_id }}.{{ table_name }}` - WHERE - first_seen_date = @submission_date -), -upstream AS ( - SELECT - COUNTIF(activated = 1) - FROM - `{{ project_id }}.{{ dataset_id }}.new_profile_activation_v1` - WHERE - first_seen_date = @submission_date - AND submission_date = DATE_SUB(@submission_date, INTERVAL 6 DAY) -) -SELECT - IF( - (SELECT * FROM base) <> (SELECT * FROM upstream), - ERROR( - CONCAT( - "Number of activations does not match up that of the upstream table. Upstream count: ", - (SELECT * FROM upstream), - ", base count: ", - (SELECT * FROM base) - ) - ), - NULL - ); diff --git a/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/metadata.yaml b/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/metadata.yaml deleted file mode 100644 index d0050abb1d8..00000000000 --- a/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/metadata.yaml +++ /dev/null @@ -1,46 +0,0 @@ -friendly_name: Firefox Android Clients -description: |- - First observations for Firefox Android clients of channel Release, - retrieved from the earliest pings: baseline, first_session and metrics. - The attributes stored in this table include the first attribution, - device, OS and ISP details. - - This table should be accessed through the user-facing view - `fenix.firefox_android_clients`. - - For analysis purposes, use first_seen_date to query clients that - effectively appeared on that date. The submission_date indicates - when the server received the data. - - The query for this table overwrites the whole table instead of writing to - a single partition, so manual backfills must use parameter --no_partition. - - Proposal: - https://docs.google.com/document/d/12bj4DhCybelqHVgOVq8KJlzgtbbUw3f68palNrv-gaM/. - - For more details about attribution and campaign structure see: - https://help.adjust.com/en/article/tracker-urls#campaign-structure-parameters. -owners: -- kik@mozilla.com -labels: - application: firefox_android - incremental: true - schedule: daily - owner1: kik -scheduling: - dag_name: bqetl_analytics_tables - depends_on_past: true - date_partition_parameter: null - parameters: - - submission_date:DATE:{{ds}} -bigquery: - time_partitioning: - type: day - field: first_seen_date - require_partition_filter: false - clustering: - fields: - - sample_id - - channel - - first_reported_country -references: {} diff --git a/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/query.sql b/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/query.sql deleted file mode 100644 index 9cf7c25b64c..00000000000 --- a/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/query.sql +++ /dev/null @@ -1,253 +0,0 @@ --- Query first observations for Firefox Android Clients. -WITH baseline_first_seen AS ( - SELECT - client_id, - submission_date, - first_seen_date, - sample_id, - country AS first_reported_country, - isp AS first_reported_isp, - normalized_channel AS channel, - device_manufacturer, - device_model, - normalized_os_version AS os_version, - app_display_version AS app_version, - DATETIME(first_run_date) AS first_run_datetime, - locale, - FROM - fenix.baseline_clients_first_seen - WHERE - {% if is_init() %} - submission_date < CURRENT_DATE - {% else %} - submission_date = @submission_date - {% endif %} - AND client_id IS NOT NULL -), -activations AS ( - SELECT - client_id, - CAST(activated AS BOOLEAN) AS is_activated, - FROM - fenix.new_profile_activation - WHERE - {% if is_init() %} - submission_date < CURRENT_DATE - {% else %} - submission_date = @submission_date - {% endif %} -), --- Find earliest data per client from the first_session ping. -first_session_ping_base AS ( - SELECT - client_info.client_id, - sample_id, - submission_timestamp, - NULLIF(metrics.string.first_session_adgroup, "") AS adjust_ad_group, - NULLIF(metrics.string.first_session_campaign, "") AS adjust_campaign, - NULLIF(metrics.string.first_session_creative, "") AS adjust_creative, - NULLIF(metrics.string.first_session_network, "") AS adjust_network, - FROM - fenix.first_session - WHERE - {% if is_init() %} - DATE(submission_timestamp) < CURRENT_DATE - {% else %} - DATE(submission_timestamp) = @submission_date - {% endif %} - AND client_info.client_id IS NOT NULL -), -first_session_ping AS ( - SELECT - client_id, - sample_id, - ARRAY_AGG( - IF( - adjust_ad_group IS NOT NULL - OR adjust_campaign IS NOT NULL - OR adjust_creative IS NOT NULL - OR adjust_network IS NOT NULL, - STRUCT( - submission_timestamp, - adjust_ad_group, - adjust_campaign, - adjust_creative, - adjust_network - ), - NULL - ) IGNORE NULLS - ORDER BY - submission_timestamp ASC - LIMIT - 1 - )[SAFE_OFFSET(0)] AS adjust_info, - FROM - first_session_ping_base - GROUP BY - client_id, - sample_id -), --- Find earliest data per client from the metrics ping. -metrics_ping_base AS ( - SELECT - client_info.client_id AS client_id, - sample_id, - submission_timestamp, - NULLIF(fenix_metrics.metrics.string.metrics_adjust_ad_group, "") AS adjust_ad_group, - NULLIF(fenix_metrics.metrics.string.metrics_adjust_campaign, "") AS adjust_campaign, - NULLIF(fenix_metrics.metrics.string.metrics_adjust_creative, "") AS adjust_creative, - NULLIF(fenix_metrics.metrics.string.metrics_adjust_network, "") AS adjust_network, - NULLIF(fenix_metrics.metrics.string.metrics_install_source, "") AS install_source, - FROM - fenix.metrics AS fenix_metrics - WHERE - {% if is_init() %} - DATE(submission_timestamp) < CURRENT_DATE - {% else %} - DATE(submission_timestamp) = @submission_date - {% endif %} - AND client_info.client_id IS NOT NULL -), -metrics_ping AS ( - SELECT - client_id, - sample_id, - ARRAY_AGG( - IF( - adjust_ad_group IS NOT NULL - OR adjust_campaign IS NOT NULL - OR adjust_creative IS NOT NULL - OR adjust_network IS NOT NULL, - STRUCT( - submission_timestamp, - adjust_ad_group, - adjust_campaign, - adjust_creative, - adjust_network - ), - NULL - ) IGNORE NULLS - ORDER BY - submission_timestamp ASC - LIMIT - 1 - )[SAFE_OFFSET(0)] AS adjust_info, - ARRAY_AGG(install_source IGNORE NULLS ORDER BY submission_timestamp ASC)[ - SAFE_OFFSET(0) - ] AS install_source, - FROM - metrics_ping_base - GROUP BY - client_id, - sample_id -), -_current AS ( - SELECT - client_id, - sample_id, - first_seen_date, - first_reported_country, - first_reported_isp, - channel, - device_manufacturer, - device_model, - os_version, - app_version, - locale, - COALESCE(first_session.adjust_info, metrics.adjust_info) AS adjust_info, - metrics.install_source, - STRUCT( - IF(baseline_first_seen.client_id IS NULL, FALSE, TRUE) AS reported_baseline_ping, - IF(first_session.client_id IS NULL, FALSE, TRUE) AS reported_first_session_ping, - IF(metrics.client_id IS NULL, FALSE, TRUE) AS reported_metrics_ping, - CASE - WHEN first_session.adjust_info IS NOT NULL - THEN "first_session" - WHEN metrics.adjust_info IS NOT NULL - THEN "metrics" - ELSE NULL - END AS adjust_info__source_ping - ) AS metadata, - FROM - baseline_first_seen - FULL OUTER JOIN - first_session_ping AS first_session - USING - (client_id, sample_id) - FULL OUTER JOIN - metrics_ping AS metrics - USING - (client_id, sample_id) - WHERE - client_id IS NOT NULL -), -_previous AS ( - SELECT - * - FROM - fenix_derived.firefox_android_clients_v2 -) -SELECT - client_id, - sample_id, - COALESCE(_previous.first_seen_date, _current.first_seen_date) AS first_seen_date, - COALESCE( - _previous.first_reported_country, - _current.first_reported_country - ) AS first_reported_country, - COALESCE(_previous.first_reported_isp, _current.first_reported_isp) AS first_reported_isp, - COALESCE(_previous.channel, _current.channel) AS channel, - COALESCE(_previous.device_manufacturer, _current.device_manufacturer) AS device_manufacturer, - COALESCE(_previous.device_model, _current.device_model) AS device_model, - COALESCE(_previous.os_version, _current.os_version) AS os_version, - COALESCE(_previous.app_version, _current.app_version) AS app_version, - COALESCE(_previous.locale, _current.locale) AS locale, - activations.is_activated, - -- below is to avoid mix and matching different adjust attributes - -- from different records. This way we always treat them as a single "unit" - IF( - _previous.adjust_ad_group IS NULL - AND _previous.adjust_campaign IS NULL - AND _previous.adjust_creative IS NULL - AND _previous.adjust_network IS NULL, - _current.adjust_info, - STRUCT( - _previous.submission_timestamp, - _previous.adjust_ad_group, - _previous.adjust_campaign, - _previous.adjust_creative, - _previous.adjust_network - ) - ).*, - COALESCE(_previous.install_source, _current.install_source) AS install_source, - STRUCT( - COALESCE( - _previous.metadata.reported_baseline_ping - OR _current.metadata.reported_baseline_ping, - FALSE - ) AS reported_baseline_ping, - COALESCE( - _previous.metadata.reported_first_session_ping - OR _current.metadata.reported_first_session_ping, - FALSE - ) AS reported_first_session_ping, - COALESCE( - _previous.metadata.reported_metrics_ping - OR _current.metadata.reported_metrics_ping, - FALSE - ) AS reported_metrics_ping, - COALESCE( - _previous.metadata.adjust_info__source_ping, - _current.metadata.adjust_info__source_ping - ) AS adjust_info__source_ping - ) AS metadata, -FROM - _current -FULL OUTER JOIN - _previous -USING - (client_id, sample_id) -LEFT JOIN - activations -USING - (client_id) diff --git a/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/schema.yaml b/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/schema.yaml deleted file mode 100644 index 57b53e635da..00000000000 --- a/sql/moz-fx-data-shared-prod/fenix_derived/firefox_android_clients_v2/schema.yaml +++ /dev/null @@ -1,140 +0,0 @@ -fields: - -- mode: NULLABLE - name: client_id - type: STRING - description: | - Unique ID for the client installation. - -- mode: NULLABLE - name: sample_id - type: INTEGER - description: | - Sample ID to limit query results during an analysis. - -- mode: NULLABLE - name: first_seen_date - type: DATE - description: | - Date when the app first reported a baseline ping for the client. - -- mode: NULLABLE - name: first_reported_country - type: STRING - description: | - First reported country for the client installation. - -- mode: NULLABLE - name: first_reported_isp - type: STRING - description: | - Name of the first reported isp (Internet Service Provider). - -- mode: NULLABLE - name: channel - type: STRING - description: | - Channel where the browser is released. - -- mode: NULLABLE - name: device_manufacturer - type: STRING - description: | - Manufacturer of the device where the client is installed. - -- mode: NULLABLE - name: device_model - type: STRING - description: | - Model of the device where the client is installed. - -- mode: NULLABLE - name: os_version - type: STRING - description: | - Version of the Operating System where the client is originally installed. - -- mode: NULLABLE - name: app_version - type: STRING - description: | - App display version for this client installation. - -- mode: NULLABLE - name: locale - type: STRING - description: | - Client's initial locale. - -- mode: NULLABLE - name: is_activated - type: BOOLEAN - description: | - Determines if a client is activated based on the activation metric and a 7 day lag. - -- mode: NULLABLE - name: submission_timestamp - type: TIMESTAMP - description: | - Timestamp of the ping which contained the adjust information. - -- mode: NULLABLE - name: adjust_ad_group - type: STRING - description: | - Structure parameter for the the ad group of a campaign. - -- mode: NULLABLE - name: adjust_campaign - type: STRING - description: | - Structure parameter for the campaign name. - -- mode: NULLABLE - name: adjust_creative - type: STRING - description: | - Structure parameter for the creative content of a campaign. - -- mode: NULLABLE - name: adjust_network - type: STRING - description: | - The type of source of a client installation. - -- mode: NULLABLE - name: install_source - type: STRING - description: | - This value is only sourced from the metrics ping. - -- mode: NULLABLE - name: metadata - type: RECORD - description: | - Additional context around the source of this record. - - fields: - - mode: NULLABLE - name: reported_baseline_ping - type: BOOLEAN - description: | - True if the client reported a baseline_clients_first_seen ping. - - - mode: NULLABLE - name: reported_first_session_ping - type: BOOLEAN - description: | - True if the client ever reported a first_session ping. - - - mode: NULLABLE - name: reported_metrics_ping - type: BOOLEAN - description: | - True if the client ever reported a metrics ping. - - - mode: NULLABLE - name: adjust_info__source_ping - type: STRING - description: | - Ping from which the adjust_info values originate.