Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DENG-1705 - Add missing client attribution columns to clients daily/first-seen #4505

Merged
merged 2 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ fields:
- name: dlsource
type: STRING
mode: NULLABLE
- name: ua
type: STRING
mode: NULLABLE
mode: NULLABLE
name: attribution
type: RECORD
Expand Down Expand Up @@ -2339,3 +2342,36 @@ fields:
- name: startup_profile_selection_reason_first
type: STRING
mode: NULLABLE
- name: first_document_id
type: STRING
mode: NULLABLE
- name: partner_id
type: STRING
mode: NULLABLE
- name: distribution_version
type: STRING
mode: NULLABLE
- name: distributor
type: STRING
mode: NULLABLE
- name: distributor_channel
type: STRING
mode: NULLABLE
- name: env_build_platform_version
type: STRING
mode: NULLABLE
- name: env_build_xpcom_abi
type: STRING
mode: NULLABLE
- name: geo_db_version
type: STRING
mode: NULLABLE
- name: apple_model_id
type: STRING
mode: NULLABLE
- name: max_subsession_counter
type: INTEGER
mode: NULLABLE
- name: min_subsession_counter
type: INTEGER
mode: NULLABLE
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,15 @@ clients_summary AS (
submission_timestamp,
client_id,
sample_id,
document_id,
metadata.uri.app_update_channel AS channel,
normalized_channel,
normalized_os_version,
metadata.geo.country,
metadata.geo.city,
metadata.geo.subdivision1 AS geo_subdivision1,
metadata.geo.subdivision2 AS geo_subdivision2,
metadata.geo.db_version AS geo_db_version,
metadata.isp.name AS isp_name,
metadata.isp.organization AS isp_organization,
environment.system.os.name AS os,
Expand All @@ -194,6 +196,7 @@ clients_summary AS (
SAFE_CAST(environment.system.os.windows_ubr AS INT64) AS windows_ubr,
SAFE_CAST(environment.system.os.install_year AS INT64) AS install_year,
environment.system.is_wow64,
environment.system.apple_model_id,
SAFE_CAST(environment.system.memory_mb AS INT64) AS memory_mb,
environment.system.cpu.count AS cpu_count,
environment.system.cpu.cores AS cpu_cores,
Expand All @@ -214,6 +217,10 @@ clients_summary AS (
payload.info.subsession_counter,
payload.info.subsession_length,
environment.partner.distribution_id,
environment.partner.partner_id,
environment.partner.distribution_version,
environment.partner.distributor,
environment.partner.distributor_channel,
IFNULL(
environment.services.account_enabled,
udf.boolean_histogram_to_boolean(payload.histograms.fxa_configured)
Expand All @@ -235,6 +242,8 @@ clients_summary AS (
environment.build.build_id AS env_build_id,
environment.build.version AS env_build_version,
environment.build.architecture AS env_build_arch,
environment.build.platform_version AS env_build_platform_version,
ANich marked this conversation as resolved.
Show resolved Hide resolved
environment.build.xpcom_abi AS env_build_xpcom_abi,
ANich marked this conversation as resolved.
Show resolved Hide resolved
environment.settings.e10s_enabled,
environment.settings.locale,
environment.settings.update.channel AS update_channel,
Expand All @@ -251,7 +260,8 @@ clients_summary AS (
environment.settings.attribution.experiment,
environment.settings.attribution.variation,
environment.settings.attribution.dltoken,
environment.settings.attribution.dlsource
environment.settings.attribution.dlsource,
environment.settings.attribution.ua
),
NULL
) AS attribution,
Expand Down Expand Up @@ -686,6 +696,7 @@ aggregates AS (
SELECT
DATE(submission_timestamp) AS submission_date,
client_id,
ARRAY_AGG(document_id ORDER BY submission_timestamp)[OFFSET(0)] AS first_document_id,
SUM(aborts_content) AS aborts_content_sum,
SUM(aborts_gmplugin) AS aborts_gmplugin_sum,
SUM(aborts_plugin) AS aborts_plugin_sum,
Expand Down Expand Up @@ -772,10 +783,20 @@ aggregates AS (
mozfun.stats.mode_last(
ARRAY_AGG(distribution_id ORDER BY submission_timestamp)
) AS distribution_id,
mozfun.stats.mode_last(ARRAY_AGG(partner_id ORDER BY submission_timestamp)) AS partner_id,
mozfun.stats.mode_last(ARRAY_AGG(distribution_version ORDER BY submission_timestamp)) AS distribution_version,
mozfun.stats.mode_last(ARRAY_AGG(distributor ORDER BY submission_timestamp)) AS distributor,
mozfun.stats.mode_last(ARRAY_AGG(distributor_channel ORDER BY submission_timestamp)) AS distributor_channel,
ANich marked this conversation as resolved.
Show resolved Hide resolved
mozfun.stats.mode_last(ARRAY_AGG(e10s_enabled ORDER BY submission_timestamp)) AS e10s_enabled,
mozfun.stats.mode_last(
ARRAY_AGG(env_build_arch ORDER BY submission_timestamp)
) AS env_build_arch,
mozfun.stats.mode_last(
ARRAY_AGG(env_build_platform_version ORDER BY submission_timestamp)
) AS env_build_platform_version,
mozfun.stats.mode_last(
ARRAY_AGG(env_build_xpcom_abi ORDER BY submission_timestamp)
) AS env_build_xpcom_abi,
mozfun.stats.mode_last(ARRAY_AGG(env_build_id ORDER BY submission_timestamp)) AS env_build_id,
mozfun.stats.mode_last(
ARRAY_AGG(env_build_version ORDER BY submission_timestamp)
Expand Down Expand Up @@ -858,6 +879,7 @@ aggregates AS (
submission_timestamp
)
).*,
mozfun.stats.mode_last(ARRAY_AGG(geo_db_version ORDER BY submission_timestamp)) AS geo_db_version,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the use case for this field?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't say; this is used downstream in clients_first_seen_v2. cc @lucia-vargas-a

mozfun.json.mode_last(
ARRAY_AGG(
IF(
Expand Down Expand Up @@ -978,6 +1000,7 @@ aggregates AS (
ARRAY_AGG(is_default_browser ORDER BY submission_timestamp)
) AS is_default_browser,
mozfun.stats.mode_last(ARRAY_AGG(is_wow64 ORDER BY submission_timestamp)) AS is_wow64,
mozfun.stats.mode_last(ARRAY_AGG(apple_model_id ORDER BY submission_timestamp)) AS apple_model_id,
mozfun.stats.mode_last(ARRAY_AGG(locale ORDER BY submission_timestamp)) AS locale,
mozfun.stats.mode_last(ARRAY_AGG(memory_mb ORDER BY submission_timestamp)) AS memory_mb,
mozfun.stats.mode_last(
Expand Down Expand Up @@ -1135,6 +1158,8 @@ aggregates AS (
udf.aggregate_search_counts(ARRAY_CONCAT_AGG(search_counts ORDER BY submission_timestamp)).*,
AVG(session_restored) AS session_restored_mean,
COUNTIF(subsession_counter = 1) AS sessions_started_on_this_day,
MAX(subsession_counter)AS max_subsession_counter,
MIN(subsession_counter)AS min_subsession_counter,
SUM(shutdown_kill) AS shutdown_kill_sum,
SUM(subsession_length / NUMERIC '3600') AS subsession_hours_sum,
SUM(ssl_handshake_result_failure) AS ssl_handshake_result_failure_sum,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ fields:
- name: dlsource
type: STRING
mode: NULLABLE
- name: ua
type: STRING
mode: NULLABLE
mode: NULLABLE
name: attribution
type: RECORD
Expand Down Expand Up @@ -2303,3 +2306,36 @@ fields:
- name: startup_profile_selection_reason_first
type: STRING
mode: NULLABLE
- name: first_document_id
type: STRING
mode: NULLABLE
- name: partner_id
type: STRING
mode: NULLABLE
- name: distribution_version
ANich marked this conversation as resolved.
Show resolved Hide resolved
type: STRING
mode: NULLABLE
- name: distributor
type: STRING
mode: NULLABLE
- name: distributor_channel
type: STRING
mode: NULLABLE
- name: env_build_platform_version
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I notice a different naming convention, e.g. environment.partner.partner_id is named partner_id while environment.build.xpcom_abi is named env_build_xpcom_abi, which might confuse users and also differs from the naming in v2. It makes sense to me to keep the naming without the prefix for consistency, e.g. xpcom_abi instead of env_build_xpcom_abi, wdyt?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I matched the convention for the existing environment.build columns in clients_daily, which are all prefixed with env_build_ (see https://github.com/mozilla/bigquery-etl/pull/4505/files#diff-0802d82f91d4f1ab2d91e8d0d1ca4062467a1b723cee0b293eab62248966b949R242-R245), since we're not likely to change those upstream columns.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, some prefix and some don't e.g. partner_id, is_wow64

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those are from environment.partner and environment.system, not environment.build, so it doesn't seem like there was a previous convention.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately this leads to an unnecessary complication: downstream queries need to be updated when cascading the changes, because the naming differs between v1 and v2. It'd be so much better to align the naming between them in this PR rather than updating the schema or expanding the queries later, see e.g. platform_version.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't follow. The schemas for clients_first_seen_v2 and clients_first_seen_v1 are already very different. Keeping the convention makes sense for the same reason we wouldn't change the upstream env_build_arch, default_search_engine_data_load_path, or geo_subdivision1 to match the downstream column names.

type: STRING
mode: NULLABLE
- name: env_build_xpcom_abi
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same naming-convention concern as for env_build_platform_version.

type: STRING
mode: NULLABLE
- name: geo_db_version
type: STRING
mode: NULLABLE
- name: apple_model_id
type: STRING
mode: NULLABLE
- name: max_subsession_counter
type: INTEGER
mode: NULLABLE
- name: min_subsession_counter
type: INTEGER
mode: NULLABLE
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ fields:
- name: dlsource
type: STRING
mode: NULLABLE
- name: ua
type: STRING
mode: NULLABLE
mode: NULLABLE
name: attribution
type: RECORD
Expand Down Expand Up @@ -2305,3 +2308,36 @@ fields:
- name: startup_profile_selection_reason_first
type: STRING
mode: NULLABLE
- name: first_document_id
type: STRING
mode: NULLABLE
- name: partner_id
type: STRING
mode: NULLABLE
- name: distribution_version
type: STRING
mode: NULLABLE
- name: distributor
type: STRING
mode: NULLABLE
- name: distributor_channel
type: STRING
mode: NULLABLE
- name: env_build_platform_version
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment about naming, which also differs from v2. It might be confusing for users.

type: STRING
mode: NULLABLE
- name: env_build_xpcom_abi
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment about naming as above.

type: STRING
mode: NULLABLE
- name: geo_db_version
type: STRING
mode: NULLABLE
- name: apple_model_id
type: STRING
mode: NULLABLE
- name: max_subsession_counter
type: INTEGER
mode: NULLABLE
- name: min_subsession_counter
type: INTEGER
mode: NULLABLE
Original file line number Diff line number Diff line change
Expand Up @@ -298,38 +298,36 @@ main_ping AS (
TIMESTAMP(MIN(submission_date))
) AS first_seen_timestamp,
ARRAY_AGG(DATE(submission_date) ORDER BY submission_date ASC) AS all_dates,
CAST(
NULL AS STRING
) AS architecture, -- main_v5:environment.build.architecture
ARRAY_AGG(env_build_arch RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS architecture,
ARRAY_AGG(env_build_id RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS app_build_id,
ARRAY_AGG(app_name RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS app_name,
ARRAY_AGG(locale RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS locale,
CAST(
NULL AS STRING
) AS platform_version, -- main_v5:environment.build.platform_version
ARRAY_AGG(env_build_platform_version RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS platform_version,
ARRAY_AGG(vendor RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS vendor,
ARRAY_AGG(app_version RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS app_version,
CAST(
NULL AS STRING
) AS xpcom_abi, -- main_v5:environment.build.xpcom_abi / application.xpcom_abi
CAST(
NULL AS STRING
) AS document_id, -- main_v5:document_id
ARRAY_AGG(env_build_xpcom_abi RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS xpcom_abi,
ARRAY_AGG(first_document_id RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS document_id,
ARRAY_AGG(distribution_id RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS distribution_id,
CAST(
NULL AS STRING
) AS partner_distribution_version, -- main_v5:environment.partner.distribution_version
CAST(
NULL AS STRING
) AS partner_distributor, -- main_v5:environment.partner.distributor
CAST(
NULL AS STRING
) AS partner_distributor_channel, -- main_v5:environment.partner.distributor_channel
CAST(
NULL AS STRING
) AS partner_id, -- main_v5:environment.partner.distribution_id
ARRAY_AGG(distribution_version RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS partner_distribution_version,
ARRAY_AGG(distributor RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS partner_distributor,
ARRAY_AGG(distributor_channel RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS partner_distributor_channel,
ARRAY_AGG(partner_id RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS partner_id,
ARRAY_AGG(attribution.campaign RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS attribution_campaign,
Expand All @@ -345,9 +343,9 @@ main_ping AS (
ARRAY_AGG(attribution.source RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS attribution_source,
CAST(
NULL AS STRING
) AS attribution_ua, -- main_v5:environment.settings.attribution.ua
ARRAY_AGG(attribution.ua RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS attribution_ua,
ARRAY_AGG(default_search_engine_data_load_path RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS engine_data_load_path,
Expand All @@ -360,13 +358,11 @@ main_ping AS (
ARRAY_AGG(default_search_engine_data_submission_url RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS engine_data_submission_url,
CAST(
NULL AS STRING
) AS apple_model_id, -- main_v5:environment.system.apple_model_id
ARRAY_AGG(apple_model_id RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS apple_model_id,
ARRAY_AGG(city RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS city,
CAST(
NULL AS STRING
) AS db_version, -- main_v5:metadata.geo.db_version
ARRAY_AGG(geo_db_version RESPECT NULLS ORDER BY submission_date)[SAFE_OFFSET(0)] AS db_version,
ARRAY_AGG(geo_subdivision1 RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS subdivision1,
Expand All @@ -384,9 +380,9 @@ main_ping AS (
ARRAY_AGG(attribution.dltoken RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS attribution_dltoken,
CAST(
NULL AS STRING
) AS attribution_dlsource -- main_v5:environment.settings.attribution.dlsource
ARRAY_AGG(attribution.dlsource RESPECT NULLS ORDER BY submission_date)[
SAFE_OFFSET(0)
] AS attribution_dlsource
FROM
`moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6`
WHERE
Expand Down
Loading