Skip to content

Commit

Permalink
Use live tables for structured error counts
Browse files Browse the repository at this point in the history
  • Loading branch information
scholtzan committed Nov 22, 2023
1 parent 57013e2 commit 22239aa
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.monitoring.structured_error_counts`
AS
SELECT
*
*,
CAST(submission_date AS TIMESTAMP) AS hour -- for backwards compatibility
FROM
`moz-fx-data-shared-prod.monitoring_derived.structured_error_counts_v1`
`moz-fx-data-shared-prod.monitoring_derived.structured_error_counts_v2`
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
friendly_name: Structured Error Counts
description: >
A daily count of structured errors by document namespace
owners:
- [email protected]
labels:
schedule: daily
scheduling:
dag_name: bqetl_monitoring
referenced_tables:
- ['moz-fx-data-shared-prod', '*_live', '*']
date_partition_parameter: null
parameters: ["submission_date:DATE:{{ds}}"]
bigquery:
time_partitioning:
type: day
field: submission_date
require_partition_filter: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
fields:
- mode: NULLABLE
name: submission_date
type: DATE
- mode: NULLABLE
name: document_namespace
type: STRING
- mode: NULLABLE
name: document_type
type: STRING
- mode: NULLABLE
name: document_version
type: STRING
- mode: NULLABLE
name: ping_count
type: INTEGER
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
DECLARE dummy INT64; -- dummy variable to indicate to bigquery-etl that this is a script
CREATE TEMP TABLE
ping_counts(
submission_date DATE,
document_namespace STRING,
document_type STRING,
document_version STRING,
ping_count INT64
);

FOR record IN (
SELECT
schema_name AS dataset_id
FROM
`moz-fx-data-shared-prod.INFORMATION_SCHEMA.SCHEMATA`
WHERE
schema_name LIKE "%_live%"
)
DO
EXECUTE IMMEDIATE CONCAT(
"INSERT ping_counts (submission_date, document_namespace, document_type, document_version, ping_count) ",
"SELECT PARSE_DATE('%Y%m%d', PARTITION_ID) AS submission_date, ",
"REPLACE(TABLE_SCHEMA, '_live', '') AS document_namespace, ",
"REGEXP_EXTRACT(TABLE_NAME, r'(.+)_v[0-9]+') AS document_type, ",
"REGEXP_EXTRACT(TABLE_NAME, r'.+_v([0-9]+)') AS document_version, ",
"TOTAL_ROWS AS ping_count ",
"FROM ",
record.dataset_id,
".INFORMATION_SCHEMA.PARTITIONS ",
"WHERE PARTITION_ID != '__NULL__' AND ",
"PARSE_DATE('%Y%m%d', PARTITION_ID) < CURRENT_DATE AND ('",
@submission_date,
"' IS NULL OR '",
@submission_date,
"' = PARSE_DATE('%Y%m%d', PARTITION_ID))"
);
END
FOR;

MERGE
`moz-fx-data-shared-prod.monitoring_derived.structured_error_counts_v2` r
USING
deletion_counts d
ON
d.submission_date = r.submission_date
AND r.document_namespace = d.document_namespace
AND r.document_type = d.document_type
AND r.document_version = d.document_version
WHEN NOT MATCHED
THEN
INSERT
(submission_date, document_namespace, document_type, document_version, ping_count)
VALUES
(d.submission_date, d.document_namespace, d.document_type, d.document_version, d.ping_count)
WHEN NOT MATCHED BY SOURCE
THEN
DELETE;

0 comments on commit 22239aa

Please sign in to comment.