Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
friendly_name: Telemetry Health Glean Errors
description: |-
Counts the number of Glean metrics with recording errors that exceed 1% of clients per day.
owners:
- [email protected]
labels:
owner: tlong
require_column_descriptions: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.fenix.telemetry_health_glean_errors`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.fenix_derived.telemetry_health_glean_errors_v1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
friendly_name: Telemetry Health Ping Latency
description: |-
Reports latency percentiles (p95, median) for collection-to-submission, submission-to-ingestion, and collection-to-ingestion for telemetry pings per day.
owners:
- [email protected]
labels:
owner: tlong
require_column_descriptions: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.fenix.telemetry_health_ping_latency`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.fenix_derived.telemetry_health_ping_latency_v1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
friendly_name: Telemetry Health Ping Volume 80th Percentile
description: |-
Calculates the 80th percentile of ping volume per client per day.
owners:
- [email protected]
labels:
owner: tlong
require_column_descriptions: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.fenix.telemetry_health_ping_volume_p80`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.fenix_derived.telemetry_health_ping_volume_p80_v1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
friendly_name: Telemetry Health Sequence Holes
description: |-
Counts the number of clients experiencing sequence holes in their Glean pings per day.
owners:
- [email protected]
labels:
owner: tlong
require_column_descriptions: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.fenix.telemetry_health_sequence_holes`
AS
SELECT
*
FROM
`moz-fx-data-shared-prod.fenix_derived.telemetry_health_sequence_holes_v1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
friendly_name: Telemetry Health Glean Errors
description: |-
Counts the number of Glean metrics with recording errors that exceed 1% of clients per day.
owners:
- [email protected]
labels:
incremental: true
owner1: tlong
scheduling:
dag_name: bqetl_default
bigquery:
time_partitioning:
type: day
field: submission_date
require_partition_filter: true
expiration_days: 775
range_partitioning: null
require_column_descriptions: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-- Query for telemetry health glean errors
WITH sample AS (
SELECT
client_info.client_id AS client_id,
normalized_channel,
DATE(submission_timestamp) AS submission_date,
metrics.labeled_counter.glean_error_invalid_value AS ev,
metrics.labeled_counter.glean_error_invalid_label AS el,
metrics.labeled_counter.glean_error_invalid_state AS es,
metrics.labeled_counter.glean_error_invalid_overflow AS eo
FROM
`moz-fx-data-shared-prod.fenix.metrics`
WHERE
sample_id = 0
AND submission_timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 DAY)
),
-- Denominator: distinct clients per app and day
app_day_totals AS (
SELECT
submission_date,
normalized_channel,
COUNT(DISTINCT client_id) AS total_clients
FROM
sample
GROUP BY
submission_date,
normalized_channel
),
-- Numerator per metric key: distinct clients with any error for that key on that day
metric_clients_by_day AS (
SELECT
s.normalized_channel,
s.submission_date,
e.key AS metric_key,
COUNT(DISTINCT s.client_id) AS clients_with_error
FROM
sample AS s
JOIN
UNNEST(ARRAY_CONCAT(IFNULL(ev, []), IFNULL(el, []), IFNULL(es, []), IFNULL(eo, []))) AS e
WHERE
NOT STARTS_WITH(e.key, 'glean')
AND NOT STARTS_WITH(e.key, 'fog')
AND e.value > 0
GROUP BY
s.submission_date,
s.normalized_channel,
metric_key
)
SELECT
m.normalized_channel,
m.submission_date,
COUNTIF(SAFE_DIVIDE(m.clients_with_error, t.total_clients) > 0.01) AS num_metrics_over_1pct
FROM
metric_clients_by_day AS m
JOIN
app_day_totals AS t
USING (submission_date, normalized_channel)
GROUP BY
m.submission_date,
m.normalized_channel
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
fields:
- name: normalized_channel
type: STRING
mode: NULLABLE
- name: submission_date
type: DATE
mode: NULLABLE
- name: num_metrics_over_1pct
type: INTEGER
mode: NULLABLE
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
friendly_name: Telemetry Health Ping Latency
description: |-
Reports latency percentiles (p95, median) for collection-to-submission, submission-to-ingestion, and collection-to-ingestion for telemetry pings per day.
owners:
- [email protected]
labels:
incremental: true
owner1: tlong
scheduling:
dag_name: bqetl_default
bigquery:
time_partitioning:
type: day
field: submission_date
require_partition_filter: true
expiration_days: 775
range_partitioning: null
require_column_descriptions: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
WITH sample AS (
SELECT
normalized_channel,
metadata.header.parsed_date,
ping_info.parsed_end_time,
submission_timestamp,
FROM
`moz-fx-data-shared-prod.fenix.baseline`
WHERE
sample_id = 0
AND DATE(submission_timestamp) >= DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
),
latency_quantiles AS (
SELECT
normalized_channel,
DATE(submission_timestamp) AS submission_date,
APPROX_QUANTILES(
TIMESTAMP_DIFF(parsed_date, parsed_end_time, SECOND),
100
) AS collection_to_submission_latency,
APPROX_QUANTILES(
TIMESTAMP_DIFF(submission_timestamp, parsed_date, SECOND),
100
) AS submission_to_ingestion_latency,
APPROX_QUANTILES(
TIMESTAMP_DIFF(submission_timestamp, parsed_end_time, SECOND),
100
) AS collection_to_ingestion_latency
FROM
sample
GROUP BY
ALL
)
SELECT
normalized_channel,
submission_date,
collection_to_submission_latency[OFFSET(95)] AS collection_to_submission_latency_p95,
collection_to_submission_latency[OFFSET(50)] AS collection_to_submission_latency_median,
submission_to_ingestion_latency[OFFSET(95)] AS submission_to_ingestion_latency_p95,
submission_to_ingestion_latency[OFFSET(50)] AS submission_to_ingestion_latency_median,
collection_to_ingestion_latency[OFFSET(95)] AS collection_to_ingestion_latency_p95,
collection_to_ingestion_latency[OFFSET(50)] AS collection_to_ingestion_latency_median
FROM
latency_quantiles
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
fields:
- name: normalized_channel
type: STRING
mode: REQUIRED
- name: submission_date
type: DATE
mode: REQUIRED
- name: collection_to_submission_latency_p95
type: INTEGER
mode: NULLABLE
- name: collection_to_submission_latency_median
type: INTEGER
mode: NULLABLE
- name: submission_to_ingestion_latency_p95
type: INTEGER
mode: NULLABLE
- name: submission_to_ingestion_latency_median
type: INTEGER
mode: NULLABLE
- name: collection_to_ingestion_latency_p95
type: INTEGER
mode: NULLABLE
- name: collection_to_ingestion_latency_median
type: INTEGER
mode: NULLABLE
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
friendly_name: Telemetry Health Ping Volume 80th Percentile
description: |-
Calculates the 80th percentile of ping volume per client per day.
owners:
- [email protected]
labels:
incremental: true
owner1: tlong
scheduling:
dag_name: bqetl_default
bigquery:
time_partitioning:
type: day
field: submission_date
require_partition_filter: true
expiration_days: 775
range_partitioning: null
require_column_descriptions: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
-- Query for telemetry health ping volume p80
WITH sample AS (
SELECT
normalized_channel,
DATE(submission_timestamp) AS submission_date,
COUNT(1) AS ping_count
FROM
`moz-fx-data-shared-prod.fenix.baseline`
WHERE
sample_id = 0
AND DATE(submission_timestamp) >= DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
GROUP BY
normalized_channel,
submission_date,
client_info.client_id
),
ping_count_quantiles AS (
SELECT
normalized_channel,
submission_date,
APPROX_QUANTILES(ping_count, 100) AS quantiles,
FROM
sample
GROUP BY
ALL
)
SELECT
normalized_channel,
submission_date,
quantiles[OFFSET(80)] AS p80
FROM
ping_count_quantiles
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
fields:
- name: normalized_channel
type: STRING
mode: REQUIRED
- name: submission_date
type: DATE
mode: REQUIRED
- name: p80
type: INTEGER
mode: NULLABLE
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
friendly_name: Telemetry Health Sequence Holes
description: |-
Counts the number of clients experiencing sequence holes in their Glean pings per day..
owners:
- [email protected]
labels:
incremental: true
owner1: tlong
scheduling:
dag_name: bqetl_default
bigquery:
time_partitioning:
type: day
field: submission_date
require_partition_filter: true
expiration_days: 775
range_partitioning: null
require_column_descriptions: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
-- Query for telemetry health sequence holes
WITH sample AS (
SELECT
normalized_channel,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
ping_info.seq AS sequence_number
FROM
`moz-fx-data-shared-prod.fenix.baseline`
WHERE
sample_id = 0
AND DATE(submission_timestamp) >= DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
),
lagged AS (
SELECT
normalized_channel,
submission_date,
client_id,
sequence_number,
LAG(sequence_number) OVER (
PARTITION BY
submission_date,
client_id
ORDER BY
sequence_number
) AS prev_seq
FROM
sample
),
per_client_day AS (
SELECT
normalized_channel,
submission_date,
client_id,
-- A client has a gap on that date if any step isn't prev+1.
LOGICAL_OR(prev_seq IS NOT NULL AND sequence_number != prev_seq + 1) AS has_gap
FROM
lagged
GROUP BY
ALL
)
SELECT
normalized_channel,
submission_date,
COUNTIF(has_gap) AS clients_with_sequence_gaps_1pct,
COUNT(DISTINCT client_id) AS total_unique_clients_1pct,
SAFE_DIVIDE(COUNTIF(has_gap), COUNT(DISTINCT client_id)) * 100 AS pct_clients_with_gaps
FROM
per_client_day
GROUP BY
ALL
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
fields:
- name: normalized_channel
type: STRING
mode: REQUIRED
- name: submission_date
type: DATE
mode: REQUIRED
- name: clients_with_sequence_gaps_1pct
type: INTEGER
mode: NULLABLE
- name: total_unique_clients_1pct
type: INTEGER
mode: NULLABLE
- name: pct_clients_with_gaps
type: FLOAT
mode: NULLABLE
Loading