diff --git a/dags.yaml b/dags.yaml index 85356f3a5bc..181569568ab 100644 --- a/dags.yaml +++ b/dags.yaml @@ -2505,3 +2505,25 @@ bqetl_market_intel_bot: tags: - impact/tier_3 - repo/bigquery-etl + +bqetl_gecko_trace: + catchup: false + default_args: + depends_on_past: false + email: + - telemetry-alerts@mozilla.com + - mvanstraten@mozilla.com + email_on_failure: true + email_on_retry: true + end_date: null + max_active_tis_per_dag: null + owner: mvanstraten@mozilla.com + retries: 2 + retry_delay: 30m + start_date: "2025-09-26" + description: | + Processes gecko trace data across multiple Firefox applications. + repo: bigquery-etl + schedule_interval: 0 9 * * * + tags: + - impact/tier_3 diff --git a/sql/mozfun/gecko_trace/build_root_span/README.md b/sql/mozfun/gecko_trace/build_root_span/README.md new file mode 100644 index 00000000000..992df928096 --- /dev/null +++ b/sql/mozfun/gecko_trace/build_root_span/README.md @@ -0,0 +1,44 @@ +# gecko_trace.build_root_span + +Builds a root span tree structure from an array of span objects. + +## Signature + +```sql +gecko_trace.build_root_span(spans ARRAY<JSON>) RETURNS JSON +``` + +## Arguments + +- `spans`: Array of JSON objects representing individual spans. Each span should + contain at minimum: + - `span_id`: Unique identifier for the span + - `parent_span_id`: ID of the parent span (null for root spans) + +## Description + +Takes an array of JSON span objects and constructs a hierarchical tree structure +by linking spans with their parent-child relationships. + +If no explicit root span is found, the function will attempt to find a single +"missing" root span. If there are multiple or no missing roots, an error is +thrown. + +## Returns + +Returns a JSON object representing the root span with all child spans nested in +`childSpans` arrays throughout the tree structure. 
+ +## Example + +```sql +SELECT gecko_trace.build_root_span([ + JSON '{"span_id": "root", "parent_span_id": null, "name": "main_process"}', + JSON '{"span_id": "child1", "parent_span_id": "root", "name": "network_request"}', + JSON '{"span_id": "child2", "parent_span_id": "root", "name": "dom_parse"}', + JSON '{"span_id": "grandchild", "parent_span_id": "child1", "name": "dns_lookup"}' +]) +``` + +This would return a tree structure where the root span contains two child spans +in its `childSpans` array, and one of those children has its own child span. diff --git a/sql/mozfun/gecko_trace/build_root_span/metadata.yaml b/sql/mozfun/gecko_trace/build_root_span/metadata.yaml new file mode 100644 index 00000000000..b79e25eeced --- /dev/null +++ b/sql/mozfun/gecko_trace/build_root_span/metadata.yaml @@ -0,0 +1,14 @@ +--- +friendly_name: Gecko Trace Build Root Span +description: |- + Builds a root span tree structure from an array of span objects. + + Takes an array of JSON span objects and constructs a hierarchical tree structure + by linking spans with their parent-child relationships. Returns the root span + with all child spans nested in a `childSpans` array property. + + If no root span is found, the function will attempt to find a single "missing" + root span. If there are multiple or no missing roots, an error is thrown. + + This function is used for processing Gecko trace data to reconstruct the + hierarchical structure of spans within a trace. 
diff --git a/sql/mozfun/gecko_trace/build_root_span/udf.sql b/sql/mozfun/gecko_trace/build_root_span/udf.sql
new file mode 100644
index 00000000000..6b2c451f594
--- /dev/null
+++ b/sql/mozfun/gecko_trace/build_root_span/udf.sql
@@ -0,0 +1,82 @@
+CREATE OR REPLACE FUNCTION gecko_trace.build_root_span(spans ARRAY<JSON>)
+RETURNS JSON
+LANGUAGE js AS r"""
+  // Builds a span tree from a flat list of spans and returns its root.
+  //
+  // Every span receives a `childSpans` array holding the spans whose
+  // `parent_span_id` equals its `span_id`. Spans may arrive in any order: a
+  // parent that has not been seen yet is tracked as a placeholder object
+  // (`type: "missing"`) and replaced once the real span shows up.
+  //
+  // Returns null for NULL or empty input. Throws when no span without a
+  // `parent_span_id` exists and there is not exactly one placeholder left
+  // that can act as the root.
+  if (!spans || spans.length === 0) {
+    return null;
+  }
+
+  const spansById = new Map();
+  let rootSpanId;
+
+  spans.forEach((span) => {
+    const spanId = span.span_id;
+    // Re-attach any children accumulated while this span was "missing"
+    const maybeMissingSelf = spansById.get(spanId);
+    span.childSpans = maybeMissingSelf?.childSpans ?? [];
+    spansById.set(spanId, span);
+
+    if (!span.parent_span_id) {
+      rootSpanId = spanId; // yay, we found the root span
+      return;
+    }
+
+    // Look up the parent, creating a placeholder if it has not been seen yet
+    const parent = spansById.get(span.parent_span_id) || {
+      span_id: span.parent_span_id,
+      childSpans: [],
+      type: "missing",
+    };
+    parent.childSpans.push(span);
+    spansById.set(span.parent_span_id, parent);
+  });
+
+  if (!rootSpanId) {
+    // Find the single missing root, if any
+    const missingRoots = Array.from(spansById.values()).filter(
+      (span) => span.type === "missing",
+    );
+    if (missingRoots.length !== 1) {
+      throw new Error(
+        `Unable to construct span tree: expected exactly one missing root span, but found ${missingRoots.length}`,
+      );
+    }
+
+    rootSpanId = missingRoots[0].span_id;
+  }
+
+  return spansById.get(rootSpanId);
+""";
+
+-- Tests
+SELECT
+  -- Test with simple parent-child relationship
+  assert.not_null(
+    gecko_trace.build_root_span(
+      [
+        JSON '{"span_id": "root", "parent_span_id": null, "name": "root_span"}',
+        JSON '{"span_id": "child1", "parent_span_id": "root", "name": "child_span"}'
+      ]
+    )
+  ),
+  -- Test with empty array
+  assert.null(gecko_trace.build_root_span([])),
+  -- Test single span (should be root)
+  assert.equals(
+    "root",
+    JSON_VALUE(
+      gecko_trace.build_root_span(
+        [JSON '{"span_id": "root", "parent_span_id": null, "name": "root_span"}']
+      ),
+      "$.span_id"
+    )
+  );
diff --git 
a/sql/mozfun/gecko_trace/calculate_signature/README.md b/sql/mozfun/gecko_trace/calculate_signature/README.md new file mode 100644 index 00000000000..6966dcbe4c2 --- /dev/null +++ b/sql/mozfun/gecko_trace/calculate_signature/README.md @@ -0,0 +1,57 @@ +# gecko_trace.calculate_signature + +Calculates a signature hash for a trace based on its root span structure. + +## Signature + +```sql +gecko_trace.calculate_signature(rootSpan JSON) RETURNS STRING +``` + +## Arguments + +- `rootSpan`: JSON object representing the root span of a trace tree, typically + generated by `gecko_trace.build_root_span()`. Should contain: + - `name`: Span name + - `scope`: Object with `name` property + - `resource`: Object with `attributes` property + - `events`: Optional array of event objects with `name` and `attributes` + - `childSpans`: Array of child span objects with the same structure + +## Description + +Uses a fast hash function (cyrb64) to generate a deterministic signature based +on the hierarchical structure and attributes of spans in a trace. The signature +is calculated by traversing the span tree depth-first and hashing: + +- Resource attributes (excluding certain internal IDs like + `gecko_process_internal_id`) +- Scope names +- Span names +- Event names and attributes + +## Returns + +Returns a string hash that serves as a deterministic signature for the trace +structure. Traces with identical signatures have the same execution pattern and +can be grouped together for analysis. 
+
+## Example
+
+```sql
+WITH root_span AS (
+  SELECT gecko_trace.build_root_span(spans_array) as root
+  FROM traces_table
+  WHERE trace_id = 'some_trace_id'
+)
+SELECT gecko_trace.calculate_signature(root) as signature
+FROM root_span
+```
+
+## Notes
+
+- Internal process IDs and other volatile attributes are excluded from hashing
+  to focus on logical execution patterns
+- Used in conjunction with `gecko_trace.build_root_span()` for complete trace
+  analysis workflows
+- Returns empty string for NULL input
diff --git a/sql/mozfun/gecko_trace/calculate_signature/metadata.yaml b/sql/mozfun/gecko_trace/calculate_signature/metadata.yaml
new file mode 100644
index 00000000000..1e668f0e893
--- /dev/null
+++ b/sql/mozfun/gecko_trace/calculate_signature/metadata.yaml
@@ -0,0 +1,15 @@
+---
+friendly_name: Gecko Trace Calculate Signature
+description: |-
+  Calculates a signature hash for a trace based on its root span structure.
+
+  Uses a fast hash function (cyrb64) to generate a deterministic signature
+  based on the hierarchical structure and attributes of spans in a trace.
+  The signature is calculated by traversing the span tree and hashing:
+  - Resource attributes (excluding certain internal IDs like gecko_process_internal_id)
+  - Scope names
+  - Span names
+  - Event names and attributes
+
+  The function returns a string hash that can be used to identify traces with
+  similar execution patterns.
diff --git a/sql/mozfun/gecko_trace/calculate_signature/udf.sql b/sql/mozfun/gecko_trace/calculate_signature/udf.sql
new file mode 100644
index 00000000000..0bc097c6988
--- /dev/null
+++ b/sql/mozfun/gecko_trace/calculate_signature/udf.sql
@@ -0,0 +1,99 @@
+CREATE OR REPLACE FUNCTION gecko_trace.calculate_signature(rootSpan JSON)
+RETURNS STRING
+LANGUAGE js AS r"""
+  // Computes a deterministic signature for the span tree rooted at `rootSpan`.
+  //
+  // The tree is walked depth-first; for every span the resource attributes
+  // (minus ATTRS_TO_SKIP), the scope name, the span name and the events are
+  // folded into a running cyrb64 digest. Returns "" for NULL input.
+  //
+  // cyrb53 (c) 2018 bryc (github.com/bryc). License: Public domain. Attribution appreciated.
+  // A fast and simple 64-bit (or 53-bit) string hash function with decent collision resistance.
+  // Largely inspired by MurmurHash2/3, but with a focus on speed/simplicity.
+  // See https://stackoverflow.com/questions/7616461/generate-a-hash-from-string-in-javascript/52171480#52171480
+  // https://github.com/bryc/code/blob/master/jshash/experimental/cyrb53.js
+  const cyrb64 = (str, seed = 0) => {
+    let h1 = 0xdeadbeef ^ seed,
+      h2 = 0x41c6ce57 ^ seed;
+    for (let i = 0, ch; i < str.length; i++) {
+      ch = str.charCodeAt(i);
+      h1 = Math.imul(h1 ^ ch, 2654435761);
+      h2 = Math.imul(h2 ^ ch, 1597334677);
+    }
+    h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507);
+    h1 ^= Math.imul(h2 ^ (h2 >>> 13), 3266489909);
+    h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507);
+    h2 ^= Math.imul(h1 ^ (h1 >>> 13), 3266489909);
+    // For a single 53-bit numeric return value we could return
+    // 4294967296 * (2097151 & h2) + (h1 >>> 0);
+    // but we instead return the full 64-bit value:
+    return [h2 >>> 0, h1 >>> 0];
+  };
+
+  const seed = 0;
+  let digest = "";
+  // Fold `str` into the running digest: the previous digest is prepended, so
+  // the result depends on everything hashed so far and on the order of calls.
+  const hash = (str) => {
+    const [h2, h1] = cyrb64(digest + str, seed);
+    digest =
+      h2.toString(36).padStart(7, "0") + h1.toString(36).padStart(7, "0");
+  };
+
+  // Attributes that must not influence the signature.
+  const ATTRS_TO_SKIP = new Set(["gecko_process_internal_id"]);
+  const hashAttrs = (attrs) => {
+    // `attrs` may be absent (e.g. an event or placeholder span without attributes)
+    for (const [key, value] of Object.entries(attrs ?? {})) {
+      if (ATTRS_TO_SKIP.has(key)) continue;
+      hash(key);
+      hash(value);
+    }
+  };
+
+  const hashEvents = (events) => {
+    for (const event of events) {
+      hash(event.name);
+      hashAttrs(event.attributes);
+    }
+  };
+
+  if (rootSpan === null || rootSpan === undefined) {
+    return digest; // NULL input: nothing was hashed, so the signature is ""
+  }
+
+  // Iterative depth-first traversal; optional fields are tolerated when absent
+  const stack = [rootSpan];
+  while (stack.length > 0) {
+    const span = stack.pop();
+    hashAttrs(span.resource?.attributes);
+    hash(span.scope?.name);
+    hash(span.name);
+    if (span.events) {
+      hashEvents(span.events);
+    }
+    stack.push(...(span.childSpans ?? []));
+  }
+
+  return digest;
+""";
+
+-- Tests
+SELECT
+  -- Test with simple root span
+  assert.not_null(
+    gecko_trace.calculate_signature(
+      JSON '{"span_id": "root", "name": "test", "scope": {"name": "test_scope"}, "resource": {"attributes": {}}, "childSpans": []}'
+    )
+  ),
+  -- Test that same input produces same signature
+  assert.equals(
+    gecko_trace.calculate_signature(
+      JSON '{"span_id": "root", "name": "test", "scope": {"name": "test_scope"}, "resource": {"attributes": {}}, "childSpans": []}'
+    ),
+    gecko_trace.calculate_signature(
+      JSON '{"span_id": "root", "name": "test", "scope": {"name": "test_scope"}, "resource": {"attributes": {}}, "childSpans": []}'
+    )
+  ),
+  -- Test that null input returns empty string
+  assert.equals("", gecko_trace.calculate_signature(NULL));
diff --git a/sql_generators/gecko_trace/README.md b/sql_generators/gecko_trace/README.md
new file mode 100644
index 00000000000..1160caae86c
--- /dev/null
+++ b/sql_generators/gecko_trace/README.md
@@ -0,0 +1,163 @@
+# Gecko Trace SQL Generator
+
+This generator creates BigQuery tables and views for processing Gecko trace
+telemetry data from Firefox applications.
+
+## Overview
+
+The generator creates a complete data pipeline for analyzing Gecko traces:
+
+1. **Derived Tables** (`{dataset}_derived`): Process raw telemetry into
+   structured span and trace data
+2. 
**Aggregate Views** (`gecko_trace_aggregates`): Unified views combining data + across all Firefox applications + +## Generated Tables + +### Derived Tables (per Firefox application) + +For each Firefox application (`firefox_desktop`, `org_mozilla_fenix_nightly`, +`org_mozilla_firefox_beta`), the generator creates: + +#### `gecko_trace_spans_v1` + +- **Purpose**: Individual spans extracted from raw traces +- **Schema**: Flattened span data with trace/parent relationships, timing, + events, and metadata +- **Source**: Raw telemetry `gecko_trace` ping table + +#### `gecko_trace_traces_v1` + +- **Purpose**: Complete traces with hierarchical span structures +- **Schema**: Aggregated traces with `root_span` JSON tree and calculated + `signature` hash +- **Dependencies**: Uses `mozfun.gecko_trace.build_root_span()` and + `mozfun.gecko_trace.calculate_signature()` +- **Source**: `gecko_trace_spans_v1` table + +#### `gecko_trace_signatures_v1` + +- **Purpose**: Statistics grouped by trace signature +- **Schema**: Signature hash, average duration, and hit counts +- **Source**: `gecko_trace_traces_v1` table + +### Aggregate Views + +Located in `moz-fx-data-shared-prod.gecko_trace_aggregates`: + +#### `spans` + +- Unified view of all span data across Firefox applications +- UNION ALL of all `gecko_trace_spans_v1` tables + +#### `traces` + +- Unified view of all trace data across Firefox applications +- UNION ALL of all `gecko_trace_traces_v1` tables + +#### `signatures` + +- Unified view of all signature statistics across Firefox applications +- UNION ALL of all `gecko_trace_signatures_v1` tables + +## Usage + +```bash +# Generate all tables and views with default settings +./bqetl generate gecko_trace + +# Specify custom output directory and target project +./bqetl generate gecko_trace \ + --output-dir /path/to/output \ + --target-project my-project-id +``` + +### Options + +- `--output-dir`: Directory where generated SQL files are written (default: + `sql`) +- `--target-project`: 
BigQuery project ID for generated queries (default: + `moz-fx-data-shared-prod`) + +## Generated File Structure + +``` +<output-dir>/ +├── <target-project>/ +│ ├── <app_id>_derived/ +│ │ ├── gecko_trace_spans_v1/ +│ │ │ ├── query.sql +│ │ │ ├── metadata.yaml +│ │ │ └── schema.yaml +│ │ ├── gecko_trace_traces_v1/ +│ │ └── gecko_trace_signatures_v1/ +│ └── gecko_trace_aggregates/ +│ ├── dataset_metadata.yaml +│ ├── spans/ +│ │ ├── view.sql +│ │ ├── metadata.yaml +│ │ └── schema.yaml +│ ├── traces/ +│ └── signatures/ +``` + +## Data Flow + +``` +Raw Telemetry (gecko_trace ping table) + ↓ +gecko_trace_spans_v1 (individual spans) + ↓ +gecko_trace_traces_v1 (complete traces with root_span + signature) + ↓ +gecko_trace_signatures_v1 (signature statistics) + ↓ +gecko_trace_aggregates.* (unified views across applications) +``` + +## Example Queries + +### Analyze trace signatures across applications + +```sql +SELECT + application, + signature, + average_duration_nano / 1000000 as avg_duration_ms, + hits +FROM `moz-fx-data-shared-prod.gecko_trace_aggregates.signatures` +WHERE hits > 100 +ORDER BY average_duration_nano DESC +``` + +### Examine span hierarchy for a specific trace + +```sql +SELECT + JSON_EXTRACT_SCALAR(root_span, '$.name') as root_name, + JSON_EXTRACT_ARRAY(root_span, '$.childSpans') as children, + duration_nano / 1000000 as duration_ms +FROM `moz-fx-data-shared-prod.gecko_trace_aggregates.traces` +WHERE trace_id = 'your-trace-id-here' +``` + +## Configuration + +The generator processes data for these Firefox applications: + +- `firefox_desktop` - Firefox Desktop +- `org_mozilla_fenix_nightly` - Firefox for Android (Nightly) +- `org_mozilla_firefox_beta` - Firefox for Android (Beta) + +To add additional applications, update the `APPLICATIONS` tuple in +`__init__.py`. 
+
+## Templates
+
+The generator uses Jinja2 templates located in `templates/`:
+
+- `derived/` - Templates for derived tables (rendered once per application)
+- `aggregates/` - Templates for the `gecko_trace_aggregates` views and the
+  dataset metadata
+
+All templates include proper metadata, schemas, and documentation.
diff --git a/sql_generators/gecko_trace/__init__.py b/sql_generators/gecko_trace/__init__.py
new file mode 100644
index 00000000000..df05a9c6690
--- /dev/null
+++ b/sql_generators/gecko_trace/__init__.py
@@ -0,0 +1,143 @@
+"""SQL generator for gecko trace data queries and metadata."""
+
+import os
+import shutil
+from pathlib import Path
+
+import click
+from jinja2 import Environment, FileSystemLoader
+
+from bigquery_etl.cli.utils import use_cloud_function_option
+from bigquery_etl.util.common import write_sql
+
+THIS_MODULE = Path(os.path.dirname(__file__))
+TEMPLATES = THIS_MODULE / "templates"
+PING_NAME = "gecko_trace"
+APPLICATIONS = (
+    "firefox_desktop",  # The desktop version of Firefox
+    "org_mozilla_fenix_nightly",  # Nightly channel of Firefox Preview
+    "org_mozilla_firefox_beta",  # Beta channel of Firefox for Android
+)
+
+
+def generate_derived(output_dir, target_project):
+    """Generate derived table SQL queries and metadata for gecko trace data.
+
+    Each ``<table>/query.sql`` template under ``templates/derived`` is rendered
+    once per entry in ``APPLICATIONS`` and written below
+    ``<output_dir>/<target_project>/<app_id>_derived/<table>/``. A sibling
+    ``metadata.yaml`` template is rendered with ``app_id``; a sibling
+    ``schema.yaml`` is copied unchanged.
+    """
+    env = Environment(loader=FileSystemLoader(TEMPLATES / "derived"))
+    # `extensions` takes bare file extensions, not glob patterns.
+    query_templates = env.list_templates(extensions=["sql"])
+
+    for app_id in APPLICATIONS:
+        for template_name in query_templates:
+            query_template = env.get_template(template_name)
+            table_name = template_name.split("/")[0]
+            write_sql(
+                output_dir / target_project,
+                f"{app_id}_derived.{table_name}",
+                "query.sql",
+                query_template.render(
+                    target_project=target_project,
+                    app_id=app_id,
+                    ping_name=PING_NAME,
+                ),
+            )
+
+            # Copy metadata.yaml and schema.yaml files
+            template_dir = TEMPLATES / "derived" / table_name
+            output_table_dir = (
+                output_dir / target_project / f"{app_id}_derived" / table_name
+            )
+
+            if (template_dir / "metadata.yaml").exists():
+                with open(output_table_dir / "metadata.yaml", "w") as f:
+                    rendered_metadata = env.get_template(
+                        table_name + "/metadata.yaml"
+                    ).render(app_id=app_id)
+                    f.write(rendered_metadata)
+
+            if (template_dir / "schema.yaml").exists():
+                shutil.copyfile(
+                    template_dir / "schema.yaml",
+                    output_table_dir / "schema.yaml",
+                )
+
+
+def generate_aggregates(output_dir, target_project):
+    """Generate aggregate view SQL queries and metadata for gecko trace data.
+
+    Each ``<view>/view.sql`` template under ``templates/aggregates`` is rendered
+    with the full ``APPLICATIONS`` tuple and written below
+    ``<output_dir>/<target_project>/gecko_trace_aggregates/<view>/``, together
+    with the view's ``metadata.yaml`` / ``schema.yaml`` when the template
+    directory provides them. ``dataset_metadata.yaml`` is copied last.
+    """
+    templates_dir = TEMPLATES / "aggregates"
+    env = Environment(loader=FileSystemLoader(templates_dir))
+    dataset_output_dir = output_dir / target_project / "gecko_trace_aggregates"
+
+    for template_name in env.list_templates(extensions=["sql"]):
+        view_template = env.get_template(template_name)
+        view = template_name.split("/")[0]
+        write_sql(
+            output_dir / target_project,
+            f"{target_project}.gecko_trace_aggregates.{view}",
+            "view.sql",
+            view_template.render(
+                target_project=target_project,
+                applications=APPLICATIONS,
+                ping_name=PING_NAME,
+            ),
+        )
+
+        # Copy the view's metadata.yaml / schema.yaml (when present) next to view.sql
+        view_output_dir = dataset_output_dir / view
+        for basename in ("metadata.yaml", "schema.yaml"):
+            if (templates_dir / view / basename).exists():
+                shutil.copyfile(
+                    templates_dir / view / basename,
+                    view_output_dir / basename,
+                )
+
+    # Copy dataset_metadata.yaml file
+    dataset_output_dir.mkdir(parents=True, exist_ok=True)
+    shutil.copyfile(
+        templates_dir / "dataset_metadata.yaml",
+        dataset_output_dir / "dataset_metadata.yaml",
+    )
+
+
+@click.command()
+@click.option(
+    "--output-dir",
+    help="Output directory generated SQL is written to.",
+    type=click.Path(file_okay=False),
+    default="sql",
+    show_default=True,
+)
+@click.option(
+    "--target-project",
+    help="Which project the queries should be generated for.",
+    default="moz-fx-data-shared-prod",
+    show_default=True,
+)
+@use_cloud_function_option
+def generate(output_dir, target_project, use_cloud_function):
+    """Generate all gecko trace SQL queries and metadata files.
+
+    This command generates both derived table queries and aggregate views
+    for gecko trace data across all supported applications.
+    """
+    # `use_cloud_function` is supplied by the shared CLI option; it is not used here.
+    output_dir = Path(output_dir)
+    generate_derived(output_dir, target_project)
+    generate_aggregates(output_dir, target_project)
+
+
+if __name__ == "__main__":
+    generate()  # type: ignore
diff --git a/sql_generators/gecko_trace/templates/aggregates/dataset_metadata.yaml b/sql_generators/gecko_trace/templates/aggregates/dataset_metadata.yaml
new file mode 100644
index 00000000000..f2beb09375d
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/aggregates/dataset_metadata.yaml
@@ -0,0 +1,15 @@
+---
+friendly_name: Gecko Trace
+description: |-
+  Unified views of spans and traces from the Gecko Firefox engine across all Firefox applications.
+
+  This dataset provides aggregate views that combine trace data from Firefox Desktop,
+  Firefox for Android (Fenix Nightly), and Firefox for Android (Beta) into unified
+  interfaces for cross-application analysis and monitoring.
+labels: {}
+dataset_base_acl: view
+user_facing: true
+workgroup_access:
+  - role: roles/bigquery.dataViewer
+    members:
+      - workgroup:mozilla-confidential
diff --git a/sql_generators/gecko_trace/templates/aggregates/signatures/metadata.yaml b/sql_generators/gecko_trace/templates/aggregates/signatures/metadata.yaml
new file mode 100644
index 00000000000..ea667f9cde7
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/aggregates/signatures/metadata.yaml
@@ -0,0 +1,20 @@
+---
+friendly_name: Gecko Trace Signatures
+description: |-
+  Signature-based trace statistics from Gecko across all Firefox applications.
+
+  This view provides a unified interface to signature data from multiple Firefox
+  applications including Firefox Desktop, Firefox for Android (Fenix Nightly),
+  and Firefox for Android (Beta). Each row represents aggregated statistics
+  for a specific trace signature pattern, with an additional application field
+  to identify the source.
+
+  Signatures are calculated based on the hierarchical structure and attributes
+  of spans within traces, allowing identification of common execution patterns.
+  This enables performance analysis, anomaly detection, and comparison of
+  execution patterns across different Firefox applications. The average duration
+  and hit count metrics help identify performance regressions and optimization
+  opportunities.
+
+owners:
+  - mvanstraten@mozilla.com
diff --git a/sql_generators/gecko_trace/templates/aggregates/signatures/view.sql b/sql_generators/gecko_trace/templates/aggregates/signatures/view.sql
new file mode 100644
index 00000000000..f5beb81b9a5
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/aggregates/signatures/view.sql
@@ -0,0 +1,13 @@
+CREATE OR REPLACE VIEW
+  `{{ target_project }}.gecko_trace_aggregates.signatures` AS
+{% for app_id in applications -%}
+SELECT
+  -- Identifies the application whose derived table the row comes from
+  "{{ app_id }}" AS application,
+  *
+FROM
+  `{{ target_project }}.{{ app_id }}_derived.gecko_trace_signatures_v1`
+{%- if not loop.last %}
+UNION ALL
+{% endif -%}
+{% endfor %}
diff --git a/sql_generators/gecko_trace/templates/aggregates/spans/metadata.yaml b/sql_generators/gecko_trace/templates/aggregates/spans/metadata.yaml
new file mode 100644
index 00000000000..843140b061d
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/aggregates/spans/metadata.yaml
@@ -0,0 +1,17 @@
+---
+friendly_name: Gecko Trace Spans
+description: |-
+  Individual spans from Gecko traces across all Firefox applications.
+
+  This view provides a unified interface to span data from multiple Firefox
+  applications including Firefox Desktop, Firefox for Android (Fenix Nightly),
+  and Firefox for Android (Beta). Each row represents a single span within
+  a trace, with an additional application field to identify the source.
+
+  Spans represent units of work or operations within Firefox, such as DOM
+  parsing, network requests, JavaScript execution, or rendering operations.
+  Each span includes timing information, hierarchical relationships, and
+  contextual metadata for performance analysis and debugging.
+
+owners:
+  - mvanstraten@mozilla.com
diff --git a/sql_generators/gecko_trace/templates/aggregates/spans/view.sql b/sql_generators/gecko_trace/templates/aggregates/spans/view.sql
new file mode 100644
index 00000000000..4d554948567
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/aggregates/spans/view.sql
@@ -0,0 +1,13 @@
+CREATE OR REPLACE VIEW
+  `{{ target_project }}.gecko_trace_aggregates.spans` AS
+{% for app_id in applications -%}
+SELECT
+  -- Identifies the application whose derived table the row comes from
+  "{{ app_id }}" AS application,
+  *
+FROM
+  `{{ target_project }}.{{ app_id }}_derived.gecko_trace_spans_v1`
+{%- if not loop.last %}
+UNION ALL
+{% endif -%}
+{% endfor %}
diff --git a/sql_generators/gecko_trace/templates/aggregates/traces/metadata.yaml b/sql_generators/gecko_trace/templates/aggregates/traces/metadata.yaml
new file mode 100644
index 00000000000..56bb455e5d2
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/aggregates/traces/metadata.yaml
@@ -0,0 +1,19 @@
+---
+friendly_name: Gecko Trace Traces
+description: |-
+  Complete traces from Gecko across all Firefox applications.
+
+  This view provides a unified interface to trace data from multiple Firefox
+  applications including Firefox Desktop, Firefox for Android (Fenix Nightly),
+  and Firefox for Android (Beta). Each row represents a complete trace with
+  an additional application field to identify the source.
+
+  Each trace contains a hierarchical tree structure of spans with their
+  parent-child relationships preserved in the root_span field. This enables
+  analysis of complete execution flows, performance bottlenecks, and
+  behavioral patterns across different Firefox applications. The signature
+  field allows grouping traces with similar execution patterns for
+  comparative performance analysis.
+
+owners:
+  - mvanstraten@mozilla.com
diff --git a/sql_generators/gecko_trace/templates/aggregates/traces/view.sql b/sql_generators/gecko_trace/templates/aggregates/traces/view.sql
new file mode 100644
index 00000000000..7ad344c8b9e
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/aggregates/traces/view.sql
@@ -0,0 +1,13 @@
+CREATE OR REPLACE VIEW
+  `{{ target_project }}.gecko_trace_aggregates.traces` AS
+{% for app_id in applications -%}
+SELECT
+  -- Identifies the application whose derived table the row comes from
+  "{{ app_id }}" AS application,
+  *
+FROM
+  `{{ target_project }}.{{ app_id }}_derived.gecko_trace_traces_v1`
+{%- if not loop.last %}
+UNION ALL
+{% endif -%}
+{% endfor %}
diff --git a/sql_generators/gecko_trace/templates/derived/gecko_trace_signatures_v1/metadata.yaml b/sql_generators/gecko_trace/templates/derived/gecko_trace_signatures_v1/metadata.yaml
new file mode 100644
index 00000000000..5818a3a9733
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/derived/gecko_trace_signatures_v1/metadata.yaml
@@ -0,0 +1,30 @@
+---
+friendly_name: Gecko Trace Signatures
+description: |-
+  Statistics for Gecko traces grouped by signature.
+
+  This table groups traces with similar execution patterns (same signature)
+  and provides summary statistics including average duration and hit counts.
+  Signatures are calculated based on the hierarchical structure and attributes
+  of spans within each trace, allowing identification of common execution
+  patterns.
+
+owners:
+  - mvanstraten@mozilla.com
+
+labels:
+  incremental: true
+  schedule: daily
+
+scheduling:
+  dag_name: bqetl_gecko_trace
+  task_group: {{ app_id }}
+
+bigquery:
+  time_partitioning:
+    type: day
+    field: submission_date
+    require_partition_filter: true
+  clustering:
+    fields:
+      - signature
diff --git a/sql_generators/gecko_trace/templates/derived/gecko_trace_signatures_v1/query.sql b/sql_generators/gecko_trace/templates/derived/gecko_trace_signatures_v1/query.sql
new file mode 100644
index 00000000000..f2b2711e460
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/derived/gecko_trace_signatures_v1/query.sql
@@ -0,0 +1,12 @@
+SELECT
+  -- Partition column named by time_partitioning.field in metadata.yaml
+  @submission_date AS submission_date,
+  signature,
+  AVG(duration_nano) AS average_duration_nano,
+  COUNT(*) AS hits
+FROM
+  `{{ target_project }}.{{ app_id }}_derived.gecko_trace_traces_v1`
+WHERE
+  DATE(submission_timestamp) = @submission_date
+GROUP BY
+  signature
diff --git a/sql_generators/gecko_trace/templates/derived/gecko_trace_signatures_v1/schema.yaml b/sql_generators/gecko_trace/templates/derived/gecko_trace_signatures_v1/schema.yaml
new file mode 100644
index 00000000000..47fce764155
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/derived/gecko_trace_signatures_v1/schema.yaml
@@ -0,0 +1,26 @@
+---
+fields:
+  - name: submission_date
+    type: DATE
+    mode: NULLABLE
+    description: |-
+      Submission date of the aggregated traces; partition column of the table.
+  - name: signature
+    type: STRING
+    mode: NULLABLE
+    description: |-
+      Hash signature calculated from the trace structure and span attributes.
+      Traces with identical signatures have similar execution patterns.
+  - name: average_duration_nano
+    type: FLOAT64
+    mode: NULLABLE
+    description: |-
+      Average duration in nanoseconds for all traces with this signature.
+      Calculated as the time between the earliest start time and latest end time
+      across all spans in the trace.
+  - name: hits
+    type: INT64
+    mode: NULLABLE
+    description: |-
+      Number of traces that share this signature within the time period.
+      Higher hit counts indicate more common execution patterns.
diff --git a/sql_generators/gecko_trace/templates/derived/gecko_trace_spans_v1/metadata.yaml b/sql_generators/gecko_trace/templates/derived/gecko_trace_spans_v1/metadata.yaml
new file mode 100644
index 00000000000..03eb1f0de8f
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/derived/gecko_trace_spans_v1/metadata.yaml
@@ -0,0 +1,33 @@
+---
+friendly_name: Gecko Trace Spans
+description: |-
+  Individual spans extracted from Gecko traces in Firefox applications.
+
+  This table contains flattened span data from Gecko trace telemetry, where each
+  row represents a single span within a trace. Spans represent units of work or
+  operations within Firefox, such as DOM parsing, network requests, JavaScript
+  execution, or rendering operations.
+
+  Each span includes timing information, hierarchical relationships (via parent_span_id),
+  and contextual metadata through resource and scope fields.
+
+owners:
+  - mvanstraten@mozilla.com
+
+labels:
+  incremental: true
+  schedule: daily
+
+scheduling:
+  dag_name: bqetl_gecko_trace
+  task_group: {{ app_id }}
+
+bigquery:
+  time_partitioning:
+    type: day
+    field: submission_timestamp
+    require_partition_filter: true
+  clustering:
+    fields:
+      - trace_id
+      - span_id
diff --git a/sql_generators/gecko_trace/templates/derived/gecko_trace_spans_v1/query.sql b/sql_generators/gecko_trace/templates/derived/gecko_trace_spans_v1/query.sql
new file mode 100644
index 00000000000..f2b7c6a2ad7
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/derived/gecko_trace_spans_v1/query.sql
@@ -0,0 +1,27 @@
+WITH ping AS (
+  SELECT
+    submission_timestamp,
+    JSON_QUERY_ARRAY(metrics.object.gecko_trace_traces_data, "$.resource_spans") AS resource_spans
+  FROM
+    `{{ target_project }}.{{ app_id }}.{{ ping_name }}`
+  WHERE
+    DATE(submission_timestamp) = @submission_date
+)
+SELECT
+  -- Partition column; also read by the gecko_trace_traces_v1 query
+  submission_timestamp,
+  JSON_VALUE(span, "$.trace_id") AS trace_id,
+  JSON_VALUE(span, '$.span_id') AS span_id,
+  JSON_VALUE(span, '$.parent_span_id') AS parent_span_id,
+  JSON_VALUE(span, '$.name') AS span_name,
+  SAFE_CAST(JSON_VALUE(span, '$.start_time_unix_nano') AS INT64) AS start_time_unix_nano,
+  SAFE_CAST(JSON_VALUE(span, '$.end_time_unix_nano') AS INT64) AS end_time_unix_nano,
+  JSON_QUERY_ARRAY(span, '$.events') AS events,
+  JSON_QUERY(resource_span, '$.resource') AS resource,
+  JSON_QUERY(scope_span, '$.scope') AS scope
+FROM
+  ping
+CROSS JOIN
+  UNNEST(resource_spans) AS resource_span,
+  UNNEST(JSON_QUERY_ARRAY(resource_span, '$.scope_spans')) AS scope_span,
+  UNNEST(JSON_QUERY_ARRAY(scope_span, '$.spans')) AS span;
diff --git a/sql_generators/gecko_trace/templates/derived/gecko_trace_spans_v1/schema.yaml b/sql_generators/gecko_trace/templates/derived/gecko_trace_spans_v1/schema.yaml
new file mode 100644
index 00000000000..f6750c00d4e
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/derived/gecko_trace_spans_v1/schema.yaml
@@ -0,0 +1,65 @@
+---
+fields:
+  - name: submission_timestamp
+    type: TIMESTAMP
+    mode: NULLABLE
+    description: |-
+      Submission timestamp of the ping the span was extracted from.
+      Partition column of the table.
+  - name: trace_id
+    type: STRING
+    mode: NULLABLE
+    description: |-
+      Unique identifier for the trace. All spans within a trace share
+      the same trace_id, allowing reconstruction of the complete execution flow.
+  - name: span_id
+    type: STRING
+    mode: NULLABLE
+    description: |-
+      Unique identifier for this individual span within the trace.
+      Used to establish parent-child relationships between spans.
+  - name: parent_span_id
+    type: STRING
+    mode: NULLABLE
+    description: |-
+      Identifier of the parent span. NULL for root spans that have no parent.
+      Used to build the hierarchical structure of spans within a trace.
+  - name: span_name
+    type: STRING
+    mode: NULLABLE
+    description: |-
+      Human-readable name describing what this span represents,
+      such as function names, operation types, or processing phases.
+  - name: start_time_unix_nano
+    type: INT64
+    mode: NULLABLE
+    description: |-
+      Start time of the span as a Unix timestamp in nanoseconds.
+      Represents when the operation or process began execution.
+  - name: end_time_unix_nano
+    type: INT64
+    mode: NULLABLE
+    description: |-
+      End time of the span as a Unix timestamp in nanoseconds.
+      Represents when the operation or process completed execution.
+  - name: events
+    type: JSON
+    mode: REPEATED
+    description: |-
+      Array of events that occurred during the span's execution.
+      Each event contains timing information and attributes describing
+      significant points or milestones within the span's lifecycle.
+  - name: resource
+    type: JSON
+    mode: NULLABLE
+    description: |-
+      Resource information associated with the span, including attributes
+      that describe the environment, process, or system context where
+      the span was generated.
+  - name: scope
+    type: JSON
+    mode: NULLABLE
+    description: |-
+      Scope information for the span, typically containing the name
+      and version of the instrumentation library or component that
+      generated the span data.
diff --git a/sql_generators/gecko_trace/templates/derived/gecko_trace_traces_v1/metadata.yaml b/sql_generators/gecko_trace/templates/derived/gecko_trace_traces_v1/metadata.yaml
new file mode 100644
index 00000000000..e20318299b0
--- /dev/null
+++ b/sql_generators/gecko_trace/templates/derived/gecko_trace_traces_v1/metadata.yaml
@@ -0,0 +1,35 @@
+---
+friendly_name: Gecko Trace Traces
+description: |-
+  Complete Gecko traces with hierarchical span structures and calculated signatures.
+
+  This table combines individual spans into complete traces, building the
+  hierarchical tree structure of spans within each trace. Each row represents
+  a single trace containing:
+  - Root span with nested child spans in a tree structure
+  - Total trace duration calculated from span timings
+  - Signature hash for grouping traces with similar execution patterns
+
+  The root_span field contains the complete hierarchical structure of all
+  spans within the trace, with child spans nested in childSpans arrays.
+ +owners: + - mvanstraten@mozilla.com + +labels: + incremental: true + schedule: daily + +scheduling: + dag_name: bqetl_gecko_trace + task_group: {{ app_id }} + +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: true + clustering: + fields: + - trace_id + - signature diff --git a/sql_generators/gecko_trace/templates/derived/gecko_trace_traces_v1/query.sql b/sql_generators/gecko_trace/templates/derived/gecko_trace_traces_v1/query.sql new file mode 100644 index 00000000000..7ceb1ea5b3f --- /dev/null +++ b/sql_generators/gecko_trace/templates/derived/gecko_trace_traces_v1/query.sql @@ -0,0 +1,51 @@ +WITH spans AS ( + SELECT + submission_timestamp, + trace_id, + span_id, + parent_span_id, + span_name AS name, + start_time_unix_nano, + end_time_unix_nano, + events, + resource, + scope + FROM + `{{ target_project }}.{{ app_id }}_derived.gecko_trace_spans_v1` + WHERE + DATE(submission_timestamp) = @submission_date +), +trace AS ( + SELECT + MIN(submission_timestamp) AS submission_timestamp, + trace_id, + MAX(end_time_unix_nano) - MIN(start_time_unix_nano) AS duration_nano, + mozfun.gecko_trace.build_root_span( + ARRAY_AGG( + TO_JSON( + STRUCT( + span_id, + parent_span_id, + name, + start_time_unix_nano, + end_time_unix_nano, + events, + resource, + scope + ) + ) + ) + ) AS root_span + FROM + spans + GROUP BY + trace_id +) +SELECT + submission_timestamp, + mozfun.gecko_trace.calculate_signature(root_span) AS signature, + trace_id, + duration_nano, + root_span +FROM + trace diff --git a/sql_generators/gecko_trace/templates/derived/gecko_trace_traces_v1/schema.yaml b/sql_generators/gecko_trace/templates/derived/gecko_trace_traces_v1/schema.yaml new file mode 100644 index 00000000000..86ad6129f44 --- /dev/null +++ b/sql_generators/gecko_trace/templates/derived/gecko_trace_traces_v1/schema.yaml @@ -0,0 +1,36 @@ +--- +fields: + - name: submission_timestamp + type: TIMESTAMP + mode: NULLABLE + description: |- + The earliest 
submission timestamp for any span within this trace. + Used to determine when the trace was first observed. + - name: trace_id + type: STRING + mode: NULLABLE + description: |- + Unique identifier for the trace. All spans within a trace share + the same trace_id. + - name: duration_nano + type: INT64 + mode: NULLABLE + description: |- + Total duration of the trace in nanoseconds, calculated as the + difference between the latest end time and earliest start time + across all spans in the trace. + - name: root_span + type: JSON + mode: NULLABLE + description: |- + Complete hierarchical structure of the trace as a JSON object. + Contains the root span with all child spans nested in childSpans + arrays, preserving the parent-child relationships and timing + information for the entire trace execution flow. + - name: signature + type: STRING + mode: NULLABLE + description: |- + Calculated hash signature for the trace based on its structure + and span attributes. Traces with identical signatures represent + similar execution patterns and can be grouped for analysis.