From 33c939fc6ca06ecb0b1e7f03425025733a9d35ac Mon Sep 17 00:00:00 2001
From: Jan-Erik Rediger
Date: Mon, 23 Jan 2023 18:09:33 +0100
Subject: [PATCH] Remove internal-only fields from data dump

---
 CHANGELOG.md            |  1 +
 glean_parser/metrics.py |  2 ++
 tests/test_parser.py    | 64 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b1a57246..df29bfb9f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## Unreleased
 
+- BUGFIX: Remove internal-only fields from serialized metrics data ([#550](https://github.com/mozilla/glean_parser/pull/550))
 - FEATURE: New subcommand: `dump` to dump the metrics data as JSON ([#550](https://github.com/mozilla/glean_parser/pull/550))
 
 ## 6.4.0
diff --git a/glean_parser/metrics.py b/glean_parser/metrics.py
index ab2c71bee..a091e4663 100644
--- a/glean_parser/metrics.py
+++ b/glean_parser/metrics.py
@@ -177,6 +177,8 @@ def serialize(self) -> Dict[str, util.JSONType]:
                 d[key] = [x.name for x in val]
         del d["name"]
         del d["category"]
+        d.pop("_config", None)
+        d.pop("_generate_enums", None)
         return d
 
     def _serialize_input(self) -> Dict[str, util.JSONType]:
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 4e2d135b5..762a098f1 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -4,6 +4,7 @@
 # http://creativecommons.org/publicdomain/zero/1.0/
 
 from pathlib import Path
+import json
 import re
 import sys
 import textwrap
@@ -901,3 +902,66 @@ def test_no_lint_sorted():
     assert all_objects.value["category"]["metric"].no_lint == ["lint1", "lint2"]
     assert all_objects.value["pings"]["ping"].no_lint == ["lint1", "lint2"]
     assert all_objects.value["tags"]["tag"].no_lint == ["lint1", "lint2"]
+
+
+def test_no_internal_fields_exposed():
+    """
+    We accidentally exposed fields like `_config` and `_generate_enums` before.
+    These ended up in probe-scraper output.
+
+    We replicate the code probe-scraper uses
+    and ensure we get the JSON we expect from it.
+    """
+
+    results = parser.parse_objects(
+        [
+            util.add_required(
+                {
+                    "category": {
+                        "metric": {
+                            "type": "event",
+                            "extra_keys": {
+                                "key_a": {"description": "desc-a", "type": "boolean"}
+                            },
+                        }
+                    },
+                }
+            ),
+        ]
+    )
+    errs = list(results)
+    assert len(errs) == 0
+
+    metrics = {
+        metric.identifier(): metric.serialize()
+        for category, probes in results.value.items()
+        for probe_name, metric in probes.items()
+    }
+
+    expected = {
+        "category.metric": {
+            "bugs": ["http://bugzilla.mozilla.org/12345678"],
+            "data_reviews": ["https://example.com/review/"],
+            "defined_in": {"line": 3},
+            "description": "DESCRIPTION...",
+            "disabled": False,
+            "expires": "never",
+            "extra_keys": {"key_a": {"description": "desc-a", "type": "boolean"}},
+            "gecko_datapoint": "",
+            "lifetime": "ping",
+            "metadata": {},
+            "no_lint": [],
+            "notification_emails": ["nobody@example.com"],
+            "send_in_pings": ["events"],
+            "type": "event",
+            "version": 0,
+        }
+    }
+    expected_json = json.dumps(expected, sort_keys=True, indent=2)
+
+    out_json = json.dumps(
+        metrics,
+        sort_keys=True,
+        indent=2,
+    )
+    assert expected_json == out_json