SOCIALPLAT-470: Add server-side Ruby outputter

Co-authored-by: Jan-Erik Rediger <[email protected]>
mozilla · Oct 19, 2023 · 7adb017 · 7adb017
1 parent 5079870
commit 7adb017
Show file tree

Hide file tree

Showing 11 changed files with 781 additions and 0 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## Unreleased
 
+- Add Ruby log outputter (`ruby_server`) ([#620](https://github.com/mozilla/glean_parser/pull/620))
 - BREAKING CHANGE: `ping` lifetime metrics on the events ping are now disallowed ([#625](https://github.com/mozilla/glean_parser/pull/625))
 - Disallow `unit` field for anything but quantity ([#630](https://github.com/mozilla/glean_parser/pull/630)).
   Note that this was already considered the case, now the code enforces it.

diff --git a/glean_parser/ruby_server.py b/glean_parser/ruby_server.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Outputter to generate server ruby code for collecting events.
+
+This outputter is different from the rest of the outputters in that the code it
+generates does not use the Glean SDK. It is meant to be used to collect events
+using the "events as pings" pattern in server-side environments. In these environments
+SDK assumptions about measurement windows and connectivity don't hold.
+Generated code takes care of assembling pings with metrics, serializing to messages
+conforming to Glean schema, and logging using a standard Ruby logger.
+Then it's the role of the ingestion pipeline to pick the messages up and process.
+
+Warning: this outputter supports a limited set of metrics,
+see `SUPPORTED_METRIC_TYPES` below.
+"""
+from collections import defaultdict
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from . import __version__, metrics, util
+
+SUPPORTED_METRIC_TYPES = ["string", "event"]
+
+
+def ping_class_name(pingName: str) -> str:
+    """Return the name of the generated Ruby logger class for ping `pingName`."""
+    camelized = util.Camelize(pingName)
+    return "Glean{}Logger".format(camelized)
+
+
+def generate_metric_name(metric: metrics.Metric) -> str:
+    """Return the dotted `category.name` identifier of `metric`."""
+    return "{}.{}".format(metric.category, metric.name)
+
+
+def generate_metric_argument_name(metric: metrics.Metric) -> str:
+    """Return `category_name`, the argument name used for `metric` in the template."""
+    return "{}_{}".format(metric.category, metric.name)
+
+
+def generate_metric_argument_description(metric: metrics.Metric) -> str:
+    """Return the metric description on one line, without trailing whitespace."""
+    single_line = " ".join(metric.description.split("\n"))
+    return single_line.rstrip()
+
+
+def event_class_name(metric: metrics.Metric) -> str:
+    """Return the name of the generated Ruby class for the event `metric`."""
+    argument_name = generate_metric_argument_name(metric)
+    return util.Camelize(argument_name) + "Event"
+
+
+def output_ruby(
+    objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]]
+) -> None:
+    """
+    Given a tree of objects, output ruby code to `output_dir`.
+
+    The output is a single file (`server_events.rb`) containing all the code for
+    assembling pings with metrics, serializing, and submitting.
+
+    Nothing is written, and a message is printed instead, when `objs` contains
+    a ping definition or when no event metric is sent in the `events` ping.
+    Metrics of unsupported types and references to pings other than `events`
+    are reported and ignored.
+
+    :param objs: A tree of objects (metrics and pings) as returned from
+        `parser.parse_objects`.
+    :param output_dir: Path to an output directory to write to.
+    :param options: Not used by this outputter.
+    """
+
+    template = util.get_jinja2_template(
+        "ruby_server.jinja2",
+        filters=(
+            ("ping_class_name", ping_class_name),
+            ("metric_name", generate_metric_name),
+            ("metric_argument_name", generate_metric_argument_name),
+            ("metric_argument_description", generate_metric_argument_description),
+            ("event_class_name", event_class_name),
+        ),
+    )
+
+    # In this environment we don't use a concept of measurement window for collecting
+    # metrics. Only "events as pings" are supported.
+    # For each ping we generate code which contains all the logic for assembling it
+    # with metrics, serializing, and submitting. Therefore we don't generate classes for
+    # each metric as in standard outputters.
+    PING_METRIC_ERROR_MSG = (
+        " Server-side environment is simplified and only supports the events ping type."
+        + " You should not be including pings.yaml with your parser call"
+        + " or referencing any other pings in your metric configuration."
+    )
+    if "pings" in objs:
+        print("❌ Ping definition found." + PING_METRIC_ERROR_MSG)
+        return
+
+    # Go through all metrics in objs and build a map of
+    # ping->metric type->list of metrics
+    # for easier processing in the template.
+    ping_to_metrics: Dict[str, Dict[str, List[metrics.Metric]]] = defaultdict(dict)
+    for _category_key, category_val in objs.items():
+        for _metric_name, metric in category_val.items():
+            if isinstance(metric, metrics.Metric):
+                if metric.type not in SUPPORTED_METRIC_TYPES:
+                    print(
+                        "❌ Ignoring unsupported metric type: "
+                        + f"{metric.type}:{metric.name}."
+                        + " Reach out to Glean team to add support for this"
+                        + " metric type."
+                    )
+                    continue
+                for ping in metric.send_in_pings:
+                    if ping != "events":
+                        # The shared message ends with a full stop, so the
+                        # trailing sentence needs its own leading space.
+                        print(
+                            "❌ Non-events ping reference found."
+                            + PING_METRIC_ERROR_MSG
+                            + f" Ignoring the {ping} ping type."
+                        )
+                        continue
+                    metrics_by_type = ping_to_metrics[ping]
+                    metrics_list = metrics_by_type.setdefault(metric.type, [])
+                    metrics_list.append(metric)
+    # The template iterates over pings["events"]["event"], so at least one
+    # event metric must be present before rendering.
+    if "event" not in ping_to_metrics["events"]:
+        print("❌ No event metrics found...at least one event metric is required")
+        return
+    extension = ".rb"
+    filepath = output_dir / ("server_events" + extension)
+    with filepath.open("w", encoding="utf-8") as fd:
+        fd.write(
+            template.render(
+                parser_version=__version__,
+                pings=ping_to_metrics,
+            )
+        )
diff --git a/glean_parser/templates/ruby_server.jinja2 b/glean_parser/templates/ruby_server.jinja2
@@ -0,0 +1,202 @@
+{# The final Ruby code is autogenerated, but this
+Jinja2 template is not. Please file bugs! #}
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# AUTOGENERATED BY glean_parser v{{ parser_version }}. DO NOT EDIT.
+
+# frozen_string_literal: true
+
+# requires json, securerandom, logger, and rbconfig libs
+require 'json'
+require 'securerandom'
+require 'logger'
+require 'rbconfig'
+
+# this will be used for identifying logs that need to forward to Moz Data Pipeline
+GLEAN_EVENT_MOZLOG_TYPE = 'glean-server-event'
+
+# helper module to get OS level info for logs
+module OS
+  # Maps RbConfig's host_os string to a friendly OS name;
+  # values matching none of the patterns are returned unchanged.
+  def self.name
+    host_os = RbConfig::CONFIG['host_os']
+    case host_os
+    when /linux/ then 'Linux'
+    when /darwin/ then 'OS X'
+    when /mswin|mingw32|windows/ then 'Windows'
+    when /solaris/ then 'Solaris'
+    when /bsd/ then 'BSD'
+    else host_os
+    end
+  end
+end
+
+module Glean
+{% for ping, metrics_by_type in pings.items() %}
+  class {{ ping|ping_class_name }}
+    # Stores the application metadata, configures a JSON-emitting logger and
+    # creates one event object per generated event metric.
+    def initialize(app_id:, app_display_version:, app_channel:, logger_options:)
+      @app_id = app_id # string - Application Id to identify application per Glean standards
+      @app_display_version = app_display_version # string - Version of application emitting the event
+      @app_channel = app_channel # string - Channel of application emitting the event
+      @logger = Logger.new(logger_options)
+
+      # Logger configuration
+      # Each log line is a single JSON object; the ping itself goes under `Fields`.
+      @logger.formatter = proc do |severity, datetime, _progname, msg|
+        date_format = datetime.to_i
+        logger_name = 'glean'
+        "#{JSON.dump(Timestamp: date_format.to_s, Logger: logger_name.to_s, Type: GLEAN_EVENT_MOZLOG_TYPE.to_s, Severity: severity.ljust(5).to_s, Pid: Process.pid.to_s, Fields: msg)}\n"
+      end
+
+      # Generated events
+      {% for metric_type, metrics in metrics_by_type.items() %}
+      {% for metric in metrics %}
+      {% if metric.type == 'event' %}
+      # {{ metric|metric_argument_description }}
+      @{{ metric|metric_argument_name }} = {{ metric|event_class_name }}.new(self)
+      {% endif %}
+      {% endfor %}
+      {% endfor %}
+    end
+
+    # Assembles a ping carrying the given event and the non-event metrics,
+    # serializes its payload to JSON and writes the ping through @logger.
+    # Called by the `record` method of the generated event classes.
+    # Metrics passed as nil are dropped; remaining metric values are converted to strings.
+    def _record(
+      {% for metric_type, metrics in metrics_by_type.items() %}
+      {% if metric_type != 'event' %}
+      {% for metric in metrics %}
+      # {{ metric|metric_argument_description }}
+      {{ metric|metric_argument_name }}:,
+      {% endfor %}
+      {% endif %}
+      {% endfor %}
+      # full user_agent value from controller context
+      user_agent:,
+      # ip address value from controller context
+      ip_address:,
+      # event being sent in the ping
+      event:
+    )
+      t_utc = Time.now.utc
+      # create raw metrics hash that can have nil values
+      metrics_raw = {
+        {% for metric_type, metrics in metrics_by_type.items() %}
+        {% if metric_type != 'event' %}
+        '{{ metric_type }}' => {
+        {% for metric in metrics %}
+          '{{ metric|metric_name }}' => {{ metric|metric_argument_name }},
+        {% endfor %}
+        },
+        {% endif %}
+        {% endfor %}
+      }
+      # filter out key value pairs where value is nil
+      metrics_raw.each do |key, value|
+        metrics_raw[key] = value.compact.transform_values(&:to_s)
+      end
+      # filter out metrics with empty hashes
+      metrics = metrics_raw.reject { |_k, v| v.empty? }
+      event_payload = {
+        # `Unknown` fields below are required in the Glean schema, however they are not useful in server context.
+        'client_info' => {
+          'telemetry_sdk_build' => 'glean_parser v{{ parser_version }}',
+          'first_run_date' => 'Unknown',
+          'os' => OS.name,
+          'os_version' => 'Unknown',
+          'architecture' => 'Unknown',
+          'app_build' => 'Unknown',
+          'app_display_version' => @app_display_version,
+          'app_channel' => @app_channel,
+        },
+        # NOTE(review): start_time/end_time are Time objects serialized by to_json below;
+        # confirm the resulting string format is what the ingestion pipeline expects.
+        'ping_info' => {
+          'seq' => 0,
+          'start_time' => t_utc,
+          'end_time' => t_utc,
+        },
+        'metrics' => metrics,
+        'events' => event,
+      }
+      serialized_event_payload = event_payload.to_json
+      # This is the message structure that Decoder expects: https://github.com/mozilla/gcp-ingestion/pull/2400.
+      ping = {
+        'document_namespace' => @app_id,
+        'document_type' => '{{ ping }}',
+        'document_version' => '1',
+        'document_id' => SecureRandom.uuid,
+        'user_agent' => user_agent,
+        'ip_address' => ip_address,
+        'payload' => serialized_event_payload,
+      }
+      @logger.info(ping)
+    end
+    # expose each generated event object through a reader/writer pair
+    {% for metric_type, metrics in metrics_by_type.items() %}
+    {% for metric in metrics %}
+    {% if metric.type == 'event' %}
+    attr_accessor :{{ metric|metric_argument_name }}
+    {% endif %}
+    {% endfor %}
+    {% endfor %}
+  end
+{% endfor %}
+
+{% for event in pings["events"]["event"] %}
+  class {{ event|event_class_name }}
+    # {{ event|metric_argument_description }}
+    def initialize(glean)
+      @glean = glean
+    end
+
+    # Wraps this event in a single-element events array and forwards it,
+    # together with the non-event metrics, user agent and IP address,
+    # to the logger's _record method.
+    def record(
+      # extras to pass into event detail
+      {% for extra, metadata in event.extra_keys.items() %}
+      {{ extra }}:,
+      {% endfor %}
+      {% for ping, metric_types in pings.items() %}
+      {% for metric_type, metrics in metric_types.items() %}
+      {% if metric_type != 'event' %}
+      {% for metric in metrics %}
+      # {{ metric|metric_argument_description }}
+      {{ metric|metric_argument_name }}:,
+      {% endfor %}
+      {% endif %}
+      {% endfor %}
+      {% endfor %}
+      # full user_agent value from controller context
+      user_agent:,
+      # ip address value from controller context
+      ip_address:
+    )
+      event = [
+        {
+          'category' => '{{ event.category }}',
+          'name' => '{{ event.name }}',
+          'timestamp' => (Time.now.utc.to_f * 1000).to_i,
+          'extra' => [
+            {% for extra, metadata in event.extra_keys.items() %}
+            ['{{ extra }}', {{ extra }}],
+            {% endfor %}
+          ].to_h,
+        },
+      ]
+      @glean._record(
+        {% for ping, metric_types in pings.items() %}
+        {% for metric_type, metrics in metric_types.items() %}
+        {% if metric_type != 'event' %}
+        {% for metric in metrics %}
+        {{ metric|metric_argument_name }}: {{ metric|metric_argument_name }},
+        {% endfor %}
+        {% endif %}
+        {% endfor %}
+        {% endfor %}
+        user_agent: user_agent,
+        ip_address: ip_address,
+        event: event
+      )
+    end
+  end
+{% endfor %}
+end
diff --git a/glean_parser/translate.py b/glean_parser/translate.py
@@ -22,6 +22,7 @@
 from . import kotlin
 from . import markdown
 from . import metrics
+from . import ruby_server
 from . import rust
 from . import swift
 from . import util
@@ -57,6 +58,7 @@ def __init__(
     "typescript": Outputter(javascript.output_typescript, []),
     "javascript_server": Outputter(javascript_server.output_javascript, []),
     "typescript_server": Outputter(javascript_server.output_typescript, []),
+    "ruby_server": Outputter(ruby_server.output_ruby, []),
     "kotlin": Outputter(kotlin.output_kotlin, ["*.kt"]),
     "markdown": Outputter(markdown.output_markdown, []),
     "swift": Outputter(swift.output_swift, ["*.swift"]),