-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SOCIALPLAT-470: Add server-side Ruby outputter
Co-authored-by: Jan-Erik Rediger <[email protected]>
- Loading branch information
1 parent
5079870
commit 7adb017
Showing
11 changed files
with
781 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
||
""" | ||
Outputter to generate server ruby code for collecting events. | ||
This outputter is different from the rest of the outputters in that the code it | ||
generates does not use the Glean SDK. It is meant to be used to collect events | ||
using the "events as pings" pattern in server-side environments. In these environments
SDK assumptions about the measurement window and connectivity don't hold.
Generated code takes care of assembling pings with metrics, serializing to messages | ||
conforming to Glean schema, and logging using a standard Ruby logger. | ||
Then it's the role of the ingestion pipeline to pick the messages up and process. | ||
Warning: this outputter supports a limited set of metrics, | ||
see `SUPPORTED_METRIC_TYPES` below. | ||
""" | ||
from collections import defaultdict | ||
from pathlib import Path | ||
from typing import Any, Dict, List, Optional | ||
|
||
from . import __version__, metrics, util | ||
|
||
SUPPORTED_METRIC_TYPES = ["string", "event"] | ||
|
||
|
||
def ping_class_name(pingName: str) -> str:
    """Return the name of the generated Ruby logger class for the ping `pingName`."""
    camelized = util.Camelize(pingName)
    return "Glean{}Logger".format(camelized)
|
||
|
||
def generate_metric_name(metric: metrics.Metric) -> str:
    """Return the dotted `<category>.<name>` identifier of `metric`."""
    return "{}.{}".format(metric.category, metric.name)
|
||
|
||
def generate_metric_argument_name(metric: metrics.Metric) -> str:
    """Return the `<category>_<name>` form of `metric`, used as an argument name."""
    return "{}_{}".format(metric.category, metric.name)
|
||
|
||
def generate_metric_argument_description(metric: metrics.Metric) -> str:
    """Return the description of `metric` flattened onto a single line.

    Newlines become spaces and trailing whitespace is removed.
    """
    single_line = metric.description.replace("\n", " ")
    return single_line.rstrip()
|
||
|
||
def event_class_name(metric: metrics.Metric) -> str:
    """Return the name of the generated Ruby class for the event `metric`."""
    argument_name = generate_metric_argument_name(metric)
    return "{}Event".format(util.Camelize(argument_name))
|
||
|
||
def output_ruby(
    objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]]
) -> None:
    """
    Given a tree of objects, output ruby code to `output_dir`.

    The output is a single file, `server_events.rb`, containing all the code for
    assembling pings with metrics, serializing, and submitting.

    Nothing is written, and an explanatory message is printed instead, when
    `objs` contains a `pings` entry or when no event metric is sent in the
    `events` ping. Metrics of unsupported types and references to pings other
    than `events` are reported and skipped.

    :param objs: A tree of objects (metrics and pings) as returned from
        `parser.parse_objects`.
    :param output_dir: Path to an output directory to write to.
    :param options: Not read by this outputter.
    """

    template = util.get_jinja2_template(
        "ruby_server.jinja2",
        filters=(
            ("ping_class_name", ping_class_name),
            ("metric_name", generate_metric_name),
            ("metric_argument_name", generate_metric_argument_name),
            ("metric_argument_description", generate_metric_argument_description),
            ("event_class_name", event_class_name),
        ),
    )

    # In this environment we don't use a concept of measurement window for collecting
    # metrics. Only "events as pings" are supported.
    # For each ping we generate code which contains all the logic for assembling it
    # with metrics, serializing, and submitting. Therefore we don't generate classes for
    # each metric as in standard outputters.
    PING_METRIC_ERROR_MSG = (
        " Server-side environment is simplified and only supports the events ping type."
        + " You should not be including pings.yaml with your parser call"
        + " or referencing any other pings in your metric configuration."
    )
    if "pings" in objs:
        print("❌ Ping definition found." + PING_METRIC_ERROR_MSG)
        return

    # Go through all metrics in objs and build a map of
    # ping->list of metric categories->list of metrics
    # for easier processing in the template.
    ping_to_metrics: Dict[str, Dict[str, List[metrics.Metric]]] = defaultdict(dict)
    for _category_key, category_val in objs.items():
        for _metric_name, metric in category_val.items():
            if isinstance(metric, metrics.Metric):
                if metric.type not in SUPPORTED_METRIC_TYPES:
                    print(
                        "❌ Ignoring unsupported metric type: "
                        + f"{metric.type}:{metric.name}."
                        + " Reach out to Glean team to add support for this"
                        + " metric type."
                    )
                    continue
                for ping in metric.send_in_pings:
                    if ping != "events":
                        # The leading space keeps this sentence separated from
                        # the end of PING_METRIC_ERROR_MSG.
                        print(
                            "❌ Non-events ping reference found."
                            + PING_METRIC_ERROR_MSG
                            + f" Ignoring the {ping} ping type."
                        )
                        continue
                    metrics_by_type = ping_to_metrics[ping]
                    metrics_list = metrics_by_type.setdefault(metric.type, [])
                    metrics_list.append(metric)

    # The template iterates over pings["events"]["event"], so at least one
    # event metric must be present before rendering.
    if "event" not in ping_to_metrics["events"]:
        print("❌ No event metrics found...at least one event metric is required")
        return

    extension = ".rb"
    filepath = output_dir / ("server_events" + extension)
    with filepath.open("w", encoding="utf-8") as fd:
        fd.write(
            template.render(
                parser_version=__version__,
                pings=ping_to_metrics,
            )
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
{# The final Ruby code is autogenerated, but this | ||
Jinja2 template is not. Please file bugs! #} | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
||
# AUTOGENERATED BY glean_parser v{{ parser_version }}. DO NOT EDIT. | ||
|
||
# frozen_string_literal: true | ||
|
||
# requires json, securerandom, logger, and rbconfig libs
require 'json' | ||
require 'securerandom' | ||
require 'logger' | ||
require 'rbconfig' | ||
|
||
# used to identify logs that need to be forwarded to the Moz Data Pipeline
GLEAN_EVENT_MOZLOG_TYPE = 'glean-server-event' | ||
|
||
# helper module to get OS level info for logs | ||
# Helper module that reports the host operating system name for log payloads.
module OS
  # Maps the `host_os` entry of RbConfig::CONFIG to a display name.
  # Patterns are tried in order; when none matches, the raw `host_os`
  # value is returned unchanged.
  def self.name
    host_os = RbConfig::CONFIG['host_os']
    return 'Linux' if /linux/.match?(host_os)
    return 'OS X' if /darwin/.match?(host_os)
    return 'Windows' if /mswin|mingw32|windows/.match?(host_os)
    return 'Solaris' if /solaris/.match?(host_os)
    return 'BSD' if /bsd/.match?(host_os)

    host_os
  end
end
|
||
# Namespace for the generated server-side loggers and event classes.
module Glean
  {% for ping, metrics_by_type in pings.items() %}
  # Assembles `{{ ping }}` pings and writes them through a Ruby Logger as JSON lines.
  class {{ ping|ping_class_name }}
    def initialize(app_id:, app_display_version:, app_channel:, logger_options:)
      @app_id = app_id # string - Application Id to identify application per Glean standards
      @app_display_version = app_display_version # string - Version of application emitting the event
      @app_channel = app_channel # string - Application channel, reported as `app_channel` in client_info
      @logger = Logger.new(logger_options)

      # Logger configuration
      @logger.formatter = proc do |severity, datetime, _progname, msg|
        date_format = datetime.to_i
        logger_name = 'glean'
        "#{JSON.dump(Timestamp: date_format.to_s, Logger: logger_name.to_s, Type: GLEAN_EVENT_MOZLOG_TYPE.to_s, Severity: severity.ljust(5).to_s, Pid: Process.pid.to_s, Fields: msg)}\n"
      end

      # Generated events
      {% for metric_type, metrics in metrics_by_type.items() %}
      {% for metric in metrics %}
      {% if metric.type == 'event' %}
      # {{ metric|metric_argument_description }}
      @{{ metric|metric_argument_name }} = {{ metric|event_class_name }}.new(self)
      {% endif %}
      {% endfor %}
      {% endfor %}
    end

    # Builds the ping payload from the given metric values and event, then logs it.
    # Called by the generated event classes' `record` methods.
    def _record(
      {% for metric_type, metrics in metrics_by_type.items() %}
      {% if metric_type != 'event' %}
      {% for metric in metrics %}
      # {{ metric|metric_argument_description }}
      {{ metric|metric_argument_name }}:,
      {% endfor %}
      {% endif %}
      {% endfor %}
      # full user_agent value from controller context
      user_agent:,
      # ip address value from controller context
      ip_address:,
      # event being sent in the ping
      event:
    )
      t_utc = Time.now.utc
      # create raw metrics hash that can have nil values
      metrics_raw = {
        {% for metric_type, metrics in metrics_by_type.items() %}
        {% if metric_type != 'event' %}
        '{{ metric_type }}' => {
          {% for metric in metrics %}
          '{{ metric|metric_name }}' => {{ metric|metric_argument_name }},
          {% endfor %}
        },
        {% endif %}
        {% endfor %}
      }
      # filter out key value pairs where value is nil
      metrics_raw.each do |key, value|
        metrics_raw[key] = value.compact.transform_values(&:to_s)
      end
      # filter out metrics with empty hashes
      metrics = metrics_raw.reject { |_k, v| v.empty? }
      event_payload = {
        # `Unknown` fields below are required in the Glean schema, however they are not useful in server context.
        'client_info' => {
          'telemetry_sdk_build' => 'glean_parser v{{ parser_version }}',
          'first_run_date' => 'Unknown',
          'os' => OS.name,
          'os_version' => 'Unknown',
          'architecture' => 'Unknown',
          'app_build' => 'Unknown',
          'app_display_version' => @app_display_version,
          'app_channel' => @app_channel,
        },
        'ping_info' => {
          'seq' => 0,
          'start_time' => t_utc,
          'end_time' => t_utc,
        },
        'metrics' => metrics,
        'events' => event,
      }
      serialized_event_payload = event_payload.to_json
      # This is the message structure that Decoder expects: https://github.com/mozilla/gcp-ingestion/pull/2400.
      ping = {
        'document_namespace' => @app_id,
        'document_type' => '{{ ping }}',
        'document_version' => '1',
        'document_id' => SecureRandom.uuid,
        'user_agent' => user_agent,
        'ip_address' => ip_address,
        'payload' => serialized_event_payload,
      }
      @logger.info(ping)
    end
    {% for metric_type, metrics in metrics_by_type.items() %}
    {% for metric in metrics %}
    {% if metric.type == 'event' %}
    attr_accessor :{{ metric|metric_argument_name }}
    {% endif %}
    {% endfor %}
    {% endfor %}
  end
  {% endfor %}

  {% for event in pings["events"]["event"] %}
  class {{ event|event_class_name }}
    # {{ event|metric_argument_description }}
    def initialize(glean)
      @glean = glean
    end

    # Wraps the extras into a single-element event list and hands it,
    # together with the other metric values, to the logger's `_record`.
    def record(
      # extras to pass into event detail
      {% for extra, metadata in event.extra_keys.items() %}
      {{ extra }}:,
      {% endfor %}
      {% for ping, metric_types in pings.items() %}
      {% for metric_type, metrics in metric_types.items() %}
      {% if metric_type != 'event' %}
      {% for metric in metrics %}
      # {{ metric|metric_argument_description }}
      {{ metric|metric_argument_name }}:,
      {% endfor %}
      {% endif %}
      {% endfor %}
      {% endfor %}
      # full user_agent value from controller context
      user_agent:,
      # ip address value from controller context
      ip_address:
    )
      event = [
        {
          'category' => '{{ event.category }}',
          'name' => '{{ event.name }}',
          'timestamp' => (Time.now.utc.to_f * 1000).to_i,
          'extra' => [
            {% for extra, metadata in event.extra_keys.items() %}
            ['{{ extra }}', {{ extra }}],
            {% endfor %}
          ].to_h,
        },
      ]
      @glean._record(
        {% for ping, metric_types in pings.items() %}
        {% for metric_type, metrics in metric_types.items() %}
        {% if metric_type != 'event' %}
        {% for metric in metrics %}
        {{ metric|metric_argument_name }}: {{ metric|metric_argument_name }},
        {% endfor %}
        {% endif %}
        {% endfor %}
        {% endfor %}
        user_agent: user_agent,
        ip_address: ip_address,
        event: event
      )
    end
  end
  {% endfor %}
  {# The module is closed once, after every event class has been emitted. #}
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.