Skip to content

Commit

Permalink
SOCIALPLAT-470: Add server-side Ruby outputter
Browse files Browse the repository at this point in the history
Co-authored-by: Jan-Erik Rediger <[email protected]>
  • Loading branch information
braunreyes and badboy authored Oct 19, 2023
1 parent 5079870 commit 7adb017
Show file tree
Hide file tree
Showing 11 changed files with 781 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Unreleased

- Add Ruby log outputter (`ruby_server`) ([#620](https://github.com/mozilla/glean_parser/pull/620))
- BREAKING CHANGE: `ping` lifetime metrics on the events ping are now disallowed ([#625](https://github.com/mozilla/glean_parser/pull/625))
- Disallow `unit` field for anything but quantity ([#630](https://github.com/mozilla/glean_parser/pull/630)).
Note that this was already considered the case, now the code enforces it.
Expand Down
128 changes: 128 additions & 0 deletions glean_parser/ruby_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# -*- coding: utf-8 -*-

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Outputter to generate server ruby code for collecting events.
This outputter is different from the rest of the outputters in that the code it
generates does not use the Glean SDK. It is meant to be used to collect events
using the "events as pings" pattern in server-side environments. In these environments
SDK assumptions about measurement windows and connectivity don't hold.
Generated code takes care of assembling pings with metrics, serializing to messages
conforming to Glean schema, and logging using a standard Ruby logger.
Then it's the role of the ingestion pipeline to pick the messages up and process.
Warning: this outputter supports a limited set of metrics,
see `SUPPORTED_METRIC_TYPES` below.
"""
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional

from . import __version__, metrics, util

SUPPORTED_METRIC_TYPES = ["string", "event"]


def ping_class_name(pingName: str) -> str:
    """
    Build the name of the generated Ruby logger class for a ping.

    :param pingName: Name of the ping.
    :return: `Glean<Name>Logger`, where `<Name>` is the result of
        `util.Camelize(pingName)`.
    """
    camelized = util.Camelize(pingName)
    return "Glean{}Logger".format(camelized)


def generate_metric_name(metric: metrics.Metric) -> str:
    """
    Build the dotted identifier of a metric.

    :param metric: The metric to name.
    :return: The metric's category and name joined as `<category>.<name>`.
    """
    return "{}.{}".format(metric.category, metric.name)


def generate_metric_argument_name(metric: metrics.Metric) -> str:
    """
    Build the identifier used for a metric in the generated code.

    :param metric: The metric to name.
    :return: The metric's category and name joined as `<category>_<name>`.
    """
    return "{}_{}".format(metric.category, metric.name)


def generate_metric_argument_description(metric: metrics.Metric) -> str:
    """
    Flatten a metric description onto a single line.

    :param metric: The metric whose description is used.
    :return: The description with every newline replaced by a space and
        trailing whitespace removed.
    """
    lines = metric.description.split("\n")
    single_line = " ".join(lines)
    return single_line.rstrip()


def event_class_name(metric: metrics.Metric) -> str:
    """
    Build the name of the generated Ruby class for an event metric.

    :param metric: The event metric to name.
    :return: `<Name>Event`, where `<Name>` is the result of `util.Camelize`
        applied to the metric's argument name.
    """
    argument_name = generate_metric_argument_name(metric)
    return "{}Event".format(util.Camelize(argument_name))


def output_ruby(
    objs: metrics.ObjectTree, output_dir: Path, options: Optional[Dict[str, Any]]
) -> None:
    """
    Given a tree of objects, output ruby code to `output_dir`.

    The output is a single file (`server_events.rb`) containing all the code
    for assembling pings with metrics, serializing, and submitting.

    No file is written, and a message is printed instead, when `objs` contains
    ping definitions or when no event metric is sent in the `events` ping.

    :param objs: A tree of objects (metrics and pings) as returned from
        `parser.parse_objects`.
    :param output_dir: Path to an output directory to write to.
    :param options: Options dictionary; not used by this outputter.
    """

    template = util.get_jinja2_template(
        "ruby_server.jinja2",
        filters=(
            ("ping_class_name", ping_class_name),
            ("metric_name", generate_metric_name),
            ("metric_argument_name", generate_metric_argument_name),
            ("metric_argument_description", generate_metric_argument_description),
            ("event_class_name", event_class_name),
        ),
    )

    # In this environment we don't use a concept of measurement window for collecting
    # metrics. Only "events as pings" are supported.
    # For each ping we generate code which contains all the logic for assembling it
    # with metrics, serializing, and submitting. Therefore we don't generate classes for
    # each metric as in standard outputters.
    PING_METRIC_ERROR_MSG = (
        " Server-side environment is simplified and only supports the events ping type."
        + " You should not be including pings.yaml with your parser call"
        + " or referencing any other pings in your metric configuration."
    )
    if "pings" in objs:
        print("❌ Ping definition found." + PING_METRIC_ERROR_MSG)
        return

    # Go through all metrics in objs and build a map of
    # ping->list of metric categories->list of metrics
    # for easier processing in the template.
    ping_to_metrics: Dict[str, Dict[str, List[metrics.Metric]]] = defaultdict(dict)
    for _category_key, category_val in objs.items():
        for _metric_name, metric in category_val.items():
            if not isinstance(metric, metrics.Metric):
                continue
            if metric.type not in SUPPORTED_METRIC_TYPES:
                print(
                    "❌ Ignoring unsupported metric type: "
                    + f"{metric.type}:{metric.name}."
                    + " Reach out to Glean team to add support for this"
                    + " metric type."
                )
                continue
            for ping in metric.send_in_pings:
                if ping != "events":
                    # The leading space keeps this sentence separated from the
                    # end of PING_METRIC_ERROR_MSG.
                    print(
                        "❌ Non-events ping reference found."
                        + PING_METRIC_ERROR_MSG
                        + f" Ignoring the {ping} ping type."
                    )
                    continue
                metrics_by_type = ping_to_metrics[ping]
                metrics_list = metrics_by_type.setdefault(metric.type, [])
                metrics_list.append(metric)

    # The template iterates over pings["events"]["event"], so at least one
    # event metric must be present.
    if "event" not in ping_to_metrics["events"]:
        print("❌ No event metrics found...at least one event metric is required")
        return

    extension = ".rb"
    filepath = output_dir / ("server_events" + extension)
    with filepath.open("w", encoding="utf-8") as fd:
        fd.write(
            template.render(
                parser_version=__version__,
                pings=ping_to_metrics,
            )
        )
202 changes: 202 additions & 0 deletions glean_parser/templates/ruby_server.jinja2
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
{# The final Ruby code is autogenerated, but this
Jinja2 template is not. Please file bugs! #}
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# AUTOGENERATED BY glean_parser v{{ parser_version }}. DO NOT EDIT.

# frozen_string_literal: true

# requires json, securerandom, logger, and rbconfig libs
require 'json'
require 'securerandom'
require 'logger'
require 'rbconfig'

# this will be used for identifying logs that need to forward to Moz Data Pipeline
GLEAN_EVENT_MOZLOG_TYPE = 'glean-server-event'

# helper module mapping the host OS identifier to a display name for logs
module OS
  # Returns 'Linux', 'OS X', 'Windows', 'Solaris' or 'BSD' for recognised hosts;
  # any other host is reported as the raw RbConfig 'host_os' value.
  def self.name
    host_os = RbConfig::CONFIG['host_os']
    # Checked in order; the first pattern that matches wins.
    known_hosts = [
      [/linux/, 'Linux'],
      [/darwin/, 'OS X'],
      [/mswin|mingw32|windows/, 'Windows'],
      [/solaris/, 'Solaris'],
      [/bsd/, 'BSD'],
    ]
    _pattern, label = known_hosts.find { |pattern, _label| pattern === host_os }
    label || host_os
  end
end

{# Emits, inside `module Glean`, one logger class per ping followed by one
   event class per event metric of the `events` ping. #}
module Glean
  {% for ping, metrics_by_type in pings.items() %}
  class {{ ping|ping_class_name }}
    def initialize(app_id:, app_display_version:, app_channel:, logger_options:)
      @app_id = app_id # string - Application Id to identify application per Glean standards
      @app_display_version = app_display_version # string - Version of application emitting the event
      @app_channel = app_channel # string - Application channel reported in client_info.app_channel
      @logger = Logger.new(logger_options)

      # Logger configuration
      @logger.formatter = proc do |severity, datetime, _progname, msg|
        date_format = datetime.to_i
        logger_name = 'glean'
        "#{JSON.dump(Timestamp: date_format.to_s, Logger: logger_name.to_s, Type: GLEAN_EVENT_MOZLOG_TYPE.to_s, Severity: severity.ljust(5).to_s, Pid: Process.pid.to_s, Fields: msg)}\n"
      end

      # Generated events
      {% for metric_type, metrics in metrics_by_type.items() %}
      {% for metric in metrics %}
      {% if metric.type == 'event' %}
      # {{ metric|metric_argument_description }}
      @{{ metric|metric_argument_name }} = {{ metric|event_class_name }}.new(self)
      {% endif %}
      {% endfor %}
      {% endfor %}
    end

    {# Assembles the ping payload from the non-event metrics and the given
       event, then writes it through the logger. #}
    def _record(
      {% for metric_type, metrics in metrics_by_type.items() %}
      {% if metric_type != 'event' %}
      {% for metric in metrics %}
      # {{ metric|metric_argument_description }}
      {{ metric|metric_argument_name }}:,
      {% endfor %}
      {% endif %}
      {% endfor %}
      # full user_agent value from controller context
      user_agent:,
      # ip address value from controller context
      ip_address:,
      # event being sent in the ping
      event:
    )
      t_utc = Time.now.utc
      # create raw metrics hash that can have nil values
      metrics_raw = {
        {% for metric_type, metrics in metrics_by_type.items() %}
        {% if metric_type != 'event' %}
        '{{ metric_type }}' => {
          {% for metric in metrics %}
          '{{ metric|metric_name }}' => {{ metric|metric_argument_name }},
          {% endfor %}
        },
        {% endif %}
        {% endfor %}
      }
      # filter out key value pairs where value is nil
      metrics_raw.each do |key, value|
        metrics_raw[key] = value.compact.transform_values(&:to_s)
      end
      # filter out metrics with empty hashes
      metrics = metrics_raw.reject { |_k, v| v.empty? }
      event_payload = {
        # `Unknown` fields below are required in the Glean schema, however they are not useful in server context.
        'client_info' => {
          'telemetry_sdk_build' => 'glean_parser v{{ parser_version }}',
          'first_run_date' => 'Unknown',
          'os' => OS.name,
          'os_version' => 'Unknown',
          'architecture' => 'Unknown',
          'app_build' => 'Unknown',
          'app_display_version' => @app_display_version,
          'app_channel' => @app_channel,
        },
        'ping_info' => {
          'seq' => 0,
          'start_time' => t_utc,
          'end_time' => t_utc,
        },
        'metrics' => metrics,
        'events' => event,
      }
      serialized_event_payload = event_payload.to_json
      # This is the message structure that Decoder expects: https://github.com/mozilla/gcp-ingestion/pull/2400.
      ping = {
        'document_namespace' => @app_id,
        'document_type' => '{{ ping }}',
        'document_version' => '1',
        'document_id' => SecureRandom.uuid,
        'user_agent' => user_agent,
        'ip_address' => ip_address,
        'payload' => serialized_event_payload,
      }
      @logger.info(ping)
    end
    {% for metric_type, metrics in metrics_by_type.items() %}
    {% for metric in metrics %}
    {% if metric.type == 'event' %}
    attr_accessor :{{ metric|metric_argument_name }}
    {% endif %}
    {% endfor %}
    {% endfor %}
  end
  {% endfor %}

  {% for event in pings["events"]["event"] %}
  class {{ event|event_class_name }}
    # {{ event|metric_argument_description }}
    def initialize(glean)
      @glean = glean
    end

    def record(
      # extras to pass into event detail
      {% for extra, metadata in event.extra_keys.items() %}
      {{ extra }}:,
      {% endfor %}
      {% for ping, metric_types in pings.items() %}
      {% for metric_type, metrics in metric_types.items() %}
      {% if metric_type != 'event' %}
      {% for metric in metrics %}
      # {{ metric|metric_argument_description }}
      {{ metric|metric_argument_name }}:,
      {% endfor %}
      {% endif %}
      {% endfor %}
      {% endfor %}
      # full user_agent value from controller context
      user_agent:,
      # ip address value from controller context
      ip_address:
    )
      event = [
        {
          'category' => '{{ event.category }}',
          'name' => '{{ event.name }}',
          'timestamp' => (Time.now.utc.to_f * 1000).to_i,
          'extra' => [
            {% for extra, metadata in event.extra_keys.items() %}
            ['{{ extra }}', {{ extra }}],
            {% endfor %}
          ].to_h,
        },
      ]
      @glean._record(
        {% for ping, metric_types in pings.items() %}
        {% for metric_type, metrics in metric_types.items() %}
        {% if metric_type != 'event' %}
        {% for metric in metrics %}
        {{ metric|metric_argument_name }}: {{ metric|metric_argument_name }},
        {% endfor %}
        {% endif %}
        {% endfor %}
        {% endfor %}
        user_agent: user_agent,
        ip_address: ip_address,
        event: event
      )
    end
  end
  {% endfor %}
{# The `end` closing `module Glean` must stay outside the event loop: emitting
   it once per event would close the module after the first event class and
   leave stray `end`s when more than one event metric is defined. #}
end
2 changes: 2 additions & 0 deletions glean_parser/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from . import kotlin
from . import markdown
from . import metrics
from . import ruby_server
from . import rust
from . import swift
from . import util
Expand Down Expand Up @@ -57,6 +58,7 @@ def __init__(
"typescript": Outputter(javascript.output_typescript, []),
"javascript_server": Outputter(javascript_server.output_javascript, []),
"typescript_server": Outputter(javascript_server.output_typescript, []),
"ruby_server": Outputter(ruby_server.output_ruby, []),
"kotlin": Outputter(kotlin.output_kotlin, ["*.kt"]),
"markdown": Outputter(markdown.output_markdown, []),
"swift": Outputter(swift.output_swift, ["*.swift"]),
Expand Down
Loading

0 comments on commit 7adb017

Please sign in to comment.