Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MPP-3831: Send metrics_event data to influxdb, logs instead of UA #5108

Merged
merged 4 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions METRICS.md
Original file line number Diff line number Diff line change
Expand Up @@ -1492,19 +1492,21 @@ The Google Analytics events:
The Relay Extension makes it easier to use Relay email masks on websites. It is
available as a [Firefox Extension][] and a [Chrome Extension][].

The Relay Extension uses a background listener to send interaction events to
the Relay API server. These events are then recorded as statsd-style statistics
and in server logs.

The Relay Extension generates a [random UUID][] for the extension identifier
that is [stored locally in the browser][]. A different ID will be generated for
each browser and machine. A truncated hash of this identifier is included in the
server logs to estimate the count of unique extension installations.

<!-- References in this paragraph are defined in section "Google Analytics" -->

The Relay Extension uses a background listener to send interaction events to
the Relay API server. The API server forwards the events to Google Analytics.
Events are reported using the [Universal Measurement Protocol][].
Before October 2024, the API server forwarded the events
to Google Analytics, using the [Universal Measurement Protocol][].
Google [replaced Universal Analytics with Google Analytics 4][] (GA4) on July 1, 2024,
and these events are no longer recorded. Relay is in the process of switching
to [GA4][].

The Relay Extension generates a [random UUID][] for the Google Analytics
identifier that is [stored locally in the browser][]. This ID is different from
the GA identifier on the Relay webpage. A different ID will be generated for
each browser and machine.
and these events stopped being recorded.

[Chrome Extension]: https://chromewebstore.google.com/detail/firefox-relay/lknpoadjjkjcmjhbjpcljdednccbldeb "The Firefox Relay extension on the Chrome Web Store"
[Firefox Extension]: https://addons.mozilla.org/en-US/firefox/addon/private-relay/ "The Firefox Relay extension on Firefox Browser Add-Ons"
Expand Down
107 changes: 42 additions & 65 deletions privaterelay/tests/views_tests.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import json
import logging
from collections.abc import Callable, Iterator
from collections.abc import Iterator
from copy import deepcopy
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta
from pathlib import Path
from typing import Any, Literal
from unittest.mock import ANY, Mock, patch
from unittest.mock import Mock, patch
from uuid import uuid4

from django.contrib.auth.models import User
from django.test import Client, TestCase
from django.test import Client, TestCase, override_settings
from django.utils import timezone

import jwt
Expand Down Expand Up @@ -38,7 +38,7 @@
from ..apps import PrivateRelayConfig
from ..fxa_utils import NoSocialToken
from ..models import Profile
from ..views import _update_all_data, fxa_verifying_keys, send_ga_ping
from ..views import _update_all_data, fxa_verifying_keys


def test_no_social_token():
Expand Down Expand Up @@ -598,82 +598,59 @@ def test_lbheartbeat_view(client: Client) -> None:
assert response.content == b""


_ThreadAndReportMocks = dict[Literal["thread", "report"], Mock]


@pytest.fixture
def mock_metrics_thread_and_report() -> Iterator[_ThreadAndReportMocks]:
"""
Set up mocks for metrics event.

Replace google_measurement_protocol.report with a Mock
Replace Thread with a mock version that calls immediately.
"""

with (
patch("privaterelay.views.threading.Thread", spec=True) as mock_thread_cls,
patch("privaterelay.views.report") as mock_report,
):

mock_thread = Mock(spec_set=["start"])

def create_thread(
target: Callable[[str, str, Any], None],
args: tuple[str, str, Any],
daemon: bool,
) -> Mock:
assert target == send_ga_ping
assert daemon

def call_send_ga_ping() -> None:
target(*args)

mock_thread.start.side_effect = call_send_ga_ping
return mock_thread

mock_thread_cls.side_effect = create_thread
yield {"thread": mock_thread, "report": mock_report}


def test_metrics_event_GET(
client: Client, mock_metrics_thread_and_report: _ThreadAndReportMocks
) -> None:
response = client.get("/metrics-event")
@override_settings(STATSD_ENABLED=True)
def test_metrics_event_GET(client: Client, caplog: pytest.LogCaptureFixture) -> None:
with MetricsMock() as mm:
response = client.get("/metrics-event")
assert response.status_code == 405
mock_metrics_thread_and_report["thread"].start.assert_not_called()
mock_metrics_thread_and_report["report"].assert_not_called()
assert caplog.record_tuples == [("request.summary", logging.INFO, "")]
mm.assert_not_incr("fx.private.relay.metrics_event")


@override_settings(STATSD_ENABLED=True)
def test_metrics_event_POST_non_json(
client: Client, mock_metrics_thread_and_report: _ThreadAndReportMocks
client: Client, caplog: pytest.LogCaptureFixture
) -> None:
response = client.post("/metrics-event")
with MetricsMock() as mm:
response = client.post("/metrics-event")
assert response.status_code == 415
mock_metrics_thread_and_report["thread"].start.assert_not_called()
mock_metrics_thread_and_report["report"].assert_not_called()
assert caplog.record_tuples == [("request.summary", logging.INFO, "")]
mm.assert_not_incr("fx.private.relay.metrics_event")


@override_settings(STATSD_ENABLED=True)
def test_metrics_event_POST_json_no_ga_uuid(
client: Client, mock_metrics_thread_and_report: _ThreadAndReportMocks
client: Client, caplog: pytest.LogCaptureFixture
) -> None:
response = client.post(
"/metrics-event", {"category": "addon"}, content_type="application/json"
)
with MetricsMock() as mm:
response = client.post(
"/metrics-event", {"category": "addon"}, content_type="application/json"
)
assert response.status_code == 404
mock_metrics_thread_and_report["thread"].start.assert_not_called()
mock_metrics_thread_and_report["report"].assert_not_called()
assert caplog.record_tuples == [("request.summary", logging.INFO, "")]
mm.assert_not_incr("fx.private.relay.metrics_event")


@override_settings(STATSD_ENABLED=True)
def test_metrics_event_POST_json_ga_uuid_ok(
client: Client,
mock_metrics_thread_and_report: _ThreadAndReportMocks,
caplog: pytest.LogCaptureFixture,
settings: SettingsWrapper,
) -> None:
response = client.post(
"/metrics-event", {"ga_uuid": "anything-is-ok"}, content_type="application/json"
)
with MetricsMock() as mm:
response = client.post(
"/metrics-event",
{"ga_uuid": "anything-is-ok"},
content_type="application/json",
)
assert response.status_code == 200
mock_metrics_thread_and_report["thread"].start.assert_called_once_with()
mock_metrics_thread_and_report["report"].assert_called_once_with(
settings.GOOGLE_ANALYTICS_ID, "anything-is-ok", ANY
)

assert caplog.record_tuples == [
("eventsinfo", logging.INFO, "metrics_event"),
("request.summary", logging.INFO, ""),
]
record = caplog.records[0]
assert getattr(record, "ga_uuid_hash") == "1aa8606ede8415d8"
assert getattr(record, "source") == "website"

mm.assert_incr_once("fx.private.relay.metrics_event", 1, tags=["source:website"])
53 changes: 26 additions & 27 deletions privaterelay/views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import logging
import threading
from collections.abc import Iterable
from datetime import UTC, datetime
from functools import cache
Expand All @@ -21,7 +20,7 @@
from allauth.socialaccount.models import SocialAccount, SocialApp
from allauth.socialaccount.providers.fxa.views import FirefoxAccountsOAuth2Adapter
from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey
from google_measurement_protocol import event, report
from markus.utils import generate_tag
from oauthlib.oauth2.rfc6749.errors import CustomOAuth2Error
from rest_framework.decorators import api_view, schema

Expand Down Expand Up @@ -91,40 +90,40 @@ def profile_subdomain(request):
return JsonResponse({"message": e.message, "subdomain": subdomain}, status=400)


def send_ga_ping(ga_id: str, ga_uuid: str, data: Any) -> None:
try:
report(ga_id, ga_uuid, data)
except Exception as e:
logger.error("metrics_event", extra={"error": e})


@csrf_exempt
@require_http_methods(["POST"])
def metrics_event(request: HttpRequest) -> JsonResponse:
"""
Handle metrics events from the Relay extension.

This used to forward data to Google Analytics, but was not updated for GA4.

Now it logs the information and updates statsd counters.
"""
try:
request_data = json.loads(request.body)
except json.JSONDecodeError:
return JsonResponse({"msg": "Could not decode JSON"}, status=415)
if "ga_uuid" not in request_data:
return JsonResponse({"msg": "No GA uuid found"}, status=404)
# "dimension5" is a Google Analytics-specific variable to track a custom dimension,
# used to determine which browser vendor the add-on is using: Firefox or Chrome
# "dimension7" is a Google Analytics-specific variable to track a custom dimension,
# used to determine where the ping is coming from: website (default), add-on or app
event_data = event(
request_data.get("category", None),
request_data.get("action", None),
request_data.get("label", None),
request_data.get("value", None),
dimension5=request_data.get("dimension5", None),
dimension7=request_data.get("dimension7", "website"),
)
t = threading.Thread(
target=send_ga_ping,
args=[settings.GOOGLE_ANALYTICS_ID, request_data.get("ga_uuid"), event_data],
daemon=True,
)
t.start()

# "real_address": sha256(new_emddail.encode("utf-8")).hexdigest(),
groovecoder marked this conversation as resolved.
Show resolved Hide resolved
event_data = {
"ga_uuid_hash": sha256(request_data["ga_uuid"].encode()).hexdigest()[:16],
"category": request_data.get("category", None),
"action": request_data.get("action", None),
"label": request_data.get("label", None),
"value": request_data.get("value", None),
"browser": request_data.get("browser", None), # dimension5 in GA
"source": request_data.get("dimension7", "website"),
}
info_logger.info("metrics_event", extra=event_data)
tags = [
generate_tag(key, val)
for key, val in event_data.items()
if val is not None and key != "ga_uuid_hash"
]
incr_if_enabled("metrics_event", tags=tags)
return JsonResponse({"msg": "OK"}, status=200)


Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ dockerflow==2024.4.2
drf-spectacular==0.27.2
drf-spectacular-sidecar==2024.7.1
glean_parser==15.0.1
google-measurement-protocol==1.1.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

praise: nice clean-up.

google-cloud-profiler==4.1.0
google-cloud-sqlcommenter==2.0.0; python_version < '3.12'
gunicorn==23.0.0
Expand Down