From 325cadc8c4a64137a93aaf8ea199cc36e6fa30a3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 15 Aug 2022 21:46:07 -0500 Subject: [PATCH] Add metrics to track how the rate limiter is affecting requests Related to https://github.com/matrix-org/synapse/pull/13499 Mentioned in https://docs.google.com/document/d/1lvUoVfYUiy6UaHB6Rb4HicjaJAU40-APue9Q4vzuW3c/edit#bookmark=id.zjko88lr25j --- synapse/util/ratelimitutils.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 6394cc39ac02..fa6f63c77846 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -20,6 +20,8 @@ from twisted.internet import defer +from prometheus_client.core import Counter + from synapse.api.errors import LimitExceededError from synapse.config.ratelimiting import FederationRatelimitSettings from synapse.logging.context import ( @@ -35,6 +37,11 @@ logger = logging.getLogger(__name__) +rate_limit_sleep_counter = Counter("synapse_rate_limit_sleep", "", ["host"]) + +rate_limit_reject_counter = Counter("synapse_rate_limit_reject", "", ["host"]) + + class FederationRateLimiter: def __init__(self, clock: Clock, config: FederationRatelimitSettings): def new_limiter() -> "_PerHostRatelimiter": @@ -59,7 +66,7 @@ def ratelimit(self, host: str) -> "_GeneratorContextManager[defer.Deferred[None] Returns: context manager which returns a deferred. """ - return self.ratelimiters[host].ratelimit() + return self.ratelimiters[host].ratelimit(host) class _PerHostRatelimiter: @@ -94,12 +101,14 @@ def __init__(self, clock: Clock, config: FederationRatelimitSettings): self.request_times: List[int] = [] @contextlib.contextmanager - def ratelimit(self) -> "Iterator[defer.Deferred[None]]": + def ratelimit(self, host: str) -> "Iterator[defer.Deferred[None]]": # `contextlib.contextmanager` takes a generator and turns it into a # context manager. The generator should only yield once with a value # to be returned by manager. # Exceptions will be reraised at the yield. + self.host = host + request_id = object() ret = self._on_enter(request_id) try: @@ -119,6 +128,7 @@ def _on_enter(self, request_id: object) -> "defer.Deferred[None]": # sleeping or in the ready queue). queue_size = len(self.ready_request_queue) + len(self.sleeping_requests) if queue_size > self.reject_limit: + rate_limit_reject_counter.labels(self.host).inc() raise LimitExceededError( retry_after_ms=int(self.window_size / self.sleep_limit) ) @@ -146,6 +156,7 @@ def queue_request() -> "defer.Deferred[None]": if len(self.request_times) > self.sleep_limit: logger.debug("Ratelimiter: sleeping request for %f sec", self.sleep_sec) + rate_limit_sleep_counter.labels(self.host).inc() ret_defer = run_in_background(self.clock.sleep, self.sleep_sec) self.sleeping_requests.add(request_id)