From 2db1099c8d453ccd97cc46fd521967716ee72972 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 21 Oct 2020 07:50:02 -0400 Subject: [PATCH 01/23] Remove the structured logging configuration code. --- changelog.d/8607.misc | 1 + synapse/config/logger.py | 14 +- synapse/logging/_structured.py | 378 ------------------------------- tests/logging/test_structured.py | 214 ----------------- 4 files changed, 3 insertions(+), 604 deletions(-) create mode 100644 changelog.d/8607.misc delete mode 100644 synapse/logging/_structured.py delete mode 100644 tests/logging/test_structured.py diff --git a/changelog.d/8607.misc b/changelog.d/8607.misc new file mode 100644 index 000000000000..9e56551a34bf --- /dev/null +++ b/changelog.d/8607.misc @@ -0,0 +1 @@ +Re-organize the structured logging code to separate the TCP transport handling from the JSON formatting. diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 13d6f6a3ea68..fc12738bf2c6 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -32,10 +32,6 @@ import synapse from synapse.app import _base as appbase -from synapse.logging._structured import ( - reload_structured_logging, - setup_structured_logging, -) from synapse.logging.context import LoggingContextFilter from synapse.util.versionstring import get_version_string @@ -303,14 +299,8 @@ def read_config(*args, callback=None): log_config_body = read_config() - if log_config_body and log_config_body.get("structured") is True: - logger = setup_structured_logging( - hs, config, log_config_body, logBeginner=logBeginner - ) - appbase.register_sighup(read_config, callback=reload_structured_logging) - else: - logger = _setup_stdlib_logging(config, log_config_body, logBeginner=logBeginner) - appbase.register_sighup(read_config, callback=_reload_stdlib_logging) + logger = _setup_stdlib_logging(config, log_config_body, logBeginner=logBeginner) + appbase.register_sighup(read_config, callback=_reload_stdlib_logging) # make sure that the first thing we log is a thing we can grep backwards # for diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py deleted file mode 100644 index 0fc2ea609e7e..000000000000 --- a/synapse/logging/_structured.py +++ /dev/null @@ -1,378 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2019 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import logging -import os.path -import sys -import typing -import warnings -from typing import List - -import attr -from constantly import NamedConstant, Names, ValueConstant, Values -from zope.interface import implementer - -from twisted.logger import ( - FileLogObserver, - FilteringLogObserver, - ILogObserver, - LogBeginner, - Logger, - LogLevel, - LogLevelFilterPredicate, - LogPublisher, - eventAsText, - jsonFileLogObserver, -) - -from synapse.config._base import ConfigError -from synapse.logging._terse_json import ( - TerseJSONToConsoleLogObserver, - TerseJSONToTCPLogObserver, -) -from synapse.logging.context import current_context - - -def stdlib_log_level_to_twisted(level: str) -> LogLevel: - """ - Convert a stdlib log level to Twisted's log level. - """ - lvl = level.lower().replace("warning", "warn") - return LogLevel.levelWithName(lvl) - - -@attr.s -@implementer(ILogObserver) -class LogContextObserver: - """ - An ILogObserver which adds Synapse-specific log context information. - - Attributes: - observer (ILogObserver): The target parent observer. - """ - - observer = attr.ib() - - def __call__(self, event: dict) -> None: - """ - Consume a log event and emit it to the parent observer after filtering - and adding log context information. - - Args: - event (dict) - """ - # Filter out some useless events that Twisted outputs - if "log_text" in event: - if event["log_text"].startswith("DNSDatagramProtocol starting on "): - return - - if event["log_text"].startswith("(UDP Port "): - return - - if event["log_text"].startswith("Timing out client") or event[ - "log_format" - ].startswith("Timing out client"): - return - - context = current_context() - - # Copy the context information to the log event. - context.copy_to_twisted_log_entry(event) - - self.observer(event) - - -class PythonStdlibToTwistedLogger(logging.Handler): - """ - Transform a Python stdlib log message into a Twisted one. - """ - - def __init__(self, observer, *args, **kwargs): - """ - Args: - observer (ILogObserver): A Twisted logging observer. - *args, **kwargs: Args/kwargs to be passed to logging.Handler. - """ - self.observer = observer - super().__init__(*args, **kwargs) - - def emit(self, record: logging.LogRecord) -> None: - """ - Emit a record to Twisted's observer. - - Args: - record (logging.LogRecord) - """ - - self.observer( - { - "log_time": record.created, - "log_text": record.getMessage(), - "log_format": "{log_text}", - "log_namespace": record.name, - "log_level": stdlib_log_level_to_twisted(record.levelname), - } - ) - - -def SynapseFileLogObserver(outFile: typing.IO[str]) -> FileLogObserver: - """ - A log observer that formats events like the traditional log formatter and - sends them to `outFile`. - - Args: - outFile (file object): The file object to write to. 
- """ - - def formatEvent(_event: dict) -> str: - event = dict(_event) - event["log_level"] = event["log_level"].name.upper() - event["log_format"] = "- {log_namespace} - {log_level} - {request} - " + ( - event.get("log_format", "{log_text}") or "{log_text}" - ) - return eventAsText(event, includeSystem=False) + "\n" - - return FileLogObserver(outFile, formatEvent) - - -class DrainType(Names): - CONSOLE = NamedConstant() - CONSOLE_JSON = NamedConstant() - CONSOLE_JSON_TERSE = NamedConstant() - FILE = NamedConstant() - FILE_JSON = NamedConstant() - NETWORK_JSON_TERSE = NamedConstant() - - -class OutputPipeType(Values): - stdout = ValueConstant(sys.__stdout__) - stderr = ValueConstant(sys.__stderr__) - - -@attr.s -class DrainConfiguration: - name = attr.ib() - type = attr.ib() - location = attr.ib() - options = attr.ib(default=None) - - -@attr.s -class NetworkJSONTerseOptions: - maximum_buffer = attr.ib(type=int) - - -DEFAULT_LOGGERS = {"synapse": {"level": "INFO"}} - - -def parse_drain_configs( - drains: dict, -) -> typing.Generator[DrainConfiguration, None, None]: - """ - Parse the drain configurations. - - Args: - drains (dict): A list of drain configurations. - - Yields: - DrainConfiguration instances. - - Raises: - ConfigError: If any of the drain configuration items are invalid. - """ - for name, config in drains.items(): - if "type" not in config: - raise ConfigError("Logging drains require a 'type' key.") - - try: - logging_type = DrainType.lookupByName(config["type"].upper()) - except ValueError: - raise ConfigError( - "%s is not a known logging drain type." % (config["type"],) - ) - - if logging_type in [ - DrainType.CONSOLE, - DrainType.CONSOLE_JSON, - DrainType.CONSOLE_JSON_TERSE, - ]: - location = config.get("location") - if location is None or location not in ["stdout", "stderr"]: - raise ConfigError( - ( - "The %s drain needs the 'location' key set to " - "either 'stdout' or 'stderr'." - ) - % (logging_type,) - ) - - pipe = OutputPipeType.lookupByName(location).value - - yield DrainConfiguration(name=name, type=logging_type, location=pipe) - - elif logging_type in [DrainType.FILE, DrainType.FILE_JSON]: - if "location" not in config: - raise ConfigError( - "The %s drain needs the 'location' key set." % (logging_type,) - ) - - location = config.get("location") - if os.path.abspath(location) != location: - raise ConfigError( - "File paths need to be absolute, '%s' is a relative path" - % (location,) - ) - yield DrainConfiguration(name=name, type=logging_type, location=location) - - elif logging_type in [DrainType.NETWORK_JSON_TERSE]: - host = config.get("host") - port = config.get("port") - maximum_buffer = config.get("maximum_buffer", 1000) - yield DrainConfiguration( - name=name, - type=logging_type, - location=(host, port), - options=NetworkJSONTerseOptions(maximum_buffer=maximum_buffer), - ) - - else: - raise ConfigError( - "The %s drain type is currently not implemented." - % (config["type"].upper(),) - ) - - -class StoppableLogPublisher(LogPublisher): - """ - A log publisher that can tell its observers to shut down any external - communications. - """ - - def stop(self): - for obs in self._observers: - if hasattr(obs, "stop"): - obs.stop() - - -def setup_structured_logging( - hs, - config, - log_config: dict, - logBeginner: LogBeginner, - redirect_stdlib_logging: bool = True, -) -> LogPublisher: - """ - Set up Twisted's structured logging system. - - Args: - hs: The homeserver to use. - config (HomeserverConfig): The configuration of the Synapse homeserver. 
- log_config (dict): The log configuration to use. - """ - if config.no_redirect_stdio: - raise ConfigError( - "no_redirect_stdio cannot be defined using structured logging." - ) - - logger = Logger() - - if "drains" not in log_config: - raise ConfigError("The logging configuration requires a list of drains.") - - observers = [] # type: List[ILogObserver] - - for observer in parse_drain_configs(log_config["drains"]): - # Pipe drains - if observer.type == DrainType.CONSOLE: - logger.debug( - "Starting up the {name} console logger drain", name=observer.name - ) - observers.append(SynapseFileLogObserver(observer.location)) - elif observer.type == DrainType.CONSOLE_JSON: - logger.debug( - "Starting up the {name} JSON console logger drain", name=observer.name - ) - observers.append(jsonFileLogObserver(observer.location)) - elif observer.type == DrainType.CONSOLE_JSON_TERSE: - logger.debug( - "Starting up the {name} terse JSON console logger drain", - name=observer.name, - ) - observers.append( - TerseJSONToConsoleLogObserver(observer.location, metadata={}) - ) - - # File drains - elif observer.type == DrainType.FILE: - logger.debug("Starting up the {name} file logger drain", name=observer.name) - log_file = open(observer.location, "at", buffering=1, encoding="utf8") - observers.append(SynapseFileLogObserver(log_file)) - elif observer.type == DrainType.FILE_JSON: - logger.debug( - "Starting up the {name} JSON file logger drain", name=observer.name - ) - log_file = open(observer.location, "at", buffering=1, encoding="utf8") - observers.append(jsonFileLogObserver(log_file)) - - elif observer.type == DrainType.NETWORK_JSON_TERSE: - metadata = {"server_name": hs.config.server_name} - log_observer = TerseJSONToTCPLogObserver( - hs=hs, - host=observer.location[0], - port=observer.location[1], - metadata=metadata, - maximum_buffer=observer.options.maximum_buffer, - ) - log_observer.start() - observers.append(log_observer) - else: - # We should never get here, but, just in case, throw an error. - raise ConfigError("%s drain type cannot be configured" % (observer.type,)) - - publisher = StoppableLogPublisher(*observers) - log_filter = LogLevelFilterPredicate() - - for namespace, namespace_config in log_config.get( - "loggers", DEFAULT_LOGGERS - ).items(): - # Set the log level for twisted.logger.Logger namespaces - log_filter.setLogLevelForNamespace( - namespace, - stdlib_log_level_to_twisted(namespace_config.get("level", "INFO")), - ) - - # Also set the log levels for the stdlib logger namespaces, to prevent - # them getting to PythonStdlibToTwistedLogger and having to be formatted - if "level" in namespace_config: - logging.getLogger(namespace).setLevel(namespace_config.get("level")) - - f = FilteringLogObserver(publisher, [log_filter]) - lco = LogContextObserver(f) - - if redirect_stdlib_logging: - stuff_into_twisted = PythonStdlibToTwistedLogger(lco) - stdliblogger = logging.getLogger() - stdliblogger.addHandler(stuff_into_twisted) - - # Always redirect standard I/O, otherwise other logging outputs might miss - # it. 
- logBeginner.beginLoggingTo([lco], redirectStandardIO=True) - - return publisher - - -def reload_structured_logging(*args, log_config=None) -> None: - warnings.warn( - "Currently the structured logging system can not be reloaded, doing nothing" - ) diff --git a/tests/logging/test_structured.py b/tests/logging/test_structured.py deleted file mode 100644 index d36f5f426cd3..000000000000 --- a/tests/logging/test_structured.py +++ /dev/null @@ -1,214 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2019 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import os.path -import shutil -import sys -import textwrap - -from twisted.logger import Logger, eventAsText, eventsFromJSONLogFile - -from synapse.config.logger import setup_logging -from synapse.logging._structured import setup_structured_logging -from synapse.logging.context import LoggingContext - -from tests.unittest import DEBUG, HomeserverTestCase - - -class FakeBeginner: - def beginLoggingTo(self, observers, **kwargs): - self.observers = observers - - -class StructuredLoggingTestBase: - """ - Test base that registers a cleanup handler to reset the stdlib log handler - to 'unset'. - """ - - def prepare(self, reactor, clock, hs): - def _cleanup(): - logging.getLogger("synapse").setLevel(logging.NOTSET) - - self.addCleanup(_cleanup) - - -class StructuredLoggingTestCase(StructuredLoggingTestBase, HomeserverTestCase): - """ - Tests for Synapse's structured logging support. - """ - - def test_output_to_json_round_trip(self): - """ - Synapse logs can be outputted to JSON and then read back again. - """ - temp_dir = self.mktemp() - os.mkdir(temp_dir) - self.addCleanup(shutil.rmtree, temp_dir) - - json_log_file = os.path.abspath(os.path.join(temp_dir, "out.json")) - - log_config = { - "drains": {"jsonfile": {"type": "file_json", "location": json_log_file}} - } - - # Begin the logger with our config - beginner = FakeBeginner() - setup_structured_logging( - self.hs, self.hs.config, log_config, logBeginner=beginner - ) - - # Make a logger and send an event - logger = Logger( - namespace="tests.logging.test_structured", observer=beginner.observers[0] - ) - logger.info("Hello there, {name}!", name="wally") - - # Read the log file and check it has the event we sent - with open(json_log_file, "r") as f: - logged_events = list(eventsFromJSONLogFile(f)) - self.assertEqual(len(logged_events), 1) - - # The event pulled from the file should render fine - self.assertEqual( - eventAsText(logged_events[0], includeTimestamp=False), - "[tests.logging.test_structured#info] Hello there, wally!", - ) - - def test_output_to_text(self): - """ - Synapse logs can be outputted to text. 
- """ - temp_dir = self.mktemp() - os.mkdir(temp_dir) - self.addCleanup(shutil.rmtree, temp_dir) - - log_file = os.path.abspath(os.path.join(temp_dir, "out.log")) - - log_config = {"drains": {"file": {"type": "file", "location": log_file}}} - - # Begin the logger with our config - beginner = FakeBeginner() - setup_structured_logging( - self.hs, self.hs.config, log_config, logBeginner=beginner - ) - - # Make a logger and send an event - logger = Logger( - namespace="tests.logging.test_structured", observer=beginner.observers[0] - ) - logger.info("Hello there, {name}!", name="wally") - - # Read the log file and check it has the event we sent - with open(log_file, "r") as f: - logged_events = f.read().strip().split("\n") - self.assertEqual(len(logged_events), 1) - - # The event pulled from the file should render fine - self.assertTrue( - logged_events[0].endswith( - " - tests.logging.test_structured - INFO - None - Hello there, wally!" - ) - ) - - def test_collects_logcontext(self): - """ - Test that log outputs have the attached logging context. - """ - log_config = {"drains": {}} - - # Begin the logger with our config - beginner = FakeBeginner() - publisher = setup_structured_logging( - self.hs, self.hs.config, log_config, logBeginner=beginner - ) - - logs = [] - - publisher.addObserver(logs.append) - - # Make a logger and send an event - logger = Logger( - namespace="tests.logging.test_structured", observer=beginner.observers[0] - ) - - with LoggingContext("testcontext", request="somereq"): - logger.info("Hello there, {name}!", name="steve") - - self.assertEqual(len(logs), 1) - self.assertEqual(logs[0]["request"], "somereq") - - -class StructuredLoggingConfigurationFileTestCase( - StructuredLoggingTestBase, HomeserverTestCase -): - def make_homeserver(self, reactor, clock): - - tempdir = self.mktemp() - os.mkdir(tempdir) - log_config_file = os.path.abspath(os.path.join(tempdir, "log.config.yaml")) - self.homeserver_log = os.path.abspath(os.path.join(tempdir, "homeserver.log")) - - config = self.default_config() - config["log_config"] = log_config_file - - with open(log_config_file, "w") as f: - f.write( - textwrap.dedent( - """\ - structured: true - - drains: - file: - type: file_json - location: %s - """ - % (self.homeserver_log,) - ) - ) - - self.addCleanup(self._sys_cleanup) - - return self.setup_test_homeserver(config=config) - - def _sys_cleanup(self): - sys.stdout = sys.__stdout__ - sys.stderr = sys.__stderr__ - - # Do not remove! We need the logging system to be set other than WARNING. - @DEBUG - def test_log_output(self): - """ - When a structured logging config is given, Synapse will use it. - """ - beginner = FakeBeginner() - publisher = setup_logging(self.hs, self.hs.config, logBeginner=beginner) - - # Make a logger and send an event - logger = Logger(namespace="tests.logging.test_structured", observer=publisher) - - with LoggingContext("testcontext", request="somereq"): - logger.info("Hello there, {name}!", name="steve") - - with open(self.homeserver_log, "r") as f: - logged_events = [ - eventAsText(x, includeTimestamp=False) for x in eventsFromJSONLogFile(f) - ] - - logs = "\n".join(logged_events) - self.assertTrue("***** STARTING SERVER *****" in logs) - self.assertTrue("Hello there, steve!" in logs) From 509594f3f147021ca9c217e91f7c531121d1be0d Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 20 Oct 2020 10:15:05 -0400 Subject: [PATCH 02/23] Remove an unnecessary return value. 
--- synapse/config/logger.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index fc12738bf2c6..726fe8b5a0c5 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -23,7 +23,6 @@ import yaml from twisted.logger import ( - ILogObserver, LogBeginner, STDLibLogObserver, eventAsText, @@ -172,9 +171,9 @@ def generate_files(self, config, config_dir_path): log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file)) -def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner): +def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner) -> None: """ - Set up Python stdlib logging. + Set up Python standard library logging. """ if log_config is None: log_format = ( @@ -251,8 +250,6 @@ def _log(event): if not config.no_redirect_stdio: print("Redirected stdout/stderr to logs") - return observer - def _reload_stdlib_logging(*args, log_config=None): logger = logging.getLogger("") @@ -265,7 +262,7 @@ def _reload_stdlib_logging(*args, log_config=None): def setup_logging( hs, config, use_worker_options=False, logBeginner: LogBeginner = globalLogBeginner -) -> ILogObserver: +) -> None: """ Set up the logging subsystem. @@ -278,9 +275,6 @@ def setup_logging( logBeginner: The Twisted logBeginner to use. - Returns: - The "root" Twisted Logger observer, suitable for sending logs to from a - Logger instance. """ log_config = config.worker_log_config if use_worker_options else config.log_config @@ -299,7 +293,7 @@ def read_config(*args, callback=None): log_config_body = read_config() - logger = _setup_stdlib_logging(config, log_config_body, logBeginner=logBeginner) + _setup_stdlib_logging(config, log_config_body, logBeginner=logBeginner) appbase.register_sighup(read_config, callback=_reload_stdlib_logging) # make sure that the first thing we log is a thing we can grep backwards @@ -308,5 +302,3 @@ def read_config(*args, callback=None): logging.warning("Server %s version %s", sys.argv[0], get_version_string(synapse)) logging.info("Server hostname: %s", config.server_name) logging.info("Instance name: %s", hs.get_instance_name()) - - return logger From 99f50ec05d57a61f32dd003c890ff36b98a99f39 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 21 Oct 2020 07:52:11 -0400 Subject: [PATCH 03/23] Rework structured logging to use the Python standard library logging framework. --- docs/structured_logging.md | 113 +++++++------- synapse/logging/__init__.py | 18 +++ synapse/logging/_remote.py | 95 ++++++------ synapse/logging/_terse_json.py | 151 +++---------------- tests/logging/test_remote_handler.py | 172 +++++++++++++++++++++ tests/logging/test_terse_json.py | 215 +++------------------------ tests/server.py | 4 +- 7 files changed, 336 insertions(+), 432 deletions(-) create mode 100644 tests/logging/test_remote_handler.py diff --git a/docs/structured_logging.md b/docs/structured_logging.md index decec9b8fa1e..3f1d6a3fd657 100644 --- a/docs/structured_logging.md +++ b/docs/structured_logging.md @@ -1,83 +1,76 @@ # Structured Logging -A structured logging system can be useful when your logs are destined for a machine to parse and process. By maintaining its machine-readable characteristics, it enables more efficient searching and aggregations when consumed by software such as the "ELK stack". +A structured logging system can be useful when your logs are destined for a +machine to parse and process. 
By maintaining its machine-readable characteristics,
+it enables more efficient searching and aggregations when consumed by software
+such as the "ELK stack".
 
-Synapse's structured logging system is configured via the file that Synapse's `log_config` config option points to. The file must be YAML and contain `structured: true`. It must contain a list of "drains" (places where logs go to).
+Synapse's structured logging system is configured via the file that Synapse's
+`log_config` config option points to. The file should include a formatter which
+uses the `synapse.logging.TerseJsonFormatter` class included with Synapse and a
+handler which uses that formatter.
 
 A structured logging configuration looks similar to the following:
 
 ```yaml
-structured: true
+version: 1
+
+formatters:
+    structured:
+        class: synapse.logging.TerseJsonFormatter
+
+handlers:
+    file:
+        class: logging.handlers.TimedRotatingFileHandler
+        formatter: structured
+        filename: /path/to/my/logs/homeserver.log
+        when: midnight
+        backupCount: 3  # Does not include the current log file.
+        encoding: utf8
 
 loggers:
     synapse:
         level: INFO
+        handlers: [file]
     synapse.storage.SQL:
         level: WARNING
-
-drains:
-    console:
-        type: console
-        location: stdout
-    file:
-        type: file_json
-        location: homeserver.log
 ```
 
-The above logging config will set Synapse as 'INFO' logging level by default, with the SQL layer at 'WARNING', and will have two logging drains (to the console and to a file, stored as JSON).
-
-## Drain Types
-
-Drain types can be specified by the `type` key.
-
-### `console`
-
-Outputs human-readable logs to the console.
-
-Arguments:
-
-- `location`: Either `stdout` or `stderr`.
-
-### `console_json`
-
-Outputs machine-readable JSON logs to the console.
-
-Arguments:
-
-- `location`: Either `stdout` or `stderr`.
-
-### `console_json_terse`
+The above logging config will set Synapse to the 'INFO' logging level by
+default, with the SQL layer at 'WARNING', and will log to a file, stored as JSON.
 
-Outputs machine-readable JSON logs to the console, separated by newlines. This
-format is not designed to be read and re-formatted into human-readable text, but
-is optimal for a logging aggregation system.
+It is also possible to configure Synapse to log to a remote endpoint by using the
+`synapse.logging.RemoteHandler` class included with Synapse. It takes the
+following arguments:
 
-Arguments:
-
-- `location`: Either `stdout` or `stderr`.
-
-### `file`
-
-Outputs human-readable logs to a file.
-
-Arguments:
-
-- `location`: An absolute path to the file to log to.
-
-### `file_json`
-
-Outputs machine-readable logs to a file.
+- `host`: Hostname or IP address of the log aggregator.
+- `port`: Numerical port to contact on the host.
+- `maximum_buffer`: (Optional, defaults to 1000) The maximum buffer size to allow.
 
-Arguments:
+A remote structured logging configuration looks similar to the following:
 
-- `location`: An absolute path to the file to log to.
+```yaml
+version: 1
 
-### `network_json_terse`
+formatters:
+    structured:
+        class: synapse.logging.TerseJsonFormatter
 
-Delivers machine-readable JSON logs to a log aggregator over TCP. This is
-compatible with LogStash's TCP input with the codec set to `json_lines`.
+handlers:
+    remote:
+        class: synapse.logging.RemoteHandler
+        formatter: structured
+        host: 10.1.2.3
+        port: 9999
 
-Arguments:
+loggers:
+    synapse:
+        level: INFO
+        handlers: [remote]
+    synapse.storage.SQL:
+        level: WARNING
+```
 
-- `host`: Hostname or IP address of the log aggregator.
-- `port`: Numerical port to contact on the host.
\ No newline at end of file
+The above logging config will set Synapse to the 'INFO' logging level by
+default, with the SQL layer at 'WARNING', and will log JSON-formatted messages
+to a remote endpoint at 10.1.2.3:9999.
diff --git a/synapse/logging/__init__.py b/synapse/logging/__init__.py
index e69de29bb2d1..1aff2df42400 100644
--- a/synapse/logging/__init__.py
+++ b/synapse/logging/__init__.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# These are imported to allow for nicer logging configuration files.
+from synapse.logging._remote import RemoteHandler  # noqa
+from synapse.logging._terse_json import TerseJsonFormatter  # noqa
diff --git a/synapse/logging/_remote.py b/synapse/logging/_remote.py
index 0caf32591623..136e704d37d4 100644
--- a/synapse/logging/_remote.py
+++ b/synapse/logging/_remote.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 import sys
 import traceback
 from collections import deque
@@ -21,6 +22,7 @@
 from typing import Callable, Optional
 
 import attr
+from typing_extensions import Deque
 from zope.interface import implementer
 
 from twisted.application.internet import ClientService
@@ -32,7 +34,7 @@
 )
 from twisted.internet.interfaces import IPushProducer, ITransport
 from twisted.internet.protocol import Factory, Protocol
-from twisted.logger import ILogObserver, Logger, LogLevel
+from twisted.logger import Logger
 
 
 @attr.s
@@ -45,11 +47,11 @@ class LogProducer:
     Args:
         buffer: Log buffer to read logs from.
         transport: Transport to write to.
-        format_event: A callable to format the log entry to a string.
+        format: A callable to format the log record to a string.
     """
 
     transport = attr.ib(type=ITransport)
-    format_event = attr.ib(type=Callable[[dict], str])
+    _format = attr.ib(type=Callable[[logging.LogRecord], str])
     _buffer = attr.ib(type=deque)
     _paused = attr.ib(default=False, type=bool, init=False)
 
@@ -61,16 +63,19 @@ def stopProducing(self):
         self._buffer = deque()
 
     def resumeProducing(self):
+        # If we're already producing, nothing to do.
         self._paused = False
 
+        # Loop until paused.
         while self._paused is False and (self._buffer and self.transport.connected):
             try:
-                # Request the next event and format it.
-                event = self._buffer.popleft()
-                msg = self.format_event(event)
+                # Request the next record and format it.
+                record = self._buffer.popleft()
+                msg = self._format(record)
 
                 # Send it as a new line over the transport.
                 self.transport.write(msg.encode("utf8"))
+                self.transport.write(b"\n")
             except Exception:
                 # Something has gone wrong writing to the transport -- log it
                 # and break out of the while.
@@ -78,60 +83,64 @@ def resumeProducing(self):
                 break
 
 
-@attr.s
-@implementer(ILogObserver)
-class TCPLogObserver:
+class RemoteHandler(logging.Handler):
     """
-    An IObserver that writes JSON logs to a TCP target.
+    A logging handler that writes logs to a TCP target.
 
     Args:
-        hs (HomeServer): The homeserver that is being logged for.
         host: The host of the logging target.
         port: The logging target's port.
-        format_event: A callable to format the log entry to a string.
         maximum_buffer: The maximum buffer size.
     """
 
-    hs = attr.ib()
-    host = attr.ib(type=str)
-    port = attr.ib(type=int)
-    format_event = attr.ib(type=Callable[[dict], str])
-    maximum_buffer = attr.ib(type=int)
-    _buffer = attr.ib(default=attr.Factory(deque), type=deque)
-    _connection_waiter = attr.ib(default=None, type=Optional[Deferred])
-    _logger = attr.ib(default=attr.Factory(Logger))
-    _producer = attr.ib(default=None, type=Optional[LogProducer])
-
-    def start(self) -> None:
+    def __init__(
+        self,
+        host: str,
+        port: int,
+        maximum_buffer: int = 1000,
+        level=logging.NOTSET,
+        _reactor=None,
+    ):
+        super().__init__(level=level)
+        self.host = host
+        self.port = port
+        self.maximum_buffer = maximum_buffer
+
+        self._buffer = deque()  # type: Deque[logging.LogRecord]
+        self._connection_waiter = None  # type: Optional[Deferred]
+        self._logger = Logger()
+        self._producer = None  # type: Optional[LogProducer]
 
         # Connect without DNS lookups if it's a direct IP.
+        if _reactor is None:
+            from twisted.internet import reactor
+
+            _reactor = reactor
+
         try:
             ip = ip_address(self.host)
             if isinstance(ip, IPv4Address):
-                endpoint = TCP4ClientEndpoint(
-                    self.hs.get_reactor(), self.host, self.port
-                )
+                endpoint = TCP4ClientEndpoint(_reactor, self.host, self.port)
             elif isinstance(ip, IPv6Address):
-                endpoint = TCP6ClientEndpoint(
-                    self.hs.get_reactor(), self.host, self.port
-                )
+                endpoint = TCP6ClientEndpoint(_reactor, self.host, self.port)
             else:
                 raise ValueError("Unknown IP address provided: %s" % (self.host,))
         except ValueError:
-            endpoint = HostnameEndpoint(self.hs.get_reactor(), self.host, self.port)
+            endpoint = HostnameEndpoint(_reactor, self.host, self.port)
 
         factory = Factory.forProtocol(Protocol)
-        self._service = ClientService(endpoint, factory, clock=self.hs.get_reactor())
+        self._service = ClientService(endpoint, factory, clock=_reactor)
         self._service.startService()
         self._connect()
 
-    def stop(self):
+    def close(self):
         self._service.stopService()
 
     def _connect(self) -> None:
         """
         Triggers an attempt to connect then write to the remote if not already
         writing.
         """
+        # Do not attempt to open multiple connections.
        if self._connection_waiter:
             return
 
@@ -158,9 +167,7 @@ def writer(r):
 
             # Make a new producer and start it.
             self._producer = LogProducer(
-                buffer=self._buffer,
-                transport=r.transport,
-                format_event=self.format_event,
+                buffer=self._buffer, transport=r.transport, format=self.format,
             )
             r.transport.registerProducer(self._producer, True)
             self._producer.resumeProducing()
@@ -168,19 +175,19 @@ def writer(r):
 
     def _handle_pressure(self) -> None:
         """
-        Handle backpressure by shedding events.
+        Handle backpressure by shedding records.
 
-        The buffer will, in this order, until the buffer is below the maximum:
-            - Shed DEBUG events
-            - Shed INFO events
-            - Shed the middle 50% of the events.
+        Records are shed from the buffer, in this order, until it is below the
+        maximum:
+            - Shed DEBUG records.
+            - Shed INFO records.
+            - Shed the middle 50% of the records.
""" if len(self._buffer) <= self.maximum_buffer: return # Strip out DEBUGs self._buffer = deque( - filter(lambda event: event["log_level"] != LogLevel.debug, self._buffer) + filter(lambda record: record.levelno > logging.DEBUG, self._buffer) ) if len(self._buffer) <= self.maximum_buffer: @@ -188,7 +195,7 @@ def _handle_pressure(self) -> None: # Strip out INFOs self._buffer = deque( - filter(lambda event: event["log_level"] != LogLevel.info, self._buffer) + filter(lambda record: record.levelno > logging.INFO, self._buffer) ) if len(self._buffer) <= self.maximum_buffer: @@ -209,14 +216,14 @@ def _handle_pressure(self) -> None: self._buffer.extend(reversed(end_buffer)) - def __call__(self, event: dict) -> None: - self._buffer.append(event) + def emit(self, record: logging.LogRecord) -> None: + self._buffer.append(record) # Handle backpressure, if it exists. try: self._handle_pressure() except Exception: - # If handling backpressure fails,clear the buffer and log the + # If handling backpressure fails, clear the buffer and log the # exception. self._buffer.clear() self._logger.failure("Failed clearing backpressure") diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 9b46956ca96b..afcf4cb1fc01 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -16,141 +16,34 @@ """ Log formatters that output terse JSON. """ - import json -from typing import IO - -from twisted.logger import FileLogObserver - -from synapse.logging._remote import TCPLogObserver +import logging +from typing import Optional _encoder = json.JSONEncoder(ensure_ascii=False, separators=(",", ":")) -def flatten_event(event: dict, metadata: dict, include_time: bool = False): - """ - Flatten a Twisted logging event to an dictionary capable of being sent - as a log event to a logging aggregation system. - - The format is vastly simplified and is not designed to be a "human readable - string" in the sense that traditional logs are. Instead, the structure is - optimised for searchability and filtering, with human-understandable log - keys. - - Args: - event (dict): The Twisted logging event we are flattening. - metadata (dict): Additional data to include with each log message. This - can be information like the server name. Since the target log - consumer does not know who we are other than by host IP, this - allows us to forward through static information. - include_time (bool): Should we include the `time` key? If False, the - event time is stripped from the event. - """ - new_event = {} - - # If it's a failure, make the new event's log_failure be the traceback text. - if "log_failure" in event: - new_event["log_failure"] = event["log_failure"].getTraceback() - - # If it's a warning, copy over a string representation of the warning. - if "warning" in event: - new_event["warning"] = str(event["warning"]) - - # Stdlib logging events have "log_text" as their human-readable portion, - # Twisted ones have "log_format". For now, include the log_format, so that - # context only given in the log format (e.g. what is being logged) is - # available. - if "log_text" in event: - new_event["log"] = event["log_text"] - else: - new_event["log"] = event["log_format"] - - # We want to include the timestamp when forwarding over the network, but - # exclude it when we are writing to stdout. This is because the log ingester - # (e.g. logstash, fluentd) can add its own timestamp. 
- if include_time: - new_event["time"] = round(event["log_time"], 2) - - # Convert the log level to a textual representation. - new_event["level"] = event["log_level"].name.upper() - - # Ignore these keys, and do not transfer them over to the new log object. - # They are either useless (isError), transferred manually above (log_time, - # log_level, etc), or contain Python objects which are not useful for output - # (log_logger, log_source). - keys_to_delete = [ - "isError", - "log_failure", - "log_format", - "log_level", - "log_logger", - "log_source", - "log_system", - "log_time", - "log_text", - "observer", - "warning", - ] - - # If it's from the Twisted legacy logger (twisted.python.log), it adds some - # more keys we want to purge. - if event.get("log_namespace") == "log_legacy": - keys_to_delete.extend(["message", "system", "time"]) - - # Rather than modify the dictionary in place, construct a new one with only - # the content we want. The original event should be considered 'frozen'. - for key in event.keys(): - - if key in keys_to_delete: - continue - - if isinstance(event[key], (str, int, bool, float)) or event[key] is None: - # If it's a plain type, include it as is. - new_event[key] = event[key] - else: - # If it's not one of those basic types, write out a string - # representation. This should probably be a warning in development, - # so that we are sure we are only outputting useful data. - new_event[key] = str(event[key]) - - # Add the metadata information to the event (e.g. the server_name). - new_event.update(metadata) - - return new_event - - -def TerseJSONToConsoleLogObserver(outFile: IO[str], metadata: dict) -> FileLogObserver: - """ - A log observer that formats events to a flattened JSON representation. - - Args: - outFile: The file object to write to. - metadata: Metadata to be added to each log object. - """ - - def formatEvent(_event: dict) -> str: - flattened = flatten_event(_event, metadata) - return _encoder.encode(flattened) + "\n" - - return FileLogObserver(outFile, formatEvent) - +class TerseJsonFormatter: + def __init__( + self, *args, include_time: bool = True, metadata: Optional[dict] = None + ): + self.include_time = include_time + self.metadata = metadata or {} -def TerseJSONToTCPLogObserver( - hs, host: str, port: int, metadata: dict, maximum_buffer: int -) -> FileLogObserver: - """ - A log observer that formats events to a flattened JSON representation. + def format(self, record: logging.LogRecord) -> str: + event = { + "log": record.getMessage(), + "namespace": record.name, + "level": record.levelname, + } - Args: - hs (HomeServer): The homeserver that is being logged for. - host: The host of the logging target. - port: The logging target's port. - metadata: Metadata to be added to each log object. - maximum_buffer: The maximum buffer size. - """ + # We want to include the timestamp when forwarding over the network, but + # exclude it when we are writing to stdout. This is because the log ingester + # (e.g. logstash, fluentd) can add its own timestamp. + if self.include_time: + event["time"] = round(record.created, 2) - def formatEvent(_event: dict) -> str: - flattened = flatten_event(_event, metadata, include_time=True) - return _encoder.encode(flattened) + "\n" + # Add the metadata information to the event (e.g. the server_name). 
+ event.update(self.metadata) - return TCPLogObserver(hs, host, port, formatEvent, maximum_buffer) + return _encoder.encode(event) diff --git a/tests/logging/test_remote_handler.py b/tests/logging/test_remote_handler.py new file mode 100644 index 000000000000..08395f27d505 --- /dev/null +++ b/tests/logging/test_remote_handler.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from twisted.test.proto_helpers import AccumulatingProtocol + +from synapse.logging import RemoteHandler + +from tests.server import FakeTransport +from tests.unittest import DEBUG, HomeserverTestCase + + +class StructuredLoggingTestBase: + """ + Test base that registers a cleanup handler to reset the stdlib log handler + to 'unset'. + """ + + def prepare(self, reactor, clock, hs): + def _cleanup(): + logging.getLogger("synapse").setLevel(logging.NOTSET) + + self.addCleanup(_cleanup) + + +def connect_logging_client(reactor, client_id): + # This is essentially tests.server.connect_client, but disabling autoflush on + # the client transport. This is necessary to avoid an infinite loop due to + # sending of data via the logging transport causing additional logs to be + # written. + factory = reactor.tcpClients.pop(client_id)[2] + client = factory.buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, reactor)) + client.makeConnection(FakeTransport(server, reactor, autoflush=False)) + + return client, server + + +class RemoteHandlerTestCase(StructuredLoggingTestBase, HomeserverTestCase): + @DEBUG + def test_log_output(self): + """ + The remote handler delivers logs over TCP. + """ + handler = RemoteHandler("127.0.0.1", 9000, _reactor=self.reactor) + logger = logging.getLogger() + logger.addHandler(handler) + + logger.info("Hello there, %s!", "wally") + + # Trigger the connection + client, server = connect_logging_client(self.reactor, 0) + + # Trigger data being sent + client.transport.flush() + + # One log message, with a single trailing newline + logs = server.data.decode("utf8").splitlines() + self.assertEqual(len(logs), 1) + self.assertEqual(server.data.count(b"\n"), 1) + + # Ensure the data passed through properly. + self.assertEqual(logs[0], "Hello there, wally!") + + @DEBUG + def test_log_backpressure_debug(self): + """ + When backpressure is hit, DEBUG logs will be shed. 
+ """ + handler = RemoteHandler( + "127.0.0.1", 9000, maximum_buffer=10, _reactor=self.reactor + ) + logger = logging.getLogger() + logger.addHandler(handler) + + # Send some debug messages + for i in range(0, 3): + logger.debug("debug %s" % (i,)) + + # Send a bunch of useful messages + for i in range(0, 7): + logger.info("info %s" % (i,)) + + # The last debug message pushes it past the maximum buffer + logger.debug("too much debug") + + # Allow the reconnection + client, server = connect_logging_client(self.reactor, 0) + client.transport.flush() + + # Only the 7 infos made it through, the debugs were elided + logs = server.data.splitlines() + self.assertEqual(len(logs), 7) + self.assertNotIn(b"debug", server.data) + + @DEBUG + def test_log_backpressure_info(self): + """ + When backpressure is hit, DEBUG and INFO logs will be shed. + """ + handler = RemoteHandler( + "127.0.0.1", 9000, maximum_buffer=10, _reactor=self.reactor + ) + logger = logging.getLogger() + logger.addHandler(handler) + + # Send some debug messages + for i in range(0, 3): + logger.debug("debug %s" % (i,)) + + # Send a bunch of useful messages + for i in range(0, 10): + logger.warning("warn %s" % (i,)) + + # Send a bunch of info messages + for i in range(0, 3): + logger.info("info %s" % (i,)) + + # The last debug message pushes it past the maximum buffer + logger.debug("too much debug") + + # Allow the reconnection + client, server = connect_logging_client(self.reactor, 0) + client.transport.flush() + + # The 10 warnings made it through, the debugs and infos were elided + logs = server.data.splitlines() + self.assertEqual(len(logs), 10) + self.assertNotIn(b"debug", server.data) + self.assertNotIn(b"info", server.data) + + @DEBUG + def test_log_backpressure_cut_middle(self): + """ + When backpressure is hit, and no more DEBUG and INFOs cannot be culled, + it will cut the middle messages out. + """ + handler = RemoteHandler( + "127.0.0.1", 9000, maximum_buffer=10, _reactor=self.reactor + ) + logger = logging.getLogger() + logger.addHandler(handler) + + # Send a bunch of useful messages + for i in range(0, 20): + logger.warning("warn %s" % (i,)) + + # Allow the reconnection + client, server = connect_logging_client(self.reactor, 0) + client.transport.flush() + + # The first five and last five warnings made it through, the debugs and + # infos were elided + logs = server.data.decode("utf8").splitlines() + self.assertEqual( + ["warn %s" % (i,) for i in range(5)] + + ["warn %s" % (i,) for i in range(15, 20)], + logs, + ) diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index fd128b88e0c0..43ed77214fbd 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -14,57 +14,35 @@ # limitations under the License. import json -from collections import Counter +import logging +from io import StringIO -from twisted.logger import Logger +from synapse.logging._terse_json import TerseJsonFormatter -from synapse.logging._structured import setup_structured_logging +from tests.unittest import DEBUG, HomeserverTestCase -from tests.server import connect_client -from tests.unittest import HomeserverTestCase -from .test_structured import FakeBeginner, StructuredLoggingTestBase - - -class TerseJSONTCPTestCase(StructuredLoggingTestBase, HomeserverTestCase): +class TerseJsonTestCase(HomeserverTestCase): + @DEBUG def test_log_output(self): """ - The Terse JSON outputter delivers simplified structured logs over TCP. + The Terse JSON formatter converts log messages to JSON. 
""" - log_config = { - "drains": { - "tersejson": { - "type": "network_json_terse", - "host": "127.0.0.1", - "port": 8000, - } - } - } - - # Begin the logger with our config - beginner = FakeBeginner() - setup_structured_logging( - self.hs, self.hs.config, log_config, logBeginner=beginner - ) + output = StringIO() - logger = Logger( - namespace="tests.logging.test_terse_json", observer=beginner.observers[0] - ) - logger.info("Hello there, {name}!", name="wally") + handler = logging.StreamHandler(output) + handler.setFormatter(TerseJsonFormatter(metadata={"server_name": "foo"})) - # Trigger the connection - self.pump() + logger = logging.getLogger() + logger.addHandler(handler) - _, server = connect_client(self.reactor, 0) + logger.info("Hello there, %s!", "wally") - # Trigger data being sent - self.pump() - - # One log message, with a single trailing newline - logs = server.data.decode("utf8").splitlines() + # One log message, with a single trailing newline. + data = output.getvalue() + logs = data.splitlines() self.assertEqual(len(logs), 1) - self.assertEqual(server.data.count(b"\n"), 1) - + self.assertEqual(data.count("\n"), 1) log = json.loads(logs[0]) # The terse logger should give us these keys. @@ -72,163 +50,8 @@ def test_log_output(self): "log", "time", "level", - "log_namespace", - "request", - "scope", + "namespace", "server_name", - "name", ] self.assertCountEqual(log.keys(), expected_log_keys) - - # It contains the data we expect. - self.assertEqual(log["name"], "wally") - - def test_log_backpressure_debug(self): - """ - When backpressure is hit, DEBUG logs will be shed. - """ - log_config = { - "loggers": {"synapse": {"level": "DEBUG"}}, - "drains": { - "tersejson": { - "type": "network_json_terse", - "host": "127.0.0.1", - "port": 8000, - "maximum_buffer": 10, - } - }, - } - - # Begin the logger with our config - beginner = FakeBeginner() - setup_structured_logging( - self.hs, - self.hs.config, - log_config, - logBeginner=beginner, - redirect_stdlib_logging=False, - ) - - logger = Logger( - namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] - ) - - # Send some debug messages - for i in range(0, 3): - logger.debug("debug %s" % (i,)) - - # Send a bunch of useful messages - for i in range(0, 7): - logger.info("test message %s" % (i,)) - - # The last debug message pushes it past the maximum buffer - logger.debug("too much debug") - - # Allow the reconnection - _, server = connect_client(self.reactor, 0) - self.pump() - - # Only the 7 infos made it through, the debugs were elided - logs = server.data.splitlines() - self.assertEqual(len(logs), 7) - - def test_log_backpressure_info(self): - """ - When backpressure is hit, DEBUG and INFO logs will be shed. 
- """ - log_config = { - "loggers": {"synapse": {"level": "DEBUG"}}, - "drains": { - "tersejson": { - "type": "network_json_terse", - "host": "127.0.0.1", - "port": 8000, - "maximum_buffer": 10, - } - }, - } - - # Begin the logger with our config - beginner = FakeBeginner() - setup_structured_logging( - self.hs, - self.hs.config, - log_config, - logBeginner=beginner, - redirect_stdlib_logging=False, - ) - - logger = Logger( - namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] - ) - - # Send some debug messages - for i in range(0, 3): - logger.debug("debug %s" % (i,)) - - # Send a bunch of useful messages - for i in range(0, 10): - logger.warn("test warn %s" % (i,)) - - # Send a bunch of info messages - for i in range(0, 3): - logger.info("test message %s" % (i,)) - - # The last debug message pushes it past the maximum buffer - logger.debug("too much debug") - - # Allow the reconnection - client, server = connect_client(self.reactor, 0) - self.pump() - - # The 10 warnings made it through, the debugs and infos were elided - logs = list(map(json.loads, server.data.decode("utf8").splitlines())) - self.assertEqual(len(logs), 10) - - self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) - - def test_log_backpressure_cut_middle(self): - """ - When backpressure is hit, and no more DEBUG and INFOs cannot be culled, - it will cut the middle messages out. - """ - log_config = { - "loggers": {"synapse": {"level": "DEBUG"}}, - "drains": { - "tersejson": { - "type": "network_json_terse", - "host": "127.0.0.1", - "port": 8000, - "maximum_buffer": 10, - } - }, - } - - # Begin the logger with our config - beginner = FakeBeginner() - setup_structured_logging( - self.hs, - self.hs.config, - log_config, - logBeginner=beginner, - redirect_stdlib_logging=False, - ) - - logger = Logger( - namespace="synapse.logging.test_terse_json", observer=beginner.observers[0] - ) - - # Send a bunch of useful messages - for i in range(0, 20): - logger.warn("test warn", num=i) - - # Allow the reconnection - client, server = connect_client(self.reactor, 0) - self.pump() - - # The first five and last five warnings made it through, the debugs and - # infos were elided - logs = list(map(json.loads, server.data.decode("utf8").splitlines())) - self.assertEqual(len(logs), 10) - self.assertEqual(Counter([x["level"] for x in logs]), {"WARN": 10}) - self.assertEqual([0, 1, 2, 3, 4, 15, 16, 17, 18, 19], [x["num"] for x in logs]) + self.assertEqual(log["log"], "Hello there, wally!") diff --git a/tests/server.py b/tests/server.py index 4d33b8409736..cf09d1a13273 100644 --- a/tests/server.py +++ b/tests/server.py @@ -571,12 +571,10 @@ def connect_client(reactor: IReactorTCP, client_id: int) -> AccumulatingProtocol reactor factory: The connecting factory to build. """ - factory = reactor.tcpClients[client_id][2] + factory = reactor.tcpClients.pop(client_id)[2] client = factory.buildProtocol(None) server = AccumulatingProtocol() server.makeConnection(FakeTransport(client, reactor)) client.makeConnection(FakeTransport(server, reactor)) - reactor.tcpClients.pop(client_id) - return client, server From fcb74ae5c9ac9ad49ac4635ea716f4b18f86dc79 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 21 Oct 2020 07:52:18 -0400 Subject: [PATCH 04/23] Pipe through the server_name properly. 
--- synapse/config/logger.py | 7 +++-- synapse/logging/_terse_json.py | 47 ++++++++++++++++++++++---------- synapse/logging/filter.py | 33 ++++++++++++++++++++++ tests/logging/test_terse_json.py | 5 ++-- 4 files changed, 73 insertions(+), 19 deletions(-) create mode 100644 synapse/logging/filter.py diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 726fe8b5a0c5..49a059c51cfb 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -32,6 +32,7 @@ import synapse from synapse.app import _base as appbase from synapse.logging.context import LoggingContextFilter +from synapse.logging.filter import MetadataFilter from synapse.util.versionstring import get_version_string from ._base import Config, ConfigError @@ -199,12 +200,14 @@ def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner) -> None: # filter options, but care must when using e.g. MemoryHandler to buffer # writes. - log_filter = LoggingContextFilter(request="") + log_context_filter = LoggingContextFilter(request="") + log_metadata_filter = MetadataFilter({"server_name": config.server_name}) old_factory = logging.getLogRecordFactory() def factory(*args, **kwargs): record = old_factory(*args, **kwargs) - log_filter.filter(record) + log_context_filter.filter(record) + log_metadata_filter.filter(record) return record logging.setLogRecordFactory(factory) diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index afcf4cb1fc01..b342e9eec7a9 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -18,32 +18,49 @@ """ import json import logging -from typing import Optional _encoder = json.JSONEncoder(ensure_ascii=False, separators=(",", ":")) +# The properties of a standard LogRecord. +_LOG_RECORD_ATTRIBUTES = { + "args", + "asctime", + "created", + "exc_info", + # exc_text isn't a public attribute, but is used to cache the result of formatException. + "exc_text", + "filename", + "funcName", + "levelname", + "levelno", + "lineno", + "message", + "module", + "msecs", + "msg", + "name", + "pathname", + "process", + "processName", + "relativeCreated", + "stack_info", + "thread", + "threadName", +} -class TerseJsonFormatter: - def __init__( - self, *args, include_time: bool = True, metadata: Optional[dict] = None - ): - self.include_time = include_time - self.metadata = metadata or {} +class TerseJsonFormatter(logging.Formatter): def format(self, record: logging.LogRecord) -> str: event = { "log": record.getMessage(), "namespace": record.name, "level": record.levelname, + "time": round(record.created, 2), } - # We want to include the timestamp when forwarding over the network, but - # exclude it when we are writing to stdout. This is because the log ingester - # (e.g. logstash, fluentd) can add its own timestamp. - if self.include_time: - event["time"] = round(record.created, 2) - - # Add the metadata information to the event (e.g. the server_name). - event.update(self.metadata) + # Add any extra attributes to the event. + extra_attributes = set(record.__dict__.keys()) - _LOG_RECORD_ATTRIBUTES + for key in extra_attributes: + event[key] = getattr(record, key) return _encoder.encode(event) diff --git a/synapse/logging/filter.py b/synapse/logging/filter.py new file mode 100644 index 000000000000..1baf8dd67934 --- /dev/null +++ b/synapse/logging/filter.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+from typing_extensions import Literal
+
+
+class MetadataFilter(logging.Filter):
+    """Logging filter that adds constant values to each record.
+
+    Args:
+        metadata: Key-value pairs to add to each record.
+    """
+
+    def __init__(self, metadata: dict):
+        self._metadata = metadata
+
+    def filter(self, record: logging.LogRecord) -> Literal[True]:
+        for key, value in self._metadata.items():
+            setattr(record, key, value)
+        return True
diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py
index 43ed77214fbd..6e698fc197c5 100644
--- a/tests/logging/test_terse_json.py
+++ b/tests/logging/test_terse_json.py
@@ -31,7 +31,7 @@ def test_log_output(self):
         output = StringIO()
 
         handler = logging.StreamHandler(output)
-        handler.setFormatter(TerseJsonFormatter(metadata={"server_name": "foo"}))
+        handler.setFormatter(TerseJsonFormatter())
 
         logger = logging.getLogger()
         logger.addHandler(handler)
@@ -51,7 +51,8 @@ def test_log_output(self):
             "time",
             "level",
             "namespace",
-            "server_name",
+            # Added via LoggingContextFilter, configured in tests.test_utils.logging_setup.setup_logging.
+            "request",
         ]
         self.assertCountEqual(log.keys(), expected_log_keys)
         self.assertEqual(log["log"], "Hello there, wally!")

From 24a98826a04ae13eaef6bcd5f29800acea2a5c24 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Wed, 21 Oct 2020 08:10:38 -0400
Subject: [PATCH 05/23] Use a standard library logger.

---
 synapse/logging/_remote.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/logging/_remote.py b/synapse/logging/_remote.py
index 136e704d37d4..ba45424f0274 100644
--- a/synapse/logging/_remote.py
+++ b/synapse/logging/_remote.py
@@ -34,7 +34,8 @@
 )
 from twisted.internet.interfaces import IPushProducer, ITransport
 from twisted.internet.protocol import Factory, Protocol
-from twisted.logger import Logger
+
+logger = logging.getLogger(__name__)
 
 
 @attr.s
@@ -108,7 +109,6 @@ def __init__(
 
         self._buffer = deque()  # type: Deque[logging.LogRecord]
         self._connection_waiter = None  # type: Optional[Deferred]
-        self._logger = Logger()
         self._producer = None  # type: Optional[LogProducer]
 
         # Connect without DNS lookups if it's a direct IP.
@@ -226,7 +226,7 @@ def emit(self, record: logging.LogRecord) -> None:
             # If handling backpressure fails, clear the buffer and log the
             # exception.
             self._buffer.clear()
-            self._logger.failure("Failed clearing backpressure")
+            logger.warning("Failed clearing backpressure")
 
         # Try and write immediately.
         self._connect()

From 4462758e76c056a8d48e736c9b5a44f2a143de2e Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Wed, 21 Oct 2020 13:40:24 -0400
Subject: [PATCH 06/23] Update synmark for the changes.

This dramatically simplifies the configuration necessary for running the
synmark logging suite. By avoiding creating a homeserver object we can
avoid creating a database and run the testing suite more quickly.
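
For reference, a dict config of this shape is what the standard library's
logging configuration consumes; a minimal standalone sketch of an equivalent
setup via logging.config.dictConfig (host, port, level and buffer size are
placeholders, mirroring the documentation examples):

    import logging
    import logging.config

    logging.config.dictConfig({
        "version": 1,
        "formatters": {
            "structured": {"class": "synapse.logging.TerseJsonFormatter"},
        },
        "handlers": {
            "remote": {
                "class": "synapse.logging.RemoteHandler",
                "formatter": "structured",
                "host": "127.0.0.1",
                "port": 9000,
                "maximum_buffer": 100,
            },
        },
        "loggers": {"synapse": {"level": "DEBUG", "handlers": ["remote"]}},
    })

    # This ends up as a single terse-JSON line sent to the remote endpoint.
    logging.getLogger("synapse").info("Hello there, %s!", "wally")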
--- scripts-dev/lint.sh | 2 +- synmark/__init__.py | 39 --------------------------- synmark/__main__.py | 6 ++--- synmark/suites/logging.py | 56 ++++++++++++++++++++++++--------------- 4 files changed, 38 insertions(+), 65 deletions(-) diff --git a/scripts-dev/lint.sh b/scripts-dev/lint.sh index f2b65a210533..c6d0ea4beff3 100755 --- a/scripts-dev/lint.sh +++ b/scripts-dev/lint.sh @@ -80,7 +80,7 @@ else # then lint everything! if [[ -z ${files+x} ]]; then # Lint all source code files and directories - files=("synapse" "tests" "scripts-dev" "scripts" "contrib" "synctl" "setup.py") + files=("synapse" "tests" "scripts-dev" "scripts" "contrib" "synctl" "setup.py" "synmark") fi fi diff --git a/synmark/__init__.py b/synmark/__init__.py index 09bc7e7927e9..3d4ec3e1846b 100644 --- a/synmark/__init__.py +++ b/synmark/__init__.py @@ -21,45 +21,6 @@ from twisted.internet.pollreactor import PollReactor as Reactor from twisted.internet.main import installReactor -from synapse.config.homeserver import HomeServerConfig -from synapse.util import Clock - -from tests.utils import default_config, setup_test_homeserver - - -async def make_homeserver(reactor, config=None): - """ - Make a Homeserver suitable for running benchmarks against. - - Args: - reactor: A Twisted reactor to run under. - config: A HomeServerConfig to use, or None. - """ - cleanup_tasks = [] - clock = Clock(reactor) - - if not config: - config = default_config("test") - - config_obj = HomeServerConfig() - config_obj.parse_config_dict(config, "", "") - - hs = setup_test_homeserver( - cleanup_tasks.append, config=config_obj, reactor=reactor, clock=clock - ) - stor = hs.get_datastore() - - # Run the database background updates. - if hasattr(stor.db_pool.updates, "do_next_background_update"): - while not await stor.db_pool.updates.has_completed_background_updates(): - await stor.db_pool.updates.do_next_background_update(1) - - def cleanup(): - for i in cleanup_tasks: - i() - - return hs, clock.sleep, cleanup - def make_reactor(): """ diff --git a/synmark/__main__.py b/synmark/__main__.py index 17df9ddeb797..de13c1a9094c 100644 --- a/synmark/__main__.py +++ b/synmark/__main__.py @@ -12,20 +12,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import sys from argparse import REMAINDER from contextlib import redirect_stderr from io import StringIO import pyperf -from synmark import make_reactor -from synmark.suites import SUITES from twisted.internet.defer import Deferred, ensureDeferred from twisted.logger import globalLogBeginner, textFileLogObserver from twisted.python.failure import Failure +from synmark import make_reactor +from synmark.suites import SUITES + from tests.utils import setupdb diff --git a/synmark/suites/logging.py b/synmark/suites/logging.py index d8e4c7d58f26..241593082ecb 100644 --- a/synmark/suites/logging.py +++ b/synmark/suites/logging.py @@ -13,20 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging import warnings from io import StringIO from mock import Mock from pyperf import perf_counter -from synmark import make_homeserver from twisted.internet.defer import Deferred from twisted.internet.protocol import ServerFactory -from twisted.logger import LogBeginner, Logger, LogPublisher +from twisted.logger import LogBeginner, LogPublisher from twisted.protocols.basic import LineOnlyReceiver -from synapse.logging._structured import setup_structured_logging +from synapse.config.logger import _setup_stdlib_logging +from synapse.logging import RemoteHandler +from synapse.util import Clock class LineCounter(LineOnlyReceiver): @@ -62,7 +64,15 @@ def protocol(): logger_factory.on_done = Deferred() port = reactor.listenTCP(0, logger_factory, interface="127.0.0.1") - hs, wait, cleanup = await make_homeserver(reactor) + # A fake homeserver config. + class Config: + server_name = "synmark-" + str(loops) + no_redirect_stdio = True + + hs_config = Config() + + # To be able to sleep. + clock = Clock(reactor) errors = StringIO() publisher = LogPublisher() @@ -72,24 +82,33 @@ def protocol(): ) log_config = { - "loggers": {"synapse": {"level": "DEBUG"}}, - "drains": { + "version": 1, + "loggers": {"synapse": {"level": "DEBUG", "handlers": ["tersejson"]}}, + "formatters": {"tersejson": {"class": "synapse.logging.TerseJsonFormatter"}}, + "handlers": { "tersejson": { - "type": "network_json_terse", + "class": "synapse.logging.RemoteHandler", "host": "127.0.0.1", "port": port.getHost().port, "maximum_buffer": 100, + "_reactor": reactor, } }, } - logger = Logger(namespace="synapse.logging.test_terse_json", observer=publisher) - logging_system = setup_structured_logging( - hs, hs.config, log_config, logBeginner=beginner, redirect_stdlib_logging=False + logger = logging.getLogger("synapse.logging.test_terse_json") + _setup_stdlib_logging( + hs_config, log_config, logBeginner=beginner, ) # Wait for it to connect... 
- await logging_system._observers[0]._service.whenConnected() + for handler in logging.getLogger("synapse").handlers: + if isinstance(handler, RemoteHandler): + break + else: + raise RuntimeError("Improperly configured: no RemoteHandler found.") + + await handler._service.whenConnected() start = perf_counter() @@ -97,22 +116,15 @@ def protocol(): for i in range(0, loops): logger.info("test message %s" % (i,)) - if ( - len(logging_system._observers[0]._buffer) - == logging_system._observers[0].maximum_buffer - ): - while ( - len(logging_system._observers[0]._buffer) - > logging_system._observers[0].maximum_buffer / 2 - ): - await wait(0.01) + if len(handler._buffer) == handler.maximum_buffer: + while len(handler._buffer) > handler.maximum_buffer / 2: + await clock.sleep(0.01) await logger_factory.on_done end = perf_counter() - start - logging_system.stop() + handler.close() port.stopListening() - cleanup() return end From 24ab2df7151a2d15b622e9ad7aeb565bdac4d7c1 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 26 Oct 2020 10:05:45 -0400 Subject: [PATCH 07/23] Update logging format Co-authored-by: Erik Johnston --- synmark/suites/logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synmark/suites/logging.py b/synmark/suites/logging.py index 241593082ecb..c9d9cf761ef5 100644 --- a/synmark/suites/logging.py +++ b/synmark/suites/logging.py @@ -114,7 +114,7 @@ class Config: # Send a bunch of useful messages for i in range(0, loops): - logger.info("test message %s" % (i,)) + logger.info("test message %s", i) if len(handler._buffer) == handler.maximum_buffer: while len(handler._buffer) > handler.maximum_buffer / 2: From 5fbc11c8f638bada892cde355544006e147ae524 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 26 Oct 2020 10:48:30 -0400 Subject: [PATCH 08/23] Fix-up formatting using __all__. --- synapse/logging/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/logging/__init__.py b/synapse/logging/__init__.py index 1aff2df42400..b2ef9a189bf8 100644 --- a/synapse/logging/__init__.py +++ b/synapse/logging/__init__.py @@ -14,5 +14,7 @@ # limitations under the License. # These are imported to allow for nicer logging configuration files. -from synapse.logging._remote import RemoteHandler # noqa -from synapse.logging._terse_json import TerseJsonFormatter # noqa +from synapse.logging._remote import RemoteHandler +from synapse.logging._terse_json import TerseJsonFormatter + +__all__ = [RemoteHandler, TerseJsonFormatter] From b2fc88bce2e4217cf68a6a09f80d029f13b2fbb8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 26 Oct 2020 10:58:09 -0400 Subject: [PATCH 09/23] Do not build an unnecessary set. --- synapse/logging/_terse_json.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index b342e9eec7a9..3d257b49edf6 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -59,8 +59,8 @@ def format(self, record: logging.LogRecord) -> str: } # Add any extra attributes to the event. 
- extra_attributes = set(record.__dict__.keys()) - _LOG_RECORD_ATTRIBUTES - for key in extra_attributes: - event[key] = getattr(record, key) + for key, value in record.__dict__.items(): + if key not in _LOG_RECORD_ATTRIBUTES: + event[key] = value return _encoder.encode(event) From 650bb09f88353e8513adb6b91cb5492663167c96 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 26 Oct 2020 11:04:30 -0400 Subject: [PATCH 10/23] Stop using the DEBUG decorators. --- tests/logging/test_remote_handler.py | 10 +++++----- tests/logging/test_terse_json.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/logging/test_remote_handler.py b/tests/logging/test_remote_handler.py index 08395f27d505..81ed285fb418 100644 --- a/tests/logging/test_remote_handler.py +++ b/tests/logging/test_remote_handler.py @@ -19,7 +19,7 @@ from synapse.logging import RemoteHandler from tests.server import FakeTransport -from tests.unittest import DEBUG, HomeserverTestCase +from tests.unittest import HomeserverTestCase class StructuredLoggingTestBase: @@ -50,7 +50,6 @@ def connect_logging_client(reactor, client_id): class RemoteHandlerTestCase(StructuredLoggingTestBase, HomeserverTestCase): - @DEBUG def test_log_output(self): """ The remote handler delivers logs over TCP. @@ -58,6 +57,7 @@ def test_log_output(self): handler = RemoteHandler("127.0.0.1", 9000, _reactor=self.reactor) logger = logging.getLogger() logger.addHandler(handler) + logger.setLevel(logging.INFO) logger.info("Hello there, %s!", "wally") @@ -75,7 +75,6 @@ def test_log_output(self): # Ensure the data passed through properly. self.assertEqual(logs[0], "Hello there, wally!") - @DEBUG def test_log_backpressure_debug(self): """ When backpressure is hit, DEBUG logs will be shed. @@ -85,6 +84,7 @@ def test_log_backpressure_debug(self): ) logger = logging.getLogger() logger.addHandler(handler) + logger.setLevel(logging.INFO) # Send some debug messages for i in range(0, 3): @@ -106,7 +106,6 @@ def test_log_backpressure_debug(self): self.assertEqual(len(logs), 7) self.assertNotIn(b"debug", server.data) - @DEBUG def test_log_backpressure_info(self): """ When backpressure is hit, DEBUG and INFO logs will be shed. @@ -116,6 +115,7 @@ def test_log_backpressure_info(self): ) logger = logging.getLogger() logger.addHandler(handler) + logger.setLevel(logging.INFO) # Send some debug messages for i in range(0, 3): @@ -142,7 +142,6 @@ def test_log_backpressure_info(self): self.assertNotIn(b"debug", server.data) self.assertNotIn(b"info", server.data) - @DEBUG def test_log_backpressure_cut_middle(self): """ When backpressure is hit, and no more DEBUG and INFOs cannot be culled, @@ -153,6 +152,7 @@ def test_log_backpressure_cut_middle(self): ) logger = logging.getLogger() logger.addHandler(handler) + logger.setLevel(logging.INFO) # Send a bunch of useful messages for i in range(0, 20): diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index 6e698fc197c5..e4e691ddb3e4 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -19,11 +19,10 @@ from synapse.logging._terse_json import TerseJsonFormatter -from tests.unittest import DEBUG, HomeserverTestCase +from tests.unittest import HomeserverTestCase class TerseJsonTestCase(HomeserverTestCase): - @DEBUG def test_log_output(self): """ The Terse JSON formatter converts log messages to JSON. 
@@ -35,6 +34,7 @@ def test_log_output(self): logger = logging.getLogger() logger.addHandler(handler) + logger.setLevel(logging.INFO) logger.info("Hello there, %s!", "wally") From 7115697e0cd4f175db02f28043142c629667e0bc Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 26 Oct 2020 11:13:00 -0400 Subject: [PATCH 11/23] Raise an error if structured is in the logging config. --- synapse/config/logger.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 49a059c51cfb..87142d3ce0ac 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -44,7 +44,11 @@ # This is a YAML file containing a standard Python logging configuration # dictionary. See [1] for details on the valid settings. # +# Synapse also supports structured logging for machine readable logs which can +# be ingested by ELK stacks. See [2] for details. +# # [1]: https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema +# [2]: https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md version: 1 @@ -192,6 +196,15 @@ def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner) -> None: handler.setFormatter(formatter) logger.addHandler(handler) else: + # If the old structured logging configuration is being used, raise an + # error. + if "structured" in log_config and log_config.get("structured"): + raise ConfigError( + "The `structured` parameter is no longer supported, see the documentation " + "for enabling structured logging: " + "https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md" + ) + logging.config.dictConfig(log_config) # We add a log record factory that runs all messages through the From e855dbb0484312197329b45c23d171056711ceb7 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 26 Oct 2020 11:15:12 -0400 Subject: [PATCH 12/23] __all__ takes strings, not objects. --- synapse/logging/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/logging/__init__.py b/synapse/logging/__init__.py index b2ef9a189bf8..6a6847189916 100644 --- a/synapse/logging/__init__.py +++ b/synapse/logging/__init__.py @@ -17,4 +17,4 @@ from synapse.logging._remote import RemoteHandler from synapse.logging._terse_json import TerseJsonFormatter -__all__ = [RemoteHandler, TerseJsonFormatter] +__all__ = ["RemoteHandler", "TerseJsonFormatter"] From 7071d897f1ec8e3271aecc6a437edaddfb0dc7cf Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 26 Oct 2020 11:19:08 -0400 Subject: [PATCH 13/23] Update the sample config. --- docs/sample_log_config.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/sample_log_config.yaml b/docs/sample_log_config.yaml index 55a48a9ed622..253d0641d404 100644 --- a/docs/sample_log_config.yaml +++ b/docs/sample_log_config.yaml @@ -3,7 +3,11 @@ # This is a YAML file containing a standard Python logging configuration # dictionary. See [1] for details on the valid settings. # +# Synapse also supports structured logging for machine readable logs which can +# be ingested by ELK stacks. See [2] for details. +# # [1]: https://docs.python.org/3.7/library/logging.config.html#configuration-dictionary-schema +# [2]: https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md version: 1 From 6b785c13105aa19135981b477073fa2522448b7a Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 26 Oct 2020 13:08:26 -0400 Subject: [PATCH 14/23] Revamp tests a bit to avoid impacting other tests. 
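The failure mode this patch avoids, roughly sketched below with hypothetical test names (not code from this series): a handler attached to the root logger by one test keeps receiving records from every test that runs after it, because all loggers propagate to the root by default.

```python
import logging

# "Test A" attaches a capture handler to the root logger and never removes it...
capture = logging.StreamHandler()
logging.getLogger().addHandler(capture)

# ...so records logged by a completely unrelated "Test B" also reach the
# capture handler, since they propagate up the logger hierarchy to the root.
logging.getLogger("tests.unrelated").warning("this leaks into Test A's handler")
```

Attaching handlers to a named logger and registering cleanups, as done below, keeps each test's logging state isolated.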
--- tests/logging/test_remote_handler.py | 52 +++++++++++++--------------- tests/logging/test_terse_json.py | 10 +++--- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/tests/logging/test_remote_handler.py b/tests/logging/test_remote_handler.py index 81ed285fb418..2f70ebda97a8 100644 --- a/tests/logging/test_remote_handler.py +++ b/tests/logging/test_remote_handler.py @@ -18,21 +18,8 @@ from synapse.logging import RemoteHandler -from tests.server import FakeTransport -from tests.unittest import HomeserverTestCase - - -class StructuredLoggingTestBase: - """ - Test base that registers a cleanup handler to reset the stdlib log handler - to 'unset'. - """ - - def prepare(self, reactor, clock, hs): - def _cleanup(): - logging.getLogger("synapse").setLevel(logging.NOTSET) - - self.addCleanup(_cleanup) +from tests.server import FakeTransport, get_clock +from tests.unittest import TestCase def connect_logging_client(reactor, client_id): @@ -49,15 +36,30 @@ def connect_logging_client(reactor, client_id): return client, server -class RemoteHandlerTestCase(StructuredLoggingTestBase, HomeserverTestCase): +class RemoteHandlerTestCase(TestCase): + def setUp(self): + self.reactor, _ = get_clock() + + def get_logger(self, handler): + # Create a logger and add the handler to it. + logger = logging.getLogger(__name__) + logger.addHandler(handler) + + # Ensure the logger actually logs something. + logger.setLevel(logging.INFO) + + # Ensure the logger gets cleaned-up appropriately. + self.addCleanup(logger.removeHandler, handler) + self.addCleanup(logger.setLevel, logging.NOTSET) + + return logger + def test_log_output(self): """ The remote handler delivers logs over TCP. """ handler = RemoteHandler("127.0.0.1", 9000, _reactor=self.reactor) - logger = logging.getLogger() - logger.addHandler(handler) - logger.setLevel(logging.INFO) + logger = self.get_logger(handler) logger.info("Hello there, %s!", "wally") @@ -82,9 +84,7 @@ def test_log_backpressure_debug(self): handler = RemoteHandler( "127.0.0.1", 9000, maximum_buffer=10, _reactor=self.reactor ) - logger = logging.getLogger() - logger.addHandler(handler) - logger.setLevel(logging.INFO) + logger = self.get_logger(handler) # Send some debug messages for i in range(0, 3): @@ -113,9 +113,7 @@ def test_log_backpressure_info(self): handler = RemoteHandler( "127.0.0.1", 9000, maximum_buffer=10, _reactor=self.reactor ) - logger = logging.getLogger() - logger.addHandler(handler) - logger.setLevel(logging.INFO) + logger = self.get_logger(handler) # Send some debug messages for i in range(0, 3): @@ -150,9 +148,7 @@ def test_log_backpressure_cut_middle(self): handler = RemoteHandler( "127.0.0.1", 9000, maximum_buffer=10, _reactor=self.reactor ) - logger = logging.getLogger() - logger.addHandler(handler) - logger.setLevel(logging.INFO) + logger = self.get_logger(handler) # Send a bunch of useful messages for i in range(0, 20): diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index e4e691ddb3e4..b58ebd241061 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -19,10 +19,10 @@ from synapse.logging._terse_json import TerseJsonFormatter -from tests.unittest import HomeserverTestCase +from tests.unittest import TestCase -class TerseJsonTestCase(HomeserverTestCase): +class TerseJsonTestCase(TestCase): def test_log_output(self): """ The Terse JSON formatter converts log messages to JSON. 
@@ -32,9 +32,11 @@ def test_log_output(self):
 
         handler = logging.StreamHandler(output)
         handler.setFormatter(TerseJsonFormatter())
-        logger = logging.getLogger()
+        logger = logging.getLogger(__name__)
         logger.addHandler(handler)
+        self.addCleanup(logger.removeHandler, handler)
         logger.setLevel(logging.INFO)
+        self.addCleanup(logger.setLevel, logging.NOTSET)
 
         logger.info("Hello there, %s!", "wally")
 
@@ -51,7 +53,6 @@ def test_log_output(self):
             "time",
             "level",
             "namespace",
-            # Added via LoggingContextFilter, configured in tests.test_utils.logging_setup.setup_logging.
-            "request",
         ]
         self.assertCountEqual(log.keys(), expected_log_keys)
         self.assertEqual(log["log"], "Hello there, wally!")

From 8d51476b22af12d3791fdc6bacf9164302017eb2 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Tue, 27 Oct 2020 07:25:20 -0400
Subject: [PATCH 15/23] Abstract handling of loggers in tests.

---
 tests/logging/__init__.py            | 34 ++++++++++++++++++++++++++++
 tests/logging/test_remote_handler.py | 19 ++--------------
 tests/logging/test_terse_json.py     | 10 +++-----
 3 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/tests/logging/__init__.py b/tests/logging/__init__.py
index e69de29bb2d1..a58d51441c6e 100644
--- a/tests/logging/__init__.py
+++ b/tests/logging/__init__.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+
+class LoggerCleanupMixin:
+    def get_logger(self, handler):
+        """
+        Attach a handler to a logger and add clean-ups to revert this.
+        """
+        # Create a logger and add the handler to it.
+        logger = logging.getLogger(__name__)
+        logger.addHandler(handler)
+
+        # Ensure the logger actually logs something.
+        logger.setLevel(logging.INFO)
+
+        # Ensure the logger gets cleaned-up appropriately.
+        self.addCleanup(logger.removeHandler, handler)
+        self.addCleanup(logger.setLevel, logging.NOTSET)
+
+        return logger
diff --git a/tests/logging/test_remote_handler.py b/tests/logging/test_remote_handler.py
index 2f70ebda97a8..58ee1f2f3c09 100644
--- a/tests/logging/test_remote_handler.py
+++ b/tests/logging/test_remote_handler.py
@@ -12,12 +12,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import logging
-
 from twisted.test.proto_helpers import AccumulatingProtocol
 
 from synapse.logging import RemoteHandler
 
+from tests.logging import LoggerCleanupMixin
 from tests.server import FakeTransport, get_clock
 from tests.unittest import TestCase
 
@@ -36,24 +35,10 @@ def connect_logging_client(reactor, client_id):
     return client, server
 
-class RemoteHandlerTestCase(TestCase):
+class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase):
     def setUp(self):
         self.reactor, _ = get_clock()
 
-    def get_logger(self, handler):
-        # Create a logger and add the handler to it.
-        logger = logging.getLogger(__name__)
-        logger.addHandler(handler)
-
-        # Ensure the logger actually logs something.
- logger.setLevel(logging.INFO) - - # Ensure the logger gets cleaned-up appropriately. - self.addCleanup(logger.removeHandler, handler) - self.addCleanup(logger.setLevel, logging.NOTSET) - - return logger - def test_log_output(self): """ The remote handler delivers logs over TCP. diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index b58ebd241061..668fde1c244f 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -19,10 +19,11 @@ from synapse.logging._terse_json import TerseJsonFormatter +from tests.logging import LoggerCleanupMixin from tests.unittest import TestCase -class TerseJsonTestCase(TestCase): +class TerseJsonTestCase(LoggerCleanupMixin, TestCase): def test_log_output(self): """ The Terse JSON formatter converts log messages to JSON. @@ -31,12 +32,7 @@ def test_log_output(self): handler = logging.StreamHandler(output) handler.setFormatter(TerseJsonFormatter()) - - logger = logging.getLogger(__name__) - logger.addHandler(handler) - self.addCleanup(logger.removeHandler, handler) - logger.setLevel(logging.INFO) - self.addCleanup(logger.setLevel, logging.NOTSET) + logger = self.get_logger(handler) logger.info("Hello there, %s!", "wally") From 7fb55052c2660f605165752e03aeb301f852ba7f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 27 Oct 2020 07:26:53 -0400 Subject: [PATCH 16/23] Add a test for including additional structured data. --- tests/logging/test_terse_json.py | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py index 668fde1c244f..6db06b769bb9 100644 --- a/tests/logging/test_terse_json.py +++ b/tests/logging/test_terse_json.py @@ -52,3 +52,42 @@ def test_log_output(self): ] self.assertCountEqual(log.keys(), expected_log_keys) self.assertEqual(log["log"], "Hello there, wally!") + + def test_extra_data(self): + """ + Additional information can be included in the structured logging. + """ + output = StringIO() + + handler = logging.StreamHandler(output) + handler.setFormatter(TerseJsonFormatter()) + logger = self.get_logger(handler) + + logger.info( + "Hello there, %s!", "wally", extra={"foo": "bar", "int": 3, "bool": True,} + ) + + # One log message, with a single trailing newline. + data = output.getvalue() + logs = data.splitlines() + self.assertEqual(len(logs), 1) + self.assertEqual(data.count("\n"), 1) + log = json.loads(logs[0]) + + # The terse logger should give us these keys. + expected_log_keys = [ + "log", + "time", + "level", + "namespace", + # The additional keys given via extra. + "foo", + "int", + "bool", + ] + self.assertCountEqual(log.keys(), expected_log_keys) + + # Check the values of the extra fields. + self.assertEqual(log["foo"], "property") + self.assertEqual(log["int"], 3) + self.assertIs(log["bool"], True) From 11a488c80ab05e992444bd351504fb6da7ffe691 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 27 Oct 2020 07:39:26 -0400 Subject: [PATCH 17/23] Lint. 
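Background for the `extra` test above (a generic stdlib illustration, not Synapse code): the `extra` mapping passed to a logging call is copied onto the `LogRecord` as plain attributes, which is why `TerseJsonFormatter` can recover the values by skipping the known standard `LogRecord` attribute names.

```python
import logging

# What logger.info(..., extra={"foo": "bar"}) does under the hood:
# Logger.makeRecord copies each extra key/value onto the record's __dict__.
record = logging.LogRecord(
    name="demo",
    level=logging.INFO,
    pathname=__file__,
    lineno=1,
    msg="hi",
    args=(),
    exc_info=None,
)
record.__dict__.update({"foo": "bar"})  # equivalent effect to extra={"foo": "bar"}
assert record.foo == "bar"  # visible to any formatter inspecting the record
```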
---
 tests/logging/test_terse_json.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py
index 6db06b769bb9..62d4c2381000 100644
--- a/tests/logging/test_terse_json.py
+++ b/tests/logging/test_terse_json.py
@@ -64,7 +64,7 @@ def test_extra_data(self):
         logger = self.get_logger(handler)
 
         logger.info(
-            "Hello there, %s!", "wally", extra={"foo": "bar", "int": 3, "bool": True,}
+            "Hello there, %s!", "wally", extra={"foo": "bar", "int": 3, "bool": True}
         )
 
         # One log message, with a single trailing newline.

From a19c96743bfa182fc778585891f6fb90c441492d Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Tue, 27 Oct 2020 08:54:02 -0400
Subject: [PATCH 18/23] Fix test after rename.

---
 tests/logging/test_terse_json.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py
index 62d4c2381000..6121c07ae599 100644
--- a/tests/logging/test_terse_json.py
+++ b/tests/logging/test_terse_json.py
@@ -88,6 +88,6 @@ def test_extra_data(self):
         self.assertCountEqual(log.keys(), expected_log_keys)
 
         # Check the values of the extra fields.
-        self.assertEqual(log["foo"], "property")
+        self.assertEqual(log["foo"], "bar")
         self.assertEqual(log["int"], 3)
         self.assertIs(log["bool"], True)

From e98a6d14271b3e80f42a5553f63578bcaf8fc950 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Tue, 27 Oct 2020 12:48:02 -0400
Subject: [PATCH 19/23] Add an upgrade note.

---
 UPGRADE.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/UPGRADE.rst b/UPGRADE.rst
index 5a683122178e..3f6327de7e1f 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -75,6 +75,20 @@ for example:
 
     wget https://packages.matrix.org/debian/pool/main/m/matrix-synapse-py3/matrix-synapse-py3_1.3.0+stretch1_amd64.deb
     dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb
 
+Upgrading to v1.23.0
+====================
+
+Structured logging configuration breaking changes
+-------------------------------------------------
+
+This release introduces a backwards-incompatible change in the configuration of
+structured logging. If your logging configuration contains ``structured: true``
+then it must be modified based on the `structured logging document
+`_.
+
+``drains`` are now an unused option and should be replaced by standard logging
+configuration of ``handlers`` and ``formatters``.
+
 Upgrading to v1.22.0
 ====================

From babdd5bbbca8f8b398428921b6729ec86a700c76 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Wed, 28 Oct 2020 13:33:46 -0400
Subject: [PATCH 20/23] Rework the code to load logging configs.

---
 synapse/config/logger.py | 75 +++++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/synapse/config/logger.py b/synapse/config/logger.py
index 87142d3ce0ac..b73bdb8a3a90 100644
--- a/synapse/config/logger.py
+++ b/synapse/config/logger.py
@@ -176,11 +176,11 @@ def generate_files(self, config, config_dir_path):
             log_config_file.write(DEFAULT_LOG_CONFIG.substitute(log_file=log_file))
 
-def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner) -> None:
+def _setup_stdlib_logging(config, log_config_path, logBeginner: LogBeginner) -> None:
     """
     Set up Python standard library logging.
""" - if log_config is None: + if log_config_path is None: log_format = ( "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s" " - %(message)s" @@ -196,16 +196,8 @@ def _setup_stdlib_logging(config, log_config, logBeginner: LogBeginner) -> None: handler.setFormatter(formatter) logger.addHandler(handler) else: - # If the old structured logging configuration is being used, raise an - # error. - if "structured" in log_config and log_config.get("structured"): - raise ConfigError( - "The `structured` parameter is no longer supported, see the documentation " - "for enabling structured logging: " - "https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md" - ) - - logging.config.dictConfig(log_config) + # Load the logging configuration. + _load_logging_config(log_config_path) # We add a log record factory that runs all messages through the # LoggingContextFilter so that we get the context *at the time we log* @@ -267,15 +259,40 @@ def _log(event): print("Redirected stdout/stderr to logs") -def _reload_stdlib_logging(*args, log_config=None): - logger = logging.getLogger("") +def _load_logging_config(log_config_path: str) -> None: + """ + Configure logging from a log config path. + """ + with open(log_config_path, "rb") as f: + log_config = yaml.safe_load(f.read()) if not log_config: - logger.warning("Reloaded a blank config?") + logging.warning("Loaded a blank logging config?") + + # If the old structured logging configuration is being used, raise an + # error. + if "structured" in log_config and log_config.get("structured"): + raise ConfigError( + "The `structured` parameter is no longer supported, see the documentation " + "for enabling structured logging: " + "https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md" + ) logging.config.dictConfig(log_config) +def _reload_logging_config(log_config_path): + """ + Reload the log configuration from the file and apply it. + """ + # If no log config path was given, it cannot be reloaded. + if log_config_path is None: + return + + _load_logging_config(log_config_path) + logging.info("Reloaded log config from %s due to SIGHUP", log_config_path) + + def setup_logging( hs, config, use_worker_options=False, logBeginner: LogBeginner = globalLogBeginner ) -> None: @@ -292,28 +309,16 @@ def setup_logging( logBeginner: The Twisted logBeginner to use. """ - log_config = config.worker_log_config if use_worker_options else config.log_config - - def read_config(*args, callback=None): - if log_config is None: - return None - - with open(log_config, "rb") as f: - log_config_body = yaml.safe_load(f.read()) - - if callback: - callback(log_config=log_config_body) - logging.info("Reloaded log config from %s due to SIGHUP", log_config) - - return log_config_body - - log_config_body = read_config() + log_config_path = ( + config.worker_log_config if use_worker_options else config.log_config + ) - _setup_stdlib_logging(config, log_config_body, logBeginner=logBeginner) - appbase.register_sighup(read_config, callback=_reload_stdlib_logging) + # Perform one-time logging configuration. + _setup_stdlib_logging(config, log_config_path, logBeginner=logBeginner) + # Add a SIGHUP handler to reload the logging configuration, if one is available. + appbase.register_sighup(_reload_logging_config, log_config_path) - # make sure that the first thing we log is a thing we can grep backwards - # for + # Log immediately so we can grep backwards. 
logging.warning("***** STARTING SERVER *****") logging.warning("Server %s version %s", sys.argv[0], get_version_string(synapse)) logging.info("Server hostname: %s", config.server_name) From 10738cc76ea13627e0211d65bf9a90c5e00766c8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 28 Oct 2020 14:22:21 -0400 Subject: [PATCH 21/23] Convert legacy drain configurations to standard library handler configs. --- UPGRADE.rst | 12 ++- docs/structured_logging.md | 30 +++++++ synapse/config/logger.py | 11 +-- synapse/logging/_structured.py | 154 +++++++++++++++++++++++++++++++++ 4 files changed, 196 insertions(+), 11 deletions(-) create mode 100644 synapse/logging/_structured.py diff --git a/UPGRADE.rst b/UPGRADE.rst index 3f6327de7e1f..b187d0d24ef2 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -81,13 +81,17 @@ Upgrading to v1.23.0 Structured logging configuration breaking changes ------------------------------------------------- -This release introduces a backwars-incompatible change in the configuration of +This release deprecates use of the ``structured: true`` logging configuration for structured logging. If your logging configuration contains ``structured: true`` -then it must be modified based on the `structured logging document +then it should be modified based on the `structured logging documentation `_. -``drains`` are now an unused option and should be replaced by standard logging -configuration of ``handlers`` and ``formatters`. +The ``structured`` and ``drains`` logging options are now deprecated and should +be replaced by standard logging configuration of ``handlers`` and ``formatters`. + +Note that updated configurations *must* include ``version: 1`` in them. + +A future will release of Synapse will make using ``structured: true`` an error. Upgrading to v1.22.0 ==================== diff --git a/docs/structured_logging.md b/docs/structured_logging.md index 3f1d6a3fd657..13c800fa515f 100644 --- a/docs/structured_logging.md +++ b/docs/structured_logging.md @@ -74,3 +74,33 @@ loggers: The above logging config will set Synapse as 'INFO' logging level by default, with the SQL layer at 'WARNING', and will log JSON formatted messages to a remote endpoint at 10.1.2.3:9999. + +## Upgrading from legacy structured logging configuration + +Versions of Synapse prior to v1.23.0 included a custom structured logging +configuration which is deprecated. It used a `structured: true` flag and +configured `drains` instead of ``handlers`` and `formatters`. + +Synapse currently automatically converts the old configuration to the new +configuration, but this will be removed in a future version of Synapse. The +following reference can be used to update your configuration. Based on the drain +`type`, we can pick a new handler: + +1. For a type of `console`, `console_json`, or `console_json_terse`: a handler + with a class of `logging.StreamHandler` and a `stream` of `ext://sys.stdout` + or `ext://sys.stderr` should be used. +2. For a type of `file` or `file_json`: a handler of `logging.FileHandler` with + a location of the file path should be used. +3. For a type of `network_json_terse`: a handler of `synapse.logging.RemoteHandler` + with the host and port should be used. + +Then based on the drain `type` we can pick a new formatter: + +1. For a type of `console` or `file` no formatter is necessary. +2. For a type of `console_json`, `console_json_terse`, `file_json`, or + `network_json_terse`: a formatter of `synapse.logging.TerseJsonFormatter` + should be used. 
+ +You might notice that there are now configurations that were not possible before, +namely there was no plain `network` type. It is also no longer possible to output +JSON without the timestamp (`json` is equivalent to `json_terse`). diff --git a/synapse/config/logger.py b/synapse/config/logger.py index b73bdb8a3a90..268e288abec1 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -31,6 +31,7 @@ import synapse from synapse.app import _base as appbase +from synapse.logging._structured import setup_structured_logging from synapse.logging.context import LoggingContextFilter from synapse.logging.filter import MetadataFilter from synapse.util.versionstring import get_version_string @@ -269,14 +270,10 @@ def _load_logging_config(log_config_path: str) -> None: if not log_config: logging.warning("Loaded a blank logging config?") - # If the old structured logging configuration is being used, raise an - # error. + # If the old structured logging configuration is being used, convert it to + # the new style configuration. if "structured" in log_config and log_config.get("structured"): - raise ConfigError( - "The `structured` parameter is no longer supported, see the documentation " - "for enabling structured logging: " - "https://github.com/matrix-org/synapse/blob/master/docs/structured_logging.md" - ) + log_config = setup_structured_logging(log_config) logging.config.dictConfig(log_config) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py new file mode 100644 index 000000000000..b32b8f261c22 --- /dev/null +++ b/synapse/logging/_structured.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os.path +import typing + +from constantly import NamedConstant, Names + +from synapse.config._base import ConfigError + + +class DrainType(Names): + CONSOLE = NamedConstant() + CONSOLE_JSON = NamedConstant() + CONSOLE_JSON_TERSE = NamedConstant() + FILE = NamedConstant() + FILE_JSON = NamedConstant() + NETWORK_JSON_TERSE = NamedConstant() + + +DEFAULT_LOGGERS = {"synapse": {"level": "info"}} + + +def parse_drain_configs(drains: dict,) -> typing.Generator[dict, None, None]: + """ + Parse the drain configurations. + + Args: + drains (dict): A list of drain configurations. + + Yields: + dict instances representing a logging handler. + + Raises: + ConfigError: If any of the drain configuration items are invalid. + """ + + for name, config in drains.items(): + if "type" not in config: + raise ConfigError("Logging drains require a 'type' key.") + + try: + logging_type = DrainType.lookupByName(config["type"].upper()) + except ValueError: + raise ConfigError( + "%s is not a known logging drain type." % (config["type"],) + ) + + # Either use the default formatter or the tersejson one. 
+ if logging_type in ( + DrainType.CONSOLE_JSON, + DrainType.CONSOLE_JSON_TERSE, + DrainType.FILE_JSON, + DrainType.NETWORK_JSON_TERSE, + ): + formatter = "tersejson" + else: + # A formatter of None implies using the default formatter. + formatter = None + + if logging_type in [ + DrainType.CONSOLE, + DrainType.CONSOLE_JSON, + DrainType.CONSOLE_JSON_TERSE, + ]: + location = config.get("location") + if location is None or location not in ["stdout", "stderr"]: + raise ConfigError( + ( + "The %s drain needs the 'location' key set to " + "either 'stdout' or 'stderr'." + ) + % (logging_type,) + ) + + yield name, { + "class": "logging.StreamHandler", + "formatter": formatter, + "stream": "ext://sys." + location, + } + + elif logging_type in [DrainType.FILE, DrainType.FILE_JSON]: + if "location" not in config: + raise ConfigError( + "The %s drain needs the 'location' key set." % (logging_type,) + ) + + location = config.get("location") + if os.path.abspath(location) != location: + raise ConfigError( + "File paths need to be absolute, '%s' is a relative path" + % (location,) + ) + + yield name, { + "class": "logging.FileHandler", + "formatter": formatter, + "filename": location, + } + + elif logging_type in [DrainType.NETWORK_JSON_TERSE]: + host = config.get("host") + port = config.get("port") + maximum_buffer = config.get("maximum_buffer", 1000) + + yield name, { + "class": "synapse.logging.RemoteHandler", + "formatter": formatter, + "host": host, + "port": port, + "maximum_buffer": maximum_buffer, + } + + else: + raise ConfigError( + "The %s drain type is currently not implemented." + % (config["type"].upper(),) + ) + + +def setup_structured_logging(log_config: dict,) -> dict: + """ + Convert a legacy structured logging configuration (from Synapse < v1.23.0) + to one compatible with the new standard library handlers. + """ + if "drains" not in log_config: + raise ConfigError("The logging configuration requires a list of drains.") + + new_config = { + "version": 1, + "formatters": {"tersejson": {"class": "synapse.logging.TerseJsonFormatter"}}, + "handlers": {}, + "loggers": log_config.get("loggers", DEFAULT_LOGGERS), + "root": {"handlers": []}, + } + + for handler_name, handler in parse_drain_configs(log_config["drains"]): + new_config["handlers"][handler_name] = handler + + # Add each handler to the root logger. + new_config["root"]["handlers"].append(handler_name) + + return new_config From f801d710abdbab6ce701e0bd4839698b49956cdd Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 28 Oct 2020 15:00:25 -0400 Subject: [PATCH 22/23] Add back a JSON formatter without time. --- UPGRADE.rst | 2 - docs/structured_logging.md | 67 +++++++++++++++++++++++++++++--- synapse/logging/__init__.py | 4 +- synapse/logging/_structured.py | 11 ++++-- synapse/logging/_terse_json.py | 18 ++++++++- tests/logging/test_terse_json.py | 32 ++++++++++++++- 6 files changed, 116 insertions(+), 18 deletions(-) diff --git a/UPGRADE.rst b/UPGRADE.rst index b187d0d24ef2..960c2aeb2bdc 100644 --- a/UPGRADE.rst +++ b/UPGRADE.rst @@ -89,8 +89,6 @@ then it should be modified based on the `structured logging documentation The ``structured`` and ``drains`` logging options are now deprecated and should be replaced by standard logging configuration of ``handlers`` and ``formatters`. -Note that updated configurations *must* include ``version: 1`` in them. - A future will release of Synapse will make using ``structured: true`` an error. 
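To illustrate the difference this patch restores (a sketch; the logger name and message are arbitrary): the two formatters emit the same JSON except that `TerseJsonFormatter` adds a `time` field, while `JsonFormatter` leaves timestamping to the log ingester.

```python
import logging
import sys

from synapse.logging import JsonFormatter, TerseJsonFormatter

logger = logging.getLogger("demo")

# Attach each formatter in turn and emit one record through it.
for formatter in (JsonFormatter(), TerseJsonFormatter()):
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.warning("hello")
    logger.removeHandler(handler)

# Expected shape of the two lines (the timestamp value will vary):
#   {"log":"hello","namespace":"demo","level":"WARNING"}
#   {"log":"hello","namespace":"demo","level":"WARNING","time":1603900000.0}
```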
Upgrading to v1.22.0 diff --git a/docs/structured_logging.md b/docs/structured_logging.md index 13c800fa515f..b1281667e02b 100644 --- a/docs/structured_logging.md +++ b/docs/structured_logging.md @@ -10,6 +10,10 @@ Synapse's structured logging system is configured via the file that Synapse's uses the `synapse.logging.TerseJsonFormatter` class included with Synapse and a handler which uses the above formatter. +There is also a `synapse.logging.JsonFormatter` option which does not include +a timestamp in the resulting JSON. This is useful if the log ingester adds its +own timestamp. + A structured logging configuration looks similar to the following: ```yaml @@ -97,10 +101,61 @@ following reference can be used to update your configuration. Based on the drain Then based on the drain `type` we can pick a new formatter: 1. For a type of `console` or `file` no formatter is necessary. -2. For a type of `console_json`, `console_json_terse`, `file_json`, or - `network_json_terse`: a formatter of `synapse.logging.TerseJsonFormatter` - should be used. +2. For a type of `console_json` or `file_json`: a formatter of + `synapse.logging.JsonFormatter` should be used. +3. For a type of `console_json_terse` or `network_json_terse`: a formatter of + `synapse.logging.TerseJsonFormatter` should be used. + +For each new handler and formatter they should be added to the logging configuration +and then assigned to either a logger or the root logger. + +An example legacy configuration: + +```yaml +structured: true + +loggers: + synapse: + level: INFO + synapse.storage.SQL: + level: WARNING + +drains: + console: + type: console + location: stdout + file: + type: file_json + location: homeserver.log +``` + +Would be converted into a new configuration: + +```yaml +version: 1 + +formatters: + json: + class: synapse.logging.JsonFormatter + +handlers: + console: + class: logging.StreamHandler + location: ext://sys.stdout + file: + class: logging.FileHandler + formatter: json + filename: homeserver.log + +loggers: + synapse: + level: INFO + handlers: [console, file] + synapse.storage.SQL: + level: WARNING +``` -You might notice that there are now configurations that were not possible before, -namely there was no plain `network` type. It is also no longer possible to output -JSON without the timestamp (`json` is equivalent to `json_terse`). +The new logging configuration is a bit more verbose, but significantly more +flexible. It allows for configuration that were not previously possible, such as +sending plain logs over the network, or using different handlers for different +modules. diff --git a/synapse/logging/__init__.py b/synapse/logging/__init__.py index 6a6847189916..b28b7b2ef761 100644 --- a/synapse/logging/__init__.py +++ b/synapse/logging/__init__.py @@ -15,6 +15,6 @@ # These are imported to allow for nicer logging configuration files. from synapse.logging._remote import RemoteHandler -from synapse.logging._terse_json import TerseJsonFormatter +from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter -__all__ = ["RemoteHandler", "TerseJsonFormatter"] +__all__ = ["RemoteHandler", "JsonFormatter", "TerseJsonFormatter"] diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index b32b8f261c22..fd683e722d3c 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -58,10 +58,10 @@ def parse_drain_configs(drains: dict,) -> typing.Generator[dict, None, None]: ) # Either use the default formatter or the tersejson one. 
-        if logging_type in (
-            DrainType.CONSOLE_JSON,
+        if logging_type in (DrainType.CONSOLE_JSON, DrainType.FILE_JSON,):
+            formatter = "json"
+        elif logging_type in (
             DrainType.CONSOLE_JSON_TERSE,
-            DrainType.FILE_JSON,
             DrainType.NETWORK_JSON_TERSE,
         ):
             formatter = "tersejson"
@@ -139,7 +139,10 @@ def setup_structured_logging(log_config: dict,) -> dict:
     new_config = {
         "version": 1,
-        "formatters": {"tersejson": {"class": "synapse.logging.TerseJsonFormatter"}},
+        "formatters": {
+            "json": {"class": "synapse.logging.JsonFormatter"},
+            "tersejson": {"class": "synapse.logging.TerseJsonFormatter"},
+        },
         "handlers": {},
         "loggers": log_config.get("loggers", DEFAULT_LOGGERS),
         "root": {"handlers": []},

diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py
index 3d257b49edf6..2fbf5549a1fb 100644
--- a/synapse/logging/_terse_json.py
+++ b/synapse/logging/_terse_json.py
@@ -49,18 +49,32 @@
 }
 
-class TerseJsonFormatter(logging.Formatter):
+class JsonFormatter(logging.Formatter):
     def format(self, record: logging.LogRecord) -> str:
         event = {
             "log": record.getMessage(),
             "namespace": record.name,
             "level": record.levelname,
-            "time": round(record.created, 2),
         }
 
+        return self._format(record, event)
+
+    def _format(self, record: logging.LogRecord, event: dict) -> str:
         # Add any extra attributes to the event.
         for key, value in record.__dict__.items():
             if key not in _LOG_RECORD_ATTRIBUTES:
                 event[key] = value
 
         return _encoder.encode(event)
+
+
+class TerseJsonFormatter(JsonFormatter):
+    def format(self, record: logging.LogRecord) -> str:
+        event = {
+            "log": record.getMessage(),
+            "namespace": record.name,
+            "level": record.levelname,
+            "time": round(record.created, 2),
+        }
+
+        return self._format(record, event)

diff --git a/tests/logging/test_terse_json.py b/tests/logging/test_terse_json.py
index 6121c07ae599..73f469b802e3 100644
--- a/tests/logging/test_terse_json.py
+++ b/tests/logging/test_terse_json.py
@@ -17,14 +17,14 @@
 import logging
 from io import StringIO
 
-from synapse.logging._terse_json import TerseJsonFormatter
+from synapse.logging._terse_json import JsonFormatter, TerseJsonFormatter
 
 from tests.logging import LoggerCleanupMixin
 from tests.unittest import TestCase
 
 class TerseJsonTestCase(LoggerCleanupMixin, TestCase):
-    def test_log_output(self):
+    def test_terse_json_output(self):
         """
         The Terse JSON formatter converts log messages to JSON.
@@ -91,3 +91,31 @@ def test_extra_data(self):
         self.assertEqual(log["foo"], "bar")
         self.assertEqual(log["int"], 3)
         self.assertIs(log["bool"], True)
+
+    def test_json_output(self):
+        """
+        The JSON formatter converts log messages to JSON.
+        """
+        output = StringIO()
+
+        handler = logging.StreamHandler(output)
+        handler.setFormatter(JsonFormatter())
+        logger = self.get_logger(handler)
+
+        logger.info("Hello there, %s!", "wally")
+
+        # One log message, with a single trailing newline.
+        data = output.getvalue()
+        logs = data.splitlines()
+        self.assertEqual(len(logs), 1)
+        self.assertEqual(data.count("\n"), 1)
+        log = json.loads(logs[0])
+
+        # The JSON formatter should give us these keys.
+        expected_log_keys = [
+            "log",
+            "level",
+            "namespace",
+        ]
+        self.assertCountEqual(log.keys(), expected_log_keys)
+        self.assertEqual(log["log"], "Hello there, wally!")

From 1c0181a04ac894bce48cdb99c30052a4a4e154df Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Wed, 28 Oct 2020 15:05:53 -0400
Subject: [PATCH 23/23] Fix type hints.
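For background on the annotations used below (a generic illustration, not Synapse code): a generator that only yields `(name, config)` pairs is typed `Generator[Tuple[str, Dict[str, Any]], None, None]`, and a variable first assigned a plain string needs an explicit `Optional[str]` annotation before a later branch may assign `None` to it.

```python
from typing import Any, Dict, Generator, Optional, Tuple

def handler_configs() -> Generator[Tuple[str, Dict[str, Any]], None, None]:
    # The second and third Generator parameters are the send and return
    # types; both are None for a generator that only yields values.
    yield "console", {"class": "logging.StreamHandler"}

formatter = "json"  # type: Optional[str]
# Without the type comment, mypy infers plain `str` and rejects this:
formatter = None
```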
---
 synapse/logging/_structured.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py
index fd683e722d3c..14d9c104c2e0 100644
--- a/synapse/logging/_structured.py
+++ b/synapse/logging/_structured.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os.path
-import typing
+from typing import Any, Dict, Generator, Optional, Tuple
 
 from constantly import NamedConstant, Names
 
 from synapse.config._base import ConfigError
@@ -32,7 +32,9 @@ class DrainType(Names):
 DEFAULT_LOGGERS = {"synapse": {"level": "info"}}
 
-def parse_drain_configs(drains: dict,) -> typing.Generator[dict, None, None]:
+def parse_drain_configs(
+    drains: dict,
+) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
     """
     Parse the drain configurations.
 
@@ -59,7 +61,7 @@
         # Either use the default formatter or the tersejson one.
         if logging_type in (DrainType.CONSOLE_JSON, DrainType.FILE_JSON,):
-            formatter = "json"
+            formatter = "json"  # type: Optional[str]
         elif logging_type in (
             DrainType.CONSOLE_JSON_TERSE,
             DrainType.NETWORK_JSON_TERSE,