From adeb59c6a75a442a401ceebb164fefd6cc4381d9 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Thu, 13 Apr 2023 05:37:35 -0500 Subject: [PATCH 01/11] Switch InstanceLocationConfig to a pydantic BaseModel, apply Strict* types and add a few helper methods(that will make more sense in follow up work). --- synapse/config/workers.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 1dfbe27e89a2..d39dc1d5aaa1 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -18,6 +18,7 @@ from typing import Any, Dict, List, Union import attr +from pydantic import BaseModel, StrictBool, StrictInt, StrictStr, parse_obj_as from synapse.config._base import ( Config, @@ -50,13 +51,20 @@ def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]: return obj -@attr.s(auto_attribs=True) -class InstanceLocationConfig: +class InstanceLocationConfig(BaseModel): """The host and port to talk to an instance via HTTP replication.""" - host: str - port: int - tls: bool = False + host: StrictStr + port: StrictInt + tls: StrictBool = False + + def scheme(self) -> str: + """Hardcode a retrievable scheme based on self.tls""" + return "https" if self.tls else "http" + + def netloc(self) -> str: + """Nicely format the network location data""" + return f"{self.host}:{self.port}" @attr.s @@ -183,10 +191,10 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: ) # A map from instance name to host/port of their HTTP replication endpoint. - instance_map = config.get("instance_map") or {} - self.instance_map = { - name: InstanceLocationConfig(**c) for name, c in instance_map.items() - } + # instance_map = config.get("instance_map") or {} + self.instance_map = parse_obj_as( + Dict[str, InstanceLocationConfig], config.get("instance_map") or {} + ) # Map from type of streams to source, c.f. WriterLocations. writers = config.get("stream_writers") or {} From 6d05b7e0b95b76100af823ea1ab3645d4d832fe4 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Thu, 13 Apr 2023 06:55:01 -0500 Subject: [PATCH 02/11] Changelog --- changelog.d/15431.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/15431.misc diff --git a/changelog.d/15431.misc b/changelog.d/15431.misc new file mode 100644 index 000000000000..4492406b49fb --- /dev/null +++ b/changelog.d/15431.misc @@ -0,0 +1 @@ +Add some validation to `instance_map` configuration loading. From cae783f456287405c9a3a293ac85775092b5a164 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Fri, 14 Apr 2023 14:46:05 -0500 Subject: [PATCH 03/11] Apply to fixes from review --- synapse/config/workers.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/synapse/config/workers.py b/synapse/config/workers.py index d39dc1d5aaa1..73e68053b434 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -18,7 +18,7 @@ from typing import Any, Dict, List, Union import attr -from pydantic import BaseModel, StrictBool, StrictInt, StrictStr, parse_obj_as +from pydantic import BaseModel, StrictBool, StrictInt, StrictStr, parse_obj_as, Extra from synapse.config._base import ( Config, @@ -51,7 +51,13 @@ def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]: return obj -class InstanceLocationConfig(BaseModel): +class InstanceLocationConfigModel(BaseModel): + class Config: + extra = Extra.ignore + allow_mutation = False + + +class InstanceLocationConfig(InstanceLocationConfigModel): """The host and port to talk to an instance via HTTP replication.""" host: StrictStr @@ -191,9 +197,8 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: ) # A map from instance name to host/port of their HTTP replication endpoint. - # instance_map = config.get("instance_map") or {} self.instance_map = parse_obj_as( - Dict[str, InstanceLocationConfig], config.get("instance_map") or {} + Dict[str, InstanceLocationConfig], config.get("instance_map", {}) ) # Map from type of streams to source, c.f. WriterLocations. From 60d3cfd0699ba4d53ef9648c6843de98b4a405fe Mon Sep 17 00:00:00 2001 From: Jason Little Date: Sun, 16 Apr 2023 16:36:31 -0500 Subject: [PATCH 04/11] Add docstring/comments to new ConfigModel. --- synapse/config/workers.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 73e68053b434..68a03e00a426 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -52,8 +52,24 @@ def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]: class InstanceLocationConfigModel(BaseModel): + """A custom version of Pydantic's BaseModel which + + - ignores unknown fields and + - does not allow fields to be overwritten after construction, + + but otherwise uses Pydantic's default behaviour. + + Ignoring unknown fields is a useful default. It means that clients can provide + unstable field not known to the server without the request being refused outright. + + Subclassing in this way is recommended by + https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally + """ + class Config: + # By default, ignore fields that we don't recognise. extra = Extra.ignore + # By default, don't allow fields to be reassigned after parsing. allow_mutation = False From 09657ff56c0f095b5a54f386cc04a24e22902537 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Sun, 16 Apr 2023 17:07:40 -0500 Subject: [PATCH 05/11] Try and abstract validation logic into wrapper function for better error handling --- synapse/config/_util.py | 30 +++++++++++++++++++++++++++++- synapse/config/workers.py | 10 +++++++--- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/synapse/config/_util.py b/synapse/config/_util.py index d3a4b484abb8..ff349a8edcc3 100644 --- a/synapse/config/_util.py +++ b/synapse/config/_util.py @@ -11,9 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Iterable +from typing import Any, Dict, Iterable, TypeAlias, TypeVar import jsonschema +from pydantic import BaseModel, ValidationError, parse_obj_as from synapse.config._base import ConfigError from synapse.types import JsonDict @@ -64,3 +65,30 @@ def json_error_to_config_error( else: path.append(str(p)) return ConfigError(e.message, path) + + +Model = TypeVar("Model", bound=BaseModel) + + +def validate_instance_map_config( + config: Any, + model_type: TypeAlias, +) -> Dict[str, Model]: + """Validate Dict type data in the style of Dict[str, ExampleBaseModel] and check it + for realness. Acts as a wrapper for parse_obj_as() from pydantic that changes any + ValidationError to ConfigError. + + Args: + config: The configuration data to check + model_type: The BaseModel to validate and parse against. + Returns: Fully validated and parsed Dict[str, ExampleBaseModel] + """ + try: + instances = parse_obj_as(Dict[str, model_type], config) + except ValidationError as e: + raise validation_error_to_config_error(e) + return instances + + +def validation_error_to_config_error(e: ValidationError) -> ConfigError: + return ConfigError(str(e)) diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 68a03e00a426..1cc2ca639f57 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -18,7 +18,7 @@ from typing import Any, Dict, List, Union import attr -from pydantic import BaseModel, StrictBool, StrictInt, StrictStr, parse_obj_as, Extra +from pydantic import BaseModel, Extra, StrictBool, StrictInt, StrictStr from synapse.config._base import ( Config, @@ -26,6 +26,7 @@ RoutableShardedWorkerHandlingConfig, ShardedWorkerHandlingConfig, ) +from synapse.config._util import validate_instance_map_config from synapse.config.server import ( DIRECT_TCP_ERROR, TCPListenerConfig, @@ -213,8 +214,11 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: ) # A map from instance name to host/port of their HTTP replication endpoint. - self.instance_map = parse_obj_as( - Dict[str, InstanceLocationConfig], config.get("instance_map", {}) + self.instance_map: Dict[ + str, InstanceLocationConfig + ] = validate_instance_map_config( + config.get("instance_map", {}), + InstanceLocationConfig, ) # Map from type of streams to source, c.f. WriterLocations. From 96f71541f2233cb0a1a7d8643cebfd30ebeb3bbd Mon Sep 17 00:00:00 2001 From: Jason Little Date: Sun, 16 Apr 2023 17:10:07 -0500 Subject: [PATCH 06/11] Change to feature. --- changelog.d/{15431.misc => 15431.feature} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changelog.d/{15431.misc => 15431.feature} (100%) diff --git a/changelog.d/15431.misc b/changelog.d/15431.feature similarity index 100% rename from changelog.d/15431.misc rename to changelog.d/15431.feature From dd5a1a138beea86cf693c75b61f03a97f1f04912 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Sun, 16 Apr 2023 18:54:49 -0500 Subject: [PATCH 07/11] Fix import to be compatible with older python versions(I hope) --- synapse/config/_util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/config/_util.py b/synapse/config/_util.py index ff349a8edcc3..5d6193c73812 100644 --- a/synapse/config/_util.py +++ b/synapse/config/_util.py @@ -11,10 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, Iterable, TypeAlias, TypeVar +from typing import Any, Dict, Iterable, TypeVar import jsonschema from pydantic import BaseModel, ValidationError, parse_obj_as +from typing_extensions import TypeAlias from synapse.config._base import ConfigError from synapse.types import JsonDict From 6a7903ce020a9c283985eba2642f3db6a3fa39d2 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Mon, 17 Apr 2023 02:07:45 -0500 Subject: [PATCH 08/11] Apply suggestions from code review Co-authored-by: David Robertson --- synapse/config/_util.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/synapse/config/_util.py b/synapse/config/_util.py index 5d6193c73812..2897d03ed089 100644 --- a/synapse/config/_util.py +++ b/synapse/config/_util.py @@ -75,21 +75,17 @@ def validate_instance_map_config( config: Any, model_type: TypeAlias, ) -> Dict[str, Model]: - """Validate Dict type data in the style of Dict[str, ExampleBaseModel] and check it - for realness. Acts as a wrapper for parse_obj_as() from pydantic that changes any - ValidationError to ConfigError. - + """Parse `config` as a mapping from strings to a given `Model` type. Args: config: The configuration data to check model_type: The BaseModel to validate and parse against. - Returns: Fully validated and parsed Dict[str, ExampleBaseModel] + Returns: + Fully validated and parsed Dict[str, Model]. + Raises: + ConfigError, if given improper input. """ try: instances = parse_obj_as(Dict[str, model_type], config) except ValidationError as e: - raise validation_error_to_config_error(e) + raise ConfigError(str(e)) from e return instances - - -def validation_error_to_config_error(e: ValidationError) -> ConfigError: - return ConfigError(str(e)) From 28f03ce806cfd0662be754af3be94f4c78238060 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Mon, 17 Apr 2023 03:01:40 -0500 Subject: [PATCH 09/11] Change `InstanceLocationConfigModel` to just `ConfigModel`. --- synapse/config/workers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 1cc2ca639f57..c44f8ad39bb9 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -52,7 +52,7 @@ def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]: return obj -class InstanceLocationConfigModel(BaseModel): +class ConfigModel(BaseModel): """A custom version of Pydantic's BaseModel which - ignores unknown fields and @@ -74,7 +74,7 @@ class Config: allow_mutation = False -class InstanceLocationConfig(InstanceLocationConfigModel): +class InstanceLocationConfig(ConfigModel): """The host and port to talk to an instance via HTTP replication.""" host: StrictStr From cfe048ecd677bcfb04edc5cb7020c8f9e9e9af57 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Mon, 17 Apr 2023 06:12:43 -0500 Subject: [PATCH 10/11] Rename parsing function to be clearer, and add type-ignore so mypy is happier. --- synapse/config/_util.py | 11 ++++++----- synapse/config/workers.py | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/synapse/config/_util.py b/synapse/config/_util.py index 2897d03ed089..dfc5d1221063 100644 --- a/synapse/config/_util.py +++ b/synapse/config/_util.py @@ -11,11 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, Iterable, TypeVar +from typing import Any, Dict, Iterable, Type, TypeVar import jsonschema from pydantic import BaseModel, ValidationError, parse_obj_as -from typing_extensions import TypeAlias from synapse.config._base import ConfigError from synapse.types import JsonDict @@ -71,9 +70,9 @@ def json_error_to_config_error( Model = TypeVar("Model", bound=BaseModel) -def validate_instance_map_config( +def parse_and_validate_mapping( config: Any, - model_type: TypeAlias, + model_type: Type[Model], ) -> Dict[str, Model]: """Parse `config` as a mapping from strings to a given `Model` type. Args: @@ -85,7 +84,9 @@ def validate_instance_map_config( ConfigError, if given improper input. """ try: - instances = parse_obj_as(Dict[str, model_type], config) + # type-ignore: mypy doesn't like constructing `Dict[str, model_type]` because + # `model_type` is a runtime variable. Pydantic is fine with this. + instances = parse_obj_as(Dict[str, model_type], config) # type: ignore[valid-type] except ValidationError as e: raise ConfigError(str(e)) from e return instances diff --git a/synapse/config/workers.py b/synapse/config/workers.py index c44f8ad39bb9..27f393c9a38a 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -26,7 +26,7 @@ RoutableShardedWorkerHandlingConfig, ShardedWorkerHandlingConfig, ) -from synapse.config._util import validate_instance_map_config +from synapse.config._util import parse_and_validate_mapping from synapse.config.server import ( DIRECT_TCP_ERROR, TCPListenerConfig, @@ -216,7 +216,7 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: # A map from instance name to host/port of their HTTP replication endpoint. self.instance_map: Dict[ str, InstanceLocationConfig - ] = validate_instance_map_config( + ] = parse_and_validate_mapping( config.get("instance_map", {}), InstanceLocationConfig, ) From ee620a07348f1fae124548614065eadc7f0a914b Mon Sep 17 00:00:00 2001 From: Jason Little Date: Mon, 17 Apr 2023 16:02:41 -0500 Subject: [PATCH 11/11] Update comment for ConfigModel. --- synapse/config/workers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 27f393c9a38a..95b4047f1d3f 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -60,8 +60,9 @@ class ConfigModel(BaseModel): but otherwise uses Pydantic's default behaviour. - Ignoring unknown fields is a useful default. It means that clients can provide - unstable field not known to the server without the request being refused outright. + For now, ignore unknown fields. In the future, we could change this so that unknown + config values cause a ValidationError, provided the error messages are meaningful to + server operators. Subclassing in this way is recommended by https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally