Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Implement MSC4009 to widen the allowed Matrix ID grammar #15536

Merged
merged 2 commits into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/15536.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implement [MSC4009](https://github.com/matrix-org/matrix-spec-proposals/pull/4009) to expand the supported characters in Matrix IDs.
3 changes: 3 additions & 0 deletions synapse/config/experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,6 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:

# MSC3970: Scope transaction IDs to devices
self.msc3970_enabled = experimental.get("msc3970_enabled", False)

# MSC4009: E.164 Matrix IDs
self.msc4009_e164_mxids = experimental.get("msc4009_e164_mxids", False)
27 changes: 14 additions & 13 deletions synapse/handlers/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
ReplicationRegisterServlet,
)
from synapse.spam_checker_api import RegistrationBehaviour
from synapse.types import RoomAlias, UserID, create_requester
from synapse.types import GUEST_USER_ID_PATTERN, RoomAlias, UserID, create_requester
from synapse.types.state import StateFilter

if TYPE_CHECKING:
Expand Down Expand Up @@ -143,10 +143,15 @@ async def check_username(
assigned_user_id: Optional[str] = None,
inhibit_user_in_use_error: bool = False,
) -> None:
clokep marked this conversation as resolved.
Show resolved Hide resolved
if types.contains_invalid_mxid_characters(localpart):
if types.contains_invalid_mxid_characters(
localpart, self.hs.config.experimental.msc4009_e164_mxids
):
extra_chars = (
"=_-./+" if self.hs.config.experimental.msc4009_e164_mxids else "=_-./"
)
raise SynapseError(
400,
"User ID can only contain characters a-z, 0-9, or '=_-./'",
f"User ID can only contain characters a-z, 0-9, or '{extra_chars}'",
Codes.INVALID_USERNAME,
)

Expand Down Expand Up @@ -195,16 +200,12 @@ async def check_username(
errcode=Codes.FORBIDDEN,
)

if guest_access_token is None:
try:
int(localpart)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hah, can't believe this is how we defined the grammar for guest localparts. Goodness help us if Python allowed things like 0x42 in int()!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did try int("+3") in a Python shell after seeing that 👼

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hah, can't believe this is how we defined the grammar for guest localparts. Goodness help us if Python allowed things like 0x42 in int()!

It did not, but it did allow things like 042. The new behavior should match that.

It did also allow +42 and -42 those will no longer be allowed.

raise SynapseError(
400,
"Numeric user IDs are reserved for guest users.",
errcode=Codes.INVALID_USERNAME,
)
except ValueError:
pass
if guest_access_token is None and GUEST_USER_ID_PATTERN.fullmatch(localpart):
raise SynapseError(
400,
"Numeric user IDs are reserved for guest users.",
errcode=Codes.INVALID_USERNAME,
)

async def register_user(
self,
Expand Down
6 changes: 4 additions & 2 deletions synapse/handlers/sso.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ def __init__(self, hs: "HomeServer"):

self._consent_at_registration = hs.config.consent.user_consent_at_registration

self._e164_mxids = hs.config.experimental.msc4009_e164_mxids

def register_identity_provider(self, p: SsoIdentityProvider) -> None:
p_id = p.idp_id
assert p_id not in self._identity_providers
Expand Down Expand Up @@ -710,7 +712,7 @@ async def _register_mapped_user(
# Since the localpart is provided via a potentially untrusted module,
# ensure the MXID is valid before registering.
if not attributes.localpart or contains_invalid_mxid_characters(
attributes.localpart
attributes.localpart, self._e164_mxids
):
Comment on lines 714 to 716
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The SSO mapping code (done via the map_username_to_mxid_localpart) doesn't really support this though, so maybe we should hard-code the calls in this file to False?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that we don't need to worry about changing a currently registered user's mapping since we find those users via their remote ID.

Changing the mapping could, however, cause conflicts where there didn't used to be one.

raise MappingException("localpart is invalid: %s" % (attributes.localpart,))

Expand Down Expand Up @@ -943,7 +945,7 @@ async def check_username_availability(
localpart,
)

if contains_invalid_mxid_characters(localpart):
if contains_invalid_mxid_characters(localpart, self._e164_mxids):
raise SynapseError(400, "localpart is invalid: %s" % (localpart,))
user_id = UserID(localpart, self._server_name).to_string()
user_infos = await self._store.get_users_by_id_case_insensitive(user_id)
Expand Down
21 changes: 19 additions & 2 deletions synapse/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,18 +335,35 @@ class EventID(DomainSpecificString):
mxid_localpart_allowed_characters = set(
"_-./=" + string.ascii_lowercase + string.digits
)
# MSC4007 adds the + to the allowed characters.
#
# TODO If this was accepted, update the SSO code to support this, see the callers
# of map_username_to_mxid_localpart.
extended_mxid_localpart_allowed_characters = mxid_localpart_allowed_characters | {"+"}

# Guest user IDs are purely numeric.
GUEST_USER_ID_PATTERN = re.compile(r"^\d+$")


def contains_invalid_mxid_characters(localpart: str) -> bool:
def contains_invalid_mxid_characters(
localpart: str, use_extended_character_set: bool
) -> bool:
"""Check for characters not allowed in an mxid or groupid localpart

Args:
localpart: the localpart to be checked
use_extended_character_set: True to use the extended allowed characters
from MSC4009.

Returns:
True if there are any naughty characters
"""
return any(c not in mxid_localpart_allowed_characters for c in localpart)
allowed_characters = (
extended_mxid_localpart_allowed_characters
if use_extended_character_set
else mxid_localpart_allowed_characters
)
return any(c not in allowed_characters for c in localpart)


UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
Expand Down
13 changes: 13 additions & 0 deletions tests/handlers/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,19 @@ def test_register_not_support_user(self) -> None:
d = self.store.is_support_user(user_id)
self.assertFalse(self.get_success(d))

def test_invalid_user_id(self) -> None:
invalid_user_id = "+abcd"
self.get_failure(
self.handler.register_user(localpart=invalid_user_id), SynapseError
)

@override_config({"experimental_features": {"msc4009_e164_mxids": True}})
def text_extended_user_ids(self) -> None:
"""+ should be allowed according to MSC4009."""
valid_user_id = "+1234"
user_id = self.get_success(self.handler.register_user(localpart=valid_user_id))
self.assertEqual(user_id, valid_user_id)

def test_invalid_user_id_length(self) -> None:
invalid_user_id = "x" * 256
self.get_failure(
Expand Down