Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Merge pull request #6301 from matrix-org/babolivier/msc2326
Browse files Browse the repository at this point in the history
Implement MSC2326 (label based filtering)
  • Loading branch information
babolivier authored Nov 1, 2019
2 parents c6516ad + 988d8d6 commit f496d25
Show file tree
Hide file tree
Showing 11 changed files with 396 additions and 9 deletions.
1 change: 1 addition & 0 deletions changelog.d/6301.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implement label-based filtering on `/sync` and `/messages` ([MSC2326](https://github.com/matrix-org/matrix-doc/pull/2326)).
7 changes: 7 additions & 0 deletions synapse/api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,10 @@ class LimitBlockingTypes(object):

MONTHLY_ACTIVE_USER = "monthly_active_user"
HS_DISABLED = "hs_disabled"


class EventContentFields(object):
"""Fields found in events' content, regardless of type."""

# Labels for the event, cf https://github.com/matrix-org/matrix-doc/pull/2326
LABELS = "org.matrix.labels"
15 changes: 13 additions & 2 deletions synapse/api/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from twisted.internet import defer

from synapse.api.constants import EventContentFields
from synapse.api.errors import SynapseError
from synapse.storage.presence import UserPresenceState
from synapse.types import RoomID, UserID
Expand Down Expand Up @@ -66,6 +67,10 @@
"contains_url": {"type": "boolean"},
"lazy_load_members": {"type": "boolean"},
"include_redundant_members": {"type": "boolean"},
# Include or exclude events with the provided labels.
# cf https://github.com/matrix-org/matrix-doc/pull/2326
"org.matrix.labels": {"type": "array", "items": {"type": "string"}},
"org.matrix.not_labels": {"type": "array", "items": {"type": "string"}},
},
}

Expand Down Expand Up @@ -259,6 +264,9 @@ def __init__(self, filter_json):

self.contains_url = self.filter_json.get("contains_url", None)

self.labels = self.filter_json.get("org.matrix.labels", None)
self.not_labels = self.filter_json.get("org.matrix.not_labels", [])

def filters_all_types(self):
return "*" in self.not_types

Expand All @@ -282,6 +290,7 @@ def check(self, event):
room_id = None
ev_type = "m.presence"
contains_url = False
labels = []
else:
sender = event.get("sender", None)
if not sender:
Expand All @@ -300,10 +309,11 @@ def check(self, event):
content = event.get("content", {})
# check if there is a string url field in the content for filtering purposes
contains_url = isinstance(content.get("url"), text_type)
labels = content.get(EventContentFields.LABELS, [])

return self.check_fields(room_id, sender, ev_type, contains_url)
return self.check_fields(room_id, sender, ev_type, labels, contains_url)

def check_fields(self, room_id, sender, event_type, contains_url):
def check_fields(self, room_id, sender, event_type, labels, contains_url):
"""Checks whether the filter matches the given event fields.
Returns:
Expand All @@ -313,6 +323,7 @@ def check_fields(self, room_id, sender, event_type, contains_url):
"rooms": lambda v: room_id == v,
"senders": lambda v: sender == v,
"types": lambda v: _matches_wildcard(event_type, v),
"labels": lambda v: v in labels,
}

for name, match_func in literal_keys.items():
Expand Down
3 changes: 3 additions & 0 deletions synapse/rest/client/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ def on_GET(self, request):
"m.require_identity_server": False,
# as per MSC2290
"m.separate_add_and_bind": True,
# Implements support for label-based filtering as described in
# MSC2326.
"org.matrix.label_based_filtering": True,
},
},
)
Expand Down
36 changes: 35 additions & 1 deletion synapse/storage/data_stores/main/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from twisted.internet import defer

import synapse.metrics
from synapse.api.constants import EventTypes
from synapse.api.constants import EventContentFields, EventTypes
from synapse.api.errors import SynapseError
from synapse.events import EventBase # noqa: F401
from synapse.events.snapshot import EventContext # noqa: F401
Expand Down Expand Up @@ -932,6 +932,13 @@ def _update_metadata_tables_txn(

self._handle_event_relations(txn, event)

# Store the labels for this event.
labels = event.content.get(EventContentFields.LABELS)
if labels:
self.insert_labels_for_event_txn(
txn, event.event_id, labels, event.room_id, event.depth
)

# Insert into the room_memberships table.
self._store_room_members_txn(
txn,
Expand Down Expand Up @@ -1917,6 +1924,33 @@ def get_all_updated_current_state_deltas_txn(txn):
get_all_updated_current_state_deltas_txn,
)

def insert_labels_for_event_txn(
self, txn, event_id, labels, room_id, topological_ordering
):
"""Store the mapping between an event's ID and its labels, with one row per
(event_id, label) tuple.
Args:
txn (LoggingTransaction): The transaction to execute.
event_id (str): The event's ID.
labels (list[str]): A list of text labels.
room_id (str): The ID of the room the event was sent to.
topological_ordering (int): The position of the event in the room's topology.
"""
return self._simple_insert_many_txn(
txn=txn,
table="event_labels",
values=[
{
"event_id": event_id,
"label": label,
"room_id": room_id,
"topological_ordering": topological_ordering,
}
for label in labels
],
)


AllNewEventsResult = namedtuple(
"AllNewEventsResult",
Expand Down
30 changes: 30 additions & 0 deletions synapse/storage/data_stores/main/schema/delta/56/event_labels.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/* Copyright 2019 The Matrix.org Foundation C.I.C.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

-- room_id and topoligical_ordering are denormalised from the events table in order to
-- make the index work.
CREATE TABLE IF NOT EXISTS event_labels (
event_id TEXT,
label TEXT,
room_id TEXT NOT NULL,
topological_ordering BIGINT NOT NULL,
PRIMARY KEY(event_id, label)
);


-- This index enables an event pagination looking for a particular label to index the
-- event_labels table first, which is much quicker than scanning the events table and then
-- filtering by label, if the label is rarely used relative to the size of the room.
CREATE INDEX event_labels_room_id_label_idx ON event_labels(room_id, label, topological_ordering);
11 changes: 10 additions & 1 deletion synapse/storage/data_stores/main/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,14 @@ def filter_to_clause(event_filter):
clauses.append("contains_url = ?")
args.append(event_filter.contains_url)

# We're only applying the "labels" filter on the database query, because applying the
# "not_labels" filter via a SQL query is non-trivial. Instead, we let
# event_filter.check_fields apply it, which is not as efficient but makes the
# implementation simpler.
if event_filter.labels:
clauses.append("(%s)" % " OR ".join("label = ?" for _ in event_filter.labels))
args.extend(event_filter.labels)

return " AND ".join(clauses), args


Expand Down Expand Up @@ -864,8 +872,9 @@ def _paginate_room_events_txn(
args.append(int(limit))

sql = (
"SELECT event_id, topological_ordering, stream_ordering"
"SELECT DISTINCT event_id, topological_ordering, stream_ordering"
" FROM events"
" LEFT JOIN event_labels USING (event_id, room_id, topological_ordering)"
" WHERE outlier = ? AND room_id = ? AND %(bounds)s"
" ORDER BY topological_ordering %(order)s,"
" stream_ordering %(order)s LIMIT ?"
Expand Down
43 changes: 43 additions & 0 deletions tests/api/test_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from twisted.internet import defer

from synapse.api.constants import EventContentFields
from synapse.api.errors import SynapseError
from synapse.api.filtering import Filter
from synapse.events import FrozenEvent
Expand Down Expand Up @@ -95,6 +96,8 @@ def test_valid_filters(self):
"types": ["m.room.message"],
"not_rooms": ["!726s6s6q:example.com"],
"not_senders": ["@spam:example.com"],
"org.matrix.labels": ["#fun"],
"org.matrix.not_labels": ["#work"],
},
"ephemeral": {
"types": ["m.receipt", "m.typing"],
Expand Down Expand Up @@ -320,6 +323,46 @@ def test_definition_combined_event_bad_type(self):
)
self.assertFalse(Filter(definition).check(event))

def test_filter_labels(self):
definition = {"org.matrix.labels": ["#fun"]}
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#fun"]},
)

self.assertTrue(Filter(definition).check(event))

event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#notfun"]},
)

self.assertFalse(Filter(definition).check(event))

def test_filter_not_labels(self):
definition = {"org.matrix.not_labels": ["#fun"]}
event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#fun"]},
)

self.assertFalse(Filter(definition).check(event))

event = MockEvent(
sender="@foo:bar",
type="m.room.message",
room_id="!secretbase:unknown",
content={EventContentFields.LABELS: ["#notfun"]},
)

self.assertTrue(Filter(definition).check(event))

@defer.inlineCallbacks
def test_filter_presence_match(self):
user_filter_json = {"presence": {"types": ["m.*"]}}
Expand Down
101 changes: 100 additions & 1 deletion tests/rest/client/v1/test_rooms.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from twisted.internet import defer

import synapse.rest.admin
from synapse.api.constants import Membership
from synapse.api.constants import EventContentFields, EventTypes, Membership
from synapse.rest.client.v1 import login, profile, room

from tests import unittest
Expand Down Expand Up @@ -811,6 +811,105 @@ def test_stream_token_is_accepted_for_fwd_pagianation(self):
self.assertTrue("chunk" in channel.json_body)
self.assertTrue("end" in channel.json_body)

def test_filter_labels(self):
"""Test that we can filter by a label."""
message_filter = json.dumps(
{"types": [EventTypes.Message], "org.matrix.labels": ["#fun"]}
)

events = self._test_filter_labels(message_filter)

self.assertEqual(len(events), 2, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "with right label", events[0])
self.assertEqual(events[1]["content"]["body"], "with right label", events[1])

def test_filter_not_labels(self):
"""Test that we can filter by the absence of a label."""
message_filter = json.dumps(
{"types": [EventTypes.Message], "org.matrix.not_labels": ["#fun"]}
)

events = self._test_filter_labels(message_filter)

self.assertEqual(len(events), 3, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "without label", events[0])
self.assertEqual(events[1]["content"]["body"], "with wrong label", events[1])
self.assertEqual(
events[2]["content"]["body"], "with two wrong labels", events[2]
)

def test_filter_labels_not_labels(self):
"""Test that we can filter by both a label and the absence of another label."""
sync_filter = json.dumps(
{
"types": [EventTypes.Message],
"org.matrix.labels": ["#work"],
"org.matrix.not_labels": ["#notfun"],
}
)

events = self._test_filter_labels(sync_filter)

self.assertEqual(len(events), 1, [event["content"] for event in events])
self.assertEqual(events[0]["content"]["body"], "with wrong label", events[0])

def _test_filter_labels(self, message_filter):
self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with right label",
EventContentFields.LABELS: ["#fun"],
},
)

self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={"msgtype": "m.text", "body": "without label"},
)

self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with wrong label",
EventContentFields.LABELS: ["#work"],
},
)

self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with two wrong labels",
EventContentFields.LABELS: ["#work", "#notfun"],
},
)

self.helper.send_event(
room_id=self.room_id,
type=EventTypes.Message,
content={
"msgtype": "m.text",
"body": "with right label",
EventContentFields.LABELS: ["#fun"],
},
)

token = "s0_0_0_0_0_0_0_0_0"
request, channel = self.make_request(
"GET",
"/rooms/%s/messages?access_token=x&from=%s&filter=%s"
% (self.room_id, token, message_filter),
)
self.render(request)

return channel.json_body["chunk"]


class RoomSearchTestCase(unittest.HomeserverTestCase):
servlets = [
Expand Down
Loading

0 comments on commit f496d25

Please sign in to comment.