From 177ca9ac208d8935d322bc271b42fe927bac621d Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Mon, 13 Sep 2021 15:50:05 -0700 Subject: [PATCH 01/25] add test to check if null code points are being inserted --- tests/test_null_byte_insertion.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/test_null_byte_insertion.py diff --git a/tests/test_null_byte_insertion.py b/tests/test_null_byte_insertion.py new file mode 100644 index 000000000000..683e36b2d9bc --- /dev/null +++ b/tests/test_null_byte_insertion.py @@ -0,0 +1,29 @@ +from tests.unittest import HomeserverTestCase +import synapse.rest.admin +from synapse.rest.client import account, login, register, room + + +class NullByteInsertionTest(HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + account.register_servlets, + register.register_servlets, + room.register_servlets + ] + + def setUp(self): + super().setUp() + + # Note that this test must be run with postgres or else is meaningless, + # as sqlite will accept insertion of null code points + def test_null_byte(self): + user_id = self.register_user("alice", "password") + access_token = self.login("alice", "password") + room_id = self.helper.create_room_as("alice", True, "1", access_token) + body = '{"body":"\u0000", "msgtype":"m.text"}' + + resp = self.helper.send(room_id, body, "1", access_token) + self.assertTrue("event_id" in resp) + + From 7ae5ccfddc979bfc76f84fa18a958cbc7be48157 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Mon, 13 Sep 2021 15:51:59 -0700 Subject: [PATCH 02/25] add logic to detect and replace null code points before insertion into db --- synapse/storage/databases/main/events.py | 5 +++++ synapse/storage/databases/main/search.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 14ada8a8b359..a6f624864b10 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1958,6 +1958,11 @@ def store_event_search_txn(self, txn, event, key, value): key (str): value (str): """ + + # postgres throws an error if we try to insert null code points + if "\u0000" in value: + value = value.replace("\u0000", "\u0020") + self.store.store_search_entries_txn( txn, ( diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 6480d5a9f5eb..ee05ce046667 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -179,6 +179,10 @@ def reindex_search_txn(txn): # then skip over it continue + # postgres throws an error if we try to insert null code points + if "\u0000" in value: + value = value.replace("\u0000", "\u0020") + event_search_rows.append( SearchEntry( key=key, From c86d8b516116f84e9867d1a4e394ba8742d30d47 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Mon, 13 Sep 2021 15:58:59 -0700 Subject: [PATCH 03/25] lints --- synapse/storage/databases/main/events.py | 2 +- tests/test_null_byte_insertion.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index a6f624864b10..05da120fbd72 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1961,7 +1961,7 @@ def store_event_search_txn(self, txn, event, key, value): # postgres throws an error if we try to insert null code points if "\u0000" in value: - value = value.replace("\u0000", "\u0020") + value = value.replace("\u0000", "\u0020") self.store.store_search_entries_txn( txn, diff --git a/tests/test_null_byte_insertion.py b/tests/test_null_byte_insertion.py index 683e36b2d9bc..233cd4d2733c 100644 --- a/tests/test_null_byte_insertion.py +++ b/tests/test_null_byte_insertion.py @@ -1,7 +1,8 @@ -from tests.unittest import HomeserverTestCase import synapse.rest.admin from synapse.rest.client import account, login, register, room +from tests.unittest import HomeserverTestCase + class NullByteInsertionTest(HomeserverTestCase): servlets = [ @@ -9,7 +10,7 @@ class NullByteInsertionTest(HomeserverTestCase): login.register_servlets, account.register_servlets, register.register_servlets, - room.register_servlets + room.register_servlets, ] def setUp(self): @@ -18,12 +19,10 @@ def setUp(self): # Note that this test must be run with postgres or else is meaningless, # as sqlite will accept insertion of null code points def test_null_byte(self): - user_id = self.register_user("alice", "password") + self.register_user("alice", "password") access_token = self.login("alice", "password") room_id = self.helper.create_room_as("alice", True, "1", access_token) body = '{"body":"\u0000", "msgtype":"m.text"}' resp = self.helper.send(room_id, body, "1", access_token) self.assertTrue("event_id" in resp) - - From f681896c0db4bb4cfb6af28436de80d901583cb3 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Tue, 14 Sep 2021 09:10:22 -0700 Subject: [PATCH 04/25] add license to test --- tests/test_null_byte_insertion.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_null_byte_insertion.py b/tests/test_null_byte_insertion.py index 233cd4d2733c..b04e6d6466ba 100644 --- a/tests/test_null_byte_insertion.py +++ b/tests/test_null_byte_insertion.py @@ -1,3 +1,18 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + import synapse.rest.admin from synapse.rest.client import account, login, register, room From 534f141ed091b07f7cf0e0563c535fe24422c185 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Tue, 14 Sep 2021 09:12:10 -0700 Subject: [PATCH 05/25] change approach to null substitution --- synapse/storage/databases/main/events.py | 4 ---- synapse/storage/databases/main/search.py | 6 +----- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 05da120fbd72..7aefe15e2a98 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1959,10 +1959,6 @@ def store_event_search_txn(self, txn, event, key, value): value (str): """ - # postgres throws an error if we try to insert null code points - if "\u0000" in value: - value = value.replace("\u0000", "\u0020") - self.store.store_search_entries_txn( txn, ( diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index ee05ce046667..ff32f51607ac 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -55,7 +55,7 @@ def store_search_entries_txn(self, txn, entries): entry.event_id, entry.room_id, entry.key, - entry.value, + entry.value.replace("\u0000", "\u0020"), entry.stream_ordering, entry.origin_server_ts, ) @@ -179,10 +179,6 @@ def reindex_search_txn(txn): # then skip over it continue - # postgres throws an error if we try to insert null code points - if "\u0000" in value: - value = value.replace("\u0000", "\u0020") - event_search_rows.append( SearchEntry( key=key, From 4574d66f997879bf9b0a03c13302aeaa605e30d0 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Tue, 14 Sep 2021 09:17:57 -0700 Subject: [PATCH 06/25] add type hint for SearchEntry --- synapse/storage/databases/main/search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index ff32f51607ac..cf582c082c13 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -15,7 +15,7 @@ import logging import re from collections import namedtuple -from typing import Collection, List, Optional, Set +from typing import Collection, Iterable, List, Optional, Set from synapse.api.errors import SynapseError from synapse.events import EventBase @@ -33,7 +33,7 @@ class SearchWorkerStore(SQLBaseStore): - def store_search_entries_txn(self, txn, entries): + def store_search_entries_txn(self, txn, entries: Iterable[SearchEntry]): """Add entries to the search table Args: From 0628de71b72cf943ddd29d9a0ebd5e3a5b3fa63a Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Tue, 14 Sep 2021 09:29:30 -0700 Subject: [PATCH 07/25] Add changelog entry Signed-off-by: H.Shay --- changelog.d/10818.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10818.misc diff --git a/changelog.d/10818.misc b/changelog.d/10818.misc new file mode 100644 index 000000000000..083b1126de94 --- /dev/null +++ b/changelog.d/10818.misc @@ -0,0 +1 @@ +Replace "\u0000" with "\u0020" in message body before insertion into Postgres database. \ No newline at end of file From b0b5792ad578aa1c8c0168bd6da2197c89d9f082 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Tue, 14 Sep 2021 09:35:16 -0700 Subject: [PATCH 08/25] updated changelog --- changelog.d/10818.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/10818.misc b/changelog.d/10818.misc index 083b1126de94..a45139090dc8 100644 --- a/changelog.d/10818.misc +++ b/changelog.d/10818.misc @@ -1 +1 @@ -Replace "\u0000" with "\u0020" in message body before insertion into Postgres database. \ No newline at end of file +Made a change so that posting an event with "\u0000" no longer causes an internal server error. \ No newline at end of file From 49fc93573b13f0774245b5ae41b01fd6003bd4fe Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Tue, 14 Sep 2021 09:45:06 -0700 Subject: [PATCH 09/25] update chanelog message --- changelog.d/10820.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/10820.misc diff --git a/changelog.d/10820.misc b/changelog.d/10820.misc new file mode 100644 index 000000000000..a45139090dc8 --- /dev/null +++ b/changelog.d/10820.misc @@ -0,0 +1 @@ +Made a change so that posting an event with "\u0000" no longer causes an internal server error. \ No newline at end of file From 8de418a192a56b02dcc666b2c50988f86cf801e6 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Tue, 14 Sep 2021 10:15:23 -0700 Subject: [PATCH 10/25] remove duplicate changelog --- changelog.d/10818.misc | 1 - 1 file changed, 1 deletion(-) delete mode 100644 changelog.d/10818.misc diff --git a/changelog.d/10818.misc b/changelog.d/10818.misc deleted file mode 100644 index a45139090dc8..000000000000 --- a/changelog.d/10818.misc +++ /dev/null @@ -1 +0,0 @@ -Made a change so that posting an event with "\u0000" no longer causes an internal server error. \ No newline at end of file From b2c7c88cc6381bc172e227cfb29b8ca9706bd34d Mon Sep 17 00:00:00 2001 From: Hillery Shay Date: Tue, 14 Sep 2021 12:54:30 -0700 Subject: [PATCH 11/25] Update synapse/storage/databases/main/events.py remove extra space Co-authored-by: Patrick Cloke --- synapse/storage/databases/main/events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index d099c97a1a8c..8e691678e543 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1955,7 +1955,6 @@ def store_event_search_txn(self, txn, event, key, value): key (str): value (str): """ - self.store.store_search_entries_txn( txn, ( From 163f5692724d0e6a0b067484192f5970bf9a4fac Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Thu, 16 Sep 2021 14:12:19 -0700 Subject: [PATCH 12/25] rename and move test file, update tests, delete old test file --- tests/storage/test_room_search.py | 77 +++++++++++++++++++++++++++++++ tests/test_null_byte_insertion.py | 43 ----------------- 2 files changed, 77 insertions(+), 43 deletions(-) create mode 100644 tests/storage/test_room_search.py delete mode 100644 tests/test_null_byte_insertion.py diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py new file mode 100644 index 000000000000..dbda23019646 --- /dev/null +++ b/tests/storage/test_room_search.py @@ -0,0 +1,77 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import synapse.rest.admin +from synapse.rest.client import login, room + +from tests.unittest import HomeserverTestCase + + +class NullByteInsertionTest(HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + login.register_servlets, + room.register_servlets, + ] + + def test_null_byte(self): + """ + Postgres doesn't like null bytes going into the search tables. Internally + we replace those with a space. + + Ensure this doesn't break anything. + """ + + # register a user and create a room, creat some messages + self.register_user("alice", "password") + access_token = self.login("alice", "password") + room_id = self.helper.create_room_as("alice", True, "1", access_token) + body1 = "hi\u0000bob" + body2 = "another message" + body3 = "hi alice" + + # send messages and ensure they don't cause an internal server + # error + resp1 = self.helper.send(room_id, body1, "1", access_token) + resp2 = self.helper.send(room_id, body2, "2", access_token) + resp3 = self.helper.send(room_id, body3, "3", access_token) + self.assertTrue("event_id" in resp1) + self.assertTrue("event_id" in resp2) + self.assertTrue("event_id" in resp3) + + # check that search still works with the message where the null byte was replaced + store = self.hs.get_datastore() + res1 = self.get_success( + store.search_msgs([room_id], "hi bob", ["content.body"]) + ) + self.assertEquals(res1.get("count"), 1) + self.assertIn("bob", res1.get("highlights")) + self.assertIn("hi", res1.get("highlights")) + + # check that search still works with another unrelated message + res2 = self.get_success( + store.search_msgs([room_id], "another", ["content.body"]) + ) + self.assertEquals(res2.get("count"), 1) + self.assertIn("another", res2.get("highlights")) + + # check that search still works when given a search term that overlaps + # with the message that we replaced the null byte in and an unrelated one + res3 = self.get_success(store.search_msgs([room_id], "hi", ["content.body"])) + self.assertEquals(res3.get("count"), 2) + res4 = self.get_success( + store.search_msgs([room_id], "hi alice", ["content.body"]) + ) + self.assertIn("alice", res4.get("highlights")) diff --git a/tests/test_null_byte_insertion.py b/tests/test_null_byte_insertion.py deleted file mode 100644 index b04e6d6466ba..000000000000 --- a/tests/test_null_byte_insertion.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2021 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import synapse.rest.admin -from synapse.rest.client import account, login, register, room - -from tests.unittest import HomeserverTestCase - - -class NullByteInsertionTest(HomeserverTestCase): - servlets = [ - synapse.rest.admin.register_servlets_for_client_rest_resource, - login.register_servlets, - account.register_servlets, - register.register_servlets, - room.register_servlets, - ] - - def setUp(self): - super().setUp() - - # Note that this test must be run with postgres or else is meaningless, - # as sqlite will accept insertion of null code points - def test_null_byte(self): - self.register_user("alice", "password") - access_token = self.login("alice", "password") - room_id = self.helper.create_room_as("alice", True, "1", access_token) - body = '{"body":"\u0000", "msgtype":"m.text"}' - - resp = self.helper.send(room_id, body, "1", access_token) - self.assertTrue("event_id" in resp) From 0d2c31603d822716c210b3a5e893d53ea65fe479 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Thu, 16 Sep 2021 14:56:41 -0700 Subject: [PATCH 13/25] fix typo in comments --- tests/storage/test_room_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py index dbda23019646..ef19a11f0fe9 100644 --- a/tests/storage/test_room_search.py +++ b/tests/storage/test_room_search.py @@ -34,7 +34,7 @@ def test_null_byte(self): Ensure this doesn't break anything. """ - # register a user and create a room, creat some messages + # register a user and create a room, create some messages self.register_user("alice", "password") access_token = self.login("alice", "password") room_id = self.helper.create_room_as("alice", True, "1", access_token) From 7e7de36e97a5e9c326153a139460b568f71e86e0 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Thu, 16 Sep 2021 15:00:59 -0700 Subject: [PATCH 14/25] update _find_highlights_in_postgres to replace null byte with space --- synapse/storage/databases/main/search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index cf582c082c13..6868abf2bece 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -646,6 +646,7 @@ def f(txn): for key in ("body", "name", "topic"): v = event.content.get(key, None) if v: + v = v.replace("\u0000", " ") values.append(v) if not values: From 584ccd58bb05cd013c7b500017731be27664ace8 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Thu, 16 Sep 2021 19:34:05 -0700 Subject: [PATCH 15/25] replace null byte in sqlite search insertion --- synapse/storage/databases/main/search.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 6868abf2bece..97a34349577b 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -55,7 +55,7 @@ def store_search_entries_txn(self, txn, entries: Iterable[SearchEntry]): entry.event_id, entry.room_id, entry.key, - entry.value.replace("\u0000", "\u0020"), + entry.value.replace("\u0000", " "), entry.stream_ordering, entry.origin_server_ts, ) @@ -70,11 +70,16 @@ def store_search_entries_txn(self, txn, entries: Iterable[SearchEntry]): " VALUES (?,?,?,?)" ) args = ( - (entry.event_id, entry.room_id, entry.key, entry.value) + ( + entry.event_id, + entry.room_id, + entry.key, + entry.value.replace("\u0000", " "), + ) for entry in entries ) - txn.execute_batch(sql, args) + else: # This should be unreachable. raise Exception("Unrecognized database engine") From e922659ca0003dd976b44160b2365b7616a16924 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Thu, 16 Sep 2021 19:34:50 -0700 Subject: [PATCH 16/25] beef up and reorganize test for this pr --- tests/storage/test_room_search.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py index ef19a11f0fe9..52f6510d9266 100644 --- a/tests/storage/test_room_search.py +++ b/tests/storage/test_room_search.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import synapse.rest.admin from synapse.rest.client import login, room @@ -28,10 +29,11 @@ class NullByteInsertionTest(HomeserverTestCase): def test_null_byte(self): """ - Postgres doesn't like null bytes going into the search tables. Internally + Postgres/SQLite don't like null bytes going into the search tables. Internally we replace those with a space. - Ensure this doesn't break anything. + Ensure this doesn't break anything.q + """ # register a user and create a room, create some messages @@ -57,15 +59,12 @@ def test_null_byte(self): store.search_msgs([room_id], "hi bob", ["content.body"]) ) self.assertEquals(res1.get("count"), 1) - self.assertIn("bob", res1.get("highlights")) - self.assertIn("hi", res1.get("highlights")) # check that search still works with another unrelated message res2 = self.get_success( store.search_msgs([room_id], "another", ["content.body"]) ) self.assertEquals(res2.get("count"), 1) - self.assertIn("another", res2.get("highlights")) # check that search still works when given a search term that overlaps # with the message that we replaced the null byte in and an unrelated one @@ -74,4 +73,10 @@ def test_null_byte(self): res4 = self.get_success( store.search_msgs([room_id], "hi alice", ["content.body"]) ) - self.assertIn("alice", res4.get("highlights")) + + # check content of highlights if we are using postgres + if "SYNAPSE_POSTGRES" in os.environ and os.environ["SYNAPSE_POSTGRES"] == "1": + self.assertIn("bob", res1.get("highlights")) + self.assertIn("hi", res1.get("highlights")) + self.assertIn("another", res2.get("highlights")) + self.assertIn("alice", res4.get("highlights")) From baf2250d354c199192204fd26bf424ed52f5c316 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Fri, 17 Sep 2021 08:12:18 -0700 Subject: [PATCH 17/25] update changelog --- changelog.d/10820.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/10820.misc b/changelog.d/10820.misc index a45139090dc8..453ee779116d 100644 --- a/changelog.d/10820.misc +++ b/changelog.d/10820.misc @@ -1 +1 @@ -Made a change so that posting an event with "\u0000" no longer causes an internal server error. \ No newline at end of file +Fix a bug where sending an `m.room.text` event containing a null byte would cause an internal server error. \ No newline at end of file From 54ca415fdf3657bb6b4fba96d1481bd6a87b60c9 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Fri, 17 Sep 2021 08:15:45 -0700 Subject: [PATCH 18/25] add type hints and update docstring --- synapse/storage/databases/main/search.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 97a34349577b..d9453f059d7a 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -20,7 +20,7 @@ from synapse.api.errors import SynapseError from synapse.events import EventBase from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from synapse.storage.database import DatabasePool +from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.engines import PostgresEngine, Sqlite3Engine @@ -33,13 +33,14 @@ class SearchWorkerStore(SQLBaseStore): - def store_search_entries_txn(self, txn, entries: Iterable[SearchEntry]): + def store_search_entries_txn( + self, txn: LoggingTransaction, entries: Iterable[SearchEntry] + ) -> None: """Add entries to the search table Args: - txn (cursor): - entries (iterable[SearchEntry]): - entries to be added to the table + txn: + entries: entries to be added to the table """ if not self.hs.config.enable_search: return From 0cad2fe906eaf63246b0cba3e25754342aeac196 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Fri, 17 Sep 2021 08:16:30 -0700 Subject: [PATCH 19/25] check db engine directly vs using env variable --- tests/storage/test_room_search.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py index 52f6510d9266..673ef3191855 100644 --- a/tests/storage/test_room_search.py +++ b/tests/storage/test_room_search.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os - import synapse.rest.admin from synapse.rest.client import login, room +from synapse.storage.engines import PostgresEngine from tests.unittest import HomeserverTestCase @@ -32,8 +31,7 @@ def test_null_byte(self): Postgres/SQLite don't like null bytes going into the search tables. Internally we replace those with a space. - Ensure this doesn't break anything.q - + Ensure this doesn't break anything. """ # register a user and create a room, create some messages @@ -75,7 +73,7 @@ def test_null_byte(self): ) # check content of highlights if we are using postgres - if "SYNAPSE_POSTGRES" in os.environ and os.environ["SYNAPSE_POSTGRES"] == "1": + if isinstance(store.database_engine, PostgresEngine): self.assertIn("bob", res1.get("highlights")) self.assertIn("hi", res1.get("highlights")) self.assertIn("another", res2.get("highlights")) From c6f6fc2e60d45eee29402bbb009af782ca793d92 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Mon, 20 Sep 2021 12:56:55 -0700 Subject: [PATCH 20/25] refactor tests to be less repetetive --- tests/storage/test_room_search.py | 38 +++++++++++++------------------ 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py index 673ef3191855..397a9862a29f 100644 --- a/tests/storage/test_room_search.py +++ b/tests/storage/test_room_search.py @@ -38,43 +38,37 @@ def test_null_byte(self): self.register_user("alice", "password") access_token = self.login("alice", "password") room_id = self.helper.create_room_as("alice", True, "1", access_token) - body1 = "hi\u0000bob" - body2 = "another message" - body3 = "hi alice" # send messages and ensure they don't cause an internal server # error - resp1 = self.helper.send(room_id, body1, "1", access_token) - resp2 = self.helper.send(room_id, body2, "2", access_token) - resp3 = self.helper.send(room_id, body3, "3", access_token) - self.assertTrue("event_id" in resp1) - self.assertTrue("event_id" in resp2) - self.assertTrue("event_id" in resp3) + for body in ["hi\u0000bob", "another message", "hi alice"]: + response = self.helper.send(room_id, body, tok=access_token) + self.assertIn("event_id", response) # check that search still works with the message where the null byte was replaced store = self.hs.get_datastore() - res1 = self.get_success( + result = self.get_success( store.search_msgs([room_id], "hi bob", ["content.body"]) ) - self.assertEquals(res1.get("count"), 1) + self.assertEquals(result.get("count"), 1) + if isinstance(store.database_engine, PostgresEngine): + self.assertIn("hi", result.get("highlights")) + self.assertIn("bob", result.get("highlights")) # check that search still works with another unrelated message - res2 = self.get_success( + result = self.get_success( store.search_msgs([room_id], "another", ["content.body"]) ) - self.assertEquals(res2.get("count"), 1) + self.assertEquals(result.get("count"), 1) + if isinstance(store.database_engine, PostgresEngine): + self.assertIn("another", result.get("highlights")) # check that search still works when given a search term that overlaps # with the message that we replaced the null byte in and an unrelated one - res3 = self.get_success(store.search_msgs([room_id], "hi", ["content.body"])) - self.assertEquals(res3.get("count"), 2) - res4 = self.get_success( + result = self.get_success(store.search_msgs([room_id], "hi", ["content.body"])) + self.assertEquals(result.get("count"), 2) + result = self.get_success( store.search_msgs([room_id], "hi alice", ["content.body"]) ) - - # check content of highlights if we are using postgres if isinstance(store.database_engine, PostgresEngine): - self.assertIn("bob", res1.get("highlights")) - self.assertIn("hi", res1.get("highlights")) - self.assertIn("another", res2.get("highlights")) - self.assertIn("alice", res4.get("highlights")) + self.assertIn("alice", result.get("highlights")) From 32471062ca1663d9a79e3615611769346940f512 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Mon, 20 Sep 2021 12:57:45 -0700 Subject: [PATCH 21/25] move rplace logic into seperate function --- synapse/storage/databases/main/search.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index d9453f059d7a..b761b70e2ec5 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -33,6 +33,11 @@ class SearchWorkerStore(SQLBaseStore): + def _clean_value_for_search(self, value: str) -> str: + """Replaces null code points in string as postgres/sql do not + like the insertion of strings with null code points""" + return value.replace("\u0000", " ") + def store_search_entries_txn( self, txn: LoggingTransaction, entries: Iterable[SearchEntry] ) -> None: @@ -56,7 +61,7 @@ def store_search_entries_txn( entry.event_id, entry.room_id, entry.key, - entry.value.replace("\u0000", " "), + self._clean_value_for_search(entry.value), entry.stream_ordering, entry.origin_server_ts, ) @@ -75,7 +80,7 @@ def store_search_entries_txn( entry.event_id, entry.room_id, entry.key, - entry.value.replace("\u0000", " "), + self._clean_value_for_search(entry.value), ) for entry in entries ) @@ -652,7 +657,7 @@ def f(txn): for key in ("body", "name", "topic"): v = event.content.get(key, None) if v: - v = v.replace("\u0000", " ") + v = self._clean_value_for_search(v) values.append(v) if not values: From 103125fc6fd28f94766271cf54447169848a7a06 Mon Sep 17 00:00:00 2001 From: "H.Shay" Date: Tue, 21 Sep 2021 08:07:10 -0700 Subject: [PATCH 22/25] requested changes --- changelog.d/10820.misc | 2 +- synapse/storage/databases/main/search.py | 17 +++++++++-------- tests/storage/test_room_search.py | 14 +++++++------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/changelog.d/10820.misc b/changelog.d/10820.misc index 453ee779116d..9c74d38308dc 100644 --- a/changelog.d/10820.misc +++ b/changelog.d/10820.misc @@ -1 +1 @@ -Fix a bug where sending an `m.room.text` event containing a null byte would cause an internal server error. \ No newline at end of file +Fix a long-standing bug where an `m.room.text` event containing a null byte would cause an internal server error. \ No newline at end of file diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index b761b70e2ec5..582223b63611 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -32,12 +32,13 @@ ) -class SearchWorkerStore(SQLBaseStore): - def _clean_value_for_search(self, value: str) -> str: - """Replaces null code points in string as postgres/sql do not - like the insertion of strings with null code points""" - return value.replace("\u0000", " ") +def _clean_value_for_search(value: str) -> str: + """Replaces null code points in string as postgres/sql do not + like the insertion of strings with null code points""" + return value.replace("\u0000", " ") + +class SearchWorkerStore(SQLBaseStore): def store_search_entries_txn( self, txn: LoggingTransaction, entries: Iterable[SearchEntry] ) -> None: @@ -61,7 +62,7 @@ def store_search_entries_txn( entry.event_id, entry.room_id, entry.key, - self._clean_value_for_search(entry.value), + _clean_value_for_search(entry.value), entry.stream_ordering, entry.origin_server_ts, ) @@ -80,7 +81,7 @@ def store_search_entries_txn( entry.event_id, entry.room_id, entry.key, - self._clean_value_for_search(entry.value), + _clean_value_for_search(entry.value), ) for entry in entries ) @@ -657,7 +658,7 @@ def f(txn): for key in ("body", "name", "topic"): v = event.content.get(key, None) if v: - v = self._clean_value_for_search(v) + v = _clean_value_for_search(v) values.append(v) if not values: diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py index 397a9862a29f..8971ecccbd6d 100644 --- a/tests/storage/test_room_search.py +++ b/tests/storage/test_room_search.py @@ -34,18 +34,18 @@ def test_null_byte(self): Ensure this doesn't break anything. """ - # register a user and create a room, create some messages + # Register a user and create a room, create some messages self.register_user("alice", "password") access_token = self.login("alice", "password") - room_id = self.helper.create_room_as("alice", True, "1", access_token) + room_id = self.helper.create_room_as("alice", tok=access_token) - # send messages and ensure they don't cause an internal server + # Send messages and ensure they don't cause an internal server # error for body in ["hi\u0000bob", "another message", "hi alice"]: response = self.helper.send(room_id, body, tok=access_token) self.assertIn("event_id", response) - # check that search still works with the message where the null byte was replaced + # Check that search works for the message where the null byte was replaced store = self.hs.get_datastore() result = self.get_success( store.search_msgs([room_id], "hi bob", ["content.body"]) @@ -55,7 +55,7 @@ def test_null_byte(self): self.assertIn("hi", result.get("highlights")) self.assertIn("bob", result.get("highlights")) - # check that search still works with another unrelated message + # Check that search works for an unrelated message result = self.get_success( store.search_msgs([room_id], "another", ["content.body"]) ) @@ -63,8 +63,8 @@ def test_null_byte(self): if isinstance(store.database_engine, PostgresEngine): self.assertIn("another", result.get("highlights")) - # check that search still works when given a search term that overlaps - # with the message that we replaced the null byte in and an unrelated one + # Check that search works for a search term that overlaps with the message + # containing a null byte and an unrelated message. result = self.get_success(store.search_msgs([room_id], "hi", ["content.body"])) self.assertEquals(result.get("count"), 2) result = self.get_success( From 26390297f7c6c20cbad633eda3cfbad1049447cd Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 21 Sep 2021 12:04:13 -0400 Subject: [PATCH 23/25] Fix typo. --- synapse/storage/databases/main/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 582223b63611..d5fcb405b025 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -33,7 +33,7 @@ def _clean_value_for_search(value: str) -> str: - """Replaces null code points in string as postgres/sql do not + """Replaces null code points in string as postgres/sqlite do not like the insertion of strings with null code points""" return value.replace("\u0000", " ") From 7c1679bc90fc4415438935e6aac3d9d8f17dfc99 Mon Sep 17 00:00:00 2001 From: Hillery Shay Date: Wed, 22 Sep 2021 07:57:35 -0700 Subject: [PATCH 24/25] Update synapse/storage/databases/main/search.py Co-authored-by: reivilibre --- synapse/storage/databases/main/search.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index d5fcb405b025..2a1e99e17a90 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -33,8 +33,11 @@ def _clean_value_for_search(value: str) -> str: - """Replaces null code points in string as postgres/sqlite do not - like the insertion of strings with null code points""" + """ + Replaces any null code points in the string with spaces as + Postgres and SQLite do not like the insertion of strings with + null code points into the full-text search tables. + """ return value.replace("\u0000", " ") From 88ad982d6718f8ddb28d8ec32a52f177caf5ec39 Mon Sep 17 00:00:00 2001 From: Hillery Shay Date: Wed, 22 Sep 2021 07:58:04 -0700 Subject: [PATCH 25/25] Update changelog.d/10820.misc Co-authored-by: Aaron Raimist --- changelog.d/10820.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/10820.misc b/changelog.d/10820.misc index 9c74d38308dc..4373bf6f6b6c 100644 --- a/changelog.d/10820.misc +++ b/changelog.d/10820.misc @@ -1 +1 @@ -Fix a long-standing bug where an `m.room.text` event containing a null byte would cause an internal server error. \ No newline at end of file +Fix a long-standing bug where an `m.room.message` event containing a null byte would cause an internal server error. \ No newline at end of file