matrix-org · erikjohnston · Oct 2, 2019 · Oct 2, 2019 · Oct 2, 2019
@@ -0,0 +1 @@
+Improve performance of the public room list directory.
@@ -142,12 +142,12 @@ def _get_public_room_list(
         if since_token:
             batch_token = RoomListNextBatch.from_token(since_token)
 
-            last_room_id = batch_token.last_room_id
+            bounds = (batch_token.last_joined_members, batch_token.last_room_id)
             forwards = batch_token.direction_is_forward
         else:
             batch_token = None
+            bounds = None
 
-            last_room_id = None
             forwards = True
 
         # we request one more than wanted to see if there are more pages to come
@@ -157,7 +157,7 @@ def _get_public_room_list(
             network_tuple,
             search_filter,
             probing_limit,
-            last_room_id=last_room_id,
+            bounds=bounds,
             forwards=forwards,
             ignore_non_federatable=from_federation,
         )
@@ -193,30 +193,38 @@ def build_room_entry(room):
             more_to_come = False
 
         if num_results > 0:
-            final_room_id = results[-1]["room_id"]
-            initial_room_id = results[0]["room_id"]
+            final_entry = results[-1]
+            initial_entry = results[0]
 
             if forwards:
                 if batch_token:
                     # If there was a token given then we assume that there
                     # must be previous results.
                     response["prev_batch"] = RoomListNextBatch(
-                        last_room_id=initial_room_id, direction_is_forward=False
+                        last_joined_members=initial_entry["num_joined_members"],
+                        last_room_id=initial_entry["room_id"],
+                        direction_is_forward=False,
                     ).to_token()
 
                 if more_to_come:
                     response["next_batch"] = RoomListNextBatch(
-                        last_room_id=final_room_id, direction_is_forward=True
+                        last_joined_members=final_entry["num_joined_members"],
+                        last_room_id=final_entry["room_id"],
+                        direction_is_forward=True,
                     ).to_token()
             else:
                 if batch_token:
                     response["next_batch"] = RoomListNextBatch(
-                        last_room_id=final_room_id, direction_is_forward=True
+                        last_joined_members=final_entry["num_joined_members"],
+                        last_room_id=final_entry["room_id"],
+                        direction_is_forward=True,
                     ).to_token()
 
                 if more_to_come:
                     response["prev_batch"] = RoomListNextBatch(
-                        last_room_id=initial_room_id, direction_is_forward=False
+                        last_joined_members=initial_entry["num_joined_members"],
+                        last_room_id=initial_entry["room_id"],
+                        direction_is_forward=False,
                     ).to_token()
 
         for room in results:
@@ -449,12 +457,17 @@ class RoomListNextBatch(
     namedtuple(
         "RoomListNextBatch",
         (
+            "last_joined_members",  # The count to get rooms after/before
             "last_room_id",  # The room_id to get rooms after/before
             "direction_is_forward",  # Bool if this is a next_batch, false if prev_batch
         ),
     )
 ):
-    KEY_DICT = {"last_room_id": "r", "direction_is_forward": "d"}
+    KEY_DICT = {
+        "last_joined_members": "m",
+        "last_room_id": "r",
+        "direction_is_forward": "d",
+    }
 
     REVERSE_KEY_DICT = {v: k for k, v in KEY_DICT.items()}
 

@@ -17,6 +17,7 @@
 import collections
 import logging
 import re
+from typing import Optional, Tuple
 
 from canonicaljson import json
 
@@ -25,6 +26,7 @@
 from synapse.api.errors import StoreError
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.search import SearchStore
+from synapse.types import ThirdPartyInstanceID
 from synapse.util.caches.descriptors import cached, cachedInlineCallbacks
 
 logger = logging.getLogger(__name__)
@@ -119,24 +121,25 @@ def _count_public_rooms_txn(txn):
     @defer.inlineCallbacks
     def get_largest_public_rooms(
         self,
-        network_tuple,
-        search_filter,
-        limit,
-        last_room_id,
-        forwards,
-        ignore_non_federatable=False,
+        network_tuple: Optional[ThirdPartyInstanceID],
+        search_filter: Optional[dict],
+        limit: Optional[int],
+        bounds: Optional[Tuple[int, str]],
+        forwards: bool,
+        ignore_non_federatable: bool = False,
     ):
         """Gets the largest public rooms (where largest is in terms of joined
         members, as tracked in the statistics table).
 
         Args:
-            network_tuple (ThirdPartyInstanceID|None):
-            search_filter (dict|None):
-            limit (int|None): Maxmimum number of rows to return, unlimited otherwise.
-            last_room_id (str|None): if present, a room ID which bounds the
-                result set, and is always *excluded* from the result set.
-            forwards (bool): true iff going forwards, going backwards otherwise
-            ignore_non_federatable (bool): If true filters out non-federatable rooms.
+            network_tuple
+            search_filter
+            limit: Maxmimum number of rows to return, unlimited otherwise.
+            bounds: An uppoer or lower bound to apply to result set if given,
+                consists of a joined member count and room_id (these are
+                excluded from result set).
+            forwards: true iff going forwards, going backwards otherwise
+            ignore_non_federatable: If true filters out non-federatable rooms.
 
         Returns:
             Rooms in order: biggest number of joined users first.
@@ -147,13 +150,29 @@ def get_largest_public_rooms(
         where_clauses = []
         query_args = []
 
-        if last_room_id:
+        # Work out the bounds if we're given them, these bounds look slightly
+        # odd, but are designed to help query planner use indices by pulling
+        # out a common bound.
+        if bounds:
+            last_joined_members, last_room_id = bounds
             if forwards:
-                where_clauses.append("room_id < ?")
+                where_clauses.append(
+                    """
+                        joined_members <= ? AND (
+                            joined_members < ? OR room_id < ?
+                        )
+                    """
+                )
             else:
-                where_clauses.append("? < room_id")
+                where_clauses.append(
+                    """
+                        joined_members >= ? AND (
+                            joined_members > ? OR room_id > ?
+                        )
+                    """
+                )
 
-            query_args += [last_room_id]
+            query_args += [last_joined_members, last_joined_members, last_room_id]
 
         if search_filter and search_filter.get("generic_search_term", None):
             search_term = "%" + search_filter["generic_search_term"] + "%"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Improve performance of the public room list directory.