From f78a456e797a432aeb86cfd09d505ac8dddcede1 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Wed, 12 Apr 2023 19:27:02 -0500
Subject: [PATCH 1/5] Factor the methods and basic setup out of
 SimpleHttpClient to allow a reusable base class.

---
 synapse/http/client.py | 129 ++++++++++++++++++++++++-----------------
 1 file changed, 77 insertions(+), 52 deletions(-)

diff --git a/synapse/http/client.py b/synapse/http/client.py
index b5cf8123ce84..014046f5d09e 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -312,35 +312,24 @@ def request(
         )
 
 
-class SimpleHttpClient:
+class BaseSynapseClient:
     """
     A simple, no-frills HTTP client with methods that wrap up common ways of
     using HTTP in Matrix
+
+    Args:
+        hs: The HomeServer instance to pass in
+        treq_args: Extra keyword arguments to be given to treq.request.
     """
 
     def __init__(
         self,
         hs: "HomeServer",
         treq_args: Optional[Dict[str, Any]] = None,
-        ip_whitelist: Optional[IPSet] = None,
-        ip_blacklist: Optional[IPSet] = None,
-        use_proxy: bool = False,
     ):
-        """
-        Args:
-            hs
-            treq_args: Extra keyword arguments to be given to treq.request.
-            ip_blacklist: The IP addresses that are blacklisted that
-                we may not request.
-            ip_whitelist: The whitelisted IP addresses, that we can
-               request if it were otherwise caught in a blacklist.
-            use_proxy: Whether proxy settings should be discovered and used
-                from conventional environment variables.
-        """
         self.hs = hs
+        self.reactor = hs.get_reactor()
 
-        self._ip_whitelist = ip_whitelist
-        self._ip_blacklist = ip_blacklist
         self._extra_treq_args = treq_args or {}
         self.clock = hs.get_clock()
 
@@ -356,44 +345,11 @@ def __init__(
         # reactor.
         self._cooperator = Cooperator(scheduler=_make_scheduler(hs.get_reactor()))
 
-        if self._ip_blacklist:
-            # If we have an IP blacklist, we need to use a DNS resolver which
-            # filters out blacklisted IP addresses, to prevent DNS rebinding.
-            self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
-                hs.get_reactor(), self._ip_whitelist, self._ip_blacklist
-            )
-        else:
-            self.reactor = hs.get_reactor()
-
-        # the pusher makes lots of concurrent SSL connections to sygnal, and
-        # tends to do so in batches, so we need to allow the pool to keep
-        # lots of idle connections around.
-        pool = HTTPConnectionPool(self.reactor)
-        # XXX: The justification for using the cache factor here is that larger instances
-        # will need both more cache and more connections.
-        # Still, this should probably be a separate dial
-        pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
-        pool.cachedConnectionTimeout = 2 * 60
-
-        self.agent: IAgent = ProxyAgent(
+        self.agent: IAgent = Agent(
             self.reactor,
-            hs.get_reactor(),
-            connectTimeout=15,
-            contextFactory=self.hs.get_http_client_context_factory(),
-            pool=pool,
-            use_proxy=use_proxy,
+            contextFactory=hs.get_http_client_context_factory(),
         )
 
-        if self._ip_blacklist:
-            # If we have an IP blacklist, we then install the blacklisting Agent
-            # which prevents direct access to IP addresses, that are not caught
-            # by the DNS resolution.
-            self.agent = BlacklistingAgentWrapper(
-                self.agent,
-                ip_blacklist=self._ip_blacklist,
-                ip_whitelist=self._ip_whitelist,
-            )
-
     async def request(
         self,
         method: str,
@@ -799,6 +755,75 @@ async def get_file(
         )
 
 
+class SimpleHttpClient(BaseSynapseClient):
+    """
+    An HTTP client capable of crossing a proxy and respecting a block/allow list. This
+    does set up a HTTPConnectionPool based on a multiplier of 100 times
+    hs.config.caches.global_factor, as it is responsible for pushing to Sygnal.
+
+    Args:
+        hs: The HomeServer instance to pass in
+        treq_args: Extra keyword arguments to be given to treq.request.
+        ip_blacklist: The IP addresses that are blacklisted that
+            we may not request.
+        ip_whitelist: The whitelisted IP addresses, that we can
+           request if it were otherwise caught in a blacklist.
+        use_proxy: Whether proxy settings should be discovered and used
+            from conventional environment variables.
+    """
+
+    def __init__(
+        self,
+        hs: "HomeServer",
+        treq_args: Optional[Dict[str, Any]] = None,
+        ip_whitelist: Optional[IPSet] = None,
+        ip_blacklist: Optional[IPSet] = None,
+        use_proxy: bool = False,
+    ):
+        super().__init__(hs, treq_args=treq_args)
+        self._ip_whitelist = ip_whitelist
+        self._ip_blacklist = ip_blacklist
+
+        if self._ip_blacklist:
+            # If we have an IP blacklist, we need to use a DNS resolver which
+            # filters out blacklisted IP addresses, to prevent DNS rebinding.
+            self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
+                hs.get_reactor(), self._ip_whitelist, self._ip_blacklist
+            )
+        else:
+            # This should have already been set up in the call to super(). Make sure.
+            self.reactor = hs.get_reactor()
+
+        # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
+        # do so in batches, so we need to allow the pool to keep lots of idle
+        # connections around.
+        pool = HTTPConnectionPool(self.reactor)
+        # XXX: The justification for using the cache factor here is that larger
+        # instances will need both more cache and more connections.
+        # Still, this should probably be a separate dial
+        pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
+        pool.cachedConnectionTimeout = 2 * 60
+
+        self.agent: IAgent = ProxyAgent(
+            self.reactor,
+            hs.get_reactor(),
+            connectTimeout=15,
+            contextFactory=self.hs.get_http_client_context_factory(),
+            pool=pool,
+            use_proxy=use_proxy,
+        )
+
+        if self._ip_blacklist:
+            # If we have an IP blacklist, we then install the blacklisting Agent
+            # which prevents direct access to IP addresses, that are not caught
+            # by the DNS resolution.
+            self.agent = BlacklistingAgentWrapper(
+                self.agent,
+                ip_blacklist=self._ip_blacklist,
+                ip_whitelist=self._ip_whitelist,
+            )
+
+
 def _timeout_to_request_timed_out_error(f: Failure) -> Failure:
     if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError):
         # The TCP connection has its own timeout (set by the 'connectTimeout' param

From f04879ca34494a30c887aca65dc0f2a0a57d16db Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Wed, 12 Apr 2023 19:54:05 -0500
Subject: [PATCH 2/5] Changelog

---
 changelog.d/15427.misc | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog.d/15427.misc

diff --git a/changelog.d/15427.misc b/changelog.d/15427.misc
new file mode 100644
index 000000000000..ef873e3b2b49
--- /dev/null
+++ b/changelog.d/15427.misc
@@ -0,0 +1 @@
+Refactor `SimpleHttpClient` to pull out a base class.

From 4eec2e7138375d7825e8b00ad523dfd4b1f78d30 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Thu, 13 Apr 2023 15:22:57 -0500
Subject: [PATCH 3/5] Apply suggestions from code review

Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com>
---
 synapse/http/client.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/synapse/http/client.py b/synapse/http/client.py
index 014046f5d09e..a969f4f40929 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -312,7 +312,7 @@ def request(
         )
 
 
-class BaseSynapseClient:
+class BaseHttpClient:
     """
     A simple, no-frills HTTP client with methods that wrap up common ways of
     using HTTP in Matrix
@@ -757,9 +757,9 @@ async def get_file(
 
 class SimpleHttpClient(BaseSynapseClient):
     """
-    An HTTP client capable of crossing a proxy and respecting a block/allow list. This
-    does set up a HTTPConnectionPool based on a multiplier of 100 times
-    hs.config.caches.global_factor, as it is responsible for pushing to Sygnal.
+    An HTTP client capable of crossing a proxy and respecting a block/allow list.
+
+    This also configures a larger / longer lasting HTTP connection pool.
 
     Args:
         hs: The HomeServer instance to pass in
@@ -788,11 +788,8 @@ def __init__(
             # If we have an IP blacklist, we need to use a DNS resolver which
             # filters out blacklisted IP addresses, to prevent DNS rebinding.
             self.reactor: ISynapseReactor = BlacklistingReactorWrapper(
-                hs.get_reactor(), self._ip_whitelist, self._ip_blacklist
+                self.reactor, self._ip_whitelist, self._ip_blacklist
             )
-        else:
-            # This should have already been set up in the call to super(). Make sure.
-            self.reactor = hs.get_reactor()
 
         # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
         # do so in batches, so we need to allow the pool to keep lots of idle

From a3bdb1f01032225dfb51db4d2a5acc1a1c71d993 Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Thu, 13 Apr 2023 15:28:27 -0500
Subject: [PATCH 4/5] Follow up fix to rename `BaseSynapseClient` to
 `BaseHttpClient`

---
 synapse/http/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/http/client.py b/synapse/http/client.py
index a969f4f40929..fbb6cd2bc218 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -755,7 +755,7 @@ async def get_file(
         )
 
 
-class SimpleHttpClient(BaseSynapseClient):
+class SimpleHttpClient(BaseHttpClient):
     """
     An HTTP client capable of crossing a proxy and respecting a block/allow list.
 

From 445b6e3421a9b6b9971a86b2d2f9e9db2f4741de Mon Sep 17 00:00:00 2001
From: Jason Little <realtyem@gmail.com>
Date: Fri, 14 Apr 2023 15:05:49 -0500
Subject: [PATCH 5/5] From review: remove default Agent and add to docstring
 'BYOA'.

---
 synapse/http/client.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/synapse/http/client.py b/synapse/http/client.py
index fbb6cd2bc218..91fe474f36d9 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -315,13 +315,16 @@ def request(
 class BaseHttpClient:
     """
     A simple, no-frills HTTP client with methods that wrap up common ways of
-    using HTTP in Matrix
+    using HTTP in Matrix. Does not come with a default Agent, subclasses will need to
+    define their own.
 
     Args:
         hs: The HomeServer instance to pass in
         treq_args: Extra keyword arguments to be given to treq.request.
     """
 
+    agent: IAgent
+
     def __init__(
         self,
         hs: "HomeServer",
@@ -345,11 +348,6 @@ def __init__(
         # reactor.
         self._cooperator = Cooperator(scheduler=_make_scheduler(hs.get_reactor()))
 
-        self.agent: IAgent = Agent(
-            self.reactor,
-            contextFactory=hs.get_http_client_context_factory(),
-        )
-
     async def request(
         self,
         method: str,