Skip to content

Commit

Permalink
pythongh-66897: Upgrade HTTP CONNECT to protocol HTTP/1.1 (python#8305)
Browse files Browse the repository at this point in the history
* bpo-22708: Upgrade HTTP CONNECT to protocol HTTP/1.1 (GH-NNNN)

Use protocol HTTP/1.1 when sending HTTP CONNECT tunnelling requests;
generate Host: headers if one is not already provided (required by
HTTP/1.1), convert IDN domains to punycode in HTTP CONNECT requests.

* Refactor tests to pass under -bb (fix BytesWarnings); missed some lines >80.

* Use consistent 'tunnelling' spelling in Lib/http/client.py

* Lib/test/test_httplib: Remove remnant of obsoleted test.

* Use dict.copy() not copy.copy()

* fix version changed

* Update Lib/http/client.py

Co-authored-by: bgehman <[email protected]>

* Switch to for/else: syntax, as suggested

* Don't use for: else:

* Sure, fine, w/e

* Oops

* 1nm to the left

---------

Co-authored-by: Éric <[email protected]>
Co-authored-by: bgehman <[email protected]>
Co-authored-by: Oleg Iarygin <[email protected]>
  • Loading branch information
4 people authored and warsaw committed Apr 11, 2023
1 parent 9ca78d7 commit 3eb10b2
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 21 deletions.
12 changes: 12 additions & 0 deletions Doc/library/http.client.rst
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,13 @@ HTTPConnection Objects
The *headers* argument should be a mapping of extra HTTP headers to send with
the CONNECT request.

As HTTP/1.1 is used for HTTP CONNECT tunnelling requests, `as per the RFC
<https://tools.ietf.org/html/rfc7231#section-4.3.6>`_, an HTTP ``Host:``
header must be provided, matching the authority-form of the request target
provided as the destination for the CONNECT request. If an HTTP ``Host:``
header is not provided via the headers argument, one is generated and
transmitted automatically.

For example, to tunnel through a HTTPS proxy server running locally on port
8080, we would pass the address of the proxy to the :class:`HTTPSConnection`
constructor, and the address of the host that we eventually want to reach to
Expand All @@ -365,6 +372,11 @@ HTTPConnection Objects

.. versionadded:: 3.2

.. versionchanged:: 3.12
HTTP CONNECT tunnelling requests use protocol HTTP/1.1, upgraded from
protocol HTTP/1.0. ``Host:`` HTTP headers are mandatory for HTTP/1.1, so
one will be automatically generated and transmitted if not provided in
the headers argument.

.. method:: HTTPConnection.connect()

Expand Down
25 changes: 19 additions & 6 deletions Lib/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,27 +870,39 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
def set_tunnel(self, host, port=None, headers=None):
"""Set up host and port for HTTP CONNECT tunnelling.
In a connection that uses HTTP CONNECT tunneling, the host passed to the
constructor is used as a proxy server that relays all communication to
the endpoint passed to `set_tunnel`. This is done by sending an HTTP
In a connection that uses HTTP CONNECT tunnelling, the host passed to
the constructor is used as a proxy server that relays all communication
to the endpoint passed to `set_tunnel`. This is done by sending an HTTP
CONNECT request to the proxy server when the connection is established.
This method must be called before the HTTP connection has been
established.
The headers argument should be a mapping of extra HTTP headers to send
with the CONNECT request.
As HTTP/1.1 is used for HTTP CONNECT tunnelling requests, as per the RFC
(https://tools.ietf.org/html/rfc7231#section-4.3.6), an HTTP Host:
header must be provided, matching the authority-form of the request
target provided as the destination for the CONNECT request. If an
HTTP Host: header is not provided via the headers argument, one
is generated and transmitted automatically.
"""

if self.sock:
raise RuntimeError("Can't set up tunnel for established connection")

self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
if headers:
self._tunnel_headers = headers
self._tunnel_headers = headers.copy()
else:
self._tunnel_headers.clear()

if not any(header.lower() == "host" for header in self._tunnel_headers):
encoded_host = self._tunnel_host.encode("idna").decode("ascii")
self._tunnel_headers["Host"] = "%s:%d" % (
encoded_host, self._tunnel_port)

def _get_hostport(self, host, port):
if port is None:
i = host.rfind(':')
Expand All @@ -915,8 +927,9 @@ def set_debuglevel(self, level):
self.debuglevel = level

def _tunnel(self):
connect = b"CONNECT %s:%d HTTP/1.0\r\n" % (
self._tunnel_host.encode("ascii"), self._tunnel_port)
connect = b"CONNECT %s:%d %s\r\n" % (
self._tunnel_host.encode("idna"), self._tunnel_port,
self._http_vsn_str.encode("ascii"))
headers = [connect]
for header, value in self._tunnel_headers.items():
headers.append(f"{header}: {value}\r\n".encode("latin-1"))
Expand Down
147 changes: 132 additions & 15 deletions Lib/test/test_httplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2187,11 +2187,12 @@ def test_getting_header_defaultint(self):
class TunnelTests(TestCase):
def setUp(self):
response_text = (
'HTTP/1.0 200 OK\r\n\r\n' # Reply to CONNECT
'HTTP/1.1 200 OK\r\n\r\n' # Reply to CONNECT
'HTTP/1.1 200 OK\r\n' # Reply to HEAD
'Content-Length: 42\r\n\r\n'
)
self.host = 'proxy.com'
self.port = client.HTTP_PORT
self.conn = client.HTTPConnection(self.host)
self.conn._create_connection = self._create_connection(response_text)

Expand All @@ -2203,15 +2204,45 @@ def create_connection(address, timeout=None, source_address=None):
return FakeSocket(response_text, host=address[0], port=address[1])
return create_connection

def test_set_tunnel_host_port_headers(self):
def test_set_tunnel_host_port_headers_add_host_missing(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)'}
tunnel_headers_after = tunnel_headers.copy()
tunnel_headers_after['Host'] = '%s:%d' % (tunnel_host, tunnel_port)
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers_after)

def test_set_tunnel_host_port_headers_set_host_identical(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
'Host': '%s:%d' % (tunnel_host, tunnel_port)}
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers)

def test_set_tunnel_host_port_headers_set_host_different(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
'Host': '%s:%d' % ('example.com', 4200)}
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers)
Expand All @@ -2223,17 +2254,96 @@ def test_disallow_set_tunnel_after_connect(self):
'destination.com')

def test_connect_with_tunnel(self):
self.conn.set_tunnel('destination.com')
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_with_default_port(self):
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_with_nonstandard_port(self):
d = {
b'host': b'destination.com',
b'port': 8888,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s:%(port)d\r\n' % d,
self.conn.sock.data)

# This request is not RFC-valid, but it's been possible with the library
# for years, so don't break it unexpectedly... This also tests
# case-insensitivity when injecting Host: headers if they're missing.
def test_connect_with_tunnel_with_different_host_header(self):
d = {
b'host': b'destination.com',
b'tunnel_host_header': b'example.com:9876',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(
d[b'host'].decode('ascii'),
headers={'HOST': d[b'tunnel_host_header'].decode('ascii')})
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'HOST: %(tunnel_host_header)s\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_different_host(self):
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_idna(self):
dest = '\u03b4\u03c0\u03b8.gr'
dest_port = b'%s:%d' % (dest.encode('idna'), client.HTTP_PORT)
expected = b'CONNECT %s HTTP/1.1\r\nHost: %s\r\n\r\n' % (
dest_port, dest_port)
self.conn.set_tunnel(dest)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertIn(b'CONNECT destination.com', self.conn.sock.data)
# issue22095
self.assertNotIn(b'Host: destination.com:None', self.conn.sock.data)
self.assertIn(b'Host: destination.com', self.conn.sock.data)

# This test should be removed when CONNECT gets the HTTP/1.1 blessing
self.assertNotIn(b'Host: proxy.com', self.conn.sock.data)
self.assertIn(expected, self.conn.sock.data)

def test_tunnel_connect_single_send_connection_setup(self):
"""Regression test for https://bugs.python.org/issue43332."""
Expand All @@ -2253,12 +2363,19 @@ def test_tunnel_connect_single_send_connection_setup(self):
msg=f'unexpected proxy data sent {proxy_setup_data_sent!r}')

def test_connect_put_request(self):
self.conn.set_tunnel('destination.com')
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('PUT', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertIn(b'CONNECT destination.com', self.conn.sock.data)
self.assertIn(b'Host: destination.com', self.conn.sock.data)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'PUT / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_tunnel_debuglog(self):
expected_header = 'X-Dummy: 1'
Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,7 @@ Anders Hammarquist
Mark Hammond
Harald Hanche-Olsen
Manus Hand
Michael Handler
Andreas Hangauer
Milton L. Hankins
Carl Bordum Hansen
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
http.client CONNECT method tunnel improvements: use the HTTP/1.1 protocol;
send a matching Host: header with CONNECT if one is not provided; convert IDN
domain names to Punycode. Patch by Michael Handler.

0 comments on commit 3eb10b2

Please sign in to comment.