Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-66897: Upgrade HTTP CONNECT to protocol HTTP/1.1 #8305

Merged
merged 17 commits into from
Apr 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Doc/library/http.client.rst
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,13 @@ HTTPConnection Objects
The *headers* argument should be a mapping of extra HTTP headers to send with
the CONNECT request.

As HTTP/1.1 is used for HTTP CONNECT tunnelling request, `as per the RFC
<https://tools.ietf.org/html/rfc7231#section-4.3.6>`_, a HTTP ``Host:``
header must be provided, matching the authority-form of the request target
provided as the destination for the CONNECT request. If a HTTP ``Host:``
header is not provided via the headers argument, one is generated and
transmitted automatically.

For example, to tunnel through a HTTPS proxy server running locally on port
8080, we would pass the address of the proxy to the :class:`HTTPSConnection`
constructor, and the address of the host that we eventually want to reach to
Expand All @@ -365,6 +372,11 @@ HTTPConnection Objects

.. versionadded:: 3.2

.. versionchanged:: 3.12
HTTP CONNECT tunnelling requests use protocol HTTP/1.1, upgraded from
protocol HTTP/1.0. ``Host:`` HTTP headers are mandatory for HTTP/1.1, so
one will be automatically generated and transmitted if not provided in
the headers argument.

.. method:: HTTPConnection.connect()

Expand Down
25 changes: 19 additions & 6 deletions Lib/http/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,27 +869,39 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
def set_tunnel(self, host, port=None, headers=None):
"""Set up host and port for HTTP CONNECT tunnelling.

In a connection that uses HTTP CONNECT tunneling, the host passed to the
constructor is used as a proxy server that relays all communication to
the endpoint passed to `set_tunnel`. This done by sending an HTTP
In a connection that uses HTTP CONNECT tunnelling, the host passed to
the constructor is used as a proxy server that relays all communication
to the endpoint passed to `set_tunnel`. This done by sending an HTTP
CONNECT request to the proxy server when the connection is established.

This method must be called before the HTTP connection has been
established.

The headers argument should be a mapping of extra HTTP headers to send
with the CONNECT request.

As HTTP/1.1 is used for HTTP CONNECT tunnelling request, as per the RFC
(https://tools.ietf.org/html/rfc7231#section-4.3.6), a HTTP Host:
header must be provided, matching the authority-form of the request
target provided as the destination for the CONNECT request. If a
HTTP Host: header is not provided via the headers argument, one
is generated and transmitted automatically.
"""

if self.sock:
raise RuntimeError("Can't set up tunnel for established connection")

self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
if headers:
self._tunnel_headers = headers
self._tunnel_headers = headers.copy()
else:
self._tunnel_headers.clear()

if not any(header.lower() == "host" for header in self._tunnel_headers):
encoded_host = self._tunnel_host.encode("idna").decode("ascii")
self._tunnel_headers["Host"] = "%s:%d" % (
encoded_host, self._tunnel_port)

def _get_hostport(self, host, port):
if port is None:
i = host.rfind(':')
Expand All @@ -914,8 +926,9 @@ def set_debuglevel(self, level):
self.debuglevel = level

def _tunnel(self):
connect = b"CONNECT %s:%d HTTP/1.0\r\n" % (
self._tunnel_host.encode("ascii"), self._tunnel_port)
connect = b"CONNECT %s:%d %s\r\n" % (
self._tunnel_host.encode("idna"), self._tunnel_port,
self._http_vsn_str.encode("ascii"))
headers = [connect]
for header, value in self._tunnel_headers.items():
headers.append(f"{header}: {value}\r\n".encode("latin-1"))
Expand Down
147 changes: 132 additions & 15 deletions Lib/test/test_httplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2187,11 +2187,12 @@ def test_getting_header_defaultint(self):
class TunnelTests(TestCase):
def setUp(self):
response_text = (
'HTTP/1.0 200 OK\r\n\r\n' # Reply to CONNECT
'HTTP/1.1 200 OK\r\n\r\n' # Reply to CONNECT
'HTTP/1.1 200 OK\r\n' # Reply to HEAD
'Content-Length: 42\r\n\r\n'
)
self.host = 'proxy.com'
self.port = client.HTTP_PORT
self.conn = client.HTTPConnection(self.host)
self.conn._create_connection = self._create_connection(response_text)

Expand All @@ -2203,15 +2204,45 @@ def create_connection(address, timeout=None, source_address=None):
return FakeSocket(response_text, host=address[0], port=address[1])
return create_connection

def test_set_tunnel_host_port_headers(self):
def test_set_tunnel_host_port_headers_add_host_missing(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)'}
tunnel_headers_after = tunnel_headers.copy()
tunnel_headers_after['Host'] = '%s:%d' % (tunnel_host, tunnel_port)
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers_after)

def test_set_tunnel_host_port_headers_set_host_identical(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
'Host': '%s:%d' % (tunnel_host, tunnel_port)}
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers)

def test_set_tunnel_host_port_headers_set_host_different(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
'Host': '%s:%d' % ('example.com', 4200)}
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers)
Expand All @@ -2223,17 +2254,96 @@ def test_disallow_set_tunnel_after_connect(self):
'destination.com')

def test_connect_with_tunnel(self):
self.conn.set_tunnel('destination.com')
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_with_default_port(self):
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_with_nonstandard_port(self):
d = {
b'host': b'destination.com',
b'port': 8888,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s:%(port)d\r\n' % d,
self.conn.sock.data)

# This request is not RFC-valid, but it's been possible with the library
# for years, so don't break it unexpectedly... This also tests
# case-insensitivity when injecting Host: headers if they're missing.
def test_connect_with_tunnel_with_different_host_header(self):
d = {
b'host': b'destination.com',
b'tunnel_host_header': b'example.com:9876',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(
d[b'host'].decode('ascii'),
headers={'HOST': d[b'tunnel_host_header'].decode('ascii')})
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'HOST: %(tunnel_host_header)s\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_different_host(self):
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_connect_with_tunnel_idna(self):
dest = '\u03b4\u03c0\u03b8.gr'
dest_port = b'%s:%d' % (dest.encode('idna'), client.HTTP_PORT)
expected = b'CONNECT %s HTTP/1.1\r\nHost: %s\r\n\r\n' % (
dest_port, dest_port)
self.conn.set_tunnel(dest)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertIn(b'CONNECT destination.com', self.conn.sock.data)
# issue22095
self.assertNotIn(b'Host: destination.com:None', self.conn.sock.data)
self.assertIn(b'Host: destination.com', self.conn.sock.data)

# This test should be removed when CONNECT gets the HTTP/1.1 blessing
self.assertNotIn(b'Host: proxy.com', self.conn.sock.data)
self.assertIn(expected, self.conn.sock.data)

def test_tunnel_connect_single_send_connection_setup(self):
"""Regresstion test for https://bugs.python.org/issue43332."""
Expand All @@ -2253,12 +2363,19 @@ def test_tunnel_connect_single_send_connection_setup(self):
msg=f'unexpected proxy data sent {proxy_setup_data_sent!r}')

def test_connect_put_request(self):
self.conn.set_tunnel('destination.com')
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('PUT', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertIn(b'CONNECT destination.com', self.conn.sock.data)
self.assertIn(b'Host: destination.com', self.conn.sock.data)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'PUT / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)

def test_tunnel_debuglog(self):
expected_header = 'X-Dummy: 1'
Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,7 @@ Anders Hammarquist
Mark Hammond
Harald Hanche-Olsen
Manus Hand
Michael Handler
Andreas Hangauer
Milton L. Hankins
Carl Bordum Hansen
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
http.client CONNECT method tunnel improvements: Use HTTP 1.1 protocol; send
a matching Host: header with CONNECT, if one is not provided; convert IDN
domain names to Punycode. Patch by Michael Handler.