From 217295b1dd30f556ea374d62007f6821688f00f0 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Tue, 8 Aug 2023 21:55:02 -0400 Subject: [PATCH 1/4] http1connection: Make content-length parsing more strict Content-length and chunk size parsing now strictly matches the RFCs. We previously used the python int() function which accepted leading plus signs and internal underscores, which are not allowed by the HTTP RFCs (it also accepts minus signs, but these are less problematic in this context since they'd result in errors elsewhere) It is important to fix this because when combined with certain proxies, the lax parsing could result in a request smuggling vulnerability (if both Tornado and the proxy accepted an invalid content-length but interpreted it differently). This is known to occur with old versions of haproxy, although the current version of haproxy is unaffected. --- tornado/http1connection.py | 27 +++++++- tornado/test/httpserver_test.py | 105 +++++++++++++++++++++++++++----- 2 files changed, 115 insertions(+), 17 deletions(-) diff --git a/tornado/http1connection.py b/tornado/http1connection.py index 5ca9168887..ca50e8ff55 100644 --- a/tornado/http1connection.py +++ b/tornado/http1connection.py @@ -442,7 +442,7 @@ def write_headers( ): self._expected_content_remaining = 0 elif "Content-Length" in headers: - self._expected_content_remaining = int(headers["Content-Length"]) + self._expected_content_remaining = parse_int(headers["Content-Length"]) else: self._expected_content_remaining = None # TODO: headers are supposed to be of type str, but we still have some @@ -618,7 +618,7 @@ def _read_body( headers["Content-Length"] = pieces[0] try: - content_length = int(headers["Content-Length"]) # type: Optional[int] + content_length: Optional[int] = parse_int(headers["Content-Length"]) except ValueError: # Handles non-integer Content-Length value. raise httputil.HTTPInputError( @@ -668,7 +668,10 @@ async def _read_chunked_body(self, delegate: httputil.HTTPMessageDelegate) -> No total_size = 0 while True: chunk_len_str = await self.stream.read_until(b"\r\n", max_bytes=64) - chunk_len = int(chunk_len_str.strip(), 16) + try: + chunk_len = parse_hex_int(native_str(chunk_len_str[:-2])) + except ValueError: + raise httputil.HTTPInputError("invalid chunk size") if chunk_len == 0: crlf = await self.stream.read_bytes(2) if crlf != b"\r\n": @@ -842,3 +845,21 @@ async def _server_request_loop( await asyncio.sleep(0) finally: delegate.on_close(self) + + +DIGITS = re.compile(r"[0-9]+") +HEXDIGITS = re.compile(r"[0-9a-fA-F]+") + + +def parse_int(s: str) -> int: + """Parse a non-negative integer from a string.""" + if DIGITS.fullmatch(s) is None: + raise ValueError("not an integer: %r" % s) + return int(s) + + +def parse_hex_int(s: str) -> int: + """Parse a non-negative hexadecimal integer from a string.""" + if HEXDIGITS.fullmatch(s) is None: + raise ValueError("not a hexadecimal integer: %r" % s) + return int(s, 16) diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py index cd0a0e1004..db91d62daa 100644 --- a/tornado/test/httpserver_test.py +++ b/tornado/test/httpserver_test.py @@ -18,7 +18,7 @@ ) from tornado.iostream import IOStream from tornado.locks import Event -from tornado.log import gen_log +from tornado.log import gen_log, app_log from tornado.netutil import ssl_options_to_context from tornado.simple_httpclient import SimpleAsyncHTTPClient from tornado.testing import ( @@ -41,6 +41,7 @@ import ssl import sys import tempfile +import textwrap import unittest import urllib.parse from io import BytesIO @@ -118,7 +119,7 @@ class SSLTestMixin(object): def get_ssl_options(self): return dict( ssl_version=self.get_ssl_version(), - **AsyncHTTPSTestCase.default_ssl_options() + **AsyncHTTPSTestCase.default_ssl_options(), ) def get_ssl_version(self): @@ -558,23 +559,59 @@ def test_chunked_request_uppercase(self): ) self.assertEqual(json_decode(response), {"foo": ["bar"]}) - @gen_test - def test_invalid_content_length(self): - with ExpectLog( - gen_log, ".*Only integer Content-Length is allowed", level=logging.INFO - ): - self.stream.write( - b"""\ + def test_chunked_request_body_invalid_size(self): + # Only hex digits are allowed in chunk sizes. Python's int() function + # also accepts underscores, so make sure we reject them here. + self.stream.write( + b"""\ POST /echo HTTP/1.1 -Content-Length: foo +Transfer-Encoding: chunked -bar +1_a +1234567890abcdef1234567890 +0 """.replace( - b"\n", b"\r\n" - ) + b"\n", b"\r\n" ) - yield self.stream.read_until_close() + ) + start_line, headers, response = self.io_loop.run_sync( + lambda: read_stream_body(self.stream) + ) + self.assertEqual(400, start_line.code) + + @gen_test + def test_invalid_content_length(self): + # HTTP only allows decimal digits in content-length. Make sure we don't + # accept anything else, with special attention to things accepted by the + # python int() function (leading plus signs and internal underscores). + test_cases = [ + ("alphabetic", "foo"), + ("leading plus", "+10"), + ("internal underscore", "1_0"), + ] + for name, value in test_cases: + with self.subTest(name=name), closing(IOStream(socket.socket())) as stream: + with ExpectLog( + gen_log, + ".*Only integer Content-Length is allowed", + level=logging.INFO, + ): + yield stream.connect(("127.0.0.1", self.get_http_port())) + stream.write( + utf8( + textwrap.dedent( + f"""\ + POST /echo HTTP/1.1 + Content-Length: {value} + Connection: close + + 1234567890 + """ + ).replace("\n", "\r\n") + ) + ) + yield stream.read_until_close() class XHeaderTest(HandlerBaseTestCase): @@ -1123,6 +1160,46 @@ def body_producer(write): ) +class InvalidOutputContentLengthTest(AsyncHTTPTestCase): + class MessageDelegate(HTTPMessageDelegate): + def __init__(self, connection): + self.connection = connection + + def headers_received(self, start_line, headers): + content_lengths = { + "normal": "10", + "alphabetic": "foo", + "leading plus": "+10", + "underscore": "1_0", + } + self.connection.write_headers( + ResponseStartLine("HTTP/1.1", 200, "OK"), + HTTPHeaders({"Content-Length": content_lengths[headers["x-test"]]}), + ) + self.connection.write(b"1234567890") + self.connection.finish() + + def get_app(self): + class App(HTTPServerConnectionDelegate): + def start_request(self, server_conn, request_conn): + return InvalidOutputContentLengthTest.MessageDelegate(request_conn) + + return App() + + def test_invalid_output_content_length(self): + with self.subTest("normal"): + response = self.fetch("/", method="GET", headers={"x-test": "normal"}) + response.rethrow() + self.assertEqual(response.body, b"1234567890") + for test in ["alphabetic", "leading plus", "underscore"]: + with self.subTest(test): + # This log matching could be tighter but I think I'm already + # over-testing here. + with ExpectLog(app_log, "Uncaught exception"): + with self.assertRaises(HTTPError): + self.fetch("/", method="GET", headers={"x-test": test}) + + class MaxHeaderSizeTest(AsyncHTTPTestCase): def get_app(self): return Application([("/", HelloWorldRequestHandler)]) From 7dfe8b597f2d179334d7b528f61e9449ac131273 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Thu, 10 Aug 2023 21:41:40 -0400 Subject: [PATCH 2/4] httpserver_test: Add ExpectLog to fix CI The github security advisory feature lets you make private PRs but it apparently doesn't support CI so this log failure wasn't caught until after the PR was merged. --- tornado/test/httpserver_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py index db91d62daa..1faf63fabf 100644 --- a/tornado/test/httpserver_test.py +++ b/tornado/test/httpserver_test.py @@ -575,9 +575,10 @@ def test_chunked_request_body_invalid_size(self): b"\n", b"\r\n" ) ) - start_line, headers, response = self.io_loop.run_sync( - lambda: read_stream_body(self.stream) - ) + with ExpectLog(gen_log, ".*invalid chunk size", level=logging.INFO): + start_line, headers, response = self.io_loop.run_sync( + lambda: read_stream_body(self.stream) + ) self.assertEqual(400, start_line.code) @gen_test From 5c8a9a4fa792f8b18bd26bc7a8335e3bbe837852 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Thu, 10 Aug 2023 22:38:19 -0400 Subject: [PATCH 3/4] Set version to 6.3.3 --- docs/releases.rst | 1 + docs/releases/v6.3.3.rst | 12 ++++++++++++ tornado/__init__.py | 4 ++-- 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 docs/releases/v6.3.3.rst diff --git a/docs/releases.rst b/docs/releases.rst index fc7e41654f..076ac86331 100644 --- a/docs/releases.rst +++ b/docs/releases.rst @@ -4,6 +4,7 @@ Release notes .. toctree:: :maxdepth: 2 + releases/v6.3.3 releases/v6.3.2 releases/v6.3.1 releases/v6.3.0 diff --git a/docs/releases/v6.3.3.rst b/docs/releases/v6.3.3.rst new file mode 100644 index 0000000000..7fe0110fda --- /dev/null +++ b/docs/releases/v6.3.3.rst @@ -0,0 +1,12 @@ +What's new in Tornado 6.3.3 +=========================== + +Aug 11, 2023 +------------ + +Security improvements +~~~~~~~~~~~~~~~~~~~~~ + +- The ``Content-Length`` header and ``chunked`` ``Transfer-Encoding`` sizes are now parsed + more strictly (according to the relevant RFCs) to avoid potential request-smuggling + vulnerabilities when deployed behind certain proxies. diff --git a/tornado/__init__.py b/tornado/__init__.py index 475c1f612e..c2a8f25b43 100644 --- a/tornado/__init__.py +++ b/tornado/__init__.py @@ -22,8 +22,8 @@ # is zero for an official release, positive for a development branch, # or negative for a release candidate or beta (after the base version # number has been incremented) -version = "6.3.2" -version_info = (6, 3, 2, 0) +version = "6.3.3" +version_info = (6, 3, 3, 0) import importlib import typing From 6a9e6fbaf7830b3edae68805211f35f5954292ab Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Thu, 10 Aug 2023 22:46:33 -0400 Subject: [PATCH 4/4] ci: Don't test py312 in branch6.3 --- .github/workflows/test.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ff38e66520..4064fd6925 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -51,8 +51,9 @@ jobs: tox_env: py311-full - python: '3.11.0' tox_env: py311-full - - python: '3.12.0-alpha - 3.12' - tox_env: py312-full + # py312 testing is disabled in branch6.3; full support is coming in tornado 6.4 + #- python: '3.12.0-alpha - 3.12' + # tox_env: py312-full - python: 'pypy-3.8' # Pypy is a lot slower due to jit warmup costs, so don't run the # "full" test config there.