diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ff38e66520..4064fd6925 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -51,8 +51,9 @@ jobs: tox_env: py311-full - python: '3.11.0' tox_env: py311-full - - python: '3.12.0-alpha - 3.12' - tox_env: py312-full + # py312 testing is disabled in branch6.3; full support is coming in tornado 6.4 + #- python: '3.12.0-alpha - 3.12' + # tox_env: py312-full - python: 'pypy-3.8' # Pypy is a lot slower due to jit warmup costs, so don't run the # "full" test config there. diff --git a/docs/releases.rst b/docs/releases.rst index fc7e41654f..076ac86331 100644 --- a/docs/releases.rst +++ b/docs/releases.rst @@ -4,6 +4,7 @@ Release notes .. toctree:: :maxdepth: 2 + releases/v6.3.3 releases/v6.3.2 releases/v6.3.1 releases/v6.3.0 diff --git a/docs/releases/v6.3.3.rst b/docs/releases/v6.3.3.rst new file mode 100644 index 0000000000..7fe0110fda --- /dev/null +++ b/docs/releases/v6.3.3.rst @@ -0,0 +1,12 @@ +What's new in Tornado 6.3.3 +=========================== + +Aug 11, 2023 +------------ + +Security improvements +~~~~~~~~~~~~~~~~~~~~~ + +- The ``Content-Length`` header and ``chunked`` ``Transfer-Encoding`` sizes are now parsed + more strictly (according to the relevant RFCs) to avoid potential request-smuggling + vulnerabilities when deployed behind certain proxies. diff --git a/tornado/__init__.py b/tornado/__init__.py index 475c1f612e..c2a8f25b43 100644 --- a/tornado/__init__.py +++ b/tornado/__init__.py @@ -22,8 +22,8 @@ # is zero for an official release, positive for a development branch, # or negative for a release candidate or beta (after the base version # number has been incremented) -version = "6.3.2" -version_info = (6, 3, 2, 0) +version = "6.3.3" +version_info = (6, 3, 3, 0) import importlib import typing diff --git a/tornado/http1connection.py b/tornado/http1connection.py index 5ca9168887..ca50e8ff55 100644 --- a/tornado/http1connection.py +++ b/tornado/http1connection.py @@ -442,7 +442,7 @@ def write_headers( ): self._expected_content_remaining = 0 elif "Content-Length" in headers: - self._expected_content_remaining = int(headers["Content-Length"]) + self._expected_content_remaining = parse_int(headers["Content-Length"]) else: self._expected_content_remaining = None # TODO: headers are supposed to be of type str, but we still have some @@ -618,7 +618,7 @@ def _read_body( headers["Content-Length"] = pieces[0] try: - content_length = int(headers["Content-Length"]) # type: Optional[int] + content_length: Optional[int] = parse_int(headers["Content-Length"]) except ValueError: # Handles non-integer Content-Length value. raise httputil.HTTPInputError( @@ -668,7 +668,10 @@ async def _read_chunked_body(self, delegate: httputil.HTTPMessageDelegate) -> No total_size = 0 while True: chunk_len_str = await self.stream.read_until(b"\r\n", max_bytes=64) - chunk_len = int(chunk_len_str.strip(), 16) + try: + chunk_len = parse_hex_int(native_str(chunk_len_str[:-2])) + except ValueError: + raise httputil.HTTPInputError("invalid chunk size") if chunk_len == 0: crlf = await self.stream.read_bytes(2) if crlf != b"\r\n": @@ -842,3 +845,21 @@ async def _server_request_loop( await asyncio.sleep(0) finally: delegate.on_close(self) + + +DIGITS = re.compile(r"[0-9]+") +HEXDIGITS = re.compile(r"[0-9a-fA-F]+") + + +def parse_int(s: str) -> int: + """Parse a non-negative integer from a string.""" + if DIGITS.fullmatch(s) is None: + raise ValueError("not an integer: %r" % s) + return int(s) + + +def parse_hex_int(s: str) -> int: + """Parse a non-negative hexadecimal integer from a string.""" + if HEXDIGITS.fullmatch(s) is None: + raise ValueError("not a hexadecimal integer: %r" % s) + return int(s, 16) diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py index cd0a0e1004..1faf63fabf 100644 --- a/tornado/test/httpserver_test.py +++ b/tornado/test/httpserver_test.py @@ -18,7 +18,7 @@ ) from tornado.iostream import IOStream from tornado.locks import Event -from tornado.log import gen_log +from tornado.log import gen_log, app_log from tornado.netutil import ssl_options_to_context from tornado.simple_httpclient import SimpleAsyncHTTPClient from tornado.testing import ( @@ -41,6 +41,7 @@ import ssl import sys import tempfile +import textwrap import unittest import urllib.parse from io import BytesIO @@ -118,7 +119,7 @@ class SSLTestMixin(object): def get_ssl_options(self): return dict( ssl_version=self.get_ssl_version(), - **AsyncHTTPSTestCase.default_ssl_options() + **AsyncHTTPSTestCase.default_ssl_options(), ) def get_ssl_version(self): @@ -558,23 +559,60 @@ def test_chunked_request_uppercase(self): ) self.assertEqual(json_decode(response), {"foo": ["bar"]}) - @gen_test - def test_invalid_content_length(self): - with ExpectLog( - gen_log, ".*Only integer Content-Length is allowed", level=logging.INFO - ): - self.stream.write( - b"""\ + def test_chunked_request_body_invalid_size(self): + # Only hex digits are allowed in chunk sizes. Python's int() function + # also accepts underscores, so make sure we reject them here. + self.stream.write( + b"""\ POST /echo HTTP/1.1 -Content-Length: foo +Transfer-Encoding: chunked -bar +1_a +1234567890abcdef1234567890 +0 """.replace( - b"\n", b"\r\n" - ) + b"\n", b"\r\n" + ) + ) + with ExpectLog(gen_log, ".*invalid chunk size", level=logging.INFO): + start_line, headers, response = self.io_loop.run_sync( + lambda: read_stream_body(self.stream) ) - yield self.stream.read_until_close() + self.assertEqual(400, start_line.code) + + @gen_test + def test_invalid_content_length(self): + # HTTP only allows decimal digits in content-length. Make sure we don't + # accept anything else, with special attention to things accepted by the + # python int() function (leading plus signs and internal underscores). + test_cases = [ + ("alphabetic", "foo"), + ("leading plus", "+10"), + ("internal underscore", "1_0"), + ] + for name, value in test_cases: + with self.subTest(name=name), closing(IOStream(socket.socket())) as stream: + with ExpectLog( + gen_log, + ".*Only integer Content-Length is allowed", + level=logging.INFO, + ): + yield stream.connect(("127.0.0.1", self.get_http_port())) + stream.write( + utf8( + textwrap.dedent( + f"""\ + POST /echo HTTP/1.1 + Content-Length: {value} + Connection: close + + 1234567890 + """ + ).replace("\n", "\r\n") + ) + ) + yield stream.read_until_close() class XHeaderTest(HandlerBaseTestCase): @@ -1123,6 +1161,46 @@ def body_producer(write): ) +class InvalidOutputContentLengthTest(AsyncHTTPTestCase): + class MessageDelegate(HTTPMessageDelegate): + def __init__(self, connection): + self.connection = connection + + def headers_received(self, start_line, headers): + content_lengths = { + "normal": "10", + "alphabetic": "foo", + "leading plus": "+10", + "underscore": "1_0", + } + self.connection.write_headers( + ResponseStartLine("HTTP/1.1", 200, "OK"), + HTTPHeaders({"Content-Length": content_lengths[headers["x-test"]]}), + ) + self.connection.write(b"1234567890") + self.connection.finish() + + def get_app(self): + class App(HTTPServerConnectionDelegate): + def start_request(self, server_conn, request_conn): + return InvalidOutputContentLengthTest.MessageDelegate(request_conn) + + return App() + + def test_invalid_output_content_length(self): + with self.subTest("normal"): + response = self.fetch("/", method="GET", headers={"x-test": "normal"}) + response.rethrow() + self.assertEqual(response.body, b"1234567890") + for test in ["alphabetic", "leading plus", "underscore"]: + with self.subTest(test): + # This log matching could be tighter but I think I'm already + # over-testing here. + with ExpectLog(app_log, "Uncaught exception"): + with self.assertRaises(HTTPError): + self.fetch("/", method="GET", headers={"x-test": test}) + + class MaxHeaderSizeTest(AsyncHTTPTestCase): def get_app(self): return Application([("/", HelloWorldRequestHandler)])