diff --git a/CHANGES/6227.bugfix b/CHANGES/6227.bugfix new file mode 100644 index 00000000000..df097565bcd --- /dev/null +++ b/CHANGES/6227.bugfix @@ -0,0 +1 @@ +Started supporting ``authority-form`` and ``absolute-form`` URLs on the server-side. diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index b76d723fb2e..bebd9894374 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -425,7 +425,7 @@ cdef class HttpParser: raw_headers = tuple(self._raw_headers) headers = CIMultiDictProxy(self._headers) - if upgrade or self._cparser.method == 5: # cparser.CONNECT: + if upgrade or self._cparser.method == cparser.HTTP_CONNECT: self._upgraded = True # do not support old websocket spec @@ -453,7 +453,7 @@ cdef class HttpParser: if ( ULLONG_MAX > self._cparser.content_length > 0 or chunked or - self._cparser.method == 5 or # CONNECT: 5 + self._cparser.method == cparser.HTTP_CONNECT or (self._cparser.status_code >= 199 and self._cparser.content_length == 0 and self._read_until_eof) @@ -586,34 +586,45 @@ cdef class HttpRequestParser(HttpParser): self._path = self._buf.decode('utf-8', 'surrogateescape') try: idx3 = len(self._path) - idx1 = self._path.find("?") - if idx1 == -1: - query = "" - idx2 = self._path.find("#") - if idx2 == -1: - path = self._path - fragment = "" - else: - path = self._path[0: idx2] - fragment = self._path[idx2+1:] + if self._cparser.method == cparser.HTTP_CONNECT: + # authority-form, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3 + self._url = URL.build(authority=self._path, encoded=True) + elif idx3 > 1 and self._path[0] == '/': + # origin-form, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1 + idx1 = self._path.find("?") + if idx1 == -1: + query = "" + idx2 = self._path.find("#") + if idx2 == -1: + path = self._path + fragment = "" + else: + path = self._path[0: idx2] + fragment = self._path[idx2+1:] - else: - path = self._path[0:idx1] - idx1 += 1 - idx2 = self._path.find("#", idx1+1) - if idx2 == -1: - query = self._path[idx1:] - fragment = "" else: - query = self._path[idx1: idx2] - fragment = self._path[idx2+1:] - - self._url = URL.build( - path=path, - query_string=query, - fragment=fragment, - encoded=True, - ) + path = self._path[0:idx1] + idx1 += 1 + idx2 = self._path.find("#", idx1+1) + if idx2 == -1: + query = self._path[idx1:] + fragment = "" + else: + query = self._path[idx1: idx2] + fragment = self._path[idx2+1:] + + self._url = URL.build( + path=path, + query_string=query, + fragment=fragment, + encoded=True, + ) + else: + # absolute-form for proxy maybe, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2 + self._url = URL(self._path, encoded=True) finally: PyByteArray_Resize(self._buf, 0) @@ -726,7 +737,10 @@ cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1: pyparser._last_error = exc return -1 else: - if pyparser._cparser.upgrade or pyparser._cparser.method == 5: # CONNECT + if ( + pyparser._cparser.upgrade or + pyparser._cparser.method == cparser.HTTP_CONNECT + ): return 2 else: return 0 diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 96ba28e67bd..8b702f98e6f 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -533,9 +533,6 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: "Status line is too long", str(self.max_line_size), str(len(path)) ) - path_part, _hash_separator, url_fragment = path.partition("#") - path_part, _question_mark_separator, qs_part = path_part.partition("?") - # method if not METHRE.match(method): raise BadStatusLine(method) @@ -550,6 +547,31 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: except Exception: raise BadStatusLine(version) + if method == "CONNECT": + # authority-form, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3 + url = URL.build(authority=path, encoded=True) + elif path.startswith("/"): + # origin-form, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1 + path_part, _hash_separator, url_fragment = path.partition("#") + path_part, _question_mark_separator, qs_part = path_part.partition("?") + + # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based + # NOTE: parser does, otherwise it results into the same + # NOTE: HTTP Request-Line input producing different + # NOTE: `yarl.URL()` objects + url = URL.build( + path=path_part, + query_string=qs_part, + fragment=url_fragment, + encoded=True, + ) + else: + # absolute-form for proxy maybe, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2 + url = URL(path, encoded=True) + # read headers ( headers, @@ -576,16 +598,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: compression, upgrade, chunked, - # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based - # NOTE: parser does, otherwise it results into the same - # NOTE: HTTP Request-Line input producing different - # NOTE: `yarl.URL()` objects - URL.build( - path=path_part, - query_string=qs_part, - fragment=url_fragment, - encoded=True, - ), + url, ) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 46a2aeb05b8..c5bb1e52392 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -171,14 +171,24 @@ def __init__( self._headers = message.headers self._method = message.method self._version = message.version - self._rel_url = message.url + self._cache = {} # type: Dict[str, Any] + url = message.url + if url.is_absolute(): + # absolute URL is given, + # override auto-calculating url, host, and scheme + # all other properties should be good + self._cache["url"] = url + self._cache["host"] = url.host + self._cache["scheme"] = url.scheme + self._rel_url = url.relative() + else: + self._rel_url = message.url self._post = ( None ) # type: Optional[MultiDictProxy[Union[str, bytes, FileField]]] self._read_bytes = None # type: Optional[bytes] self._state = state - self._cache = {} # type: Dict[str, Any] self._task = task self._client_max_size = client_max_size self._loop = loop diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 24b484a6c90..50f7b27daab 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -363,6 +363,14 @@ def test_compression_unknown(parser: Any) -> None: assert msg.compression is None +def test_url_connect(parser: Any) -> None: + text = b"CONNECT www.google.com HTTP/1.1\r\n" b"content-length: 0\r\n\r\n" + messages, upgrade, tail = parser.feed_data(text) + msg, payload = messages[0] + assert upgrade + assert msg.url == URL.build(authority="www.google.com") + + def test_headers_connect(parser: Any) -> None: text = b"CONNECT www.google.com HTTP/1.1\r\n" b"content-length: 0\r\n\r\n" messages, upgrade, tail = parser.feed_data(text) @@ -371,6 +379,18 @@ def test_headers_connect(parser: Any) -> None: assert isinstance(payload, streams.StreamReader) +def test_url_absolute(parser: Any) -> None: + text = ( + b"GET https://www.google.com/path/to.html HTTP/1.1\r\n" + b"content-length: 0\r\n\r\n" + ) + messages, upgrade, tail = parser.feed_data(text) + msg, payload = messages[0] + assert not upgrade + assert msg.method == "GET" + assert msg.url == URL("https://www.google.com/path/to.html") + + def test_headers_old_websocket_key1(parser: Any) -> None: text = b"GET /test HTTP/1.1\r\n" b"SEC-WEBSOCKET-KEY1: line\r\n\r\n" diff --git a/tests/test_proxy_functional.py b/tests/test_proxy_functional.py index 9393450b185..b6c0bc07e82 100644 --- a/tests/test_proxy_functional.py +++ b/tests/test_proxy_functional.py @@ -280,17 +280,17 @@ async def test_proxy_http_absolute_path( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path?query=yes" + assert proxy.request.path_qs == "/path?query=yes" async def test_proxy_http_raw_path(proxy_test_server: Any, get_request: Any) -> None: url = "http://aiohttp.io:2561/space sheep?q=can:fly" - raw_url = "http://aiohttp.io:2561/space%20sheep?q=can:fly" + raw_url = "/space%20sheep?q=can:fly" proxy = await proxy_test_server() await get_request(url=url, proxy=proxy.url) - assert proxy.request.host == "aiohttp.io:2561" + assert proxy.request.host == "aiohttp.io" assert proxy.request.path_qs == raw_url @@ -298,13 +298,12 @@ async def test_proxy_http_idna_support( proxy_test_server: Any, get_request: Any ) -> None: url = "http://éé.com/" - raw_url = "http://xn--9caa.com/" proxy = await proxy_test_server() await get_request(url=url, proxy=proxy.url) - assert proxy.request.host == "xn--9caa.com" - assert proxy.request.path_qs == raw_url + assert proxy.request.host == "éé.com" + assert proxy.request.path_qs == "/" async def test_proxy_http_connection_error(get_request: Any) -> None: @@ -716,7 +715,7 @@ async def test_proxy_from_env_http( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert "Proxy-Authorization" not in proxy.request.headers @@ -740,7 +739,7 @@ async def test_proxy_from_env_http_with_auth( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert proxy.request.headers["Proxy-Authorization"] == auth.encode() @@ -766,7 +765,7 @@ async def test_proxy_from_env_http_with_auth_from_netrc( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert proxy.request.headers["Proxy-Authorization"] == auth.encode() @@ -792,7 +791,7 @@ async def test_proxy_from_env_http_without_auth_from_netrc( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert "Proxy-Authorization" not in proxy.request.headers @@ -816,7 +815,7 @@ async def test_proxy_from_env_http_without_auth_from_wrong_netrc( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert "Proxy-Authorization" not in proxy.request.headers @@ -834,7 +833,7 @@ async def xtest_proxy_from_env_https( assert len(proxy.requests_list) == 2 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "https://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert "Proxy-Authorization" not in proxy.request.headers diff --git a/tests/test_web_request.py b/tests/test_web_request.py index 60c34319ddc..9a89bf7f30a 100644 --- a/tests/test_web_request.py +++ b/tests/test_web_request.py @@ -158,6 +158,14 @@ def test_non_ascii_raw_path() -> None: assert "/путь" == req.raw_path +def test_absolute_url() -> None: + req = make_mocked_request("GET", "https://example.com/path/to?a=1") + assert req.url == URL("https://example.com/path/to?a=1") + assert req.scheme == "https" + assert req.host == "example.com" + assert req.rel_url == URL.build(path="/path/to", query={"a": "1"}) + + def test_content_length() -> None: req = make_mocked_request("Get", "/", CIMultiDict([("CONTENT-LENGTH", "123")]))