From a89584fd313b7bef6cf75460d27159d095f87c00 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 11 Dec 2021 14:23:42 +0200 Subject: [PATCH 1/7] Support authority-form for CONNECT method --- Makefile | 1 + aiohttp/_http_parser.pyx | 68 +++++++++++++++++++++------------------ aiohttp/http_parser.py | 31 ++++++++++-------- tests/test_http_parser.py | 8 +++++ 4 files changed, 64 insertions(+), 44 deletions(-) diff --git a/Makefile b/Makefile index 57b3601d24d..1f013cefcd4 100644 --- a/Makefile +++ b/Makefile @@ -190,6 +190,7 @@ compile-deps: .update-pip $(REQS) .PHONY: install install: .update-pip @pip install -r requirements/dev.txt -c requirements/constraints.txt + @touch .develop .PHONY: install-dev install-dev: .develop diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index b76d723fb2e..4747af5d2f3 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -425,7 +425,7 @@ cdef class HttpParser: raw_headers = tuple(self._raw_headers) headers = CIMultiDictProxy(self._headers) - if upgrade or self._cparser.method == 5: # cparser.CONNECT: + if upgrade or self._cparser.method == cparser.HTTP_CONNECT: self._upgraded = True # do not support old websocket spec @@ -453,7 +453,7 @@ cdef class HttpParser: if ( ULLONG_MAX > self._cparser.content_length > 0 or chunked or - self._cparser.method == 5 or # CONNECT: 5 + self._cparser.method == cparser.HTTP_CONNECT or (self._cparser.status_code >= 199 and self._cparser.content_length == 0 and self._read_until_eof) @@ -585,35 +585,38 @@ cdef class HttpRequestParser(HttpParser): return self._path = self._buf.decode('utf-8', 'surrogateescape') try: - idx3 = len(self._path) - idx1 = self._path.find("?") - if idx1 == -1: - query = "" - idx2 = self._path.find("#") - if idx2 == -1: - path = self._path - fragment = "" - else: - path = self._path[0: idx2] - fragment = self._path[idx2+1:] - + if self._cparser.method == cparser.HTTP_CONNECT: + self._url = URL.build(authority=self._path) else: - path = self._path[0:idx1] - idx1 += 1 - idx2 = self._path.find("#", idx1+1) - if idx2 == -1: - query = self._path[idx1:] - fragment = "" - else: - query = self._path[idx1: idx2] - fragment = self._path[idx2+1:] - - self._url = URL.build( - path=path, - query_string=query, - fragment=fragment, - encoded=True, - ) + idx3 = len(self._path) + idx1 = self._path.find("?") + if idx1 == -1: + query = "" + idx2 = self._path.find("#") + if idx2 == -1: + path = self._path + fragment = "" + else: + path = self._path[0: idx2] + fragment = self._path[idx2+1:] + + else: + path = self._path[0:idx1] + idx1 += 1 + idx2 = self._path.find("#", idx1+1) + if idx2 == -1: + query = self._path[idx1:] + fragment = "" + else: + query = self._path[idx1: idx2] + fragment = self._path[idx2+1:] + + self._url = URL.build( + path=path, + query_string=query, + fragment=fragment, + encoded=True, + ) finally: PyByteArray_Resize(self._buf, 0) @@ -726,7 +729,10 @@ cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1: pyparser._last_error = exc return -1 else: - if pyparser._cparser.upgrade or pyparser._cparser.method == 5: # CONNECT + if ( + pyparser._cparser.upgrade or + pyparser._cparser.method == cparser.HTTP_CONNECT + ): return 2 else: return 0 diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 96ba28e67bd..a1d6bb771ae 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -533,9 +533,6 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: "Status line is too long", str(self.max_line_size), str(len(path)) ) - path_part, _hash_separator, url_fragment = path.partition("#") - path_part, _question_mark_separator, qs_part = path_part.partition("?") - # method if not METHRE.match(method): raise BadStatusLine(method) @@ -550,6 +547,23 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: except Exception: raise BadStatusLine(version) + if method == "CONNECT": + url = URL.build(authority=path) + else: + path_part, _hash_separator, url_fragment = path.partition("#") + path_part, _question_mark_separator, qs_part = path_part.partition("?") + + # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based + # NOTE: parser does, otherwise it results into the same + # NOTE: HTTP Request-Line input producing different + # NOTE: `yarl.URL()` objects + url = URL.build( + path=path_part, + query_string=qs_part, + fragment=url_fragment, + encoded=True, + ) + # read headers ( headers, @@ -576,16 +590,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: compression, upgrade, chunked, - # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based - # NOTE: parser does, otherwise it results into the same - # NOTE: HTTP Request-Line input producing different - # NOTE: `yarl.URL()` objects - URL.build( - path=path_part, - query_string=qs_part, - fragment=url_fragment, - encoded=True, - ), + url, ) diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 24b484a6c90..3213dd0c831 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -363,6 +363,14 @@ def test_compression_unknown(parser: Any) -> None: assert msg.compression is None +def test_url_connect(parser: Any) -> None: + text = b"CONNECT www.google.com HTTP/1.1\r\n" b"content-length: 0\r\n\r\n" + messages, upgrade, tail = parser.feed_data(text) + msg, payload = messages[0] + assert upgrade + assert msg.url == URL.build(authority="www.google.com") + + def test_headers_connect(parser: Any) -> None: text = b"CONNECT www.google.com HTTP/1.1\r\n" b"content-length: 0\r\n\r\n" messages, upgrade, tail = parser.feed_data(text) From 27f1347a9309e9108880d1ec9cd3e00321ea2a80 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 11 Dec 2021 14:49:44 +0200 Subject: [PATCH 2/7] Support absolute-form --- Makefile | 1 - aiohttp/_http_parser.pyx | 66 ++++++++++++++++++++++----------------- aiohttp/http_parser.py | 12 +++++-- tests/test_http_parser.py | 12 +++++++ 4 files changed, 59 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index 1f013cefcd4..57b3601d24d 100644 --- a/Makefile +++ b/Makefile @@ -190,7 +190,6 @@ compile-deps: .update-pip $(REQS) .PHONY: install install: .update-pip @pip install -r requirements/dev.txt -c requirements/constraints.txt - @touch .develop .PHONY: install-dev install-dev: .develop diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index 4747af5d2f3..bebd9894374 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -585,38 +585,46 @@ cdef class HttpRequestParser(HttpParser): return self._path = self._buf.decode('utf-8', 'surrogateescape') try: + idx3 = len(self._path) if self._cparser.method == cparser.HTTP_CONNECT: - self._url = URL.build(authority=self._path) - else: - idx3 = len(self._path) - idx1 = self._path.find("?") - if idx1 == -1: - query = "" - idx2 = self._path.find("#") - if idx2 == -1: - path = self._path - fragment = "" - else: - path = self._path[0: idx2] - fragment = self._path[idx2+1:] + # authority-form, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3 + self._url = URL.build(authority=self._path, encoded=True) + elif idx3 > 1 and self._path[0] == '/': + # origin-form, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1 + idx1 = self._path.find("?") + if idx1 == -1: + query = "" + idx2 = self._path.find("#") + if idx2 == -1: + path = self._path + fragment = "" + else: + path = self._path[0: idx2] + fragment = self._path[idx2+1:] + else: + path = self._path[0:idx1] + idx1 += 1 + idx2 = self._path.find("#", idx1+1) + if idx2 == -1: + query = self._path[idx1:] + fragment = "" else: - path = self._path[0:idx1] - idx1 += 1 - idx2 = self._path.find("#", idx1+1) - if idx2 == -1: - query = self._path[idx1:] - fragment = "" - else: - query = self._path[idx1: idx2] - fragment = self._path[idx2+1:] - - self._url = URL.build( - path=path, - query_string=query, - fragment=fragment, - encoded=True, - ) + query = self._path[idx1: idx2] + fragment = self._path[idx2+1:] + + self._url = URL.build( + path=path, + query_string=query, + fragment=fragment, + encoded=True, + ) + else: + # absolute-form for proxy maybe, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2 + self._url = URL(self._path, encoded=True) finally: PyByteArray_Resize(self._buf, 0) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index a1d6bb771ae..8b702f98e6f 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -548,8 +548,12 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: raise BadStatusLine(version) if method == "CONNECT": - url = URL.build(authority=path) - else: + # authority-form, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3 + url = URL.build(authority=path, encoded=True) + elif path.startswith("/"): + # origin-form, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1 path_part, _hash_separator, url_fragment = path.partition("#") path_part, _question_mark_separator, qs_part = path_part.partition("?") @@ -563,6 +567,10 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: fragment=url_fragment, encoded=True, ) + else: + # absolute-form for proxy maybe, + # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2 + url = URL(path, encoded=True) # read headers ( diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 3213dd0c831..50f7b27daab 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -379,6 +379,18 @@ def test_headers_connect(parser: Any) -> None: assert isinstance(payload, streams.StreamReader) +def test_url_absolute(parser: Any) -> None: + text = ( + b"GET https://www.google.com/path/to.html HTTP/1.1\r\n" + b"content-length: 0\r\n\r\n" + ) + messages, upgrade, tail = parser.feed_data(text) + msg, payload = messages[0] + assert not upgrade + assert msg.method == "GET" + assert msg.url == URL("https://www.google.com/path/to.html") + + def test_headers_old_websocket_key1(parser: Any) -> None: text = b"GET /test HTTP/1.1\r\n" b"SEC-WEBSOCKET-KEY1: line\r\n\r\n" From 14edd88be06952559360a6a316fbb651efb21817 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 11 Dec 2021 14:50:48 +0200 Subject: [PATCH 3/7] Add CHANGES --- CHANGES/6227.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/6227.bugfix diff --git a/CHANGES/6227.bugfix b/CHANGES/6227.bugfix new file mode 100644 index 00000000000..d09c1d560a7 --- /dev/null +++ b/CHANGES/6227.bugfix @@ -0,0 +1 @@ +Support authority-form and absolut-form URLs when sent to the server. From bee40254b8bd4385e85bd34d2ba3b5ad28d3fbbd Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 11 Dec 2021 15:06:17 +0200 Subject: [PATCH 4/7] Support absolute URLs in web.Request --- CHANGES/6227.bugfix | 2 +- aiohttp/web_request.py | 14 ++++++++++++-- tests/test_web_request.py | 8 ++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/CHANGES/6227.bugfix b/CHANGES/6227.bugfix index d09c1d560a7..a0a8d5340fe 100644 --- a/CHANGES/6227.bugfix +++ b/CHANGES/6227.bugfix @@ -1 +1 @@ -Support authority-form and absolut-form URLs when sent to the server. +Supported authority-form and absolut-form URLs when sent to the server. diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 46a2aeb05b8..c5bb1e52392 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -171,14 +171,24 @@ def __init__( self._headers = message.headers self._method = message.method self._version = message.version - self._rel_url = message.url + self._cache = {} # type: Dict[str, Any] + url = message.url + if url.is_absolute(): + # absolute URL is given, + # override auto-calculating url, host, and scheme + # all other properties should be good + self._cache["url"] = url + self._cache["host"] = url.host + self._cache["scheme"] = url.scheme + self._rel_url = url.relative() + else: + self._rel_url = message.url self._post = ( None ) # type: Optional[MultiDictProxy[Union[str, bytes, FileField]]] self._read_bytes = None # type: Optional[bytes] self._state = state - self._cache = {} # type: Dict[str, Any] self._task = task self._client_max_size = client_max_size self._loop = loop diff --git a/tests/test_web_request.py b/tests/test_web_request.py index 60c34319ddc..9a89bf7f30a 100644 --- a/tests/test_web_request.py +++ b/tests/test_web_request.py @@ -158,6 +158,14 @@ def test_non_ascii_raw_path() -> None: assert "/путь" == req.raw_path +def test_absolute_url() -> None: + req = make_mocked_request("GET", "https://example.com/path/to?a=1") + assert req.url == URL("https://example.com/path/to?a=1") + assert req.scheme == "https" + assert req.host == "example.com" + assert req.rel_url == URL.build(path="/path/to", query={"a": "1"}) + + def test_content_length() -> None: req = make_mocked_request("Get", "/", CIMultiDict([("CONTENT-LENGTH", "123")])) From 5946da3771708d313434a1bbd00f8de11b4bdacf Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 11 Dec 2021 15:52:26 +0200 Subject: [PATCH 5/7] Fix linter --- CHANGES/6227.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES/6227.bugfix b/CHANGES/6227.bugfix index a0a8d5340fe..9a5bab5ebb6 100644 --- a/CHANGES/6227.bugfix +++ b/CHANGES/6227.bugfix @@ -1 +1 @@ -Supported authority-form and absolut-form URLs when sent to the server. +Supported authority-form and absolute-form URLs when sent to the server. From 004405b1a4edd5010404a366105a2589fdc45270 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 11 Dec 2021 16:28:06 +0200 Subject: [PATCH 6/7] Fix proxy tests --- tests/test_proxy_functional.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/test_proxy_functional.py b/tests/test_proxy_functional.py index 9393450b185..b6c0bc07e82 100644 --- a/tests/test_proxy_functional.py +++ b/tests/test_proxy_functional.py @@ -280,17 +280,17 @@ async def test_proxy_http_absolute_path( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path?query=yes" + assert proxy.request.path_qs == "/path?query=yes" async def test_proxy_http_raw_path(proxy_test_server: Any, get_request: Any) -> None: url = "http://aiohttp.io:2561/space sheep?q=can:fly" - raw_url = "http://aiohttp.io:2561/space%20sheep?q=can:fly" + raw_url = "/space%20sheep?q=can:fly" proxy = await proxy_test_server() await get_request(url=url, proxy=proxy.url) - assert proxy.request.host == "aiohttp.io:2561" + assert proxy.request.host == "aiohttp.io" assert proxy.request.path_qs == raw_url @@ -298,13 +298,12 @@ async def test_proxy_http_idna_support( proxy_test_server: Any, get_request: Any ) -> None: url = "http://éé.com/" - raw_url = "http://xn--9caa.com/" proxy = await proxy_test_server() await get_request(url=url, proxy=proxy.url) - assert proxy.request.host == "xn--9caa.com" - assert proxy.request.path_qs == raw_url + assert proxy.request.host == "éé.com" + assert proxy.request.path_qs == "/" async def test_proxy_http_connection_error(get_request: Any) -> None: @@ -716,7 +715,7 @@ async def test_proxy_from_env_http( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert "Proxy-Authorization" not in proxy.request.headers @@ -740,7 +739,7 @@ async def test_proxy_from_env_http_with_auth( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert proxy.request.headers["Proxy-Authorization"] == auth.encode() @@ -766,7 +765,7 @@ async def test_proxy_from_env_http_with_auth_from_netrc( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert proxy.request.headers["Proxy-Authorization"] == auth.encode() @@ -792,7 +791,7 @@ async def test_proxy_from_env_http_without_auth_from_netrc( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert "Proxy-Authorization" not in proxy.request.headers @@ -816,7 +815,7 @@ async def test_proxy_from_env_http_without_auth_from_wrong_netrc( assert len(proxy.requests_list) == 1 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "http://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert "Proxy-Authorization" not in proxy.request.headers @@ -834,7 +833,7 @@ async def xtest_proxy_from_env_https( assert len(proxy.requests_list) == 2 assert proxy.request.method == "GET" assert proxy.request.host == "aiohttp.io" - assert proxy.request.path_qs == "https://aiohttp.io/path" + assert proxy.request.path_qs == "/path" assert "Proxy-Authorization" not in proxy.request.headers From f74a39b88a63f4bab04349c95ebb4bf611b80e2d Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sun, 12 Dec 2021 10:53:42 +0200 Subject: [PATCH 7/7] Update CHANGES/6227.bugfix Co-authored-by: Sviatoslav Sydorenko --- CHANGES/6227.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES/6227.bugfix b/CHANGES/6227.bugfix index 9a5bab5ebb6..df097565bcd 100644 --- a/CHANGES/6227.bugfix +++ b/CHANGES/6227.bugfix @@ -1 +1 @@ -Supported authority-form and absolute-form URLs when sent to the server. +Started supporting ``authority-form`` and ``absolute-form`` URLs on the server-side.