diff --git a/CHANGES/5498.bugfix b/CHANGES/5498.bugfix new file mode 100644 index 00000000000..c11630e8743 --- /dev/null +++ b/CHANGES/5498.bugfix @@ -0,0 +1,6 @@ +Fix the difference in how the pure-Python and the Cython-based +HTTP parsers construct a ``yarl.URL`` object for HTTP request-target. + +Before this fix, the Python parser would turn the URI's absolute-path +for ``//some-path`` into ``/`` while the Cython code preserved it as +``//some-path``. Now, both do the latter. diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 90bd05a25c3..71ba815ae67 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -498,6 +498,9 @@ def parse_message(self, lines: List[bytes]) -> Any: "Status line is too long", str(self.max_line_size), str(len(path)) ) + path_part, _hash_separator, url_fragment = path.partition("#") + path_part, _question_mark_separator, qs_part = path_part.partition("?") + # method if not METHRE.match(method): raise BadStatusLine(method) @@ -538,7 +541,16 @@ def parse_message(self, lines: List[bytes]) -> Any: compression, upgrade, chunked, - URL(path), + # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based + # NOTE: parser does, otherwise it results into the same + # NOTE: HTTP Request-Line input producing different + # NOTE: `yarl.URL()` objects + URL.build( + path=path_part, + query_string=qs_part, + fragment=url_fragment, + encoded=True, + ), ) diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 38b83ff4863..87e98eaad37 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -528,6 +528,7 @@ def test_http_request_parser_two_slashes(parser) -> None: assert msg.method == "GET" assert msg.path == "//path" + assert msg.url.path == "//path" assert msg.version == (1, 1) assert not msg.should_close assert msg.compression is None