Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support absolute-form and authority-form URLs by web server #6409

Merged
merged 7 commits into from
Dec 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/6227.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Started supporting ``authority-form`` and ``absolute-form`` URLs on the server-side.
72 changes: 43 additions & 29 deletions aiohttp/_http_parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ cdef class HttpParser:
raw_headers = tuple(self._raw_headers)
headers = CIMultiDictProxy(self._headers)

if upgrade or self._cparser.method == 5: # cparser.CONNECT:
if upgrade or self._cparser.method == cparser.HTTP_CONNECT:
self._upgraded = True

# do not support old websocket spec
Expand Down Expand Up @@ -453,7 +453,7 @@ cdef class HttpParser:

if (
ULLONG_MAX > self._cparser.content_length > 0 or chunked or
self._cparser.method == 5 or # CONNECT: 5
self._cparser.method == cparser.HTTP_CONNECT or
(self._cparser.status_code >= 199 and
self._cparser.content_length == 0 and
self._read_until_eof)
Expand Down Expand Up @@ -586,34 +586,45 @@ cdef class HttpRequestParser(HttpParser):
self._path = self._buf.decode('utf-8', 'surrogateescape')
try:
idx3 = len(self._path)
idx1 = self._path.find("?")
if idx1 == -1:
query = ""
idx2 = self._path.find("#")
if idx2 == -1:
path = self._path
fragment = ""
else:
path = self._path[0: idx2]
fragment = self._path[idx2+1:]
if self._cparser.method == cparser.HTTP_CONNECT:
# authority-form,
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
self._url = URL.build(authority=self._path, encoded=True)
elif idx3 > 1 and self._path[0] == '/':
# origin-form,
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
idx1 = self._path.find("?")
if idx1 == -1:
query = ""
idx2 = self._path.find("#")
if idx2 == -1:
path = self._path
fragment = ""
else:
path = self._path[0: idx2]
fragment = self._path[idx2+1:]

else:
path = self._path[0:idx1]
idx1 += 1
idx2 = self._path.find("#", idx1+1)
if idx2 == -1:
query = self._path[idx1:]
fragment = ""
else:
query = self._path[idx1: idx2]
fragment = self._path[idx2+1:]

self._url = URL.build(
path=path,
query_string=query,
fragment=fragment,
encoded=True,
)
path = self._path[0:idx1]
idx1 += 1
idx2 = self._path.find("#", idx1+1)
if idx2 == -1:
query = self._path[idx1:]
fragment = ""
else:
query = self._path[idx1: idx2]
fragment = self._path[idx2+1:]

self._url = URL.build(
path=path,
query_string=query,
fragment=fragment,
encoded=True,
)
else:
# absolute-form (typically used for requests sent to a proxy),
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
self._url = URL(self._path, encoded=True)
finally:
PyByteArray_Resize(self._buf, 0)

Expand Down Expand Up @@ -726,7 +737,10 @@ cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1:
pyparser._last_error = exc
return -1
else:
if pyparser._cparser.upgrade or pyparser._cparser.method == 5: # CONNECT
if (
pyparser._cparser.upgrade or
pyparser._cparser.method == cparser.HTTP_CONNECT
):
return 2
else:
return 0
Expand Down
39 changes: 26 additions & 13 deletions aiohttp/http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,9 +533,6 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
"Status line is too long", str(self.max_line_size), str(len(path))
)

path_part, _hash_separator, url_fragment = path.partition("#")
path_part, _question_mark_separator, qs_part = path_part.partition("?")

# method
if not METHRE.match(method):
raise BadStatusLine(method)
Expand All @@ -550,6 +547,31 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
except Exception:
raise BadStatusLine(version)

if method == "CONNECT":
# authority-form,
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
url = URL.build(authority=path, encoded=True)
elif path.startswith("/"):
# origin-form,
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
path_part, _hash_separator, url_fragment = path.partition("#")
path_part, _question_mark_separator, qs_part = path_part.partition("?")

# NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
# NOTE: parser does; otherwise the same HTTP Request-Line input
# NOTE: would produce different `yarl.URL()` objects in the two
# NOTE: parsers
url = URL.build(
path=path_part,
query_string=qs_part,
fragment=url_fragment,
encoded=True,
)
else:
# absolute-form (typically used for requests sent to a proxy),
# https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
url = URL(path, encoded=True)

# read headers
(
headers,
Expand All @@ -576,16 +598,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
compression,
upgrade,
chunked,
# NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
# NOTE: parser does, otherwise it results into the same
# NOTE: HTTP Request-Line input producing different
# NOTE: `yarl.URL()` objects
URL.build(
path=path_part,
query_string=qs_part,
fragment=url_fragment,
encoded=True,
),
url,
)


Expand Down
14 changes: 12 additions & 2 deletions aiohttp/web_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,24 @@ def __init__(
self._headers = message.headers
self._method = message.method
self._version = message.version
self._rel_url = message.url
self._cache = {} # type: Dict[str, Any]
url = message.url
if url.is_absolute():
# absolute URL is given,
# override auto-calculating url, host, and scheme
# all other properties should be good
self._cache["url"] = url
self._cache["host"] = url.host
self._cache["scheme"] = url.scheme
self._rel_url = url.relative()
else:
self._rel_url = message.url
self._post = (
None
) # type: Optional[MultiDictProxy[Union[str, bytes, FileField]]]
self._read_bytes = None # type: Optional[bytes]

self._state = state
self._cache = {} # type: Dict[str, Any]
self._task = task
self._client_max_size = client_max_size
self._loop = loop
Expand Down
20 changes: 20 additions & 0 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,14 @@ def test_compression_unknown(parser: Any) -> None:
assert msg.compression is None


def test_url_connect(parser: Any) -> None:
    """A CONNECT request line is parsed into an authority-form URL."""
    request = (
        b"CONNECT www.google.com HTTP/1.1\r\n"
        b"content-length: 0\r\n\r\n"
    )
    parsed, upgraded, _tail = parser.feed_data(request)
    message, _payload = parsed[0]
    # CONNECT switches the connection into tunnel (upgraded) mode.
    assert upgraded
    assert message.url == URL.build(authority="www.google.com")


def test_headers_connect(parser: Any) -> None:
text = b"CONNECT www.google.com HTTP/1.1\r\n" b"content-length: 0\r\n\r\n"
messages, upgrade, tail = parser.feed_data(text)
Expand All @@ -371,6 +379,18 @@ def test_headers_connect(parser: Any) -> None:
assert isinstance(payload, streams.StreamReader)


def test_url_absolute(parser: Any) -> None:
    """An absolute-form request target is kept as a full URL by the parser."""
    request = (
        b"GET https://www.google.com/path/to.html HTTP/1.1\r\n"
        b"content-length: 0\r\n\r\n"
    )
    parsed, upgraded, _tail = parser.feed_data(request)
    message, _payload = parsed[0]
    assert not upgraded
    assert message.method == "GET"
    assert message.url == URL("https://www.google.com/path/to.html")


def test_headers_old_websocket_key1(parser: Any) -> None:
text = b"GET /test HTTP/1.1\r\n" b"SEC-WEBSOCKET-KEY1: line\r\n\r\n"

Expand Down
23 changes: 11 additions & 12 deletions tests/test_proxy_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,31 +280,30 @@ async def test_proxy_http_absolute_path(
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path?query=yes"
assert proxy.request.path_qs == "/path?query=yes"


async def test_proxy_http_raw_path(proxy_test_server: Any, get_request: Any) -> None:
url = "http://aiohttp.io:2561/space sheep?q=can:fly"
raw_url = "http://aiohttp.io:2561/space%20sheep?q=can:fly"
raw_url = "/space%20sheep?q=can:fly"
proxy = await proxy_test_server()

await get_request(url=url, proxy=proxy.url)

assert proxy.request.host == "aiohttp.io:2561"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == raw_url


async def test_proxy_http_idna_support(
proxy_test_server: Any, get_request: Any
) -> None:
url = "http://éé.com/"
raw_url = "http://xn--9caa.com/"
proxy = await proxy_test_server()

await get_request(url=url, proxy=proxy.url)

assert proxy.request.host == "xn--9caa.com"
assert proxy.request.path_qs == raw_url
assert proxy.request.host == "éé.com"
assert proxy.request.path_qs == "/"


async def test_proxy_http_connection_error(get_request: Any) -> None:
Expand Down Expand Up @@ -716,7 +715,7 @@ async def test_proxy_from_env_http(
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert "Proxy-Authorization" not in proxy.request.headers


Expand All @@ -740,7 +739,7 @@ async def test_proxy_from_env_http_with_auth(
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert proxy.request.headers["Proxy-Authorization"] == auth.encode()


Expand All @@ -766,7 +765,7 @@ async def test_proxy_from_env_http_with_auth_from_netrc(
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert proxy.request.headers["Proxy-Authorization"] == auth.encode()


Expand All @@ -792,7 +791,7 @@ async def test_proxy_from_env_http_without_auth_from_netrc(
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert "Proxy-Authorization" not in proxy.request.headers


Expand All @@ -816,7 +815,7 @@ async def test_proxy_from_env_http_without_auth_from_wrong_netrc(
assert len(proxy.requests_list) == 1
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "http://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert "Proxy-Authorization" not in proxy.request.headers


Expand All @@ -834,7 +833,7 @@ async def xtest_proxy_from_env_https(
assert len(proxy.requests_list) == 2
assert proxy.request.method == "GET"
assert proxy.request.host == "aiohttp.io"
assert proxy.request.path_qs == "https://aiohttp.io/path"
assert proxy.request.path_qs == "/path"
assert "Proxy-Authorization" not in proxy.request.headers


Expand Down
8 changes: 8 additions & 0 deletions tests/test_web_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,14 @@ def test_non_ascii_raw_path() -> None:
assert "/путь" == req.raw_path


def test_absolute_url() -> None:
    """An absolute request target populates url, scheme, host and rel_url."""
    target = "https://example.com/path/to?a=1"
    request = make_mocked_request("GET", target)
    assert request.url == URL(target)
    assert request.scheme == "https"
    assert request.host == "example.com"
    # The relative URL keeps only the path and query of the absolute target.
    assert request.rel_url == URL.build(path="/path/to", query={"a": "1"})


def test_content_length() -> None:
req = make_mocked_request("Get", "/", CIMultiDict([("CONTENT-LENGTH", "123")]))

Expand Down