Skip to content

Commit

Permalink
Add support for host_port_subcomponent (#1375)
Browse files Browse the repository at this point in the history
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
bdraco and pre-commit-ci[bot] authored Oct 22, 2024
1 parent 55305c7 commit cc1ff18
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGES/1375.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added :attr:`~yarl.URL.host_port_subcomponent` which returns the :rfc:`3986#section-3.2.2` host and :rfc:`3986#section-3.2.3` port subcomponent -- by :user:`bdraco`.
23 changes: 23 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,29 @@ There are two kinds of properties: *decoded* and *encoded* (with

.. versionadded:: 1.13

.. attribute:: URL.host_port_subcomponent

:rfc:`3986#section-3.2.2` host and :rfc:`3986#section-3.2.3` port subcomponents of the URL; ``None`` for relative URLs
(:ref:`yarl-api-relative-urls`).

Trailing dots are stripped from the host to ensure
this value can be used for an HTTP Host header.

The port is omitted if it is the default port for the scheme.

.. doctest::

>>> URL('http://хост.домен:81').host_port_subcomponent
'xn--n1agdj.xn--d1acufc:81'
>>> URL('https://[::1]:8443').host_port_subcomponent
'[::1]:8443'
>>> URL('http://example.com./').host_port_subcomponent
'example.com'
>>> URL('http://[::1]').host_port_subcomponent
'[::1]'

.. versionadded:: 1.17

.. attribute:: URL.port

*port* part of URL, with scheme-based fallback.
Expand Down
19 changes: 19 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,25 @@ def test_host_subcomponent(host: str):
assert url.host_subcomponent == host


@pytest.mark.parametrize(
    ("url_str", "expected"),
    [
        # Relative URL: there is no host, so the property is None.
        ("/", None),
        ("http://example.com", "example.com"),
        ("http://[::1]", "[::1]"),
        ("http://xn--gnter-4ya.com", "xn--gnter-4ya.com"),
        # Trailing dots are stripped from the host.
        ("http://example.com.", "example.com"),
        ("https://example.com.", "example.com"),
        # The scheme's default port is omitted, with or without a trailing dot.
        ("http://example.com:80", "example.com"),
        ("http://example.com.:80", "example.com"),
        # Non-default ports are kept.
        ("http://example.com:8080", "example.com:8080"),
        ("https://127.0.0.1:8443", "127.0.0.1:8443"),
        ("http://[::1]:8080", "[::1]:8080"),
        ("https://[::1]:8443", "[::1]:8443"),
    ],
)
def test_host_port_subcomponent(url_str: str, expected: "str | None") -> None:
    """Check ``URL.host_port_subcomponent`` against the expected value.

    ``expected`` is ``None`` for the relative URL case and a string otherwise.
    The annotation is quoted so it is never evaluated at runtime and needs no
    extra import.
    """
    url = URL(url_str)
    assert url.host_port_subcomponent == expected


def test_host_subcomponent_return_idna_encoded_host():
url = URL("http://оун-упа.укр")
assert url.host_subcomponent == "xn----8sb1bdhvc.xn--j1amh"
Expand Down
13 changes: 13 additions & 0 deletions tests/test_url_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,3 +592,16 @@ def test_url_subtract(benchmark: BenchmarkFixture) -> None:
def _run() -> None:
for _ in range(100):
URL_WITH_LONGER_PATH - URL_WITH_PATH


def test_url_host_port_subcomponent(benchmark: BenchmarkFixture) -> None:
    """Benchmark computing ``host_port_subcomponent`` for two URLs.

    The cached value is popped from each URL's ``_cache`` before every access
    so that the property is recomputed on each iteration instead of being
    served from the cache.
    """
    base_cache = BASE_URL._cache
    non_default_cache = URL_WITH_NOT_DEFAULT_PORT._cache
    cache_key = "host_port_subcomponent"

    @benchmark
    def _run() -> None:
        for _ in range(100):
            # Drop any previously cached result for both URLs first.
            base_cache.pop(cache_key, None)
            non_default_cache.pop(cache_key, None)
            # Bare attribute access is intentional: it triggers the property.
            URL_WITH_NOT_DEFAULT_PORT.host_port_subcomponent
            BASE_URL.host_port_subcomponent
39 changes: 39 additions & 0 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class _InternalURLCache(TypedDict, total=False):
raw_host: Union[str, None]
host: Union[str, None]
host_subcomponent: Union[str, None]
host_port_subcomponent: Union[str, None]
port: Union[int, None]
explicit_port: Union[int, None]
raw_path: str
Expand Down Expand Up @@ -733,6 +734,44 @@ def host_subcomponent(self) -> Union[str, None]:
return None
return f"[{raw}]" if ":" in raw else raw

@cached_property
def host_port_subcomponent(self) -> Union[str, None]:
    """Return the combined host and port subcomponent of the URL.

    Trailing dots are stripped from the host, which makes the value
    suitable for use in the Host header of an HTTP request.

    The port is appended only when an explicit port is present and it
    differs from ``self._default_port``. Hosts containing ``:`` (IPv6
    literals) are wrapped in square brackets.

    Returns ``None`` when ``raw_host`` is ``None`` (relative URLs).

    References:

    - https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
      `IP-literal = "[" ( IPv6address / IPvFuture ) "]"`
    - https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3
      `port = *DIGIT`

    Examples:

    - `http://example.com:8080` -> `example.com:8080`
    - `http://example.com:80` -> `example.com`
    - `http://example.com.:80` -> `example.com`
    - `https://127.0.0.1:8443` -> `127.0.0.1:8443`
    - `https://[::1]:8443` -> `[::1]:8443`
    - `http://[::1]` -> `[::1]`
    """
    host = self.raw_host
    if host is None:
        return None
    explicit = self.explicit_port
    # Trailing dots are valid for FQDNs in DNS, but TLS validation fails
    # when they are kept; see https://github.com/aio-libs/aiohttp/issues/3636.
    # The last character is checked first so rstrip() is only called when
    # there is actually something to strip.
    if host[-1] == ".":
        host = host.rstrip(".")
    # A colon in the host means an IPv6 literal, which must be bracketed.
    if ":" in host:
        host = f"[{host}]"
    if explicit is None or explicit == self._default_port:
        return host
    return f"{host}:{explicit}"

@cached_property
def port(self) -> Union[int, None]:
"""Port part of URL, with scheme-based fallback.
Expand Down

0 comments on commit cc1ff18

Please sign in to comment.