Skip to content

Commit

Permalink
Merge pull request #2041 from edugonza/main
Browse files: browse the repository at this point in the history
fixes #2040 Wrapped urlsplit call in a try-except block
  • Loading branch information
liZe authored Jan 16, 2024
2 parents a4a6c7d + 178b507 commit bf58cb3
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 4 deletions.
32 changes: 32 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Test URLs."""

import re

import pytest

from .testing_utils import FakeHTML, capture_logs, resource_path


@pytest.mark.parametrize('url, base_url', (
    # Link target with a stray "]", base URL produced by resource_path().
    ('https://weasyprint.org]', resource_path('<inline HTML>')),
    # Link target and base URL both carry the stray "]".
    ('https://weasyprint.org]', 'https://weasyprint.org]'),
    # Only the base URL carries the stray "]".
    ('https://weasyprint.org/', 'https://weasyprint.org]'),
))
def test_malformed_url_link(url, base_url):
    """Render a link whose URL or base URL contains a stray ``]``.

    The test expects exactly one captured log entry, mentioning
    "Malformed" and the offending bracket, and expects the generated PDF
    to still expose the link: the first ``/URI`` entry must be the
    untouched ``href``, the first ``/Subtype`` must be ``/Link`` and the
    first ``/S`` must be ``/URI``.
    """
    markup = f'<p><a href="{url}">My Link</a></p>'

    # Build and render inside the capture so every warning is recorded.
    with capture_logs() as logs:
        document = FakeHTML(string=markup, base_url=base_url)
        pdf = document.write_pdf()

    assert len(logs) == 1
    message = logs[0]
    assert "Malformed" in message
    assert "]" in message

    # Scan the raw PDF bytes for the link annotation entries.
    found_uris = re.findall(b'/URI \\((.*)\\)', pdf)
    found_types = re.findall(b'/S (/\\w*)', pdf)
    found_subtypes = re.findall(b'/Subtype (/\\w*)', pdf)

    # Only the first match of each kind is checked.
    assert found_uris[0] == url.encode()
    assert found_subtypes[0] == b'/Link'
    assert found_types[0] == b'/URI'
17 changes: 13 additions & 4 deletions weasyprint/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,19 @@ def get_link_attribute(element, attr_name, base_url):
uri = get_url_attribute(element, attr_name, base_url, allow_relative=True)
if uri:
if base_url:
parsed = urlsplit(uri)
# Compare with fragments removed
if parsed.fragment and parsed[:-1] == urlsplit(base_url)[:-1]:
return ('url', ('internal', unquote(parsed.fragment)))
try:
parsed = urlsplit(uri)
except ValueError:
LOGGER.warning('Malformed URL: %s', uri)
else:
try:
parsed_base = urlsplit(base_url)
except ValueError:
LOGGER.warning('Malformed base URL: %s', base_url)
else:
# Compare with fragments removed
if parsed.fragment and parsed[:-1] == parsed_base[:-1]:
return ('url', ('internal', unquote(parsed.fragment)))
return ('url', ('external', uri))


Expand Down

0 comments on commit bf58cb3

Please sign in to comment.