Skip to content

Commit

Permalink
sanitizer: use urlparse from vendored CPython 3.6.14 urllib.parse
Browse files Browse the repository at this point in the history
Update test_uri_value_allowed_protocols testcases:

  * convert test_invalid_uri_does_not_raise_error into a test case
  * add test case for data: scheme
  * add test case for implicit http for IP and port with path and fragment
  * add test case for relative path URI
  * run the "is not allowed by default" test cases against the default
    ALLOWED_PROTOCOLS
  * change the anchor-only test to one that doesn't include a domain, and
    add a comment to the one that looks like a domain; refs:
    https://github.com/mozilla/bleach/pull/565/files#r568229243
  • Loading branch information
Greg Guthe committed Aug 25, 2021
1 parent 9023f7f commit 1033d4d
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 8 deletions.
2 changes: 1 addition & 1 deletion bleach/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
import warnings

from urllib.parse import urlparse
from bleach._vendor.parse import urlparse
from xml.sax.saxutils import unescape

from bleach import html5lib_shim
Expand Down
50 changes: 43 additions & 7 deletions tests/test_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from bleach import clean
from bleach.html5lib_shim import Filter
from bleach.sanitizer import Cleaner
from bleach.sanitizer import ALLOWED_PROTOCOLS, Cleaner
from bleach._vendor.html5lib.constants import rcdataElements


Expand Down Expand Up @@ -56,10 +56,6 @@ def test_html_is_lowercased():
)


def test_invalid_uri_does_not_raise_error():
    """Cleaning a link with a malformed href drops the href instead of raising."""
    dirty = '<a href="http://example.com]">text</a>'
    assert clean(dirty) == "<a>text</a>"


@pytest.mark.parametrize(
"data, should_strip, expected",
[
Expand Down Expand Up @@ -469,10 +465,31 @@ def test_attributes_list():
@pytest.mark.parametrize(
"data, kwargs, expected",
[
# invalid URI (urlparse raises a ValueError: Invalid IPv6 URL)
# is not allowed by default
(
'<a href="http://example.com]">text</a>',
{"protocols": ALLOWED_PROTOCOLS},
"<a>text</a>",
),
# data protocol is not allowed by default
(
'<a href="data:text/javascript,prompt(1)">foo</a>',
{"protocols": ALLOWED_PROTOCOLS},
"<a>foo</a>",
),
# javascript: is not allowed by default
("<a href=\"javascript:alert('XSS')\">xss</a>", {}, "<a>xss</a>"),
(
"<a href=\"javascript:alert('XSS')\">xss</a>",
{"protocols": ALLOWED_PROTOCOLS},
"<a>xss</a>",
),
# File protocol is not allowed by default
('<a href="file:///tmp/foo">foo</a>', {}, "<a>foo</a>"),
(
'<a href="file:///tmp/foo">foo</a>',
{"protocols": ALLOWED_PROTOCOLS},
"<a>foo</a>",
),
# Specified protocols are allowed
(
'<a href="myprotocol://more_text">allowed href</a>',
Expand All @@ -486,12 +503,23 @@ def test_attributes_list():
"<a>invalid href</a>",
),
# Anchors are ok
(
'<a href="#section-1">foo</a>',
{"protocols": []},
'<a href="#section-1">foo</a>',
),
# Anchor that looks like a domain is ok
(
'<a href="#example.com">foo</a>',
{"protocols": []},
'<a href="#example.com">foo</a>',
),
# Allow implicit http if allowed
(
'<a href="/path">valid</a>',
{"protocols": ["http"]},
'<a href="/path">valid</a>',
),
(
'<a href="example.com">valid</a>',
{"protocols": ["http"]},
Expand Down Expand Up @@ -522,6 +550,14 @@ def test_attributes_list():
{"protocols": ["http"]},
'<a href="192.168.100.100:8000">valid</a>',
),
pytest.param(
*(
'<a href="192.168.100.100:8000/foo#bar">valid</a>',
{"protocols": ["http"]},
'<a href="192.168.100.100:8000/foo#bar">valid</a>',
),
marks=pytest.mark.xfail,
),
# Disallow implicit http if disallowed
('<a href="example.com">foo</a>', {"protocols": []}, "<a>foo</a>"),
('<a href="example.com:8000">foo</a>', {"protocols": []}, "<a>foo</a>"),
Expand Down

0 comments on commit 1033d4d

Please sign in to comment.