Skip to content

Commit

Permalink
fix bug 1623633
Browse files Browse the repository at this point in the history
expand and comment on sanitize_css gauntlet regex per
https://github.com/mozilla/bleach/pull/61/files#r677453
  • Loading branch information
Greg Guthe authored and g-k committed Mar 26, 2020
1 parent fc77027 commit d6018f2
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 8 deletions.
10 changes: 8 additions & 2 deletions bleach/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,8 +593,14 @@ def sanitize_css(self, style):
# the whole thing.
parts = style.split(';')
gauntlet = re.compile(
r"""^([-/:,#%.'"\s!\w]|\w-\w|'[\s\w]+'\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""",
flags=re.U
r"""^( # consider a style attribute value as composed of:
[/:,#%!.\s\w] # one listed punctuation character, a whitespace character (\s, newlines included), or a word character
|\w-\w # 3 characters in the form \w-\w
|'[\s\w]+'\s* # a single quoted string of [\s\w]+ with trailing space
|"[\s\w]+" # a double quoted string of [\s\w]+
|\([\d,%\.\s]+\) # a parenthesized string of one or more digits, commas, periods, percent signs, or whitespace e.g. from 'color: hsl(30,100%,50%)'
)*$""",
flags=re.U | re.VERBOSE
)

for part in parts:
Expand Down
33 changes: 27 additions & 6 deletions tests/test_css.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import unicode_literals

from functools import partial
from timeit import timeit

import pytest

Expand Down Expand Up @@ -37,10 +38,12 @@
'<p style="color: red;">bar</p>'
),
# Handle leading - in attributes
(
# regressed with the fix for bug 1623633
pytest.param(
'<p style="cursor: -moz-grab;">bar</p>',
['cursor'],
'<p style="cursor: -moz-grab;">bar</p>'
'<p style="cursor: -moz-grab;">bar</p>',
marks=pytest.mark.xfail,
),
# Handle () in attributes
(
Expand All @@ -54,16 +57,20 @@
'<p style="color: rgba(255,0,0,0.4);">bar</p>',
),
# Handle ' in attributes
(
# regressed with the fix for bug 1623633
pytest.param(
'<p style="text-overflow: \',\' ellipsis;">bar</p>',
['text-overflow'],
'<p style="text-overflow: \',\' ellipsis;">bar</p>'
'<p style="text-overflow: \',\' ellipsis;">bar</p>',
marks=pytest.mark.xfail,
),
# Handle " in attributes
(
# regressed with the fix for bug 1623633
pytest.param(
'<p style=\'text-overflow: "," ellipsis;\'>bar</p>',
['text-overflow'],
'<p style=\'text-overflow: "," ellipsis;\'>bar</p>'
'<p style=\'text-overflow: "," ellipsis;\'>bar</p>',
marks=pytest.mark.xfail,
),
(
'<p style=\'font-family: "Arial";\'>bar</p>',
Expand Down Expand Up @@ -223,3 +230,17 @@ def test_style_hang():
def test_css_parsing_with_entities(data, styles, expected):
"""The sanitizer should be ok with character entities"""
assert clean(data, tags=['p'], attributes={'p': ['style']}, styles=styles) == expected


@pytest.mark.parametrize('overlap_test_char', ["\"", "'", "-"])
def test_css_parsing_gauntlet_regex_backtracking(overlap_test_char):
    """Cleaning a pathological style value must finish in under a second.

    Regression guard for the catastrophic-backtracking report at
    https://bugzilla.mozilla.org/show_bug.cgi?id=1623633

    Each parametrized character is wrapped around an ``a`` and the resulting
    three-character unit is repeated, then terminated with ``^``.
    """
    repetitions = 22

    # <char>a<char> repeated, with a trailing '^' -- presumably chosen so the
    # gauntlet pattern cannot match and has to give up on the whole value.
    hostile_style = (
        (overlap_test_char + 'a' + overlap_test_char) * repetitions + '^'
    )

    # Build the source statement that timeit will execute exactly once.
    statement = (
        """clean('''<a style='%s'></a>''', attributes={'a': ['style']})"""
        % hostile_style
    )
    elapsed_seconds = timeit(
        stmt=statement,
        setup='from bleach import clean',
        number=1,
    )

    # should complete in less than one second
    assert elapsed_seconds < 1.0

0 comments on commit d6018f2

Please sign in to comment.