From 4bff37ecc60120e2f4dcf093b96898c516fdaba8 Mon Sep 17 00:00:00 2001 From: Les Orchard Date: Wed, 11 Apr 2012 13:10:53 -0400 Subject: [PATCH 1/2] Fix a hang in the style attribute validation gauntlet Turns out that the addition of a hyphen for vendor-prefixes causes a long hang in the style validator. Not sure how long, but it seems to be influenced by the length of the inline styles. This appears to fix it and passes tests. --- bleach/sanitizer.py | 13 +++++++++---- bleach/tests/test_css.py | 23 ++++++++++++++++++++++- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/bleach/sanitizer.py b/bleach/sanitizer.py index 16e3ef02..0fad2bbd 100644 --- a/bleach/sanitizer.py +++ b/bleach/sanitizer.py @@ -103,10 +103,15 @@ def sanitize_css(self, style): style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) # gauntlet - if not re.match("""^([-:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+""" - """'|"[\s\w]+"|\([\d,\s]+\))*$""", - style): - return '' + # TODO: Make sure this does what it's meant to - I *think* it wants to + # validate style attribute contents. + parts = style.split(';') + gauntlet = re.compile("""^([-:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+""" + """'|"[\s\w]+"|\([\d,\s]+\))*$""") + for part in parts: + if not gauntlet.match(part): + return '' + if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return '' diff --git a/bleach/tests/test_css.py b/bleach/tests/test_css.py index 23c26760..415bf120 100644 --- a/bleach/tests/test_css.py +++ b/bleach/tests/test_css.py @@ -1,6 +1,6 @@ from functools import partial -from nose.tools import eq_ +from nose.tools import eq_, ok_ from bleach import clean @@ -38,3 +38,24 @@ def test_valid_css(): clean('

foo

', styles=styles)) eq_('

foo

', clean('

foo

', styles=styles)) + + +def test_style_hang(): + """The sanitizer should not hang on any inline styles""" + # TODO: Neaten this up. It's copypasta from MDN/Kuma to repro the bug + style = """margin-top: 0px; margin-right: 0px; margin-bottom: 1.286em; margin-left: 0px; padding-top: 15px; padding-right: 15px; padding-bottom: 15px; padding-left: 15px; border-top-width: 1px; border-right-width: 1px; border-bottom-width: 1px; border-left-width: 1px; border-top-style: dotted; border-right-style: dotted; border-bottom-style: dotted; border-left-style: dotted; border-top-color: rgb(203, 200, 185); border-right-color: rgb(203, 200, 185); border-bottom-color: rgb(203, 200, 185); border-left-color: rgb(203, 200, 185); background-image: initial; background-attachment: initial; background-origin: initial; background-clip: initial; background-color: rgb(246, 246, 242); overflow-x: auto; overflow-y: auto; font: normal normal normal 100%/normal 'Courier New', 'Andale Mono', monospace; background-position: initial initial; background-repeat: initial initial;""" + html = '

Hello world

' % style + styles = [ + 'border', 'float', 'overflow', 'min-height', 'vertical-align', + 'white-space', + 'margin', 'margin-left', 'margin-top', 'margin-bottom', 'margin-right', + 'padding', 'padding-left', 'padding-top', 'padding-bottom', 'padding-right', + 'background', + 'background-color', + 'font', 'font-size', 'font-weight', 'text-align', 'text-transform', + ] + expected_style = """margin-top: 0px; margin-right: 0px; margin-bottom: 1.286em; margin-left: 0px; padding-top: 15px; padding-right: 15px; padding-bottom: 15px; padding-left: 15px; background-color: rgb(246, 246, 242); font: normal normal normal 100%/normal 'Courier New', 'Andale Mono', monospace;""" + + result = clean(html, styles=styles) + + ok_(True) From 52896d40c5a9a1ea5d7ea9794f9a7c12bf8bf550 Mon Sep 17 00:00:00 2001 From: Les Orchard Date: Tue, 29 May 2012 17:50:46 -0400 Subject: [PATCH 2/2] Accept "/" in inline styles, fix hang test expected result --- bleach/sanitizer.py | 2 +- bleach/tests/test_css.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bleach/sanitizer.py b/bleach/sanitizer.py index 0fad2bbd..3e7afd0c 100644 --- a/bleach/sanitizer.py +++ b/bleach/sanitizer.py @@ -106,7 +106,7 @@ def sanitize_css(self, style): # TODO: Make sure this does what it's meant to - I *think* it wants to # validate style attribute contents. parts = style.split(';') - gauntlet = re.compile("""^([-:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+""" + gauntlet = re.compile("""^([-/:,#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+""" """'|"[\s\w]+"|\([\d,\s]+\))*$""") for part in parts: if not gauntlet.match(part): diff --git a/bleach/tests/test_css.py b/bleach/tests/test_css.py index 415bf120..72ccc882 100644 --- a/bleach/tests/test_css.py +++ b/bleach/tests/test_css.py @@ -54,8 +54,8 @@ def test_style_hang(): 'background-color', 'font', 'font-size', 'font-weight', 'text-align', 'text-transform', ] - expected_style = """margin-top: 0px; margin-right: 0px; margin-bottom: 1.286em; margin-left: 0px; padding-top: 15px; padding-right: 15px; padding-bottom: 15px; padding-left: 15px; background-color: rgb(246, 246, 242); font: normal normal normal 100%/normal 'Courier New', 'Andale Mono', monospace;""" - result = clean(html, styles=styles) + expected = """

Hello world

""" - ok_(True) + result = clean(html, styles=styles) + eq_(expected, result)