From 131f2e2871447dc332c85b5f66844d7fc4d6ab50 Mon Sep 17 00:00:00 2001 From: keithasaurus <592217+keithasaurus@users.noreply.github.com> Date: Sun, 26 Nov 2023 13:24:55 -0800 Subject: [PATCH] Escape attributes by default (#9) * wip * wip * ruff format * wip * escape tests * docs * wip * add a few other common attributes * rename var * clarify comment --- .github/workflows/push.yml | 5 +- README.md | 40 +++++--- bench/simple.py | 34 ++----- pyproject.toml | 2 +- simple_html/__init__.py | 91 ++++++++++++++++++- tests/{test_render.py => test_simple_html.py} | 49 +++++++++- 6 files changed, 170 insertions(+), 51 deletions(-) rename tests/{test_render.py => test_simple_html.py} (70%) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 3107f0e..50d5712 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -27,7 +27,4 @@ jobs: run: poetry run pytest - name: run bench (pure python) run: poetry run python -m bench.run - - name: compile - run: poetry run mypyc simple_html - - name: run bench (compiled) - run: poetry run python -m bench.run + diff --git a/README.md b/README.md index 0903b13..f3a3c50 100644 --- a/README.md +++ b/README.md @@ -61,18 +61,6 @@ render( #
``` -Strings are escaped by default, but you can pass in `SafeString`s to avoid escaping. - -```python -from simple_html import br, p, SafeString, render - -node = p({}, - "Escaped & stuff", - br, - SafeString("Not escaped & stuff")) - -render(node) # returns:

<p>Escaped &amp; stuff<br/>Not escaped & stuff</p>

-``` Lists and generators are both valid collections of nodes: ```python @@ -115,3 +103,31 @@ node = custom_elem( render(node) # Wow ``` + + +Strings are escaped by default, but you can pass in `SafeString`s to avoid escaping. + +```python +from simple_html import br, p, SafeString, render + +node = p({}, + "Escaped & stuff", + br, + SafeString("Not escaped & stuff")) + +render(node) #

<p>Escaped &amp; stuff<br/>Not escaped & stuff</p>

+``` + +Attributes are also escaped -- both names and values. You can use `SafeString` to bypass, if needed. + +```python +from simple_html import div, render, SafeString + +escaped_attrs_node = div({"":""}) + +render(escaped_attrs_node) #
+ +unescaped_attrs_node = div({SafeString(""): SafeString("")}) + +render(unescaped_attrs_node) #
="">
+``` diff --git a/bench/simple.py b/bench/simple.py index 367cbf5..f75d8fe 100644 --- a/bench/simple.py +++ b/bench/simple.py @@ -38,10 +38,7 @@ def basic(objs: List[Tuple[str, str, List[str]]]) -> None: p({}, content, br, br), ul( {}, - [ - li({"class": "item-stuff"}, SafeString(ss)) - for ss in oks - ], + [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks], ), ), ), @@ -55,9 +52,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None: "", html( {}, - head({}, - title({}, - title_)), + head({}, title({}, title_)), body( {}, h1({"class": "great header", "other_attr": "5", "id": "header1"}), @@ -66,10 +61,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None: p({}, content, br, br), ul( {}, - [ - li({"class": "item-stuff"}, SafeString(ss)) - for ss in oks - ], + [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks], ), ), h1({"class": "great header", "other_attr": "5", "id": "header1"}), @@ -78,10 +70,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None: p({}, content, br, br), ul( {}, - [ - li({"class": "item-stuff"}, SafeString(ss)) - for ss in oks - ], + [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks], ), ), h1({"class": "great header", "other_attr": "5", "id": "header1"}), @@ -90,10 +79,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None: p({}, content, br, br), ul( {}, - [ - li({"class": "item-stuff"}, SafeString(ss)) - for ss in oks - ], + [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks], ), ), h1({"class": "great header", "other_attr": "5", "id": "header1"}), @@ -102,10 +88,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None: p({}, content, br, br), ul( {}, - [ - li({"class": "item-stuff"}, SafeString(ss)) - for ss in oks - ], + [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks], ), ), h1({"class": "great header", "other_attr": "5", "id": "header1"}), @@ -114,10 +97,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None: p({}, 
content, br, br), ul( {}, - [ - li({"class": "item-stuff"}, SafeString(ss)) - for ss in oks - ], + [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks], ), ), ), diff --git a/pyproject.toml b/pyproject.toml index bb8e3b8..a0a1f32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "simple-html" -version = "1.0.1" +version = "1.1.0" readme = "README.md" description = "Template-less html rendering in Python" authors = ["Keith Philpott "] diff --git a/simple_html/__init__.py b/simple_html/__init__.py index a44df79..f12be76 100644 --- a/simple_html/__init__.py +++ b/simple_html/__init__.py @@ -1,14 +1,17 @@ from html import escape from types import GeneratorType -from typing import Tuple, Union, Dict, List, Generator, Optional, Iterable +from typing import Tuple, Union, Dict, List, FrozenSet, Generator, Iterable class SafeString: - __slots__ = ('safe_str',) + __slots__ = ("safe_str",) def __init__(self, safe_str: str) -> None: self.safe_str = safe_str + def __hash__(self) -> int: + return hash(f"SafeString__{self.safe_str}") + Node = Union[ str, @@ -21,6 +24,69 @@ def __init__(self, safe_str: str) -> None: TagTuple = Tuple[str, Tuple[Node, ...], str] +_common_safe_attribute_names: FrozenSet[str] = frozenset( + { + "alt", + "autoplay", + "autoplay", + "charset", + "checked", + "class", + "colspan", + "content", + "contenteditable", + "dir", + "draggable", + "enctype", + "for", + "height", + "hidden", + "href", + "hreflang", + "http-equiv", + "id", + "itemprop", + "itemscope", + "itemtype", + "lang", + "loadable", + "method", + "name", + "onblur", + "onclick", + "onfocus", + "onkeydown", + "onkeyup", + "onload", + "onselect", + "onsubmit", + "placeholder", + "poster", + "property", + "rel", + "rowspan", + "sizes", + "spellcheck", + "src", + "style", + "target", + "title", + "type", + "value", + "width", + } +) + + +def escape_attribute_key(k: str) -> str: + return ( + escape(k) + .replace("=", "=") + .replace("\\", "\") + 
.replace("`", "`") + .replace(" ", " ") + ) + class Tag: __slots__ = ("tag_start", "rendered", "closing_tag", "no_children_close") @@ -36,13 +102,30 @@ def __init__(self, name: str, self_closing: bool = False) -> None: self.rendered = f"{self.tag_start}{self.no_children_close}" def __call__( - self, attributes: Dict[str, Optional[str]], *children: Node + self, + attributes: Dict[Union[SafeString, str], Union[str, SafeString, None]], + *children: Node, ) -> TagTuple: if attributes: # in this case this is faster than attrs = "".join([...]) attrs = "" for key, val in attributes.items(): - attrs += f" {key}" if val is None else f' {key}="{val}"' + # optimization: a large portion of attribute keys should be + # covered by this check. It allows us to skip escaping + # where it is not needed. Note this is for attribute names only; + # attributes values are always escaped (when they are `str`s) + if key not in _common_safe_attribute_names: + key = ( + key.safe_str + if isinstance(key, SafeString) + else escape_attribute_key(key) + ) + if isinstance(val, str): + attrs += f' {key}="{escape(val)}"' + elif isinstance(val, SafeString): + attrs += f' {key}="{val.safe_str}"' + elif val is None: + attrs += f" {key}" if children: return f"{self.tag_start}{attrs}>", children, self.closing_tag diff --git a/tests/test_render.py b/tests/test_simple_html.py similarity index 70% rename from tests/test_render.py rename to tests/test_simple_html.py index 119f7d1..91349a6 100644 --- a/tests/test_render.py +++ b/tests/test_simple_html.py @@ -19,6 +19,7 @@ Node, DOCTYPE_HTML5, render, + escape_attribute_key, ) @@ -97,9 +98,7 @@ def test_simple_form() -> None: def test_safestring_in_tag() -> None: - node = script( - {"type": "ld+json"}, SafeString(json.dumps({"some_key": "some_val"})) - ) + node = script({"type": "ld+json"}, SafeString(json.dumps({"some_key": "some_val"}))) assert render(node) == ('') @@ -160,3 +159,47 @@ def test_can_render_empty() -> None: render(div({}, [], "hello ", [], 
span({}, "World!"), [])) == "
<div>hello <span>World!</span></div>
" ) + + +def test_hash_for_safestring() -> None: + assert hash(SafeString("okokok")) == hash("SafeString__okokok") + + +def test_escape_key() -> None: + assert escape_attribute_key("") == "" + assert escape_attribute_key(">") == ">" + assert escape_attribute_key("<") == "<" + assert escape_attribute_key('"') == """ + assert escape_attribute_key("\\") == "\" + assert escape_attribute_key("'") == "'" + assert escape_attribute_key("=") == "=" + assert escape_attribute_key("`") == "`" + assert ( + escape_attribute_key("something with spaces") + == "something with spaces" + ) + + +def test_render_with_escaped_attributes() -> None: + assert ( + render(div({'onmousenter="alert(1)" noop': "1"})) + == '
' + ) + assert ( + render(span({"": ">"})) + == '' + ) + # vals and keys escape slightly differently + assert ( + render(div({'onmousenter="alert(1)" noop': 'onmousenter="alert(1)" noop'})) + == '
' + ) + + +def test_render_with_safestring_attributes() -> None: + bad_key = 'onmousenter="alert(1)" noop' + bad_val = "" + assert ( + render(div({SafeString(bad_key): SafeString(bad_val)})) + == f'
' + )