From 131f2e2871447dc332c85b5f66844d7fc4d6ab50 Mon Sep 17 00:00:00 2001
From: keithasaurus <592217+keithasaurus@users.noreply.github.com>
Date: Sun, 26 Nov 2023 13:24:55 -0800
Subject: [PATCH] Escape attributes by default (#9)
* wip
* wip
* ruff format
* wip
* escape tests
* docs
* wip
* add a few other common attributes
* rename var
* clarify comment
---
.github/workflows/push.yml | 5 +-
README.md | 40 +++++---
bench/simple.py | 34 ++-----
pyproject.toml | 2 +-
simple_html/__init__.py | 91 ++++++++++++++++++-
tests/{test_render.py => test_simple_html.py} | 49 +++++++++-
6 files changed, 170 insertions(+), 51 deletions(-)
rename tests/{test_render.py => test_simple_html.py} (70%)
diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index 3107f0e..50d5712 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -27,7 +27,4 @@ jobs:
run: poetry run pytest
- name: run bench (pure python)
run: poetry run python -m bench.run
- - name: compile
- run: poetry run mypyc simple_html
- - name: run bench (compiled)
- run: poetry run python -m bench.run
+
diff --git a/README.md b/README.md
index 0903b13..f3a3c50 100644
--- a/README.md
+++ b/README.md
@@ -61,18 +61,6 @@ render(
#
```
-Strings are escaped by default, but you can pass in `SafeString`s to avoid escaping.
-
-```python
-from simple_html import br, p, SafeString, render
-
-node = p({},
- "Escaped & stuff",
- br,
- SafeString("Not escaped & stuff"))
-
-render(node) # returns: <p>Escaped &amp; stuff<br/>Not escaped & stuff</p>
-```
Lists and generators are both valid collections of nodes:
```python
@@ -115,3 +103,31 @@ node = custom_elem(
render(node) # Wow
```
+
+
+Strings are escaped by default, but you can pass in `SafeString`s to avoid escaping.
+
+```python
+from simple_html import br, p, SafeString, render
+
+node = p({},
+ "Escaped & stuff",
+ br,
+ SafeString("Not escaped & stuff"))
+
+render(node) # <p>Escaped &amp; stuff<br/>Not escaped & stuff</p>
+```
+
+Attributes are also escaped -- both names and values. You can use `SafeString` to bypass, if needed.
+
+```python
+from simple_html import div, render, SafeString
+
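+escaped_attrs_node = div({"<bad>":"</also bad>"})
+
+render(escaped_attrs_node) # <div &lt;bad&gt;="&lt;/also bad&gt;"></div>
+
+unescaped_attrs_node = div({SafeString("<bad>"): SafeString("</also bad>")})
+
+render(unescaped_attrs_node) # <div <bad>="</also bad>"></div>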
+```
diff --git a/bench/simple.py b/bench/simple.py
index 367cbf5..f75d8fe 100644
--- a/bench/simple.py
+++ b/bench/simple.py
@@ -38,10 +38,7 @@ def basic(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
- [
- li({"class": "item-stuff"}, SafeString(ss))
- for ss in oks
- ],
+ [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
),
@@ -55,9 +52,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
"",
html(
{},
- head({},
- title({},
- title_)),
+ head({}, title({}, title_)),
body(
{},
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
@@ -66,10 +61,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
- [
- li({"class": "item-stuff"}, SafeString(ss))
- for ss in oks
- ],
+ [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
@@ -78,10 +70,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
- [
- li({"class": "item-stuff"}, SafeString(ss))
- for ss in oks
- ],
+ [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
@@ -90,10 +79,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
- [
- li({"class": "item-stuff"}, SafeString(ss))
- for ss in oks
- ],
+ [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
@@ -102,10 +88,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
- [
- li({"class": "item-stuff"}, SafeString(ss))
- for ss in oks
- ],
+ [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
@@ -114,10 +97,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
- [
- li({"class": "item-stuff"}, SafeString(ss))
- for ss in oks
- ],
+ [li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
),
diff --git a/pyproject.toml b/pyproject.toml
index bb8e3b8..a0a1f32 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-html"
-version = "1.0.1"
+version = "1.1.0"
readme = "README.md"
description = "Template-less html rendering in Python"
authors = ["Keith Philpott "]
diff --git a/simple_html/__init__.py b/simple_html/__init__.py
index a44df79..f12be76 100644
--- a/simple_html/__init__.py
+++ b/simple_html/__init__.py
@@ -1,14 +1,17 @@
from html import escape
from types import GeneratorType
-from typing import Tuple, Union, Dict, List, Generator, Optional, Iterable
+from typing import Tuple, Union, Dict, List, FrozenSet, Generator, Iterable
class SafeString:
- __slots__ = ('safe_str',)
+ __slots__ = ("safe_str",)
def __init__(self, safe_str: str) -> None:
self.safe_str = safe_str
+ def __hash__(self) -> int:
+ return hash(f"SafeString__{self.safe_str}")
+
Node = Union[
str,
@@ -21,6 +24,69 @@ def __init__(self, safe_str: str) -> None:
TagTuple = Tuple[str, Tuple[Node, ...], str]
+_common_safe_attribute_names: FrozenSet[str] = frozenset(
+ {
+ "alt",
+ "autoplay",
+ "autoplay",
+ "charset",
+ "checked",
+ "class",
+ "colspan",
+ "content",
+ "contenteditable",
+ "dir",
+ "draggable",
+ "enctype",
+ "for",
+ "height",
+ "hidden",
+ "href",
+ "hreflang",
+ "http-equiv",
+ "id",
+ "itemprop",
+ "itemscope",
+ "itemtype",
+ "lang",
+ "loadable",
+ "method",
+ "name",
+ "onblur",
+ "onclick",
+ "onfocus",
+ "onkeydown",
+ "onkeyup",
+ "onload",
+ "onselect",
+ "onsubmit",
+ "placeholder",
+ "poster",
+ "property",
+ "rel",
+ "rowspan",
+ "sizes",
+ "spellcheck",
+ "src",
+ "style",
+ "target",
+ "title",
+ "type",
+ "value",
+ "width",
+ }
+)
+
+
+def escape_attribute_key(k: str) -> str:
+ return (
+ escape(k)
+ .replace("=", "&#x3D;")
+ .replace("\\", "&#x5C;")
+ .replace("`", "&#x60;")
+ .replace(" ", "&nbsp;")
+ )
+
class Tag:
__slots__ = ("tag_start", "rendered", "closing_tag", "no_children_close")
@@ -36,13 +102,30 @@ def __init__(self, name: str, self_closing: bool = False) -> None:
self.rendered = f"{self.tag_start}{self.no_children_close}"
def __call__(
- self, attributes: Dict[str, Optional[str]], *children: Node
+ self,
+ attributes: Dict[Union[SafeString, str], Union[str, SafeString, None]],
+ *children: Node,
) -> TagTuple:
if attributes:
# in this case this is faster than attrs = "".join([...])
attrs = ""
for key, val in attributes.items():
- attrs += f" {key}" if val is None else f' {key}="{val}"'
+ # optimization: a large portion of attribute keys should be
+ # covered by this check. It allows us to skip escaping
+ # where it is not needed. Note this is for attribute names only;
+ # attributes values are always escaped (when they are `str`s)
+ if key not in _common_safe_attribute_names:
+ key = (
+ key.safe_str
+ if isinstance(key, SafeString)
+ else escape_attribute_key(key)
+ )
+ if isinstance(val, str):
+ attrs += f' {key}="{escape(val)}"'
+ elif isinstance(val, SafeString):
+ attrs += f' {key}="{val.safe_str}"'
+ elif val is None:
+ attrs += f" {key}"
if children:
return f"{self.tag_start}{attrs}>", children, self.closing_tag
diff --git a/tests/test_render.py b/tests/test_simple_html.py
similarity index 70%
rename from tests/test_render.py
rename to tests/test_simple_html.py
index 119f7d1..91349a6 100644
--- a/tests/test_render.py
+++ b/tests/test_simple_html.py
@@ -19,6 +19,7 @@
Node,
DOCTYPE_HTML5,
render,
+ escape_attribute_key,
)
@@ -97,9 +98,7 @@ def test_simple_form() -> None:
def test_safestring_in_tag() -> None:
- node = script(
- {"type": "ld+json"}, SafeString(json.dumps({"some_key": "some_val"}))
- )
+ node = script({"type": "ld+json"}, SafeString(json.dumps({"some_key": "some_val"})))
assert render(node) == ('<script type="ld+json">{"some_key": "some_val"}</script>')
@@ -160,3 +159,47 @@ def test_can_render_empty() -> None:
render(div({}, [], "hello ", [], span({}, "World!"), []))
== "<div>hello <span>World!</span></div>"
)
+
+
+def test_hash_for_safestring() -> None:
+ assert hash(SafeString("okokok")) == hash("SafeString__okokok")
+
+
+def test_escape_key() -> None:
+ assert escape_attribute_key("") == ""
+ assert escape_attribute_key(">") == "&gt;"
+ assert escape_attribute_key("<") == "&lt;"
+ assert escape_attribute_key('"') == "&quot;"
+ assert escape_attribute_key("\\") == "&#x5C;"
+ assert escape_attribute_key("'") == "&#x27;"
+ assert escape_attribute_key("=") == "&#x3D;"
+ assert escape_attribute_key("`") == "&#x60;"
+ assert (
+ escape_attribute_key("something with spaces")
+ == "something&nbsp;with&nbsp;spaces"
+ )
+
+
+def test_render_with_escaped_attributes() -> None:
+ assert (
+ render(div({'onmousenter="alert(1)" noop': "1"}))
+ == '<div onmousenter&#x3D;&quot;alert(1)&quot;&nbsp;noop="1"></div>'
+ )
+ assert (
+ render(span({"": ">"}))
+ == '<span ="&gt;"></span>'
+ )
+ # vals and keys escape slightly differently
+ assert (
+ render(div({'onmousenter="alert(1)" noop': 'onmousenter="alert(1)" noop'}))
+ == '<div onmousenter&#x3D;&quot;alert(1)&quot;&nbsp;noop="onmousenter=&quot;alert(1)&quot; noop"></div>'
+ )
+
+
+def test_render_with_safestring_attributes() -> None:
+ bad_key = 'onmousenter="alert(1)" noop'
+ bad_val = ""
+ assert (
+ render(div({SafeString(bad_key): SafeString(bad_val)}))
+ == f'<div {bad_key}="{bad_val}"></div>'
+ )